This commit is contained in:
2021-05-06 09:58:16 +02:00
parent 3f640e6e13
commit 64f3e49c37
15 changed files with 3075 additions and 0 deletions

154
src/include/Attachment.hpp Normal file
View File

@ -0,0 +1,154 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#ifndef _kaz_Attachment_hpp
#define _kaz_Attachment_hpp
#include <string>
#include <regex>
#include <map>
#include <utility>
#include "EmbeddedData.hpp"
namespace kaz {
using namespace std;
// ================================================================================
/*! e-mail structure: one part of a message; a multipart part holds its own sub-parts (see subAttachements) */
class Attachment {
public:
/*! tokens indicating candidates to be updated when attachments are removed */
//static const vector<const string> stringsToUpdate;
static vector<string> stringsToUpdate;
/*! mime tokens */
static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, RELATED, ALTERNATIVE;
/*! patterns to extract mime values */
static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx;
/*! get a unique filename (ext is optional and empty by default; presumably the file extension to use - confirm in the definition) */
static string getUnknown (const string &ext = "");
/*! remove all sections of content delimited by the boundary tags beginTag/endTag (content is modified in place) */
static void removeSection (string &content, const string &beginTag, const string &endTag);
/*! concatenate all sections of content delimited by the boundary tags (uses a temporary vector) */
static string getSection (const string &content, const string &beginTag, const string &endTag);
/*! get all sections of content delimited by the boundary tags and put them in result */
static void getSection (const string &content, const string &beginTag, const string &endTag, vector<string> &result);
/*! return the content-type */
const string getContentType () const;
/*! return the filename found in the mime header (or a unique name if missing) */
const string getAttachName () const;
/*! return a reference to the saved boundary. Empty value if the attachment is not a multipart */
const string &getBoundary () const;
/*! return the size of the content */
const streamoff getSize () const;
/*! get a part of a mime header value: token selects the header, regEx extracts the part */
const string getProp (const string &token, const regex &regEx) const;
/*! return true if base64 encoded */
const bool isBase64Encoding () const;
/*! return true if quoted-printable encoded (the identifier is misspelled "Enconding"; kept as is because callers depend on it) */
const bool isQuotedPrintableEnconding () const;
/*! return true if text (plain or html) and base64 encoded */
const bool isTextBase64 () const;
/*! check whether the value val exists in the mime header designated by token */
const bool isDefProp (const string &token, const string &val) const;
protected:
/*! HTML image tag */
static const string IMG_BEGIN, IMG_END;
/*! attachment level (0 is main) */
const int level;
/*! char position in the mbox of the boundary before this attachment */
const streamoff beginInParent;
/*! char position of the attachment, mime header included */
const streamoff beginPos;
/*! char position of the attachment content (contentPos); endPos is presumably the position where the attachment ends - confirm in the definition */
streamoff contentPos, endPos;
/*! properties of the attachment (set by the mark* methods, see markDisclaim and markSignificant) */
bool toExtract, toUpdate, toDisclaim;
/*! id of an image embedded in mbox */
string cid;
/*! url replacing the attachment and its short id */
string downloadUrl, downloadId;
/*! properties of embedded images (self encoded with base64) */
vector<EmbeddedData> embeddedData;
/*! mime values of the attachment */
map<string, string> env;
/*! boundary if the attachment is a multipart, including the previous and next "--" */
string boundary;
/*! size of the boundary before the last "--" */
streamoff boundaryMiddleSize;
/*! sub attachments if the attachment is a multipart */
vector<Attachment> subAttachements;
/*! called during the parse process (protected: only this class and the friend MainAttachment can build one) */
Attachment (ifstream &mbox, const int &level, const streamoff beginInParent, streamoff &curPos);
/*! called one time by the constructor */
void readMime (ifstream &mbox, streamoff &curPos);
/*! called one time by the constructor */
void readBoundaries (ifstream &mbox, streamoff &curPos);
/*! called for each part during the parse process to add a subAttachement. Return false when the last boundary is found (the identifier is misspelled "Bondary"; kept as is) */
bool nextBondary (ifstream &mbox, streamoff &curPos);
/*! recursively mark alternative attachments to be disclaimed */
void markDisclaim (bool &plainMarked, bool &htmlMarked);
/*! recursively mark big attachments to be removed and updated (including disclaim). Return true when the part needs to be updated (can't be extracted). */
bool markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector<Attachment *> &allMarkedPtrs);
/*! get a copy of the content. Base64 is decoded. Quoted-Printable is unwrapped and unquoted */
string getContent (ifstream &mbox) const;
/*! write the content, encoded if necessary (base64 and quoted-printable); content is taken by value */
void println (ofstream &outbox, string content) const;
/*! replace embedded images in content (content is modified in place) */
void replaceEmbedded (string &content) const;
public:
friend class MainAttachment;
friend ostream& operator << (ostream& os, const Attachment& attachment);
};
/*! for debug purpose: stream insertion of an Attachment (declared friend of the class) */
ostream& operator << (ostream& os, const Attachment& attachment);
// ================================================================================
}
#endif // _kaz_Attachment_hpp

View File

@ -0,0 +1,70 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#ifndef _kaz_EmbeddedData_hpp
#define _kaz_EmbeddedData_hpp
#include <string>
#include <vector>
namespace kaz {
using namespace std;
// ================================================================================
/*! properties of an image embedded in an html part (rfc2397) */
class EmbeddedData {
public:
/*! rank of this image tag */
int imgIdx;
/*! extracted in the first pass */
string contentType, name;
/*! download url and id (same member names as in Attachment; presumably filled in a later pass - confirm) */
string downloadUrl, downloadId;
/*! area of the base64 data, relative to the image section */
string::size_type startData, dataLength;
/*! initialisation in the first pass (downloadUrl and downloadId are not constructor parameters) */
EmbeddedData (const int &imgIdx, const string &contentType, const string &name, const string::size_type &startData, const string::size_type &dataLength);
/*! record the properties of the image sections imgs into data (minAttachSize is presumably the size threshold below which an image is ignored - confirm in the definition) */
static void fillEmbeddedData (const vector<string> &imgs, const streamoff &minAttachSize, vector<EmbeddedData> &data);
// friend ostream& operator << (ostream& os, const EmbeddedData& embeddedData);
};
/*! stream insertion of an EmbeddedData (not a friend: every member of the class is public) */
ostream& operator << (ostream& os, const EmbeddedData& embeddedData);
// ================================================================================
}
#endif // _kaz_EmbeddedData_hpp

View File

@ -0,0 +1,121 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#ifndef _kaz_MainAttachment_hpp
#define _kaz_MainAttachment_hpp
#include <boost/filesystem.hpp>
#include "Attachment.hpp"
namespace kaz {
using namespace std;
namespace bfs = boost::filesystem;
// ================================================================================
/*! root level of the e-mail structure */
class MainAttachment : public Attachment {
public:
/*! texts to add in the disclaim */
static const string templatePlainAddLink, templatePlainAllLink, templateHtmlHeader, templateHtmlAddLink, templateHtmlOtherLink, templateHtmlAllLink, templateHtmlFooter;
/*! white space pattern used to split a text */
static const regex whiteSpaceRegEx;
/*! copy a slice of mbox (delimited by begin and end) to outbox */
static void copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end);
/*! get url and id (space separated) from stdin */
void fillUrlId (string &url, string &id);
/*! location of extracted files */
void setExtractDir (const bfs::path &extractDir);
/*! URL base for the archive download of all extracted files */
void setArchiveDownloadURL (const string &archiveDownloadURL);
/*! add a single link in the disclaim (both the plain and the html versions are updated) */
void addLink (string &plain, string &html, const string &url, const string &name) const;
/*! get the disclaim according to all links (retrieved or created) */
void getDisclaim (string &plain, string &html) const;
private:
/*! bootstrap for the attachment constructor: reset tmpPos to 0 and return a reference to it (the base constructor takes its position by reference) */
streamoff &initTmpPos () { return tmpPos = 0; }
/*! bootstrap for the attachment constructor: reset tmpLevel to 0 and return a reference to it (the base constructor takes its level by reference) */
int &initTmpLevel () { return tmpLevel = 0; }
/*! temporary values behind initTmpPos and initTmpLevel */
streamoff tmpPos;
int tmpLevel;
/*! dir path for extraction */
bfs::path extractDir;
/*! URL base for download archives */
string archiveDownloadURL;
/*! subset in the tree of all attachments to be considered for extraction or modification */
vector<Attachment *> allMarkedPtrs;
/*! previous links found in mbox */
map<string, string> previousLinks;
/*! add the link only if no significant value already exists. */
void addPrevious (const string &href, const string &name);
/*! extract previous links from plain text. Used by extractPreviousKAZ */
void extractLinks (const string &extractedPlainKAZ);
/*! extract previous links from an html-li list. Used by extractPreviousKAZ */
void extractLinks (const vector<string> &liOne);
/*! extract previous links in mbox. Used by getUpdatedURL and substitute */
void extractPreviousKAZ (ifstream &mbox);
/*! remove previous links to the archive. Used by substitute */
void removePreviousArchive ();
public:
/*! the main attachment in mbox */
MainAttachment (ifstream &mbox);
/*! mark disclaim, update and extract attachments. Must be called before: getUpdatedURL, extract or substitute */
void markSignificant (const streamoff &minAttachSize, ifstream &mbox);
/*! write to stdout the list of previous links in mbox */
void getUpdatedURL (ifstream &mbox);
/*! create a record for extraction (dirName and mediaName are output parameters) */
void newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const;
// NOTE(review): SizeArg is used below but none of the headers included by this file appears to declare it; presumably SizeArg.hpp must be included first - confirm.
/*! extract big attachments in mbox to extractDir and write to stdout the dirname of each extraction */
void extract (ifstream &mbox, const SizeArg &minSize) const;
/*! substitute big attachments by the url given in stdin; the result is written to outbox */
void substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize);
};
// ================================================================================
}
#endif // _kaz_MainAttachment_hpp

74
src/include/SizeArg.hpp Normal file
View File

@ -0,0 +1,74 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#ifndef _kaz_SizeArg_hpp
#define _kaz_SizeArg_hpp
#include <iostream>
#include <string>
#include <boost/lexical_cast.hpp>
namespace kaz {
using namespace std;
// ================================================================================
/*! human readable representation of size values */
class SizeArg {
private:
/*! the size, in bytes */
size_t bytes;
/*! human readable conversion: set the size from the text option */
void init (const string &option);
public:
/*! scalar conversion (implicit) */
operator size_t () const { return bytes; }
/*! initialization from a scalar value (not explicit, so it also serves as default constructor and implicit conversion) */
SizeArg (const size_t &bytes = 0);
/*! initialization from a human readable value (not explicit: a string converts implicitly) */
SizeArg (const string &option);
friend ostream &operator << (ostream &out, const SizeArg &sizeArg);
friend istream &operator >> (istream &in, SizeArg &sizeArg);
};
// ================================================================================
/*! human readable conversion: write a SizeArg to a stream and read one from a stream (both are friends of SizeArg) */
ostream &operator << (ostream &out, const SizeArg &sizeArg);
istream &operator >> (istream &in, SizeArg &sizeArg);
}
#endif // _kaz_SizeArg_hpp

134
src/include/kazDebug.hpp Normal file
View File

@ -0,0 +1,134 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#ifndef _Kaz_Debug_hpp
#define _Kaz_Debug_hpp
#include <iostream>
#include <string>
/*! log error: when cond is true, write expr to std::cerr (preceded and followed by a newline, then flushed) and execute action.
 * expr is pasted after "<<", so it may itself chain several "<<".
 * Every stream manipulator is std:: qualified so the macro also works where "using namespace std" is not in scope. */
#define LOG_BUG(cond, action, expr) {if (cond) {std::cerr << std::endl << expr << std::endl << std::flush; action; }}
/*! optional "smart" log layer: with ENABLE_SMART_LOG the SMART_* macros forward to DEF_LOG / LOG (or run expr when ::kaz::Log::debug is set); without it they expand to nothing.
 * Each macro is guarded by #ifndef, so a definition made before this header is included is kept. */
#ifdef ENABLE_SMART_LOG
#ifndef SMART_DEF_LOG
#define SMART_DEF_LOG(name, expr) DEF_LOG (name, expr)
#endif
#ifndef SMART_LOG
#define SMART_LOG(expr) LOG (expr)
#endif
#ifndef SMART_LOG_EXPR
/*! execute the statement expr only when the log is switched on */
#define SMART_LOG_EXPR(expr) {if (::kaz::Log::debug) {expr;} }
#endif
#else
#ifndef SMART_DEF_LOG
#define SMART_DEF_LOG(name, expr)
#endif
#ifndef SMART_LOG
#define SMART_LOG(expr)
#endif
#ifndef SMART_LOG_EXPR
#define SMART_LOG_EXPR(expr)
#endif
#endif
#ifdef DISABLE_LOG
#ifndef DEF_LOG
#define DEF_LOG(name, expr)
#endif
#ifndef LOG
#define LOG(expr) {}
#endif
#ifndef DEBUG
#define DEBUG(expr) {}
#endif
#else
#ifndef DEF_LOG
/*! to placed as the first instruction to log entry and return method */
#define DEF_LOG(name, expr) ::kaz::Log log (name); { if (::kaz::Log::debug) std::cerr << expr << std::endl << std::flush; }
#endif
#ifndef LOG
/*! to placed in methode where DEF_LOG if call previously */
// _______________________________________________________ Don't forget DEF_LOG
#define LOG(expr) { if (::kaz::Log::debug) std::cerr << log << "| " << expr << std::endl << std::flush; }
#endif
#ifndef DEBUG
/*! log without format */
#define DEBUG(expr) { if (::kaz::Log::debug) std::cerr << expr << std::endl << std::flush; }
#endif
#endif
namespace kaz {
// ================================================================================
using namespace std;
/*! manage pretty-printed log; the DEF_LOG macro declares one instance of it per logged method */
class Log {
/*! visual indentation of calls (shared by all instances) */
static size_t indent;
/*! name recalled in the log */
string functName;
public:
/*! switch on the log (tested by the LOG, DEBUG and DEF_LOG macros) */
static bool debug;
/*! log the entry of a method */
Log (const string &functName);
/*! log the return of a method */
~Log ();
/*! timestamp of the log */
static string getLocalTimeStr ();
friend ostream &operator << (ostream &out, const Log &log);
};
/*! stream insertion of a Log (friend of the class); the LOG macro uses it as line prefix */
ostream &operator << (ostream &out, const Log &log);
// ================================================================================
} // kaz
#endif //_Kaz_Debug_hpp

141
src/include/kazMisc.hpp Normal file
View File

@ -0,0 +1,141 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#ifndef _kaz_misc_hpp
#define _kaz_misc_hpp
#include <string>
#include <ctype.h>
#include <map>
namespace kaz {
using namespace std;
// =======================================================================
/*! ordered base64 chars (declaration only: extern, defined in another translation unit) */
extern const char * const base64Chars;
/*! set of chars available in a URL (declaration only: extern, defined in another translation unit) */
extern const string availableURLChars;
// =======================================================================
/*! get the width of the terminal (NOTE(review): uint16_t is used but no header declaring it is explicitly included above - confirm it comes through another include) */
uint16_t getCols ();
/*! display time: return delta as text (presumably delta is in nanoseconds, as the name suggests - confirm in the definition) */
string ns2string (const double &delta);
// =======================================================================
/*! side effect on str: replace "from" by "to" */
void replaceAll (string& str, const string &from, const string &to);
/*! side effect on str: replace a set of "from" by a set of "to" (subst presumably maps each "from" to its "to" - confirm in the definition) */
void replaceAll (string& str, const map<const string, const string> &subst);
// =======================================================================
/*! side effect: lower case a string (in a mime section) */
void toLower (string &content);
/*! string comparisons are done in upper case to avoid accents. Giving the token already in upper case speeds up the process (tmp is scratch storage; presumably the returned reference designates either src or tmp - confirm in the definition) */
const string &toUpperIfNeed (const string &src, string &tmp);
/*! find the upper case of p in the upper case of s, starting at pos */
string::size_type caseInsensitiveFind (const string& s, const string& p, const string::size_type &pos = 0);
/*! reverse find the upper case of p in the upper case of s (NOTE(review): the default pos is 0 whereas string::rfind defaults to npos - confirm this is intended) */
string::size_type caseInsensitiveRFind (const string& s, const string& p, const string::size_type &pos = 0);
/*! side effect: replace =XX by the char with the hexadecimal value XX. The delimiter could be '%' (%XX) as in rfc2184.
 * Only declared here: NOTE(review) the template definition or its explicit instantiations must be provided elsewhere - confirm */
template<char delim='='>
void quotedDecode (string &content);
/*! side effect: quoted-printable encode content (rfc2045) */
void quotedEncode (string &content);
/*! side effect: decode base64 */
void base64Decode (string &content);
/*! side effect: encode base64 */
void base64Encode (string &content);
/*! side effect: change the charset of content (presumably from an ISO charset to UTF-8, as the name suggests - confirm) */
void iso2utf (string &content);
/*! side effect: get the encodedWord according to rfc2047 */
void encodedWord (string &content);
/*! side effect: get the charsetValue according to rfc2184 */
void charsetValue (string &content);
// =======================================================================
/*! return true when c can be written as is (no =XX quoting needed) in quoted-printable content */
inline bool
isQuotedPrintable (const char &c) {
  // blanks are always kept as is
  if (c == ' ' || c == '\t')
    return true;
  // outside the printable ASCII range: must be quoted
  if (c < 33 || c > 126)
    return false;
  // '=' is the quote marker itself.
  // '.' is available in rfc2184 but quoting it avoids checking for a '.' alone in a line :-)
  return c != '=' && c != '.';
}
/*! return true when c belongs to the base64 alphabet: A-Z, a-z, 0-9, '+' and '/' (the '=' padding char is not accepted) */
inline bool
isBase64 (const char &c) {
  // Explicit ASCII ranges instead of isalnum (): giving isalnum () a negative
  // char (any byte >= 0x80 where char is signed) is undefined behaviour, and
  // its answer depends on the current locale whereas the alphabet is ASCII.
  return
    (c >= 'A' && c <= 'Z') ||
    (c >= 'a' && c <= 'z') ||
    (c >= '0' && c <= '9') ||
    c == '+' || c == '/';
}
/*! get the rank of c in the base64 set of values: 'A'-'Z' give 0-25, 'a'-'z' 26-51, '0'-'9' 52-61, '+' 62 and '/' 63.
 * c is expected to be a base64 char; no check is done here. */
inline unsigned char
getBase64Val (const char &c) {
  // the two symbols are handled first: their codes are below '9'
  switch (c) {
  case '+':
    return 62;
  case '/':
    return 63;
  default:
    break;
  }
  // offset to add to the char code, chosen by range: digits, upper case, lower case
  const int shift = c <= '9' ? 52 - '0' : (c <= 'Z' ? -'A' : 26 - 'a');
  return c + shift;
}
/*! get the nibble value (0-15) of c, the char representation of a hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f').
 * c is expected to be a hexadecimal digit; no check is done here. */
inline unsigned char
getHexaVal (const char &c) {
  // char code standing for the value 0 in the range c belongs to
  const int zero = c <= '9' ? '0' : (c <= 'F' ? 'A' - 10 : 'a' - 10);
  return c - zero;
}
/*! get the two upper case hexadecimal digits representing the byte c: upper receives the digit of the high nibble, lower the digit of the low nibble */
inline void
getHexa (const char &c, char &upper, char &lower) {
  const char *const digits = "0123456789ABCDEF";
  // the mask keeps only 4 bits, so the sign of c does not matter after the shift
  upper = digits[(c >> 4) & 0xF];
  lower = digits[c & 0xF];
}
// =======================================================================
}
#endif // _kaz_misc_hpp