You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
156 lines
7.2 KiB
156 lines
7.2 KiB
////////////////////////////////////////////////////////////////////////////
|
|
// Copyright KAZ 2021 //
|
|
// //
|
|
// contact (at) kaz.bzh //
|
|
// //
|
|
// This software is a filter to shrink email by attachment extraction. //
|
|
// //
|
|
// This software is governed by the CeCILL-B license under French law and //
|
|
// abiding by the rules of distribution of free software. You can use, //
|
|
// modify and/or redistribute the software under the terms of the //
|
|
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|
// URL "http://www.cecill.info". //
|
|
// //
|
|
// As a counterpart to the access to the source code and rights to copy, //
|
|
// modify and redistribute granted by the license, users are provided //
|
|
// only with a limited warranty and the software's author, the holder of //
|
|
// the economic rights, and the successive licensors have only limited //
|
|
// liability. //
|
|
// //
|
|
// In this respect, the user's attention is drawn to the risks associated //
|
|
// with loading, using, modifying and/or developing or reproducing the //
|
|
// software by the user in light of its specific status of free software, //
|
|
// that may mean that it is complicated to manipulate, and that also //
|
|
// therefore means that it is reserved for developers and experienced //
|
|
// professionals having in-depth computer knowledge. Users are therefore //
|
|
// encouraged to load and test the software's suitability as regards //
|
|
// their requirements in conditions enabling the security of their //
|
|
// systems and/or data to be ensured and, more generally, to use and //
|
|
// operate it in the same conditions as regards security. //
|
|
// //
|
|
// The fact that you are presently reading this means that you have had //
|
|
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef _kaz_Attachment_hpp
|
|
#define _kaz_Attachment_hpp
|
|
|
|
#include <map>
|
|
#include <regex>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include "EmbeddedData.hpp"
|
|
|
|
namespace kaz {
|
|
|
|
using namespace std;
|
|
|
|
// ================================================================================
|
|
/*! e-mail structure */
|
|
class Attachment {
|
|
public:
|
|
/*! tokens indicat candidats to be updated by remove attachment */
|
|
//static const vector<const string> stringsToUpdate;
|
|
static vector<string> stringsToUpdate;
|
|
/*! mime tokens */
|
|
static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, MULTIPART, RELATED, ALTERNATIVE, SIGNED, KAZ_ATTACH_NAME;
|
|
/*! pattern to extract mime values */
|
|
static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx;
|
|
|
|
/*! get uniq filename */
|
|
static string getUnknown (const string &ext = "");
|
|
/*! remove all sections in content given by boundary tags */
|
|
static void removeSection (string &content, const string &beginTag, const string &endTag);
|
|
/*! catenates all sections in content given by boundary tags (use temporary vector) */
|
|
static string getSection (const string &content, const string &beginTag, const string &endTag);
|
|
/*! get all sections in content given by boundary marks and put them in result */
|
|
static void getSection (const string &content, const string &beginTag, const string &endTag, vector<string> &result);
|
|
|
|
/*! return the content-type */
|
|
const string getContentType () const;
|
|
/*! return the filename in mime (or uniq name if missing) */
|
|
const string getAttachName () const;
|
|
/*! return reference to the saved boundary. Empty value if attachment is not a multipart */
|
|
const string &getBoundary () const;
|
|
/*! return the size of the content */
|
|
const streamoff getSize () const;
|
|
/*! get a part of a mime header value */
|
|
const string getProp (const string &token, const regex ®Ex) const;
|
|
|
|
/*! return if base64 encoded */
|
|
const bool isBase64Encoding () const;
|
|
/*! return if quoted-printable encoded */
|
|
const bool isQuotedPrintableEnconding () const;
|
|
/*! return if text (plain or html) and base64 encoded */
|
|
const bool isTextBase64 () const;
|
|
/*! return check if value exists in mime header */
|
|
const bool isDefProp (const string &token, const string &val) const;
|
|
|
|
protected:
|
|
/*! HTML image tag*/
|
|
static const string IMG_BEGIN, IMG_END;
|
|
|
|
/*! Attachment level (0 is main) */
|
|
const int level;
|
|
/*! char position in the mbox of the boundary before this attachment */
|
|
const streamoff beginInParent;
|
|
/*! char position of attachment including mime */
|
|
const streamoff beginPos;
|
|
/*! char position of attachment content */
|
|
streamoff contentPos, endPos;
|
|
/*! properties of the attachment */
|
|
bool toExtract, toUpdate, toDisclaim, isKazAttachment, isSigned;
|
|
/*! id of an image embedded in mbox */
|
|
string cid;
|
|
/*! url to replace the attachment */
|
|
string downloadUrl;
|
|
/*! properties of embedded image (self encoded with base64)*/
|
|
vector<EmbeddedData> embeddedData;
|
|
|
|
/*! mime values of the attachment */
|
|
map<string, string> env;
|
|
/*! boundary if the attachment is a multipart including previous and next "--" */
|
|
string boundary;
|
|
/*! size of boundary before the last "--" */
|
|
streamoff boundaryMiddleSize;
|
|
|
|
/*! sub attachment if the attachment is a multipart */
|
|
vector<Attachment> subAttachements;
|
|
|
|
/*! called during the parse process */
|
|
Attachment (ifstream &mbox, const int &level, const streamoff beginInParent, streamoff &curPos);
|
|
|
|
/*! called one time by the constructor */
|
|
void readMime (ifstream &mbox, streamoff &curPos);
|
|
/*! called one time by the constructor */
|
|
void readBoundaries (ifstream &mbox, streamoff &curPos);
|
|
/*! called for each part during the parse process add add a subAttachement. Return false when found last boundary */
|
|
bool nextBondary (ifstream &mbox, streamoff &curPos);
|
|
|
|
/*! recursively marks alternative attachments to be disclaim */
|
|
void markDisclaim (bool &plainMarked, bool &htmlMarked);
|
|
/*! recursively marks big attachments to be removed and upated (including disclaim). return true when part need to be updated (can't be extracted). */
|
|
bool markSignificant (const string &parentMultiProp, const bool &parentSigned, const streamoff &minAttachSize, ifstream &mbox, vector<Attachment *> &allMarkedPtrs);
|
|
/*! get a copy of mime header */
|
|
string getMime (ifstream &mbox) const;
|
|
/*! get a copy of the content. Base64 is decoded. Quoted-Printable is unwarp and unquoted */
|
|
string getContent (ifstream &mbox) const;
|
|
/*! write the content, encoded if necessary (base64 and quoted-printable) */
|
|
void println (ofstream &outbox, string content) const;
|
|
|
|
/*! replace embedded image */
|
|
void replaceEmbedded (string &content) const;
|
|
|
|
public:
|
|
friend class MainAttachment;
|
|
friend ostream& operator << (ostream& os, const Attachment& attachment);
|
|
};
|
|
|
|
/*! for debug purpose */
|
|
/*!
 * Stream-insertion operator for Attachment (declared as a friend of the class).
 * \param os stream receiving the output
 * \param attachment attachment passed by const reference
 * \return an ostream reference (presumably os, to allow chaining -- confirm in the definition)
 */
ostream& operator << (ostream& os, const Attachment& attachment);
|
|
|
|
// ================================================================================
|
|
}
|
|
|
|
#endif // _kaz_Attachment_hpp
|
|
|