////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021                                                     //
//                                                                        //
// contact (at) kaz.bzh                                                   //
//                                                                        //
// This software is a filter to shrink email by attachment extraction.    //
//                                                                        //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use,    //
// modify and/or redistribute the software under the terms of the         //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info".                                          //
//                                                                        //
// As a counterpart to the access to the source code and rights to copy,  //
// modify and redistribute granted by the license, users are provided     //
// only with a limited warranty and the software's author, the holder of  //
// the economic rights, and the successive licensors have only limited    //
// liability.                                                             //
//                                                                        //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the    //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also      //
// therefore means that it is reserved for developers and experienced     //
// professionals having in-depth computer knowledge. Users are therefore  //
// encouraged to load and test the software's suitability as regards      //
// their requirements in conditions enabling the security of their        //
// systems and/or data to be ensured and, more generally, to use and      //
// operate it in the same conditions as regards security.                 //
//                                                                        //
// The fact that you are presently reading this means that you have had   //
// knowledge of the CeCILL-B license and that you accept its terms.       //
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef _kaz_MainAttachment_hpp
|
|
#define _kaz_MainAttachment_hpp
|
|
|
|
#include <boost/filesystem.hpp>
|
|
#include "Attachment.hpp"
|
|
|
|
namespace kaz {
|
|
|
|
using namespace std;
|
|
namespace bfs = boost::filesystem;
|
|
|
|
// ================================================================================
|
|
/*! root level of e-mail structure */
|
|
class MainAttachment : public Attachment {
|
|
public:
|
|
/*! text to add in disclaim */
|
|
static const string templatePlainAddLink, templatePlainAllLink, templateHtmlHeader, templateHtmlAddLink, templateHtmlOtherLink, templateHtmlAllLink, templateHtmlFooter;
|
|
|
|
/*! white space to split a text */
|
|
static const regex whiteSpaceRegEx;
|
|
|
|
/*! copy a slice of mbox to stdout */
|
|
static void copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end);
|
|
|
|
/*! get url from stdin */
|
|
void readDownloadUrl (string &url);
|
|
|
|
/*! get archive url from stdin */
|
|
void readArchiveUrl ();
|
|
|
|
/*! location of extracted files */
|
|
void setExtractDir (const bfs::path &extractDir);
|
|
/*! add a single link in disclaim */
|
|
void addLink (string &plain, string &html, const string &url, const string &name) const;
|
|
/*! get disclaim according alls links (retreived or create) */
|
|
void getDisclaim (string &plain, string &html) const;
|
|
|
|
private:
|
|
/*! for boot strap the attachment constructor */
|
|
streamoff &initTmpPos () { return tmpPos = 0; }
|
|
/*! for boot strap the attachment constructor */
|
|
int &initTmpLevel () { return tmpLevel = 0; }
|
|
|
|
/*! volatile values*/
|
|
streamoff tmpPos;
|
|
int tmpLevel;
|
|
|
|
/*! dir path for extraction */
|
|
bfs::path extractDir;
|
|
/*! URL for download archives */
|
|
string archiveDownloadURL;
|
|
/*! no main text in email can be use to add disclaim */
|
|
bool forceMainText;
|
|
|
|
/*! subset in the tree of all attachments to be consider for extraction or modification */
|
|
vector<Attachment *> allMarkedPtrs;
|
|
/*! previous links find in mbox */
|
|
map<string, string> previousLinks;
|
|
/*! add link only if no significant value already exist. Trust the values from html.*/
|
|
void addPrevious (const string &href, const string &name, const bool &trust = false);
|
|
|
|
/*! extract previous links from plain text. Used by extractPreviousKAZ */
|
|
void extractLinks (const string &extractedPlainKAZ);
|
|
/*! extract previous links from html-li list. Used by extractPreviousKAZ */
|
|
void extractLinks (const vector<string> &liOne);
|
|
/*! extract previous links in mbox. Used by getUpdatedURL and substitute */
|
|
void extractPreviousKAZ (ifstream &mbox);
|
|
/*! remove previous links to archive. Used by substitute */
|
|
void removePreviousArchive ();
|
|
|
|
public:
|
|
/*! the main attachment in mbox */
|
|
MainAttachment (ifstream &mbox);
|
|
|
|
/*! mark disclaim, update and extract attachments. Must be call before: getUpdatedURL, extract or substitute */
|
|
void markSignificant (const streamoff &minAttachSize, ifstream &mbox);
|
|
/*! write to stdout le list of previous links in mbox */
|
|
void getUpdatedURL (ifstream &mbox);
|
|
/*! create record for extraction */
|
|
void newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const;
|
|
/*! extract big attachments in mbox to extractDir and write to stdout le dirname of each extraction */
|
|
void extract (ifstream &mbox, const SizeArg &minSize) const;
|
|
/*! substitute big attachments by the url give in stdin */
|
|
void substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize);
|
|
};
|
|
|
|
// ================================================================================
|
|
}
|
|
|
|
#endif // _kaz_MainAttachment_hpp
|