Attachment mode

This commit is contained in:
2022-11-26 21:55:05 +01:00
parent 9c07023316
commit d6e167b83b
13 changed files with 2898 additions and 86 deletions

View File

@ -61,6 +61,7 @@ const string Attachment::PLAIN ("plain");
const string Attachment::HTML ("html");
const string Attachment::RELATED ("related");
const string Attachment::ALTERNATIVE ("alternative");
const string Attachment::KAZ_ATTACH_NAME (".---KazAttachment---.html");
const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)");
@ -273,6 +274,7 @@ Attachment::Attachment (ifstream &mbox, const int &level, const streamoff beginI
toExtract (false),
toUpdate (false),
toDisclaim (false),
isKazAttachment (false),
boundaryMiddleSize (0) {
DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level);
readMime (mbox, curPos);
@ -417,17 +419,21 @@ Attachment::markSignificant (const string &parentMultiProp, const streamoff &min
for (Attachment &sub : subAttachements)
cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs);
if (getProp (contentTypeToken, textRegEx) == HTML) {
string content = getContent (mbox);
vector<string> imgs;
getSection (content, IMG_BEGIN, IMG_END, imgs);
EmbeddedData::fillEmbeddedData (imgs, minAttachSize, embeddedData);
if (embeddedData.size ())
toUpdate = true;
if (KAZ_ATTACH_NAME == getAttachName ()) {
isKazAttachment = true;
} else {
string content = getContent (mbox);
vector<string> imgs;
getSection (content, IMG_BEGIN, IMG_END, imgs);
EmbeddedData::fillEmbeddedData (imgs, minAttachSize, embeddedData);
if (embeddedData.size ())
toUpdate = true;
}
}
cantBeExtract |= toUpdate;
if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract)
cantBeExtract = toExtract = true; // XXX cantBeExtract ?
if (toExtract || toUpdate || toDisclaim)
if (toExtract || toUpdate || toDisclaim || isKazAttachment)
allMarkedPtrs.push_back (this);
return cantBeExtract;
}

View File

@ -42,6 +42,9 @@
#include <unistd.h>
#include <vector>
#include <boost/assign.hpp>
#include <boost/algorithm/string.hpp>
#include "kazDebug.hpp"
#include "kazMisc.hpp"
#include "SizeArg.hpp"
@ -83,6 +86,7 @@ static const string LI_ONE (LI_BEGIN+" "+CLASS_ONE+">");
static const string LI_ALL (LI_BEGIN+" class=\"all\">");
static const string LI_END ("</li>");
static const string HREF_ONE ("href=\"");
static const string KAZ_HTML_CONTENT ("<!DOCTYPE html><html lang=\"fr\"><head><meta charset=\"utf-8\"><title>KAZ</title>"+KAZ_CSS+"</head><body>");
static const string BODY_END ("</body>");
static const string HTML_END ("</html>");
@ -98,6 +102,9 @@ static const string KAZ_HTML_ARCHIVE ("archive");
static const string KAZ_EMPTY_TEXT_PLAIN ("Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: base64\n");
static const string KAZ_ATTACHMENT_TEXT_HTML ("Content-Type: text/html; charset=utf-8\n"
"Content-Disposition: attachment; filename=\"" + Attachment::KAZ_ATTACH_NAME + "\"\n"
"Content-Transfer-Encoding: base64\n");
// ================================================================================
@ -121,6 +128,35 @@ const string MainAttachment::templateHtmlFooter ("</ul></p>\n"
const regex MainAttachment::whiteSpaceRegEx ("\\s+");
// ================================================================================
const string
kaz::attachModeLabels[] = {
"None", "Footer", "Attachment", "Both"
};
const map<string, AttachMode>
kaz::attachModeMap = boost::assign::map_list_of
("none", NONE)
("footer", FOOTER)
("attachment", ATTACHMENT)
("both", BOTH)
;
ostream &
kaz::operator << (ostream &out, const AttachMode &attachMode) {
//BOOST_ASSERT (treeType >= MIN && treeType <= ALPHA);
return out << attachModeLabels [attachMode];
}
istream &
kaz::operator >> (istream &in, AttachMode &attachMode) {
string token;
in >> token;
auto pos = attachModeMap.find (boost::algorithm::to_lower_copy (token));
if (pos == attachModeMap.end ())
in.setstate (ios_base::failbit);
else
attachMode = pos->second;
return in;
}
// ================================================================================
void
MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) {
@ -179,8 +215,8 @@ MainAttachment::addLink (string &plain, string &html, const string &url, const s
plain += plainNewOneLink;
string htmlNewOneLink (templateHtmlAddLink);
string codedUrl (url);
// XXX amp ?
//replaceAll (codedUrl, "&", "&amp;");
// pb &amp;
// replaceAll (codedUrl, "&", "&amp;");
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
html += htmlNewOneLink;
@ -340,7 +376,7 @@ MainAttachment::extractPreviousKAZ (ifstream &mbox) {
DEF_LOG ("MainAttachment::extractPreviousKAZ", "");
string extractedPlainKAZ, extractedHtmlKAZ;
for (const Attachment *attachP : allMarkedPtrs) {
if (!attachP->toUpdate || isBase64Encoding ())
if (!(attachP->toUpdate || attachP->isKazAttachment)) // isKazAttachment => toUpdate
continue;
string textProp = attachP->getProp (contentTypeToken, textRegEx);
if (textProp.empty ())
@ -388,7 +424,8 @@ MainAttachment::removePreviousArchive () {
// ================================================================================
MainAttachment::MainAttachment (ifstream &mbox)
: Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
forceMainText (false) {
emptyEMail (false),
previousKazAttachment (false) {
DEF_LOG ("MainAttachment::MainAttachment", "");
string line;
for (; getline (mbox, line); )
@ -402,7 +439,7 @@ MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox)
DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
bool plainMarked (false), htmlMarked (false);
markDisclaim (plainMarked, htmlMarked);
forceMainText = ! (plainMarked || htmlMarked);
emptyEMail = ! (plainMarked || htmlMarked);
Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
}
@ -446,7 +483,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
int attachCount (0);
string dirName, mediaName;
for (Attachment *attachP : allMarkedPtrs) {
if (!attachP->toExtract)
if (attachP->isKazAttachment || !attachP->toExtract)
continue;
newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
++attachCount;
@ -521,15 +558,15 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
// ================================================================================
void
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize) {
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize);
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize, const AttachMode &attachMode) {
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize << " AttachMode: " << attachMode);
// preparation
extractPreviousKAZ (mbox);
removePreviousArchive ();
map<const string, const string> translateHtml;
for (Attachment *attachP : allMarkedPtrs)
if (attachP->toExtract) {
if (attachP->toExtract && !attachP->isKazAttachment) {
readDownloadUrl (attachP->downloadUrl);
if (attachP->downloadUrl.empty ()) {
LOG ("no change");
@ -555,27 +592,28 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
getDisclaim (plainDisclaim, htmlDisclaim);
// copy email
streamoff curPos = 0;
if (forceMainText) {
cerr << endl << endl << " #################### coucou " << forceMainText << " " << contentPos << " " << *this << endl;
// check no main text
LOG ("Force main text");
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
copy (mbox, outbox, curPos, contentPos);
curPos = contentPos;
cerr << " #################### coucou " << curPos << endl << endl;
string content (plainDisclaim);
base64Encode (content);
outbox << boundary.substr (0, boundary.length () -2) << endl
<< KAZ_EMPTY_TEXT_PLAIN << endl
<< content << endl;
outbox.flush ();
copy (mbox, outbox, curPos, contentPos);
curPos = contentPos;
if (plainDisclaim.size ()) {
if (emptyEMail && (attachMode & FOOTER)) {
// check no main text
LOG ("Force main text");
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
string content (plainDisclaim);
base64Encode (content);
outbox << boundary.substr (0, boundary.length () -2) << endl
<< KAZ_EMPTY_TEXT_PLAIN << endl
<< content << endl;
outbox.flush ();
}
}
for (Attachment *attachP : allMarkedPtrs) {
copy (mbox, outbox, curPos, attachP->beginInParent);
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
if (attachP->toExtract) {
LOG ("skip Extracted");
if (attachP->toExtract || attachP->isKazAttachment) {
LOG ("skip Extracted or previous attachments");
} else if (attachP->toUpdate) {
string textProp = attachP->getProp (contentTypeToken, textRegEx);
@ -623,7 +661,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
}
if (isPlain)
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
if (isDisclaimer) {
if (isDisclaimer && (attachMode & FOOTER)) {
if (isHtml) {
for (string endTag : {BODY_END, HTML_END}) {
LOG ("try tag: " << endTag);
@ -646,6 +684,19 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
outbox.flush ();
curPos = attachP->endPos;
}
if (plainDisclaim.size () && (attachMode & ATTACHMENT)) {
LOG ("Add kaz attachment");
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't add Kaz attachment M10: : " << *this);
streamoff lastPos = subAttachements.back ().endPos;
copy (mbox, outbox, curPos, lastPos);
curPos = lastPos;
string content (KAZ_HTML_CONTENT+htmlDisclaim+BODY_END+HTML_END);
base64Encode (content);
outbox << boundary.substr (0, boundary.length () -2) << endl
<< KAZ_ATTACHMENT_TEXT_HTML << endl
<< content << endl;
outbox.flush ();
}
copy (mbox, outbox, curPos, endPos);
outbox.close ();
}

View File

@ -32,7 +32,7 @@
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#define LAST_VERSION "2.2 2022-11-20 eMailShrinker"
#define LAST_VERSION "2.3 2022-11-25 eMailShrinker"
#include <iostream>
#include <fstream>
@ -66,8 +66,8 @@ usage (const string &msg = "", const bool &hidden = false) {
cout << endl
<< "Usage: " << endl
<< " A) " << prog << " -u mbox > url-list" << endl
<< " B) " << prog << " [-s size] [-d dirName}] mbox > file-list" << endl
<< " C) " << prog << " [-s size] mbox altered-mbox < url-list" << endl
<< " B) " << prog << " [-s size] [-d dirName] mbox > file-list" << endl
<< " C) " << prog << " [-s size] [-m {Footer|Attachment|Both}] mbox altered-mbox < url-list" << endl
<< endl << " filter attachments" << endl << endl
<< " A: list previous embded url need to be refresh (no added option)" << endl
<< " => downloadURL list" << endl
@ -108,15 +108,16 @@ main (int argc, char** argv) {
DEF_LOG ("main:", "");
prog = argv [0];
bool
debugFlag (false),
helpFlag (false),
versionFlag (false),
updateListFlag (false),
useTheForceLuke (false),
listFlag (false);
listFlag (false),
debugFlag (false);
string inputName, outputName;
bfs::path extractDir (bfs::temp_directory_path ());
SizeArg minAttachSize ("48 Ki");
AttachMode attachMode (FOOTER);
try {
mainDescription.add_options ()
@ -125,6 +126,7 @@ main (int argc, char** argv) {
("size,s", value<SizeArg> (&minAttachSize)->default_value (minAttachSize), "minimum size for extration")
("updateList,u", bool_switch (&updateListFlag), "list URL need refresh")
("extractDir,d", value<bfs::path> (&extractDir)->default_value (extractDir), "set tmp directory name for extraction")
("mode,m", boost::program_options::value<AttachMode> (&attachMode)->default_value (attachMode), "set attachment mode")
;
hide.add_options ()
@ -217,13 +219,17 @@ main (int argc, char** argv) {
showTime ("Extraction");
return 0;
}
// case substitute
if (attachMode == NONE) {
cerr << endl << prog << ": attachMode can't be NONE (forced FOOTER mode)" << endl;
attachMode = FOOTER;
}
mbox.open (inputName);
ofstream outbox (outputName);
attachment.substitute (mbox, outbox, minAttachSize);
attachment.substitute (mbox, outbox, minAttachSize, attachMode);
showTime ("Substitution");
return 0;
}
// ================================================================================
// ================================================================================

View File

@ -50,7 +50,7 @@ using namespace kaz;
static const string::size_type MAX_QUOTED_PRINTABLE_SIZE (78);
const char *const kaz::base64Chars =
const char *const kaz::base64Chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789"