Attachment mode
This commit is contained in:
@ -61,6 +61,7 @@ const string Attachment::PLAIN ("plain");
|
||||
const string Attachment::HTML ("html");
|
||||
const string Attachment::RELATED ("related");
|
||||
const string Attachment::ALTERNATIVE ("alternative");
|
||||
const string Attachment::KAZ_ATTACH_NAME (".---KazAttachment---.html");
|
||||
|
||||
|
||||
const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)");
|
||||
@ -273,6 +274,7 @@ Attachment::Attachment (ifstream &mbox, const int &level, const streamoff beginI
|
||||
toExtract (false),
|
||||
toUpdate (false),
|
||||
toDisclaim (false),
|
||||
isKazAttachment (false),
|
||||
boundaryMiddleSize (0) {
|
||||
DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level);
|
||||
readMime (mbox, curPos);
|
||||
@ -417,17 +419,21 @@ Attachment::markSignificant (const string &parentMultiProp, const streamoff &min
|
||||
for (Attachment &sub : subAttachements)
|
||||
cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs);
|
||||
if (getProp (contentTypeToken, textRegEx) == HTML) {
|
||||
string content = getContent (mbox);
|
||||
vector<string> imgs;
|
||||
getSection (content, IMG_BEGIN, IMG_END, imgs);
|
||||
EmbeddedData::fillEmbeddedData (imgs, minAttachSize, embeddedData);
|
||||
if (embeddedData.size ())
|
||||
toUpdate = true;
|
||||
if (KAZ_ATTACH_NAME == getAttachName ()) {
|
||||
isKazAttachment = true;
|
||||
} else {
|
||||
string content = getContent (mbox);
|
||||
vector<string> imgs;
|
||||
getSection (content, IMG_BEGIN, IMG_END, imgs);
|
||||
EmbeddedData::fillEmbeddedData (imgs, minAttachSize, embeddedData);
|
||||
if (embeddedData.size ())
|
||||
toUpdate = true;
|
||||
}
|
||||
}
|
||||
cantBeExtract |= toUpdate;
|
||||
if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract)
|
||||
cantBeExtract = toExtract = true; // XXX cantBeExtract ?
|
||||
if (toExtract || toUpdate || toDisclaim)
|
||||
if (toExtract || toUpdate || toDisclaim || isKazAttachment)
|
||||
allMarkedPtrs.push_back (this);
|
||||
return cantBeExtract;
|
||||
}
|
||||
|
@ -42,6 +42,9 @@
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/assign.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include "kazDebug.hpp"
|
||||
#include "kazMisc.hpp"
|
||||
#include "SizeArg.hpp"
|
||||
@ -83,6 +86,7 @@ static const string LI_ONE (LI_BEGIN+" "+CLASS_ONE+">");
|
||||
static const string LI_ALL (LI_BEGIN+" class=\"all\">");
|
||||
static const string LI_END ("</li>");
|
||||
static const string HREF_ONE ("href=\"");
|
||||
static const string KAZ_HTML_CONTENT ("<!DOCTYPE html><html lang=\"fr\"><head><meta charset=\"utf-8\"><title>KAZ</title>"+KAZ_CSS+"</head><body>");
|
||||
static const string BODY_END ("</body>");
|
||||
static const string HTML_END ("</html>");
|
||||
|
||||
@ -98,6 +102,9 @@ static const string KAZ_HTML_ARCHIVE ("archive");
|
||||
|
||||
static const string KAZ_EMPTY_TEXT_PLAIN ("Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: base64\n");
|
||||
static const string KAZ_ATTACHMENT_TEXT_HTML ("Content-Type: text/html; charset=utf-8\n"
|
||||
"Content-Disposition: attachment; filename=\"" + Attachment::KAZ_ATTACH_NAME + "\"\n"
|
||||
"Content-Transfer-Encoding: base64\n");
|
||||
|
||||
|
||||
// ================================================================================
|
||||
@ -121,6 +128,35 @@ const string MainAttachment::templateHtmlFooter ("</ul></p>\n"
|
||||
|
||||
const regex MainAttachment::whiteSpaceRegEx ("\\s+");
|
||||
|
||||
// ================================================================================
|
||||
// Printable labels for AttachMode values; operator << uses the mode itself as
// the index into this table.
const string kaz::attachModeLabels[] = {
  "None",
  "Footer",
  "Attachment",
  "Both"
};
|
||||
// Lower-case command-line spelling -> AttachMode. operator >> lower-cases the
// token before searching this table.
const map<string, AttachMode> kaz::attachModeMap = {
  {"none", NONE},
  {"footer", FOOTER},
  {"attachment", ATTACHMENT},
  {"both", BOTH}
};
|
||||
// Writes the label of an attach mode to a stream.
//
// The label comes from attachModeLabels, indexed by the numeric value of the
// mode. A value that does not fall inside that table (for instance an integer
// cast to AttachMode) used to read past the end of the array; it is now
// printed as "Unknown (<value>)" instead.
//
// out:        destination stream.
// attachMode: mode to print.
// Returns out, to allow chaining.
ostream &
kaz::operator << (ostream &out, const AttachMode &attachMode) {
  // Number of entries in the label table defined above in this file.
  const size_t labelCount = sizeof (attachModeLabels) / sizeof (attachModeLabels [0]);
  const long index = static_cast<long> (attachMode);
  if (index < 0 || static_cast<size_t> (index) >= labelCount)
    return out << "Unknown (" << index << ")";
  return out << attachModeLabels [index];
}
|
||||
// Reads one whitespace-delimited token from a stream and converts it to an
// AttachMode.
//
// The token is lower-cased before being searched in attachModeMap, so the
// match ignores case. When the token is not a key of the map, failbit is set
// on the stream and attachMode is left untouched.
//
// in:         source stream.
// attachMode: receives the parsed mode on success.
// Returns in, to allow chaining.
istream &
kaz::operator >> (istream &in, AttachMode &attachMode) {
  string word;
  in >> word;
  const auto found = attachModeMap.find (boost::algorithm::to_lower_copy (word));
  if (found != attachModeMap.end ()) {
    attachMode = found->second;
    return in;
  }
  in.setstate (ios_base::failbit);
  return in;
}
|
||||
|
||||
// ================================================================================
|
||||
void
|
||||
MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) {
|
||||
@ -179,8 +215,8 @@ MainAttachment::addLink (string &plain, string &html, const string &url, const s
|
||||
plain += plainNewOneLink;
|
||||
string htmlNewOneLink (templateHtmlAddLink);
|
||||
string codedUrl (url);
|
||||
// XXX amp ?
|
||||
//replaceAll (codedUrl, "&", "&");
|
||||
// pb &
|
||||
// replaceAll (codedUrl, "&", "&");
|
||||
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
|
||||
replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
|
||||
html += htmlNewOneLink;
|
||||
@ -340,7 +376,7 @@ MainAttachment::extractPreviousKAZ (ifstream &mbox) {
|
||||
DEF_LOG ("MainAttachment::extractPreviousKAZ", "");
|
||||
string extractedPlainKAZ, extractedHtmlKAZ;
|
||||
for (const Attachment *attachP : allMarkedPtrs) {
|
||||
if (!attachP->toUpdate || isBase64Encoding ())
|
||||
if (!(attachP->toUpdate || attachP->isKazAttachment)) // isKazAttachment => toUpdate
|
||||
continue;
|
||||
string textProp = attachP->getProp (contentTypeToken, textRegEx);
|
||||
if (textProp.empty ())
|
||||
@ -388,7 +424,8 @@ MainAttachment::removePreviousArchive () {
|
||||
// ================================================================================
|
||||
MainAttachment::MainAttachment (ifstream &mbox)
|
||||
: Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
|
||||
forceMainText (false) {
|
||||
emptyEMail (false),
|
||||
previousKazAttachment (false) {
|
||||
DEF_LOG ("MainAttachment::MainAttachment", "");
|
||||
string line;
|
||||
for (; getline (mbox, line); )
|
||||
@ -402,7 +439,7 @@ MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox)
|
||||
DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
|
||||
bool plainMarked (false), htmlMarked (false);
|
||||
markDisclaim (plainMarked, htmlMarked);
|
||||
forceMainText = ! (plainMarked || htmlMarked);
|
||||
emptyEMail = ! (plainMarked || htmlMarked);
|
||||
Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
|
||||
}
|
||||
|
||||
@ -446,7 +483,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
|
||||
int attachCount (0);
|
||||
string dirName, mediaName;
|
||||
for (Attachment *attachP : allMarkedPtrs) {
|
||||
if (!attachP->toExtract)
|
||||
if (attachP->isKazAttachment || !attachP->toExtract)
|
||||
continue;
|
||||
newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
|
||||
++attachCount;
|
||||
@ -521,15 +558,15 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
|
||||
|
||||
// ================================================================================
|
||||
void
|
||||
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize) {
|
||||
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize);
|
||||
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize, const AttachMode &attachMode) {
|
||||
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize << " AttachMode: " << attachMode);
|
||||
|
||||
// preparation
|
||||
extractPreviousKAZ (mbox);
|
||||
removePreviousArchive ();
|
||||
map<const string, const string> translateHtml;
|
||||
for (Attachment *attachP : allMarkedPtrs)
|
||||
if (attachP->toExtract) {
|
||||
if (attachP->toExtract && !attachP->isKazAttachment) {
|
||||
readDownloadUrl (attachP->downloadUrl);
|
||||
if (attachP->downloadUrl.empty ()) {
|
||||
LOG ("no change");
|
||||
@ -555,27 +592,28 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
|
||||
getDisclaim (plainDisclaim, htmlDisclaim);
|
||||
// copy email
|
||||
streamoff curPos = 0;
|
||||
if (forceMainText) {
|
||||
cerr << endl << endl << " #################### coucou " << forceMainText << " " << contentPos << " " << *this << endl;
|
||||
// check no main text
|
||||
LOG ("Force main text");
|
||||
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
|
||||
copy (mbox, outbox, curPos, contentPos);
|
||||
curPos = contentPos;
|
||||
cerr << " #################### coucou " << curPos << endl << endl;
|
||||
string content (plainDisclaim);
|
||||
base64Encode (content);
|
||||
outbox << boundary.substr (0, boundary.length () -2) << endl
|
||||
<< KAZ_EMPTY_TEXT_PLAIN << endl
|
||||
<< content << endl;
|
||||
outbox.flush ();
|
||||
copy (mbox, outbox, curPos, contentPos);
|
||||
curPos = contentPos;
|
||||
|
||||
if (plainDisclaim.size ()) {
|
||||
if (emptyEMail && (attachMode & FOOTER)) {
|
||||
// check no main text
|
||||
LOG ("Force main text");
|
||||
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
|
||||
string content (plainDisclaim);
|
||||
base64Encode (content);
|
||||
outbox << boundary.substr (0, boundary.length () -2) << endl
|
||||
<< KAZ_EMPTY_TEXT_PLAIN << endl
|
||||
<< content << endl;
|
||||
outbox.flush ();
|
||||
}
|
||||
}
|
||||
for (Attachment *attachP : allMarkedPtrs) {
|
||||
copy (mbox, outbox, curPos, attachP->beginInParent);
|
||||
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
|
||||
|
||||
if (attachP->toExtract) {
|
||||
LOG ("skip Extracted");
|
||||
if (attachP->toExtract || attachP->isKazAttachment) {
|
||||
LOG ("skip Extracted or previous attachments");
|
||||
|
||||
} else if (attachP->toUpdate) {
|
||||
string textProp = attachP->getProp (contentTypeToken, textRegEx);
|
||||
@ -623,7 +661,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
|
||||
}
|
||||
if (isPlain)
|
||||
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
|
||||
if (isDisclaimer) {
|
||||
if (isDisclaimer && (attachMode & FOOTER)) {
|
||||
if (isHtml) {
|
||||
for (string endTag : {BODY_END, HTML_END}) {
|
||||
LOG ("try tag: " << endTag);
|
||||
@ -646,6 +684,19 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
|
||||
outbox.flush ();
|
||||
curPos = attachP->endPos;
|
||||
}
|
||||
if (plainDisclaim.size () && (attachMode & ATTACHMENT)) {
|
||||
LOG ("Add kaz attachment");
|
||||
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't add Kaz attachment M10: : " << *this);
|
||||
streamoff lastPos = subAttachements.back ().endPos;
|
||||
copy (mbox, outbox, curPos, lastPos);
|
||||
curPos = lastPos;
|
||||
string content (KAZ_HTML_CONTENT+htmlDisclaim+BODY_END+HTML_END);
|
||||
base64Encode (content);
|
||||
outbox << boundary.substr (0, boundary.length () -2) << endl
|
||||
<< KAZ_ATTACHMENT_TEXT_HTML << endl
|
||||
<< content << endl;
|
||||
outbox.flush ();
|
||||
}
|
||||
copy (mbox, outbox, curPos, endPos);
|
||||
outbox.close ();
|
||||
}
|
||||
|
@ -32,7 +32,7 @@
|
||||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define LAST_VERSION "2.2 2022-11-20 eMailShrinker"
|
||||
#define LAST_VERSION "2.3 2022-11-25 eMailShrinker"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
@ -66,8 +66,8 @@ usage (const string &msg = "", const bool &hidden = false) {
|
||||
cout << endl
|
||||
<< "Usage: " << endl
|
||||
<< " A) " << prog << " -u mbox > url-list" << endl
|
||||
<< " B) " << prog << " [-s size] [-d dirName}] mbox > file-list" << endl
|
||||
<< " C) " << prog << " [-s size] mbox altered-mbox < url-list" << endl
|
||||
<< " B) " << prog << " [-s size] [-d dirName] mbox > file-list" << endl
|
||||
<< " C) " << prog << " [-s size] [-m {Footer|Attachment|Both}] mbox altered-mbox < url-list" << endl
|
||||
<< endl << " filter attachments" << endl << endl
|
||||
<< " A: list previous embded url need to be refresh (no added option)" << endl
|
||||
<< " => downloadURL list" << endl
|
||||
@ -108,15 +108,16 @@ main (int argc, char** argv) {
|
||||
DEF_LOG ("main:", "");
|
||||
prog = argv [0];
|
||||
bool
|
||||
debugFlag (false),
|
||||
helpFlag (false),
|
||||
versionFlag (false),
|
||||
updateListFlag (false),
|
||||
useTheForceLuke (false),
|
||||
listFlag (false);
|
||||
listFlag (false),
|
||||
debugFlag (false);
|
||||
string inputName, outputName;
|
||||
bfs::path extractDir (bfs::temp_directory_path ());
|
||||
SizeArg minAttachSize ("48 Ki");
|
||||
AttachMode attachMode (FOOTER);
|
||||
|
||||
try {
|
||||
mainDescription.add_options ()
|
||||
@ -125,6 +126,7 @@ main (int argc, char** argv) {
|
||||
("size,s", value<SizeArg> (&minAttachSize)->default_value (minAttachSize), "minimum size for extration")
|
||||
("updateList,u", bool_switch (&updateListFlag), "list URL need refresh")
|
||||
("extractDir,d", value<bfs::path> (&extractDir)->default_value (extractDir), "set tmp directory name for extraction")
|
||||
("mode,m", boost::program_options::value<AttachMode> (&attachMode)->default_value (attachMode), "set attachment mode")
|
||||
;
|
||||
|
||||
hide.add_options ()
|
||||
@ -217,13 +219,17 @@ main (int argc, char** argv) {
|
||||
showTime ("Extraction");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// case substitute
|
||||
if (attachMode == NONE) {
|
||||
cerr << endl << prog << ": attachMode can't be NONE (forced FOOTER mode)" << endl;
|
||||
attachMode = FOOTER;
|
||||
}
|
||||
mbox.open (inputName);
|
||||
ofstream outbox (outputName);
|
||||
attachment.substitute (mbox, outbox, minAttachSize);
|
||||
attachment.substitute (mbox, outbox, minAttachSize, attachMode);
|
||||
showTime ("Substitution");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ================================================================================
|
||||
// ================================================================================
|
||||
|
@ -50,7 +50,7 @@ using namespace kaz;
|
||||
|
||||
static const string::size_type MAX_QUOTED_PRINTABLE_SIZE (78);
|
||||
|
||||
const char *const kaz::base64Chars =
|
||||
const char *const kaz::base64Chars =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789"
|
||||
|
Reference in New Issue
Block a user