fix boundary

This commit is contained in:
François 2022-11-20 14:55:44 +01:00
parent 6def55c5a7
commit 29480b299c
3 changed files with 90 additions and 54 deletions

View File

@ -65,7 +65,14 @@ const string Attachment::ALTERNATIVE ("alternative");
const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)");
const regex Attachment::nameRegEx (".*name=\"([^\"]*)\".*");
const regex Attachment::boundaryRegEx (".*boundary=\"?([^\" ]*)\"?.*");
// boundary="----=_Part_796779_1154936629.1668080348646"
// boundary="------------040709000505010508040808"
// boundary="----------=_1668606031-941125-91"
// boundary="_004_PAVPR10MB6792713B313048E3A259B215B2079PAVPR10MB6792EURP_";
// boundary="_000_PAVPR10MB6792713B313048E3A259B215B2079PAVPR10MB6792EURP_"
// boundary=--boundary_1351_64006126-2b0e-4a3b-98ac-4797d1634188
// boundary=--boundary_1352_7e294c9a-cfab-44a0-bfb3-7310380ac7cb;
const regex Attachment::boundaryRegEx (".*boundary=\"?([^\"; ]*)\"?;?.*");
const regex Attachment::cidDefRegEx (".*<([^>]*)>.*");
const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*");
const regex Attachment::multiRegEx ("\\s*multipart/(mixed|"+RELATED+"|"+ALTERNATIVE+").*");
@ -354,7 +361,11 @@ Attachment::nextBondary (ifstream &mbox, streamoff &curPos) {
++it)
if (couple.find (*it) != string::npos) {
LOG ("find: "+ *it);
subAttachements.back ().toUpdate = true;
LOG ("size: " << subAttachements.size ());
if (subAttachements.size ())
subAttachements.back ().toUpdate = true;
else
LOG_BUG (true, continue, "eMailShrinker: bug A10: boundary format ? " << *this);
}
prev = clearLine;
continue;
@ -489,7 +500,7 @@ kaz::operator << (ostream& os, const Attachment& attachment) {
if (prop.length ())
prop = " ["+prop+"]";
os << setw ((attachment.level % 20)*2) << "" << setw (10) << SizeArg (attachment.getSize ()) << " " << attachment.getContentType ()
os << ("****************************************"+40-(attachment.level % 20)*2) << setw (10) << SizeArg (attachment.getSize ()) << " " << attachment.getContentType ()
<< prop << (attachment.cid.length () ? " id: "+attachment.cid : "")
<< (attachment.boundary.length () ? " boundary: "+attachment.boundary : "")
<< " (" << attachment.beginPos << " / " << attachment.contentPos << " / " << attachment.endPos << ") " << endl;

View File

@ -51,68 +51,75 @@
using namespace std;
using namespace kaz;
static const string KAZ_WEB_SITE = "https://kaz.bzh/";
static const string TMPL_DOWNLOAD = "{{DOWNLOAD}}";
static const string TMPL_FILENAME = "{{FILENAME}}";
static const string CID = "cid:";
static const string KAZ_WEB_SITE ("https://kaz.bzh/");
static const string TMPL_DOWNLOAD ("{{DOWNLOAD}}");
static const string TMPL_FILENAME ("{{FILENAME}}");
static const string CID ("cid:");
// "l=/" => v1 compatibility
static const regex archiveURLSignature (".*(([&?]g=)|([&?]l=/)).*");
static const string KAZ_PLAIN_HR = "______________________________________________________________________________";
static const string KAZ_PLAIN_START = "~~ PJ-KAZ !"; // must not end with a space
static const string KAZ_PLAIN_STOP = KAZ_PLAIN_START+" ~~";
static const string KAZ_PLAIN_DONT_TOUCH = "(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)";
static const string KAZ_PLAIN_WARNING = "Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. Elles seront automatiquement supprimées dans 1 mois. Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. Pour mieux comprendre la politique de nos services visitez kaz.bzh";
static const string KAZ_PLAIN_DOWLOAD_ONE = "Vos pièces jointes sont à télécharger individuellement ici :";
static const string KAZ_PLAIN_DOWLOAD_OTHER = "(Contenu dans des messages précédents)";
static const string KAZ_PLAIN_DOWLOAD_ALL = "Vous pouvez télécharger l'ensemble dans une archive là :";
static const string KAZ_PLAIN_HR ("______________________________________________________________________________");
static const string KAZ_PLAIN_START ("~~ PJ-KAZ !"); // must not end with a space
static const string KAZ_PLAIN_STOP (KAZ_PLAIN_START+" ~~");
static const string KAZ_PLAIN_DONT_TOUCH ("(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)");
static const string KAZ_PLAIN_WARNING ("Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. "
"Elles seront automatiquement supprimées dans 1 mois. "
"Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. "
"Pour mieux comprendre la politique de nos services visitez kaz.bzh");
static const string KAZ_PLAIN_DOWLOAD_ONE ("Vos pièces jointes sont à télécharger individuellement ici :");
static const string KAZ_PLAIN_DOWLOAD_OTHER ("(Contenu dans des messages précédents)");
static const string KAZ_PLAIN_DOWLOAD_ALL ("Vous pouvez télécharger l'ensemble dans une archive là :");
static const string HEAD = "<head>";
static const string HEAD_END = "</head>";
static const string KAZ_CSS_URL = "https://kaz.bzh/m/email.css";
static const string KAZ_CSS = "<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>";
static const string A_END = "</a>";
static const string LI_BEGIN = "<li";
static const string CLASS_ONE = "class=\"one\"";
static const string LI_ONE = LI_BEGIN+" "+CLASS_ONE+">";
static const string LI_ALL = LI_BEGIN+" class=\"all\">";
static const string LI_END = "</li>";
static const string HREF_ONE = "href=\"";
static const string BODY_END = "</body>";
static const string HTML_END = "</html>";
static const string HEAD ("<head>");
static const string HEAD_END ("</head>");
static const string KAZ_CSS_URL ("https://kaz.bzh/m/email.css");
static const string KAZ_CSS ("<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>");
static const string A_END ("</a>");
static const string LI_BEGIN ("<li");
static const string CLASS_ONE ("class=\"one\"");
static const string LI_ONE (LI_BEGIN+" "+CLASS_ONE+">");
static const string LI_ALL (LI_BEGIN+" class=\"all\">");
static const string LI_END ("</li>");
static const string HREF_ONE ("href=\"");
static const string BODY_END ("</body>");
static const string HTML_END ("</html>");
static const string KAZ_HTML_TAG = "<!--KAZ"; // must not end with a space
static const string KAZ_HTML_START = KAZ_HTML_TAG+" START-->";
static const string KAZ_HTML_STOP = KAZ_HTML_TAG+" STOP-->";
static const string KAZ_HTML_TAG ("<!--KAZ"); // must not end with a space
static const string KAZ_HTML_START (KAZ_HTML_TAG+" START-->");
static const string KAZ_HTML_STOP (KAZ_HTML_TAG+" STOP-->");
// Same texts as the plain-text versions above, with accented characters encoded as SGML/HTML entities
static const string KAZ_HTML_DONT_TOUCH = "(conservez cette partie intacte dans votre r&eacute;ponse si vous voulez transmettre les documents pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ONE = "Vos pi&egrave;ces jointes sont &agrave; t&eacute;l&eacute;charger individuellement ici :";
static const string KAZ_HTML_DOWLOAD_OTHER = "(Contenu dans des messages pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ALL = "Vous pouvez t&eacute;l&eacute;charger l'ensemble dans une archive l&agrave; :";
static const string KAZ_HTML_ARCHIVE = "archive";
static const string KAZ_HTML_DONT_TOUCH ("(conservez cette partie intacte dans votre r&eacute;ponse si vous voulez transmettre les documents pr&eacute;c&eacute;dents)");
static const string KAZ_HTML_DOWLOAD_ONE ("Vos pi&egrave;ces jointes sont &agrave; t&eacute;l&eacute;charger individuellement ici :");
static const string KAZ_HTML_DOWLOAD_OTHER ("(Contenu dans des messages pr&eacute;c&eacute;dents)");
static const string KAZ_HTML_DOWLOAD_ALL ("Vous pouvez t&eacute;l&eacute;charger l'ensemble dans une archive l&agrave; :");
static const string KAZ_HTML_ARCHIVE ("archive");
// MIME part headers announcing a UTF-8 text/plain part whose body is base64-encoded.
// Written just before the base64-encoded plain disclaimer when a main text part
// has to be added to the email (forceMainText).
static const string KAZ_EMPTY_TEXT_PLAIN ("Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: base64\n");
// ================================================================================
vector <string>
Attachment::stringsToUpdate ({KAZ_PLAIN_START, "\""+CID});
// ================================================================================
const string MainAttachment::templatePlainAddLink (" * "+TMPL_FILENAME+" <"+TMPL_DOWNLOAD+">\r\n");
const string MainAttachment::templatePlainAllLink ("\r\n * "+KAZ_PLAIN_DOWLOAD_ALL+" <"+TMPL_DOWNLOAD+">\r\n");
const string MainAttachment::templatePlainAddLink (" * "+TMPL_FILENAME+" <"+TMPL_DOWNLOAD+">\r\n");
const string MainAttachment::templatePlainAllLink ("\r\n * "+KAZ_PLAIN_DOWLOAD_ALL+" <"+TMPL_DOWNLOAD+">\r\n");
const string MainAttachment::templateHtmlHeader (KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n"
"<div class=\"kaz\">"
"<p style=\"font-size:10px; color:#969696;\">"+KAZ_HTML_DONT_TOUCH+"</p>\n"
"<p>"+KAZ_HTML_DOWLOAD_ONE+"<ul>\n");
const string MainAttachment::templateHtmlAddLink (LI_ONE+"<a "+HREF_ONE+TMPL_DOWNLOAD+"\">"+TMPL_FILENAME+"</a>"+LI_END+"\n");
const string MainAttachment::templateHtmlOtherLink ("</ul>"+KAZ_HTML_DOWLOAD_OTHER+"<ul>\n");
const string MainAttachment::templateHtmlAllLink ("</ul><ul>"+LI_ALL+KAZ_HTML_DOWLOAD_ALL+" <a href=\""+TMPL_DOWNLOAD+"\">"+KAZ_HTML_ARCHIVE+"</a>"+LI_END+"\n");
const string MainAttachment::templateHtmlFooter ("</ul></p>\n"
"<p class=\"msg\"><a class=\"kaz\" href=\""+KAZ_WEB_SITE+"\"> "+KAZ_WEB_SITE+" </a></p></div>\n"
"<hr><p style=\"font-size:10px; color:#969696;\">"+KAZ_PLAIN_STOP+"</p>"+KAZ_HTML_STOP+"\n");
const string MainAttachment::templateHtmlHeader (KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n"
"<div class=\"kaz\">"
"<p style=\"font-size:10px; color:#969696;\">"+KAZ_HTML_DONT_TOUCH+"</p>\n"
"<p>"+KAZ_HTML_DOWLOAD_ONE+"<ul>\n");
const string MainAttachment::templateHtmlAddLink (LI_ONE+"<a "+HREF_ONE+TMPL_DOWNLOAD+"\">"+TMPL_FILENAME+"</a>"+LI_END+"\n");
const string MainAttachment::templateHtmlOtherLink ("</ul>"+KAZ_HTML_DOWLOAD_OTHER+"<ul>\n");
const string MainAttachment::templateHtmlAllLink ("</ul><ul>"+LI_ALL+KAZ_HTML_DOWLOAD_ALL+" <a href=\""+TMPL_DOWNLOAD+"\">"+KAZ_HTML_ARCHIVE+"</a>"+LI_END+"\n");
const string MainAttachment::templateHtmlFooter ("</ul></p>\n"
"<p class=\"msg\"><a class=\"kaz\" href=\""+KAZ_WEB_SITE+"\"> "+KAZ_WEB_SITE+" </a></p></div>\n"
"<hr><p style=\"font-size:10px; color:#969696;\">"+KAZ_PLAIN_STOP+"</p>"+KAZ_HTML_STOP+"\n");
const regex MainAttachment::whiteSpaceRegEx ("\\s+");
const regex MainAttachment::whiteSpaceRegEx ("\\s+");
// ================================================================================
void
@ -173,7 +180,7 @@ MainAttachment::addLink (string &plain, string &html, const string &url, const s
string htmlNewOneLink (templateHtmlAddLink);
string codedUrl (url);
// XXX amp ?
replaceAll (codedUrl, "&", "&amp;");
//replaceAll (codedUrl, "&", "&amp;");
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
html += htmlNewOneLink;
@ -228,7 +235,7 @@ MainAttachment::getDisclaim (string &plain, string &html) const {
replaceAll (allHtmlLinks, TMPL_DOWNLOAD, archiveDownloadURL);
html += allHtmlLinks;
}
html += templateHtmlFooter;
html += templateHtmlFooter+"\r\n";
plain += "\r\n\r\n"+KAZ_WEB_SITE+"\r\n"+KAZ_PLAIN_HR+"\r\n"+KAZ_PLAIN_STOP+"\r\n";
// & => &amp; done
LOG ("plain: " << plain);
@ -380,7 +387,8 @@ MainAttachment::removePreviousArchive () {
// ================================================================================
MainAttachment::MainAttachment (ifstream &mbox)
: Attachment (mbox, initTmpLevel (), 0, initTmpPos ()) {
: Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
forceMainText (false) {
DEF_LOG ("MainAttachment::MainAttachment", "");
string line;
for (; getline (mbox, line); )
@ -394,6 +402,7 @@ MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox)
DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
bool plainMarked (false), htmlMarked (false);
markDisclaim (plainMarked, htmlMarked);
forceMainText = ! (plainMarked || htmlMarked);
Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
}
@ -529,7 +538,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
}
if (attachP->cid.length ()) {
string tmp (attachP->downloadUrl);
replaceAll (tmp, "&", "&amp;");
//replaceAll (tmp, "&", "&amp;");
translateHtml.insert (pair<const string, const string> (CID+attachP->cid, tmp));
}
}
@ -546,9 +555,23 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
getDisclaim (plainDisclaim, htmlDisclaim);
// copy email
streamoff curPos = 0;
if (forceMainText) {
cerr << endl << endl << " #################### coucou " << forceMainText << " " << contentPos << " " << *this << endl;
// check no main text
LOG ("Force main text");
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
copy (mbox, outbox, curPos, contentPos);
curPos = contentPos;
cerr << " #################### coucou " << curPos << endl << endl;
string content (plainDisclaim);
base64Encode (content);
outbox << boundary.substr (0, boundary.length () -2) << endl
<< KAZ_EMPTY_TEXT_PLAIN << endl
<< content << endl;
outbox.flush ();
}
for (Attachment *attachP : allMarkedPtrs) {
copy (mbox, outbox, curPos, attachP->beginInParent);
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
if (attachP->toExtract) {

View File

@ -83,6 +83,8 @@ namespace kaz {
bfs::path extractDir;
/*! URL for download archives */
string archiveDownloadURL;
/*! true when the email has no main text part that can be used to add the disclaimer */
bool forceMainText;
/*! subset of the attachment tree to be considered for extraction or modification */
vector<Attachment *> allMarkedPtrs;