654 lines
26 KiB
C++
654 lines
26 KiB
C++
////////////////////////////////////////////////////////////////////////////
|
|
// Copyright KAZ 2021 //
|
|
// //
|
|
// contact (at) kaz.bzh //
|
|
// //
|
|
// This software is a filter to shrink email by attachment extraction. //
|
|
// //
|
|
// This software is governed by the CeCILL-B license under French law and //
|
|
// abiding by the rules of distribution of free software. You can use, //
|
|
// modify and/or redistribute the software under the terms of the //
|
|
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|
// URL "http://www.cecill.info". //
|
|
// //
|
|
// As a counterpart to the access to the source code and rights to copy, //
|
|
// modify and redistribute granted by the license, users are provided //
|
|
// only with a limited warranty and the software's author, the holder of //
|
|
// the economic rights, and the successive licensors have only limited //
|
|
// liability. //
|
|
// //
|
|
// In this respect, the user's attention is drawn to the risks associated //
|
|
// with loading, using, modifying and/or developing or reproducing the //
|
|
// software by the user in light of its specific status of free software, //
|
|
// that may mean that it is complicated to manipulate, and that also //
|
|
// therefore means that it is reserved for developers and experienced //
|
|
// professionals having in-depth computer knowledge. Users are therefore //
|
|
// encouraged to load and test the software's suitability as regards //
|
|
// their requirements in conditions enabling the security of their //
|
|
// systems and/or data to be ensured and, more generally, to use and //
|
|
// operate it in the same conditions as regards security. //
|
|
// //
|
|
// The fact that you are presently reading this means that you have had //
|
|
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <algorithm>
|
|
#include <fstream>
|
|
#include <iomanip>
|
|
#include <iostream>
|
|
#include <math.h>
|
|
#include <regex>
|
|
#include <set>
|
|
#include <unistd.h>
|
|
#include <vector>
|
|
|
|
#include "kazDebug.hpp"
|
|
#include "kazMisc.hpp"
|
|
#include "SizeArg.hpp"
|
|
#include "Attachment.hpp"
|
|
#include "MainAttachment.hpp"
|
|
|
|
using namespace std;
|
|
using namespace kaz;
|
|
|
|
// Site advertised in the footer of every generated disclaimer.
static const std::string KAZ_WEB_SITE = "https://kaz.bzh/";
// Placeholders substituted in the link templates.
static const std::string TMPL_DOWNLOAD = "{{DOWNLOAD}}";
static const std::string TMPL_FILENAME = "{{FILENAME}}";
// Scheme prefix of "Content-ID" references found in HTML parts.
static const std::string CID = "cid:";

// Recognises an archive download URL by its query parameter:
// "g=" is the current form, "l=/" is kept for v1 compatibility.
static const std::regex archiveURLSignature {".*(([&?]g=)|([&?]l=/)).*"};
|
|
|
|
|
|
// Building blocks of the text/plain disclaimer.
static const std::string KAZ_PLAIN_HR = "______________________________________________________________________________";
// Opening marker of the disclaimer section (must not end with a space).
static const std::string KAZ_PLAIN_START = "~~ PJ-KAZ !";
// Closing marker: the opening marker followed by " ~~".
static const std::string KAZ_PLAIN_STOP = KAZ_PLAIN_START + " ~~";
static const std::string KAZ_PLAIN_DONT_TOUCH = "(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)";
static const std::string KAZ_PLAIN_WARNING =
    "Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. "
    "Elles seront automatiquement supprimées dans 1 mois. "
    "Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. "
    "Pour mieux comprendre la politique de nos services visitez kaz.bzh";
static const std::string KAZ_PLAIN_DOWLOAD_ONE = "Vos pièces jointes sont à télécharger individuellement ici :";
static const std::string KAZ_PLAIN_DOWLOAD_OTHER = "(Contenu dans des messages précédents)";
static const std::string KAZ_PLAIN_DOWLOAD_ALL = "Vous pouvez télécharger l'ensemble dans une archive là :";
|
|
|
|
// HTML fragments searched for, or emitted, when rewriting text/html parts.
static const std::string HEAD = "<head>";
static const std::string HEAD_END = "</head>";
// Stylesheet injected in the <head> of rewritten HTML parts.
static const std::string KAZ_CSS_URL = "https://kaz.bzh/m/email.css";
static const std::string KAZ_CSS = "<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\"" + KAZ_CSS_URL + "\"/>";
static const std::string A_END = "</a>";
// Left open on purpose: the attributes and '>' are appended below.
static const std::string LI_BEGIN = "<li";
static const std::string CLASS_ONE = "class=\"one\"";
// List item holding one individual download link.
static const std::string LI_ONE = LI_BEGIN + " " + CLASS_ONE + ">";
// List item holding the "whole archive" link.
static const std::string LI_ALL = LI_BEGIN + " class=\"all\">";
static const std::string LI_END = "</li>";
static const std::string HREF_ONE = "href=\"";
static const std::string BODY_END = "</body>";
static const std::string HTML_END = "</html>";
|
|
|
|
// HTML comment markers delimiting the disclaimer inside text/html parts.
static const std::string KAZ_HTML_TAG = "<!--KAZ"; // must not end with a space
static const std::string KAZ_HTML_START = KAZ_HTML_TAG + " START-->";
static const std::string KAZ_HTML_STOP = KAZ_HTML_TAG + " STOP-->";
// HTML counterparts of the KAZ_PLAIN_* texts (the original comment reads
// "previous texts encoded in SGML").
// NOTE(review): the literals below contain raw accented characters, not
// character entities -- confirm which form is expected here.
static const std::string KAZ_HTML_DONT_TOUCH = "(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)";
static const std::string KAZ_HTML_DOWLOAD_ONE = "Vos pièces jointes sont à télécharger individuellement ici :";
static const std::string KAZ_HTML_DOWLOAD_OTHER = "(Contenu dans des messages précédents)";
static const std::string KAZ_HTML_DOWLOAD_ALL = "Vous pouvez télécharger l'ensemble dans une archive là :";
// Anchor text of the archive link.
static const std::string KAZ_HTML_ARCHIVE = "archive";
|
|
|
|
// MIME headers of the base64 text/plain part that substitute() adds when it
// has to force a main text.
static const std::string KAZ_EMPTY_TEXT_PLAIN =
    "Content-Type: text/plain; charset=utf-8\n"
    "Content-Transfer-Encoding: base64\n";
|
|
|
|
|
|
// ================================================================================
|
|
// Definition of the static member Attachment::stringsToUpdate: the plain-text
// disclaimer marker and a quoted "cid:" prefix.
vector <string>
Attachment::stringsToUpdate {KAZ_PLAIN_START, "\""+CID};
|
|
|
|
// ================================================================================
|
|
const string MainAttachment::templatePlainAddLink (" * "+TMPL_FILENAME+" <"+TMPL_DOWNLOAD+">\r\n");
|
|
const string MainAttachment::templatePlainAllLink ("\r\n * "+KAZ_PLAIN_DOWLOAD_ALL+" <"+TMPL_DOWNLOAD+">\r\n");
|
|
|
|
const string MainAttachment::templateHtmlHeader (KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n"
|
|
"<div class=\"kaz\">"
|
|
"<p style=\"font-size:10px; color:#969696;\">"+KAZ_HTML_DONT_TOUCH+"</p>\n"
|
|
"<p>"+KAZ_HTML_DOWLOAD_ONE+"<ul>\n");
|
|
const string MainAttachment::templateHtmlAddLink (LI_ONE+"<a "+HREF_ONE+TMPL_DOWNLOAD+"\">"+TMPL_FILENAME+"</a>"+LI_END+"\n");
|
|
const string MainAttachment::templateHtmlOtherLink ("</ul>"+KAZ_HTML_DOWLOAD_OTHER+"<ul>\n");
|
|
const string MainAttachment::templateHtmlAllLink ("</ul><ul>"+LI_ALL+KAZ_HTML_DOWLOAD_ALL+" <a href=\""+TMPL_DOWNLOAD+"\">"+KAZ_HTML_ARCHIVE+"</a>"+LI_END+"\n");
|
|
const string MainAttachment::templateHtmlFooter ("</ul></p>\n"
|
|
"<p class=\"msg\"><a class=\"kaz\" href=\""+KAZ_WEB_SITE+"\"> "+KAZ_WEB_SITE+" </a></p></div>\n"
|
|
"<hr><p style=\"font-size:10px; color:#969696;\">"+KAZ_PLAIN_STOP+"</p>"+KAZ_HTML_STOP+"\n");
|
|
|
|
const regex MainAttachment::whiteSpaceRegEx ("\\s+");
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) {
|
|
DEF_LOG ("MainAttachment::copy", "begin: " << begin << " end: " << end);
|
|
mbox.seekg (begin, ios::beg);
|
|
char c;
|
|
for (streamoff pos (begin); pos < end; ++pos) {
|
|
mbox.get (c);
|
|
outbox.put (c);
|
|
}
|
|
outbox.flush ();
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::readArchiveUrl () {
|
|
DEF_LOG ("MainAttachment::readArchiveUrl", "");
|
|
archiveDownloadURL.clear ();
|
|
string line;
|
|
getline (cin, line);
|
|
LOG_BUG (line.rfind ("arch: ", 0) != 0, return, "eMailShrinker: bug ZZ: no archive link. (line: " << line << ")");
|
|
LOG_BUG (line.rfind ("arch: bad", 0) == 0, return, "eMailShrinker: bug ZZ: bad archive link. (line: " << line << ")");
|
|
if (line.rfind ("arch: none", 0) == 0)
|
|
return;
|
|
archiveDownloadURL = line.substr (6);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::readDownloadUrl (string &url) {
|
|
DEF_LOG ("MainAttachment::readDownloadUrl", "");
|
|
url = "";
|
|
string line;
|
|
getline (cin, line);
|
|
LOG ("get URL: " << line);
|
|
LOG_BUG (line.rfind ("url: ", 0) != 0, return, "eMailShrinker: bug ZZ: no download link. (line: " << line << ")");
|
|
url = line.substr (5);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::setExtractDir (const bfs::path &extractDir) {
|
|
if (extractDir.empty ())
|
|
throw invalid_argument ("no tmp dir");
|
|
this->extractDir = extractDir;
|
|
if (! is_directory (extractDir))
|
|
bfs::create_directory (extractDir);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::addLink (string &plain, string &html, const string &url, const string &name) const {
|
|
string plainNewOneLink (templatePlainAddLink);
|
|
replaceAll (plainNewOneLink, TMPL_DOWNLOAD, url);
|
|
replaceAll (plainNewOneLink, TMPL_FILENAME, "\""+name+"\"");
|
|
plain += plainNewOneLink;
|
|
string htmlNewOneLink (templateHtmlAddLink);
|
|
string codedUrl (url);
|
|
// XXX amp ?
|
|
//replaceAll (codedUrl, "&", "&");
|
|
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
|
|
replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
|
|
html += htmlNewOneLink;
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::getDisclaim (string &plain, string &html) const {
|
|
DEF_LOG ("Attachment::getDisclaim", "");
|
|
plain = html = "";
|
|
|
|
int linkCount (0);
|
|
string plainNewLinks, htmlNewLinks;
|
|
for (Attachment *attachP : allMarkedPtrs) {
|
|
if (!attachP->toExtract)
|
|
continue;
|
|
addLink (plainNewLinks, htmlNewLinks, attachP->downloadUrl, attachP->getAttachName ());
|
|
++linkCount;
|
|
// if (previousLinks [attachP->downloadUrl] != previousLinks.end ())
|
|
// // impossible puisque le lien est toujours nouveau
|
|
// previousLinks.erase (attachP->downloadUrl);
|
|
}
|
|
for (Attachment *attachP : allMarkedPtrs) {
|
|
if (!attachP->embeddedData.size ())
|
|
continue;
|
|
for (EmbeddedData &embedded : attachP->embeddedData) {
|
|
addLink (plainNewLinks, htmlNewLinks, embedded.downloadUrl, embedded.name);
|
|
++linkCount;
|
|
}
|
|
}
|
|
|
|
string plainOldLinks, htmlOldLinks;
|
|
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
|
|
addLink (plainOldLinks, htmlOldLinks, it->first, it->second);
|
|
linkCount += previousLinks.size ();
|
|
if (! linkCount) {
|
|
LOG ("no attach");
|
|
return;
|
|
}
|
|
|
|
plain = "\r\n"+KAZ_PLAIN_START+"\r\n"+KAZ_PLAIN_HR+"\r\n"+KAZ_PLAIN_DONT_TOUCH+"\r\n\r\n"+KAZ_PLAIN_WARNING+"\r\n\r\n"+KAZ_PLAIN_DOWLOAD_ONE+"\r\n"+plainNewLinks;
|
|
html = templateHtmlHeader+htmlNewLinks;
|
|
if (previousLinks.size ()) {
|
|
plain += "\r\n"+KAZ_PLAIN_DOWLOAD_OTHER+"\r\n"+plainOldLinks;
|
|
html += templateHtmlOtherLink+htmlOldLinks;
|
|
}
|
|
if (linkCount > 1 && archiveDownloadURL.length ()) {
|
|
string allPlainLinks (templatePlainAllLink);
|
|
replaceAll (allPlainLinks, TMPL_DOWNLOAD, archiveDownloadURL);
|
|
plain += allPlainLinks;
|
|
string allHtmlLinks (templateHtmlAllLink);
|
|
replaceAll (allHtmlLinks, TMPL_DOWNLOAD, archiveDownloadURL);
|
|
html += allHtmlLinks;
|
|
}
|
|
html += templateHtmlFooter+"\r\n";
|
|
plain += "\r\n\r\n"+KAZ_WEB_SITE+"\r\n"+KAZ_PLAIN_HR+"\r\n"+KAZ_PLAIN_STOP+"\r\n";
|
|
// & => & done
|
|
LOG ("plain: " << plain);
|
|
LOG ("html: " << html);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::addPrevious (const string &href, const string &name, const bool &trust) {
|
|
DEF_LOG ("Attachment::addPrevious", "href: " << href << " name: " << name);
|
|
const string oldVal = previousLinks [href];
|
|
if (name.empty ())
|
|
return;
|
|
if (oldVal.length () && name.length () && !trust)
|
|
return;
|
|
previousLinks.erase (href);
|
|
previousLinks [href] = regex_replace (name, regex (R"([\t\r\n\"]+|(\\\")|(>\s*))"), "");
|
|
LOG ("inserted: " << href << ": " << previousLinks[href]);
|
|
}
|
|
|
|
void
|
|
MainAttachment::extractLinks (const string &extractedPlainKAZ) {
|
|
// plain text => "* name <url>"
|
|
DEF_LOG ("Attachment::extractedPlainKAZ", "extractedPlainKAZ: " << extractedPlainKAZ);
|
|
for (string::size_type startPos (0);
|
|
(startPos = extractedPlainKAZ.find ("http", startPos)) != string::npos;
|
|
) {
|
|
streamoff stopPos = startPos;
|
|
while (extractedPlainKAZ [stopPos] && availableURLChars.find (extractedPlainKAZ [stopPos]) != string::npos)
|
|
++stopPos;
|
|
const string href (extractedPlainKAZ.substr (startPos, stopPos-startPos));
|
|
LOG ("plain href: " << href);
|
|
|
|
if (extractedPlainKAZ [stopPos])
|
|
++stopPos;
|
|
streamoff stopName = startPos;
|
|
startPos = stopPos;
|
|
|
|
// get all href but KAZ_WEB_SITE
|
|
// the archive link will be skip by filter.sh
|
|
if (href == KAZ_WEB_SITE)
|
|
continue;
|
|
|
|
// backward find name
|
|
string::size_type startName = extractedPlainKAZ.rfind ("* ", stopName);
|
|
string name;
|
|
|
|
if (startName != string::npos) {
|
|
name = string (extractedPlainKAZ, startName+3, stopName - startName - 3);
|
|
// skip [> \r\n\t]
|
|
string::size_type nextPos = name.find_first_not_of ("[>< \t\r\n\"]");
|
|
if (nextPos != string::npos)
|
|
name.erase (0, nextPos);
|
|
// stop before [>\r\n\t]
|
|
nextPos = name.find_first_of ("[\"<]");
|
|
if (nextPos != string::npos)
|
|
name.erase (nextPos);
|
|
}
|
|
|
|
LOG ("plain name: " << name);
|
|
addPrevious (href, name);
|
|
}
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::extractLinks (const vector<string> &liOne) {
|
|
// html text => "<li ...><a href="url">name</a>"
|
|
DEF_LOG ("Attachment::extractedPlainKAZ", "liOne.size: " << liOne.size ());
|
|
for (const string &one : liOne) {
|
|
if (caseInsensitiveFind (one, CLASS_ONE) == string::npos)
|
|
continue;
|
|
string::size_type startPos = caseInsensitiveFind (one, HREF_ONE);
|
|
|
|
LOG_BUG (startPos == string::npos, continue, "eMailShrinker: bug M1: no href KAZ link. (one: " << one << ")");
|
|
startPos += HREF_ONE.length ();
|
|
LOG ("startPos: " << startPos);
|
|
string::size_type stopPos = one.find ("\"", startPos);
|
|
|
|
LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M2: no ending href KAZ link. (one: " << one << ")");
|
|
LOG ("stopPos: " << stopPos);
|
|
string href (one.substr (startPos, stopPos-startPos));
|
|
LOG ("html href: " << href);
|
|
stopPos = one.find (">", startPos);
|
|
|
|
LOG_BUG (one [stopPos] != '>', break, "eMailShrinker: bug M3: no ending href KAZ link. (one: " << one << ")");
|
|
++stopPos;
|
|
startPos = stopPos;
|
|
LOG ("startPos: " << startPos);
|
|
stopPos = caseInsensitiveFind (one, A_END, startPos);
|
|
LOG ("stopPos: " << stopPos);
|
|
|
|
LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M4: no ending anchor KAZ link. (one: " << one << ")");
|
|
string name (one.substr (startPos, stopPos-startPos));
|
|
LOG ("html name: " << name);
|
|
addPrevious (href, name);
|
|
}
|
|
}
|
|
|
|
void
|
|
MainAttachment::extractPreviousKAZ (ifstream &mbox) {
|
|
DEF_LOG ("MainAttachment::extractPreviousKAZ", "");
|
|
string extractedPlainKAZ, extractedHtmlKAZ;
|
|
for (const Attachment *attachP : allMarkedPtrs) {
|
|
if (!attachP->toUpdate || isBase64Encoding ())
|
|
continue;
|
|
string textProp = attachP->getProp (contentTypeToken, textRegEx);
|
|
if (textProp.empty ())
|
|
continue;
|
|
string content (attachP->getContent (mbox));
|
|
if (textProp == PLAIN) {
|
|
LOG (PLAIN);
|
|
extractedPlainKAZ += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
|
|
}
|
|
if (textProp == HTML) {
|
|
LOG (HTML);
|
|
string section = attachP->getSection (content, KAZ_HTML_START, KAZ_HTML_STOP);
|
|
section += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
|
|
// update href from HTML attachments
|
|
replaceAll (section, "&", "&");
|
|
extractedHtmlKAZ += section;
|
|
}
|
|
}
|
|
LOG ("extractedPlainKAZ: "<< extractedPlainKAZ);
|
|
extractLinks (extractedPlainKAZ);
|
|
|
|
LOG ("extractedHtmlKAZ: "<< extractedHtmlKAZ);
|
|
vector<string> liOne;
|
|
getSection (extractedHtmlKAZ, LI_BEGIN, LI_END, liOne);
|
|
extractLinks (liOne);
|
|
|
|
#ifndef DISABLE_LOG
|
|
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
|
|
LOG ("oldLink link: " << it->first << " name: " << it->second);
|
|
#endif
|
|
}
|
|
|
|
void
|
|
MainAttachment::removePreviousArchive () {
|
|
vector<string> toRemove;
|
|
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) {
|
|
const string key (it->first);
|
|
if (regex_match (key, archiveURLSignature))
|
|
toRemove.push_back (key);
|
|
}
|
|
for (string old : toRemove)
|
|
previousLinks.erase (old);
|
|
}
|
|
|
|
// ================================================================================
|
|
// Builds the root attachment from the whole message, then consumes the lines
// remaining in mbox to compute endPos.
// Each remaining line advances tmpPos by its length plus one (the line
// terminator removed by getline).
MainAttachment::MainAttachment (ifstream &mbox)
    : Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
      forceMainText (false) {
    DEF_LOG ("MainAttachment::MainAttachment", "");
    string line;
    while (getline (mbox, line))
        tmpPos += line.length () + 1;
    endPos = tmpPos;
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox) {
|
|
DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
|
|
bool plainMarked (false), htmlMarked (false);
|
|
markDisclaim (plainMarked, htmlMarked);
|
|
forceMainText = ! (plainMarked || htmlMarked);
|
|
Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::getUpdatedURL (ifstream &mbox) {
|
|
DEF_LOG ("MainAttachment::getUpdatedURL", "");
|
|
extractPreviousKAZ (mbox);
|
|
for (map <string, string>::iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
|
|
cout << it->first << endl;
|
|
}
|
|
|
|
void
|
|
MainAttachment::newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const {
|
|
DEF_LOG ("MainAttachment::newPjEntry", "attachCount: " << attachCount << " contentType: " << contentType << " name: " << name);
|
|
ostringstream dirNameStream;
|
|
dirNameStream << "PJ-" << std::setfill ('0') << std::setw (3) << int (attachCount);
|
|
dirName = dirNameStream.str ();
|
|
bfs::path dirPath (extractDir / dirName);
|
|
|
|
bfs::create_directory (dirPath);
|
|
bfs::path metaPath (dirPath / "meta");
|
|
|
|
ofstream metaOut (metaPath.c_str ());
|
|
metaOut
|
|
<< "Content-Type: " << contentType << endl
|
|
<< "Name: " << name << endl;
|
|
metaOut.flush ();
|
|
metaOut.close ();
|
|
|
|
bfs::path filePath (dirPath / "media");
|
|
mediaName = filePath.c_str ();
|
|
dirName = dirPath.c_str ();
|
|
LOG ("dirName: " << dirName << " mediaName: " << mediaName);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
|
|
DEF_LOG ("MainAttachment::extract", "minSize: " << minSize);
|
|
int attachCount (0);
|
|
string dirName, mediaName;
|
|
for (Attachment *attachP : allMarkedPtrs) {
|
|
if (!attachP->toExtract)
|
|
continue;
|
|
newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
|
|
++attachCount;
|
|
ofstream out (mediaName);
|
|
|
|
streamoff
|
|
start (attachP->Attachment::contentPos),
|
|
end (attachP->Attachment::endPos+1); // pour assurer le cas sans ^M
|
|
mbox.seekg (start, ios::beg);
|
|
if (attachP->isBase64Encoding ()) {
|
|
unsigned char buff[4];
|
|
int idx = 0;
|
|
char c;
|
|
for (streamoff curPos (start); mbox.get (c) && curPos < end; ++curPos) {
|
|
if (c == '=')
|
|
break;
|
|
if (!isBase64 (c))
|
|
continue;
|
|
buff [idx] = getBase64Val (c);
|
|
if (++idx != 4)
|
|
continue;
|
|
out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
|
|
out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
|
|
out.put (buff [2] << 6 | buff [3]);
|
|
idx = 0;
|
|
}
|
|
if (idx) {
|
|
for (int j = idx; j < 4; ++j)
|
|
buff [j] = 0;
|
|
out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
|
|
--idx;
|
|
if (idx)
|
|
out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
|
|
}
|
|
} else {
|
|
string line;
|
|
for (streamoff curPos (start); getline (mbox, line); ) {
|
|
curPos += line.length () + 1;
|
|
if (curPos >= end) {
|
|
out << line.substr (0, end + line.length () - curPos) << endl;
|
|
break;
|
|
}
|
|
out << line << endl;
|
|
}
|
|
}
|
|
out.flush ();
|
|
out.close ();
|
|
cout << dirName << endl;
|
|
}
|
|
for (Attachment *attachP : allMarkedPtrs) {
|
|
if (!attachP->embeddedData.size ())
|
|
continue;
|
|
string content = attachP->getContent (mbox);
|
|
vector<string> imgs;
|
|
getSection (content, IMG_BEGIN, IMG_END, imgs);
|
|
for (const EmbeddedData &embedded : attachP->embeddedData) {
|
|
string &img (imgs[embedded.imgIdx]);
|
|
img.erase (0, embedded.startData);
|
|
img.erase (embedded.dataLength);
|
|
base64Decode (img);
|
|
newPjEntry (attachCount, embedded.contentType, embedded.name, dirName, mediaName);
|
|
++attachCount;
|
|
|
|
ofstream out (mediaName);
|
|
out.write (img.c_str (), img.size ());
|
|
out.flush ();
|
|
out.close ();
|
|
cout << dirName << endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize) {
|
|
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize);
|
|
|
|
// preparation
|
|
extractPreviousKAZ (mbox);
|
|
removePreviousArchive ();
|
|
map<const string, const string> translateHtml;
|
|
for (Attachment *attachP : allMarkedPtrs)
|
|
if (attachP->toExtract) {
|
|
readDownloadUrl (attachP->downloadUrl);
|
|
if (attachP->downloadUrl.empty ()) {
|
|
LOG ("no change");
|
|
attachP->toExtract = false;
|
|
continue;
|
|
}
|
|
if (attachP->cid.length ()) {
|
|
string tmp (attachP->downloadUrl);
|
|
//replaceAll (tmp, "&", "&");
|
|
translateHtml.insert (pair<const string, const string> (CID+attachP->cid, tmp));
|
|
}
|
|
}
|
|
for (Attachment *attachP : allMarkedPtrs) {
|
|
if (!attachP->embeddedData.size ())
|
|
continue;
|
|
for (EmbeddedData &embedded : attachP->embeddedData)
|
|
readDownloadUrl (embedded.downloadUrl);
|
|
// XXX test empty ?
|
|
}
|
|
readArchiveUrl ();
|
|
removePreviousArchive ();
|
|
string plainDisclaim, htmlDisclaim;
|
|
getDisclaim (plainDisclaim, htmlDisclaim);
|
|
// copy email
|
|
streamoff curPos = 0;
|
|
if (forceMainText) {
|
|
cerr << endl << endl << " #################### coucou " << forceMainText << " " << contentPos << " " << *this << endl;
|
|
// check no main text
|
|
LOG ("Force main text");
|
|
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
|
|
copy (mbox, outbox, curPos, contentPos);
|
|
curPos = contentPos;
|
|
cerr << " #################### coucou " << curPos << endl << endl;
|
|
string content (plainDisclaim);
|
|
base64Encode (content);
|
|
outbox << boundary.substr (0, boundary.length () -2) << endl
|
|
<< KAZ_EMPTY_TEXT_PLAIN << endl
|
|
<< content << endl;
|
|
outbox.flush ();
|
|
}
|
|
for (Attachment *attachP : allMarkedPtrs) {
|
|
copy (mbox, outbox, curPos, attachP->beginInParent);
|
|
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
|
|
|
|
if (attachP->toExtract) {
|
|
LOG ("skip Extracted");
|
|
|
|
} else if (attachP->toUpdate) {
|
|
string textProp = attachP->getProp (contentTypeToken, textRegEx);
|
|
bool isPlain = textProp == PLAIN;
|
|
bool isHtml = textProp == HTML;
|
|
bool isDisclaimer = attachP->toDisclaim;
|
|
|
|
LOG_BUG (isPlain && isHtml, /**/, "eMailShrinker: bug M6: plain and html: " << attachP->getContentType ());
|
|
LOG_BUG (! (isPlain || isHtml), /**/, "eMailShrinker: bug M7: not plain or html: " << attachP->getContentType ());
|
|
LOG ("toUpdate: isPlain: " << isPlain << " isHtml: " << isHtml << " isDisclaimer: " << isDisclaimer);
|
|
copy (mbox, outbox, attachP->beginInParent, attachP->contentPos);
|
|
|
|
string content = attachP->getContent (mbox);
|
|
if (isHtml) {
|
|
string::size_type headStart (caseInsensitiveFind (content, HEAD));
|
|
LOG ("HEAD start: " << headStart);
|
|
if (headStart != string::npos) {
|
|
headStart += HEAD.length ();
|
|
string::size_type headStop (caseInsensitiveFind (content, HEAD_END, headStart));
|
|
if (headStop != string::npos) {
|
|
// to reduce the scoop of search
|
|
string oldHead (content.substr (headStart, headStop-headStart));
|
|
LOG ("HEAD start: " << headStart << " stop: " << headStop << " old: " << oldHead);
|
|
string::size_type oldCssPos (oldHead.find (KAZ_CSS_URL));
|
|
if (oldCssPos != string::npos) {
|
|
string::size_type oldStart (oldHead.rfind ('<', oldCssPos));
|
|
string::size_type oldStop (oldHead.find ('>', oldCssPos));
|
|
if (oldStart != string::npos && oldStop != string::npos) {
|
|
++oldStop;
|
|
if (oldStop < oldHead.length () && oldHead [oldStop] == '\n')
|
|
++oldStop;
|
|
content.erase (headStart+oldStart, oldStop-oldStart);
|
|
}
|
|
}
|
|
content.insert (headStart, "\n"+KAZ_CSS);
|
|
}
|
|
// else XXX pas de /head (if faut en ajouter un (avec <html> ?))
|
|
}
|
|
removeSection (content, KAZ_HTML_START, KAZ_HTML_STOP);
|
|
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
|
|
// XXX case insensitive ??
|
|
if (content.find (CID) != string::npos)
|
|
replaceAll (content, translateHtml);
|
|
attachP->replaceEmbedded (content);
|
|
}
|
|
if (isPlain)
|
|
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
|
|
if (isDisclaimer) {
|
|
if (isHtml) {
|
|
for (string endTag : {BODY_END, HTML_END}) {
|
|
LOG ("try tag: " << endTag);
|
|
string::size_type endTagStart = caseInsensitiveRFind (content, endTag);
|
|
if (endTagStart != string::npos) {
|
|
content = content.substr (0, endTagStart);
|
|
LOG ("remove tag: " << endTag << " content: " << content);
|
|
}
|
|
}
|
|
content += htmlDisclaim+BODY_END+HTML_END;
|
|
LOG ("content: " << content);
|
|
}
|
|
if (isPlain)
|
|
content += plainDisclaim;
|
|
}
|
|
attachP->println (outbox, content);
|
|
} else {
|
|
LOG_BUG (true, continue, "eMailShrinker: bug M8: can't change" << *attachP);
|
|
}
|
|
outbox.flush ();
|
|
curPos = attachP->endPos;
|
|
}
|
|
copy (mbox, outbox, curPos, endPos);
|
|
outbox.close ();
|
|
}
|
|
|
|
// ================================================================================
|