832 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			832 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| ////////////////////////////////////////////////////////////////////////////
 | |
| // Copyright KAZ 2021							  //
 | |
| // 									  //
 | |
| // contact (at) kaz.bzh							  //
 | |
| // 									  //
 | |
| // This software is a filter to shrink email by attachment extraction.	  //
 | |
| // 									  //
 | |
| // This software is governed by the CeCILL-B license under French law and //
 | |
| // abiding by  the rules of distribution  of free software. You  can use, //
 | |
| // modify  and/or  redistribute  the  software under  the  terms  of  the //
 | |
| // CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
 | |
| // URL "http://www.cecill.info".					  //
 | |
| // 									  //
 | |
| // As a counterpart to the access to  the source code and rights to copy, //
 | |
| // modify and  redistribute granted  by the  license, users  are provided //
 | |
| // only with a limited warranty and  the software's author, the holder of //
 | |
| // the economic  rights, and the  successive licensors have  only limited //
 | |
| // liability.								  //
 | |
| // 									  //
 | |
| // In this respect, the user's attention is drawn to the risks associated //
 | |
| // with loading,  using, modifying  and/or developing or  reproducing the //
 | |
| // software by the user in light of its specific status of free software, //
 | |
| // that may  mean that  it is  complicated to  manipulate, and  that also //
 | |
| // therefore means  that it  is reserved  for developers  and experienced //
 | |
| // professionals having in-depth computer  knowledge. Users are therefore //
 | |
| // encouraged  to load  and test  the software's  suitability as  regards //
 | |
| // their  requirements  in  conditions  enabling the  security  of  their //
 | |
| // systems and/or  data to  be ensured  and, more  generally, to  use and //
 | |
| // operate it in the same conditions as regards security.		  //
 | |
| // 									  //
 | |
| // The fact that  you are presently reading this means  that you have had //
 | |
| // knowledge of the CeCILL-B license and that you accept its terms.	  //
 | |
| ////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <fstream>
 | |
| #include <iomanip>
 | |
| #include <iostream>
 | |
| #include <math.h>
 | |
| #include <regex>
 | |
| #include <set>
 | |
| #include <unistd.h>
 | |
| #include <vector>
 | |
| 
 | |
| #include <boost/assign.hpp>
 | |
| #include <boost/algorithm/string.hpp>
 | |
| 
 | |
| #include "version.hpp"
 | |
| #include "kazDebug.hpp"
 | |
| #include "kazMisc.hpp"
 | |
| #include "SizeArg.hpp"
 | |
| #include "Attachment.hpp"
 | |
| #include "MainAttachment.hpp"
 | |
| 
 | |
| using namespace std;
 | |
| using namespace kaz;
 | |
| 
 | |
| static const string KAZ_WEB_SITE		("https://kaz.bzh/");
 | |
| static const string TMPL_DOWNLOAD		("{{DOWNLOAD}}");
 | |
| static const string TMPL_FILENAME		("{{FILENAME}}");
 | |
| static const string CID				("cid:");
 | |
| 
 | |
| // "l=/" => v1 compatibility
 | |
| static const regex archiveURLRegex		(".*(([&?]g=)|([&?]l=/)).*");
 | |
| 
 | |
| static const string KAZ_PLAIN_HR		("______________________________________________________________________________");
 | |
| static const string KAZ_PLAIN_START		("~~ PJ-KAZ !"); // don't end whith space
 | |
| static const string KAZ_PLAIN_STOP		(KAZ_PLAIN_START+" ~~");
 | |
| static const string KAZ_PLAIN_DONT_TOUCH	("(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents (version "+LAST_VERSION_NUM+"))");
 | |
| static const string KAZ_PLAIN_WARNING	        ("Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. "
 | |
| 						 "Elles seront automatiquement supprimées dans 1 mois. "
 | |
| 						 "Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. "
 | |
| 						 "Pour mieux comprendre la politique de nos services visitez kaz.bzh");
 | |
| static const string KAZ_PLAIN_DOWLOAD_ONE	("Vos pièces jointes sont à télécharger individuellement ici :");
 | |
| static const string KAZ_PLAIN_DOWLOAD_OTHER	("(Contenu dans des messages précédents)");
 | |
| static const string KAZ_PLAIN_DOWLOAD_ALL	("Vous pouvez télécharger l'ensemble dans une archive là :");
 | |
| 
 | |
| static const string HEAD			("<head>");
 | |
| static const string HEAD_END			("</head>");
 | |
| static const string KAZ_CSS_URL			("https://kaz.bzh/m/email.css");
 | |
| static const string KAZ_CSS			("<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>");
 | |
| static const string A_END			("</a>");
 | |
| static const string LI_BEGIN			("<li");
 | |
| static const string CLASS_ONE			("class=\"one\"");
 | |
| static const string LI_ONE			(LI_BEGIN+" "+CLASS_ONE+">");
 | |
| static const string LI_ALL			(LI_BEGIN+" class=\"all\">");
 | |
| static const string LI_END			("</li>");
 | |
| static const string HREF_ONE			("href=\"");
 | |
| static const string KAZ_HTML_CONTENT		("<!DOCTYPE html><html lang=\"fr\"><head><meta charset=\"utf-8\"><title>KAZ</title>"+KAZ_CSS+"</head><body>");
 | |
| static const string BODY_END			("</body>");
 | |
| static const string HTML_END			("</html>");
 | |
| 
 | |
| static const string KAZ_HTML_TAG		("<!--KAZ"); // don't end whith space
 | |
| static const string KAZ_HTML_START		(KAZ_HTML_TAG+" START-->");
 | |
| static const string KAZ_HTML_STOP		(KAZ_HTML_TAG+" STOP-->");
 | |
| // Textes précédents encodés en SGML
 | |
| static const string KAZ_HTML_DONT_TOUCH		("(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents (version "+LAST_VERSION_NUM+"))");
 | |
| static const string KAZ_HTML_DOWLOAD_ONE	("Vos pièces jointes sont à télécharger individuellement ici :");
 | |
| static const string KAZ_HTML_DOWLOAD_OTHER      ("(Contenu dans des messages précédents)");
 | |
| static const string KAZ_HTML_DOWLOAD_ALL	("Vous pouvez télécharger l'ensemble dans une archive là :");
 | |
| static const string KAZ_HTML_ARCHIVE		("archive");
 | |
| 
 | |
| static const string KAZ_EMPTY_TEXT_PLAIN	("Content-Type: text/plain; charset=utf-8\n"
 | |
| 						 "Content-Transfer-Encoding: base64\n");
 | |
| static const string KAZ_ATTACHMENT_TEXT_HTML	("Content-Type: text/html; charset=utf-8\n"
 | |
| 						 "Content-Disposition: attachment; filename=\"" + Attachment::KAZ_ATTACH_NAME + "\"\n"
 | |
| 						 "Content-Transfer-Encoding: base64\n");
 | |
| 
 | |
| 
 | |
| // ================================================================================
 | |
| vector <string>
 | |
| Attachment::stringsToUpdate ({KAZ_PLAIN_START, "\""+CID});
 | |
| 
 | |
| // ================================================================================
 | |
| const string MainAttachment::templatePlainAddLink	("  * "+TMPL_FILENAME+" < "+TMPL_DOWNLOAD+" >\n");
 | |
| const string MainAttachment::templatePlainAllLink	("\n  * "+KAZ_PLAIN_DOWLOAD_ALL+" < "+TMPL_DOWNLOAD+" >\n");
 | |
| 
 | |
| const string MainAttachment::templateHtmlHeader		(KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n"
 | |
| 							 "<div class=\"kaz\">"
 | |
| 							 "<p style=\"font-size:10px; color:#969696;\">"+KAZ_HTML_DONT_TOUCH+"</p>\n"
 | |
| 							 "<p>"+KAZ_HTML_DOWLOAD_ONE+"<ul>\n");
 | |
| const string MainAttachment::templateHtmlAddLink	(LI_ONE+"<a "+HREF_ONE+TMPL_DOWNLOAD+"\">"+TMPL_FILENAME+"</a>"+LI_END+"\n");
 | |
| const string MainAttachment::templateHtmlOtherLink	("</ul>"+KAZ_HTML_DOWLOAD_OTHER+"<ul>\n");
 | |
| const string MainAttachment::templateHtmlAllLink	("</ul><ul>"+LI_ALL+KAZ_HTML_DOWLOAD_ALL+" <a href=\""+TMPL_DOWNLOAD+"\">"+KAZ_HTML_ARCHIVE+"</a>"+LI_END+"\n");
 | |
| const string MainAttachment::templateHtmlFooter		("</ul></p>\n"
 | |
| 							 "<p class=\"msg\"><a class=\"kaz\" href=\""+KAZ_WEB_SITE+"\"> "+KAZ_WEB_SITE+" </a></p></div>\n"
 | |
| 							 "<hr><p style=\"font-size:10px; color:#969696;\">"+KAZ_PLAIN_STOP+"</p>"+KAZ_HTML_STOP+"\n\n");
 | |
| 
 | |
| const regex MainAttachment::whiteSpaceRegEx		("\\s+");
 | |
| 
 | |
| // ================================================================================
 | |
| const string
 | |
| kaz::attachModeLabels[] = {
 | |
|   "None", "Footer", "Attachment", "Both"
 | |
| };
 | |
| const map<string, AttachMode>
 | |
| kaz::attachModeMap = boost::assign::map_list_of
 | |
|   ("none", NONE)
 | |
|   ("footer", FOOTER)
 | |
|   ("attachment", ATTACHMENT)
 | |
|   ("both", BOTH)
 | |
|   ;
 | |
| ostream &
 | |
| kaz::operator << (ostream &out, const AttachMode &attachMode) {
 | |
|   //BOOST_ASSERT (treeType >= MIN && treeType <= ALPHA);
 | |
|   return out << attachModeLabels [attachMode];
 | |
| }
 | |
| istream &
 | |
| kaz::operator >> (istream &in, AttachMode &attachMode) {
 | |
|   string token;
 | |
|   in >> token;
 | |
|   auto pos = attachModeMap.find (boost::algorithm::to_lower_copy (token));
 | |
|   if (pos == attachModeMap.end ())
 | |
|     in.setstate (ios_base::failbit);
 | |
|   else
 | |
|     attachMode = pos->second;
 | |
|   return in;
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| const string
 | |
| kaz::headerTypeLabels[] = {
 | |
|   "Same", "Multi", "MainPlain", "AttachHtml"
 | |
| };
 | |
| const map<string, HeaderType>
 | |
| kaz::headerTypeMap = boost::assign::map_list_of
 | |
|   ("same", SAME)
 | |
|   ("multi", MULTI)
 | |
|   ("mainplain", MAIN_PLAIN)
 | |
|   ("attachhtml", ATTACH_HTML)
 | |
|   ;
 | |
| ostream &
 | |
| kaz::operator << (ostream &out, const HeaderType &headerType) {
 | |
|   //BOOST_ASSERT (treeType >= MIN && treeType <= ALPHA);
 | |
|   return out << headerTypeLabels [headerType];
 | |
| }
 | |
| istream &
 | |
| kaz::operator >> (istream &in, HeaderType &headerType) {
 | |
|   string token;
 | |
|   in >> token;
 | |
|   auto pos = headerTypeMap.find (boost::algorithm::to_lower_copy (token));
 | |
|   if (pos == headerTypeMap.end ())
 | |
|     in.setstate (ios_base::failbit);
 | |
|   else
 | |
|     headerType = pos->second;
 | |
|   return in;
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) {
 | |
|   DEF_LOG ("MainAttachment::copy", "begin: " << begin << " end: " << end);
 | |
|   mbox.seekg (begin, ios::beg);
 | |
|   char c;
 | |
|   for (streamoff pos (begin); pos < end; ++pos) {
 | |
|     mbox.get (c);
 | |
|     outbox.put (c);
 | |
|   }
 | |
|   outbox.flush ();
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::readArchiveUrl () {
 | |
|   DEF_LOG ("MainAttachment::readArchiveUrl", "");
 | |
|   archiveDownloadURL.clear ();
 | |
|   string line;
 | |
|   getline (cin, line);
 | |
|   LOG_BUG (line.rfind ("arch: ", 0) != 0, return, "eMailShrinker: bug M9: no archive link. (line: " << line << ")");
 | |
|   LOG_BUG (line.rfind ("arch: bad", 0) == 0, return, "eMailShrinker: bug M10: bad archive link. (line: " << line << ")");
 | |
|   if (line.rfind ("arch: none", 0) == 0)
 | |
|     return;
 | |
|   archiveDownloadURL = line.substr (6);
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::readDownloadUrl (string &url) {
 | |
|   DEF_LOG ("MainAttachment::readDownloadUrl", "");
 | |
|   url = "";
 | |
|   string line;
 | |
|   getline (cin, line);
 | |
|   LOG ("get URL: " << line);
 | |
|   LOG_BUG (line.rfind ("url: ", 0) != 0, return, "eMailShrinker: bug M11: no download link. (line: " << line << ")");
 | |
|   url = line.substr (5);
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::setExtractDir (const bfs::path &extractDir) {
 | |
|   if (extractDir.empty ())
 | |
|     throw invalid_argument ("no tmp dir");
 | |
|   this->extractDir = extractDir;
 | |
|   if (! is_directory (extractDir))
 | |
|     bfs::create_directory (extractDir);
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::addLink (string &plain, string &html, const string &url, const string &name) const {
 | |
|   string plainNewOneLink (templatePlainAddLink);
 | |
|   replaceAll (plainNewOneLink, TMPL_DOWNLOAD, url);
 | |
|   replaceAll (plainNewOneLink, TMPL_FILENAME, "\""+name+"\"");
 | |
|   plain += plainNewOneLink;
 | |
|   string htmlNewOneLink (templateHtmlAddLink);
 | |
|   string codedUrl (url);
 | |
|   // pb &
 | |
|   // replaceAll (codedUrl, "&", "&");
 | |
|   replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
 | |
|   replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
 | |
|   html += htmlNewOneLink;
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::getDisclaim (string &plain, string &html) const {
 | |
|   DEF_LOG ("Attachment::getDisclaim", "");
 | |
|   plain = html = "";
 | |
| 
 | |
|   int linkCount (0);
 | |
|   string plainNewLinks, htmlNewLinks;
 | |
|   for (Attachment *attachP : allMarkedPtrs) {
 | |
|     if (attachP->isSigned || !attachP->toExtract)
 | |
|       continue;
 | |
|     addLink (plainNewLinks, htmlNewLinks, attachP->downloadUrl, attachP->getAttachName ());
 | |
|     ++linkCount;
 | |
|     // if (previousLinks [attachP->downloadUrl] != previousLinks.end ())
 | |
|     //   // impossible puisque le lien est toujours nouveau
 | |
|     //   previousLinks.erase (attachP->downloadUrl);
 | |
|   }
 | |
|   for (Attachment *attachP : allMarkedPtrs) {
 | |
|     if (attachP->isSigned || !attachP->embeddedData.size ())
 | |
|       continue;
 | |
|     for (EmbeddedData &embedded : attachP->embeddedData) {
 | |
|       addLink (plainNewLinks, htmlNewLinks, embedded.downloadUrl, embedded.name);
 | |
|       ++linkCount;
 | |
|     }
 | |
|   }
 | |
|   
 | |
|   string plainOldLinks, htmlOldLinks;
 | |
|   for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
 | |
|     addLink (plainOldLinks, htmlOldLinks, it->first, it->second);
 | |
|   linkCount += previousLinks.size ();
 | |
|   if (! linkCount) {
 | |
|     LOG ("no attach");
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   plain = "\n"+KAZ_PLAIN_START+"\n\n"+KAZ_PLAIN_HR+"\n"+KAZ_PLAIN_DONT_TOUCH+"\n\n"+KAZ_PLAIN_DOWLOAD_ONE+"\n"+plainNewLinks;
 | |
|   html = templateHtmlHeader+htmlNewLinks;
 | |
|   if (previousLinks.size ()) {
 | |
|     plain += "\n"+KAZ_PLAIN_DOWLOAD_OTHER+"\n"+plainOldLinks;
 | |
|     html += templateHtmlOtherLink+htmlOldLinks;
 | |
|   }
 | |
|   if (linkCount > 1 && archiveDownloadURL.length ()) {
 | |
|     string allPlainLinks (templatePlainAllLink);
 | |
|     replaceAll (allPlainLinks, TMPL_DOWNLOAD, archiveDownloadURL);
 | |
|     plain += allPlainLinks;
 | |
|     string allHtmlLinks (templateHtmlAllLink);
 | |
|     replaceAll (allHtmlLinks, TMPL_DOWNLOAD, archiveDownloadURL);
 | |
|     html += allHtmlLinks;
 | |
|   }
 | |
|   html += templateHtmlFooter+"\n";
 | |
|   plain += "\n\n"+KAZ_WEB_SITE+"\n\n"+KAZ_PLAIN_WARNING+"\n"+KAZ_PLAIN_HR+"\n"+KAZ_PLAIN_STOP+"\n\n\n";
 | |
|   // & => & done
 | |
|   LOG ("plain: " << plain);
 | |
|   LOG ("html: " << html);
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::addPrevious (const string &href, const string &name, const bool &trust) {
 | |
|   DEF_LOG ("Attachment::addPrevious", "href: " << href << " name: " << name);
 | |
|   const string oldVal = previousLinks [href];
 | |
|   if (name.empty ())
 | |
|     return;
 | |
|   if (oldVal.length () && name.length () && !trust)
 | |
|     return;
 | |
|   previousLinks.erase (href);
 | |
|   previousLinks [href] = regex_replace (name, regex (R"([\t\n\"]+|(\\\")|(>\s*))"), "");
 | |
|   LOG ("inserted: " << href << ": " << previousLinks[href]);
 | |
| }
 | |
| 
 | |
| void
 | |
| MainAttachment::extractLinks (const string &extractedPlainKAZ) {
 | |
|   // plain text => "* name <url>"
 | |
|   DEF_LOG ("Attachment::extractedPlainKAZ", "extractedPlainKAZ: " << extractedPlainKAZ);
 | |
|   for (string::size_type startPos (0);
 | |
|        (startPos = extractedPlainKAZ.find ("http", startPos)) != string::npos;
 | |
|        ) {
 | |
|     streamoff stopPos = startPos;
 | |
|     while (extractedPlainKAZ [stopPos] && availableURLChars.find (extractedPlainKAZ [stopPos]) != string::npos)
 | |
|       ++stopPos;
 | |
|     const string href (extractedPlainKAZ.substr (startPos, stopPos-startPos));
 | |
|     LOG ("plain href: " << href);
 | |
| 
 | |
|     if (extractedPlainKAZ [stopPos])
 | |
|       ++stopPos;
 | |
|     streamoff stopName = startPos;
 | |
|     startPos = stopPos;
 | |
| 
 | |
|     // get all href but KAZ_WEB_SITE
 | |
|     // the archive link will be skip by filter.sh
 | |
|     if (href == KAZ_WEB_SITE)
 | |
|       continue;
 | |
| 
 | |
|     // backward find name
 | |
|     string::size_type startName = extractedPlainKAZ.rfind ("* ", stopName);
 | |
|     string name;
 | |
| 
 | |
|     if (startName != string::npos) {
 | |
|       name = string (extractedPlainKAZ, startName+3, stopName - startName - 3);
 | |
|       // skip [> \r\n\t]
 | |
|       string::size_type nextPos = name.find_first_not_of ("[>< \t\r\n\"]");
 | |
|       if (nextPos != string::npos)
 | |
| 	name.erase (0, nextPos);
 | |
|       // stop before [>\r\n\t]
 | |
|       nextPos = name.find_first_of ("[\"<]");
 | |
|       if (nextPos != string::npos)
 | |
| 	name.erase (nextPos);
 | |
|     }
 | |
| 
 | |
|     LOG ("plain name: " << name);
 | |
|     addPrevious (href, name);
 | |
|   }
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::extractLinks (const vector<string> &liOne) {
 | |
|   // html text => "<li ...><a href="url">name</a>"
 | |
|   DEF_LOG ("Attachment::extractedPlainKAZ", "liOne.size: " << liOne.size ());
 | |
|   for (const string &one : liOne) {
 | |
|     if (caseInsensitiveFind (one, CLASS_ONE) == string::npos)
 | |
|       continue;
 | |
|     string::size_type startPos = caseInsensitiveFind (one, HREF_ONE);
 | |
| 
 | |
|     LOG_BUG (startPos == string::npos, continue, "eMailShrinker: bug M1: no href KAZ link. (one: " << one << ")");
 | |
|     startPos += HREF_ONE.length ();
 | |
|     LOG ("startPos: " << startPos);
 | |
|     string::size_type stopPos = one.find ("\"", startPos);
 | |
| 
 | |
|     LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M2: no ending href KAZ link. (one: " << one << ")");
 | |
|     LOG ("stopPos: " << stopPos);
 | |
|     string href (one.substr (startPos, stopPos-startPos));
 | |
|     LOG ("html href: " << href);
 | |
|     stopPos = one.find (">", startPos);
 | |
| 
 | |
|     LOG_BUG (one [stopPos] != '>', break, "eMailShrinker: bug M3: no ending href KAZ link. (one: " << one << ")");
 | |
|     ++stopPos;
 | |
|     startPos = stopPos;
 | |
|     LOG ("startPos: " << startPos);
 | |
|     stopPos = caseInsensitiveFind (one, A_END, startPos);
 | |
|     LOG ("stopPos: " << stopPos);
 | |
| 
 | |
|     LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M4: no ending anchor KAZ link. (one: " << one << ")");
 | |
|     string name (one.substr (startPos, stopPos-startPos));
 | |
|     LOG ("html name: " << name);
 | |
|     addPrevious (href, name);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void
 | |
| MainAttachment::extractPreviousKAZ (string &extractedPlainKAZ, string &extractedHtmlKAZ, ifstream &mbox, const Attachment &attach) {
 | |
|   DEF_LOG ("MainAttachment::extractPreviousKAZ", "attach:" << attach);
 | |
|   if (!(attach.toUpdate || attach.isKazAttachment)) // isKazAttachment => toUpdate
 | |
|     return;
 | |
|   string textProp = attach.getProp (contentTypeToken, textRegEx);
 | |
|   if (textProp.empty ())
 | |
|     return;
 | |
|   string content (attach.getContent (mbox));
 | |
|   replaceAll (content, "&", "&");
 | |
|   if (textProp == PLAIN) {
 | |
|     LOG (PLAIN);
 | |
|     extractedPlainKAZ += attach.getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
 | |
|   }
 | |
|   if (textProp == HTML) {
 | |
|     LOG (HTML);
 | |
|     string section = attach.getSection (content, KAZ_HTML_START, KAZ_HTML_STOP);
 | |
|     section += attach.getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
 | |
|     // update href from HTML attachments
 | |
|     extractedHtmlKAZ += section;
 | |
|   }
 | |
| }
 | |
| 
 | |
| void
 | |
| MainAttachment::extractPreviousKAZ (ifstream &mbox) {
 | |
|   DEF_LOG ("MainAttachment::extractPreviousKAZ", "");
 | |
|   string extractedPlainKAZ, extractedHtmlKAZ;
 | |
|   if (boundary.empty ())
 | |
|     extractPreviousKAZ (extractedPlainKAZ, extractedHtmlKAZ, mbox, *this);
 | |
|   else
 | |
|     for (const Attachment *attachP : allMarkedPtrs)
 | |
|       extractPreviousKAZ (extractedPlainKAZ, extractedHtmlKAZ, mbox, *attachP);
 | |
| 
 | |
|   LOG ("extractedPlainKAZ: "<< extractedPlainKAZ);
 | |
|   extractLinks (extractedPlainKAZ);
 | |
| 
 | |
|   LOG ("extractedHtmlKAZ: "<< extractedHtmlKAZ);
 | |
|   vector<string> liOne;
 | |
|   getSection (extractedHtmlKAZ, LI_BEGIN, LI_END, liOne);
 | |
|   extractLinks (liOne);
 | |
| 
 | |
| #ifndef DISABLE_LOG
 | |
|   for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
 | |
|     LOG  ("oldLink link: " << it->first << " name: " << it->second);
 | |
| #endif
 | |
| }
 | |
| 
 | |
| void
 | |
| MainAttachment::removePreviousArchive () {
 | |
|   DEF_LOG ("MainAttachment::removePreviousArchive", "");
 | |
|   vector<string> toRemove;
 | |
|   for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) {
 | |
|     const string key (it->first);
 | |
|     if (regex_match (key, archiveURLRegex))
 | |
|       toRemove.push_back (key);
 | |
|   }
 | |
|   for (string old : toRemove)
 | |
|     previousLinks.erase (old);
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void MainAttachment::rewriteHeaders (ifstream &mbox, ofstream &outbox, const HeaderType &headerType) {
 | |
|   DEF_LOG ("MainAttachment::rewriteHeaders", "headerType: " << headerType);
 | |
|   if (SAME == headerType) {
 | |
|     copy (mbox, outbox, 0, contentPos);
 | |
|     return;
 | |
|   }
 | |
|   string mime (getMime (mbox));
 | |
|   string::size_type startPos = (0);
 | |
|   for (string token : {string ("content-transfer-encoding"), Attachment::contentTypeToken}) {
 | |
|     startPos = caseInsensitiveFind (mime, token);
 | |
|     for (string::size_type stopPos (startPos);
 | |
| 	 (stopPos = mime.find ("\n", stopPos)) != string::npos;
 | |
| 	 ) {
 | |
|       if (string (" \t").find (mime [stopPos+1]) == string::npos) {
 | |
| 	mime.erase (startPos, stopPos-startPos);
 | |
| 	break;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   string contentType (KAZ_EMPTY_TEXT_PLAIN);
 | |
|   switch (headerType) {
 | |
|   case SAME:        /* no way */;
 | |
|   case MAIN_PLAIN:  contentType = KAZ_EMPTY_TEXT_PLAIN; break;
 | |
|   case ATTACH_HTML: contentType = KAZ_ATTACHMENT_TEXT_HTML; break;
 | |
|   case MULTI:
 | |
|     boundary = "__KAZ__"+boundaryGen (40);
 | |
|     contentType = "Content-Type: multipart/mixed; boundary=\""+boundary+"\"";
 | |
|     boundary = "--"+boundary+"--";
 | |
|     boundaryMiddleSize = boundary.length () - 2;
 | |
|   }
 | |
|   if (startPos >= mime.length ())
 | |
|     startPos = mime.length ()-1;
 | |
|   mime.insert (startPos, contentType);
 | |
|   outbox << mime << flush;
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| MainAttachment::MainAttachment (ifstream &mbox)
 | |
|   : Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
 | |
|     emptyEMail (false),
 | |
|     previousKazAttachment (false) {
 | |
|   DEF_LOG ("MainAttachment::MainAttachment", "");
 | |
|   string line;
 | |
|   for (; getline (mbox, line); )
 | |
|     tmpPos += line.length () + 1;
 | |
|   endPos = tmpPos;
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox) {
 | |
|   DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
 | |
|   bool plainMarked (false), htmlMarked (false);
 | |
|   markDisclaim (plainMarked, htmlMarked);
 | |
|   emptyEMail = ! (plainMarked || htmlMarked);
 | |
|   Attachment::markSignificant ("", isSigned, minAttachSize, mbox, allMarkedPtrs);
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::getUpdatedURL (ifstream &mbox) {
 | |
|   DEF_LOG ("MainAttachment::getUpdatedURL", "");
 | |
|   extractPreviousKAZ (mbox);
 | |
|   for (map <string, string>::iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
 | |
|     cout << it->first << endl;
 | |
| }
 | |
| 
 | |
| void
 | |
| MainAttachment::newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const {
 | |
|   DEF_LOG ("MainAttachment::newPjEntry", "attachCount: " << attachCount << " contentType: " << contentType << " name: " << name);
 | |
|   ostringstream dirNameStream;
 | |
|   dirNameStream << "PJ-" << std::setfill ('0') << std::setw (3) << int (attachCount);
 | |
|   dirName = dirNameStream.str ();
 | |
|   bfs::path dirPath (extractDir / dirName);
 | |
| 
 | |
|   bfs::create_directory (dirPath);
 | |
|   bfs::path metaPath (dirPath / "meta");
 | |
| 
 | |
|   ofstream metaOut (metaPath.c_str ());
 | |
|   metaOut
 | |
|     << "Content-Type: " << contentType << endl
 | |
|     << "Name: " << name << endl;
 | |
|   metaOut.flush ();
 | |
|   metaOut.close ();
 | |
| 
 | |
|   bfs::path filePath (dirPath / "media");
 | |
|   mediaName = filePath.c_str ();
 | |
|   dirName = dirPath.c_str ();
 | |
|   LOG ("dirName: " << dirName << " mediaName: " << mediaName);
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| void
 | |
| MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
 | |
|   DEF_LOG ("MainAttachment::extract", "minSize: " << minSize);
 | |
|   int attachCount (0);
 | |
|   string dirName, mediaName;
 | |
|   for (Attachment *attachP : allMarkedPtrs) {
 | |
|     if (attachP->isSigned || attachP->isKazAttachment || !attachP->toExtract)
 | |
|       continue;
 | |
|     newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
 | |
|     ++attachCount;
 | |
|     ofstream out (mediaName);
 | |
| 
 | |
|     streamoff
 | |
|       start (attachP->Attachment::contentPos),
 | |
|       end (attachP->Attachment::endPos+1); // pour assurer le cas sans ^M
 | |
|     mbox.seekg (start, ios::beg);
 | |
|     if (attachP->isBase64Encoding ()) {
 | |
|       unsigned char buff[4];
 | |
|       int idx = 0;
 | |
|       char c;
 | |
|       for (streamoff curPos (start); mbox.get (c) && curPos < end; ++curPos) {
 | |
|     	if (c == '=')
 | |
|     	  break;
 | |
|     	if (!isBase64 (c))
 | |
|     	  continue;
 | |
|     	buff [idx] = getBase64Val (c);
 | |
|     	if (++idx != 4)
 | |
|     	  continue;
 | |
|     	out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
 | |
|     	out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
 | |
|     	out.put (buff [2] << 6 | buff [3]);
 | |
|     	idx = 0;
 | |
|       }
 | |
|       if (idx) {
 | |
|     	for (int j = idx; j < 4; ++j)
 | |
|     	  buff [j] = 0;
 | |
|     	out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
 | |
|     	--idx;
 | |
|     	if (idx)
 | |
|     	  out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
 | |
|       }
 | |
|     } else {
 | |
|       string line;
 | |
|       for (streamoff curPos (start); getline (mbox, line); ) {
 | |
| 	curPos += line.length () + 1;
 | |
| 	if (curPos >= end) {
 | |
| 	  out << line.substr (0, end + line.length () - curPos) << endl;
 | |
| 	  break;
 | |
| 	}
 | |
| 	out << line << endl;
 | |
|       }
 | |
|     }
 | |
|     out.flush ();
 | |
|     out.close ();
 | |
|     cout << dirName << endl;
 | |
|   }
 | |
|   for (Attachment *attachP : allMarkedPtrs) {
 | |
|     if (attachP->isSigned || !attachP->embeddedData.size ())
 | |
|       continue;
 | |
|     string content = attachP->getContent (mbox);
 | |
|     vector<string> imgs;
 | |
|     getSection (content, IMG_BEGIN, IMG_END, imgs);
 | |
|     for (const EmbeddedData &embedded : attachP->embeddedData) {
 | |
|       string &img (imgs[embedded.imgIdx]);
 | |
|       img.erase (0, embedded.startData);
 | |
|       img.erase (embedded.dataLength);
 | |
|       base64Decode (img);
 | |
|       newPjEntry (attachCount, embedded.contentType, embedded.name, dirName, mediaName);
 | |
|       ++attachCount;
 | |
| 
 | |
|       ofstream out (mediaName);
 | |
|       out.write (img.c_str (), img.size ());
 | |
|       out.flush ();
 | |
|       out.close ();
 | |
|       cout << dirName << endl;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| // ================================================================================
 | |
| /*!
 | |
|   Régle à appliquer dans le cas où Kaz ajoute son cartouche et que le corps principale n'est pas multipart :
 | |
| 
 | |
|   <table>
 | |
|   <tr><th>src</th><th>FOOTER</th><th>BOTH</th><th>ATTCH</th></tr>
 | |
|   <tr><th>text/plain</th><td>OK</td><td>mute multi</td><td>mute multi</td></tr>
 | |
|   <tr><th>empty mail</th><td>mute plain</td><td>mute multi</td><td>mute html</td></tr>
 | |
|   </table>
 | |
|  */
 | |
| void
 | |
| MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize, AttachMode attachMode) {
 | |
|   DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize << " AttachMode: " << attachMode);
 | |
| 
 | |
|   // preparation
 | |
|   extractPreviousKAZ (mbox);
 | |
|   removePreviousArchive ();
 | |
|   map<const string, const string> translateHtml;
 | |
|   for (Attachment *attachP : allMarkedPtrs)
 | |
|     if (!attachP->isSigned && attachP->toExtract && !attachP->isKazAttachment) {
 | |
|       readDownloadUrl (attachP->downloadUrl);
 | |
|       if (attachP->downloadUrl.empty ()) {
 | |
| 	LOG ("no change");
 | |
| 	attachP->toExtract = false;
 | |
| 	continue;
 | |
|       }
 | |
|       if (attachP->cid.length ()) {
 | |
| 	string tmp (attachP->downloadUrl);
 | |
| 	//replaceAll (tmp, "&", "&");
 | |
|       	translateHtml.insert (pair<const string, const string> (CID+attachP->cid, tmp));
 | |
|       }
 | |
|     }
 | |
|   for (Attachment *attachP : allMarkedPtrs) {
 | |
|     if (attachP->isSigned || !attachP->embeddedData.size ())
 | |
|       continue;
 | |
|     for (EmbeddedData &embedded : attachP->embeddedData)
 | |
|       readDownloadUrl (embedded.downloadUrl);
 | |
|     // XXX test empty ?
 | |
|   }
 | |
|   readArchiveUrl ();
 | |
|   removePreviousArchive ();
 | |
|   string plainDisclaim, htmlDisclaim;
 | |
|   getDisclaim (plainDisclaim, htmlDisclaim);
 | |
| 
 | |
|   HeaderType headerType (SAME);
 | |
|   // copy email
 | |
|   if (!boundary.size () && plainDisclaim.size ())
 | |
|     switch (attachMode) {
 | |
|     case NONE:       LOG_BUG (true, /* */, "eMailShrinker: bug M12: nothing to do"); break;
 | |
|     case FOOTER:     headerType = (emptyEMail ? MAIN_PLAIN : SAME); break;
 | |
|     case BOTH:       headerType = MULTI; break;
 | |
|     case ATTACHMENT: headerType = ATTACH_HTML; break;
 | |
|     }
 | |
|   rewriteHeaders (mbox, outbox, headerType);
 | |
|   streamoff curPos = contentPos;
 | |
| 
 | |
|   if (MAIN_PLAIN == headerType) {
 | |
|       LOG ("Replace old content with plain");
 | |
|       string content (plainDisclaim);
 | |
|       base64Encode (content);
 | |
|       outbox << content << endl;
 | |
|       outbox.flush ();
 | |
|       return;
 | |
|   }
 | |
|   if (ATTACH_HTML == headerType) {
 | |
|     LOG ("Replace old content with html");
 | |
|     string content (plainDisclaim);
 | |
|     base64Encode (content);
 | |
|     outbox << content << endl;
 | |
|     outbox.flush ();
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   if (plainDisclaim.size () && emptyEMail && (attachMode & FOOTER)) {
 | |
|     // case : multi
 | |
|     LOG ("Force main text");
 | |
|     cerr << "eMailShrinker: force main text" << endl;
 | |
|     string content (plainDisclaim);
 | |
|     base64Encode (content);
 | |
|     outbox << boundary.substr (0, boundary.length () -2) << endl
 | |
| 	   << KAZ_EMPTY_TEXT_PLAIN << endl
 | |
| 	   << content << endl;
 | |
|     outbox.flush ();
 | |
|   }
 | |
| 
 | |
|   if (MULTI == headerType) {
 | |
|     LOG ("New boundary");
 | |
|     map<string, string>::const_iterator it (env.find (contentTypeToken));
 | |
|     LOG_BUG (it == env.end (), /* */, "eMailShrinker: bug M13: no content-type");
 | |
|     outbox << boundary.substr (0, boundary.length () -2) << endl
 | |
| 	   << Attachment::contentTypeToken << ": " << it->second << endl;
 | |
|   }
 | |
| 
 | |
|   for (Attachment *attachP : allMarkedPtrs) {
 | |
|     copy (mbox, outbox, curPos, attachP->beginInParent);
 | |
|     LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
 | |
| 
 | |
|     if (attachP->isSigned) {
 | |
|       LOG ("don't change signed content");
 | |
|       copy (mbox, outbox, attachP->beginInParent, attachP->endPos);
 | |
|       
 | |
|     } else if (attachP->toExtract || attachP->isKazAttachment) {
 | |
|       LOG ("skip Extracted or previous attachments");
 | |
| 
 | |
|     } else if (attachP->toUpdate) {
 | |
|       string textProp = attachP->getProp (contentTypeToken, textRegEx);
 | |
|       bool isPlain = textProp == PLAIN;
 | |
|       bool isHtml = textProp == HTML;
 | |
|       bool isDisclaimer = attachP->toDisclaim;
 | |
| 
 | |
|       LOG_BUG (isPlain && isHtml, /**/, "eMailShrinker: bug M6: plain and html: " << attachP->getContentType ());
 | |
|       LOG_BUG (! (isPlain || isHtml), /**/, "eMailShrinker: bug M7: not plain or html: " << attachP->getContentType ());
 | |
|       LOG ("toUpdate:  isPlain: " << isPlain << " isHtml: " << isHtml << " isDisclaimer: " << isDisclaimer);
 | |
|       if (attachP != this)
 | |
| 	copy (mbox, outbox, attachP->beginInParent, attachP->contentPos);
 | |
| 
 | |
|       string content = attachP->getContent (mbox);
 | |
|       if (isHtml) {
 | |
| 	string::size_type headStart (caseInsensitiveFind (content, HEAD));
 | |
| 	LOG ("HEAD start: " << headStart);
 | |
| 	if (headStart != string::npos) {
 | |
| 	  headStart += HEAD.length ();
 | |
| 	  string::size_type headStop (caseInsensitiveFind (content, HEAD_END, headStart));
 | |
| 	  if (headStop != string::npos) {
 | |
| 	    // to reduce the scoop of search
 | |
| 	    string oldHead (content.substr (headStart, headStop-headStart));
 | |
| 	    LOG ("HEAD start: " << headStart << " stop: " << headStop << " old: " << oldHead);
 | |
| 	    string::size_type oldCssPos (oldHead.find (KAZ_CSS_URL));
 | |
| 	    if (oldCssPos != string::npos) {
 | |
| 	      string::size_type oldStart (oldHead.rfind ('<', oldCssPos));
 | |
| 	      string::size_type oldStop (oldHead.find ('>', oldCssPos));
 | |
| 	      if (oldStart != string::npos && oldStop != string::npos) {
 | |
| 		++oldStop;
 | |
| 		if (oldStop < oldHead.length () && oldHead [oldStop] == '\n')
 | |
| 		  ++oldStop;
 | |
| 		content.erase (headStart+oldStart, oldStop-oldStart);
 | |
| 	      }
 | |
| 	    }
 | |
| 	    content.insert (headStart, "\n"+KAZ_CSS);
 | |
| 	  }
 | |
| 	  // else XXX pas de /head (if faut en ajouter un (avec <html> ?))
 | |
| 	}
 | |
| 	removeSection (content, KAZ_HTML_START, KAZ_HTML_STOP);
 | |
| 	removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
 | |
| 	if (caseInsensitiveFind (content, CID) != string::npos)
 | |
| 	  replaceAll (content, translateHtml);
 | |
| 	attachP->replaceEmbedded (content);
 | |
|       }
 | |
|       if (isPlain)
 | |
| 	removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
 | |
|       if (isDisclaimer && (attachMode & FOOTER)) {
 | |
| 	if (isHtml) {
 | |
| 	  for (string endTag : {BODY_END, HTML_END}) {
 | |
| 	    LOG ("try tag: " << endTag);
 | |
| 	    string::size_type endTagStart = caseInsensitiveRFind (content, endTag);
 | |
| 	    if (endTagStart != string::npos) {
 | |
| 	      content = content.substr (0, endTagStart);
 | |
| 	      LOG ("remove tag: " << endTag << " content: " << content);
 | |
| 	    }
 | |
| 	  }
 | |
| 	  content += htmlDisclaim+BODY_END+HTML_END;
 | |
| 	  LOG ("content: " << content);
 | |
| 	}
 | |
| 	if (isPlain)
 | |
| 	  content += plainDisclaim;
 | |
|       }
 | |
|       attachP->println (outbox, content);
 | |
|     } else {
 | |
|       LOG_BUG (true, continue, "eMailShrinker: upload has failed, so can't change" << *attachP);
 | |
|     }
 | |
|     outbox.flush ();
 | |
|     curPos = attachP->endPos;
 | |
|   }
 | |
| 
 | |
|   if (plainDisclaim.size () && (attachMode & ATTACHMENT)) {
 | |
|     LOG ("Add kaz attachment");
 | |
|     cerr << "eMailShrinker: force attachment" << endl;
 | |
|     if (subAttachements.size ()) {
 | |
|       streamoff lastPos = subAttachements.back ().endPos;
 | |
|       copy (mbox, outbox, curPos, lastPos);
 | |
|       curPos = lastPos;
 | |
|     }
 | |
|     string content (KAZ_HTML_CONTENT+htmlDisclaim+BODY_END+HTML_END);
 | |
|     base64Encode (content);
 | |
| 
 | |
|     outbox << boundary.substr (0, boundary.length () -2) << endl
 | |
| 	   << KAZ_ATTACHMENT_TEXT_HTML << endl
 | |
| 	   << content << endl;
 | |
|     outbox.flush ();
 | |
|   }
 | |
|   copy (mbox, outbox, curPos, endPos);
 | |
|   if (MULTI == headerType)
 | |
|     outbox << boundary.substr (0, boundary.length ()) << endl;
 | |
|   outbox.close ();
 | |
| }
 | |
| 
 | |
| // ================================================================================
 |