depollueur/src/cpp/MainAttachment.cpp


								////////////////////////////////////////////////////////////////////////////

								// Copyright KAZ 2021							  //

								// 									  //

								// contact (at) kaz.bzh							  //

								// 									  //

								// This software is a filter to shrink email by attachment extraction.	  //

								// 									  //

								// This software is governed by the CeCILL-B license under French law and //

								// abiding by  the rules of distribution  of free software. You  can use, //

								// modify  and/or  redistribute  the  software under  the  terms  of  the //

								// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //

								// URL "http://www.cecill.info".					  //

								// 									  //

								// As a counterpart to the access to  the source code and rights to copy, //

								// modify and  redistribute granted  by the  license, users  are provided //

								// only with a limited warranty and  the software's author, the holder of //

								// the economic  rights, and the  successive licensors have  only limited //

								// liability.								  //

								// 									  //

								// In this respect, the user's attention is drawn to the risks associated //

								// with loading,  using, modifying  and/or developing or  reproducing the //

								// software by the user in light of its specific status of free software, //

								// that may  mean that  it is  complicated to  manipulate, and  that also //

								// therefore means  that it  is reserved  for developers  and experienced //

								// professionals having in-depth computer  knowledge. Users are therefore //

								// encouraged  to load  and test  the software's  suitability as  regards //

								// their  requirements  in  conditions  enabling the  security  of  their //

								// systems and/or  data to  be ensured  and, more  generally, to  use and //

								// operate it in the same conditions as regards security.		  //

								// 									  //

								// The fact that  you are presently reading this means  that you have had //

								// knowledge of the CeCILL-B license and that you accept its terms.	  //

								////////////////////////////////////////////////////////////////////////////


								#include <algorithm>

								#include <fstream>

								#include <iomanip>

								#include <iostream>

								#include <math.h>

								#include <regex>

								#include <set>

								#include <unistd.h>

								#include <vector>


								#include "kazDebug.hpp"

								#include "kazMisc.hpp"

								#include "SizeArg.hpp"

								#include "Attachment.hpp"

								#include "MainAttachment.hpp"


								using namespace std;

								using namespace kaz;


								// Web site written in the disclaimer footers; extractLinks skips links equal to it
								static const string KAZ_WEB_SITE		("https://kaz.bzh/");

								// Placeholders of the link templates, substituted with replaceAll (url and file name)
								static const string TMPL_DOWNLOAD		("{{DOWNLOAD}}");

								static const string TMPL_FILENAME		("{{FILENAME}}");

								// Prefix of the keys used to translate embedded references into download URLs
								static const string CID				("cid:");


								// Links matching this expression are dropped by removePreviousArchive:
								// they contain "g=" or "l=/" preceded by '&' or '?'
								// "l=/" => v1 compatibility

								static const regex archiveURLSignature		(".*(([&?]g=)|([&?]l=/)).*");


								// Pieces of the plain text disclaimer built by getDisclaim
								// (the French texts are sent to the users as is)

								// horizontal rule
								static const string KAZ_PLAIN_HR		("______________________________________________________________________________");

								// start and stop markers delimiting the disclaimer inside a text
								static const string KAZ_PLAIN_START		("~~ PJ-KAZ !"); // don't end with space

								static const string KAZ_PLAIN_STOP		(KAZ_PLAIN_START+" ~~");

								static const string KAZ_PLAIN_DONT_TOUCH	("(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)");

								static const string KAZ_PLAIN_WARNING	        ("Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. "

														 "Elles seront automatiquement supprimées dans 1 mois. "

														 "Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. "

														 "Pour mieux comprendre la politique de nos services visitez kaz.bzh");

								// title of the links of the current message
								static const string KAZ_PLAIN_DOWLOAD_ONE	("Vos pièces jointes sont à télécharger individuellement ici :");

								// title of the links found in previous messages
								static const string KAZ_PLAIN_DOWLOAD_OTHER	("(Contenu dans des messages précédents)");

								// title of the archive link
								static const string KAZ_PLAIN_DOWLOAD_ALL	("Vous pouvez télécharger l'ensemble dans une archive là :");


								// HTML fragments searched in, or written to, the HTML parts

								static const string HEAD			("<head>");

								static const string HEAD_END			("</head>");

								// style sheet link inserted at the beginning of <head> by substitute
								static const string KAZ_CSS_URL			("https://kaz.bzh/m/email.css");

								static const string KAZ_CSS			("<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>");

								static const string A_END			("</a>");

								// list items: class "one" for a single document, class "all" for the archive
								static const string LI_BEGIN			("<li");

								static const string CLASS_ONE			("class=\"one\"");

								static const string LI_ONE			(LI_BEGIN+" "+CLASS_ONE+">");

								static const string LI_ALL			(LI_BEGIN+" class=\"all\">");

								static const string LI_END			("</li>");

								// beginning of the href attribute looked for by extractLinks
								static const string HREF_ONE			("href=\"");

								// closing tags removed then appended again around the HTML disclaimer
								static const string BODY_END			("</body>");

								static const string HTML_END			("</html>");


								// HTML comments delimiting the disclaimer inside an HTML part
								static const string KAZ_HTML_TAG		("<!--KAZ"); // don't end with space

								static const string KAZ_HTML_START		(KAZ_HTML_TAG+" START-->");

								static const string KAZ_HTML_STOP		(KAZ_HTML_TAG+" STOP-->");

								// Same texts as the plain ones above, written with SGML character entities

								static const string KAZ_HTML_DONT_TOUCH		("(conservez cette partie intacte dans votre r&eacute;ponse si vous voulez transmettre les documents pr&eacute;c&eacute;dents)");

								static const string KAZ_HTML_DOWLOAD_ONE	("Vos pi&egrave;ces jointes sont &agrave; t&eacute;l&eacute;charger individuellement ici :");

								static const string KAZ_HTML_DOWLOAD_OTHER      ("(Contenu dans des messages pr&eacute;c&eacute;dents)");

								static const string KAZ_HTML_DOWLOAD_ALL	("Vous pouvez t&eacute;l&eacute;charger l'ensemble dans une archive l&agrave; :");

								// text of the anchor of the archive link
								static const string KAZ_HTML_ARCHIVE		("archive");


								// MIME headers of the base64 text/plain part written by substitute
								// when the disclaimer has to be added as a new part (forceMainText)
								static const string KAZ_EMPTY_TEXT_PLAIN	("Content-Type: text/plain; charset=utf-8\n"

														 "Content-Transfer-Encoding: base64\n");


								// ================================================================================

								// Definition of the static member of Attachment: the plain disclaimer
								// start marker and the quoted "cid:" prefix.
								// NOTE(review): presumably the strings whose presence marks a text part
								// as "to update" -- confirm in Attachment.cpp
								vector <string>

								Attachment::stringsToUpdate ({KAZ_PLAIN_START, "\""+CID});


								// ================================================================================

								// Plain text templates: one bullet per document, one bullet for the archive
								// ({{FILENAME}} and {{DOWNLOAD}} are substituted when the disclaimer is built)
								const string MainAttachment::templatePlainAddLink	("  * "+TMPL_FILENAME+" <"+TMPL_DOWNLOAD+">\r\n");

								const string MainAttachment::templatePlainAllLink	("\r\n  * "+KAZ_PLAIN_DOWLOAD_ALL+" <"+TMPL_DOWNLOAD+">\r\n");


								// HTML templates: header opening the first list, one item per document,
								// separator before the links of previous messages, archive item, footer
								const string MainAttachment::templateHtmlHeader		(KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n"

															 "<div class=\"kaz\">"

															 "<p style=\"font-size:10px; color:#969696;\">"+KAZ_HTML_DONT_TOUCH+"</p>\n"

															 "<p>"+KAZ_HTML_DOWLOAD_ONE+"<ul>\n");

								const string MainAttachment::templateHtmlAddLink	(LI_ONE+"<a "+HREF_ONE+TMPL_DOWNLOAD+"\">"+TMPL_FILENAME+"</a>"+LI_END+"\n");

								const string MainAttachment::templateHtmlOtherLink	("</ul>"+KAZ_HTML_DOWLOAD_OTHER+"<ul>\n");

								const string MainAttachment::templateHtmlAllLink	("</ul><ul>"+LI_ALL+KAZ_HTML_DOWLOAD_ALL+" <a href=\""+TMPL_DOWNLOAD+"\">"+KAZ_HTML_ARCHIVE+"</a>"+LI_END+"\n");

								const string MainAttachment::templateHtmlFooter		("</ul></p>\n"

															 "<p class=\"msg\"><a class=\"kaz\" href=\""+KAZ_WEB_SITE+"\"> "+KAZ_WEB_SITE+" </a></p></div>\n"

															 "<hr><p style=\"font-size:10px; color:#969696;\">"+KAZ_PLAIN_STOP+"</p>"+KAZ_HTML_STOP+"\n");


								// one or more white space characters
								const regex MainAttachment::whiteSpaceRegEx		("\\s+");


								// ================================================================================

								// Copy the bytes [begin, end) of mbox to outbox, then flush outbox.
								//   mbox   : source stream, repositioned at begin
								//   outbox : destination stream
								//   begin  : offset of the first byte to copy
								//   end    : offset following the last byte to copy
								// The copy stops as soon as mbox can't deliver a byte (end of file or
								// read error): a failed get () leaves its argument untouched, so going on
								// would write a stale (or uninitialized) character for each missing byte.
								void
								MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) {
								  DEF_LOG ("MainAttachment::copy", "begin: " << begin << " end: " << end);
								  mbox.seekg (begin, ios::beg);
								  char c;
								  // the read belongs to the loop condition so that only bytes actually read are written
								  for (streamoff pos (begin); pos < end && mbox.get (c); ++pos)
								    outbox.put (c);
								  outbox.flush ();
								}


								// ================================================================================

								void

								MainAttachment::readArchiveUrl () {

								  DEF_LOG ("MainAttachment::readArchiveUrl", "");

								  archiveDownloadURL.clear ();

								  string line;

								  getline (cin, line);

								  LOG_BUG (line.rfind ("arch: ", 0) != 0, return, "eMailShrinker: bug ZZ: no archive link. (line: " << line << ")");

								  LOG_BUG (line.rfind ("arch: bad", 0) == 0, return, "eMailShrinker: bug ZZ: bad archive link. (line: " << line << ")");

								  if (line.rfind ("arch: none", 0) == 0)

								    return;

								  archiveDownloadURL = line.substr (6);

								}


								// ================================================================================

								// Read one line from the standard input and store in url what follows
								// the "url: " prefix. url is left empty (and the line reported through
								// LOG_BUG) when the line doesn't start with this prefix.
								void
								MainAttachment::readDownloadUrl (string &url) {
								  DEF_LOG ("MainAttachment::readDownloadUrl", "");
								  url.clear ();
								  string answer;
								  getline (cin, answer);
								  LOG ("get URL: " << answer);
								  const bool tagged (answer.compare (0, 5, "url: ") == 0);
								  LOG_BUG (!tagged, return, "eMailShrinker: bug ZZ: no download link. (line: " << answer << ")");
								  // drop the 5 characters of the prefix
								  url = answer.substr (5);
								}


								// ================================================================================

								// Record the directory receiving the extracted documents and create it
								// when it isn't already a directory.
								// Throws invalid_argument when the path is empty.
								void
								MainAttachment::setExtractDir (const bfs::path &extractDir) {
								  if (extractDir.empty ())
								    throw invalid_argument ("no tmp dir");
								  this->extractDir = extractDir;
								  // nothing more to do when the directory already exists
								  if (is_directory (extractDir))
								    return;
								  bfs::create_directory (extractDir);
								}


								// ================================================================================

								// Append one download entry to both disclaimers.
								//   plain : receives templatePlainAddLink filled with url and the quoted name
								//   html  : receives templateHtmlAddLink filled with url and name
								//   url   : download link of the document
								//   name  : name displayed for the document
								void
								MainAttachment::addLink (string &plain, string &html, const string &url, const string &name) const {
								  // plain text entry, the name is surrounded by double quotes
								  string plainEntry (templatePlainAddLink);
								  replaceAll (plainEntry, TMPL_DOWNLOAD, url);
								  replaceAll (plainEntry, TMPL_FILENAME, "\""+name+"\"");
								  plain.append (plainEntry);

								  // HTML entry, url and name are inserted as is
								  // XXX amp ? (encoding "&" as "&amp;" in the URL is left disabled)
								  string htmlEntry (templateHtmlAddLink);
								  replaceAll (htmlEntry, TMPL_DOWNLOAD, url);
								  replaceAll (htmlEntry, TMPL_FILENAME, name);
								  html.append (htmlEntry);
								}


								// ================================================================================

								// Build the plain text and the HTML disclaimers.
								//   plain, html : results, both left empty when there is no link at all
								// The links come, in this order, from the attachments to extract, from the
								// embedded data of the marked attachments, then from previousLinks (links
								// found in previous messages). The archive link is added only when there
								// is more than one link and archiveDownloadURL isn't empty.
								void
								MainAttachment::getDisclaim (string &plain, string &html) const {
								  DEF_LOG ("Attachment::getDisclaim", "");
								  html.clear ();
								  plain.clear ();

								  // links of the current message
								  string plainCurrent, htmlCurrent;
								  string::size_type total (0);
								  for (Attachment *attachP : allMarkedPtrs)
								    if (attachP->toExtract) {
								      addLink (plainCurrent, htmlCurrent, attachP->downloadUrl, attachP->getAttachName ());
								      ++total;
								    }
								  for (Attachment *attachP : allMarkedPtrs)
								    for (const EmbeddedData &embedded : attachP->embeddedData) {
								      addLink (plainCurrent, htmlCurrent, embedded.downloadUrl, embedded.name);
								      ++total;
								    }

								  // links of the previous messages (key: url, value: name)
								  string plainFormer, htmlFormer;
								  for (const auto &link : previousLinks)
								    addLink (plainFormer, htmlFormer, link.first, link.second);
								  const bool hasFormer (!previousLinks.empty ());
								  total += previousLinks.size ();
								  if (total == 0) {
								    LOG ("no attach");
								    return;
								  }

								  // header and current links
								  plain.append ("\r\n").append (KAZ_PLAIN_START).append ("\r\n")
								    .append (KAZ_PLAIN_HR).append ("\r\n")
								    .append (KAZ_PLAIN_DONT_TOUCH).append ("\r\n\r\n")
								    .append (KAZ_PLAIN_WARNING).append ("\r\n\r\n")
								    .append (KAZ_PLAIN_DOWLOAD_ONE).append ("\r\n")
								    .append (plainCurrent);
								  html.append (templateHtmlHeader).append (htmlCurrent);

								  // former links
								  if (hasFormer) {
								    plain.append ("\r\n").append (KAZ_PLAIN_DOWLOAD_OTHER).append ("\r\n").append (plainFormer);
								    html.append (templateHtmlOtherLink).append (htmlFormer);
								  }

								  // archive link
								  if (total > 1 && !archiveDownloadURL.empty ()) {
								    string plainArchive (templatePlainAllLink);
								    replaceAll (plainArchive, TMPL_DOWNLOAD, archiveDownloadURL);
								    plain.append (plainArchive);
								    string htmlArchive (templateHtmlAllLink);
								    replaceAll (htmlArchive, TMPL_DOWNLOAD, archiveDownloadURL);
								    html.append (htmlArchive);
								  }

								  // footers
								  html.append (templateHtmlFooter).append ("\r\n");
								  plain.append ("\r\n\r\n").append (KAZ_WEB_SITE).append ("\r\n")
								    .append (KAZ_PLAIN_HR).append ("\r\n")
								    .append (KAZ_PLAIN_STOP).append ("\r\n");
								  // & => &amp; done
								  LOG ("plain: " << plain);
								  LOG ("html: " << html);
								}


								// ================================================================================

								void

								MainAttachment::addPrevious (const string &href, const string &name, const bool &trust) {

								  DEF_LOG ("Attachment::addPrevious", "href: " << href << " name: " << name);

								  const string oldVal = previousLinks [href];

								  if (name.empty ())

								    return;

								  if (oldVal.length () && name.length () && !trust)

								    return;

								  previousLinks.erase (href);

								  previousLinks [href] = regex_replace (name, regex (R"([\t\r\n\"]+|(\\\")|(>\s*))"), "");

								  LOG ("inserted: " << href << ": " << previousLinks[href]);

								}


								void

								MainAttachment::extractLinks (const string &extractedPlainKAZ) {

								  // plain text => "* name <url>"

								  DEF_LOG ("Attachment::extractedPlainKAZ", "extractedPlainKAZ: " << extractedPlainKAZ);

								  for (string::size_type startPos (0);

								       (startPos = extractedPlainKAZ.find ("http", startPos)) != string::npos;

								       ) {

								    streamoff stopPos = startPos;

								    while (extractedPlainKAZ [stopPos] && availableURLChars.find (extractedPlainKAZ [stopPos]) != string::npos)

								      ++stopPos;

								    const string href (extractedPlainKAZ.substr (startPos, stopPos-startPos));

								    LOG ("plain href: " << href);


								    if (extractedPlainKAZ [stopPos])

								      ++stopPos;

								    streamoff stopName = startPos;

								    startPos = stopPos;


								    // get all href but KAZ_WEB_SITE

								    // the archive link will be skip by filter.sh

								    if (href == KAZ_WEB_SITE)

								      continue;


								    // backward find name

								    string::size_type startName = extractedPlainKAZ.rfind ("* ", stopName);

								    string name;


								    if (startName != string::npos) {

								      name = string (extractedPlainKAZ, startName+3, stopName - startName - 3);

								      // skip [> \r\n\t]

								      string::size_type nextPos = name.find_first_not_of ("[>< \t\r\n\"]");

								      if (nextPos != string::npos)

									name.erase (0, nextPos);

								      // stop before [>\r\n\t]

								      nextPos = name.find_first_of ("[\"<]");

								      if (nextPos != string::npos)

									name.erase (nextPos);

								    }


								    LOG ("plain name: " << name);

								    addPrevious (href, name);

								  }

								}


								// ================================================================================

								// Register (addPrevious) the links found in the HTML disclaimers.
								//   liOne : "<li" sections of the HTML disclaimers
								// Only the sections containing class="one" are considered. The URL is the
								// value of the href attribute, the name is the text between the end of the
								// opening tag and "</a>". A section without href is skipped (M1); the other
								// malformations (M2, M3, M4) stop the whole scan.
								void
								MainAttachment::extractLinks (const vector<string> &liOne) {
								  // html text => "<li ...><a href="url">name</a>"
								  DEF_LOG ("Attachment::extractedPlainKAZ", "liOne.size: " << liOne.size ());
								  for (const string &one : liOne) {
								    if (caseInsensitiveFind (one, CLASS_ONE) == string::npos)
								      continue;
								    string::size_type startPos = caseInsensitiveFind (one, HREF_ONE);

								    LOG_BUG (startPos == string::npos, continue, "eMailShrinker: bug M1: no href KAZ link. (one: " << one << ")");
								    startPos += HREF_ONE.length ();
								    LOG ("startPos: " << startPos);
								    // closing quote of the href value
								    string::size_type stopPos = one.find ("\"", startPos);

								    LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M2: no ending href KAZ link. (one: " << one << ")");
								    LOG ("stopPos: " << stopPos);
								    string href (one.substr (startPos, stopPos-startPos));
								    LOG ("html href: " << href);
								    // end of the opening tag
								    stopPos = one.find (">", startPos);

								    // test the result of find itself: indexing the string with npos is out of bounds
								    LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M3: no ending href KAZ link. (one: " << one << ")");
								    ++stopPos;
								    startPos = stopPos;
								    LOG ("startPos: " << startPos);
								    stopPos = caseInsensitiveFind (one, A_END, startPos);
								    LOG ("stopPos: " << stopPos);

								    LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M4: no ending anchor KAZ link. (one: " << one << ")");
								    string name (one.substr (startPos, stopPos-startPos));
								    LOG ("html name: " << name);
								    addPrevious (href, name);
								  }
								}


								// Collect in previousLinks the links of the disclaimers already present in
								// the text parts marked "to update".
								//   mbox : stream the contents are read from
								// Plain parts provide their plain disclaimer section. HTML parts provide
								// their HTML section followed by their plain section, "&amp;" being turned
								// back into "&". Plain sections are parsed first, then the "<li" sections
								// of the HTML ones.
								void
								MainAttachment::extractPreviousKAZ (ifstream &mbox) {
								  DEF_LOG ("MainAttachment::extractPreviousKAZ", "");
								  string plainSections, htmlSections;
								  for (const Attachment *attachP : allMarkedPtrs) {
								    // NOTE(review): isBase64Encoding () is evaluated on this object,
								    // not on attachP -- confirm it is intended
								    if (!attachP->toUpdate || isBase64Encoding ())
								      continue;
								    const string textProp (attachP->getProp (contentTypeToken, textRegEx));
								    if (textProp.empty ())
								      continue;
								    string content (attachP->getContent (mbox));
								    if (textProp == PLAIN) {
								      LOG (PLAIN);
								      plainSections += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
								    }
								    if (textProp == HTML) {
								      LOG (HTML);
								      string found (attachP->getSection (content, KAZ_HTML_START, KAZ_HTML_STOP));
								      found += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
								      // update href from HTML attachments
								      replaceAll (found, "&amp;", "&");
								      htmlSections += found;
								    }
								  }

								  // links of the plain text disclaimers
								  LOG ("extractedPlainKAZ: "<< plainSections);
								  extractLinks (plainSections);

								  // links of the HTML disclaimers, one per "<li" section
								  LOG ("extractedHtmlKAZ: "<< htmlSections);
								  vector<string> liOne;
								  getSection (htmlSections, LI_BEGIN, LI_END, liOne);
								  extractLinks (liOne);

								#ifndef DISABLE_LOG
								  for (const auto &link : previousLinks)
								    LOG  ("oldLink link: " << link.first << " name: " << link.second);
								#endif
								}


								void

								MainAttachment::removePreviousArchive () {

								  vector<string> toRemove;

								  for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) {

								    const string key (it->first);

								    if (regex_match (key, archiveURLSignature))

								      toRemove.push_back (key);

								  }

								  for (string old : toRemove)

								    previousLinks.erase (old);

								}


								// ================================================================================

								// Build the main attachment from mbox.
								// Once the base class is built, the remaining lines of mbox are read:
								// each one moves tmpPos forward by its length plus one (the end of line).
								// endPos is the resulting position.
								MainAttachment::MainAttachment (ifstream &mbox)
								  : Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
								    forceMainText (false) {
								  DEF_LOG ("MainAttachment::MainAttachment", "");
								  string remaining;
								  while (getline (mbox, remaining))
								    tmpPos += remaining.length () + 1;
								  endPos = tmpPos;
								}


								// ================================================================================

								// Mark the parts receiving the disclaimer, then the significant attachments.
								//   minAttachSize : size threshold forwarded to Attachment::markSignificant
								//   mbox          : stream forwarded to Attachment::markSignificant
								// forceMainText is set when markDisclaim marked neither a plain nor an
								// HTML part.
								void
								MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox) {
								  DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
								  bool plainMarked = false;
								  bool htmlMarked = false;
								  markDisclaim (plainMarked, htmlMarked);
								  forceMainText = !plainMarked && !htmlMarked;
								  Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
								}


								// ================================================================================

								// Write on the standard output, one per line, the URLs of the links found
								// in the previous disclaimers of mbox.
								void
								MainAttachment::getUpdatedURL (ifstream &mbox) {
								  DEF_LOG ("MainAttachment::getUpdatedURL", "");
								  extractPreviousKAZ (mbox);
								  // the keys of previousLinks are the URLs
								  for (const auto &link : previousLinks)
								    cout << link.first << endl;
								}


								void

								MainAttachment::newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const {

								  DEF_LOG ("MainAttachment::newPjEntry", "attachCount: " << attachCount << " contentType: " << contentType << " name: " << name);

								  ostringstream dirNameStream;

								  dirNameStream << "PJ-" << std::setfill ('0') << std::setw (3) << int (attachCount);

								  dirName = dirNameStream.str ();

								  bfs::path dirPath (extractDir / dirName);


								  bfs::create_directory (dirPath);

								  bfs::path metaPath (dirPath / "meta");


								  ofstream metaOut (metaPath.c_str ());

								  metaOut

								    << "Content-Type: " << contentType << endl

								    << "Name: " << name << endl;

								  metaOut.flush ();

								  metaOut.close ();


								  bfs::path filePath (dirPath / "media");

								  mediaName = filePath.c_str ();

								  dirName = dirPath.c_str ();

								  LOG ("dirName: " << dirName << " mediaName: " << mediaName);

								}


								// ================================================================================

								// Write each document to extract in its own "media" file and print the
								// path of its directory on the standard output (one per line).
								//   mbox    : stream the message is read from
								//   minSize : only used by the trace here
								// The attachments marked toExtract are handled first (base64 contents are
								// decoded, the others are copied line by line), then the embedded data of
								// the marked attachments.
								void
								MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
								  DEF_LOG ("MainAttachment::extract", "minSize: " << minSize);
								  int attachCount (0);
								  string dirName, mediaName;
								  for (Attachment *attachP : allMarkedPtrs) {
								    if (!attachP->toExtract)
								      continue;
								    newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
								    ++attachCount;
								    ofstream out (mediaName);

								    streamoff
								      start (attachP->Attachment::contentPos),
								      end (attachP->Attachment::endPos+1); // one more to cope with the case without ^M
								    mbox.seekg (start, ios::beg);
								    if (attachP->isBase64Encoding ()) {
								      // sextets of the current group of 4 base64 characters
								      unsigned char buff[4];
								      int idx = 0;
								      char c;
								      for (streamoff curPos (start); mbox.get (c) && curPos < end; ++curPos) {
									// padding: end of the encoded data
									if (c == '=')
									  break;
									// line breaks and other foreign characters are ignored
									if (!isBase64 (c))
									  continue;
									buff [idx] = getBase64Val (c);
									if (++idx != 4)
									  continue;
									// 4 sextets => 3 bytes
									out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
									out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
									out.put (buff [2] << 6 | buff [3]);
									idx = 0;
								      }
								      // incomplete last group: 2 sextets hold 1 byte, 3 sextets hold 2 bytes.
								      // A single sextet holds no complete byte, nothing is written.
								      if (idx > 1) {
									for (int j = idx; j < 4; ++j)
									  buff [j] = 0;
									out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
									if (idx > 2)
									  out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
								      }
								    } else {
								      string line;
								      for (streamoff curPos (start); getline (mbox, line); ) {
									curPos += line.length () + 1;
									if (curPos >= end) {
									  // last line: keep only what comes before end
									  out << line.substr (0, end + line.length () - curPos) << endl;
									  break;
									}
									out << line << endl;
								      }
								    }
								    out.flush ();
								    out.close ();
								    cout << dirName << endl;
								  }
								  for (Attachment *attachP : allMarkedPtrs) {
								    if (!attachP->embeddedData.size ())
								      continue;
								    string content = attachP->getContent (mbox);
								    vector<string> imgs;
								    getSection (content, IMG_BEGIN, IMG_END, imgs);
								    for (const EmbeddedData &embedded : attachP->embeddedData) {
								      // keep only the encoded data of the section, then decode it in place
								      string &img (imgs[embedded.imgIdx]);
								      img.erase (0, embedded.startData);
								      img.erase (embedded.dataLength);
								      base64Decode (img);
								      newPjEntry (attachCount, embedded.contentType, embedded.name, dirName, mediaName);
								      ++attachCount;

								      ofstream out (mediaName);
								      out.write (img.c_str (), img.size ());
								      out.flush ();
								      out.close ();
								      cout << dirName << endl;
								    }
								  }
								}


								// ================================================================================

								// Write to outbox the message read from mbox, where the extracted
								// attachments are removed and the disclaimers are updated.
								//   mbox    : original message
								//   outbox  : resulting message, closed at the end
								//   minSize : only used by the trace here
								// The download URLs are read from the standard input in this order: one
								// per attachment to extract, one per embedded data, then the archive URL.
								// An attachment whose URL is empty is no longer extracted and is copied
								// unchanged.
								void
								MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize) {
								  DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize);

								  // preparation
								  extractPreviousKAZ (mbox);
								  removePreviousArchive ();
								  // "cid:" references => download URL
								  map<const string, const string> translateHtml;
								  for (Attachment *attachP : allMarkedPtrs)
								    if (attachP->toExtract) {
								      readDownloadUrl (attachP->downloadUrl);
								      if (attachP->downloadUrl.empty ()) {
									LOG ("no change");
									attachP->toExtract = false;
									continue;
								      }
								      if (attachP->cid.length ()) {
									string tmp (attachP->downloadUrl);
									//replaceAll (tmp, "&", "&amp;");
									translateHtml.insert (pair<const string, const string> (CID+attachP->cid, tmp));
								      }
								    }
								  for (Attachment *attachP : allMarkedPtrs) {
								    if (!attachP->embeddedData.size ())
								      continue;
								    for (EmbeddedData &embedded : attachP->embeddedData)
								      readDownloadUrl (embedded.downloadUrl);
								    // XXX test empty ?
								  }
								  readArchiveUrl ();
								  removePreviousArchive ();
								  string plainDisclaim, htmlDisclaim;
								  getDisclaim (plainDisclaim, htmlDisclaim);

								  // copy email
								  streamoff curPos = 0;
								  if (forceMainText) {
								    // no text part can receive the disclaimer: it is added as a new
								    // base64 text/plain part written just after the headers
								    LOG ("Force main text");
								    LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
								    copy (mbox, outbox, curPos, contentPos);
								    curPos = contentPos;
								    string content (plainDisclaim);
								    base64Encode (content);
								    // the boundary is written without its last two characters
								    outbox << boundary.substr (0, boundary.length () -2) << endl
									   << KAZ_EMPTY_TEXT_PLAIN << endl
									   << content << endl;
								    outbox.flush ();
								  }
								  for (Attachment *attachP : allMarkedPtrs) {
								    // what stands between the previous marked attachment and this one is kept
								    copy (mbox, outbox, curPos, attachP->beginInParent);
								    LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);

								    if (attachP->toExtract) {
								      LOG ("skip Extracted");

								    } else if (attachP->toUpdate) {
								      string textProp = attachP->getProp (contentTypeToken, textRegEx);
								      bool isPlain = textProp == PLAIN;
								      bool isHtml = textProp == HTML;
								      bool isDisclaimer = attachP->toDisclaim;

								      LOG_BUG (isPlain && isHtml, /**/, "eMailShrinker: bug M6: plain and html: " << attachP->getContentType ());
								      LOG_BUG (! (isPlain || isHtml), /**/, "eMailShrinker: bug M7: not plain or html: " << attachP->getContentType ());
								      LOG ("toUpdate:  isPlain: " << isPlain << " isHtml: " << isHtml << " isDisclaimer: " << isDisclaimer);
								      // headers of the part are kept, its content is rewritten
								      copy (mbox, outbox, attachP->beginInParent, attachP->contentPos);

								      string content = attachP->getContent (mbox);
								      if (isHtml) {
									// (re)insert the KAZ style sheet at the beginning of <head>
									string::size_type headStart (caseInsensitiveFind (content, HEAD));
									LOG ("HEAD start: " << headStart);
									if (headStart != string::npos) {
									  headStart += HEAD.length ();
									  string::size_type headStop (caseInsensitiveFind (content, HEAD_END, headStart));
									  if (headStop != string::npos) {
									    // to reduce the scope of search
									    string oldHead (content.substr (headStart, headStop-headStart));
									    LOG ("HEAD start: " << headStart << " stop: " << headStop << " old: " << oldHead);
									    string::size_type oldCssPos (oldHead.find (KAZ_CSS_URL));
									    if (oldCssPos != string::npos) {
									      // remove the tag holding the previous style sheet link
									      string::size_type oldStart (oldHead.rfind ('<', oldCssPos));
									      string::size_type oldStop (oldHead.find ('>', oldCssPos));
									      if (oldStart != string::npos && oldStop != string::npos) {
										++oldStop;
										if (oldStop < oldHead.length () && oldHead [oldStop] == '\n')
										  ++oldStop;
										content.erase (headStart+oldStart, oldStop-oldStart);
									      }
									    }
									    content.insert (headStart, "\n"+KAZ_CSS);
									  }
									  // else XXX no </head> (one should be added (with <html> ?))
									}
									// remove the previous disclaimers
									removeSection (content, KAZ_HTML_START, KAZ_HTML_STOP);
									removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
									// XXX case insensitive ??
									if (content.find (CID) != string::npos)
									  replaceAll (content, translateHtml);
									attachP->replaceEmbedded (content);
								      }
								      if (isPlain)
									removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
								      if (isDisclaimer) {
									if (isHtml) {
									  // cut at the last </body> and at the last </html>,
									  // both are written again after the disclaimer
									  for (const string &endTag : {BODY_END, HTML_END}) {
									    LOG ("try tag: " << endTag);
									    string::size_type endTagStart = caseInsensitiveRFind (content, endTag);
									    if (endTagStart != string::npos) {
									      content = content.substr (0, endTagStart);
									      LOG ("remove tag: " << endTag << " content: " << content);
									    }
									  }
									  content += htmlDisclaim+BODY_END+HTML_END;
									  LOG ("content: " << content);
									}
									if (isPlain)
									  content += plainDisclaim;
								      }
								      attachP->println (outbox, content);
								    } else {
								      LOG_BUG (true, /**/, "eMailShrinker: bug M8: can't change" << *attachP);
								      // this attachment is kept as is: the next copy restarts at its
								      // beginning (restarting at the previous position would write
								      // again what has just been copied)
								      curPos = attachP->beginInParent;
								      continue;
								    }
								    outbox.flush ();
								    curPos = attachP->endPos;
								  }
								  // end of the message
								  copy (mbox, outbox, curPos, endPos);
								  outbox.close ();
								}


								// ================================================================================