This commit is contained in:
François 2021-05-06 09:58:16 +02:00
parent 3f640e6e13
commit 64f3e49c37
15 changed files with 3075 additions and 0 deletions

33
LICENCE Normal file
View File

@ -0,0 +1,33 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////

504
src/cpp/Attachment.cpp Normal file
View File

@ -0,0 +1,504 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <vector>
#include <set>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <algorithm>
#include <unistd.h>
#include <boost/algorithm/string.hpp>
#include "kazDebug.hpp"
#include "kazMisc.hpp"
#include "SizeArg.hpp"
#include "Attachment.hpp"
using namespace std;
using namespace kaz;
// ================================================================================
// MIME header names used as keys of `env`. They are lowercase because
// readMime lowercases every header name before storing it.
const string Attachment::contentTypeToken ("content-type");
const string Attachment::contentDispositionToken ("content-disposition");
const string Attachment::contentTransferEncodingToken ("content-transfer-encoding");
// Substrings searched in the content-transfer-encoding value (see isDefProp).
const string Attachment::base64Token ("base64");
const string Attachment::quotedPrintableToken ("quoted-printable");
const string Attachment::contentIDToken ("content-id");
// Sub-type names; they are also the values captured by textRegEx / multiRegEx.
const string Attachment::PLAIN ("plain");
const string Attachment::HTML ("html");
const string Attachment::RELATED ("related");
const string Attachment::ALTERNATIVE ("alternative");
// Each regex must match the WHOLE header value (getProp uses regex_match);
// capture group 1 is the extracted property.
// name*=<value> : captures everything after "name*=" (also matches "filename*=").
const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)");
// name="<value>" : captures the quoted value (also matches filename="...").
const regex Attachment::nameRegEx (".*name=\"([^\"]*)\".*");
// boundary=<value> with optional double quotes.
const regex Attachment::boundaryRegEx (".*boundary=\"?([^\" ]*)\"?.*");
// <content-id> : captures the text between angle brackets.
const regex Attachment::cidDefRegEx (".*<([^>]*)>.*");
// text/plain or text/html : captures "plain" or "html".
const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*");
// multipart/mixed|related|alternative : captures the multipart sub-type.
const regex Attachment::multiRegEx ("\\s*multipart/(mixed|"+RELATED+"|"+ALTERNATIVE+").*");
// Delimiters of an HTML image tag, searched case-insensitively by the callers in this file.
const string Attachment::IMG_BEGIN ("<IMG");
const string Attachment::IMG_END (">");
// Start of the src attribute inside an image tag (used by replaceEmbedded).
static const string SRC_BEGIN ("SRC=\"");
// Content type of an embedded e-mail message (used by readBoundaries).
static const string RFC822 ("message/rfc822");
// ================================================================================
string
Attachment::getUnknown (const string &contentType) {
  DEF_LOG ("Attachment::getUnknown", "contentType: " << contentType);
  // Generates a name of the form "U-YYMMDDhhmmss-<n>[.<subtype>]" for a part
  // that carries no usable file name.
  // The timestamp is captured once, on the first call; only the counter
  // distinguishes names produced later in the same run.
  static time_t firstCallTime (time (nullptr));
  static int serial (0);
  const tm *stamp = localtime (&firstCallTime);
  ostringstream out;
  // The fill character is sticky, the width is not: set the fill once and the
  // width before each two-digit field.
  out << "U-" << std::setfill ('0')
      << std::setw (2) << (stamp->tm_year-100)   // tm_year counts from 1900
      << std::setw (2) << (1 + stamp->tm_mon)    // tm_mon is 0-based
      << std::setw (2) << stamp->tm_mday
      << std::setw (2) << stamp->tm_hour
      << std::setw (2) << stamp->tm_min
      << std::setw (2) << stamp->tm_sec
      << "-" << serial++;
  // The text following the first '/' of the content type becomes the extension.
  const string::size_type slashPos (contentType.find ("/"));
  if (slashPos != string::npos)
    out << "." << contentType.substr (slashPos+1);
  const string name (out.str ());
  LOG ("name: " << name);
  return name;
}
// ================================================================================
void
Attachment::removeSection (string &content, const string &beginTag, const string &endTag) {
  DEF_LOG ("Attachment::removeSection", "beginTag: " << beginTag << " endTag: " << endTag);
  // Erases from `content` every span running from an occurrence of beginTag to
  // the end of the next endTag (both located case-insensitively).
  // The scan restarts at the position where the previous span was erased.
  string::size_type startPos (0);
  while ((startPos = caseInsensitiveFind (content, beginTag, startPos)) != string::npos) {
    const string::size_type stopPos (caseInsensitiveFind (content, endTag, startPos));
    // Both tags found at the same offset: drop endTag.length () characters and rescan.
    LOG_BUG (stopPos == startPos, content.erase (startPos, endTag.length ()); continue, "eMailShrinker: bug A1: removeSection: no " << beginTag);
    // No closing tag: drop the dangling opening tag and stop.
    LOG_BUG (stopPos == string::npos, content.erase (startPos, beginTag.length ()); break, "eMailShrinker: bug A2: removeSection: no " << endTag);
    LOG ("KAZ start: " << startPos << " stop: " << stopPos);
    const string::size_type sectionLength (stopPos+endTag.length ()-startPos);
    content.erase (startPos, sectionLength);
  }
}
// ================================================================================
string
Attachment::getSection (const string &content, const string &beginTag, const string &endTag) {
  DEF_LOG ("Attachment::getSection", "beginTag: " << beginTag << " endTag: " << endTag << " content: " << content);
  // Convenience overload: concatenates, in order, every section returned by
  // the vector overload (text located between beginTag and endTag).
  vector<string> parts;
  getSection (content, beginTag, endTag, parts);
  // Pre-compute the final size so the result is allocated once.
  string::size_type total (0);
  for (vector<string>::const_iterator part = parts.begin (); part != parts.end (); ++part)
    total += part->length ();
  string joined;
  joined.reserve (total);
  for (vector<string>::const_iterator part = parts.begin (); part != parts.end (); ++part)
    joined.append (*part);
  LOG ("result: " << joined);
  return joined;
}
// ================================================================================
void
Attachment::getSection (const string &content, const string &beginTag, const string &endTag, vector<string> &result) {
DEF_LOG ("Attachment::getSection", "beginTag: " << beginTag << " endTag: " << endTag << " content: " << content);
for (string::size_type startPos (0);
(startPos = caseInsensitiveFind (content, beginTag, startPos)) != string::npos;
) {
LOG (beginTag << ": " << startPos);
string::size_type stopPos = caseInsensitiveFind (content, endTag, startPos);
LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug A3: " << endTag << " not found! at: " << startPos);
LOG ("start: " << startPos << " stop: " << stopPos);
LOG_BUG (startPos == stopPos, /**/, "eMailShrinker: bug A4: " << endTag << " without " << beginTag << " at: " << startPos);
if (startPos != stopPos) {
startPos += beginTag.length ();
result.push_back (content.substr (startPos, stopPos-startPos));
}
startPos = stopPos+endTag.length ();
}
}
// ================================================================================
const string
Attachment::getContentType () const {
  // Returns the content-type header value up to (excluding) the first ';',
  // or an empty string when the header is absent.
  const map<string, string>::const_iterator found (env.find (contentTypeToken));
  if (found == env.end ())
    return string ();
  const string &fullValue (found->second);
  // substr (0, npos) yields the whole value when there is no parameter part.
  return fullValue.substr (0, fullValue.find (';'));
}
const string
Attachment::getAttachName () const {
  DEF_LOG ("Attachment::getAttachName", "");
  // Returns the file name advertised by this part, trying in order:
  //   1. Content-Type        name="..."     (decoded with encodedWord)
  //   2. Content-Type        name*=...      (decoded with charsetValue)
  //   3. Content-Disposition filename="..." (decoded with encodedWord)
  //   4. Content-Disposition filename*=...  (decoded with charsetValue)
  // and falling back to a generated name (getUnknown) when none is present.
  string result = getProp (contentTypeToken, nameRegEx);
  if (result.length ()) {
    LOG ("name=: " << result);
    encodedWord (result);
    return result;
  }
  result = getProp (contentTypeToken, nameCharsetRegEx);
  if (result.length ()) {
    LOG ("name*=: " << result);
    charsetValue (result);
    return result;
  }
  // XXX continuation parameters (filename*0=, filename*1=, ...) spread over
  // several lines should be concatenated
  result = getProp (contentDispositionToken, nameRegEx);
  if (result.length ()) {
    LOG ("filename=: " << result);
    encodedWord (result);
    return result;
  }
  // XXX encoded continuation parameters (filename*0*=, filename*1*=, ...)
  // spread over several lines should be concatenated
  // The "filename*=" form needs nameCharsetRegEx: nameRegEx only matches the
  // quoted form, which step 3 has already tried.
  result = getProp (contentDispositionToken, nameCharsetRegEx);
  if (result.length ()) {
    LOG ("filename*=: " << result);
    charsetValue (result);
    return result;
  }
  return getUnknown (getContentType ());
}
const string &
Attachment::getBoundary () const {
  // Accessor for the boundary string computed by readMime (empty when the
  // content type declares no boundary).
  return this->boundary;
}
const streamoff
Attachment::getSize () const {
  // Distance between the recorded start and end offsets of this part.
  const streamoff span (endPos - beginPos);
  return span;
}
const string
Attachment::getProp (const string &token, const regex &regEx) const {
  DEF_LOG ("Attachment::getProp", "token: " << token);
  // Looks up header `token` in env and, when its whole value matches regEx,
  // returns the text of capture group 1. Returns an empty string when the
  // header is absent or does not match.
  const map<string, string>::const_iterator entry (env.find (token));
  const bool known (entry != env.end ());
  if (!known) {
    LOG ("no token");
    return "";
  }
  const string &val (entry->second);
  LOG ("val: " << val);
  const bool matches (regex_match (val.begin (), val.end (), regEx));
  if (!matches) {
    LOG ("no prop");
    return "";
  }
  return regex_replace (val, regEx, "$1");
}
const bool
Attachment::isBase64Encoding () const {
  // True when the content-transfer-encoding header value contains "base64".
  const bool base64 (isDefProp (contentTransferEncodingToken, base64Token));
  return base64;
}
const bool
Attachment::isQuotedPrintableEnconding () const {
return isDefProp (contentTransferEncodingToken, quotedPrintableToken);
}
const bool
Attachment::isTextBase64 () const {
  // True for a text/plain or text/html part transferred as base64.
  // The content type is checked first; the encoding is only consulted for
  // text parts.
  if (getProp (contentTypeToken, textRegEx).empty ())
    return false;
  return isBase64Encoding ();
}
const bool
Attachment::isDefProp (const string &token, const string &val) const {
DEF_LOG ("Attachment::getProp", "getProp token: " << token << " val: " << val);
map<string, string>::const_iterator it (env.find (token));
if (it == env.end ())
return false;
// XXX case insensitive ??
return it->second.find (val) != string::npos;
}
// ================================================================================
// Builds one MIME part by parsing it from the current read position of mbox.
//   mbox          : input stream, read line by line by readMime / readBoundaries.
//   level         : nesting depth of this part (sub-parts are created with level+1).
//   beginInParent : offset stored as-is; callers in this file pass either the
//                   offset of the boundary line (nextBondary) or the current
//                   position (readBoundaries).
//   curPos        : in/out running offset; its entry value is recorded as
//                   beginPos and it is advanced while headers and sub-parts are read.
// All marking flags start cleared; contentPos is set by readMime, endPos is
// filled in later (by the parent's nextBondary / readBoundaries).
Attachment::Attachment (ifstream &mbox, const int &level, const streamoff beginInParent, streamoff &curPos)
: level (level),
beginInParent (beginInParent),
beginPos (curPos),
contentPos (0),
endPos (0),
toExtract (false),
toUpdate (false),
toDisclaim (false),
boundaryMiddleSize (0) {
DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level);
// Headers first (fills env, cid, boundary), then the nested parts if any.
readMime (mbox, curPos);
readBoundaries (mbox, curPos);
}
// ================================================================================
void
Attachment::readMime (ifstream &mbox, streamoff &curPos) {
  DEF_LOG ("Attachment::readMime", "curPos: " << curPos);
  // Reads the header block of this part (up to the first empty line) and
  // stores each header in env, keyed by its lowercased name. Folded lines
  // (starting with a space or a tab) are appended to the previous header.
  // On return contentPos is the offset following the header block, and cid /
  // boundary are extracted from the content-id / content-type headers.
  // curPos is advanced by the length of every line read plus one (the '\n').
  string lastVar;
  string line;
  for (; getline (mbox, line); ) {
    LOG ("pos: " << curPos << " line: " << line);
    curPos += line.length () + 1;
    if (line.empty ())
      break;
    if (line[0] == ' ' || line[0] == '\t') {
      if (lastVar.empty ()) {
        LOG_BUG (true, /**/, "eMailShrinker: bug A5: not compliant MIME. pos: " << (curPos - (line.length () + 1)) << " line: " << line);
      } else {
        LOG ("add line to var: " << line);
        env.find (lastVar)->second += line;
        LOG ("new val: " << env.find (lastVar)->second);
      }
      continue;
    }
    string::size_type colonPos = line.find (':');
    if (colonPos != string::npos) {
      lastVar = line.substr (0, colonPos);
      toLower (lastVar);
      LOG ("find var: " << lastVar);
      // The white space after ':' is optional: skip whatever is there instead
      // of assuming exactly one character, so "Name:value" keeps its first
      // character and "Name:  value" gets no leading blank.
      const string::size_type valPos (line.find_first_not_of (" \t", colonPos+1));
      const string val (valPos == string::npos ? string () : line.substr (valPos));
      LOG ("new var: " << lastVar << " <=> " << val);
      env [lastVar] = val;
    }
  }
  LOG ("end of mime");
  contentPos = curPos;
  cid = getProp (contentIDToken, cidDefRegEx);
  boundary = getProp (contentTypeToken, boundaryRegEx);
  LOG ("boundary: " << boundary);
  if (boundary.length ()) {
    // Store the closing delimiter "--<boundary>--". Its first
    // boundaryMiddleSize characters ("--<boundary>") form the delimiter that
    // separates two parts.
    boundary = "--"+boundary+"--";
    boundaryMiddleSize = boundary.length () - 2;
  }
  LOG ("readMime contentPos: " << contentPos << " cid: " << cid << " boundary: " << boundary);
}
// ================================================================================
void
Attachment::readBoundaries (ifstream &mbox, streamoff &curPos) {
  DEF_LOG ("Attachment::readBoundaries", "curPos: " << curPos);
  // Parses what follows the headers of this part:
  //  - an embedded message (message/rfc822) is read as a single sub-part;
  //  - a part declaring a boundary is split into sub-parts;
  //  - any other part has no sub-part.
  const bool embeddedMessage (caseInsensitiveFind (getContentType (), RFC822) != string::npos);
  if (embeddedMessage) {
    subAttachements.push_back (Attachment (mbox, level+1, curPos, curPos));
    // The embedded message ends where its own parsing stopped.
    subAttachements.back ().endPos = curPos;
    return;
  }
  if (boundary.empty ())
    return;
  // Consume sub-parts until nextBondary reports the closing delimiter or the
  // end of the stream.
  while (nextBondary (mbox, curPos)) {
  }
}
bool
Attachment::nextBondary (ifstream &mbox, streamoff &curPos) {
  DEF_LOG ("Attachment::nextBondary", "curPos: " << curPos << " boundary: " << boundary);
  // Reads lines until the next occurrence of this part's boundary.
  // Lines before the boundary belong to the last sub-part (if any): they are
  // scanned for the strings listed in stringsToUpdate, and a hit marks that
  // sub-part as toUpdate.
  // Returns true after a new sub-part has been parsed (another call is needed),
  // false once the closing delimiter or the end of the stream is reached.
  bool isTextBase64 (subAttachements.size () && subAttachements.back ().isTextBase64 ());
  LOG ("isTextBase64: " << isTextBase64 << " attach: " << *this);
  for (string prev, line; getline (mbox, line); ) {
    LOG ("curPos: " << curPos << " line: " << line);
    streamoff lastPos = curPos;
    curPos += line.length () + 1;
    // Only the first boundaryMiddleSize characters ("--<boundary>") are searched.
    string::size_type bpos = line.find (boundary.c_str (), 0, boundaryMiddleSize);
    if (bpos == string::npos) {
      string clearLine (line);
      if (isTextBase64)
        base64Decode (clearLine);
      // Search in the previous and current lines together so that a marker
      // split over two lines is still found.
      string couple (prev+clearLine);
      for (vector <string>::iterator it = stringsToUpdate.begin ();
           it != stringsToUpdate.end ();
           ++it)
        if (couple.find (*it) != string::npos) {
          LOG ("find: "+ *it);
          // Text located before the first boundary belongs to no sub-part:
          // calling back () on the empty vector would be undefined behaviour.
          if (!subAttachements.empty ())
            subAttachements.back ().toUpdate = true;
        }
      prev = clearLine;
      continue;
    }
    LOG ("find: " << boundary);
    LOG ("lastPos: " << lastPos << " bpos: " << bpos << " boundaryMiddleSize: " << boundaryMiddleSize);
    // The boundary line closes the sub-part read so far.
    if (subAttachements.size ())
      subAttachements.back ().endPos = lastPos;
    LOG ("line: " << line << "bpos+boundaryMiddleSize: " << (bpos+boundaryMiddleSize) << " find: " << line.find ("--", bpos+boundaryMiddleSize));
    bpos += boundaryMiddleSize;
    // "--" right after the boundary marks the closing delimiter.
    if (line.find ("--", bpos) == bpos) {
      LOG ("end");
      return false;
    }
    subAttachements.push_back (Attachment (mbox, level+1, lastPos, curPos));
    return true;
  }
  // End of stream reached without a closing delimiter.
  endPos = curPos;
  return false;
}
// ================================================================================
void
Attachment::markDisclaim (bool &plainMarked, bool &htmlMarked) {
if (plainMarked && htmlMarked)
return;
string multiProp = getProp (contentTypeToken, multiRegEx);
// LOG_BUG (multiProp == ALTERNATIVE && subAttachements.size () != 2, continue, "eMailShrinker: bug A6: alternative give not 1 case (" << subAttachements.size () << ").");
if (multiProp.length ())
for (Attachment &subAttach : subAttachements)
subAttach.markDisclaim (plainMarked, htmlMarked);
string textProp = getProp (contentTypeToken, textRegEx);
if (textProp.empty ())
return;
if (!plainMarked && textProp == PLAIN)
plainMarked = toUpdate = toDisclaim = true;
if (!htmlMarked && textProp == HTML)
htmlMarked = toUpdate = toDisclaim = true;
}
// ================================================================================
// Walks this part and its sub-parts to decide what has to be done with each:
// sets toExtract on parts that can be extracted, sets toUpdate on HTML parts
// containing embedded images, and appends to allMarkedPtrs a pointer to every
// part carrying at least one of toExtract / toUpdate / toDisclaim.
//   parentMultiProp : multipart sub-type of the parent ("" when the parent is
//                     not a recognised multipart).
//   minAttachSize   : size threshold compared with getSize ().
//   mbox            : stream used to read the content of HTML parts.
// Returns the "can't be extracted" status, which the parent ORs into its own.
bool
Attachment::markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector<Attachment *> &allMarkedPtrs) {
DEF_LOG ("Attachment::markSignificant", "parentMultiProp: " << parentMultiProp << " minAttachSize: " << minAttachSize);
string textProp = getProp (contentTypeToken, textRegEx);
// A text alternative, or the HTML body of a multipart/related, must stay in place.
bool cantBeExtract ((parentMultiProp == ALTERNATIVE && (textProp == PLAIN || textProp == HTML)) ||
(parentMultiProp == RELATED && textProp == HTML));
string multiProp = getProp (contentTypeToken, multiRegEx);
// "|=" does not short-circuit: every sub-part is visited.
for (Attachment &sub : subAttachements)
cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs);
if (getProp (contentTypeToken, textRegEx) == HTML) {
// Look for <IMG ...> tags carrying inline data inside the HTML content.
string content = getContent (mbox);
vector<string> imgs;
getSection (content, IMG_BEGIN, IMG_END, imgs);
EmbeddedData::fillEmbeddedData (imgs, minAttachSize, embeddedData);
if (embeddedData.size ())
toUpdate = true;
}
// A part that has to be rewritten is not extracted.
cantBeExtract |= toUpdate;
// Only parts without a boundary of their own and reaching the threshold are extracted.
if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract)
cantBeExtract = toExtract = true; // XXX cantBeExtract ?
// NOTE(review): the stored pointer refers to this object in place; presumably
// the sub-part vectors are not modified afterwards -- confirm.
if (toExtract || toUpdate || toDisclaim)
allMarkedPtrs.push_back (this);
return cantBeExtract;
}
// ================================================================================
string
Attachment::getContent (ifstream &mbox) const {
  DEF_LOG ("Attachment::getContent", "contentPos: " << contentPos);
  // Returns the body of this part (bytes from contentPos to endPos in mbox),
  // decoded according to its content-transfer-encoding (base64 and/or
  // quoted-printable; left as-is otherwise).
  // A part whose end offset is not past its content offset yields an empty
  // body instead of a negative size being turned into a huge allocation.
  const streamoff rawSize (endPos > contentPos ? endPos-contentPos : streamoff (0));
  string content;
  content.resize (rawSize);
  mbox.seekg (contentPos, ios::beg);
  mbox.read (&content[0], rawSize);
  // Keep only what was actually read so that a short read does not leave
  // trailing NUL bytes in the result.
  const streamsize got (mbox.gcount ());
  content.resize (got > 0 ? got : streamsize (0));
  if (isBase64Encoding ())
    base64Decode (content);
  if (isQuotedPrintableEnconding ())
    quotedDecode (content);
  return content;
}
// ================================================================================
void
Attachment::println (ofstream &outbox, string content) const {
  DEF_LOG ("Attachment::println", "content: " << content);
  // Writes `content` to outbox, re-encoded with the transfer encoding of this
  // part, and terminates it with a newline when it does not already end with one.
  // `content` is taken by value because it is encoded in place.
  if (isBase64Encoding ())
    base64Encode (content);
  if (isQuotedPrintableEnconding ())
    quotedEncode (content);
  outbox << content;
  const bool needsNewline (!content.empty () && content.back () != '\n');
  if (needsNewline)
    outbox << endl;
}
// ================================================================================
void
Attachment::replaceEmbedded (string &content) const {
  DEF_LOG ("Attachment::replaceEmbedded", "content.length: " << content.length ());
  // Replaces, in the HTML text `content`, the src value of each image recorded
  // in embeddedData by the download URL of that image.
  // EmbeddedData::imgIdx is the 0-based rank of the <IMG tag in the document,
  // so tags are counted while scanning forward.
  if (!embeddedData.size ())
    return;
  int imgIdx (-1);
  string::size_type startPos (0);
  for (const EmbeddedData &embedded : embeddedData) {
    LOG ("embedded: " << embedded);
    // Advance to the <IMG tag whose rank is embedded.imgIdx, skipping the tags
    // that carry no recorded data. (The comparison used to be inverted, which
    // stopped on the very next tag whatever its rank.)
    for ( ; ; ) {
      startPos = caseInsensitiveFind (content, IMG_BEGIN, startPos);
      LOG_BUG (startPos == string::npos, return, "eMailShrinker: bug A7: can't find " << IMG_BEGIN);
      ++imgIdx;
      if (imgIdx >= embedded.imgIdx)
        break;
      startPos += IMG_BEGIN.length ();
    }
    startPos = caseInsensitiveFind (content, SRC_BEGIN, startPos);
    LOG_BUG (startPos == string::npos, return, "eMailShrinker: bug A8: can't find " << SRC_BEGIN );
    startPos += SRC_BEGIN.length ();
    // The src value runs up to the closing double quote.
    const string::size_type srcEnd (content.find ("\"", startPos));
    LOG_BUG (srcEnd == string::npos, return, "eMailShrinker: bug A9: can't find end of " << SRC_BEGIN );
    content.replace (startPos, srcEnd-startPos, embedded.downloadUrl);
  }
}
// ================================================================================
ostream&
kaz::operator << (ostream& os, const Attachment& attachment) {
  // Prints one line for the part (indented by its level), then one line per
  // embedded image, then, recursively, its sub-parts.
  // Collect the labels of the raised flags, then join them with ", ".
  vector<string> flags;
  if (attachment.toExtract)
    flags.push_back ("to extract");
  if (attachment.toUpdate)
    flags.push_back ("need update");
  if (attachment.toDisclaim)
    flags.push_back ("need diclaim");
  if (attachment.embeddedData.size ())
    flags.push_back ("embeddedData");
  string prop;
  for (const string &flag : flags) {
    if (!prop.empty ())
      prop += ", ";
    prop += flag;
  }
  if (!prop.empty ())
    prop = " ["+prop+"]";
  // Two columns of indentation per level, wrapping every 20 levels.
  os << setw ((attachment.level % 20)*2) << "" << setw (10) << SizeArg (attachment.getSize ()) << " " << attachment.getContentType ()
     << prop;
  if (attachment.cid.length ())
    os << " id: "+attachment.cid;
  if (attachment.boundary.length ())
    os << " boundary: "+attachment.boundary;
  os << " (" << attachment.beginPos << " / " << attachment.contentPos << " / " << attachment.endPos << ") " << endl;
  for (const EmbeddedData &embedded : attachment.embeddedData) {
    os << setw (((attachment.level+1) % 20)*2) << "" << setw (10) << SizeArg (embedded.dataLength) << " embedded [to extract] " << embedded;
  }
  for (const Attachment &sub : attachment.subAttachements)
    os << sub;
  return os;
}
// ================================================================================

97
src/cpp/EmbeddedData.cpp Normal file
View File

@ -0,0 +1,97 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#include "kazDebug.hpp"
#include "kazMisc.hpp"
#include "EmbeddedData.hpp"
#include "Attachment.hpp"
using namespace std;
using namespace kaz;
// ================================================================================
// Start of an image src attribute carrying inline data (a "data:" URL);
// fillEmbeddedData searches for it case-insensitively.
static const string EMBEDDED_TAG ("SRC=\"DATA:");
// ================================================================================
// Records one piece of inline image data found in an HTML part.
//   imgIdx      : rank of the image tag among the tags passed to fillEmbeddedData.
//   contentType : text found between "data:" and the first ';' or ','.
//   name        : file name given to the data (generated by the caller).
//   startData   : offset of the data inside the image tag text.
//   dataLength  : number of characters of data.
EmbeddedData::EmbeddedData (const int &imgIdx, const string &contentType, const string &name, const string::size_type &startData, const string::size_type &dataLength)
: imgIdx (imgIdx),
contentType (contentType),
name (name),
startData (startData),
dataLength (dataLength) {
DEF_LOG ("EmbeddedData::EmbeddedData", "imgIdx: " << imgIdx << " contentType:" << contentType << " name:" << name << " startData:" << startData << " dataLength:" << dataLength);
}
// ================================================================================
void
EmbeddedData::fillEmbeddedData (const vector<string> &imgs, const streamoff &minAttachSize, vector<EmbeddedData> &data) {
  DEF_LOG ("EmbeddedData::fillEmbeddedData", "imgs.size: " << imgs.size () << " minAttachSize:" << minAttachSize << " data.size:" << data.size ());
  // Scans the image tags `imgs` (text found between "<IMG" and ">") and
  // appends to `data` one EmbeddedData for each tag that is at least
  // minAttachSize long and whose src is an inline "data:" URL.
  // imgIdx counts every tag, including the skipped ones, so that it can be
  // used later to locate the tag again in the document.
  int imgIdx (-1);
  for (const string &img : imgs) {
    ++imgIdx;
    if (streamoff (img.length ()) < minAttachSize)
      continue;
    string::size_type startPos (caseInsensitiveFind (img, EMBEDDED_TAG));
    if (startPos == string::npos)
      continue;
    startPos += EMBEDDED_TAG.length ();
    // XXX check base64 ?
    // The content type ends at the first parameter (';') or at the data (',').
    string::size_type endPos = img.find_first_of (";,", startPos);
    LOG_BUG (endPos == string::npos, continue, "eMailShrinker: bug E1: can't find end of contentType" );
    const string contentType (img.substr (startPos, endPos-startPos));
    const string name (Attachment::getUnknown (contentType));
    // The data starts right after the first ','.
    startPos = img.find (',', startPos);
    LOG_BUG (startPos == string::npos, continue, "eMailShrinker: bug E2: can't find start data" );
    ++startPos;
    endPos = img.find ('"', startPos);
    // Without a closing quote the data runs to the end of the tag text;
    // otherwise npos-startPos would wrap around to a huge bogus length.
    if (endPos == string::npos)
      endPos = img.length ();
    data.push_back (EmbeddedData (imgIdx, contentType, name, startPos, endPos-startPos));
  }
}
// ================================================================================
ostream&
kaz::operator << (ostream& os, const EmbeddedData& embeddedData) {
  // One-line summary:
  // "<idx>: <type> - <name> (<start> / <length>) <url> - <id>" + end of line.
  os << embeddedData.imgIdx << ": ";
  os << embeddedData.contentType << " - " << embeddedData.name;
  os << " (" << embeddedData.startData << " / " << embeddedData.dataLength << ") ";
  os << embeddedData.downloadUrl << " - " << embeddedData.downloadId;
  os << endl;
  return os;
}
// ================================================================================

598
src/cpp/MainAttachment.cpp Normal file
View File

@ -0,0 +1,598 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <vector>
#include <set>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <algorithm>
#include <unistd.h>
#include "kazDebug.hpp"
#include "kazMisc.hpp"
#include "SizeArg.hpp"
#include "Attachment.hpp"
#include "MainAttachment.hpp"
using namespace std;
using namespace kaz;
// File-local text constants used to build the plain-text and HTML download
// sections of a processed message.
static const string KAZ_WEB_SITE = "https://kaz.bzh/";
// Placeholders substituted in the link templates.
static const string TMPL_DOWNLOAD = "{{DOWNLOAD}}";
static const string TMPL_FILENAME = "{{FILENAME}}";
static const string CID = "cid:";
// Plain-text section: ruler, start / stop markers and user-facing French messages.
static const string KAZ_PLAIN_HR = "______________________________________________________________________________";
static const string KAZ_PLAIN_START = "~~ PJ-KAZ !"; // must not end with a space
static const string KAZ_PLAIN_STOP = KAZ_PLAIN_START+" ~~";
static const string KAZ_PLAIN_DONT_TOUCH = "(concervez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)";
static const string KAZ_PLAIN_WARNING = "Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. Elles seront automatiquement supprimées dans 1 mois. Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. Pour mieux comprendre la politique de nos services visitez kaz.bzh";
static const string KAZ_PLAIN_DOWLOAD_ONE = "Vos pièces jointes sont à télécharger individuellement ici :";
static const string KAZ_PLAIN_DOWLOAD_OTHER = "(Contenu dans des messages précédents)";
static const string KAZ_PLAIN_DOWLOAD_ALL = "Vous pouvez télécharger l'ensemble dans une archive là :";
// HTML fragments.
static const string HEAD = "<head>";
static const string HEAD_END = "</head>";
static const string KAZ_CSS_URL = "https://kaz.bzh/m/email.css";
static const string KAZ_CSS = "<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>";
static const string A_END = "</a>";
static const string LI_BEGIN = "<li";
static const string CLASS_ONE = "class=\"one\"";
static const string LI_ONE = LI_BEGIN+" "+CLASS_ONE+">";
static const string LI_ALL = LI_BEGIN+" class=\"all\">";
static const string LI_END = "</li>";
static const string HREF_ONE = "href=\"";
static const string BODY_END = "</body>";
static const string HTML_END = "</html>";
// HTML comments delimiting the section inserted in HTML bodies.
static const string KAZ_HTML_TAG = "<!--KAZ"; // must not end with a space
static const string KAZ_HTML_START = KAZ_HTML_TAG+" START-->";
static const string KAZ_HTML_STOP = KAZ_HTML_TAG+" STOP-->";
// Same messages as the plain-text ones above, with accented characters
// written as SGML/HTML entities.
static const string KAZ_HTML_DONT_TOUCH = "(concervez cette partie intacte dans votre r&eacute;ponse si vous voulez transmettre les documents pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ONE = "Vos pi&egrave;ces jointes sont &agrave; t&eacute;l&eacute;charger individuellement ici :";
static const string KAZ_HTML_DOWLOAD_OTHER = "(Contenu dans des messages pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ALL = "Vous pouvez t&eacute;l&eacute;charger l'ensemble dans une archive l&agrave; :";
static const string KAZ_HTML_ARCHIVE = "archive";
// ================================================================================
// Strings whose presence in the body of a part marks that part as toUpdate
// (see Attachment::nextBondary): the plain-text start marker and a quoted
// "cid:" reference.
vector <string>
Attachment::stringsToUpdate ({KAZ_PLAIN_START, "\""+CID});
// ================================================================================
// Templates of the download section. {{DOWNLOAD}} and {{FILENAME}} are
// substituted by addLink for the per-file entries.
// Plain text: one line per file, then the "download everything" entry.
const string MainAttachment::templatePlainAddLink (" - "+TMPL_DOWNLOAD+" "+TMPL_FILENAME+"\n");
const string MainAttachment::templatePlainAllLink ("\n"+KAZ_PLAIN_DOWLOAD_ALL+"\n * "+TMPL_DOWNLOAD+"\n");
// HTML: header, one <li> per file, optional "previous messages" list,
// "download everything" entry and footer.
const string MainAttachment::templateHtmlHeader (KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n"
"<div class=\"kaz\">"
"<p style=\"font-size:10px; color:#969696;\">"+KAZ_HTML_DONT_TOUCH+"</p>\n"
"<p>"+KAZ_HTML_DOWLOAD_ONE+"<ul>\n");
const string MainAttachment::templateHtmlAddLink (LI_ONE+"<a "+HREF_ONE+TMPL_DOWNLOAD+"\">"+TMPL_FILENAME+"</a>"+LI_END+"\n");
const string MainAttachment::templateHtmlOtherLink ("</ul>"+KAZ_HTML_DOWLOAD_OTHER+"<ul>\n");
const string MainAttachment::templateHtmlAllLink ("</ul><ul>"+LI_ALL+KAZ_HTML_DOWLOAD_ALL+" <a href=\""+TMPL_DOWNLOAD+"\">"+KAZ_HTML_ARCHIVE+"</a>"+LI_END+"\n");
const string MainAttachment::templateHtmlFooter ("</ul></p>\n"
"<p class=\"msg\"><a class=\"kaz\" href=\""+KAZ_WEB_SITE+"\"> "+KAZ_WEB_SITE+" </a></p></div>\n"
"<hr><p style=\"font-size:10px; color:#969696;\">"+KAZ_PLAIN_STOP+"</p>"+KAZ_HTML_STOP+"\n");
// Separator used by fillUrlId to split a line into fields.
const regex MainAttachment::whiteSpaceRegEx ("\\s+");
// ================================================================================
void
MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) {
  DEF_LOG ("MainAttachment::copy", "begin: " << begin << " end: " << end);
  // Copies the bytes [begin, end) of mbox to outbox, then flushes outbox.
  // The copy is done by chunks and stops as soon as the input runs short,
  // so a failed read never writes an uninitialised or stale byte.
  // Nothing is copied when end <= begin.
  mbox.seekg (begin, ios::beg);
  char buffer [8192];
  for (streamoff remaining (end-begin); remaining > 0; ) {
    const streamsize wanted (static_cast<streamsize> (min<streamoff> (remaining, sizeof (buffer))));
    mbox.read (buffer, wanted);
    const streamsize got (mbox.gcount ());
    if (got <= 0) {
      LOG ("short read, missing: " << remaining);
      break;
    }
    outbox.write (buffer, got);
    remaining -= got;
  }
  outbox.flush ();
}
// ================================================================================
// Reads one line from standard input and splits it on white space.
//
// url : receives the first field, or stays empty when that field is empty.
// id  : receives the second field when there is one, otherwise stays empty.
void
MainAttachment::fillUrlId (string &url, string &id) {
  DEF_LOG ("MainAttachment::fillUrlId", "");
  url = id = "";
  string inputLine;
  getline (cin, inputLine);
  LOG ("get URL: " << inputLine);
  // -1 selects the text between the separators, i.e. the fields themselves
  const sregex_token_iterator firstField (inputLine.begin (), inputLine.end (), whiteSpaceRegEx, -1);
  const sregex_token_iterator noMoreField;
  const vector<string> fields (firstField, noMoreField);
  if (fields [0].empty ())
    return;
  url = fields [0];
  if (fields.size () > 1)
    id = fields [1];
}
// ================================================================================
// Records the directory receiving the extracted attachments and creates it
// when it is not already a directory.
//
// Throws invalid_argument when extractDir is empty.
void
MainAttachment::setExtractDir (const bfs::path &extractDir) {
  if (extractDir.empty ())
    throw invalid_argument ("no tmp dir");
  this->extractDir = extractDir;
  if (is_directory (extractDir))
    return;
  bfs::create_directory (extractDir);
}
// Records the URL prefix used by getDisclaim to build the archive link
// (the attachment identifiers are appended to it).
void
MainAttachment::setArchiveDownloadURL (const string &archiveDownloadURL) {
  this->archiveDownloadURL = archiveDownloadURL;
}
// ================================================================================
// Appends one download link to both flavours of the disclaimer.
//
// plain : plain-text list, extended with templatePlainAddLink filled in.
// html  : HTML list, extended with templateHtmlAddLink filled in.
// url   : download address; '&' is written as "&amp;" in the HTML flavour only.
// name  : label shown for the link (inserted as is in both flavours).
void
MainAttachment::addLink (string &plain, string &html, const string &url, const string &name) const {
  // plain-text entry
  string plainEntry (templatePlainAddLink);
  replaceAll (plainEntry, TMPL_DOWNLOAD, url);
  replaceAll (plainEntry, TMPL_FILENAME, name);
  plain += plainEntry;

  // HTML entry
  string escapedUrl (url);
  replaceAll (escapedUrl, "&", "&amp;");
  string htmlEntry (templateHtmlAddLink);
  replaceAll (htmlEntry, TMPL_DOWNLOAD, escapedUrl);
  replaceAll (htmlEntry, TMPL_FILENAME, name);
  html += htmlEntry;
}
// ================================================================================
// Builds the plain-text and HTML disclaimers listing every download link.
//
// plain : receives the plain-text disclaimer.
// html  : receives the HTML disclaimer.
//
// Both stay empty when there is no link at all. The links are, in order:
// the attachments marked toExtract, the embedded data of every marked part,
// then the links recorded in previousLinks. An archive link is added when
// there is more than one link and an archive URL has been set.
void
MainAttachment::getDisclaim (string &plain, string &html) const {
  DEF_LOG ("Attachment::getDisclaim", "");
  plain = html = "";
  int total = 0;
  string archiveIds, freshPlain, freshHtml;
  // attachments extracted from this message
  // (original author's note: such a link is always new, so it never has to be
  // removed from previousLinks)
  for (Attachment *attachP : allMarkedPtrs)
    if (attachP->toExtract) {
      addLink (freshPlain, freshHtml, attachP->downloadUrl, attachP->getAttachName ());
      ++total;
      archiveIds += attachP->downloadId;
    }
  // data embedded in the marked parts
  for (Attachment *attachP : allMarkedPtrs)
    for (EmbeddedData &embedded : attachP->embeddedData) {
      addLink (freshPlain, freshHtml, embedded.downloadUrl, embedded.name);
      ++total;
      archiveIds += embedded.downloadId;
    }
  LOG ("allId:" << archiveIds);
  // links already present in an earlier disclaimer
  string formerPlain, formerHtml;
  for (const auto &former : previousLinks)
    addLink (formerPlain, formerHtml, former.first, former.second);
  total += previousLinks.size ();
  if (total == 0) {
    LOG ("no attach");
    return;
  }
  plain = "\n"+KAZ_PLAIN_START+"\n"+KAZ_PLAIN_HR+"\n"+KAZ_PLAIN_DONT_TOUCH+"\n\n"+KAZ_PLAIN_WARNING+"\n\n"+KAZ_PLAIN_DOWLOAD_ONE+"\n";
  plain += freshPlain;
  html = templateHtmlHeader;
  html += freshHtml;
  if (previousLinks.size ()) {
    plain += KAZ_PLAIN_DOWLOAD_OTHER+"\n"+formerPlain;
    html += templateHtmlOtherLink+formerHtml;
  }
  if (total > 1 && archiveDownloadURL.length ()) {
    // original author's note: '&' in the identifiers is reported as already
    // written "&amp;" -- TODO confirm against the producer of downloadId
    string plainArchive (templatePlainAllLink);
    replaceAll (plainArchive, TMPL_DOWNLOAD, archiveDownloadURL+archiveIds);
    plain += plainArchive;
    string htmlArchive (templateHtmlAllLink);
    replaceAll (htmlArchive, TMPL_DOWNLOAD, archiveDownloadURL+archiveIds);
    html += htmlArchive;
  }
  html += templateHtmlFooter;
  plain += "\n\n"+KAZ_WEB_SITE+"\n"+KAZ_PLAIN_HR+"\n"+KAZ_PLAIN_STOP+"\n";
  LOG ("plain: " << plain);
  LOG ("html: " << html);
}
// ================================================================================
void
MainAttachment::addPrevious (const string &href, const string &name) {
DEF_LOG ("Attachment::addPrevious", "href: " << href << " name: " << name);
const string oldVal = previousLinks [href];
if (name.empty ())
return;
previousLinks.erase (href);
previousLinks [href] = name;
LOG ("inserted: " << href << ": " << previousLinks[href]);
}
void
MainAttachment::extractLinks (const string &extractedPlainKAZ) {
DEF_LOG ("Attachment::extractedPlainKAZ", "extractedPlainKAZ: " << extractedPlainKAZ);
for (string::size_type startPos (0);
(startPos = extractedPlainKAZ.find ("http", startPos)) != string::npos;
) {
streamoff stopPos = startPos;
while (extractedPlainKAZ [stopPos] && availableURLChars.find (extractedPlainKAZ [stopPos]) != string::npos)
++stopPos;
const string href (extractedPlainKAZ.substr (startPos, stopPos-startPos));
LOG ("plain href: " << href);
if (extractedPlainKAZ [stopPos] && extractedPlainKAZ [stopPos] != '\n')
++stopPos;
startPos = stopPos;
// get all href but KAZ_WEB_SITE
// the archive link while be skip by filter.sh
if (href == KAZ_WEB_SITE)
continue;
while (extractedPlainKAZ [stopPos] && extractedPlainKAZ [stopPos] != '\n')
++stopPos;
const string name (extractedPlainKAZ.substr (startPos, stopPos-startPos));
LOG ("plain name: " << name);
addPrevious (href, name);
}
}
// ================================================================================
// Collects the links of HTML list items into previousLinks.
//
// liOne : list items taken from a previous HTML disclaimer.
//
// Only the items containing CLASS_ONE are considered. For each of them the
// address is the text between HREF_ONE and the next double quote, and the
// label is the text between the end of the opening tag and A_END; both are
// passed to addPrevious. A malformed item is reported through LOG_BUG with
// the action given there (skip the item for M1, stop for M2..M4).
void
MainAttachment::extractLinks (const vector<string> &liOne) {
  DEF_LOG ("Attachment::extractedPlainKAZ", "liOne.size: " << liOne.size ());
  for (const string &one : liOne) {
    if (caseInsensitiveFind (one, CLASS_ONE) == string::npos)
      continue;
    string::size_type startPos = caseInsensitiveFind (one, HREF_ONE);
    LOG_BUG (startPos == string::npos, continue, "eMailShrinker: bug M1: no href KAZ link. (one: " << one << ")");
    startPos += HREF_ONE.length ();
    LOG ("startPos: " << startPos);
    string::size_type stopPos = one.find ("\"", startPos);
    LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M2: no ending href KAZ link. (one: " << one << ")");
    LOG ("stopPos: " << stopPos);
    string href (one.substr (startPos, stopPos-startPos));
    LOG ("html href: " << href);
    // the end of the opening tag is looked for after the closing quote, so
    // that a '>' inside the address is not mistaken for it
    stopPos = one.find (">", stopPos);
    // test npos itself: indexing the string with npos is out of bounds
    LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M3: no ending href KAZ link. (one: " << one << ")");
    ++stopPos;
    startPos = stopPos;
    LOG ("startPos: " << startPos);
    stopPos = caseInsensitiveFind (one, A_END, startPos);
    LOG ("stopPos: " << stopPos);
    LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M4: no ending anchor KAZ link. (one: " << one << ")");
    string name (one.substr (startPos, stopPos-startPos));
    LOG ("html name: " << name);
    addPrevious (href, name);
  }
}
// Fills previousLinks with the links found in the KAZ sections already
// present in the text parts of the message.
//
// mbox : stream the content of the parts is read from.
//
// Plain parts contribute their KAZ_PLAIN_START..KAZ_PLAIN_STOP section.
// HTML parts contribute both their HTML and plain sections, with "&amp;"
// turned back into "&"; the list items of the result are then parsed.
void
MainAttachment::extractPreviousKAZ (ifstream &mbox) {
  DEF_LOG ("MainAttachment::extractPreviousKAZ", "");
  string plainSections, htmlSections;
  for (const Attachment *attachP : allMarkedPtrs) {
    if (!attachP->toUpdate)
      continue;
    // NOTE(review): the encoding tested is the one of the main attachment,
    // not the one of attachP -- confirm this is intended
    if (isBase64Encoding ())
      continue;
    const string textProp = attachP->getProp (contentTypeToken, textRegEx);
    if (textProp.empty ())
      continue;
    string content (attachP->getContent (mbox));
    if (textProp == PLAIN) {
      LOG (PLAIN);
      plainSections += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
    }
    if (textProp == HTML) {
      LOG (HTML);
      string section (attachP->getSection (content, KAZ_HTML_START, KAZ_HTML_STOP));
      section += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
      // addresses found in HTML are stored unescaped
      replaceAll (section, "&amp;", "&");
      htmlSections += section;
    }
  }
  LOG ("extractedPlainKAZ: "<< plainSections);
  extractLinks (plainSections);
  LOG ("extractedHtmlKAZ: "<< htmlSections);
  vector<string> listItems;
  getSection (htmlSections, LI_BEGIN, LI_END, listItems);
  extractLinks (listItems);
#ifndef DISABLE_LOG
  for (const auto &former : previousLinks)
    LOG ("oldLink link: " << former.first << " name: " << former.second);
#endif
}
void
MainAttachment::removePreviousArchive () {
vector<string> toRemove;
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
if (it->first.find ("&l=/") != string::npos)
toRemove.push_back (it->first);
for (string old : toRemove)
previousLinks.erase (old);
}
// ================================================================================
// Builds the main attachment from mbox through the Attachment constructor,
// then reads the remaining lines to set endPos.
//
// Every remaining line is counted as its length plus one (for the line feed).
MainAttachment::MainAttachment (ifstream &mbox)
  : Attachment (mbox, initTmpLevel (), 0, initTmpPos ()) {
  DEF_LOG ("MainAttachment::MainAttachment", "");
  string line;
  while (getline (mbox, line))
    tmpPos += line.length () + 1;
  endPos = tmpPos;
}
// ================================================================================
// Marks the parts receiving the disclaimer, then delegates to
// Attachment::markSignificant to fill allMarkedPtrs.
//
// minAttachSize : size threshold forwarded to Attachment::markSignificant.
// mbox          : stream forwarded to Attachment::markSignificant.
void
MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox) {
  DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
  bool plainMarked = false;
  bool htmlMarked = false;
  markDisclaim (plainMarked, htmlMarked);
  Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
}
// ================================================================================
// Prints on standard output, one per line, the address of every link found
// in the previous KAZ sections of the message.
void
MainAttachment::getUpdatedURL (ifstream &mbox) {
  DEF_LOG ("MainAttachment::getUpdatedURL", "");
  extractPreviousKAZ (mbox);
  for (const auto &former : previousLinks)
    cout << former.first << endl;
}
void
MainAttachment::newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const {
DEF_LOG ("MainAttachment::newPjEntry", "attachCount: " << attachCount << " contentType: " << contentType << " name: " << name);
ostringstream dirNameStream;
dirNameStream << "PJ-" << std::setfill ('0') << std::setw (3) << int (attachCount);
dirName = dirNameStream.str ();
bfs::path dirPath (extractDir / dirName);
bfs::create_directory (dirPath);
bfs::path metaPath (dirPath / "meta");
ofstream metaOut (metaPath.c_str ());
metaOut
<< "Content-Type: " << contentType << endl
<< "Name: " << name << endl;
metaOut.flush ();
metaOut.close ();
bfs::path filePath (dirPath / "media");
mediaName = filePath.c_str ();
dirName = dirPath.c_str ();
LOG ("dirName: " << dirName << " mediaName: " << mediaName);
}
// ================================================================================
// Writes every attachment to extract in its own directory and prints the
// path of each directory on standard output, one per line.
//
// mbox    : stream the message is read from.
// minSize : only reported in the log here.
//
// Attachments marked toExtract come first: base64 content is decoded, any
// other content is copied line by line. The data embedded in the marked
// parts come next: each one is cut out of its image tag and base64-decoded.
// Directories are numbered in that order (see newPjEntry).
void
MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
  DEF_LOG ("MainAttachment::extract", "minSize: " << minSize);
  int attachCount (0);
  string dirName, mediaName;
  for (Attachment *attachP : allMarkedPtrs) {
    if (!attachP->toExtract)
      continue;
    newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
    ++attachCount;
    ofstream out (mediaName);
    streamoff
      start (attachP->Attachment::contentPos),
      end (attachP->Attachment::endPos+1); // to cover the case without ^M
    mbox.seekg (start, ios::beg);
    if (attachP->isBase64Encoding ()) {
      // buff holds the 6-bit values of the current group of 4 characters
      unsigned char buff[4];
      int idx = 0;
      char c;
      for (streamoff curPos (start); mbox.get (c) && curPos < end; ++curPos) {
        // padding marks the end of the data
        if (c == '=')
          break;
        // line breaks and any other foreign character are ignored
        if (!isBase64 (c))
          continue;
        buff [idx] = getBase64Val (c);
        if (++idx != 4)
          continue;
        // a complete group of 4 characters yields 3 bytes
        out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
        out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
        out.put (buff [2] << 6 | buff [3]);
        idx = 0;
      }
      // incomplete last group: 2 characters carry 1 byte, 3 characters carry
      // 2 bytes, a lone character carries less than a byte and yields nothing
      if (idx > 1) {
        for (int j = idx; j < 4; ++j)
          buff [j] = 0;
        out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4);
        if (idx > 2)
          out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2);
      }
    } else {
      string line;
      for (streamoff curPos (start); getline (mbox, line); ) {
        curPos += line.length () + 1;
        if (curPos >= end) {
          // last line: keep only what lies before `end`
          out << line.substr (0, end + line.length () - curPos) << endl;
          break;
        }
        out << line << endl;
      }
    }
    out.flush ();
    out.close ();
    cout << dirName << endl;
  }
  for (Attachment *attachP : allMarkedPtrs) {
    if (!attachP->embeddedData.size ())
      continue;
    string content = attachP->getContent (mbox);
    vector<string> imgs;
    getSection (content, IMG_BEGIN, IMG_END, imgs);
    for (const EmbeddedData &embedded : attachP->embeddedData) {
      // reduce the image tag to its encoded data, then decode it in place
      string &img (imgs[embedded.imgIdx]);
      img.erase (0, embedded.startData);
      img.erase (embedded.dataLength);
      base64Decode (img);
      newPjEntry (attachCount, embedded.contentType, embedded.name, dirName, mediaName);
      ++attachCount;
      ofstream out (mediaName);
      out.write (img.c_str (), img.size ());
      out.flush ();
      out.close ();
      cout << dirName << endl;
    }
  }
}
// ================================================================================
// Writes to outbox a copy of the message read from mbox in which the
// extracted attachments are left out and the text parts to update are rewritten.
//
// mbox    : stream the original message is read from.
// outbox  : stream receiving the new message; it is closed at the end.
// minSize : only reported in the log here.
//
// The download URL (and identifier) of each extracted attachment and of each
// embedded data is obtained through fillUrlId, in the order of allMarkedPtrs.
void
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize) {
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize);
// preparation
extractPreviousKAZ (mbox);
removePreviousArchive ();
// maps CID+cid of an extracted attachment to its HTML-escaped download URL
map<const string, const string> translateHtml;
for (Attachment *attachP : allMarkedPtrs)
if (attachP->toExtract) {
fillUrlId (attachP->downloadUrl, attachP->downloadId);
if (attachP->downloadUrl.empty ()) {
// no URL was given: the attachment is no longer extracted
LOG ("no change");
attachP->toExtract = false;
continue;
}
if (attachP->cid.length ()) {
string tmp (attachP->downloadUrl);
replaceAll (tmp, "&", "&amp;");
translateHtml.insert (pair<const string, const string> (CID+attachP->cid, tmp));
}
}
// URL and identifier of the data embedded in the marked parts
for (Attachment *attachP : allMarkedPtrs) {
if (!attachP->embeddedData.size ())
continue;
for (EmbeddedData &embedded : attachP->embeddedData)
fillUrlId (embedded.downloadUrl, embedded.downloadId);
}
string plainDisclaim, htmlDisclaim;
getDisclaim (plainDisclaim, htmlDisclaim);
// copy email
streamoff curPos = 0;
for (Attachment *attachP : allMarkedPtrs) {
// what lies between the previous marked part and this one is copied as is
copy (mbox, outbox, curPos, attachP->beginInParent);
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
if (attachP->toExtract) {
// nothing is written for an extracted attachment
LOG ("skip Extracted");
} else if (attachP->toUpdate) {
string textProp = attachP->getProp (contentTypeToken, textRegEx);
bool isPlain = textProp == PLAIN;
bool isHtml = textProp == HTML;
bool isDisclaimer = attachP->toDisclaim;
LOG_BUG (isPlain && isHtml, /**/, "eMailShrinker: bug M6: plain and html: " << attachP->getContentType ());
LOG_BUG (! (isPlain || isHtml), /**/, "eMailShrinker: bug M7: not plain or html: " << attachP->getContentType ());
LOG ("toUpdate: isPlain: " << isPlain << " isHtml: " << isHtml << " isDisclaimer: " << isDisclaimer);
// the bytes from the beginning of the part to its content are kept unchanged
copy (mbox, outbox, attachP->beginInParent, attachP->contentPos);
string content = attachP->getContent (mbox);
if (isHtml) {
// refresh the KAZ style sheet in the head of the document
string::size_type headStart (caseInsensitiveFind (content, HEAD));
LOG ("HEAD start: " << headStart);
if (headStart != string::npos) {
headStart += HEAD.length ();
string::size_type headStop (caseInsensitiveFind (content, HEAD_END, headStart));
if (headStop != string::npos) {
// to reduce the scope of the search
string oldHead (content.substr (headStart, headStop-headStart));
LOG ("HEAD start: " << headStart << " stop: " << headStop << " old: " << oldHead);
string::size_type oldCssPos (oldHead.find (KAZ_CSS_URL));
if (oldCssPos != string::npos) {
// remove the tag enclosing the previous KAZ_CSS_URL (and a following line feed)
string::size_type oldStart (oldHead.rfind ('<', oldCssPos));
string::size_type oldStop (oldHead.find ('>', oldCssPos));
if (oldStart != string::npos && oldStop != string::npos) {
++oldStop;
if (oldStop < oldHead.length () && oldHead [oldStop] == '\n')
++oldStop;
content.erase (headStart+oldStart, oldStop-oldStart);
}
}
content.insert (headStart, "\n"+KAZ_CSS);
}
// else XXX no </head> (one has to be added (with <html>?))
}
// drop the disclaimers left by a previous run
removeSection (content, KAZ_HTML_START, KAZ_HTML_STOP);
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
// XXX case insensitive ??
if (content.find (CID) != string::npos)
replaceAll (content, translateHtml);
attachP->replaceEmbedded (content);
}
if (isPlain)
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
if (isDisclaimer) {
if (isHtml) {
// cut the content at the last BODY_END, then at the last HTML_END, so that the
// disclaimer can be appended followed by both tags
for (string endTag : {BODY_END, HTML_END}) {
LOG ("try tag: " << endTag);
string::size_type endTagStart = caseInsensitiveRFind (content, endTag);
if (endTagStart != string::npos) {
content = content.substr (0, endTagStart);
LOG ("remove tag: " << endTag << " content: " << content);
}
}
content += htmlDisclaim+BODY_END+HTML_END;
LOG ("content: " << content);
}
if (isPlain)
content += plainDisclaim;
}
attachP->println (outbox, content);
} else {
// NOTE(review): if LOG_BUG runs its second argument when the condition holds,
// this `continue` also skips the update of curPos below -- confirm against kazDebug.hpp
LOG_BUG (true, continue, "eMailShrinker: bug M8: can't change" << *attachP);
}
outbox.flush ();
curPos = attachP->endPos;
}
// what follows the last marked part
copy (mbox, outbox, curPos, endPos);
outbox.close ();
}
// ================================================================================

101
src/cpp/SizeArg.cpp Normal file
View File

@ -0,0 +1,101 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#include <regex>
#include <boost/format.hpp>
#include <boost/lexical_cast.hpp>
#include "kazDebug.hpp"
#include "SizeArg.hpp"
using namespace std;
using namespace kaz;
// ================================================================================
// Builds a size from a number of bytes.
SizeArg::SizeArg (const size_t &bytes)
  : bytes (bytes)
{
}
// Builds a size from its textual form (see init for the accepted syntax).
// Whatever init throws is propagated.
SizeArg::SizeArg (const string &option)
  : bytes (0)
{
  init (option);
}
void
SizeArg::init (const string &token) {
DEF_LOG ("SizeArg::init", "token: " << token);
static const string prefix ("KMGTPEZY");
static const regex formatRegEx ("([0-9]+) *([k"+prefix+"]?)(i?)");
if (!regex_match (token.begin (), token.end (), formatRegEx))
throw invalid_argument ("Bad size");
bytes = boost::lexical_cast<uint64_t> (regex_replace (token, formatRegEx, "$1"));
const string v2 (regex_replace (token, formatRegEx, "$2"));
size_t index = prefix.find (v2);
if (v2.length ()) {
if (index == string::npos)
index = 0; // "k" case
++index;
}
bytes *= pow (regex_replace (token, formatRegEx, "$3").empty () ? 1000 : 1024, index);
LOG ("token:" << token << " index:" << index << " v2:<" << v2 << ">" << " b:" << bytes);
}
// ================================================================================
// Prints a size in a human readable form.
//
// 0 is printed "0 byte", a value below 1024 as "<n> byte" or "<n> bytes",
// any other value with 2 decimals followed by a binary prefix ("1.50 Ki").
// The prefix is chosen with integer shifts, so that an exact power of 1024
// always gets its own prefix whatever the rounding of floating point.
ostream &
kaz::operator << (ostream &out, const SizeArg &sizeArg) {
  static string sizes [] = {"", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"};
  if (!sizeArg.bytes)
    return out << "0 byte";
  // number of times the value can be divided by 1024 (at most 6 on 64 bits)
  int nbBytes = 0;
  for (uint64_t rest = sizeArg.bytes; rest >= 1024; rest >>= 10)
    ++nbBytes;
  const double val (double (sizeArg.bytes) / double (uint64_t (1) << (10 * nbBytes)));
  return out << boost::str (boost::format (nbBytes ? "%.2f " : val == 1 ? "%.0f byte" : "%.0f bytes") % val) + sizes [nbBytes];
}
// Reads one white-space delimited word and parses it as a size.
// Any exception thrown by SizeArg::init is turned into failbit on the stream.
istream &
kaz::operator >> (istream &in, SizeArg &sizeArg) {
  string word;
  in >> word;
  try {
    sizeArg.init (word);
  }
  catch (...) {
    in.setstate (ios_base::failbit);
  }
  return in;
}
// ================================================================================

232
src/cpp/eMailShrinker.cpp Normal file
View File

@ -0,0 +1,232 @@
////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#define LAST_VERSION "eMailShrinker 1.3 2021-04-04"
#include <iostream>
#include <fstream>
#include <string>
#include <chrono>
#include <boost/program_options.hpp>