Dépollution des courriels par substitution des pièces jointes par un lien temporaire.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

481 lines
15 KiB

////////////////////////////////////////////////////////////////////////////
// Copyright KAZ 2021 //
// //
// contact (at) kaz.bzh //
// //
// This software is a filter to shrink email by attachment extraction. //
// //
// This software is governed by the CeCILL-B license under French law and //
// abiding by the rules of distribution of free software. You can use, //
// modify and/or redistribute the software under the terms of the //
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
// URL "http://www.cecill.info". //
// //
// As a counterpart to the access to the source code and rights to copy, //
// modify and redistribute granted by the license, users are provided //
// only with a limited warranty and the software's author, the holder of //
// the economic rights, and the successive licensors have only limited //
// liability. //
// //
// In this respect, the user's attention is drawn to the risks associated //
// with loading, using, modifying and/or developing or reproducing the //
// software by the user in light of its specific status of free software, //
// that may mean that it is complicated to manipulate, and that also //
// therefore means that it is reserved for developers and experienced //
// professionals having in-depth computer knowledge. Users are therefore //
// encouraged to load and test the software's suitability as regards //
// their requirements in conditions enabling the security of their //
// systems and/or data to be ensured and, more generally, to use and //
// operate it in the same conditions as regards security. //
// //
// The fact that you are presently reading this means that you have had //
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <sys/ioctl.h>
#include <algorithm>
#include <chrono>
#include <sstream>
#include <iomanip>
#include "kazDebug.hpp"
#include "kazMisc.hpp"
using namespace std;
using namespace kaz;
//template void kaz::quotedDecoded<'='> (string &content);
//template void kaz::quotedDecoded<'%'> (string &content);
// Column threshold after which quotedEncode and base64Encode start a new output line.
static const string::size_type MAX_QUOTED_PRINTABLE_SIZE (78);
// Base64 alphabet: the character at index i encodes the 6-bit value i.
const char *const kaz::base64Chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789"
"+/";
// Set of characters associated with URLs.
// NOTE(review): presumably the characters allowed unescaped in a generated URL;
// the code using this constant is not visible in this file.
const string kaz::availableURLChars =
"!#$%&'()*+,-./"
"0123456789"
":;=?"
"@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"[]_"
"abcdefghijklmnopqrstuvwxyz"
"~";
// Matches one "=?charset?encoding?text?=" encoded word together with the
// whitespace around it (used by encodedWordDecode).
// Capture groups: 1 = charset, 2 = encoding letter (Q or B), 3 = encoded text.
const regex kaz::encodedWordRegex ("\\s*=\\?" // flag begin
"([0-9A-Za-z!#$%&'+^_`{}~-]+)" // charset
"\\?" // flag sep
"([QqBb])" // encoding: Q (quoted) or B (base64)
"\\?" // flag sep
"([^ ?]+)" // encoded string
"\\?=\\s*"); // flag end
// ================================================================================
// Returns the number of columns of the terminal attached to file descriptor 0.
// Returns 0 when the window size cannot be queried (for instance when
// descriptor 0 is not a terminal) instead of reading an uninitialised structure.
uint16_t
kaz::getCols () {
  struct winsize w = {};
  if (ioctl (0, TIOCGWINSZ, &w) == -1)
    return 0;
  return w.ws_col;
}
// ================================================================================
// Formats a duration as "HH:MM:SS.ssssss" (zero padded).
// delta is loaded into a std::chrono::duration<double>, so it is interpreted
// as a number of seconds. Hours are not folded into days: the hour field
// simply grows for long durations.
string
kaz::ns2string (const double &delta) {
  using namespace std::chrono;
  duration<double> remaining (delta);
  // Peel off whole hours, then whole minutes; what is left is the seconds part.
  const auto wholeHours (duration_cast<hours> (remaining));
  remaining -= wholeHours;
  const auto wholeMinutes (duration_cast<minutes> (remaining));
  remaining -= wholeMinutes;
  ostringstream out;
  out << setfill ('0')
      << setw (2) << wholeHours.count () << ":"
      << setw (2) << wholeMinutes.count () << ":"
      << setw (9) << fixed << setprecision (6) << remaining.count ();
  return out.str ();
}
// ================================================================================
// Replaces, in place, every occurrence of `from` in `str` by `to`.
// The scan resumes right after each inserted `to`, so an occurrence of `from`
// inside the replacement text is never replaced again.
// Nothing happens when `str` or `from` is empty.
void
kaz::replaceAll (string& str, const string &from, const string &to) {
  DEF_LOG ("kaz::replaceAll", "form: " << from << " to: " << to);
  if (str.empty () || from.empty ())
    return;
  string::size_type cursor (str.find (from, 0));
  while (cursor != string::npos) {
    str.replace (cursor, from.size (), to);
    cursor = str.find (from, cursor + to.length ());
  }
}
// Applies every (pattern, replacement) pair of `subst` to `str`, in place.
// The pairs are applied one after the other in the iteration order of the map,
// so a later pair also sees the text produced by the earlier ones.
void
kaz::replaceAll (string& str, const map<const string, const string> &subst) {
  DEF_LOG ("kaz::replaceAll", "str: " << str);
  for (const auto &rule : subst)
    replaceAll (str, rule.first, rule.second);
}
// ================================================================================
// Lower-cases `content` in place, character by character, using a locale
// object that is default-constructed once (on the first call) and then reused.
void
kaz::toLower (string &content) {
  DEF_LOG ("kaz::toLower", "content: " << content);
  static locale loc;
  for (char &current : content)
    current = tolower (current, loc);
  LOG ("content: " << content);
}
// Returns an upper-case view of `src` without copying when it is avoidable.
//   src: text to upper-case.
//   tmp: scratch string owned by the caller; overwritten only when a copy is needed.
// Returns `src` itself when it contains no character changed by toupper(),
// otherwise `tmp` filled with the upper-cased copy. The returned reference is
// only valid while both `src` and `tmp` are alive.
const string &
kaz::toUpperIfNeed (const string &src, string &tmp) {
  DEF_LOG ("kaz::toUpperIfNeed", "src: " << src);
  // toupper() is only defined for unsigned char values (and EOF): go through
  // unsigned char and come back to char so bytes >= 0x80 compare correctly.
  const auto upper = [] (char c) {
    return static_cast<char> (toupper (static_cast<unsigned char> (c)));
  };
  if (all_of (src.begin (), src.end (), [&upper] (char c) { return c == upper (c); }))
    return src;
  // Start from a clean buffer (a non-empty tmp used to be appended to) and
  // allocate once (reserve() without a size does not reserve anything).
  tmp.clear ();
  tmp.reserve (src.size ());
  for (char c : src)
    tmp.push_back (upper (c));
  return tmp;
}
// Comparison predicate for the case-insensitive searches below.
// Returns true when the upper-cased form of `a` equals `b`. Only `a` is
// converted, so `b` is expected to be upper case already (the predicate is
// deliberately asymmetric).
inline bool
caseInsensitiveCharCompare (char a, char b) {
  // toupper() has undefined behaviour for negative arguments other than EOF,
  // which is what a byte >= 0x80 is when char is signed.
  return static_cast<char> (toupper (static_cast<unsigned char> (a))) == b;
}
// Case-insensitive forward search of `pattern` in `s`, starting at index `pos`.
// Returns the index of the first match, or string::npos when there is none or
// when `pos` lies beyond the end of `s`.
// As in the original code, a match reported at s.end () (empty pattern searched
// at the very end) is also returned as string::npos.
string::size_type
kaz::caseInsensitiveFind (const string& s, const string& pattern, const string::size_type &pos) {
  DEF_LOG ("kaz::caseInsensitiveFind", "pattern: " << pattern << " pos: " << pos << " s: " << s);
  // s.begin ()+pos would be an invalid iterator for an offset past the end.
  if (pos > s.length ())
    return string::npos;
  string tmp;
  const string &upperPattern (toUpperIfNeed (pattern, tmp));
  LOG ("pattern: " << upperPattern);
  const string::const_iterator it (search (s.begin ()+pos, s.end (), upperPattern.begin (), upperPattern.end (), caseInsensitiveCharCompare));
  if (it == s.end ())
    return string::npos;
  const string::size_type found (it - s.begin ());
  LOG ("find: " << found);
  return found;
}
// Case-insensitive backward search: returns the index of the LAST occurrence
// of `pattern` in `s`, or string::npos when there is none.
// With pos == 0 the whole string is searched, exactly as before.
// NOTE(review): for pos > 0 the previous code moved s.rend () past the start of
// the string (out of range). `pos` is now treated as the lowest index at which
// a match may start, mirroring caseInsensitiveFind -- confirm against callers.
string::size_type
kaz::caseInsensitiveRFind (const string& s, const string& pattern, const string::size_type &pos) {
  DEF_LOG ("kaz::caseInsensitiveRFind", "pattern: " << pattern << " pos: " << pos << " s: " << s);
  if (pos > s.length ())
    return string::npos;
  string tmp;
  const string &upperPattern (toUpperIfNeed (pattern, tmp));
  LOG ("pattern: " << upperPattern);
  // Reverse range over s[pos, length): it walks from the last character down to s[pos].
  const string::const_reverse_iterator searchEnd (s.rend ()-pos);
  const string::const_reverse_iterator it (search (s.rbegin (), searchEnd, upperPattern.rbegin (), upperPattern.rend (), caseInsensitiveCharCompare));
  if (it == searchEnd)
    return string::npos;
  // `it` designates the last character of the match; step back to its first one.
  const string::size_type found ((s.rend () - it) - pattern.length ());
  LOG ("find: " << found);
  return found;
}
// Builds a string of `size` characters drawn from [0-9A-Za-z] with rand ().
// Returns an empty string when `size` is zero or negative (a negative size
// used to be converted to a huge reserve () request and throw).
// The sequence depends on the state of rand (): seeding is up to the caller,
// and the result is not suitable where unpredictability matters.
string
kaz::boundaryGen (const int &size) {
  static const char alphanum[] =
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz";
  string result;
  if (size <= 0)
    return result;
  result.reserve (size);
  // sizeof counts the terminating '\0', which must never be picked.
  const string::size_type alphabetSize (sizeof (alphanum) - 1);
  for (int i = 0; i < size; ++i)
    result.push_back (alphanum[rand() % alphabetSize]);
  return result;
}
// ================================================================================
// Decodes, in place, text where arbitrary bytes are written as `delim` followed
// by two hexadecimal digits ('=' for quoted-printable, '%' for RFC 2184 values).
//   - delim + "\n" is a soft line break and is removed;
//   - delim + two hex digits is replaced by the corresponding byte;
//   - any other use of delim is reported through LOG_BUG and the delimiter is
//     kept literally (the approach suggested by RFC 2045 for robust decoders),
//     instead of returning with a half-rewritten buffer as before.
// A soft line break at the very start is still reported, but no longer leaves
// a stray delimiter at the beginning of the result.
template<char delim>
void
kaz::quotedDecode (string &content) {
  DEF_LOG ("kaz::quotedDecode", "delim: " << delim << " content: " << content);
  string::size_type len (content.length ());
  if (!len)
    return;
  LOG ("len: " << len);
  const string::iterator end (content.end ());
  // p reads the encoded text, q writes the decoded text over the same buffer.
  // q only moves when a byte is written, so it can never overtake p.
  string::iterator q (content.begin ());
  for (string::iterator p (content.begin ()); p < end; ++p) {
    if (*p != delim) {
      *q++ = *p;
      continue;
    }
    if (end - p > 1 && p[1] == '\n') {
      // Soft line break: drop the delimiter and the newline (the loop header
      // skips the second character).
      LOG_BUG (q == content.begin (), ++p;continue, "kazMisc::quotedDecode bug: bad quoted-printable format. (start with '=', delim: " << int (delim) << " content: " << content << ")");
      ++p;
      continue;
    }
    // isxdigit () needs an unsigned char value; the distance test avoids
    // forming an iterator beyond end ().
    const bool malformed (end - p < 3 ||
                          !isxdigit (static_cast<unsigned char> (p[1])) ||
                          !isxdigit (static_cast<unsigned char> (p[2])));
    LOG_BUG (malformed, *q++ = *p;continue, "kazMisc::quotedDecode bug: bad quoted-printable format. (delim: " << int (delim) << " content: " << content << ")");
    *q++ = static_cast<char> ((getHexaVal (p[1]) << 4) + getHexaVal (p[2]));
    p += 2;
  }
  content.resize (q-content.begin ());
  LOG ("content: " << content);
}
// ================================================================================
// Quoted-printable encodes `content` in place.
//   - '\n' is copied as is and starts a new output line;
//   - characters rejected by isQuotedPrintable (), and a space or tab placed
//     just before a '\n' or at the very end, are written as "=XX";
//   - a soft line break ("=\n") is inserted once a line reaches
//     MAX_QUOTED_PRINTABLE_SIZE columns.
// The pre-scan used to count the printable characters and return early when
// there were none, so a text made only of bytes to be quoted was left
// unencoded. It now counts the characters to quote and is only used to size
// the output buffer; the encoding loop always runs.
// NOTE(review): a line can reach MAX_QUOTED_PRINTABLE_SIZE characters plus the
// trailing '=', which is above the 76 characters allowed by RFC 2045 -- the
// constant is shared with other code, so it is left untouched here.
void
kaz::quotedEncode (string &content) {
  DEF_LOG ("kaz::quotedEncode", "content: " << content);
  string::size_type nbQuoted (0);
  for (string::const_iterator it = content.begin (); it != content.end (); ++it)
    if (*it != '\n' && !isQuotedPrintable (*it))
      ++nbQuoted;
  // Each quoted character grows from 1 to 3 bytes; add room for the soft breaks.
  string::size_type estimate (content.length ()+nbQuoted*2);
  estimate += (estimate/MAX_QUOTED_PRINTABLE_SIZE+1)*2;
  string result;
  result.reserve (estimate);
  string::size_type cols (0);
  char upper, lower;
  for (string::const_iterator it = content.begin (); it != content.end (); ++it) {
    const char &c (*it);
    if (c == '\n') {
      result.push_back ('\n');
      cols = 0;
      continue;
    }
    if (cols >= MAX_QUOTED_PRINTABLE_SIZE) {
      result.push_back ('=');
      result.push_back ('\n');
      cols = 0;
    }
    if (!isQuotedPrintable (c) ||
        ((c == ' ' || c =='\t') && (it+1 == content.end () || *(it+1) == '\n'))) {
      // Never split the three characters of "=XX" across two lines.
      if (cols > MAX_QUOTED_PRINTABLE_SIZE-3) {
        result.push_back ('=');
        result.push_back ('\n');
        cols = 0;
      }
      getHexa (c, upper, lower);
      result.push_back ('=');
      result.push_back (upper);
      result.push_back (lower);
      cols += 3;
      continue;
    }
    result.push_back (c);
    ++cols;
  }
  content.swap (result);
  LOG ("content: " << content);
}
// ================================================================================
// Decodes base64 text in place.
//   - '\n' is skipped;
//   - decoding stops at the first '=' (padding);
//   - on a character that is neither base64, '\n' nor '=', the result is cut
//     back to what had been decoded when the last '\n' was met (nothing if
//     there was none) and the function returns.
// The truncation used to be done with an offset into the encoded text although
// the buffer already held decoded bytes, which left garbage behind the valid
// data. A single trailing character (6 bits, not enough for one byte) no
// longer produces an output byte.
void
kaz::base64Decode (string &content) {
  DEF_LOG ("kaz::base64Decode", "content: " << content);
  string::size_type len (content.length ());
  if (!len)
    return;
  LOG ("len: " << len);
  unsigned char buff[4];
  int idx = 0;
  // p reads the encoded text and q writes the decoded bytes over the same
  // buffer: 4 characters read give 3 bytes written, so q never overtakes p.
  // qLineOK is the end of the decoded output at the last line break.
  string::iterator p (content.begin ()), q (p), qLineOK (p);
  for (;
       p < content.end ();
       ++p) {
    char c = *p;
    if (c == '=')
      break;
    if (c == '\n') {
      qLineOK = q;
      continue;
    }
    if (!isBase64 (c)) {
      content.resize (qLineOK-content.begin ());
      LOG ("kazMisc::base64Decode bug: bad base64 format. (content: " << content << ")");
      return;
    }
    buff [idx] = getBase64Val (c);
    if (++idx != 4)
      continue;
    // A full group of four 6-bit values gives three bytes.
    *q++ = buff [0] << 2 | (buff [1] & 0x30) >> 4;
    *q++ = buff [1] << 4 | (buff [2] & 0x3c) >> 2;
    *q++ = buff [2] << 6 | buff [3];
    idx = 0;
  }
  // Incomplete last group: 2 characters carry one byte, 3 characters two bytes.
  if (idx > 1) {
    for (int j = idx; j < 4; ++j)
      buff [j] = 0;
    *q++ = buff [0] << 2 | (buff [1] & 0x30) >> 4;
    if (idx > 2)
      *q++ = buff [1] << 4 | (buff [2] & 0x3c) >> 2;
  }
  content.resize (q-content.begin ());
  LOG ("content: " << content);
}
// ================================================================================
// Base64 encodes `content` in place, with '=' padding and a '\n' after every
// full output line.
// Lines are now 76 characters long, the maximum allowed by RFC 2045. The
// previous test against MAX_QUOTED_PRINTABLE_SIZE (78) only fired at the next
// multiple of 4 and therefore produced 80-character lines.
// As before, a '\n' is also emitted when the data ends exactly on a full line.
void
kaz::base64Encode (string &content) {
  DEF_LOG ("kaz::base64Encode", "content: " << content);
  static const string::size_type BASE64_LINE_SIZE (76);
  const string::size_type length (content.length ());
  const string::size_type encodedLength ((length + 2) / 3 * 4);
  string result;
  // Encoded characters plus one '\n' per output line.
  result.reserve (encodedLength + encodedLength / BASE64_LINE_SIZE + 1);
  for (string::size_type pos (0), cols (0); pos < length; ) {
    result.push_back (base64Chars [(content [pos + 0] & 0xfc) >> 2]);
    if (pos == length-1) {
      // One byte left: two characters and two padding signs.
      result.push_back (base64Chars [(content [pos + 0] & 0x03) << 4]);
      result.push_back ('=');
      result.push_back ('=');
      break;
    }
    result.push_back (base64Chars [((content [pos + 0] & 0x03) << 4) +
                                   ((content [pos + 1] & 0xF0) >> 4)]);
    if (pos == length-2) {
      // Two bytes left: three characters and one padding sign.
      result.push_back (base64Chars [(content [pos + 1] & 0x0F) << 2]);
      result.push_back ('=');
      break;
    }
    result.push_back (base64Chars [((content [pos + 1] & 0x0F) << 2) +
                                   ((content [pos + 2] & 0xC0) >> 6)]);
    result.push_back (base64Chars [content [pos + 2] & 0x3F]);
    pos += 3;
    cols += 4;
    if (cols >= BASE64_LINE_SIZE) {
      result.push_back ('\n');
      cols = 0;
    }
  }
  content.swap (result);
  LOG ("content: " << content);
}
// ================================================================================
// Converts `content` in place from a single-byte encoding to UTF-8: every byte
// >= 0x80 becomes the two-byte UTF-8 sequence of the code point of the same
// value (an ISO-8859-1 style mapping); other bytes are unchanged.
// The string is enlarged first, then rewritten from the end so that no byte is
// overwritten before it has been read.
// The iterators used to be taken from the END of the enlarged string, which
// made the loop start by writing `charCount` bytes beyond the string; they are
// now anchored on the last original byte and the last byte of the new size.
void
kaz::iso2utf (string &content) {
  DEF_LOG ("kaz::iso2utf", "content: " << content);
  string::size_type len (content.length ());
  if (!len)
    return;
  LOG ("len: " << len);
  string::size_type charCount (0);
  for (string::iterator it = content.begin (); it != content.end (); ++it)
    if ((uint8_t) *it >= 0x80)
      ++charCount;
  if (!charCount)
    return;
  LOG ("charCount: " << charCount);
  content.resize (len+charCount);
  // p: last byte of the original text, q: last byte of the enlarged buffer.
  string::iterator p (content.begin ()+(len-1)), q (p+charCount);
  for ( ; ; --p, --q) {
    uint8_t ch = *p;
    if (ch < 0x80)
      *q = ch;
    else {
      *q = 0x80 | (ch & 0x3F);
      *--q = 0xc0 | ch >> 6;
      LOG ("ch: " << (char) ch);
    }
    // q catches up with p once the last byte to expand has been handled:
    // everything before that point is already in its final place.
    if (p == q)
      break;
  }
  LOG ("content: " << content);
}
// ================================================================================
// Decodes, in place, the "=?charset?Q|B?text?=" encoded words of `content`.
// Text between encoded words is copied unchanged; the whitespace matched
// around an encoded word by encodedWordRegex is dropped with it.
// The payload is base64 or quoted decoded depending on the encoding letter,
// then passed through iso2utf when the charset name starts with "iso".
// Offsets are now kept in string::size_type (the previous `auto pos (0)` was
// an int), and '_' is turned into a space only for the Q encoding, the only
// one where RFC 2047 gives it that meaning.
void
kaz::encodedWordDecode (string &content) {
  // rfc2047
  DEF_LOG ("kaz::encodedWordDecode", "content: " << content);
  string::size_type charsetPos = content.find ("=?");
  if (charsetPos == string::npos)
    return;
  LOG ("charsetPos: " << charsetPos);
  string result;
  // Index of the first character of `content` not copied to `result` yet.
  string::size_type pos (0);
  const sregex_iterator ewItEnd;
  for (sregex_iterator ewIt (content.begin (), content.end (), encodedWordRegex);
       ewIt != ewItEnd;
       ++ewIt) {
    const smatch &m (*ewIt);
    const string::size_type matchPos (m.position ());
    if (pos != matchPos) {
      result += content.substr (pos, matchPos - pos);
      LOG ("stantad " << content.substr (pos, matchPos - pos));
    }
    string encoded (m[3]);
    LOG ("charset: " << m[1] << " mode: " << m[2] << " string: " << encoded);
    switch (m[2].str ()[0]) {
    case 'B':
    case 'b':
      base64Decode (encoded);
      break;
    case 'Q':
    case 'q':
      replace (encoded.begin (), encoded.end (), '_', ' ');
      quotedDecode (encoded);
      break;
    default:
      LOG_BUG (true, return, "kazMisc::encodedWordDecode bug: unknown mode. (mode: " << m[2] << ")");
    }
    LOG ("decoded: " << encoded);
    string charset (m[1]);
    toLower (charset);
    // caseInsensitiveFind returns 0 when the charset name starts with "iso".
    if (! caseInsensitiveFind (charset, "ISO"))
      iso2utf (encoded);
    result += encoded;
    pos = matchPos + static_cast<string::size_type> (m.length ());
  }
  content = result + content.substr (pos);
  LOG ("content: " << content);
}
// ================================================================================
// Decodes, in place, a value of the form  charset'language'percent-encoded-text.
// The text after the second quote is decoded with '%' as escape character and
// passed through iso2utf when the charset name starts with "iso".
// `content` is left untouched (and the problem reported through LOG_BUG) when
// the two single quotes are not both present.
void
kaz::charsetValueDecode (string &content) {
  // rfc2184
  DEF_LOG ("kaz::charsetValueDecode", "content: " << content);
  const string::size_type langPos (content.find ('\''));
  LOG_BUG (langPos == string::npos, return, "kazMisc::charsetValueDecode bug: no '. (content: " << content << ")");
  const string::size_type contentPos (content.find ('\'', langPos+1));
  LOG_BUG (contentPos == string::npos, return, "kazMisc::charsetValueDecode bug: no double '. (content: " << content << ")");
  string value (content, contentPos+1);
  quotedDecode<'%'> (value);
  LOG ("tmp: " << value);
  string charset (content, 0, langPos);
  toLower (charset);
  // A result of 0 means the charset name begins with "iso".
  if (caseInsensitiveFind (charset, "ISO") == 0)
    iso2utf (value);
  content.swap (value);
  LOG ("content: " << content);
}
// ================================================================================
// Strips the double quotes around a quoted string, in place.
// Nothing is done unless `content` starts with '"'. Otherwise the result is
// the text between the opening quote and the first unescaped '"' (whatever
// follows that quote is dropped), or everything after the opening quote when
// no closing quote is found. Backslash escapes are kept as they are.
// A backslash now always escapes the character that follows it, so a closing
// quote placed right after an escaped backslash (\\") is recognised; the
// previous test only looked at the single character before the quote.
void
kaz::removeQuote (string &content) {
  if (content.empty () || content [0] != '"')
    return;
  const string::size_type len (content.length ());
  string::size_type stop (1);
  for (; stop < len; ++stop) {
    if (content [stop] == '\\') {
      // Skip the escaped character, whatever it is.
      ++stop;
      continue;
    }
    if (content [stop] == '"')
      break;
  }
  content = (stop < len) ?
    content.substr (1, stop-1) :
    content.substr (1);
}
// ================================================================================