You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
481 lines
15 KiB
481 lines
15 KiB
////////////////////////////////////////////////////////////////////////////
|
|
// Copyright KAZ 2021 //
|
|
// //
|
|
// contact (at) kaz.bzh //
|
|
// //
|
|
// This software is a filter to shrink email by attachment extraction. //
|
|
// //
|
|
// This software is governed by the CeCILL-B license under French law and //
|
|
// abiding by the rules of distribution of free software. You can use, //
|
|
// modify and/or redistribute the software under the terms of the //
|
|
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|
// URL "http://www.cecill.info". //
|
|
// //
|
|
// As a counterpart to the access to the source code and rights to copy, //
|
|
// modify and redistribute granted by the license, users are provided //
|
|
// only with a limited warranty and the software's author, the holder of //
|
|
// the economic rights, and the successive licensors have only limited //
|
|
// liability. //
|
|
// //
|
|
// In this respect, the user's attention is drawn to the risks associated //
|
|
// with loading, using, modifying and/or developing or reproducing the //
|
|
// software by the user in light of its specific status of free software, //
|
|
// that may mean that it is complicated to manipulate, and that also //
|
|
// therefore means that it is reserved for developers and experienced //
|
|
// professionals having in-depth computer knowledge. Users are therefore //
|
|
// encouraged to load and test the software's suitability as regards //
|
|
// their requirements in conditions enabling the security of their //
|
|
// systems and/or data to be ensured and, more generally, to use and //
|
|
// operate it in the same conditions as regards security. //
|
|
// //
|
|
// The fact that you are presently reading this means that you have had //
|
|
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <iostream>
|
|
#include <sys/ioctl.h>
|
|
#include <algorithm>
|
|
#include <chrono>
|
|
#include <sstream>
|
|
#include <iomanip>
|
|
|
|
#include "kazDebug.hpp"
|
|
#include "kazMisc.hpp"
|
|
|
|
using namespace std;
|
|
using namespace kaz;
|
|
|
|
//template void kaz::quotedDecoded<'='> (string &content);
|
|
//template void kaz::quotedDecoded<'%'> (string &content);
|
|
|
|
// Maximum length of an encoded output line, line break excluded.
// RFC 2045 (sections 6.7 and 6.8) limits quoted-printable and base64 lines to
// 76 characters; the previous value (78) let both encoders emit longer lines.
static const string::size_type MAX_QUOTED_PRINTABLE_SIZE (76);
|
|
|
|
// Base64 alphabet: the character at index i encodes the 6-bit value i
// (this is how kaz::base64Encode indexes it).
const char *const kaz::base64Chars =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
  "0123456789+/";
|
|
|
|
// Set of characters listed as available in a URL (in ASCII order).
const string kaz::availableURLChars (
  "!#$%&'()*+,-./0123456789:;=?"
  "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_"
  "abcdefghijklmnopqrstuvwxyz~");
|
|
|
|
// Matches one encoded-word "=?charset?encoding?text?=" together with the
// white space surrounding it.
// Capture 1: charset, capture 2: encoding flag (Q or B, any case),
// capture 3: encoded text.
const regex kaz::encodedWordRegex (
  "\\s*=\\?"                       // flag begin
  "([0-9A-Za-z!#$%&'+^_`{}~-]+)"   // charset
  "\\?"                            // flag sep
  "([QqBb])"                       // quoted or base64
  "\\?"                            // flag sep
  "([^ ?]+)"                       // encoded string
  "\\?=\\s*");                     // flag end
|
|
|
|
|
|
// ================================================================================
|
|
uint16_t
|
|
kaz::getCols () {
|
|
struct winsize w;
|
|
ioctl (0, TIOCGWINSZ, &w);
|
|
return w.ws_col;
|
|
}
|
|
|
|
// ================================================================================
|
|
// Formats a time span as "HH:MM:SS.ssssss".
// delta is loaded into a duration<double>, i.e. it is interpreted as a number
// of seconds. Hours are not folded into days, so they may use more than two
// digits. Every field is zero padded.
string
kaz::ns2string (const double &delta) {
  using namespace std::chrono;

  // peel off whole hours, then whole minutes; what is left is seconds
  duration<double> remaining (delta);
  const auto wholeHours = duration_cast<hours> (remaining);
  remaining -= wholeHours;
  const auto wholeMinutes = duration_cast<minutes> (remaining);
  remaining -= wholeMinutes;

  ostringstream out;
  out.fill ('0');
  out << setw (2) << wholeHours.count () << ":";
  out << setw (2) << wholeMinutes.count () << ":";
  out << setw (9) << fixed << setprecision (6) << remaining.count ();
  return out.str ();
}
|
|
// ================================================================================
|
|
void
|
|
kaz::replaceAll (string& str, const string &from, const string &to) {
|
|
DEF_LOG ("kaz::replaceAll", "form: " << from << " to: " << to);
|
|
if (str.empty () || from.empty ())
|
|
return;
|
|
for (string::size_type startPos (0);
|
|
(startPos = str.find (from, startPos)) != string::npos;
|
|
startPos += to.length ())
|
|
str.replace (startPos, from.size (), to);
|
|
}
|
|
|
|
void
|
|
kaz::replaceAll (string& str, const map<const string, const string> &subst) {
|
|
DEF_LOG ("kaz::replaceAll", "str: " << str);
|
|
for (map<const string, const string>::const_iterator it = subst.begin (); it != subst.end (); ++it)
|
|
replaceAll (str, it->first, it->second);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::toLower (string &content) {
|
|
DEF_LOG ("kaz::toLower", "content: " << content);
|
|
static locale loc;
|
|
for (string::size_type i = 0; i < content.length (); ++i)
|
|
content [i] = tolower (content[i], loc);
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// Returns an upper-case view of "src" without copying when it is avoidable.
// - When "src" holds no character changed by toupper(), "src" itself is
//   returned and "tmp" is left untouched.
// - Otherwise "tmp" is overwritten with the upper-cased copy and returned.
// The returned reference is only valid while both "src" and "tmp" are alive.
const string &
kaz::toUpperIfNeed (const string &src, string &tmp) {
  DEF_LOG ("kaz::toUpperIfNeed", "src: " << src);
  for (string::const_iterator it = src.begin (); it != src.end (); ++it)
    // toupper() requires a value representable as unsigned char
    if (*it != static_cast<char> (toupper (static_cast<unsigned char> (*it)))) {
      // start from an empty buffer sized for the whole copy
      tmp.clear ();
      tmp.reserve (src.length ());
      for (it = src.begin (); it != src.end (); ++it)
        tmp.push_back (static_cast<char> (toupper (static_cast<unsigned char> (*it))));
      return tmp;
    }
  return src;
}
|
|
|
|
// Predicate used for case-insensitive searches: true when the upper-cased
// form of "a" equals "b". Only "a" is converted, so "b" is expected to be
// upper case already.
inline bool
caseInsensitiveCharCompare (char a, char b) {
  // toupper() is undefined for negative values other than EOF: go through
  // unsigned char, then come back to char for the comparison.
  return static_cast<char> (toupper (static_cast<unsigned char> (a))) == b;
}
|
|
|
|
// Case-insensitive forward search of "pattern" in "s", starting at index "pos".
// Returns the index of the first match, or string::npos when there is none or
// when "pos" lies beyond the end of "s".
// As the iterator comparison below shows, an empty "pattern" is reported at
// "pos", except when "pos" equals s.length () (string::npos is returned).
string::size_type
kaz::caseInsensitiveFind (const string& s, const string& pattern, const string::size_type &pos) {
  DEF_LOG ("kaz::caseInsensitiveFind", "pattern: " << pattern << " pos: " << pos << " s: " << s);
  // s.begin ()+pos would be out of range
  if (pos > s.length ())
    return string::npos;
  string tmp;
  const string &upperPattern (toUpperIfNeed (pattern, tmp));
  LOG ("pattern: " << upperPattern);
  string::const_iterator it (search (s.begin ()+pos, s.end (), upperPattern.begin (), upperPattern.end (), caseInsensitiveCharCompare));
  if (it == s.end ())
    return string::npos;
  LOG ("find: " << (it - s.begin ()));
  return it - s.begin ();
}
|
|
|
|
// Case-insensitive backward search: returns the index of the last occurrence
// of "pattern" in "s", or string::npos when there is none.
// "pos" is the lowest index the search may reach: only s[pos..] is scanned,
// which mirrors caseInsensitiveFind. With pos == 0 the whole string is
// scanned, exactly as before.
// NOTE(review): the previous code used s.rend ()+pos, which runs past rend ()
// for any pos > 0; the "lowest index" reading is an assumption to confirm
// against the callers and the default value declared in the header.
string::size_type
kaz::caseInsensitiveRFind (const string& s, const string& pattern, const string::size_type &pos) {
  DEF_LOG ("kaz::caseInsensitiveRFind", "pattern: " << pattern << " pos: " << pos << " s: " << s);
  // s.rend ()-pos would be out of range
  if (pos > s.length ())
    return string::npos;
  string tmp;
  const string &upperPattern (toUpperIfNeed (pattern, tmp));
  LOG ("pattern: " << upperPattern);
  // reverse position matching forward index "pos"
  const string::const_reverse_iterator stop (s.rend ()-pos);
  string::const_reverse_iterator it (search (s.rbegin (), stop, upperPattern.rbegin (), upperPattern.rend (), caseInsensitiveCharCompare));
  // search () returns the end of the scanned range when nothing matches
  if (it == stop)
    return string::npos;
  // "it" designates the last character of the match: step back to its first
  LOG ("find: " << (s.rend () - it - pattern.length ()));
  return s.rend () - it - pattern.length ();
}
|
|
|
|
// Builds a string of "size" characters drawn from [0-9A-Za-z] with rand ().
// Seeding the generator is not done here.
string
kaz::boundaryGen (const int &size) {
  static const char alphanum[] =
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz";
  // the terminating '\0' of the literal is not a candidate
  const size_t alphabetSize (sizeof (alphanum) - 1);
  string boundary;
  boundary.reserve (size);
  for (int remaining = size; remaining > 0; --remaining)
    boundary.push_back (alphanum[rand() % alphabetSize]);
  return boundary;
}
|
|
|
|
// ================================================================================
|
|
template<char delim>
|
|
void
|
|
kaz::quotedDecode (string &content) {
|
|
DEF_LOG ("kaz::quotedDecode", "delim: " << delim << " content: " << content);
|
|
string::size_type len (content.length ());
|
|
if (!len)
|
|
return;
|
|
LOG ("len: " << len);
|
|
string::iterator p (content.begin ()), q (p);
|
|
for ( ;
|
|
p < content.end ();
|
|
++p, ++q) {
|
|
if (*p != delim) {
|
|
*q = *p;
|
|
continue;
|
|
}
|
|
if (p+1 < content.end () && *(p+1) == '\n') {
|
|
|
|
LOG_BUG (q == content.begin (), ++p;continue, "kazMisc::quotedDecode bug: bad quoted-printable format. (start with '=', delim: " << int (delim) << " content: " << content << ")");
|
|
++p;
|
|
--q;
|
|
continue;
|
|
}
|
|
|
|
LOG_BUG (p+3 > content.end () || !isxdigit (p[1]) || !isxdigit (p[2]), return, "kazMisc::quotedDecode bug: bad quoted-printable format. (delim: " << int (delim) << " content: " << content << ")");
|
|
*q = (char) ((getHexaVal (p[1]) << 4) + getHexaVal (p[2]));
|
|
p += 2;
|
|
}
|
|
content.resize (q-content.begin ());
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::quotedEncode (string &content) {
|
|
DEF_LOG ("kaz::quotedDecode", "content: " << content);
|
|
string::size_type nbQuoted (0);
|
|
for (string::const_iterator it = content.begin (); it != content.end (); ++it)
|
|
if (isQuotedPrintable (*it))
|
|
++nbQuoted;
|
|
if (!nbQuoted)
|
|
return;
|
|
string::size_type estimate (content.length ()+nbQuoted*3);
|
|
estimate += (estimate/MAX_QUOTED_PRINTABLE_SIZE)*2;
|
|
string result;
|
|
result.reserve (estimate);
|
|
string::size_type cols (0);
|
|
char upper, lower;
|
|
for (string::const_iterator it = content.begin (); it != content.end (); ++it) {
|
|
const char &c (*it);
|
|
if (c == '\n') {
|
|
result.push_back ('\n');
|
|
cols = 0;
|
|
continue;
|
|
}
|
|
if (cols >= MAX_QUOTED_PRINTABLE_SIZE) {
|
|
result.push_back ('=');
|
|
result.push_back ('\n');
|
|
cols = 0;
|
|
}
|
|
if (!isQuotedPrintable (c) ||
|
|
((c == ' ' || c =='\t') && (it+1 == content.end () || *(it+1) == '\n'))) {
|
|
if (cols > MAX_QUOTED_PRINTABLE_SIZE-3) {
|
|
result.push_back ('=');
|
|
result.push_back ('\n');
|
|
cols = 0;
|
|
}
|
|
getHexa (c, upper, lower);
|
|
result.push_back ('=');
|
|
result.push_back (upper);
|
|
result.push_back (lower);
|
|
cols += 3;
|
|
continue;
|
|
}
|
|
result.push_back (c);
|
|
++cols;
|
|
}
|
|
content.swap (result);
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::base64Decode (string &content) {
|
|
DEF_LOG ("kaz::base64Decode", "content: " << content);
|
|
string::size_type len (content.length ());
|
|
if (!len)
|
|
return;
|
|
LOG ("len: " << len);
|
|
unsigned char buff[4];
|
|
int idx = 0;
|
|
string::iterator p (content.begin ()), q (p), lastOK (p);
|
|
for (;
|
|
p < content.end ();
|
|
++p) {
|
|
char c = *p;
|
|
if (c == '=')
|
|
break;
|
|
if (c == '\n') {
|
|
lastOK = p;
|
|
continue;
|
|
}
|
|
|
|
if (!isBase64 (c)) {
|
|
content.resize (lastOK-content.begin ());
|
|
LOG ("kazMisc::base64Decode bug: bad base64 format. (content: " << content << ")");
|
|
}
|
|
buff [idx] = getBase64Val (c);
|
|
if (++idx != 4)
|
|
continue;
|
|
*q = buff [0] << 2 | (buff [1] & 0x30) >> 4;
|
|
*++q = buff [1] << 4 | (buff [2] & 0x3c) >> 2;
|
|
*++q = buff [2] << 6 | buff [3];
|
|
++q;
|
|
idx = 0;
|
|
}
|
|
if (idx) {
|
|
for (int j = idx; j < 4; ++j)
|
|
buff [j] = 0;
|
|
*q = buff [0] << 2 | (buff [1] & 0x30) >> 4;
|
|
++q;
|
|
--idx;
|
|
if (idx) {
|
|
*q = buff [1] << 4 | (buff [2] & 0x3c) >> 2;
|
|
++q;
|
|
}
|
|
}
|
|
content.resize (q-content.begin ());
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::base64Encode (string &content) {
|
|
DEF_LOG ("kaz::base64Encode", "content: " << content);
|
|
string::size_type length (content.length ());
|
|
std::string result;
|
|
result.reserve ((length + 2) / 3 * 4 + length / MAX_QUOTED_PRINTABLE_SIZE + 1);
|
|
for (string::size_type pos (0), cols (0); pos < length; ) {
|
|
result.push_back (base64Chars [(content [pos + 0] & 0xfc) >> 2]);
|
|
if (pos == length-1) {
|
|
result.push_back (base64Chars [(content [pos + 0] & 0x03) << 4]);
|
|
result.push_back ('=');
|
|
result.push_back ('=');
|
|
break;
|
|
}
|
|
result.push_back (base64Chars [((content [pos + 0] & 0x03) << 4) +
|
|
((content [pos + 1] & 0xF0) >> 4)]);
|
|
if (pos == length-2) {
|
|
result.push_back (base64Chars [(content [pos + 1] & 0x0F) << 2]);
|
|
result.push_back ('=');
|
|
break;
|
|
}
|
|
result.push_back (base64Chars [((content [pos + 1] & 0x0F) << 2) +
|
|
((content [pos + 2] & 0xC0) >> 6)]);
|
|
result.push_back (base64Chars [content [pos + 2] & 0x3F]);
|
|
pos += 3;
|
|
cols += 4;
|
|
if (cols >= MAX_QUOTED_PRINTABLE_SIZE) {
|
|
result.push_back ('\n');
|
|
cols = 0;
|
|
}
|
|
}
|
|
content = result;
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::iso2utf (string &content) {
|
|
DEF_LOG ("kaz::iso2utf", "content: " << content);
|
|
string::size_type len (content.length ());
|
|
if (!len)
|
|
return;
|
|
LOG ("len: " << len);
|
|
string::size_type charCount (0);
|
|
for (string::iterator it = content.begin (); it != content.end (); ++it)
|
|
if ((uint8_t) *it >= 0x80)
|
|
++charCount;
|
|
if (!charCount)
|
|
return;
|
|
LOG ("charCount: " << charCount);
|
|
content.resize (len+charCount);
|
|
string::iterator p (content.end ()-1), q (p+charCount);
|
|
for ( ; ; --p, --q) {
|
|
uint8_t ch = *p;
|
|
if (ch < 0x80)
|
|
*q = ch;
|
|
else {
|
|
*q = 0x80 | (ch & 0x3F);
|
|
*--q = 0xc0 | ch >> 6;
|
|
LOG ("ch: " << (char) ch);
|
|
}
|
|
if (p == q)
|
|
break;
|
|
}
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::encodedWordDecode (string &content) {
|
|
// rfc2047
|
|
DEF_LOG ("kaz::encodedWordDecode", "content: " << content);
|
|
string::size_type charsetPos = content.find ("=?");
|
|
if (charsetPos == string::npos)
|
|
return;
|
|
LOG ("charsetPos: " << charsetPos);
|
|
|
|
string result;
|
|
auto pos (0);
|
|
sregex_iterator ewItEnd;
|
|
for (sregex_iterator ewIt (content.begin (), content.end (), encodedWordRegex);
|
|
ewIt != ewItEnd;
|
|
++ewIt) {
|
|
smatch m = *ewIt;
|
|
if (pos != m.position ()) {
|
|
result += content.substr (pos, m.position () - pos);
|
|
LOG ("stantad " << content.substr (pos, m.position () - pos));
|
|
}
|
|
string encoded (m[3]);
|
|
replace (encoded.begin (), encoded.end (), '_', ' ');
|
|
|
|
LOG ("charset: " << m[1] << " mode: " << m[2] << " string: " << encoded);
|
|
|
|
switch (m[2].str ()[0]) {
|
|
case 'B':
|
|
case 'b':
|
|
base64Decode (encoded);
|
|
break;
|
|
case 'Q':
|
|
case 'q':
|
|
quotedDecode (encoded);
|
|
break;
|
|
default:
|
|
|
|
LOG_BUG (true, return, "kazMisc::encodedWordDecode bug: unknown mode. (mode: " << m[2] << ")");
|
|
}
|
|
LOG ("decoded: " << encoded);
|
|
string charset (m[1]);
|
|
toLower (charset);
|
|
if (! caseInsensitiveFind (charset, "ISO"))
|
|
iso2utf (encoded);
|
|
result += encoded;
|
|
pos = m.position () + m.str ().length ();
|
|
}
|
|
content = result + content.substr (pos);
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::charsetValueDecode (string &content) {
|
|
// rfc2184
|
|
DEF_LOG ("kaz::charsetValueDecode", "content: " << content);
|
|
string::size_type langPos = content.find ("'");
|
|
|
|
LOG_BUG (langPos == string::npos, return, "kazMisc::charsetValueDecode bug: no '. (content: " << content << ")");
|
|
string::size_type contentPos = content.find ("'", langPos+1);
|
|
|
|
LOG_BUG (contentPos == string::npos, return, "kazMisc::charsetValueDecode bug: no double '. (content: " << content << ")");
|
|
string tmp (content.substr (contentPos+1));
|
|
quotedDecode<'%'> (tmp);
|
|
LOG ("tmp: " << tmp);
|
|
string charset (content.substr (0, langPos));
|
|
toLower (charset);
|
|
if (! caseInsensitiveFind (charset, "ISO"))
|
|
iso2utf (tmp);
|
|
content = tmp;
|
|
LOG ("content: " << content);
|
|
}
|
|
|
|
// ================================================================================
|
|
void
|
|
kaz::removeQuote (string &content) {
|
|
if (content.empty () || content [0] != '"')
|
|
return;
|
|
string::size_type stop = (1);
|
|
for (;;) {
|
|
stop = content.find ('"', stop);
|
|
if (stop == string::npos || content [stop-1] != '\\')
|
|
break;
|
|
++stop;
|
|
}
|
|
content = (stop != string::npos) ?
|
|
content.substr (1, stop-1) :
|
|
content.substr (1);
|
|
}
|
|
|
|
// ================================================================================
|
|
|