//////////////////////////////////////////////////////////////////////////// // Copyright KAZ 2021 // // // // contact (at) kaz.bzh // // // // This software is a filter to shrink email by attachment extraction. // // // // This software is governed by the CeCILL-B license under French law and // // abiding by the rules of distribution of free software. You can use, // // modify and/or redistribute the software under the terms of the // // CeCILL-B license as circulated by CEA, CNRS and INRIA at the following // // URL "http://www.cecill.info". // // // // As a counterpart to the access to the source code and rights to copy, // // modify and redistribute granted by the license, users are provided // // only with a limited warranty and the software's author, the holder of // // the economic rights, and the successive licensors have only limited // // liability. // // // // In this respect, the user's attention is drawn to the risks associated // // with loading, using, modifying and/or developing or reproducing the // // software by the user in light of its specific status of free software, // // that may mean that it is complicated to manipulate, and that also // // therefore means that it is reserved for developers and experienced // // professionals having in-depth computer knowledge. Users are therefore // // encouraged to load and test the software's suitability as regards // // their requirements in conditions enabling the security of their // // systems and/or data to be ensured and, more generally, to use and // // operate it in the same conditions as regards security. // // // // The fact that you are presently reading this means that you have had // // knowledge of the CeCILL-B license and that you accept its terms. // //////////////////////////////////////////////////////////////////////////// #include #include #include #include #include #include #include "kazDebug.hpp" #include "kazMisc.hpp" using namespace std; using namespace kaz; //template void kaz::quotedDecoded<'='> (string &content); //template void kaz::quotedDecoded<'%'> (string &content); static const string::size_type MAX_QUOTED_PRINTABLE_SIZE (78); const char *const kaz::base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/"; const string kaz::availableURLChars = "!#$%&'()*+,-./" "0123456789" ":;=?" "@ABCDEFGHIJKLMNOPQRSTUVWXYZ" "[]_" "abcdefghijklmnopqrstuvwxyz" "~"; const regex kaz::encodedWordRegex ("\\s*=\\?" // flag begin "([0-9A-Za-z!#$%&'+^_`{}~-]+)" // charset "\\?" // flag sep "([QqBb])" // quoted our base64 "\\?" // flag sep "([^ ?]+)" // encoded string "\\?=\\s*"); // flag end // ================================================================================ uint16_t kaz::getCols () { struct winsize w; ioctl (0, TIOCGWINSZ, &w); return w.ws_col; } // ================================================================================ string kaz::ns2string (const double &delta) { using namespace std::chrono; ostringstream oss; duration ns (delta); oss.fill ('0'); // typedef duration > days; // auto d = duration_cast(ns); // ns -= d; auto h = duration_cast (ns); ns -= h; auto m = duration_cast (ns); ns -= m; oss << setw (2) << h.count () << ":" << setw (2) << m.count () << ":" << setw (9) << fixed << setprecision (6) << ns.count (); return oss.str (); } // ================================================================================ void kaz::replaceAll (string& str, const string &from, const string &to) { DEF_LOG ("kaz::replaceAll", "form: " << from << " to: " << to); if (str.empty () || from.empty ()) return; for (string::size_type startPos (0); (startPos = str.find (from, startPos)) != string::npos; startPos += to.length ()) str.replace (startPos, from.size (), to); } void kaz::replaceAll (string& str, const map &subst) { DEF_LOG ("kaz::replaceAll", "str: " << str); for (map::const_iterator it = subst.begin (); it != subst.end (); ++it) replaceAll (str, it->first, it->second); } // ================================================================================ void kaz::toLower (string &content) { DEF_LOG ("kaz::toLower", "content: " << content); static locale loc; for (string::size_type i = 0; i < content.length (); ++i) content [i] = tolower (content[i], loc); LOG ("content: " << content); } const string & kaz::toUpperIfNeed (const string &src, string &tmp) { DEF_LOG ("kaz::toUpperIfNeed", "src: " << src); for (string::const_iterator it = src.begin (); it != src.end (); ++it) if (*it != toupper (*it)) { tmp.reserve (); for (it = src.begin (); it != src.end (); ++it) tmp.push_back (toupper (*it)); return tmp; } return src; } inline bool caseInsensitiveCharCompare (char a, char b) { return (toupper (a) == b); } string::size_type kaz::caseInsensitiveFind (const string& s, const string& pattern, const string::size_type &pos) { DEF_LOG ("kaz::caseInsensitiveFind", "pattern: " << pattern << " pos: " << pos << " s: " << s); string tmp; const string &upperPattern (toUpperIfNeed (pattern, tmp)); LOG ("pattern: " << upperPattern); string::const_iterator it (search (s.begin ()+pos, s.end (), upperPattern.begin (), upperPattern.end (), caseInsensitiveCharCompare)); if (it == s.end ()) return string::npos; LOG ("find: " << (it - s.begin ())); return it - s.begin (); } string::size_type kaz::caseInsensitiveRFind (const string& s, const string& pattern, const string::size_type &pos) { DEF_LOG ("kaz::caseInsensitiveRFind", "pattern: " << pattern << " pos: " << pos << " s: " << s); string tmp; const string &upperPattern (toUpperIfNeed (pattern, tmp)); LOG ("pattern: " << upperPattern); string::const_reverse_iterator it (search (s.rbegin (), s.rend ()+pos, upperPattern.rbegin (), upperPattern.rend (), caseInsensitiveCharCompare)); if (it == s.rend ()) return string::npos; LOG ("find: " << (s.rend () - it - pattern.length ())); return s.rend () - it - pattern.length (); } string kaz::boundaryGen (const int &size) { static const char alphanum[] = "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; string result; result.reserve (size); for (int i = 0; i < size; ++i) result += alphanum[rand() % (sizeof (alphanum) - 1)]; return result; } // ================================================================================ template void kaz::quotedDecode (string &content) { DEF_LOG ("kaz::quotedDecode", "delim: " << delim << " content: " << content); string::size_type len (content.length ()); if (!len) return; LOG ("len: " << len); string::iterator p (content.begin ()), q (p); for ( ; p < content.end (); ++p, ++q) { if (*p != delim) { *q = *p; continue; } if (p+1 < content.end () && *(p+1) == '\n') { LOG_BUG (q == content.begin (), ++p;continue, "kazMisc::quotedDecode bug: bad quoted-printable format. (start with '=', delim: " << int (delim) << " content: " << content << ")"); ++p; --q; continue; } LOG_BUG (p+3 > content.end () || !isxdigit (p[1]) || !isxdigit (p[2]), return, "kazMisc::quotedDecode bug: bad quoted-printable format. (delim: " << int (delim) << " content: " << content << ")"); *q = (char) ((getHexaVal (p[1]) << 4) + getHexaVal (p[2])); p += 2; } content.resize (q-content.begin ()); LOG ("content: " << content); } // ================================================================================ void kaz::quotedEncode (string &content) { DEF_LOG ("kaz::quotedDecode", "content: " << content); string::size_type nbQuoted (0); for (string::const_iterator it = content.begin (); it != content.end (); ++it) if (isQuotedPrintable (*it)) ++nbQuoted; if (!nbQuoted) return; string::size_type estimate (content.length ()+nbQuoted*3); estimate += (estimate/MAX_QUOTED_PRINTABLE_SIZE)*2; string result; result.reserve (estimate); string::size_type cols (0); char upper, lower; for (string::const_iterator it = content.begin (); it != content.end (); ++it) { const char &c (*it); if (c == '\n') { result.push_back ('\n'); cols = 0; continue; } if (cols >= MAX_QUOTED_PRINTABLE_SIZE) { result.push_back ('='); result.push_back ('\n'); cols = 0; } if (!isQuotedPrintable (c) || ((c == ' ' || c =='\t') && (it+1 == content.end () || *(it+1) == '\n'))) { if (cols > MAX_QUOTED_PRINTABLE_SIZE-3) { result.push_back ('='); result.push_back ('\n'); cols = 0; } getHexa (c, upper, lower); result.push_back ('='); result.push_back (upper); result.push_back (lower); cols += 3; continue; } result.push_back (c); ++cols; } content.swap (result); LOG ("content: " << content); } // ================================================================================ void kaz::base64Decode (string &content) { DEF_LOG ("kaz::base64Decode", "content: " << content); string::size_type len (content.length ()); if (!len) return; LOG ("len: " << len); unsigned char buff[4]; int idx = 0; string::iterator p (content.begin ()), q (p), lastOK (p); for (; p < content.end (); ++p) { char c = *p; if (c == '=') break; if (c == '\n') { lastOK = p; continue; } if (!isBase64 (c)) { content.resize (lastOK-content.begin ()); LOG ("kazMisc::base64Decode bug: bad base64 format. (content: " << content << ")"); } buff [idx] = getBase64Val (c); if (++idx != 4) continue; *q = buff [0] << 2 | (buff [1] & 0x30) >> 4; *++q = buff [1] << 4 | (buff [2] & 0x3c) >> 2; *++q = buff [2] << 6 | buff [3]; ++q; idx = 0; } if (idx) { for (int j = idx; j < 4; ++j) buff [j] = 0; *q = buff [0] << 2 | (buff [1] & 0x30) >> 4; ++q; --idx; if (idx) { *q = buff [1] << 4 | (buff [2] & 0x3c) >> 2; ++q; } } content.resize (q-content.begin ()); LOG ("content: " << content); } // ================================================================================ void kaz::base64Encode (string &content) { DEF_LOG ("kaz::base64Encode", "content: " << content); string::size_type length (content.length ()); std::string result; result.reserve ((length + 2) / 3 * 4 + length / MAX_QUOTED_PRINTABLE_SIZE + 1); for (string::size_type pos (0), cols (0); pos < length; ) { result.push_back (base64Chars [(content [pos + 0] & 0xfc) >> 2]); if (pos == length-1) { result.push_back (base64Chars [(content [pos + 0] & 0x03) << 4]); result.push_back ('='); result.push_back ('='); break; } result.push_back (base64Chars [((content [pos + 0] & 0x03) << 4) + ((content [pos + 1] & 0xF0) >> 4)]); if (pos == length-2) { result.push_back (base64Chars [(content [pos + 1] & 0x0F) << 2]); result.push_back ('='); break; } result.push_back (base64Chars [((content [pos + 1] & 0x0F) << 2) + ((content [pos + 2] & 0xC0) >> 6)]); result.push_back (base64Chars [content [pos + 2] & 0x3F]); pos += 3; cols += 4; if (cols >= MAX_QUOTED_PRINTABLE_SIZE) { result.push_back ('\n'); cols = 0; } } content = result; LOG ("content: " << content); } // ================================================================================ void kaz::iso2utf (string &content) { DEF_LOG ("kaz::iso2utf", "content: " << content); string::size_type len (content.length ()); if (!len) return; LOG ("len: " << len); string::size_type charCount (0); for (string::iterator it = content.begin (); it != content.end (); ++it) if ((uint8_t) *it >= 0x80) ++charCount; if (!charCount) return; LOG ("charCount: " << charCount); content.resize (len+charCount); string::iterator p (content.end ()-1), q (p+charCount); for ( ; ; --p, --q) { uint8_t ch = *p; if (ch < 0x80) *q = ch; else { *q = 0x80 | (ch & 0x3F); *--q = 0xc0 | ch >> 6; LOG ("ch: " << (char) ch); } if (p == q) break; } LOG ("content: " << content); } // ================================================================================ void kaz::encodedWordDecode (string &content) { // rfc2047 DEF_LOG ("kaz::encodedWordDecode", "content: " << content); string::size_type charsetPos = content.find ("=?"); if (charsetPos == string::npos) return; LOG ("charsetPos: " << charsetPos); string result; auto pos (0); sregex_iterator ewItEnd; for (sregex_iterator ewIt (content.begin (), content.end (), encodedWordRegex); ewIt != ewItEnd; ++ewIt) { smatch m = *ewIt; if (pos != m.position ()) { result += content.substr (pos, m.position () - pos); LOG ("stantad " << content.substr (pos, m.position () - pos)); } string encoded (m[3]); replace (encoded.begin (), encoded.end (), '_', ' '); LOG ("charset: " << m[1] << " mode: " << m[2] << " string: " << encoded); switch (m[2].str ()[0]) { case 'B': case 'b': base64Decode (encoded); break; case 'Q': case 'q': quotedDecode (encoded); break; default: LOG_BUG (true, return, "kazMisc::encodedWordDecode bug: unknown mode. (mode: " << m[2] << ")"); } LOG ("decoded: " << encoded); string charset (m[1]); toLower (charset); if (! caseInsensitiveFind (charset, "ISO")) iso2utf (encoded); result += encoded; pos = m.position () + m.str ().length (); } content = result + content.substr (pos); LOG ("content: " << content); } // ================================================================================ void kaz::charsetValueDecode (string &content) { // rfc2184 DEF_LOG ("kaz::charsetValueDecode", "content: " << content); string::size_type langPos = content.find ("'"); LOG_BUG (langPos == string::npos, return, "kazMisc::charsetValueDecode bug: no '. (content: " << content << ")"); string::size_type contentPos = content.find ("'", langPos+1); LOG_BUG (contentPos == string::npos, return, "kazMisc::charsetValueDecode bug: no double '. (content: " << content << ")"); string tmp (content.substr (contentPos+1)); quotedDecode<'%'> (tmp); LOG ("tmp: " << tmp); string charset (content.substr (0, langPos)); toLower (charset); if (! caseInsensitiveFind (charset, "ISO")) iso2utf (tmp); content = tmp; LOG ("content: " << content); } // ================================================================================ void kaz::removeQuote (string &content) { if (content.empty () || content [0] != '"') return; string::size_type stop = (1); for (;;) { stop = content.find ('"', stop); if (stop == string::npos || content [stop-1] != '\\') break; ++stop; } content = (stop != string::npos) ? content.substr (1, stop-1) : content.substr (1); } // ================================================================================