fix rfc2047 / filter log / filterTest options
This commit is contained in:
@ -65,17 +65,9 @@ const string Attachment::ALTERNATIVE ("alternative");
|
||||
const string Attachment::KAZ_ATTACH_NAME (".---KazAttachment---.html");
|
||||
const string Attachment::MULTIPART ("multipart/");
|
||||
|
||||
|
||||
const regex Attachment::nameCharsetRegEx (".*name\\*=[ \t]*(.*)");
|
||||
const regex Attachment::nameRegEx (".*name=[ \t]*((\"(\\\\.|[^\\\\\r])*\")|[^\r; ]*);?.*");
|
||||
// boundary="----=_Part_796779_1154936629.1668080348646"
|
||||
// boundary="------------040709000505010508040808"
|
||||
// boundary="----------=_1668606031-941125-91"
|
||||
// boundary="_004_PAVPR10MB6792713B313048E3A259B215B2079PAVPR10MB6792EURP_";
|
||||
// boundary="_000_PAVPR10MB6792713B313048E3A259B215B2079PAVPR10MB6792EURP_"
|
||||
// boundary=--boundary_1351_64006126-2b0e-4a3b-98ac-4797d1634188
|
||||
// boundary=--boundary_1352_7e294c9a-cfab-44a0-bfb3-7310380ac7cb;
|
||||
const regex Attachment::boundaryRegEx (".*boundary=[ \t]*((\"(\\\\.|[^\\\\])*\")|[^; ]*);?.*");
|
||||
const regex Attachment::nameCharsetRegEx (".*name\\*=\\s*([; \t]*)");
|
||||
const regex Attachment::nameRegEx ( ".*name=\\s*((\"(\\\\.|[^\\\\])*\")|[^; \t]*).*");
|
||||
const regex Attachment::boundaryRegEx (".*boundary=\\s*((\"(\\\\.|[^\\\\])*\")|[^; \t]*).*");
|
||||
const regex Attachment::cidDefRegEx (".*<([^>]*)>.*");
|
||||
const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*");
|
||||
const regex Attachment::multiRegEx ("\\s*"+MULTIPART+"(mixed|"+RELATED+"|"+ALTERNATIVE+"|"+SIGNED+").*");
|
||||
@ -188,23 +180,49 @@ Attachment::getAttachName () const {
|
||||
static string tokens [] = {contentTypeToken, contentDispositionToken};
|
||||
DEF_LOG ("Attachment::getAttachName", "");
|
||||
for (string token : tokens) {
|
||||
// name=
|
||||
string result = getProp (token, nameRegEx);
|
||||
removeQuote (result);
|
||||
if (result.length ()) {
|
||||
LOG ("name=: " << result);
|
||||
encodedWord (result);
|
||||
encodedWordDecode (result);
|
||||
return result;
|
||||
}
|
||||
// name*x=
|
||||
for (int id = 0; ; ++id) {
|
||||
string item = getProp (token, regex (".*name\\*"+to_string (id)+"=\\s*((\"(\\\\.|[^\\\\])*\")|[; \t]*).*"));
|
||||
if (item.empty ())
|
||||
break;
|
||||
result += item;
|
||||
}
|
||||
removeQuote (result);
|
||||
if (result.length ()) {
|
||||
LOG ("name*x=: " << result);
|
||||
encodedWordDecode (result);
|
||||
return result;
|
||||
}
|
||||
// name*=
|
||||
result = getProp (token, nameCharsetRegEx);
|
||||
removeQuote (result);
|
||||
if (result.length ()) {
|
||||
LOG ("name*=: " << result);
|
||||
charsetValue (result);
|
||||
charsetValueDecode (result);
|
||||
return result;
|
||||
}
|
||||
// name*x*=
|
||||
for (int id = 0; ; ++id) {
|
||||
string item = getProp (token, regex (".*name\\*"+to_string (id)+"\\*=\\s*([^; ]*)"));
|
||||
if (item.empty ())
|
||||
break;
|
||||
result += item;
|
||||
}
|
||||
removeQuote (result);
|
||||
if (result.length ()) {
|
||||
LOG ("name*x*=: " << result);
|
||||
encodedWordDecode (result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
// XXX the value must be concatenated when there are several filename*x= lines
|
||||
// XXX the value must be concatenated when there are several filename*x*= lines
|
||||
return getUnknown (getContentType ());
|
||||
}
|
||||
|
||||
@ -257,7 +275,7 @@ Attachment::isDefProp (const string &token, const string &val) const {
|
||||
if (it == env.end ())
|
||||
return false;
|
||||
// XXX case insensitive ??
|
||||
return it->second.find (val) != string::npos;
|
||||
return caseInsensitiveFind (it->second, val) != string::npos;
|
||||
}
|
||||
|
||||
// ================================================================================
|
||||
@ -312,7 +330,8 @@ Attachment::readMime (ifstream &mbox, streamoff &curPos) {
|
||||
lastVar = line.substr (0, colonPos);
|
||||
toLower (lastVar);
|
||||
LOG ("find var: " << lastVar);
|
||||
string val (cleanString (line.length () >= colonPos+2 ? line.substr (colonPos+2) : "")); // XXX check RFC " " after ": "
|
||||
// XXX check in RFC if " " after ": " (=> +2 or +1)
|
||||
string val (cleanString (line.length () >= colonPos+2 ? line.substr (colonPos+2) : ""));
|
||||
LOG ("new var: <" << lastVar << " <=> " << val << ">");
|
||||
env [lastVar] = val;
|
||||
}
|
||||
|
@ -61,7 +61,7 @@ static const string TMPL_FILENAME ("{{FILENAME}}");
|
||||
static const string CID ("cid:");
|
||||
|
||||
// "l=/" => v1 compatibility
|
||||
static const regex archiveURLSignature (".*(([&?]g=)|([&?]l=/)).*");
|
||||
static const regex archiveURLRegex (".*(([&?]g=)|([&?]l=/)).*");
|
||||
|
||||
static const string KAZ_PLAIN_HR ("______________________________________________________________________________");
|
||||
static const string KAZ_PLAIN_START ("~~ PJ-KAZ !"); // don't end with a space
|
||||
@ -423,7 +423,7 @@ MainAttachment::removePreviousArchive () {
|
||||
vector<string> toRemove;
|
||||
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) {
|
||||
const string key (it->first);
|
||||
if (regex_match (key, archiveURLSignature))
|
||||
if (regex_match (key, archiveURLRegex))
|
||||
toRemove.push_back (key);
|
||||
}
|
||||
for (string old : toRemove)
|
||||
@ -567,7 +567,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
|
||||
|
||||
// ================================================================================
|
||||
void
|
||||
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize, const AttachMode &attachMode) {
|
||||
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize, AttachMode attachMode) {
|
||||
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize << " AttachMode: " << attachMode);
|
||||
|
||||
// preparation
|
||||
@ -601,30 +601,34 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
|
||||
getDisclaim (plainDisclaim, htmlDisclaim);
|
||||
|
||||
// copy email
|
||||
if (plainDisclaim.size () && emptyEMail && boundary.empty ()) {
|
||||
// only one attachment must be replace
|
||||
cerr << "eMailShrinker: force one attachment" << endl;
|
||||
string mime (getMime (mbox));
|
||||
string::size_type startPos = (0);
|
||||
for (string token : {string ("Content-Transfer-Encoding"), Attachment::contentTypeToken}) {
|
||||
startPos = caseInsensitiveFind (mime, "Content-Transfer-Encoding");
|
||||
for (string::size_type stopPos (startPos);
|
||||
(stopPos = mime.find ("\n", stopPos)) != string::npos;
|
||||
) {
|
||||
if (string (" \t").find (mime [stopPos+1]) == string::npos) {
|
||||
mime.erase (startPos, stopPos-startPos);
|
||||
break;
|
||||
if (!boundary.size () && plainDisclaim.size ()) {
|
||||
if (attachMode & ATTACHMENT)
|
||||
attachMode = FOOTER;
|
||||
if (emptyEMail) {
|
||||
// only one attachment must be replaced
|
||||
cerr << "eMailShrinker: force one attachment" << endl;
|
||||
string mime (getMime (mbox));
|
||||
string::size_type startPos = (0);
|
||||
for (string token : {string ("Content-Transfer-Encoding"), Attachment::contentTypeToken}) {
|
||||
startPos = caseInsensitiveFind (mime, "Content-Transfer-Encoding");
|
||||
for (string::size_type stopPos (startPos);
|
||||
(stopPos = mime.find ("\n", stopPos)) != string::npos;
|
||||
) {
|
||||
if (string (" \t").find (mime [stopPos+1]) == string::npos) {
|
||||
mime.erase (startPos, stopPos-startPos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
mime.insert (startPos, KAZ_EMPTY_TEXT_PLAIN);
|
||||
string content (plainDisclaim);
|
||||
base64Encode (content);
|
||||
outbox << mime
|
||||
<< content << endl;
|
||||
outbox.flush ();
|
||||
outbox.close ();
|
||||
return;
|
||||
}
|
||||
mime.insert (startPos, KAZ_EMPTY_TEXT_PLAIN);
|
||||
string content (plainDisclaim);
|
||||
base64Encode (content);
|
||||
outbox << mime
|
||||
<< content << endl;
|
||||
outbox.flush ();
|
||||
outbox.close ();
|
||||
return;
|
||||
}
|
||||
streamoff curPos = 0;
|
||||
copy (mbox, outbox, curPos, contentPos);
|
||||
@ -694,8 +698,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
|
||||
}
|
||||
removeSection (content, KAZ_HTML_START, KAZ_HTML_STOP);
|
||||
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
|
||||
// XXX case insensitive ??
|
||||
if (content.find (CID) != string::npos)
|
||||
if (caseInsensitiveFind (content, CID) != string::npos)
|
||||
replaceAll (content, translateHtml);
|
||||
attachP->replaceEmbedded (content);
|
||||
}
|
||||
@ -733,12 +736,9 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
|
||||
string content (KAZ_HTML_CONTENT+htmlDisclaim+BODY_END+HTML_END);
|
||||
base64Encode (content);
|
||||
|
||||
if (boundary.size ())
|
||||
outbox << boundary.substr (0, boundary.length () -2) << endl
|
||||
<< KAZ_ATTACHMENT_TEXT_HTML << endl
|
||||
<< content << endl;
|
||||
else
|
||||
outbox << "coucou No multipart" << endl;
|
||||
outbox << boundary.substr (0, boundary.length () -2) << endl
|
||||
<< KAZ_ATTACHMENT_TEXT_HTML << endl
|
||||
<< content << endl;
|
||||
outbox.flush ();
|
||||
}
|
||||
copy (mbox, outbox, curPos, endPos);
|
||||
|
@ -33,8 +33,8 @@
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "version.hpp"
|
||||
const std::string kaz::LAST_VERSION_NUM ("2.8");
|
||||
const std::string kaz::LAST_VERSION_DATE ("2022-12-23");
|
||||
const std::string kaz::LAST_VERSION_NUM ("2.9");
|
||||
const std::string kaz::LAST_VERSION_DATE ("2022-12-24");
|
||||
const std::string kaz::LAST_VERSION (LAST_VERSION_NUM+" "+LAST_VERSION_DATE+" eMailShrinker");
|
||||
|
||||
#include <iostream>
|
||||
@ -106,7 +106,7 @@ static const char *const inputFileC = inputFile.c_str ();
|
||||
|
||||
int
|
||||
main (int argc, char** argv) {
|
||||
// XXX debug before parse options
|
||||
// uncomment the next line to debug option parsing
|
||||
// Log::debug = true;
|
||||
DEF_LOG ("main:", "");
|
||||
prog = argv [0];
|
||||
|
@ -106,7 +106,7 @@ static const char *const inputFileC = inputFile.c_str ();
|
||||
|
||||
int
|
||||
main (int argc, char** argv) {
|
||||
// XXX debug before parse options
|
||||
// uncomment the next line to debug option parsing
|
||||
// Log::debug = true;
|
||||
DEF_LOG ("main:", "");
|
||||
prog = argv [0];
|
||||
|
@ -65,6 +65,14 @@ const string kaz::availableURLChars =
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"~";
|
||||
|
||||
// Matches one encoded-word of the form "=?charset?mode?encoded-string?=",
// including any whitespace directly before and after it.
// Capture groups: 1 = charset, 2 = mode letter (Q/q or B/b), 3 = encoded string.
const regex kaz::encodedWordRegex ("\\s*=\\?" // flag begin
|
||||
"([0-9A-Za-z!#$%&'+^_`{}~-]+)" // charset
|
||||
"\\?" // flag sep
|
||||
"([QqBb])" // quoted or base64
|
||||
"\\?" // flag sep
|
||||
"([^ ?]+)" // encoded string
|
||||
"\\?=\\s*"); // flag end
|
||||
|
||||
|
||||
// ================================================================================
|
||||
uint16_t
|
||||
@ -369,65 +377,66 @@ kaz::iso2utf (string &content) {
|
||||
|
||||
// ================================================================================
|
||||
void
|
||||
kaz::encodedWord (string &content) {
|
||||
kaz::encodedWordDecode (string &content) {
|
||||
// rfc2047
|
||||
DEF_LOG ("kaz::extendedWord", "content: " << content);
|
||||
DEF_LOG ("kaz::encodedWordDecode", "content: " << content);
|
||||
string::size_type charsetPos = content.find ("=?");
|
||||
if (charsetPos == string::npos)
|
||||
return;
|
||||
LOG ("charsetPos: " << charsetPos);
|
||||
|
||||
LOG_BUG (charsetPos != 0, return, "kazMisc::extendedWord bug: =? not at begin pos. (content: " << content << ")");
|
||||
string result;
|
||||
for ( ;
|
||||
(charsetPos = content.find ("=?", charsetPos)) != string::npos;
|
||||
) {
|
||||
string::size_type modePos = content.find ("?", charsetPos+2);
|
||||
auto pos (0);
|
||||
sregex_iterator ewItEnd;
|
||||
for (sregex_iterator ewIt (content.begin (), content.end (), encodedWordRegex);
|
||||
ewIt != ewItEnd;
|
||||
++ewIt) {
|
||||
smatch m = *ewIt;
|
||||
if (pos != m.position ()) {
|
||||
result += content.substr (pos, m.position () - pos);
|
||||
LOG ("stantad " << content.substr (pos, m.position () - pos));
|
||||
}
|
||||
string encoded (m[3]);
|
||||
replace (encoded.begin (), encoded.end (), '_', ' ');
|
||||
|
||||
LOG_BUG (modePos == string::npos, return, "kazMisc::extendedWord bug: no end chartset. (content: " << content << ")");
|
||||
string::size_type contentPos = content.find ("?", modePos+1);
|
||||
LOG ("charset: " << m[1] << " mode: " << m[2] << " string: " << encoded);
|
||||
|
||||
LOG_BUG (contentPos != modePos+2, return, "kazMisc::extendedWord bug: no end chartset. (content: " << content << ")");
|
||||
string::size_type endPos = content.find ("?=", contentPos+1);
|
||||
|
||||
LOG_BUG (endPos == string::npos, return, "kazMisc::extendedWord bug: no end chartset. (content: " << content << ")");
|
||||
string tmp (content.substr (contentPos+1, endPos-contentPos-1));
|
||||
switch (content [modePos+1]) {
|
||||
switch (m[2].str ()[0]) {
|
||||
case 'B':
|
||||
case 'b':
|
||||
base64Decode (tmp);
|
||||
base64Decode (encoded);
|
||||
break;
|
||||
case 'Q':
|
||||
case 'q':
|
||||
quotedDecode (tmp);
|
||||
quotedDecode (encoded);
|
||||
break;
|
||||
default:
|
||||
|
||||
LOG_BUG (true, return, "kazMisc::extendedWord bug: unknown mode. (mode: " << content [modePos+1] << ")");
|
||||
LOG_BUG (true, return, "kazMisc::encodedWordDecode bug: unknown mode. (mode: " << m[2] << ")");
|
||||
}
|
||||
LOG ("tmp: " << tmp);
|
||||
string charset (content.substr (charsetPos, modePos-charsetPos-2));
|
||||
LOG ("decoded: " << encoded);
|
||||
string charset (m[1]);
|
||||
toLower (charset);
|
||||
if (! caseInsensitiveFind (charset, "ISO"))
|
||||
iso2utf (tmp);
|
||||
result += tmp;
|
||||
charsetPos = endPos+2;
|
||||
iso2utf (encoded);
|
||||
result += encoded;
|
||||
pos = m.position () + m.str ().length ();
|
||||
}
|
||||
content = result;
|
||||
content = result + content.substr (pos);
|
||||
LOG ("content: " << content);
|
||||
}
|
||||
|
||||
// ================================================================================
|
||||
void
|
||||
kaz::charsetValue (string &content) {
|
||||
kaz::charsetValueDecode (string &content) {
|
||||
// rfc2184
|
||||
DEF_LOG ("kaz::charsetValue", "content: " << content);
|
||||
DEF_LOG ("kaz::charsetValueDecode", "content: " << content);
|
||||
string::size_type langPos = content.find ("'");
|
||||
|
||||
LOG_BUG (langPos == string::npos, return, "kazMisc::charsetValue bug: no '. (content: " << content << ")");
|
||||
LOG_BUG (langPos == string::npos, return, "kazMisc::charsetValueDecode bug: no '. (content: " << content << ")");
|
||||
string::size_type contentPos = content.find ("'", langPos+1);
|
||||
|
||||
LOG_BUG (contentPos == string::npos, return, "kazMisc::charsetValue bug: no double '. (content: " << content << ")");
|
||||
LOG_BUG (contentPos == string::npos, return, "kazMisc::charsetValueDecode bug: no double '. (content: " << content << ")");
|
||||
string tmp (content.substr (contentPos+1));
|
||||
quotedDecode<'%'> (tmp);
|
||||
LOG ("tmp: " << tmp);
|
||||
|
Reference in New Issue
Block a user