From 596ae82fe4950f79efc8ac0cf4ff838a0f4d4b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Fri, 23 Dec 2022 11:39:44 +0100 Subject: [PATCH] fix PGP (skip signed message) --- README.md | 2 +- src/bash/filterTest.sh | 3 +- src/cpp/Attachment.cpp | 37 ++++++++++++++++++------ src/cpp/MainAttachment.cpp | 58 +++++++++++++++++++++++++++----------- src/cpp/eMailShrinker.cpp | 4 +-- src/cpp/kazMisc.cpp | 11 ++++++-- src/include/Attachment.hpp | 8 ++++-- 7 files changed, 87 insertions(+), 36 deletions(-) mode change 100755 => 100644 src/bash/filterTest.sh diff --git a/README.md b/README.md index 9ff349d..7dae8b3 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ depollueur/ ## Compilation ```bash -sudo apt-get install --fix-missing build-essential make g++ libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libcurl4-gnutls-dev libssl-dev doxygen +sudo apt-get install --fix-missing build-essential make g++ libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libcurl4-gnutls-dev libssl-dev doxygen dos2unix git clone https://git.kaz.bzh/KAZ/depollueur.git # or for contributors : diff --git a/src/bash/filterTest.sh b/src/bash/filterTest.sh old mode 100755 new mode 100644 index d7560e0..e9ca941 --- a/src/bash/filterTest.sh +++ b/src/bash/filterTest.sh @@ -33,6 +33,7 @@ usage () { ######################################## mbox=$(realpath $1) +dos2unix "${mbox}" cd $(dirname $0) DOMAINNAME="$(cat domainname)" JIRAFEAU_URL="https://depot.${DOMAINNAME}" @@ -126,7 +127,7 @@ cat "${TMP_DIR}/PJ-name.txt" | { esac } else - LOG " - ${GREEN} no archive${NC}" + LOG " - ${GREEN}no archive${NC}" echo "arch: none" fi } > "${TMP_DIR}/PJ-Keys.txt" diff --git a/src/cpp/Attachment.cpp b/src/cpp/Attachment.cpp index 1a2c755..9a365b9 100644 --- a/src/cpp/Attachment.cpp +++ b/src/cpp/Attachment.cpp @@ -60,8 +60,10 @@ const string Attachment::contentIDToken ("content-id"); const string Attachment::PLAIN ("plain"); const string 
Attachment::HTML ("html"); const string Attachment::RELATED ("related"); +const string Attachment::SIGNED ("signed"); const string Attachment::ALTERNATIVE ("alternative"); const string Attachment::KAZ_ATTACH_NAME (".---KazAttachment---.html"); +const string Attachment::MULTIPART ("multipart/"); const regex Attachment::nameCharsetRegEx (".*name\\*=[ \t]*(.*)"); @@ -76,7 +78,7 @@ const regex Attachment::nameRegEx (".*name=[ \t]*((\"(\\\\.|[^\\\\\r])*\")|[^\ const regex Attachment::boundaryRegEx (".*boundary=[ \t]*((\"(\\\\.|[^\\\\])*\")|[^; ]*);?.*"); const regex Attachment::cidDefRegEx (".*<([^>]*)>.*"); const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*"); -const regex Attachment::multiRegEx ("\\s*multipart/(mixed|"+RELATED+"|"+ALTERNATIVE+").*"); +const regex Attachment::multiRegEx ("\\s*"+MULTIPART+"(mixed|"+RELATED+"|"+ALTERNATIVE+"|"+SIGNED+").*"); const string Attachment::IMG_BEGIN (""); @@ -85,7 +87,6 @@ const string Attachment::IMG_END (">"); static const string SRC_BEGIN ("SRC=\""); static const string RFC822 ("message/rfc822"); -static const string MULTIPART ("multipart/"); // ================================================================================ string @@ -270,6 +271,7 @@ Attachment::Attachment (ifstream &mbox, const int &level, const streamoff beginI toUpdate (false), toDisclaim (false), isKazAttachment (false), + isSigned (false), boundaryMiddleSize (0) { DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level); readMime (mbox, curPos); @@ -321,6 +323,9 @@ Attachment::readMime (ifstream &mbox, streamoff &curPos) { cid = getProp (contentIDToken, cidDefRegEx); if (caseInsensitiveFind (getContentType (), MULTIPART) != string::npos) { + string multiProp = getProp (contentTypeToken, multiRegEx); + if (SIGNED == multiProp) + isSigned = true; boundary = getProp (contentTypeToken, boundaryRegEx); removeQuote (boundary); LOG ("boundary: " << boundary); @@ -416,18 +421,19 @@ Attachment::markDisclaim (bool 
&plainMarked, bool &htmlMarked) { // ================================================================================ bool -Attachment::markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs) { +Attachment::markSignificant (const string &parentMultiProp, const bool &parentSigned, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs) { DEF_LOG ("Attachment::markSignificant", "parentMultiProp: " << parentMultiProp << " minAttachSize: " << minAttachSize); + isSigned |= parentSigned; string textProp = getProp (contentTypeToken, textRegEx); bool cantBeExtract ((parentMultiProp == ALTERNATIVE && (textProp == PLAIN || textProp == HTML)) || (parentMultiProp == RELATED && textProp == HTML)); string multiProp = getProp (contentTypeToken, multiRegEx); for (Attachment &sub : subAttachements) - cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs); + cantBeExtract |= sub.markSignificant (multiProp, parentSigned || isSigned, minAttachSize, mbox, allMarkedPtrs); if (getProp (contentTypeToken, textRegEx) == HTML) { - if (KAZ_ATTACH_NAME == getAttachName ()) { + if (KAZ_ATTACH_NAME == getAttachName ()) isKazAttachment = true; - } else { + else { string content = getContent (mbox); vector imgs; getSection (content, IMG_BEGIN, IMG_END, imgs); @@ -438,16 +444,27 @@ Attachment::markSignificant (const string &parentMultiProp, const streamoff &min } cantBeExtract |= toUpdate; if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract) - cantBeExtract = toExtract = true; // XXX cantBeExtract ? 
+ cantBeExtract = toExtract = true; if (toExtract || toUpdate || toDisclaim || isKazAttachment) allMarkedPtrs.push_back (this); return cantBeExtract; } +// ================================================================================ +string +Attachment::getMime (ifstream &mbox) const { + DEF_LOG ("Attachment::getMime", "beginPos: " << beginPos << " contentPos: " << contentPos); + string mime; + mime.resize (contentPos-beginPos); + mbox.seekg (beginPos, ios::beg); + mbox.read (&mime[0], contentPos-beginPos); + return mime; +} + // ================================================================================ string Attachment::getContent (ifstream &mbox) const { - DEF_LOG ("Attachment::getContent", "contentPos: " << contentPos); + DEF_LOG ("Attachment::getContent", "contentPos: " << contentPos << " endPos: " << endPos); string content; content.resize (endPos-contentPos); mbox.seekg (contentPos, ios::beg); @@ -505,7 +522,9 @@ Attachment::replaceEmbedded (string &content) const { ostream& kaz::operator << (ostream& os, const Attachment& attachment) { string prop, sep; - if (attachment.toExtract) { prop = "to extract"; sep = ", "; } + if (attachment.isSigned) { prop += sep+"signed"; sep = ", "; } + if (attachment.isKazAttachment) { prop += sep+"kazDisclaim"; sep = ", "; } + if (attachment.toExtract) { prop += sep+"to extract"; sep = ", "; } if (attachment.toUpdate) { prop += sep+"need update"; sep = ", "; } if (attachment.toDisclaim) { prop += sep+"need diclaim"; sep = ", "; } if (attachment.embeddedData.size ()) { prop += sep+"embeddedData"; } diff --git a/src/cpp/MainAttachment.cpp b/src/cpp/MainAttachment.cpp index 5338138..c534d45 100644 --- a/src/cpp/MainAttachment.cpp +++ b/src/cpp/MainAttachment.cpp @@ -177,8 +177,8 @@ MainAttachment::readArchiveUrl () { archiveDownloadURL.clear (); string line; getline (cin, line); - LOG_BUG (line.rfind ("arch: ", 0) != 0, return, "eMailShrinker: bug 9: no archive link. 
(line: " << line << ")"); - LOG_BUG (line.rfind ("arch: bad", 0) == 0, return, "eMailShrinker: bug 10: bad archive link. (line: " << line << ")"); + LOG_BUG (line.rfind ("arch: ", 0) != 0, return, "eMailShrinker: bug M9: no archive link. (line: " << line << ")"); + LOG_BUG (line.rfind ("arch: bad", 0) == 0, return, "eMailShrinker: bug M10: bad archive link. (line: " << line << ")"); if (line.rfind ("arch: none", 0) == 0) return; archiveDownloadURL = line.substr (6); @@ -192,7 +192,7 @@ MainAttachment::readDownloadUrl (string &url) { string line; getline (cin, line); LOG ("get URL: " << line); - LOG_BUG (line.rfind ("url: ", 0) != 0, return, "eMailShrinker: bug 11: no download link. (line: " << line << ")"); + LOG_BUG (line.rfind ("url: ", 0) != 0, return, "eMailShrinker: bug M11: no download link. (line: " << line << ")"); url = line.substr (5); } @@ -231,7 +231,7 @@ MainAttachment::getDisclaim (string &plain, string &html) const { int linkCount (0); string plainNewLinks, htmlNewLinks; for (Attachment *attachP : allMarkedPtrs) { - if (!attachP->toExtract) + if (attachP->isSigned || !attachP->toExtract) continue; addLink (plainNewLinks, htmlNewLinks, attachP->downloadUrl, attachP->getAttachName ()); ++linkCount; @@ -240,7 +240,7 @@ MainAttachment::getDisclaim (string &plain, string &html) const { // previousLinks.erase (attachP->downloadUrl); } for (Attachment *attachP : allMarkedPtrs) { - if (!attachP->embeddedData.size ()) + if (attachP->isSigned || !attachP->embeddedData.size ()) continue; for (EmbeddedData &embedded : attachP->embeddedData) { addLink (plainNewLinks, htmlNewLinks, embedded.downloadUrl, embedded.name); @@ -449,7 +449,7 @@ MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox) bool plainMarked (false), htmlMarked (false); markDisclaim (plainMarked, htmlMarked); emptyEMail = ! 
(plainMarked || htmlMarked); - Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs); + Attachment::markSignificant ("", isSigned, minAttachSize, mbox, allMarkedPtrs); } // ================================================================================ @@ -492,7 +492,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const { int attachCount (0); string dirName, mediaName; for (Attachment *attachP : allMarkedPtrs) { - if (attachP->isKazAttachment || !attachP->toExtract) + if (attachP->isSigned || attachP->isKazAttachment || !attachP->toExtract) continue; newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName); ++attachCount; @@ -543,7 +543,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const { cout << dirName << endl; } for (Attachment *attachP : allMarkedPtrs) { - if (!attachP->embeddedData.size ()) + if (attachP->isSigned || !attachP->embeddedData.size ()) continue; string content = attachP->getContent (mbox); vector imgs; @@ -575,7 +575,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min removePreviousArchive (); map translateHtml; for (Attachment *attachP : allMarkedPtrs) - if (attachP->toExtract && !attachP->isKazAttachment) { + if (!attachP->isSigned && attachP->toExtract && !attachP->isKazAttachment) { readDownloadUrl (attachP->downloadUrl); if (attachP->downloadUrl.empty ()) { LOG ("no change"); @@ -589,7 +589,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min } } for (Attachment *attachP : allMarkedPtrs) { - if (!attachP->embeddedData.size ()) + if (attachP->isSigned || !attachP->embeddedData.size ()) continue; for (EmbeddedData &embedded : attachP->embeddedData) readDownloadUrl (embedded.downloadUrl); @@ -601,11 +601,32 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min getDisclaim (plainDisclaim, htmlDisclaim); // copy email - streamoff curPos = 0; - if 
(boundary.empty () && plainDisclaim.size () && (attachMode & ATTACHMENT)) { - // XXX if no multipart ? - LOG_BUG (true, /* */, "eMailShrinker: bug 12: not multipart."); + if (plainDisclaim.size () && emptyEMail && boundary.empty ()) { + // only one attachment must be replaced + cerr << "eMailShrinker: force one attachment" << endl; + string mime (getMime (mbox)); + string::size_type startPos = (0); + for (string token : {string ("Content-Transfer-Encoding"), Attachment::contentTypeToken}) { + startPos = caseInsensitiveFind (mime, token); /* look up the header named by this iteration, not always the first one */ + for (string::size_type stopPos (startPos); + (stopPos = mime.find ("\n", stopPos)) != string::npos; + ++stopPos /* step past this newline so a folded (continuation) header line is not found again forever */) { + if (string (" \t").find (mime [stopPos+1]) == string::npos) { + mime.erase (startPos, stopPos-startPos); + break; + } + } + } + mime.insert (startPos, KAZ_EMPTY_TEXT_PLAIN); + string content (plainDisclaim); + base64Encode (content); + outbox << mime + << content << endl; + outbox.flush (); + outbox.close (); + return; } + streamoff curPos = 0; copy (mbox, outbox, curPos, contentPos); curPos = contentPos; @@ -626,9 +647,12 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min copy (mbox, outbox, curPos, attachP->beginInParent); LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. 
pos: " << attachP->beginPos); - if (attachP->toExtract || attachP->isKazAttachment) { - LOG ("skip Extracted or previous attachments"); + if (attachP->isSigned) { + LOG ("don't change signed content"); + copy (mbox, outbox, attachP->beginInParent, attachP->endPos); + } else if (attachP->toExtract || attachP->isKazAttachment) { + LOG ("skip Extracted or previous attachments"); } else if (attachP->toUpdate) { string textProp = attachP->getProp (contentTypeToken, textRegEx); @@ -718,7 +742,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min outbox.flush (); } copy (mbox, outbox, curPos, endPos); - outbox << endl; + //outbox << endl; outbox.close (); } diff --git a/src/cpp/eMailShrinker.cpp b/src/cpp/eMailShrinker.cpp index 1af815e..9165e9b 100644 --- a/src/cpp/eMailShrinker.cpp +++ b/src/cpp/eMailShrinker.cpp @@ -33,8 +33,8 @@ //////////////////////////////////////////////////////////////////////////// #include "version.hpp" -const std::string kaz::LAST_VERSION_NUM ("2.7"); -const std::string kaz::LAST_VERSION_DATE ("2022-12-19"); +const std::string kaz::LAST_VERSION_NUM ("2.8"); +const std::string kaz::LAST_VERSION_DATE ("2022-12-23"); const std::string kaz::LAST_VERSION (LAST_VERSION_NUM+" "+LAST_VERSION_DATE+" eMailShrinker"); #include diff --git a/src/cpp/kazMisc.cpp b/src/cpp/kazMisc.cpp index ae31a52..d0cf270 100644 --- a/src/cpp/kazMisc.cpp +++ b/src/cpp/kazMisc.cpp @@ -259,17 +259,22 @@ kaz::base64Decode (string &content) { LOG ("len: " << len); unsigned char buff[4]; int idx = 0; - string::iterator p (content.begin ()), q (p); + string::iterator p (content.begin ()), q (p), lastOK (p); for (; p < content.end (); ++p) { char c = *p; if (c == '=') break; - if (c == '\n') + if (c == '\n') { + lastOK = p; continue; + } - LOG_BUG (!isBase64 (c), return, "kazMisc::base64Decode bug: bad base64 format. 
(content: " << content << ")"); + if (!isBase64 (c)) { /* invalid character: keep only the data up to the last complete line and stop decoding */ + content.resize (lastOK-content.begin ()); + LOG ("kazMisc::base64Decode bug: bad base64 format. (content: " << content << ")"); break; /* do not feed the invalid character to getBase64Val */ + } buff [idx] = getBase64Val (c); if (++idx != 4) continue; diff --git a/src/include/Attachment.hpp b/src/include/Attachment.hpp index 595d097..758b009 100644 --- a/src/include/Attachment.hpp +++ b/src/include/Attachment.hpp @@ -54,7 +54,7 @@ namespace kaz { //static const vector stringsToUpdate; static vector stringsToUpdate; /*! mime tokens */ - static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, RELATED, ALTERNATIVE, KAZ_ATTACH_NAME; + static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, MULTIPART, RELATED, ALTERNATIVE, SIGNED, KAZ_ATTACH_NAME; /*! pattern to extract mime values */ static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx; @@ -100,7 +100,7 @@ namespace kaz { /*! char position of attachment content */ streamoff contentPos, endPos; /*! properties of the attachment */ - bool toExtract, toUpdate, toDisclaim, isKazAttachment; + bool toExtract, toUpdate, toDisclaim, isKazAttachment, isSigned; /*! id of an image embedded in mbox */ string cid; /*! url to replace the attachment */ @@ -131,7 +131,9 @@ namespace kaz { /*! recursively marks alternative attachments to be disclaim */ void markDisclaim (bool &plainMarked, bool &htmlMarked); /*! recursively marks big attachments to be removed and upated (including disclaim). return true when part need to be updated (can't be extracted). 
*/ - bool markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs); + bool markSignificant (const string &parentMultiProp, const bool &parentSigned, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs); + /*! get a copy of mime header */ + string getMime (ifstream &mbox) const; /*! get a copy of the content. Base64 is decoded. Quoted-Printable is unwarp and unquoted */ string getContent (ifstream &mbox) const; /*! write the content, encoded if necessary (base64 and quoted-printable) */