diff --git a/README.md b/README.md
index 9ff349d..7dae8b3 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ depollueur/
## Compilation
```bash
-sudo apt-get install --fix-missing build-essential make g++ libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libcurl4-gnutls-dev libssl-dev doxygen
+sudo apt-get install --fix-missing build-essential make g++ libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libcurl4-gnutls-dev libssl-dev doxygen dos2unix
git clone https://git.kaz.bzh/KAZ/depollueur.git
# or for contributors :
diff --git a/src/bash/filterTest.sh b/src/bash/filterTest.sh
old mode 100755
new mode 100644
index d7560e0..e9ca941
--- a/src/bash/filterTest.sh
+++ b/src/bash/filterTest.sh
@@ -33,6 +33,7 @@ usage () {
########################################
mbox=$(realpath $1)
+dos2unix "${mbox}"
cd $(dirname $0)
DOMAINNAME="$(cat domainname)"
JIRAFEAU_URL="https://depot.${DOMAINNAME}"
@@ -126,7 +127,7 @@ cat "${TMP_DIR}/PJ-name.txt" | {
esac
}
else
- LOG " - ${GREEN} no archive${NC}"
+ LOG " - ${GREEN}no archive${NC}"
echo "arch: none"
fi
} > "${TMP_DIR}/PJ-Keys.txt"
diff --git a/src/cpp/Attachment.cpp b/src/cpp/Attachment.cpp
index 1a2c755..9a365b9 100644
--- a/src/cpp/Attachment.cpp
+++ b/src/cpp/Attachment.cpp
@@ -60,8 +60,10 @@ const string Attachment::contentIDToken ("content-id");
const string Attachment::PLAIN ("plain");
const string Attachment::HTML ("html");
const string Attachment::RELATED ("related");
+const string Attachment::SIGNED ("signed");
const string Attachment::ALTERNATIVE ("alternative");
const string Attachment::KAZ_ATTACH_NAME (".---KazAttachment---.html");
+const string Attachment::MULTIPART ("multipart/");
const regex Attachment::nameCharsetRegEx (".*name\\*=[ \t]*(.*)");
@@ -76,7 +78,7 @@ const regex Attachment::nameRegEx (".*name=[ \t]*((\"(\\\\.|[^\\\\\r])*\")|[^\
const regex Attachment::boundaryRegEx (".*boundary=[ \t]*((\"(\\\\.|[^\\\\])*\")|[^; ]*);?.*");
const regex Attachment::cidDefRegEx (".*<([^>]*)>.*");
const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*");
-const regex Attachment::multiRegEx ("\\s*multipart/(mixed|"+RELATED+"|"+ALTERNATIVE+").*");
+const regex Attachment::multiRegEx ("\\s*"+MULTIPART+"(mixed|"+RELATED+"|"+ALTERNATIVE+"|"+SIGNED+").*");
const string Attachment::IMG_BEGIN ("");
@@ -85,7 +87,6 @@ const string Attachment::IMG_END (">");
static const string SRC_BEGIN ("SRC=\"");
static const string RFC822 ("message/rfc822");
-static const string MULTIPART ("multipart/");
// ================================================================================
string
@@ -270,6 +271,7 @@ Attachment::Attachment (ifstream &mbox, const int &level, const streamoff beginI
toUpdate (false),
toDisclaim (false),
isKazAttachment (false),
+ isSigned (false),
boundaryMiddleSize (0) {
DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level);
readMime (mbox, curPos);
@@ -321,6 +323,9 @@ Attachment::readMime (ifstream &mbox, streamoff &curPos) {
cid = getProp (contentIDToken, cidDefRegEx);
if (caseInsensitiveFind (getContentType (), MULTIPART) != string::npos) {
+ string multiProp = getProp (contentTypeToken, multiRegEx);
+ if (SIGNED == multiProp)
+ isSigned = true;
boundary = getProp (contentTypeToken, boundaryRegEx);
removeQuote (boundary);
LOG ("boundary: " << boundary);
@@ -416,18 +421,19 @@ Attachment::markDisclaim (bool &plainMarked, bool &htmlMarked) {
// ================================================================================
bool
-Attachment::markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs) {
+Attachment::markSignificant (const string &parentMultiProp, const bool &parentSigned, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs) {
DEF_LOG ("Attachment::markSignificant", "parentMultiProp: " << parentMultiProp << " minAttachSize: " << minAttachSize);
+ isSigned |= parentSigned;
string textProp = getProp (contentTypeToken, textRegEx);
bool cantBeExtract ((parentMultiProp == ALTERNATIVE && (textProp == PLAIN || textProp == HTML)) ||
(parentMultiProp == RELATED && textProp == HTML));
string multiProp = getProp (contentTypeToken, multiRegEx);
for (Attachment &sub : subAttachements)
- cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs);
+ cantBeExtract |= sub.markSignificant (multiProp, parentSigned || isSigned, minAttachSize, mbox, allMarkedPtrs);
if (getProp (contentTypeToken, textRegEx) == HTML) {
- if (KAZ_ATTACH_NAME == getAttachName ()) {
+ if (KAZ_ATTACH_NAME == getAttachName ())
isKazAttachment = true;
- } else {
+ else {
string content = getContent (mbox);
vector imgs;
getSection (content, IMG_BEGIN, IMG_END, imgs);
@@ -438,16 +444,27 @@ Attachment::markSignificant (const string &parentMultiProp, const streamoff &min
}
cantBeExtract |= toUpdate;
if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract)
- cantBeExtract = toExtract = true; // XXX cantBeExtract ?
+ cantBeExtract = toExtract = true;
if (toExtract || toUpdate || toDisclaim || isKazAttachment)
allMarkedPtrs.push_back (this);
return cantBeExtract;
}
+// ================================================================================
+string
+Attachment::getMime (ifstream &mbox) const {
+ DEF_LOG ("Attachment::getMime", "beginPos: " << beginPos << " contentPos: " << contentPos);
+ string mime;
+ mime.resize (contentPos-beginPos);
+ mbox.seekg (beginPos, ios::beg);
+ mbox.read (&mime[0], contentPos-beginPos);
+ return mime;
+}
+
// ================================================================================
string
Attachment::getContent (ifstream &mbox) const {
- DEF_LOG ("Attachment::getContent", "contentPos: " << contentPos);
+ DEF_LOG ("Attachment::getContent", "contentPos: " << contentPos << " endPos: " << endPos);
string content;
content.resize (endPos-contentPos);
mbox.seekg (contentPos, ios::beg);
@@ -505,7 +522,9 @@ Attachment::replaceEmbedded (string &content) const {
ostream&
kaz::operator << (ostream& os, const Attachment& attachment) {
string prop, sep;
- if (attachment.toExtract) { prop = "to extract"; sep = ", "; }
+ if (attachment.isSigned) { prop += sep+"signed"; sep = ", "; }
+ if (attachment.isKazAttachment) { prop += sep+"kazDisclaim"; sep = ", "; }
+ if (attachment.toExtract) { prop += sep+"to extract"; sep = ", "; }
if (attachment.toUpdate) { prop += sep+"need update"; sep = ", "; }
if (attachment.toDisclaim) { prop += sep+"need diclaim"; sep = ", "; }
if (attachment.embeddedData.size ()) { prop += sep+"embeddedData"; }
diff --git a/src/cpp/MainAttachment.cpp b/src/cpp/MainAttachment.cpp
index 5338138..c534d45 100644
--- a/src/cpp/MainAttachment.cpp
+++ b/src/cpp/MainAttachment.cpp
@@ -177,8 +177,8 @@ MainAttachment::readArchiveUrl () {
archiveDownloadURL.clear ();
string line;
getline (cin, line);
- LOG_BUG (line.rfind ("arch: ", 0) != 0, return, "eMailShrinker: bug 9: no archive link. (line: " << line << ")");
- LOG_BUG (line.rfind ("arch: bad", 0) == 0, return, "eMailShrinker: bug 10: bad archive link. (line: " << line << ")");
+ LOG_BUG (line.rfind ("arch: ", 0) != 0, return, "eMailShrinker: bug M9: no archive link. (line: " << line << ")");
+ LOG_BUG (line.rfind ("arch: bad", 0) == 0, return, "eMailShrinker: bug M10: bad archive link. (line: " << line << ")");
if (line.rfind ("arch: none", 0) == 0)
return;
archiveDownloadURL = line.substr (6);
@@ -192,7 +192,7 @@ MainAttachment::readDownloadUrl (string &url) {
string line;
getline (cin, line);
LOG ("get URL: " << line);
- LOG_BUG (line.rfind ("url: ", 0) != 0, return, "eMailShrinker: bug 11: no download link. (line: " << line << ")");
+ LOG_BUG (line.rfind ("url: ", 0) != 0, return, "eMailShrinker: bug M11: no download link. (line: " << line << ")");
url = line.substr (5);
}
@@ -231,7 +231,7 @@ MainAttachment::getDisclaim (string &plain, string &html) const {
int linkCount (0);
string plainNewLinks, htmlNewLinks;
for (Attachment *attachP : allMarkedPtrs) {
- if (!attachP->toExtract)
+ if (attachP->isSigned || !attachP->toExtract)
continue;
addLink (plainNewLinks, htmlNewLinks, attachP->downloadUrl, attachP->getAttachName ());
++linkCount;
@@ -240,7 +240,7 @@ MainAttachment::getDisclaim (string &plain, string &html) const {
// previousLinks.erase (attachP->downloadUrl);
}
for (Attachment *attachP : allMarkedPtrs) {
- if (!attachP->embeddedData.size ())
+ if (attachP->isSigned || !attachP->embeddedData.size ())
continue;
for (EmbeddedData &embedded : attachP->embeddedData) {
addLink (plainNewLinks, htmlNewLinks, embedded.downloadUrl, embedded.name);
@@ -449,7 +449,7 @@ MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox)
bool plainMarked (false), htmlMarked (false);
markDisclaim (plainMarked, htmlMarked);
emptyEMail = ! (plainMarked || htmlMarked);
- Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
+ Attachment::markSignificant ("", isSigned, minAttachSize, mbox, allMarkedPtrs);
}
// ================================================================================
@@ -492,7 +492,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
int attachCount (0);
string dirName, mediaName;
for (Attachment *attachP : allMarkedPtrs) {
- if (attachP->isKazAttachment || !attachP->toExtract)
+ if (attachP->isSigned || attachP->isKazAttachment || !attachP->toExtract)
continue;
newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
++attachCount;
@@ -543,7 +543,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
cout << dirName << endl;
}
for (Attachment *attachP : allMarkedPtrs) {
- if (!attachP->embeddedData.size ())
+ if (attachP->isSigned || !attachP->embeddedData.size ())
continue;
string content = attachP->getContent (mbox);
vector imgs;
@@ -575,7 +575,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
removePreviousArchive ();
map translateHtml;
for (Attachment *attachP : allMarkedPtrs)
- if (attachP->toExtract && !attachP->isKazAttachment) {
+ if (!attachP->isSigned && attachP->toExtract && !attachP->isKazAttachment) {
readDownloadUrl (attachP->downloadUrl);
if (attachP->downloadUrl.empty ()) {
LOG ("no change");
@@ -589,7 +589,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
}
}
for (Attachment *attachP : allMarkedPtrs) {
- if (!attachP->embeddedData.size ())
+ if (attachP->isSigned || !attachP->embeddedData.size ())
continue;
for (EmbeddedData &embedded : attachP->embeddedData)
readDownloadUrl (embedded.downloadUrl);
@@ -601,11 +601,40 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
getDisclaim (plainDisclaim, htmlDisclaim);
// copy email
- streamoff curPos = 0;
- if (boundary.empty () && plainDisclaim.size () && (attachMode & ATTACHMENT)) {
- // XXX if no multipart ?
- LOG_BUG (true, /* */, "eMailShrinker: bug 12: not multipart.");
+ if (plainDisclaim.size () && emptyEMail && boundary.empty ()) {
+ // no boundary (not multipart): write the rewritten mime header followed by the base64-encoded disclaimer
+ cerr << "eMailShrinker: force one attachment" << endl;
+ string mime (getMime (mbox));
+ // position of the last header removed: the replacement header is inserted there
+ string::size_type insertPos (string::npos);
+ for (const string &token : {string ("Content-Transfer-Encoding"), Attachment::contentTypeToken}) {
+ const string::size_type startPos (caseInsensitiveFind (mime, token));
+ if (startPos == string::npos)
+ continue;
+ insertPos = startPos;
+ // the header stops at the first newline not followed by a folded line (leading space or tab)
+ for (string::size_type stopPos (startPos);
+ (stopPos = mime.find ("\n", stopPos)) != string::npos;
+ ++stopPos) {
+ if (string (" \t").find (mime [stopPos+1]) == string::npos) {
+ mime.erase (startPos, stopPos-startPos);
+ break;
+ }
+ }
+ }
+ if (insertPos == string::npos)
+ cerr << "eMailShrinker: no content header to replace" << endl;
+ else
+ mime.insert (insertPos, KAZ_EMPTY_TEXT_PLAIN);
+ string content (plainDisclaim);
+ base64Encode (content);
+ outbox << mime
+ << content << endl;
+ outbox.flush ();
+ outbox.close ();
+ return;
}
+ streamoff curPos = 0;
copy (mbox, outbox, curPos, contentPos);
curPos = contentPos;
@@ -626,9 +647,12 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
copy (mbox, outbox, curPos, attachP->beginInParent);
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
- if (attachP->toExtract || attachP->isKazAttachment) {
- LOG ("skip Extracted or previous attachments");
+ if (attachP->isSigned) {
+ LOG ("don't change signed content");
+ copy (mbox, outbox, attachP->beginInParent, attachP->endPos);
+ } else if (attachP->toExtract || attachP->isKazAttachment) {
+ LOG ("skip Extracted or previous attachments");
} else if (attachP->toUpdate) {
string textProp = attachP->getProp (contentTypeToken, textRegEx);
@@ -718,7 +742,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
outbox.flush ();
}
copy (mbox, outbox, curPos, endPos);
- outbox << endl;
+ //outbox << endl;
outbox.close ();
}
diff --git a/src/cpp/eMailShrinker.cpp b/src/cpp/eMailShrinker.cpp
index 1af815e..9165e9b 100644
--- a/src/cpp/eMailShrinker.cpp
+++ b/src/cpp/eMailShrinker.cpp
@@ -33,8 +33,8 @@
////////////////////////////////////////////////////////////////////////////
#include "version.hpp"
-const std::string kaz::LAST_VERSION_NUM ("2.7");
-const std::string kaz::LAST_VERSION_DATE ("2022-12-19");
+const std::string kaz::LAST_VERSION_NUM ("2.8");
+const std::string kaz::LAST_VERSION_DATE ("2022-12-23");
const std::string kaz::LAST_VERSION (LAST_VERSION_NUM+" "+LAST_VERSION_DATE+" eMailShrinker");
#include
diff --git a/src/cpp/kazMisc.cpp b/src/cpp/kazMisc.cpp
index ae31a52..d0cf270 100644
--- a/src/cpp/kazMisc.cpp
+++ b/src/cpp/kazMisc.cpp
@@ -259,17 +259,24 @@ kaz::base64Decode (string &content) {
LOG ("len: " << len);
unsigned char buff[4];
int idx = 0;
- string::iterator p (content.begin ()), q (p);
+ string::iterator p (content.begin ()), q (p), lastOK (p);
for (;
p < content.end ();
++p) {
char c = *p;
if (c == '=')
break;
- if (c == '\n')
+ if (c == '\n') {
+ lastOK = p;
continue;
+ }
- LOG_BUG (!isBase64 (c), return, "kazMisc::base64Decode bug: bad base64 format. (content: " << content << ")");
+ if (!isBase64 (c)) {
+ // invalid character: truncate the content at the last newline seen and stop decoding
+ content.resize (lastOK-content.begin ());
+ LOG ("kazMisc::base64Decode bug: bad base64 format. (content: " << content << ")");
+ break;
+ }
buff [idx] = getBase64Val (c);
if (++idx != 4)
continue;
diff --git a/src/include/Attachment.hpp b/src/include/Attachment.hpp
index 595d097..758b009 100644
--- a/src/include/Attachment.hpp
+++ b/src/include/Attachment.hpp
@@ -54,7 +54,7 @@ namespace kaz {
//static const vector stringsToUpdate;
static vector stringsToUpdate;
/*! mime tokens */
- static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, RELATED, ALTERNATIVE, KAZ_ATTACH_NAME;
+ static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, MULTIPART, RELATED, ALTERNATIVE, SIGNED, KAZ_ATTACH_NAME;
/*! pattern to extract mime values */
static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx;
@@ -100,7 +100,7 @@ namespace kaz {
/*! char position of attachment content */
streamoff contentPos, endPos;
/*! properties of the attachment */
- bool toExtract, toUpdate, toDisclaim, isKazAttachment;
+ bool toExtract, toUpdate, toDisclaim, isKazAttachment, isSigned;
/*! id of an image embedded in mbox */
string cid;
/*! url to replace the attachment */
@@ -131,7 +131,9 @@ namespace kaz {
/*! recursively marks alternative attachments to be disclaim */
void markDisclaim (bool &plainMarked, bool &htmlMarked);
/*! recursively marks big attachments to be removed and upated (including disclaim). return true when part need to be updated (can't be extracted). */
- bool markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs);
+ bool markSignificant (const string &parentMultiProp, const bool &parentSigned, const streamoff &minAttachSize, ifstream &mbox, vector &allMarkedPtrs);
+ /*! get a copy of mime header */
+ string getMime (ifstream &mbox) const;
/*! get a copy of the content. Base64 is decoded. Quoted-Printable is unwarp and unquoted */
string getContent (ifstream &mbox) const;
/*! write the content, encoded if necessary (base64 and quoted-printable) */