François
3 years ago
15 changed files with 3075 additions and 0 deletions
@ -0,0 +1,33 @@ |
|||
//////////////////////////////////////////////////////////////////////////// |
|||
// Copyright KAZ 2021 // |
|||
// // |
|||
// contact (at) kaz.bzh // |
|||
// // |
|||
// This software is a filter to shrink email by attachment extraction. // |
|||
// // |
|||
// This software is governed by the CeCILL-B license under French law and // |
|||
// abiding by the rules of distribution of free software. You can use, // |
|||
// modify and/or redistribute the software under the terms of the // |
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following // |
|||
// URL "http://www.cecill.info". // |
|||
// // |
|||
// As a counterpart to the access to the source code and rights to copy, // |
|||
// modify and redistribute granted by the license, users are provided // |
|||
// only with a limited warranty and the software's author, the holder of // |
|||
// the economic rights, and the successive licensors have only limited // |
|||
// liability. // |
|||
// // |
|||
// In this respect, the user's attention is drawn to the risks associated // |
|||
// with loading, using, modifying and/or developing or reproducing the // |
|||
// software by the user in light of its specific status of free software, // |
|||
// that may mean that it is complicated to manipulate, and that also // |
|||
// therefore means that it is reserved for developers and experienced // |
|||
// professionals having in-depth computer knowledge. Users are therefore // |
|||
// encouraged to load and test the software's suitability as regards // |
|||
// their requirements in conditions enabling the security of their // |
|||
// systems and/or data to be ensured and, more generally, to use and // |
|||
// operate it in the same conditions as regards security. // |
|||
// // |
|||
// The fact that you are presently reading this means that you have had // |
|||
// knowledge of the CeCILL-B license and that you accept its terms. // |
|||
//////////////////////////////////////////////////////////////////////////// |
@ -0,0 +1,504 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <set> |
|||
#include <fstream> |
|||
#include <iomanip> |
|||
#include <math.h> |
|||
#include <algorithm> |
|||
#include <unistd.h> |
|||
#include <boost/algorithm/string.hpp> |
|||
|
|||
#include "kazDebug.hpp" |
|||
#include "kazMisc.hpp" |
|||
#include "SizeArg.hpp" |
|||
#include "Attachment.hpp" |
|||
|
|||
using namespace std; |
|||
using namespace kaz; |
|||
|
|||
// ================================================================================
|
|||
const string Attachment::contentTypeToken ("content-type"); |
|||
const string Attachment::contentDispositionToken ("content-disposition"); |
|||
const string Attachment::contentTransferEncodingToken ("content-transfer-encoding"); |
|||
const string Attachment::base64Token ("base64"); |
|||
const string Attachment::quotedPrintableToken ("quoted-printable"); |
|||
const string Attachment::contentIDToken ("content-id"); |
|||
const string Attachment::PLAIN ("plain"); |
|||
const string Attachment::HTML ("html"); |
|||
const string Attachment::RELATED ("related"); |
|||
const string Attachment::ALTERNATIVE ("alternative"); |
|||
|
|||
|
|||
const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)"); |
|||
const regex Attachment::nameRegEx (".*name=\"([^\"]*)\".*"); |
|||
const regex Attachment::boundaryRegEx (".*boundary=\"?([^\" ]*)\"?.*"); |
|||
const regex Attachment::cidDefRegEx (".*<([^>]*)>.*"); |
|||
const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*"); |
|||
const regex Attachment::multiRegEx ("\\s*multipart/(mixed|"+RELATED+"|"+ALTERNATIVE+").*"); |
|||
|
|||
const string Attachment::IMG_BEGIN ("<IMG"); |
|||
const string Attachment::IMG_END (">"); |
|||
|
|||
|
|||
|
|||
static const string SRC_BEGIN ("SRC=\""); |
|||
static const string RFC822 ("message/rfc822"); |
|||
|
|||
// ================================================================================
|
|||
// Build a placeholder file name for a part that declares none.
// The name is "U-" + YYMMDDhhmmss + "-" + a per-process counter, followed by
// "." and the content sub-type when contentType contains a '/'.
// The timestamp is taken once, on the first call, and reused afterwards;
// only the counter changes between calls.
string
Attachment::getUnknown (const string &contentType) {
    DEF_LOG ("Attachment::getUnknown", "contentType: " << contentType);
    static const time_t firstCall = time (NULL);
    static int sequence = 0;

    const tm *const local = localtime (&firstCall);
    // tm_year counts from 1900, so subtracting 100 yields the two-digit year for 20xx.
    const int fields [] = {
        local->tm_year-100,
        1 + local->tm_mon,
        local->tm_mday,
        local->tm_hour,
        local->tm_min,
        local->tm_sec
    };

    ostringstream out;
    out << "U-" << std::setfill ('0');
    for (const int field : fields)
        out << std::setw (2) << field;
    out << "-" << sequence;

    const string::size_type slash = contentType.find ("/");
    if (slash != string::npos)
        out << "." << contentType.substr (slash+1);
    ++sequence;

    const string name = out.str ();
    LOG ("name: " << name);
    return name;
}
|||
|
|||
// ================================================================================
|
|||
void |
|||
Attachment::removeSection (string &content, const string &beginTag, const string &endTag) { |
|||
DEF_LOG ("Attachment::removeSection", "beginTag: " << beginTag << " endTag: " << endTag); |
|||
for (string::size_type startPos (0); |
|||
(startPos = caseInsensitiveFind (content, beginTag, startPos)) != string::npos; |
|||
) { |
|||
string::size_type stopPos = caseInsensitiveFind (content, endTag, startPos); |
|||
|
|||
LOG_BUG (stopPos == startPos, content.erase (startPos, endTag.length ()); continue, "eMailShrinker: bug A1: removeSection: no " << beginTag); |
|||
LOG_BUG (stopPos == string::npos, content.erase (startPos, beginTag.length ()); break, "eMailShrinker: bug A2: removeSection: no " << endTag); |
|||
LOG ("KAZ start: " << startPos << " stop: " << stopPos); |
|||
|
|||
content.erase (startPos, stopPos+endTag.length ()-startPos); |
|||
} |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Return the concatenation of every section of content enclosed between
// beginTag and endTag, as collected by the vector overload of getSection.
string
Attachment::getSection (const string &content, const string &beginTag, const string &endTag) {
    DEF_LOG ("Attachment::getSection", "beginTag: " << beginTag << " endTag: " << endTag << " content: " << content);
    vector<string> pieces;
    getSection (content, beginTag, endTag, pieces);

    // Size the result once before appending.
    size_t total = 0;
    for (vector<string>::const_iterator it = pieces.begin (); it != pieces.end (); ++it)
        total += it->length ();

    string result;
    result.reserve (total);
    for (vector<string>::const_iterator it = pieces.begin (); it != pieces.end (); ++it)
        result.append (*it);

    LOG ("result: " << result);
    return result;
}
|||
|
|||
// ================================================================================
|
|||
void |
|||
Attachment::getSection (const string &content, const string &beginTag, const string &endTag, vector<string> &result) { |
|||
DEF_LOG ("Attachment::getSection", "beginTag: " << beginTag << " endTag: " << endTag << " content: " << content); |
|||
for (string::size_type startPos (0); |
|||
(startPos = caseInsensitiveFind (content, beginTag, startPos)) != string::npos; |
|||
) { |
|||
LOG (beginTag << ": " << startPos); |
|||
string::size_type stopPos = caseInsensitiveFind (content, endTag, startPos); |
|||
|
|||
LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug A3: " << endTag << " not found! at: " << startPos); |
|||
LOG ("start: " << startPos << " stop: " << stopPos); |
|||
|
|||
LOG_BUG (startPos == stopPos, /**/, "eMailShrinker: bug A4: " << endTag << " without " << beginTag << " at: " << startPos); |
|||
if (startPos != stopPos) { |
|||
startPos += beginTag.length (); |
|||
result.push_back (content.substr (startPos, stopPos-startPos)); |
|||
} |
|||
startPos = stopPos+endTag.length (); |
|||
} |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Return the content-type header value up to (not including) its first ';',
// the whole value when it has no ';', or "" when the header is absent.
const string
Attachment::getContentType () const {
    const map<string, string>::const_iterator found = env.find (contentTypeToken);
    if (found == env.end ())
        return "";
    const string &value = found->second;
    // find returns npos when there is no ';' and substr (0, npos) is then the whole value.
    return value.substr (0, value.find (';'));
}
|||
|
|||
// Work out the file name of this part. Sources are tried in this order:
//  1. content-type         name="..."   (passed through encodedWord),
//  2. content-type         name*=...    (passed through charsetValue),
//  3. content-disposition  filename="..." (passed through encodedWord),
//  4. content-disposition  filename*=...  (passed through charsetValue),
//  5. a generated placeholder from getUnknown.
// Both regular expressions start with ".*name", so they match "filename" as well.
const string
Attachment::getAttachName () const {
    DEF_LOG ("Attachment::getAttachName", "");
    string result = getProp (contentTypeToken, nameRegEx);
    if (result.length ()) {
        LOG ("name=: " << result);
        encodedWord (result);
        return result;
    }
    result = getProp (contentTypeToken, nameCharsetRegEx);
    if (result.length ()) {
        LOG ("name*=: " << result);
        charsetValue (result);
        return result;
    }
    // XXX the value must be assembled when it is split over several filename*x= lines
    result = getProp (contentDispositionToken, nameRegEx);
    if (result.length ()) {
        LOG ("filename=: " << result);
        encodedWord (result);
        return result;
    }
    // XXX the value must be assembled when it is split over several filename*x*= lines
    // Fix: this lookup previously reused nameRegEx, which made the branch unreachable
    // (the same lookup had just returned an empty string above).
    result = getProp (contentDispositionToken, nameCharsetRegEx);
    if (result.length ()) {
        LOG ("filename*=: " << result);
        charsetValue (result);
        return result;
    }
    return getUnknown (getContentType ());
}
|||
|
|||
// Accessor for the boundary string set by readMime (empty when the headers declare none).
const string &
Attachment::getBoundary () const {
    return this->boundary;
}
|||
|
|||
// Distance between the end offset and the begin offset of this part (headers included).
const streamoff
Attachment::getSize () const {
    const streamoff span = endPos-beginPos;
    return span;
}
|||
|
|||
// Extract a property from the header named token.
// Returns "" when the header is absent or when its whole value does not match regEx;
// otherwise returns the value rewritten by regex_replace with "$1", i.e. capture group 1.
// Fix: the second parameter had been garbled to "®Ex" (mis-decoded "&reg"); it is "&regEx".
const string
Attachment::getProp (const string &token, const regex &regEx) const {
    DEF_LOG ("Attachment::getProp", "token: " << token);
    map<string, string>::const_iterator it (env.find (token));
    if (it == env.end ()) {
        LOG ("no token");
        return "";
    }
    const string &val (it->second);
    LOG ("val: " << val);
    // regex_match requires the complete value to match, not just a part of it.
    if (!regex_match (val.begin (), val.end (), regEx)) {
        LOG ("no prop");
        return "";
    }
    return regex_replace (val, regEx, "$1");
}
|||
|
|||
const bool |
|||
Attachment::isBase64Encoding () const { |
|||
return isDefProp (contentTransferEncodingToken, base64Token); |
|||
} |
|||
|
|||
const bool |
|||
Attachment::isQuotedPrintableEnconding () const { |
|||
return isDefProp (contentTransferEncodingToken, quotedPrintableToken); |
|||
} |
|||
|
|||
const bool |
|||
Attachment::isTextBase64 () const { |
|||
return !getProp (contentTypeToken, textRegEx).empty () && isBase64Encoding (); |
|||
} |
|||
|
|||
const bool |
|||
Attachment::isDefProp (const string &token, const string &val) const { |
|||
DEF_LOG ("Attachment::getProp", "getProp token: " << token << " val: " << val); |
|||
map<string, string>::const_iterator it (env.find (token)); |
|||
if (it == env.end ()) |
|||
return false; |
|||
// XXX case insensitive ??
|
|||
return it->second.find (val) != string::npos; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Build a part from mbox: read its MIME headers (readMime), then its nested
// parts (readBoundaries). curPos is the running offset in mbox; it is recorded
// as the start of this part and advanced by both reading steps.
// contentPos and endPos start at 0; readMime sets contentPos, and endPos is
// assigned later by the code that finds where this part ends.
Attachment::Attachment (ifstream &mbox, const int &level, const streamoff beginInParent, streamoff &curPos)
    : level (level),
      beginInParent (beginInParent),
      beginPos (curPos),
      contentPos (0),
      endPos (0),
      toExtract (false),
      toUpdate (false),
      toDisclaim (false),
      boundaryMiddleSize (0)
{
    DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level);
    readMime (mbox, curPos);
    readBoundaries (mbox, curPos);
}
|||
|
|||
// ================================================================================
|
|||
void |
|||
Attachment::readMime (ifstream &mbox, streamoff &curPos) { |
|||
DEF_LOG ("Attachment::readMime", "curPos: " << curPos); |
|||
string lastVar; |
|||
string line; |
|||
for (; getline (mbox, line); ) { |
|||
LOG ("pos: " << curPos << " line: " << line); |
|||
curPos += line.length () + 1; |
|||
if (line.empty ()) |
|||
break; |
|||
if (line[0] == ' ' || line[0] == '\t') { |
|||
if (lastVar.empty ()) { |
|||
|
|||
LOG_BUG (true, /**/, "eMailShrinker: bug A5: not compliant MIME. pos: " << (curPos - (line.length () + 1)) << " line: " << line); |
|||
} else { |
|||
LOG ("add line to var: " << line); |
|||
env.find (lastVar)->second += line; |
|||
LOG ("new val: " << env.find (lastVar)->second); |
|||
} |
|||
continue; |
|||
} |
|||
string::size_type colonPos = line.find (':'); |
|||
if (colonPos != string::npos) { |
|||
lastVar = line.substr (0, colonPos); |
|||
toLower (lastVar); |
|||
LOG ("find var: " << lastVar); |
|||
string val (line.length () >= colonPos+2 ? line.substr (colonPos+2) : ""); // XXX check RFC " " after ": "
|
|||
LOG ("new var: " << lastVar << " <=> " << val); |
|||
env [lastVar] = val; |
|||
} |
|||
} |
|||
LOG ("end of mime"); |
|||
|
|||
contentPos = curPos; |
|||
cid = getProp (contentIDToken, cidDefRegEx); |
|||
boundary = getProp (contentTypeToken, boundaryRegEx); |
|||
LOG ("boundary: " << boundary); |
|||
if (boundary.length ()) { |
|||
boundary = "--"+boundary+"--"; |
|||
boundaryMiddleSize = boundary.length () - 2; |
|||
} |
|||
LOG ("readMime contentPos: " << contentPos << " cid: " << cid << " boundary: " << boundary); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
Attachment::readBoundaries (ifstream &mbox, streamoff &curPos) { |
|||
DEF_LOG ("Attachment::readBoundaries", "curPos: " << curPos); |
|||
|
|||
if (caseInsensitiveFind (getContentType (), RFC822) != string::npos) { |
|||
subAttachements.push_back (Attachment (mbox, level+1, curPos, curPos)); |
|||
subAttachements.back ().endPos = curPos; |
|||
return; |
|||
} |
|||
if (boundary.empty ()) |
|||
return; |
|||
for (; nextBondary (mbox, curPos); ) |
|||
; |
|||
} |
|||
|
|||
bool |
|||
Attachment::nextBondary (ifstream &mbox, streamoff &curPos) { |
|||
DEF_LOG ("Attachment::nextBondary", "curPos: " << curPos << " boundary: " << boundary); |
|||
bool isTextBase64 (subAttachements.size () && subAttachements.back ().isTextBase64 ()); |
|||
LOG ("isTextBase64: " << isTextBase64 << " attach: " << *this); |
|||
for (string prev, line; getline (mbox, line); ) { |
|||
LOG ("curPos: " << curPos << " line: " << line); |
|||
streamoff lastPos = curPos; |
|||
curPos += line.length () + 1; |
|||
|
|||
string::size_type bpos = line.find (boundary.c_str (), 0, boundaryMiddleSize); |
|||
if (bpos == string::npos) { |
|||
string clearLine (line); |
|||
if (isTextBase64) |
|||
base64Decode (clearLine); |
|||
string couple (prev+clearLine); |
|||
for (vector <string>::iterator it = stringsToUpdate.begin (); |
|||
it != stringsToUpdate.end (); |
|||
++it) |
|||
if (couple.find (*it) != string::npos) { |
|||
LOG ("find: "+ *it); |
|||
subAttachements.back ().toUpdate = true; |
|||
} |
|||
prev = clearLine; |
|||
continue; |
|||
} |
|||
LOG ("find: " << boundary); |
|||
LOG ("lastPos: " << lastPos << " bpos: " << bpos << " boundaryMiddleSize: " << boundaryMiddleSize); |
|||
if (subAttachements.size ()) |
|||
subAttachements.back ().endPos = lastPos; |
|||
LOG ("line: " << line << "bpos+boundaryMiddleSize: " << (bpos+boundaryMiddleSize) << " find: " << line.find ("--", bpos+boundaryMiddleSize)); |
|||
bpos += boundaryMiddleSize; |
|||
if (line.find ("--", bpos) == bpos) { |
|||
LOG ("end"); |
|||
return false; |
|||
} |
|||
subAttachements.push_back (Attachment (mbox, level+1, lastPos, curPos)); |
|||
return true; |
|||
} |
|||
endPos = curPos; |
|||
return false; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
Attachment::markDisclaim (bool &plainMarked, bool &htmlMarked) { |
|||
if (plainMarked && htmlMarked) |
|||
return; |
|||
string multiProp = getProp (contentTypeToken, multiRegEx); |
|||
// LOG_BUG (multiProp == ALTERNATIVE && subAttachements.size () != 2, continue, "eMailShrinker: bug A6: alternative give not 1 case (" << subAttachements.size () << ").");
|
|||
if (multiProp.length ()) |
|||
for (Attachment &subAttach : subAttachements) |
|||
subAttach.markDisclaim (plainMarked, htmlMarked); |
|||
string textProp = getProp (contentTypeToken, textRegEx); |
|||
if (textProp.empty ()) |
|||
return; |
|||
if (!plainMarked && textProp == PLAIN) |
|||
plainMarked = toUpdate = toDisclaim = true; |
|||
if (!htmlMarked && textProp == HTML) |
|||
htmlMarked = toUpdate = toDisclaim = true; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
bool |
|||
Attachment::markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector<Attachment *> &allMarkedPtrs) { |
|||
DEF_LOG ("Attachment::markSignificant", "parentMultiProp: " << parentMultiProp << " minAttachSize: " << minAttachSize); |
|||
string textProp = getProp (contentTypeToken, textRegEx); |
|||
bool cantBeExtract ((parentMultiProp == ALTERNATIVE && (textProp == PLAIN || textProp == HTML)) || |
|||
(parentMultiProp == RELATED && textProp == HTML)); |
|||
string multiProp = getProp (contentTypeToken, multiRegEx); |
|||
for (Attachment &sub : subAttachements) |
|||
cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs); |
|||
if (getProp (contentTypeToken, textRegEx) == HTML) { |
|||
string content = getContent (mbox); |
|||
vector<string> imgs; |
|||
getSection (content, IMG_BEGIN, IMG_END, imgs); |
|||
EmbeddedData::fillEmbeddedData (imgs, minAttachSize, embeddedData); |
|||
if (embeddedData.size ()) |
|||
toUpdate = true; |
|||
} |
|||
cantBeExtract |= toUpdate; |
|||
if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract) |
|||
cantBeExtract = toExtract = true; // XXX cantBeExtract ?
|
|||
if (toExtract || toUpdate || toDisclaim) |
|||
allMarkedPtrs.push_back (this); |
|||
return cantBeExtract; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Read the body of this part (offsets contentPos to endPos) from mbox and return it,
// decoded according to the content-transfer-encoding (base64 and/or quoted-printable).
// Fixes:
//  - endPos may still be 0 (its initial value) when the end of the part was never found;
//    the resulting negative length used to be converted to a huge resize request.
//    It is now treated as an empty body.
//  - the buffer is trimmed to the number of characters actually read, so a short or
//    failed read no longer leaves NUL padding in the returned content.
string
Attachment::getContent (ifstream &mbox) const {
    DEF_LOG ("Attachment::getContent", "contentPos: " << contentPos);
    const streamoff length (max<streamoff> (0, endPos-contentPos));
    string content;
    content.resize (static_cast<string::size_type> (length));
    mbox.seekg (contentPos, ios::beg);
    mbox.read (&content[0], length);
    content.resize (static_cast<string::size_type> (mbox.gcount ()));
    if (isBase64Encoding ())
        base64Decode (content);
    if (isQuotedPrintableEnconding ())
        quotedDecode (content);
    return content;
}
|||
|
|||
// ================================================================================
|
|||
void |
|||
Attachment::println (ofstream &outbox, string content) const { |
|||
DEF_LOG ("Attachment::println", "content: " << content); |
|||
if (isBase64Encoding ()) |
|||
base64Encode (content); |
|||
if (isQuotedPrintableEnconding ()) |
|||
quotedEncode (content); |
|||
outbox << content; |
|||
if (content.length () && content.back () != '\n') |
|||
outbox << endl; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
Attachment::replaceEmbedded (string &content) const { |
|||
DEF_LOG ("Attachment::replaceEmbedded", "content.length: " << content.length ()); |
|||
if (!embeddedData.size ()) |
|||
return; |
|||
int imgIdx (-1); |
|||
string::size_type startPos (0); |
|||
for (const EmbeddedData &embedded : embeddedData) { |
|||
LOG ("embedded: " << embedded); |
|||
for ( ; ; ) { |
|||
startPos = caseInsensitiveFind (content, IMG_BEGIN, startPos); |
|||
LOG_BUG (startPos == string::npos, return, "eMailShrinker: bug A7: can't find " << IMG_BEGIN); |
|||
++imgIdx; |
|||
if (embedded.imgIdx >= imgIdx) |
|||
break; |
|||
startPos += IMG_BEGIN.length (); |
|||
} |
|||
startPos = caseInsensitiveFind (content, SRC_BEGIN, startPos); |
|||
|
|||
LOG_BUG (startPos == string::npos, return, "eMailShrinker: bug A8: can't find " << SRC_BEGIN ); |
|||
startPos += SRC_BEGIN.length (); |
|||
const string::size_type endPos (content.find ("\"", startPos)); |
|||
|
|||
LOG_BUG (endPos == string::npos, return, "eMailShrinker: bug A9: can't find end of " << SRC_BEGIN ); |
|||
content.replace (startPos, endPos-startPos, embedded.downloadUrl); |
|||
} |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Print one line describing attachment (indented by its level, modulo 20): size,
// content type, status labels, content id, boundary and the begin / content / end offsets.
// Then print one line per embedded data item and, recursively, every nested part.
ostream&
kaz::operator << (ostream& os, const Attachment& attachment) {
    // Gather the status labels, then join them with ", " inside " [...]".
    vector<string> labels;
    if (attachment.toExtract)
        labels.push_back ("to extract");
    if (attachment.toUpdate)
        labels.push_back ("need update");
    if (attachment.toDisclaim)
        labels.push_back ("need diclaim");
    if (attachment.embeddedData.size ())
        labels.push_back ("embeddedData");

    string prop;
    for (const string &label : labels) {
        if (!prop.empty ())
            prop += ", ";
        prop += label;
    }
    if (!prop.empty ())
        prop = " ["+prop+"]";

    const string idText (attachment.cid.length () ? " id: "+attachment.cid : "");
    const string boundaryText (attachment.boundary.length () ? " boundary: "+attachment.boundary : "");

    os << setw ((attachment.level % 20)*2) << "" << setw (10) << SizeArg (attachment.getSize ()) << " " << attachment.getContentType ()
       << prop << idText
       << boundaryText
       << " (" << attachment.beginPos << " / " << attachment.contentPos << " / " << attachment.endPos << ") " << endl;

    for (const EmbeddedData &embedded : attachment.embeddedData)
        os << setw (((attachment.level+1) % 20)*2) << "" << setw (10) << SizeArg (embedded.dataLength) << " embedded [to extract] " << embedded;

    for (const Attachment &child : attachment.subAttachements)
        os << child;
    return os;
}
|||
|
|||
// ================================================================================
|
@ -0,0 +1,97 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#include "kazDebug.hpp" |
|||
#include "kazMisc.hpp" |
|||
#include "EmbeddedData.hpp" |
|||
#include "Attachment.hpp" |
|||
|
|||
using namespace std; |
|||
using namespace kaz; |
|||
|
|||
// ================================================================================
|
|||
// Start of an image source attribute that carries inline data (searched case-insensitively in this file).
static const string EMBEDDED_TAG {"SRC=\"DATA:"};
|||
|
|||
// ================================================================================
|
|||
// Record one inline image: its rank among the image tags (imgIdx), its content type,
// the name it is given, and where its data starts and how long it is
// (startData and dataLength are offsets in the image tag text passed to fillEmbeddedData).
EmbeddedData::EmbeddedData (const int &imgIdx, const string &contentType, const string &name, const string::size_type &startData, const string::size_type &dataLength)
    : imgIdx (imgIdx),
      contentType (contentType),
      name (name),
      startData (startData),
      dataLength (dataLength)
{
    DEF_LOG ("EmbeddedData::EmbeddedData", "imgIdx: " << imgIdx << " contentType:" << contentType << " name:" << name << " startData:" << startData << " dataLength:" << dataLength);
}
|||
|
|||
// ================================================================================
|
|||
void |
|||
EmbeddedData::fillEmbeddedData (const vector<string> &imgs, const streamoff &minAttachSize, vector<EmbeddedData> &data) { |
|||
DEF_LOG ("EmbeddedData::fillEmbeddedData", "imgs.size: " << imgs.size () << " minAttachSize:" << minAttachSize << " data.size:" << data.size ()); |
|||
|
|||
int imgIdx (-1); |
|||
for (const string &img : imgs) { |
|||
++imgIdx; |
|||
if (streamoff (img.length ()) < minAttachSize) |
|||
continue; |
|||
string::size_type startPos (caseInsensitiveFind (img, EMBEDDED_TAG)); |
|||
if (startPos == string::npos) |
|||
continue; |
|||
startPos += EMBEDDED_TAG.length (); |
|||
// XXX check base64 ?
|
|||
string::size_type endPos = img.find_first_of (";,", startPos); |
|||
|
|||
LOG_BUG (endPos == string::npos, continue, "eMailShrinker: bug E1: can't find end of contentType" ); |
|||
const string contentType (img.substr (startPos, endPos-startPos)); |
|||
const string name (Attachment::getUnknown (contentType)); |
|||
startPos = img.find (',', startPos); |
|||
|
|||
LOG_BUG (startPos == string::npos, continue, "eMailShrinker: bug E2: can't find start data" ); |
|||
|
|||
++startPos; |
|||
endPos = img.find ('"', startPos); |
|||
data.push_back (EmbeddedData (imgIdx, contentType, name, startPos, endPos-startPos)); |
|||
} |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Print one line for embeddedData:
// "<imgIdx>: <contentType> - <name> (<startData> / <dataLength>) <downloadUrl> - <downloadId>".
ostream&
kaz::operator << (ostream& os, const EmbeddedData& embeddedData) {
    os << embeddedData.imgIdx << ": ";
    os << embeddedData.contentType << " - " << embeddedData.name;
    os << " (" << embeddedData.startData << " / " << embeddedData.dataLength << ") ";
    os << embeddedData.downloadUrl << " - " << embeddedData.downloadId;
    os << endl;
    return os;
}
|||
|
|||
// ================================================================================
|
@ -0,0 +1,598 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <set> |
|||
#include <fstream> |
|||
#include <iomanip> |
|||
#include <math.h> |
|||
#include <algorithm> |
|||
#include <unistd.h> |
|||
|
|||
#include "kazDebug.hpp" |
|||
#include "kazMisc.hpp" |
|||
#include "SizeArg.hpp" |
|||
#include "Attachment.hpp" |
|||
#include "MainAttachment.hpp" |
|||
|
|||
using namespace std; |
|||
using namespace kaz; |
|||
|
|||
static const string KAZ_WEB_SITE = "https://kaz.bzh/"; |
|||
static const string TMPL_DOWNLOAD = "{{DOWNLOAD}}"; |
|||
static const string TMPL_FILENAME = "{{FILENAME}}"; |
|||
static const string CID = "cid:"; |
|||
|
|||
// Horizontal rule used in the plain-text disclaimer.
static const string KAZ_PLAIN_HR ("______________________________________________________________________________");
// Markers delimiting the KAZ section in a plain-text body.
static const string KAZ_PLAIN_START ("~~ PJ-KAZ !"); // must not end with a space
static const string KAZ_PLAIN_STOP (KAZ_PLAIN_START+" ~~");
// User-visible texts of the plain-text disclaimer.
static const string KAZ_PLAIN_DONT_TOUCH ("(concervez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)");
static const string KAZ_PLAIN_WARNING ("Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. Elles seront automatiquement supprimées dans 1 mois. Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. Pour mieux comprendre la politique de nos services visitez kaz.bzh");
static const string KAZ_PLAIN_DOWLOAD_ONE ("Vos pièces jointes sont à télécharger individuellement ici :");
static const string KAZ_PLAIN_DOWLOAD_OTHER ("(Contenu dans des messages précédents)");
static const string KAZ_PLAIN_DOWLOAD_ALL ("Vous pouvez télécharger l'ensemble dans une archive là :");
|||
|
|||
// HTML fragments searched for, or generated, when an HTML body is rewritten.
static const string HEAD ("<head>");
static const string HEAD_END ("</head>");
// Style sheet link inserted in the <head> of updated HTML parts.
static const string KAZ_CSS_URL ("https://kaz.bzh/m/email.css");
static const string KAZ_CSS ("<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>");
static const string A_END ("</a>");
// List items: class "one" holds a single download link, class "all" the archive link.
static const string LI_BEGIN ("<li");
static const string CLASS_ONE ("class=\"one\"");
static const string LI_ONE (LI_BEGIN+" "+CLASS_ONE+">");
static const string LI_ALL (LI_BEGIN+" class=\"all\">");
static const string LI_END ("</li>");
static const string HREF_ONE ("href=\"");
static const string BODY_END ("</body>");
static const string HTML_END ("</html>");
|||
|
|||
// Markers delimiting the KAZ section in an HTML body.
static const string KAZ_HTML_TAG = "<!--KAZ"; // must not end with a space
static const string KAZ_HTML_START = KAZ_HTML_TAG+" START-->";
static const string KAZ_HTML_STOP = KAZ_HTML_TAG+" STOP-->";
// HTML counterparts of the KAZ_PLAIN_* texts, encoded with SGML character
// entities: being pure ASCII they do not depend on the charset declared by
// the HTML part they are inserted in.
static const string KAZ_HTML_DONT_TOUCH = "(concervez cette partie intacte dans votre r&eacute;ponse si vous voulez transmettre les documents pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ONE = "Vos pi&egrave;ces jointes sont &agrave; t&eacute;l&eacute;charger individuellement ici :";
static const string KAZ_HTML_DOWLOAD_OTHER = "(Contenu dans des messages pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ALL = "Vous pouvez t&eacute;l&eacute;charger l'ensemble dans une archive l&agrave; :";
// Text of the anchor pointing to the archive.
static const string KAZ_HTML_ARCHIVE = "archive";
|||
|
|||
// ================================================================================
|
|||
vector <string> |
|||
Attachment::stringsToUpdate ({KAZ_PLAIN_START, "\""+CID}); |
|||
|
|||
// ================================================================================
|
|||
const string MainAttachment::templatePlainAddLink (" - "+TMPL_DOWNLOAD+" "+TMPL_FILENAME+"\n"); |
|||
const string MainAttachment::templatePlainAllLink ("\n"+KAZ_PLAIN_DOWLOAD_ALL+"\n * "+TMPL_DOWNLOAD+"\n"); |
|||
|
|||
const string MainAttachment::templateHtmlHeader (KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n" |
|||
"<div class=\"kaz\">" |
|||
"<p style=\"font-size:10px; color:#969696;\">"+KAZ_HTML_DONT_TOUCH+"</p>\n" |
|||
"<p>"+KAZ_HTML_DOWLOAD_ONE+"<ul>\n"); |
|||
const string MainAttachment::templateHtmlAddLink (LI_ONE+"<a "+HREF_ONE+TMPL_DOWNLOAD+"\">"+TMPL_FILENAME+"</a>"+LI_END+"\n"); |
|||
const string MainAttachment::templateHtmlOtherLink ("</ul>"+KAZ_HTML_DOWLOAD_OTHER+"<ul>\n"); |
|||
const string MainAttachment::templateHtmlAllLink ("</ul><ul>"+LI_ALL+KAZ_HTML_DOWLOAD_ALL+" <a href=\""+TMPL_DOWNLOAD+"\">"+KAZ_HTML_ARCHIVE+"</a>"+LI_END+"\n"); |
|||
const string MainAttachment::templateHtmlFooter ("</ul></p>\n" |
|||
"<p class=\"msg\"><a class=\"kaz\" href=\""+KAZ_WEB_SITE+"\"> "+KAZ_WEB_SITE+" </a></p></div>\n" |
|||
"<hr><p style=\"font-size:10px; color:#969696;\">"+KAZ_PLAIN_STOP+"</p>"+KAZ_HTML_STOP+"\n"); |
|||
|
|||
const regex MainAttachment::whiteSpaceRegEx ("\\s+"); |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) { |
|||
DEF_LOG ("MainAttachment::copy", "begin: " << begin << " end: " << end); |
|||
mbox.seekg (begin, ios::beg); |
|||
char c; |
|||
for (streamoff pos (begin); pos < end; ++pos) { |
|||
mbox.get (c); |
|||
outbox.put (c); |
|||
} |
|||
outbox.flush (); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::fillUrlId (string &url, string &id) { |
|||
DEF_LOG ("MainAttachment::fillUrlId", ""); |
|||
url = id = ""; |
|||
string urlId; |
|||
getline (cin, urlId); |
|||
LOG ("get URL: " << urlId); |
|||
vector<string> urlIdVect { sregex_token_iterator (urlId.begin(), urlId.end (), whiteSpaceRegEx, -1), {} }; |
|||
if (urlIdVect [0].empty ()) |
|||
return; |
|||
url = urlIdVect [0]; |
|||
if (urlIdVect.size () > 1) |
|||
id = urlIdVect [1]; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::setExtractDir (const bfs::path &extractDir) { |
|||
if (extractDir.empty ()) |
|||
throw invalid_argument ("no tmp dir"); |
|||
this->extractDir = extractDir; |
|||
if (! is_directory (extractDir)) |
|||
bfs::create_directory (extractDir); |
|||
} |
|||
|
|||
void |
|||
MainAttachment::setArchiveDownloadURL (const string &archiveDownloadURL) { |
|||
this->archiveDownloadURL = archiveDownloadURL; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::addLink (string &plain, string &html, const string &url, const string &name) const { |
|||
string plainNewOneLink (templatePlainAddLink); |
|||
replaceAll (plainNewOneLink, TMPL_DOWNLOAD, url); |
|||
replaceAll (plainNewOneLink, TMPL_FILENAME, name); |
|||
plain += plainNewOneLink; |
|||
string htmlNewOneLink (templateHtmlAddLink); |
|||
string codedUrl (url); |
|||
replaceAll (codedUrl, "&", "&"); |
|||
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl); |
|||
replaceAll (htmlNewOneLink, TMPL_FILENAME, name); |
|||
html += htmlNewOneLink; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::getDisclaim (string &plain, string &html) const { |
|||
DEF_LOG ("Attachment::getDisclaim", ""); |
|||
plain = html = ""; |
|||
|
|||
int linkCount (0); |
|||
string allId; |
|||
string plainNewLinks, htmlNewLinks; |
|||
for (Attachment *attachP : allMarkedPtrs) { |
|||
if (!attachP->toExtract) |
|||
continue; |
|||
addLink (plainNewLinks, htmlNewLinks, attachP->downloadUrl, attachP->getAttachName ()); |
|||
++linkCount; |
|||
allId += attachP->downloadId; |
|||
// if (previousLinks [attachP->downloadUrl] != previousLinks.end ())
|
|||
// // impossible puisque le lien est toujours nouveau
|
|||
// previousLinks.erase (attachP->downloadUrl);
|
|||
} |
|||
for (Attachment *attachP : allMarkedPtrs) { |
|||
if (!attachP->embeddedData.size ()) |
|||
continue; |
|||
for (EmbeddedData &embedded : attachP->embeddedData) { |
|||
addLink (plainNewLinks, htmlNewLinks, embedded.downloadUrl, embedded.name); |
|||
++linkCount; |
|||
allId += embedded.downloadId; |
|||
} |
|||
} |
|||
LOG ("allId:" << allId); |
|||
|
|||
string plainOldLinks, htmlOldLinks; |
|||
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) |
|||
addLink (plainOldLinks, htmlOldLinks, it->first, it->second); |
|||
linkCount += previousLinks.size (); |
|||
if (! linkCount) { |
|||
LOG ("no attach"); |
|||
return; |
|||
} |
|||
|
|||
plain = "\n"+KAZ_PLAIN_START+"\n"+KAZ_PLAIN_HR+"\n"+KAZ_PLAIN_DONT_TOUCH+"\n\n"+KAZ_PLAIN_WARNING+"\n\n"+KAZ_PLAIN_DOWLOAD_ONE+"\n"+plainNewLinks; |
|||
html = templateHtmlHeader+htmlNewLinks; |
|||
if (previousLinks.size ()) { |
|||
plain += KAZ_PLAIN_DOWLOAD_OTHER+"\n"+plainOldLinks; |
|||
html += templateHtmlOtherLink+htmlOldLinks; |
|||
} |
|||
if (linkCount > 1 && archiveDownloadURL.length ()) { |
|||
string allPlainLinks (templatePlainAllLink); |
|||
replaceAll (allPlainLinks, TMPL_DOWNLOAD, archiveDownloadURL+allId); |
|||
plain += allPlainLinks; |
|||
string allLinks (templateHtmlAllLink); |
|||
// allId => & => & done
|
|||
replaceAll (allLinks, TMPL_DOWNLOAD, archiveDownloadURL+allId); |
|||
html += allLinks; |
|||
} |
|||
html += templateHtmlFooter; |
|||
plain += "\n\n"+KAZ_WEB_SITE+"\n"+KAZ_PLAIN_HR+"\n"+KAZ_PLAIN_STOP+"\n"; |
|||
// & => & done
|
|||
LOG ("plain: " << plain); |
|||
LOG ("html: " << html); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::addPrevious (const string &href, const string &name) { |
|||
DEF_LOG ("Attachment::addPrevious", "href: " << href << " name: " << name); |
|||
const string oldVal = previousLinks [href]; |
|||
if (name.empty ()) |
|||
return; |
|||
previousLinks.erase (href); |
|||
previousLinks [href] = name; |
|||
LOG ("inserted: " << href << ": " << previousLinks[href]); |
|||
} |
|||
|
|||
void |
|||
MainAttachment::extractLinks (const string &extractedPlainKAZ) { |
|||
DEF_LOG ("Attachment::extractedPlainKAZ", "extractedPlainKAZ: " << extractedPlainKAZ); |
|||
for (string::size_type startPos (0); |
|||
(startPos = extractedPlainKAZ.find ("http", startPos)) != string::npos; |
|||
) { |
|||
streamoff stopPos = startPos; |
|||
while (extractedPlainKAZ [stopPos] && availableURLChars.find (extractedPlainKAZ [stopPos]) != string::npos) |
|||
++stopPos; |
|||
const string href (extractedPlainKAZ.substr (startPos, stopPos-startPos)); |
|||
LOG ("plain href: " << href); |
|||
if (extractedPlainKAZ [stopPos] && extractedPlainKAZ [stopPos] != '\n') |
|||
++stopPos; |
|||
startPos = stopPos; |
|||
// get all href but KAZ_WEB_SITE
|
|||
// the archive link while be skip by filter.sh
|
|||
if (href == KAZ_WEB_SITE) |
|||
continue; |
|||
while (extractedPlainKAZ [stopPos] && extractedPlainKAZ [stopPos] != '\n') |
|||
++stopPos; |
|||
const string name (extractedPlainKAZ.substr (startPos, stopPos-startPos)); |
|||
LOG ("plain name: " << name); |
|||
addPrevious (href, name); |
|||
} |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::extractLinks (const vector<string> &liOne) { |
|||
DEF_LOG ("Attachment::extractedPlainKAZ", "liOne.size: " << liOne.size ()); |
|||
for (const string &one : liOne) { |
|||
if (caseInsensitiveFind (one, CLASS_ONE) == string::npos) |
|||
continue; |
|||
string::size_type startPos = caseInsensitiveFind (one, HREF_ONE); |
|||
|
|||
LOG_BUG (startPos == string::npos, continue, "eMailShrinker: bug M1: no href KAZ link. (one: " << one << ")"); |
|||
startPos += HREF_ONE.length (); |
|||
LOG ("startPos: " << startPos); |
|||
string::size_type stopPos = one.find ("\"", startPos); |
|||
|
|||
LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M2: no ending href KAZ link. (one: " << one << ")"); |
|||
LOG ("stopPos: " << stopPos); |
|||
string href (one.substr (startPos, stopPos-startPos)); |
|||
LOG ("html href: " << href); |
|||
stopPos = one.find (">", startPos); |
|||
|
|||
LOG_BUG (one [stopPos] != '>', break, "eMailShrinker: bug M3: no ending href KAZ link. (one: " << one << ")"); |
|||
++stopPos; |
|||
startPos = stopPos; |
|||
LOG ("startPos: " << startPos); |
|||
stopPos = caseInsensitiveFind (one, A_END, startPos); |
|||
LOG ("stopPos: " << stopPos); |
|||
|
|||
LOG_BUG (stopPos == string::npos, break, "eMailShrinker: bug M4: no ending anchor KAZ link. (one: " << one << ")"); |
|||
string name (one.substr (startPos, stopPos-startPos)); |
|||
LOG ("html name: " << name); |
|||
addPrevious (href, name); |
|||
} |
|||
} |
|||
|
|||
void |
|||
MainAttachment::extractPreviousKAZ (ifstream &mbox) { |
|||
DEF_LOG ("MainAttachment::extractPreviousKAZ", ""); |
|||
string extractedPlainKAZ, extractedHtmlKAZ; |
|||
for (const Attachment *attachP : allMarkedPtrs) { |
|||
if (!attachP->toUpdate || isBase64Encoding ()) |
|||
continue; |
|||
string textProp = attachP->getProp (contentTypeToken, textRegEx); |
|||
if (textProp.empty ()) |
|||
continue; |
|||
string content (attachP->getContent (mbox)); |
|||
if (textProp == PLAIN) { |
|||
LOG (PLAIN); |
|||
extractedPlainKAZ += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP); |
|||
} |
|||
if (textProp == HTML) { |
|||
LOG (HTML); |
|||
string section = attachP->getSection (content, KAZ_HTML_START, KAZ_HTML_STOP); |
|||
section += attachP->getSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP); |
|||
// update href from HTML attachments
|
|||
replaceAll (section, "&", "&"); |
|||
extractedHtmlKAZ += section; |
|||
} |
|||
} |
|||
LOG ("extractedPlainKAZ: "<< extractedPlainKAZ); |
|||
extractLinks (extractedPlainKAZ); |
|||
|
|||
LOG ("extractedHtmlKAZ: "<< extractedHtmlKAZ); |
|||
vector<string> liOne; |
|||
getSection (extractedHtmlKAZ, LI_BEGIN, LI_END, liOne); |
|||
extractLinks (liOne); |
|||
|
|||
#ifndef DISABLE_LOG |
|||
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) |
|||
LOG ("oldLink link: " << it->first << " name: " << it->second); |
|||
#endif |
|||
} |
|||
|
|||
void |
|||
MainAttachment::removePreviousArchive () { |
|||
vector<string> toRemove; |
|||
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) |
|||
if (it->first.find ("&l=/") != string::npos) |
|||
toRemove.push_back (it->first); |
|||
for (string old : toRemove) |
|||
previousLinks.erase (old); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Build the root attachment from mbox, then read the remaining lines to
// set endPos.
// Each remaining line advances tmpPos by its length plus one (the line
// delimiter removed by getline).
MainAttachment::MainAttachment (ifstream &mbox)
    : Attachment (mbox, initTmpLevel (), 0, initTmpPos ()) {
    DEF_LOG ("MainAttachment::MainAttachment", "");
    string line;
    while (getline (mbox, line))
        tmpPos += line.length () + 1;
    endPos = tmpPos;
}
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox) { |
|||
DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize); |
|||
bool plainMarked (false), htmlMarked (false); |
|||
markDisclaim (plainMarked, htmlMarked); |
|||
Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::getUpdatedURL (ifstream &mbox) { |
|||
DEF_LOG ("MainAttachment::getUpdatedURL", ""); |
|||
extractPreviousKAZ (mbox); |
|||
for (map <string, string>::iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) |
|||
cout << it->first << endl; |
|||
} |
|||
|
|||
void |
|||
MainAttachment::newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const { |
|||
DEF_LOG ("MainAttachment::newPjEntry", "attachCount: " << attachCount << " contentType: " << contentType << " name: " << name); |
|||
ostringstream dirNameStream; |
|||
dirNameStream << "PJ-" << std::setfill ('0') << std::setw (3) << int (attachCount); |
|||
dirName = dirNameStream.str (); |
|||
bfs::path dirPath (extractDir / dirName); |
|||
|
|||
bfs::create_directory (dirPath); |
|||
bfs::path metaPath (dirPath / "meta"); |
|||
|
|||
ofstream metaOut (metaPath.c_str ()); |
|||
metaOut |
|||
<< "Content-Type: " << contentType << endl |
|||
<< "Name: " << name << endl; |
|||
metaOut.flush (); |
|||
metaOut.close (); |
|||
|
|||
bfs::path filePath (dirPath / "media"); |
|||
mediaName = filePath.c_str (); |
|||
dirName = dirPath.c_str (); |
|||
LOG ("dirName: " << dirName << " mediaName: " << mediaName); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const { |
|||
DEF_LOG ("MainAttachment::extract", "minSize: " << minSize); |
|||
int attachCount (0); |
|||
string dirName, mediaName; |
|||
for (Attachment *attachP : allMarkedPtrs) { |
|||
if (!attachP->toExtract) |
|||
continue; |
|||
newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName); |
|||
++attachCount; |
|||
ofstream out (mediaName); |
|||
|
|||
streamoff |
|||
start (attachP->Attachment::contentPos), |
|||
end (attachP->Attachment::endPos+1); // pour assurer le cas sans ^M
|
|||
mbox.seekg (start, ios::beg); |
|||
if (attachP->isBase64Encoding ()) { |
|||
unsigned char buff[4]; |
|||
int idx = 0; |
|||
char c; |
|||
for (streamoff curPos (start); mbox.get (c) && curPos < end; ++curPos) { |
|||
if (c == '=') |
|||
break; |
|||
if (!isBase64 (c)) |
|||
continue; |
|||
buff [idx] = getBase64Val (c); |
|||
if (++idx != 4) |
|||
continue; |
|||
out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4); |
|||
out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2); |
|||
out.put (buff [2] << 6 | buff [3]); |
|||
idx = 0; |
|||
} |
|||
if (idx) { |
|||
for (int j = idx; j < 4; ++j) |
|||
buff [j] = 0; |
|||
out.put (buff [0] << 2 | (buff [1] & 0x30) >> 4); |
|||
--idx; |
|||
if (idx) |
|||
out.put (buff [1] << 4 | (buff [2] & 0x3c) >> 2); |
|||
} |
|||
} else { |
|||
string line; |
|||
for (streamoff curPos (start); getline (mbox, line); ) { |
|||
curPos += line.length () + 1; |
|||
if (curPos >= end) { |
|||
out << line.substr (0, end + line.length () - curPos) << endl; |
|||
break; |
|||
} |
|||
out << line << endl; |
|||
} |
|||
} |
|||
out.flush (); |
|||
out.close (); |
|||
cout << dirName << endl; |
|||
} |
|||
for (Attachment *attachP : allMarkedPtrs) { |
|||
if (!attachP->embeddedData.size ()) |
|||
continue; |
|||
string content = attachP->getContent (mbox); |
|||
vector<string> imgs; |
|||
getSection (content, IMG_BEGIN, IMG_END, imgs); |
|||
for (const EmbeddedData &embedded : attachP->embeddedData) { |
|||
string &img (imgs[embedded.imgIdx]); |
|||
img.erase (0, embedded.startData); |
|||
img.erase (embedded.dataLength); |
|||
base64Decode (img); |
|||
newPjEntry (attachCount, embedded.contentType, embedded.name, dirName, mediaName); |
|||
++attachCount; |
|||
|
|||
ofstream out (mediaName); |
|||
out.write (img.c_str (), img.size ()); |
|||
out.flush (); |
|||
out.close (); |
|||
cout << dirName << endl; |
|||
} |
|||
} |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize) { |
|||
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize); |
|||
|
|||
// preparation
|
|||
extractPreviousKAZ (mbox); |
|||
removePreviousArchive (); |
|||
map<const string, const string> translateHtml; |
|||
for (Attachment *attachP : allMarkedPtrs) |
|||
if (attachP->toExtract) { |
|||
fillUrlId (attachP->downloadUrl, attachP->downloadId); |
|||
if (attachP->downloadUrl.empty ()) { |
|||
LOG ("no change"); |
|||
attachP->toExtract = false; |
|||
continue; |
|||
} |
|||
if (attachP->cid.length ()) { |
|||
string tmp (attachP->downloadUrl); |
|||
replaceAll (tmp, "&", "&"); |
|||
translateHtml.insert (pair<const string, const string> (CID+attachP->cid, tmp)); |
|||
} |
|||
} |
|||
for (Attachment *attachP : allMarkedPtrs) { |
|||
if (!attachP->embeddedData.size ()) |
|||
continue; |
|||
for (EmbeddedData &embedded : attachP->embeddedData) |
|||
fillUrlId (embedded.downloadUrl, embedded.downloadId); |
|||
} |
|||
string plainDisclaim, htmlDisclaim; |
|||
getDisclaim (plainDisclaim, htmlDisclaim); |
|||
// copy email
|
|||
streamoff curPos = 0; |
|||
for (Attachment *attachP : allMarkedPtrs) { |
|||
copy (mbox, outbox, curPos, attachP->beginInParent); |
|||
|
|||
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos); |
|||
|
|||
if (attachP->toExtract) { |
|||
LOG ("skip Extracted"); |
|||
|
|||
} else if (attachP->toUpdate) { |
|||
string textProp = attachP->getProp (contentTypeToken, textRegEx); |
|||
bool isPlain = textProp == PLAIN; |
|||
bool isHtml = textProp == HTML; |
|||
bool isDisclaimer = attachP->toDisclaim; |
|||
|
|||
LOG_BUG (isPlain && isHtml, /**/, "eMailShrinker: bug M6: plain and html: " << attachP->getContentType ()); |
|||
LOG_BUG (! (isPlain || isHtml), /**/, "eMailShrinker: bug M7: not plain or html: " << attachP->getContentType ()); |
|||
LOG ("toUpdate: isPlain: " << isPlain << " isHtml: " << isHtml << " isDisclaimer: " << isDisclaimer); |
|||
copy (mbox, outbox, attachP->beginInParent, attachP->contentPos); |
|||
|
|||
string content = attachP->getContent (mbox); |
|||
if (isHtml) { |
|||
string::size_type headStart (caseInsensitiveFind (content, HEAD)); |
|||
LOG ("HEAD start: " << headStart); |
|||
if (headStart != string::npos) { |
|||
headStart += HEAD.length (); |
|||
string::size_type headStop (caseInsensitiveFind (content, HEAD_END, headStart)); |
|||
if (headStop != string::npos) { |
|||
// to reduce the scoop of search
|
|||
string oldHead (content.substr (headStart, headStop-headStart)); |
|||
LOG ("HEAD start: " << headStart << " stop: " << headStop << " old: " << oldHead); |
|||
string::size_type oldCssPos (oldHead.find (KAZ_CSS_URL)); |
|||
if (oldCssPos != string::npos) { |
|||
string::size_type oldStart (oldHead.rfind ('<', oldCssPos)); |
|||
string::size_type oldStop (oldHead.find ('>', oldCssPos)); |
|||
if (oldStart != string::npos && oldStop != string::npos) { |
|||
++oldStop; |
|||
if (oldStop < oldHead.length () && oldHead [oldStop] == '\n') |
|||
++oldStop; |
|||
content.erase (headStart+oldStart, oldStop-oldStart); |
|||
} |
|||
} |
|||
content.insert (headStart, "\n"+KAZ_CSS); |
|||
} |
|||
// else XXX pas de /head (if faut en ajouter un (avec <html> ?))
|
|||
} |
|||
removeSection (content, KAZ_HTML_START, KAZ_HTML_STOP); |
|||
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP); |
|||
// XXX case insensitive ??
|
|||
if (content.find (CID) != string::npos) |
|||
replaceAll (content, translateHtml); |
|||
attachP->replaceEmbedded (content); |
|||
} |
|||
if (isPlain) |
|||
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP); |
|||
if (isDisclaimer) { |
|||
if (isHtml) { |
|||
for (string endTag : {BODY_END, HTML_END}) { |
|||
LOG ("try tag: " << endTag); |
|||
string::size_type endTagStart = caseInsensitiveRFind (content, endTag); |
|||
if (endTagStart != string::npos) { |
|||
content = content.substr (0, endTagStart); |
|||
LOG ("remove tag: " << endTag << " content: " << content); |
|||
} |
|||
} |
|||
content += htmlDisclaim+BODY_END+HTML_END; |
|||
LOG ("content: " << content); |
|||
} |
|||
if (isPlain) |
|||
content += plainDisclaim; |
|||
} |
|||
attachP->println (outbox, content); |
|||
} else { |
|||
LOG_BUG (true, continue, "eMailShrinker: bug M8: can't change" << *attachP); |
|||
} |
|||
outbox.flush (); |
|||
curPos = attachP->endPos; |
|||
} |
|||
copy (mbox, outbox, curPos, endPos); |
|||
outbox.close (); |
|||
} |
|||
|
|||
// ================================================================================
|
@ -0,0 +1,101 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#include <regex> |
|||
|
|||
#include <boost/format.hpp> |
|||
#include <boost/lexical_cast.hpp> |
|||
|
|||
#include "kazDebug.hpp" |
|||
#include "SizeArg.hpp" |
|||
|
|||
using namespace std; |
|||
using namespace kaz; |
|||
|
|||
// ================================================================================
|
|||
// Build a size from a number of bytes.
SizeArg::SizeArg (const size_t &bytes)
    : bytes (bytes)
{
}
|||
|
|||
// Build a size from its textual form (see init for the accepted syntax).
// Exceptions raised by init are propagated.
SizeArg::SizeArg (const string &option)
    : bytes (0)
{
    init (option);
}
|||
|
|||
void |
|||
SizeArg::init (const string &token) { |
|||
DEF_LOG ("SizeArg::init", "token: " << token); |
|||
static const string prefix ("KMGTPEZY"); |
|||
static const regex formatRegEx ("([0-9]+) *([k"+prefix+"]?)(i?)"); |
|||
|
|||
if (!regex_match (token.begin (), token.end (), formatRegEx)) |
|||
throw invalid_argument ("Bad size"); |
|||
bytes = boost::lexical_cast<uint64_t> (regex_replace (token, formatRegEx, "$1")); |
|||
const string v2 (regex_replace (token, formatRegEx, "$2")); |
|||
size_t index = prefix.find (v2); |
|||
if (v2.length ()) { |
|||
if (index == string::npos) |
|||
index = 0; // "k" case
|
|||
++index; |
|||
} |
|||
bytes *= pow (regex_replace (token, formatRegEx, "$3").empty () ? 1000 : 1024, index); |
|||
LOG ("token:" << token << " index:" << index << " v2:<" << v2 << ">" << " b:" << bytes); |
|||
} |
|||
|
|||
|
|||
// ================================================================================
|
|||
ostream & |
|||
kaz::operator << (ostream &out, const SizeArg &sizeArg) { |
|||
static string sizes [] = {"", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"}; |
|||
|
|||
if (!sizeArg.bytes) |
|||
return out << "0 byte"; |
|||
int nbBytes = (int) floor (log (sizeArg.bytes) / log (1024)); |
|||
double val ((sizeArg.bytes / pow (1024, nbBytes))); |
|||
return out << boost::str (boost::format(nbBytes ? "%.2f " : val == 1 ? "%.0f byte" : + "%.0f bytes") % val) + sizes [nbBytes]; |
|||
} |
|||
|
|||
istream & |
|||
kaz::operator >> (istream &in, SizeArg &sizeArg) { |
|||
string token; |
|||
in >> token; |
|||
try { |
|||
sizeArg.init (token); |
|||
} catch (...) { |
|||
in.setstate (ios_base::failbit); |
|||
} |
|||
return in; |
|||
} |
|||
|
|||
// ================================================================================
|
@ -0,0 +1,232 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#define LAST_VERSION "eMailShrinker 1.3 2021-04-04" |
|||
|
|||
#include <iostream> |
|||
#include <fstream> |
|||
#include <string> |
|||
#include <chrono> |
|||
#include <boost/program_options.hpp> |
|||
#include <boost/filesystem.hpp> |
|||
|
|||
#include "kazDebug.hpp" |
|||
#include "kazMisc.hpp" |
|||
#include "SizeArg.hpp" |
|||
#include "MainAttachment.hpp" |
|||
|
|||
using namespace std; |
|||
using namespace boost; |
|||
using namespace boost::program_options; |
|||
using namespace kaz; |
|||
|
|||
// ================================================================================
|
|||
// Options printed by usage (): the main ones, and those only shown on request.
static options_description mainDescription ("Main options", getCols ());
static options_description hide ("Hidded options", getCols ());
// Name of the program (argv [0]), set by main.
static char *prog = nullptr;
|||
|
|||
// ================================================================================
|
|||
void |
|||
usage (const string &msg = "", const bool &hidden = false) { |
|||
if (!msg.empty ()) { |
|||
cout << msg << endl; |
|||
exit (1); |
|||
} |
|||
cout << endl |
|||
<< "Usage: " << endl |
|||
<< " A) " << prog << " -u mbox > url-list" << endl |
|||
<< " B) " << prog << " [-s size] [-d dirName}] mbox > file-list" << endl |
|||
<< " C) " << prog << " [-s size] [-a url] mbox altered-mbox < url-list" << endl |
|||
<< endl << " filter attachments" << endl << endl |
|||
<< " A: list previous embded url need to be refresh (no added option)" << endl |
|||
<< " => downloadURL list" << endl |
|||
<< " B: attachment extraction (options : s, d)" << endl |
|||
<< " => list of (filename)" << endl |
|||
<< " C: attachment replace with url (options : s) " << endl |
|||
<< " <= list of (downloadURL [id])" << endl |
|||
<< endl << mainDescription |
|||
<< endl; |
|||
if (hidden) |
|||
cout << hide << endl; |
|||
exit (0); |
|||
} |
|||
|
|||
void |
|||
version () { |
|||
cout << LAST_VERSION << " KAZ team production (https://kaz.bzh/)" << endl; |
|||
exit (0); |
|||
} |
|||
|
|||
static auto startPrg = std::chrono::high_resolution_clock::now (); |
|||
void |
|||
showTime (string msg) { |
|||
using namespace std::chrono; |
|||
static auto stopPrg = high_resolution_clock::now (); |
|||
|
|||
cerr << msg << " done in " << ns2string (duration_cast<duration<double> > (stopPrg-startPrg).count ()) << endl; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
static const string inputFile = "input-file"; |
|||
static const char *const inputFileC = inputFile.c_str (); |
|||
|
|||
int |
|||
main (int argc, char** argv) { |
|||
// XXX debug before parse options
|
|||
// Log::debug = true;
|
|||
DEF_LOG ("main:", ""); |
|||
prog = argv [0]; |
|||
bool |
|||
debugFlag (false), |
|||
helpFlag (false), |
|||
versionFlag (false), |
|||
updateListFlag (false), |
|||
useTheForceLuke (false), |
|||
listFlag (false); |
|||
string inputName, outputName, archiveDownloadURL; |
|||
bfs::path extractDir (bfs::temp_directory_path ()); |
|||
SizeArg minAttachSize ("48 Ki"); |
|||
|
|||
try { |
|||
mainDescription.add_options () |
|||
("help,h", bool_switch (&helpFlag), "produce this help message") |
|||
("version,v", bool_switch (&versionFlag), "display version information") |
|||
("size,s", value<SizeArg> (&minAttachSize)->default_value (minAttachSize), "minimum size for extration") |
|||
("updateList,u", bool_switch (&updateListFlag), "list URL need refresh") |
|||
("extractDir,d", value<bfs::path> (&extractDir)->default_value (extractDir), "set tmp directory name for extraction") |
|||
("archiveDownloadURL,a", value<string> (&archiveDownloadURL)->default_value (archiveDownloadURL), "set url root web site to get bundle (like https://file.kaz.bzh/t.php?)") |
|||
; |
|||
|
|||
hide.add_options () |
|||
("useTheForceLuke", bool_switch (&useTheForceLuke), "display hidded options") |
|||
("list,l", bool_switch (&listFlag), "get attachment list") |
|||
("debug,g", bool_switch (&debugFlag), "debug mode") |
|||
; |
|||
|
|||
options_description cmd ("All options"); |
|||
cmd.add (mainDescription).add (hide).add_options () |
|||
(inputFileC, value<vector<string> > (), "input") |
|||
; |
|||
positional_options_description p; |
|||
p.add (inputFileC, -1); |
|||
variables_map vm; |
|||
basic_parsed_options<char> parsed = command_line_parser (argc, argv).options (cmd).positional (p).run (); |
|||
store (parsed, vm); |
|||
notify (vm); |
|||
|
|||
if (debugFlag) { |
|||
#ifdef DISABLE_LOG |
|||
cerr << "No debug option available (was compiled with -DDISABLE_LOG)" << endl; |
|||
#endif |
|||
} |
|||
Log::debug = debugFlag; |
|||
|
|||
if (useTheForceLuke) |
|||
usage ("", true); |
|||
if (versionFlag) |
|||
version (); |
|||
if (helpFlag) |
|||
usage (); |
|||
|
|||
if (vm.count (inputFileC)) { |
|||
vector<string> var = vm[inputFileC].as<vector<string> > (); |
|||
int nbArgs = vm[inputFileC].as<vector<string> > ().size (); |
|||
if (!nbArgs) |
|||
usage ("No input file(s)"); |
|||
inputName = var [0]; |
|||
if (nbArgs > 1) |
|||
outputName = var [1]; |
|||
if (nbArgs > 2) |
|||
usage ("Too much arguments"); |
|||
} |
|||
} catch (std::exception &e) { |
|||
cerr << "error: " << e.what() << endl; |
|||
usage (); |
|||
return 1; |
|||
} catch (...) { |
|||
cerr << "Exception of unknown type!" << endl; |
|||
return 1; |
|||
} |
|||
|
|||
LOG ("minAttachSize: " << minAttachSize); |
|||
|
|||
if (inputName.empty ()) |
|||
usage ("no input file"); |
|||
|
|||
// input mbox file
|
|||
ifstream mbox (inputName); |
|||
MainAttachment attachment (mbox); |
|||
mbox.close (); |
|||
|
|||
if (attachment.getBoundary ().empty ()) { |
|||
cerr << "no attachment" << endl; |
|||
return 1; |
|||
} |
|||
// parse structure
|
|||
mbox.open (inputName); |
|||
attachment.markSignificant (minAttachSize, mbox); |
|||
mbox.close (); |
|||
|
|||
if (listFlag) |
|||
// debug
|
|||
cerr << attachment; |
|||
|
|||
if (updateListFlag) { |
|||
// update
|
|||
mbox.open (inputName); |
|||
attachment.getUpdatedURL (mbox); |
|||
showTime ("Find old links"); |
|||
return 0; |
|||
} |
|||
|
|||
if (outputName.empty ()) { |
|||
// extract
|
|||
attachment.setExtractDir (extractDir); |
|||
mbox.open (inputName); |
|||
attachment.extract (mbox, minAttachSize); |
|||
showTime ("Extraction"); |
|||
return 0; |
|||
} |
|||
|
|||
// substitute
|
|||
if (archiveDownloadURL.length ()) |
|||
attachment.setArchiveDownloadURL (archiveDownloadURL); |
|||
mbox.open (inputName); |
|||
ofstream outbox (outputName); |
|||
attachment.substitute (mbox, outbox, minAttachSize); |
|||
showTime ("Substitution"); |
|||
return 0; |
|||
} |
|||
|
|||
// ================================================================================
|
@ -0,0 +1,293 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#define LAST_VERSION "1.0 2021-02-21 jirafeauAPI" |
|||
|
|||
#include <iostream> |
|||
#include <string> |
|||
#include <curl/curl.h> |
|||
#include <chrono> |
|||
#include <boost/program_options.hpp> |
|||
#include <boost/filesystem.hpp> |
|||
|
|||
#include "kazDebug.hpp" |
|||
#include "kazMisc.hpp" |
|||
#include "SizeArg.hpp" |
|||
|
|||
using namespace std; |
|||
using namespace boost; |
|||
using namespace boost::program_options; |
|||
using namespace kaz; |
|||
|
|||
namespace bfs = boost::filesystem; |
|||
|
|||
// ================================================================================
|
|||
// Option groups shown by usage (): the public one and the hidden one.
// Both receive the value of getCols () as second constructor argument.
static options_description mainDescription {"Main options", getCols ()};
static options_description hide {"Hidded options", getCols ()};

// Program name as invoked (argv [0]); assigned at the start of main ().
static char *prog = nullptr;
|||
|
|||
// ================================================================================
|
|||
void |
|||
usage (const string &msg = "", const bool &hidden = false) { |
|||
if (!msg.empty ()) { |
|||
cout << msg << endl; |
|||
exit (1); |
|||
} |
|||
cout << endl |
|||
<< "Usage: " << endl |
|||
<< " A) " << prog << " [-s size] [-t period] [-c content-type] [-n attachName] [-f server] send file [password] > url,delCode" << endl |
|||
<< " B) " << prog << " [-t period] [-f server] update ref > dealine" << endl |
|||
<< endl << " store ficle" << endl << endl |
|||
<< " A: send file (options : s, t)" << endl |
|||
<< " B: update deadline (options : t) " << endl |
|||
<< endl << mainDescription |
|||
<< endl; |
|||
if (hidden) |
|||
cout << hide << endl; |
|||
exit (0); |
|||
} |
|||
|
|||
void |
|||
version () { |
|||
cout << LAST_VERSION << " KAZ team production (https://kaz.bzh/)" << endl; |
|||
exit (0); |
|||
} |
|||
|
|||
static auto startPrg = std::chrono::high_resolution_clock::now (); |
|||
void |
|||
showTime (string msg) { |
|||
using namespace std::chrono; |
|||
static auto stopPrg = high_resolution_clock::now (); |
|||
|
|||
cerr << msg << " done in " << ns2string (duration_cast<duration<double> > (stopPrg-startPrg).count ()) << endl; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Write callback handed to libcurl: appends the received bytes to a
// std::string.
//
//   contents: start of the received data
//   size, nmemb: the data is size * nmemb bytes long
//   userp: address of the std::string that accumulates the data
//
// Returns the number of bytes consumed, i.e. size * nmemb.
static size_t
WriteCallback (void *contents, size_t size, size_t nmemb, void *userp) {
  const size_t received (size * nmemb);
  std::string *const sink (static_cast<std::string *> (userp));
  sink->append (static_cast<const char *> (contents), received);
  return received;
}
|||
|
|||
// ================================================================================
|
|||
static const string inputFile = "input-file"; |
|||
static const char *const inputFileC = inputFile.c_str (); |
|||
|
|||
int |
|||
main (int argc, char** argv) { |
|||
// XXX debug before parse options
|
|||
// Log::debug = true;
|
|||
DEF_LOG ("main:", ""); |
|||
prog = argv [0]; |
|||
bool |
|||
debugFlag (false), |
|||
helpFlag (false), |
|||
versionFlag (false), |
|||
useTheForceLuke (false); |
|||
enum JirCmd { SEND, UPDATE } jirCmd; |
|||
string |
|||
inputFileName, |
|||
password, |
|||
contentType, |
|||
attachName, |
|||
urlBase ("http://file.kaz.bzh"), |
|||
apiPage ("/script.php"), |
|||
downloadPage ("/f.php"), |
|||
minimumAvailability ("month"), |
|||
proxy; |
|||
|
|||
SizeArg maxUploadSize ("100 Mi"); |
|||
|
|||
try { |
|||
mainDescription.add_options () |
|||
("help,h", bool_switch (&helpFlag), "produce this help message") |
|||
("version,v", bool_switch (&versionFlag), "display version information") |
|||
("contentType,c", value<string> (&contentType)->default_value (contentType), "content-type of the sended file") |
|||
("attachName,n", value<string> (&attachName)->default_value (attachName), "force attachment name") |
|||
("minimumAvailability,t", value<string> (&minimumAvailability)->default_value (minimumAvailability), "minimum period of available download") |
|||
("maxUploadSize,s", value<SizeArg> (&maxUploadSize)->default_value (maxUploadSize), "maximum upload size") |
|||
("file server registery,f", value<string> (&urlBase)->default_value (urlBase), "server where file are temporary stored") |
|||
; |
|||
|
|||
hide.add_options () |
|||
("useTheForceLuke", bool_switch (&useTheForceLuke), "display hidded options") |
|||
("debug,g", bool_switch (&debugFlag), "debug mode") |
|||
("proxy,p", value<string> (&proxy)->default_value (proxy), "set proxy (proxy-host.org:8080)") |
|||
("uploadPage,u", value<string> (&apiPage)->default_value (apiPage), "upload page") |
|||
("downloadPage,d", value<string> (&downloadPage)->default_value (downloadPage), "download page") |
|||
; |
|||
|
|||
options_description cmd ("All options"); |
|||
cmd.add (mainDescription).add (hide).add_options () |
|||
(inputFileC, value<vector<string> > (), "input") |
|||
; |
|||
positional_options_description p; |
|||
p.add (inputFileC, -1); |
|||
variables_map vm; |
|||
basic_parsed_options<char> parsed = command_line_parser (argc, argv).options (cmd).positional (p).run (); |
|||
store (parsed, vm); |
|||
notify (vm); |
|||
|
|||
if (debugFlag) { |
|||
#ifdef DISABLE_LOG |
|||
cerr << "No debug option available (was compiled with -DDISABLE_LOG)" << endl; |
|||
#endif |
|||
} |
|||
Log::debug = debugFlag; |
|||
|
|||
if (useTheForceLuke) |
|||
usage ("", true); |
|||
if (versionFlag) |
|||
version (); |
|||
if (helpFlag) |
|||
usage (); |
|||
|
|||
if (vm.count (inputFileC)) { |
|||
vector<string> var = vm[inputFileC].as<vector<string> > (); |
|||
int nbArgs = vm[inputFileC].as<vector<string> > ().size (); |
|||
if (!nbArgs) |
|||
usage ("No command"); |
|||
if (var [0].compare ("send") == 0) |
|||
jirCmd = SEND; |
|||
else if (var [0].compare ("update") == 0) |
|||
jirCmd = UPDATE; |
|||
else |
|||
usage ("Unknown command ("+var [0]+")"); |
|||
if (nbArgs < 2) |
|||
usage ("no input file"); |
|||
inputFileName = var [1]; |
|||
if (nbArgs == 3) |
|||
password = var [2]; |
|||
if (nbArgs > 3) |
|||
usage ("Too much arguments"); |
|||
} |
|||
} catch (std::exception &e) { |
|||
cerr << "error: " << e.what() << endl; |
|||
usage (); |
|||
return 1; |
|||
} catch (...) { |
|||
cerr << "Exception of unknown type!" << endl; |
|||
return 1; |
|||
} |
|||
|
|||
if (inputFileName.empty ()) |
|||
usage ("no input"); |
|||
|
|||
CURL *easyhandle = curl_easy_init (); |
|||
if (! easyhandle) { |
|||
cerr << "no curl" << endl; |
|||
return 1; |
|||
} |
|||
|
|||
string readBuffer; |
|||
if (proxy.length ()) |
|||
curl_easy_setopt(easyhandle, CURLOPT_PROXY, proxy.c_str ()); |
|||
curl_easy_setopt (easyhandle, CURLOPT_WRITEFUNCTION, WriteCallback); |
|||
curl_easy_setopt (easyhandle, CURLOPT_WRITEDATA, &readBuffer); |
|||
curl_mime *multipart = curl_mime_init (easyhandle); |
|||
curl_mimepart *part = nullptr; |
|||
|
|||
switch (jirCmd) { |
|||
case SEND: { |
|||
LOG ("SEND: " << (urlBase+apiPage)); |
|||
curl_easy_setopt (easyhandle, CURLOPT_URL, (urlBase+apiPage).c_str ()); |
|||
|
|||
LOG ("maxUploadSize: " << maxUploadSize); |
|||
long uploadsize = (size_t) maxUploadSize; |
|||
curl_easy_setopt (easyhandle, CURLOPT_INFILESIZE, uploadsize); |
|||
|
|||
LOG ("time: " << minimumAvailability); |
|||
part = curl_mime_addpart (multipart); |
|||
curl_mime_name (part, "time"); |
|||
curl_mime_data (part, minimumAvailability.c_str (), CURL_ZERO_TERMINATED); |
|||
|
|||
if (password.size ()) { |
|||
LOG ("key: " << password); |
|||
part = curl_mime_addpart (multipart); |
|||
curl_mime_name (part, "key"); |
|||
curl_mime_data (part, password.c_str (), CURL_ZERO_TERMINATED); |
|||
} |
|||
|
|||
LOG ("inputFileName: " << bfs::path (inputFileName).filename ()); |
|||
part = curl_mime_addpart (multipart); |
|||
curl_mime_name (part, "file"); |
|||
if (contentType.length ()) { |
|||
LOG ("contentType: " << contentType); |
|||
curl_mime_type (part, contentType.c_str ()); |
|||
} |
|||
if (attachName.empty ()) { |
|||
attachName = bfs::path (inputFileName).filename ().c_str (); |
|||
LOG ("attachName: " << attachName); |
|||
} |
|||
curl_mime_filename (part, attachName.c_str ()); |
|||
FILE *fp = fopen (inputFileName.c_str (), "r"); |
|||
fseek (fp, 0L, SEEK_END); |
|||
long int fsize (ftell (fp)); |
|||
fseek (fp, 0L, SEEK_SET); |
|||
curl_mime_data_cb (part, fsize, |
|||
(curl_read_callback) fread, |
|||
(curl_seek_callback) fseek, |
|||
NULL, //(curl_seek_callback) fclose,
|
|||
fp); |
|||
} |
|||
break; |
|||
|
|||
case UPDATE: { |
|||
LOG ("UPDATE: " << (urlBase+downloadPage)); |
|||
curl_easy_setopt (easyhandle, CURLOPT_URL, (urlBase+downloadPage).c_str ()); |
|||
|
|||
LOG ("h: " << inputFileName); |
|||
part = curl_mime_addpart (multipart); |
|||
curl_mime_name (part, "h"); |
|||
curl_mime_data (part, inputFileName.c_str (), CURL_ZERO_TERMINATED); |
|||
|
|||
LOG ("u: " << minimumAvailability); |
|||
part = curl_mime_addpart (multipart); |
|||
curl_mime_name (part, "u"); |
|||
curl_mime_data (part, minimumAvailability.c_str (), CURL_ZERO_TERMINATED); |
|||
} |
|||
break; |
|||
} |
|||
|
|||
curl_easy_setopt (easyhandle, CURLOPT_MIMEPOST, multipart); |
|||
curl_easy_perform (easyhandle); |
|||
curl_easy_cleanup (easyhandle); |
|||
cout << readBuffer << endl; |
|||
|
|||
showTime ("Upload"); |
|||
|
|||
return 0; |
|||
} |
|||
|
|||
// ================================================================================
|
@ -0,0 +1,86 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#include <iomanip> |
|||
#include <sstream> |
|||
#include <boost/date_time/posix_time/posix_time.hpp> |
|||
#include <boost/chrono.hpp> |
|||
|
|||
#include "kazDebug.hpp" |
|||
|
|||
|
|||
using namespace std; |
|||
using namespace kaz; |
|||
|
|||
bool |
|||
Log::debug = false; |
|||
|
|||
size_t |
|||
Log::indent = 0; |
|||
|
|||
// ================================================================================
|
|||
// Return the local date and time formatted as "[MM/DD] HH:MM", every field
// being zero-padded to two characters.
string
Log::getLocalTimeStr () {
  namespace pt = boost::posix_time;
  const pt::ptime now (pt::second_clock::local_time ());
  const auto day (now.date ());
  const auto tod (now.time_of_day ());
  ostringstream out;
  out << setfill ('0');
  out << "[" << setw (2) << static_cast<int> (day.month ())
      << "/" << setw (2) << day.day () << "] ";
  out << setw (2) << tod.hours ()
      << ":" << setw (2) << tod.minutes ();
  return out.str ();
}
|||
|
|||
// Open a trace scope named functName: the nesting level is always increased,
// and in debug mode the scope header followed by "> " is written on the
// standard error stream (without end of line).
Log::Log (const string &functName)
  : functName (functName) {
  ++indent;
  if (!debug)
    return;
  cerr << *this << "> ";
}
|||
|
|||
// Close the trace scope: in debug mode the scope header followed by "<" and
// an end of line is written on the standard error stream, then the nesting
// level is decreased.
Log::~Log () {
  if (debug) {
    cerr << *this << "<" << endl;
    cerr.flush ();
  }
  --indent;
}
|||
|
|||
// Write the header of a trace line: the local time stamp, a padding of
// (indent % 20) * 2 fill characters, then the scope name.
ostream &
kaz::operator << (ostream &out, const Log &log) {
  out << Log::getLocalTimeStr ();
  // An empty string printed with a field width yields only the padding.
  out << setw ((log.indent % 20)*2) << "";
  out << log.functName;
  return out;
}
|||
|
|||
// ================================================================================
|
@ -0,0 +1,437 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#include <iostream> |
|||
#include <sys/ioctl.h> |
|||
#include <algorithm> |
|||
#include <chrono> |
|||
#include <sstream> |
|||
#include <iomanip> |
|||
|
|||
#include "kazDebug.hpp" |
|||
#include "kazMisc.hpp" |
|||
|
|||
using namespace std; |
|||
using namespace kaz; |
|||
|
|||
//template void kaz::quotedDecoded<'='> (string &content);
|
|||
//template void kaz::quotedDecoded<'%'> (string &content);
|
|||
|
|||
static const string::size_type MAX_QUOTED_PRINTABLE_SIZE (78); |
|||
|
|||
const char *const kaz::base64Chars = |
|||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
|||
"abcdefghijklmnopqrstuvwxyz" |
|||
"0123456789" |
|||
"+/"; |
|||
|
|||
const string kaz::availableURLChars = |
|||
"!#$%&'()*+,-./" |
|||
"0123456789" |
|||
":;=?" |
|||
"@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
|||
"[]_" |
|||
"abcdefghijklmnopqrstuvwxyz" |
|||
"~"; |
|||
|
|||
|
|||
// ================================================================================
|
|||
uint16_t |
|||
kaz::getCols () { |
|||
struct winsize w; |
|||
ioctl (0, TIOCGWINSZ, &w); |
|||
return w.ws_col; |
|||
} |
|||
|
|||
// ================================================================================
|
|||
// Format a duration as "HH:MM:SS.ssssss".
//
//   delta: the duration; it is used to build a std::chrono::duration<double>,
//          whose unit is the second.
//
// Hours and minutes are zero-padded to two characters, the seconds are
// printed in fixed notation with six decimals in a zero-padded field of nine
// characters.
string
kaz::ns2string (const double &delta) {
  using namespace std::chrono;

  duration<double> remaining (delta);
  const hours wholeHours (duration_cast<hours> (remaining));
  remaining -= wholeHours;
  const minutes wholeMinutes (duration_cast<minutes> (remaining));
  remaining -= wholeMinutes;

  ostringstream out;
  out << setfill ('0')
      << setw (2) << wholeHours.count () << ":"
      << setw (2) << wholeMinutes.count () << ":"
      << setw (9) << fixed << setprecision (6) << remaining.count ();
  return out.str ();
}
|||
// ================================================================================
|
|||
void |
|||
kaz::replaceAll (string& str, const string &from, const string &to) { |
|||
DEF_LOG ("kaz::replaceAll", "form: " << from << " to: " << to); |
|||
if (str.empty () || from.empty ()) |
|||
return; |
|||
for (string::size_type startPos (0); |
|||
(startPos = str.find (from, startPos)) != string::npos; |
|||
startPos += to.length ()) |
|||
str.replace (startPos, from.size (), to); |
|||
} |
|||
|
|||
void |
|||
kaz::replaceAll (string& str, const map<const string, const string> &subst) { |
|||
DEF_LOG ("kaz::replaceAll", "str: " << str); |
|||
for (map<const string, const string>::const_iterator it = subst.begin (); it != subst.end (); ++it) |
|||
replaceAll (str, it->first, it->second); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
kaz::toLower (string &content) { |
|||
DEF_LOG ("kaz::toLower", "content: " << content); |
|||
static locale loc; |
|||
for (string::size_type i = 0; i < content.length (); ++i) |
|||
content [i] = tolower (content[i], loc); |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// Return an upper-case version of src, avoiding a copy when possible.
//
//   src: the text to convert
//   tmp: storage used only when a conversion is needed
//
// When src has no character changed by toupper (), src itself is returned
// and tmp is left untouched.  Otherwise tmp is overwritten with the
// upper-case text and returned.  The returned reference is valid as long as
// the corresponding argument is.
const string &
kaz::toUpperIfNeed (const string &src, string &tmp) {
  DEF_LOG ("kaz::toUpperIfNeed", "src: " << src);
  bool needUpper (false);
  for (string::const_iterator it = src.begin (); it != src.end (); ++it) {
    // toupper () is only defined for values of unsigned char (or EOF).
    const unsigned char c (*it);
    if (toupper (c) != c) {
      needUpper = true;
      break;
    }
  }
  if (!needUpper)
    return src;
  tmp.clear ();
  tmp.reserve (src.size ());
  for (string::const_iterator it = src.begin (); it != src.end (); ++it)
    tmp.push_back ((char) toupper ((unsigned char) *it));
  return tmp;
}
|||
|
|||
// Comparison predicate for case-insensitive searches: true when the
// upper-case form of a equals b.  b is expected to be already upper case.
//
// Both characters are handled as unsigned char: toupper () is undefined for
// negative values, which plain char takes for bytes above 0x7F.
inline bool
caseInsensitiveCharCompare (char a, char b) {
  return toupper (static_cast<unsigned char> (a)) == static_cast<unsigned char> (b);
}
|||
|
|||
// Find the first occurrence of pattern in s, ignoring the case of s and of
// pattern, starting the search at offset pos.
//
// Returns the offset of the match from the beginning of s, or string::npos
// when there is no match or when pos is beyond the end of s.
string::size_type
kaz::caseInsensitiveFind (const string& s, const string& pattern, const string::size_type &pos) {
  DEF_LOG ("kaz::caseInsensitiveFind", "pattern: " << pattern << " pos: " << pos << " s: " << s);
  // s.begin ()+pos would be an invalid iterator.
  if (pos > s.length ())
    return string::npos;
  string tmp;
  const string &upperPattern (toUpperIfNeed (pattern, tmp));
  LOG ("pattern: " << upperPattern);
  // The predicate upper-cases the characters of s on the fly.
  const string::const_iterator it (search (s.begin ()+pos, s.end (), upperPattern.begin (), upperPattern.end (), caseInsensitiveCharCompare));
  if (it == s.end ())
    return string::npos;
  LOG ("find: " << (it - s.begin ()));
  return it - s.begin ();
}
|||
|
|||
// Find the last occurrence of pattern in s, ignoring the case of s and of
// pattern.
//
// NOTE(review): pos is interpreted as the number of leading characters of s
// excluded from the search, by symmetry with caseInsensitiveFind (); the
// previous code built the iterator s.rend ()+pos, which is invalid for any
// non-zero pos.  Confirm this is the intended meaning.
//
// Returns the offset of the match from the beginning of s, or string::npos
// when there is no match or when pos is beyond the end of s.
string::size_type
kaz::caseInsensitiveRFind (const string& s, const string& pattern, const string::size_type &pos) {
  DEF_LOG ("kaz::caseInsensitiveRFind", "pattern: " << pattern << " pos: " << pos << " s: " << s);
  if (pos > s.length ())
    return string::npos;
  string tmp;
  const string &upperPattern (toUpperIfNeed (pattern, tmp));
  LOG ("pattern: " << upperPattern);
  // Search the reversed pattern in the reversed text: the first hit is the
  // last occurrence in reading order.
  const string::const_reverse_iterator last (s.rend () - static_cast<string::difference_type> (pos));
  const string::const_reverse_iterator it (search (s.rbegin (), last, upperPattern.rbegin (), upperPattern.rend (), caseInsensitiveCharCompare));
  if (it == last)
    return string::npos;
  // s.rend () - it is the offset just after the match in reading order.
  LOG ("find: " << (s.rend () - it - pattern.length ()));
  return s.rend () - it - pattern.length ();
}
|||
|
|||
// ================================================================================
|
|||
template<char delim> |
|||
void |
|||
kaz::quotedDecode (string &content) { |
|||
DEF_LOG ("kaz::quotedDecode", "delim: " << delim << " content: " << content); |
|||
string::size_type len (content.length ()); |
|||
if (!len) |
|||
return; |
|||
LOG ("len: " << len); |
|||
string::iterator p (content.begin ()), q (p); |
|||
for ( ; |
|||
p < content.end (); |
|||
++p, ++q) { |
|||
if (*p != delim) { |
|||
*q = *p; |
|||
continue; |
|||
} |
|||
if (p+1 < content.end () && *(p+1) == '\n') { |
|||
|
|||
LOG_BUG (q == content.begin (), ++p;continue, "kazMisc::quotedDecode bug: bad quoted-printable format. (start with '=', content: " << content << ")"); |
|||
++p; |
|||
--q; |
|||
continue; |
|||
} |
|||
|
|||
LOG_BUG (p+3 > content.end () || !isxdigit (p[1]) || !isxdigit (p[2]), return, "kazMisc::quotedDecode bug: bad quoted-printable format. (content: " << content << ")"); |
|||
*q = (char) ((getHexaVal (p[1]) << 4) + getHexaVal (p[2])); |
|||
p += 2; |
|||
} |
|||
content.resize (q-content.begin ()); |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
kaz::quotedEncode (string &content) { |
|||
DEF_LOG ("kaz::quotedDecode", "content: " << content); |
|||
string::size_type nbQuoted (0); |
|||
for (string::const_iterator it = content.begin (); it != content.end (); ++it) |
|||
if (isQuotedPrintable (*it)) |
|||
++nbQuoted; |
|||
if (!nbQuoted) |
|||
return; |
|||
string::size_type estimate (content.length ()+nbQuoted*3); |
|||
estimate += (estimate/MAX_QUOTED_PRINTABLE_SIZE)*2; |
|||
string result; |
|||
result.reserve (estimate); |
|||
string::size_type cols (0); |
|||
char upper, lower; |
|||
for (string::const_iterator it = content.begin (); it != content.end (); ++it) { |
|||
const char &c (*it); |
|||
if (c == '\n') { |
|||
result.push_back ('\n'); |
|||
cols = 0; |
|||
continue; |
|||
} |
|||
if (cols >= MAX_QUOTED_PRINTABLE_SIZE) { |
|||
result.push_back ('='); |
|||
result.push_back ('\n'); |
|||
cols = 0; |
|||
} |
|||
if (!isQuotedPrintable (c) || |
|||
((c == ' ' || c =='\t') && (it+1 == content.end () || *(it+1) == '\n'))) { |
|||
if (cols > MAX_QUOTED_PRINTABLE_SIZE-3) { |
|||
result.push_back ('='); |
|||
result.push_back ('\n'); |
|||
cols = 0; |
|||
} |
|||
getHexa (c, upper, lower); |
|||
result.push_back ('='); |
|||
result.push_back (upper); |
|||
result.push_back (lower); |
|||
cols += 3; |
|||
continue; |
|||
} |
|||
result.push_back (c); |
|||
++cols; |
|||
} |
|||
content.swap (result); |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
kaz::base64Decode (string &content) { |
|||
DEF_LOG ("kaz::base64Decode", "content: " << content); |
|||
string::size_type len (content.length ()); |
|||
if (!len) |
|||
return; |
|||
LOG ("len: " << len); |
|||
unsigned char buff[4]; |
|||
int idx = 0; |
|||
string::iterator p (content.begin ()), q (p); |
|||
for (; |
|||
p < content.end (); |
|||
++p) { |
|||
char c = *p; |
|||
if (c == '=') |
|||
break; |
|||
if (c == '\n') |
|||
continue; |
|||
|
|||
LOG_BUG (!isBase64 (c), return, "kazMisc::base64Decode bug: bad base64 format. (content: " << content << ")"); |
|||
buff [idx] = getBase64Val (c); |
|||
if (++idx != 4) |
|||
continue; |
|||
*q = buff [0] << 2 | (buff [1] & 0x30) >> 4; |
|||
*++q = buff [1] << 4 | (buff [2] & 0x3c) >> 2; |
|||
*++q = buff [2] << 6 | buff [3]; |
|||
++q; |
|||
idx = 0; |
|||
} |
|||
if (idx) { |
|||
for (int j = idx; j < 4; ++j) |
|||
buff [j] = 0; |
|||
*q = buff [0] << 2 | (buff [1] & 0x30) >> 4; |
|||
++q; |
|||
--idx; |
|||
if (idx) { |
|||
*q = buff [1] << 4 | (buff [2] & 0x3c) >> 2; |
|||
++q; |
|||
} |
|||
} |
|||
content.resize (q-content.begin ()); |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
kaz::base64Encode (string &content) { |
|||
DEF_LOG ("kaz::base64Encode", "content: " << content); |
|||
string::size_type length (content.length ()); |
|||
std::string result; |
|||
result.reserve ((length + 2) / 3 * 4 + length / MAX_QUOTED_PRINTABLE_SIZE + 1); |
|||
for (string::size_type pos (0), cols (0); pos < length; ) { |
|||
result.push_back (base64Chars [(content [pos + 0] & 0xfc) >> 2]); |
|||
if (pos == length-1) { |
|||
result.push_back (base64Chars [(content [pos + 0] & 0x03) << 4]); |
|||
result.push_back ('='); |
|||
result.push_back ('='); |
|||
break; |
|||
} |
|||
result.push_back (base64Chars [((content [pos + 0] & 0x03) << 4) + |
|||
((content [pos + 1] & 0xF0) >> 4)]); |
|||
if (pos == length-2) { |
|||
result.push_back (base64Chars [(content [pos + 1] & 0x0F) << 2]); |
|||
result.push_back ('='); |
|||
break; |
|||
} |
|||
result.push_back (base64Chars [((content [pos + 1] & 0x0F) << 2) + |
|||
((content [pos + 2] & 0xC0) >> 6)]); |
|||
result.push_back (base64Chars [content [pos + 2] & 0x3F]); |
|||
pos += 3; |
|||
cols += 4; |
|||
if (cols >= MAX_QUOTED_PRINTABLE_SIZE) { |
|||
result.push_back ('\n'); |
|||
cols = 0; |
|||
} |
|||
} |
|||
content = result; |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
kaz::iso2utf (string &content) { |
|||
DEF_LOG ("kaz::iso2utf", "content: " << content); |
|||
string::size_type len (content.length ()); |
|||
if (!len) |
|||
return; |
|||
LOG ("len: " << len); |
|||
string::size_type charCount (0); |
|||
for (string::iterator it = content.begin (); it != content.end (); ++it) |
|||
if ((uint8_t) *it >= 0x80) |
|||
++charCount; |
|||
if (!charCount) |
|||
return; |
|||
LOG ("charCount: " << charCount); |
|||
content.resize (len+charCount); |
|||
string::iterator p (content.end ()-1), q (p+charCount); |
|||
for ( ; ; --p, --q) { |
|||
uint8_t ch = *p; |
|||
if (ch < 0x80) |
|||
*q = ch; |
|||
else { |
|||
*q = 0x80 | (ch & 0x3F); |
|||
*--q = 0xc0 | ch >> 6; |
|||
LOG ("ch: " << (char) ch); |
|||
} |
|||
if (p == q) |
|||
break; |
|||
} |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
kaz::encodedWord (string &content) { |
|||
// rfc2047
|
|||
DEF_LOG ("kaz::extendedWord", "content: " << content); |
|||
string::size_type charsetPos = content.find ("=?"); |
|||
if (charsetPos == string::npos) |
|||
return; |
|||
LOG ("charsetPos: " << charsetPos); |
|||
|
|||
LOG_BUG (charsetPos != 0, return, "kazMisc::extendedWord bug: =? not at begin pos. (content: " << content << ")"); |
|||
string result; |
|||
for ( ; |
|||
(charsetPos = content.find ("=?", charsetPos)) != string::npos; |
|||
) { |
|||
string::size_type modePos = content.find ("?", charsetPos+2); |
|||
|
|||
LOG_BUG (modePos == string::npos, return, "kazMisc::extendedWord bug: no end chartset. (content: " << content << ")"); |
|||
string::size_type contentPos = content.find ("?", modePos+1); |
|||
|
|||
LOG_BUG (contentPos != modePos+2, return, "kazMisc::extendedWord bug: no end chartset. (content: " << content << ")"); |
|||
string::size_type endPos = content.find ("?=", contentPos+1); |
|||
|
|||
LOG_BUG (endPos == string::npos, return, "kazMisc::extendedWord bug: no end chartset. (content: " << content << ")"); |
|||
string tmp (content.substr (contentPos+1, endPos-contentPos-1)); |
|||
switch (content [modePos+1]) { |
|||
case 'B': |
|||
case 'b': |
|||
base64Decode (tmp); |
|||
break; |
|||
case 'Q': |
|||
case 'q': |
|||
quotedDecode (tmp); |
|||
break; |
|||
default: |
|||
|
|||
LOG_BUG (true, return, "kazMisc::extendedWord bug: unknown mode. (mode: " << content [modePos+1] << ")"); |
|||
} |
|||
LOG ("tmp: " << tmp); |
|||
string charset (content.substr (charsetPos, modePos-charsetPos-2)); |
|||
toLower (charset); |
|||
if (! caseInsensitiveFind (charset, "ISO")) |
|||
iso2utf (tmp); |
|||
result += tmp; |
|||
charsetPos = endPos+2; |
|||
} |
|||
content = result; |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// ================================================================================
|
|||
void |
|||
kaz::charsetValue (string &content) { |
|||
// rfc2184
|
|||
DEF_LOG ("kaz::charsetValue", "content: " << content); |
|||
string::size_type langPos = content.find ("'"); |
|||
|
|||
LOG_BUG (langPos == string::npos, return, "kazMisc::charsetValue bug: no '. (content: " << content << ")"); |
|||
string::size_type contentPos = content.find ("'", langPos+1); |
|||
|
|||
LOG_BUG (contentPos == string::npos, return, "kazMisc::charsetValue bug: no double '. (content: " << content << ")"); |
|||
string tmp (content.substr (contentPos+1)); |
|||
quotedDecode<'%'> (tmp); |
|||
LOG ("tmp: " << tmp); |
|||
string charset (content.substr (0, langPos)); |
|||
toLower (charset); |
|||
if (! caseInsensitiveFind (charset, "ISO")) |
|||
iso2utf (tmp); |
|||
content = tmp; |
|||
LOG ("content: " << content); |
|||
} |
|||
|
|||
// ================================================================================
|
@ -0,0 +1,154 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#ifndef _kaz_Attachment_hpp |
|||
#define _kaz_Attachment_hpp |
|||
|
|||
#include <string> |
|||
#include <regex> |
|||
#include <map> |
|||
#include <utility> |
|||
|
|||
#include "EmbeddedData.hpp" |
|||
|
|||
namespace kaz { |
|||
|
|||
using namespace std; |
|||
|
|||
// ================================================================================
|
|||
/*! e-mail structure */ |
|||
class Attachment { |
|||
public: |
|||
/*! tokens indicat candidats to be updated by remove attachment */ |
|||
//static const vector<const string> stringsToUpdate;
|
|||
static vector<string> stringsToUpdate; |
|||
/*! mime tokens */ |
|||
static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, RELATED, ALTERNATIVE; |
|||
/*! pattern to extract mime values */ |
|||
static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx; |
|||
|
|||
/*! get uniq filename */ |
|||
static string getUnknown (const string &ext = ""); |
|||
/*! remove all sections in content given by boundary tags */ |
|||
static void removeSection (string &content, const string &beginTag, const string &endTag); |
|||
/*! catenates all sections in content given by boundary tags (use temporary vector) */ |
|||
static string getSection (const string &content, const string &beginTag, const string &endTag); |
|||
/*! get all sections in content given by boundary marks and put them in result */ |
|||
static void getSection (const string &content, const string &beginTag, const string &endTag, vector<string> &result); |
|||
|
|||
/*! return the content-type */ |
|||
const string getContentType () const; |
|||
/*! return the filename in mime (or uniq name if missing) */ |
|||
const string getAttachName () const; |
|||
/*! return reference to the saved boundary. Empty value if attachment is not a multipart */ |
|||
const string &getBoundary () const; |
|||
/*! return the size of the content */ |
|||
const streamoff getSize () const; |
|||
/*! get a part of a mime header value */ |
|||
const string getProp (const string &token, const regex ®Ex) const; |
|||
|
|||
/*! return if base64 encoded */ |
|||
const bool isBase64Encoding () const; |
|||
/*! return if quoted-printable encoded */ |
|||
const bool isQuotedPrintableEnconding () const; |
|||
/*! return if text (plain or html) and base64 encoded */ |
|||
const bool isTextBase64 () const; |
|||
/*! return check if value exists in mime header */ |
|||
const bool isDefProp (const string &token, const string &val) const; |
|||
|
|||
protected: |
|||
/*! HTML image tag*/ |
|||
static const string IMG_BEGIN, IMG_END; |
|||
|
|||
/*! Attachment level (0 is main) */ |
|||
const int level; |
|||
/*! char position in the mbox of the boundary before this attachment */ |
|||
const streamoff beginInParent; |
|||
/*! char position of attachment including mime */ |
|||
const streamoff beginPos; |
|||
/*! char position of attachment content */ |
|||
streamoff contentPos, endPos; |
|||
/*! properties of the attachment */ |
|||
bool toExtract, toUpdate, toDisclaim; |
|||
/*! id of an image embedded in mbox */ |
|||
string cid; |
|||
/*! url to replace the attachment and its short id */ |
|||
string downloadUrl, downloadId; |
|||
/*! properties of embedded image (self encoded with base64)*/ |
|||
vector<EmbeddedData> embeddedData; |
|||
|
|||
/*! mime values of the attachment */ |
|||
map<string, string> env; |
|||
/*! boundary if the attachment is a multipart including previous and next "--" */ |
|||
string boundary; |
|||
/*! size of boundary before the last "--" */ |
|||
streamoff boundaryMiddleSize; |
|||
|
|||
/*! sub attachment if the attachment is a multipart */ |
|||
vector<Attachment> subAttachements; |
|||
|
|||
/*! called during the parse process */ |
|||
Attachment (ifstream &mbox, const int &level, const streamoff beginInParent, streamoff &curPos); |
|||
|
|||
/*! called one time by the constructor */ |
|||
void readMime (ifstream &mbox, streamoff &curPos); |
|||
/*! called one time by the constructor */ |
|||
void readBoundaries (ifstream &mbox, streamoff &curPos); |
|||
/*! called for each part during the parse process add add a subAttachement. Return false when found last boundary */ |
|||
bool nextBondary (ifstream &mbox, streamoff &curPos); |
|||
|
|||
/*! recursively marks alternative attachments to be disclaim */ |
|||
void markDisclaim (bool &plainMarked, bool &htmlMarked); |
|||
/*! recursively marks big attachments to be removed and upated (including disclaim). return true when part need to be updated (can't be extracted). */ |
|||
bool markSignificant (const string &parentMultiProp, const streamoff &minAttachSize, ifstream &mbox, vector<Attachment *> &allMarkedPtrs); |
|||
/*! get a copy of the content. Base64 is decoded. Quoted-Printable is unwarp and unquoted */ |
|||
string getContent (ifstream &mbox) const; |
|||
/*! write the content, encoded if necessary (base64 and quoted-printable) */ |
|||
void println (ofstream &outbox, string content) const; |
|||
|
|||
/*! replace embedded image */ |
|||
void replaceEmbedded (string &content) const; |
|||
|
|||
public: |
|||
friend class MainAttachment; |
|||
friend ostream& operator << (ostream& os, const Attachment& attachment); |
|||
}; |
|||
|
|||
/*! for debug purpose */
|||
ostream& operator << (ostream& os, const Attachment& attachment); |
|||
|
|||
// ================================================================================
|
|||
} |
|||
|
|||
#endif // _kaz_Attachment_hpp
|
@ -0,0 +1,70 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#ifndef _kaz_EmbeddedData_hpp |
|||
#define _kaz_EmbeddedData_hpp |
|||
|
|||
#include <string> |
|||
#include <vector> |
|||
|
|||
namespace kaz { |
|||
|
|||
using namespace std; |
|||
|
|||
// ================================================================================
|
|||
/*! properties of embedded image in html part (rfc2397) */ |
|||
class EmbeddedData { |
|||
public: |
|||
/*! rank of this image tag */ |
|||
int imgIdx; |
|||
/*! extracted in first pass */ |
|||
string contentType, name; |
|||
string downloadUrl, downloadId; |
|||
/*! area of base64 relative in the image section */ |
|||
string::size_type startData, dataLength; |
|||
|
|||
/*! initialisation in the first pass */ |
|||
EmbeddedData (const int &imgIdx, const string &contentType, const string &name, const string::size_type &startData, const string::size_type &dataLength); |
|||
|
|||
/*! records properties */ |
|||
static void fillEmbeddedData (const vector<string> &imgs, const streamoff &minAttachSize, vector<EmbeddedData> &data); |
|||
|
|||
// friend ostream& operator << (ostream& os, const EmbeddedData& embeddedData);
|
|||
}; |
|||
ostream& operator << (ostream& os, const EmbeddedData& embeddedData); |
|||
|
|||
// ================================================================================
|
|||
} |
|||
|
|||
#endif // _kaz_EmbeddedData_hpp
|
@ -0,0 +1,121 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#ifndef _kaz_MainAttachment_hpp |
|||
#define _kaz_MainAttachment_hpp |
|||
|
|||
#include <boost/filesystem.hpp> |
|||
#include "Attachment.hpp" |
|||
|
|||
namespace kaz { |
|||
|
|||
using namespace std; |
|||
namespace bfs = boost::filesystem; |
|||
|
|||
// ================================================================================
|
|||
/*! root level of e-mail structure */ |
|||
class MainAttachment : public Attachment { |
|||
public: |
|||
/*! text to add in disclaim */ |
|||
static const string templatePlainAddLink, templatePlainAllLink, templateHtmlHeader, templateHtmlAddLink, templateHtmlOtherLink, templateHtmlAllLink, templateHtmlFooter; |
|||
|
|||
/*! white space to split a text */ |
|||
static const regex whiteSpaceRegEx; |
|||
|
|||
/*! copy a slice of mbox to stdout */ |
|||
static void copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end); |
|||
|
|||
/*! get url and id (space separated) from stdin */ |
|||
void fillUrlId (string &url, string &id); |
|||
|
|||
/*! location of extracted files */ |
|||
void setExtractDir (const bfs::path &extractDir); |
|||
/*! URL base for archive download of all extracted files */ |
|||
void setArchiveDownloadURL (const string &archiveDownloadURL); |
|||
/*! add a single link in disclaim */ |
|||
void addLink (string &plain, string &html, const string &url, const string &name) const; |
|||
/*! get disclaim according alls links (retreived or create) */ |
|||
void getDisclaim (string &plain, string &html) const; |
|||
|
|||
private: |
|||
/*! for boot strap the attachment constructor */ |
|||
streamoff &initTmpPos () { return tmpPos = 0; } |
|||
/*! for boot strap the attachment constructor */ |
|||
int &initTmpLevel () { return tmpLevel = 0; } |
|||
|
|||
/*! volatile values*/ |
|||
streamoff tmpPos; |
|||
int tmpLevel; |
|||
|
|||
/*! dir path for extraction */ |
|||
bfs::path extractDir; |
|||
/*! URL base for download archives */ |
|||
string archiveDownloadURL; |
|||
|
|||
/*! subset in the tree of all attachments to be consider for extraction or modification */ |
|||
vector<Attachment *> allMarkedPtrs; |
|||
/*! previous links find in mbox */ |
|||
map<string, string> previousLinks; |
|||
/*! add link only if no significant value already exist. */ |
|||
void addPrevious (const string &href, const string &name); |
|||
|
|||
/*! extract previous links from plain text. Used by extractPreviousKAZ */ |
|||
void extractLinks (const string &extractedPlainKAZ); |
|||
/*! extract previous links from html-li list. Used by extractPreviousKAZ */ |
|||
void extractLinks (const vector<string> &liOne); |
|||
/*! extract previous links in mbox. Used by getUpdatedURL and substitute */ |
|||
void extractPreviousKAZ (ifstream &mbox); |
|||
/*! remove previous links to archive. Used by substitute */ |
|||
void removePreviousArchive (); |
|||
|
|||
public: |
|||
/*! the main attachment in mbox */ |
|||
MainAttachment (ifstream &mbox); |
|||
|
|||
/*! mark disclaim, update and extract attachments. Must be call before: getUpdatedURL, extract or substitute */ |
|||
void markSignificant (const streamoff &minAttachSize, ifstream &mbox); |
|||
/*! write to stdout le list of previous links in mbox */ |
|||
void getUpdatedURL (ifstream &mbox); |
|||
/*! create record for extraction */ |
|||
void newPjEntry (const int &attachCount, const string &contentType, const string &name, string &dirName, string &mediaName) const; |
|||
/*! extract big attachments in mbox to extractDir and write to stdout le dirname of each extraction */ |
|||
void extract (ifstream &mbox, const SizeArg &minSize) const; |
|||
/*! substitute big attachments by the url give in stdin */ |
|||
void substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize); |
|||
}; |
|||
|
|||
// ================================================================================
|
|||
} |
|||
|
|||
#endif // _kaz_MainAttachment_hpp
|
@ -0,0 +1,74 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#ifndef _kaz_SizeArg_hpp |
|||
#define _kaz_SizeArg_hpp |
|||
|
|||
#include <iostream> |
|||
#include <string> |
|||
#include <boost/lexical_cast.hpp> |
|||
|
|||
namespace kaz { |
|||
|
|||
using namespace std; |
|||
|
|||
// ================================================================================
|
|||
/*! human readable of size values */ |
|||
class SizeArg { |
|||
private: |
|||
/*! the size */ |
|||
size_t bytes; |
|||
|
|||
/*! human readable convertion */ |
|||
void init (const string &option); |
|||
public: |
|||
/*! scalar convertion */ |
|||
operator size_t () const { return bytes; } |
|||
|
|||
/*! initialization from scalar value */ |
|||
SizeArg (const size_t &bytes = 0); |
|||
/*! initialization from human readable value */ |
|||
SizeArg (const string &option); |
|||
|
|||
friend ostream &operator << (ostream &out, const SizeArg &sizeArg); |
|||
friend istream &operator >> (istream &in, SizeArg &sizeArg); |
|||
}; |
|||
|
|||
// ================================================================================
|
|||
/*! human readable convertion */ |
|||
ostream &operator << (ostream &out, const SizeArg &sizeArg); |
|||
istream &operator >> (istream &in, SizeArg &sizeArg); |
|||
} |
|||
|
|||
#endif // _kaz_SizeArg_hpp
|
@ -0,0 +1,134 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#ifndef _Kaz_Debug_hpp |
|||
#define _Kaz_Debug_hpp |
|||
|
|||
#include <iostream> |
|||
#include <string> |
|||
|
|||
/*! log error: when cond holds, write expr to std::cerr (after a line break)
    then execute action (e.g. return). Every stream name is fully qualified
    so that the macro does not depend on a "using namespace std". */
#define LOG_BUG(cond, action, expr) {if (cond) {std::cerr << std::endl << expr << std::endl << std::flush; action; }}
|||
|
|||
/* SMART_* variants of the log macros: forwarded to DEF_LOG / LOG only when
   ENABLE_SMART_LOG is defined, expanded to nothing otherwise. A macro
   already defined by the includer is kept. */
#if defined (ENABLE_SMART_LOG)

#  if !defined (SMART_DEF_LOG)
#    define SMART_DEF_LOG(name, expr) DEF_LOG (name, expr)
#  endif

#  if !defined (SMART_LOG)
#    define SMART_LOG(expr) LOG (expr)
#  endif

#  if !defined (SMART_LOG_EXPR)
     /* evaluate expr only when the log is switched on */
#    define SMART_LOG_EXPR(expr) {if (::kaz::Log::debug) {expr;} }
#  endif

#else

#  if !defined (SMART_DEF_LOG)
#    define SMART_DEF_LOG(name, expr)
#  endif

#  if !defined (SMART_LOG)
#    define SMART_LOG(expr)
#  endif

#  if !defined (SMART_LOG_EXPR)
#    define SMART_LOG_EXPR(expr)
#  endif

#endif
|||
|
|||
/* Log macros. With DISABLE_LOG they expand to nothing (or to an empty
   block); otherwise they write to std::cerr when ::kaz::Log::debug is set.
   A macro already defined by the includer is kept. */
#if defined (DISABLE_LOG)

#  if !defined (DEF_LOG)
#    define DEF_LOG(name, expr)
#  endif

#  if !defined (LOG)
#    define LOG(expr) {}
#  endif

#  if !defined (DEBUG)
#    define DEBUG(expr) {}
#  endif

#else

#  if !defined (DEF_LOG)
     /*! to be placed as the first instruction of a method: declares the local
         ::kaz::Log object named "log" and writes expr */
#    define DEF_LOG(name, expr) ::kaz::Log log (name); { if (::kaz::Log::debug) std::cerr << expr << std::endl << std::flush; }
#  endif

#  if !defined (LOG)
     /*! to be placed in a method where DEF_LOG was called previously */
     // _______________________________________________________ Don't forget DEF_LOG
#    define LOG(expr) { if (::kaz::Log::debug) std::cerr << log << "| " << expr << std::endl << std::flush; }
#  endif

#  if !defined (DEBUG)
     /*! log without format */
#    define DEBUG(expr) { if (::kaz::Log::debug) std::cerr << expr << std::endl << std::flush; }
#  endif

#endif
|||
|
|||
namespace kaz { |
|||
// ================================================================================
|
|||
using namespace std; |
|||
|
|||
/*! manage prety print log */ |
|||
class Log { |
|||
/*! visual indentation of call */ |
|||
static size_t indent; |
|||
/*! name recall in log */ |
|||
string functName; |
|||
public: |
|||
/*! switch on the log */ |
|||
static bool debug; |
|||
|
|||
/*! log entry of a method */ |
|||
Log (const string &functName); |
|||
/*! log return of a method */ |
|||
~Log (); |
|||
|
|||
/*! timestamp of the log */ |
|||
static string getLocalTimeStr (); |
|||
friend ostream &operator << (ostream &out, const Log &log); |
|||
}; |
|||
ostream &operator << (ostream &out, const Log &log); |
|||
|
|||
// ================================================================================
|
|||
} // kaz
|
|||
|
|||
#endif //_Kaz_Debug_hpp
|
@ -0,0 +1,141 @@ |
|||
////////////////////////////////////////////////////////////////////////////
|
|||
// Copyright KAZ 2021 //
|
|||
// //
|
|||
// contact (at) kaz.bzh //
|
|||
// //
|
|||
// This software is a filter to shrink email by attachment extraction. //
|
|||
// //
|
|||
// This software is governed by the CeCILL-B license under French law and //
|
|||
// abiding by the rules of distribution of free software. You can use, //
|
|||
// modify and/or redistribute the software under the terms of the //
|
|||
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|||
// URL "http://www.cecill.info". //
|
|||
// //
|
|||
// As a counterpart to the access to the source code and rights to copy, //
|
|||
// modify and redistribute granted by the license, users are provided //
|
|||
// only with a limited warranty and the software's author, the holder of //
|
|||
// the economic rights, and the successive licensors have only limited //
|
|||
// liability. //
|
|||
// //
|
|||
// In this respect, the user's attention is drawn to the risks associated //
|
|||
// with loading, using, modifying and/or developing or reproducing the //
|
|||
// software by the user in light of its specific status of free software, //
|
|||
// that may mean that it is complicated to manipulate, and that also //
|
|||
// therefore means that it is reserved for developers and experienced //
|
|||
// professionals having in-depth computer knowledge. Users are therefore //
|
|||
// encouraged to load and test the software's suitability as regards //
|
|||
// their requirements in conditions enabling the security of their //
|
|||
// systems and/or data to be ensured and, more generally, to use and //
|
|||
// operate it in the same conditions as regards security. //
|
|||
// //
|
|||
// The fact that you are presently reading this means that you have had //
|
|||
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|||
////////////////////////////////////////////////////////////////////////////
|
|||
|
|||
#ifndef _kaz_misc_hpp |
|||
#define _kaz_misc_hpp |
|||
|
|||
#include <string> |
|||
#include <ctype.h> |
|||
#include <map> |
|||
|
|||
namespace kaz { |
|||
using namespace std; |
|||
|
|||
// =======================================================================
|
|||
/*! ordered base64 chars */ |
|||
extern const char * const base64Chars; |
|||
/*! set of chars available in URL */ |
|||
extern const string availableURLChars; |
|||
|
|||
// =======================================================================
|
|||
/*! get the width of the terminal */ |
|||
uint16_t getCols (); |
|||
|
|||
/*! display time. */ |
|||
string ns2string (const double &delta); |
|||
|
|||
// =======================================================================
|
|||
/*! side effect on str to replace "from" by "to" */ |
|||
void replaceAll (string& str, const string &from, const string &to); |
|||
/*! side effect on str to replace a set of "from" by a set of "to" */ |
|||
void replaceAll (string& str, const map<const string, const string> &subst); |
|||
|
|||
// =======================================================================
|
|||
/*! side effect to lower case a string (in mime section) */ |
|||
void toLower (string &content); |
|||
|
|||
/*! compare strings are done in uppercase to avoid accents. Give token in uppercase spin up the process */ |
|||
const string &toUpperIfNeed (const string &src, string &tmp); |
|||
/*! find upper case of p in upper case of s */ |
|||
string::size_type caseInsensitiveFind (const string& s, const string& p, const string::size_type &pos = 0); |
|||
/*! reverse find upper case of p in upper case of s */ |
|||
string::size_type caseInsensitiveRFind (const string& s, const string& p, const string::size_type &pos = 0); |
|||
/*! side effect to replace each =XX by the char with the hexadecimal value XX. The delimiter can be '%' (%XX) as in rfc2184 */
|||
template<char delim='='> |
|||
void quotedDecode (string &content); |
|||
/*! side effect to quoted-printable content rfc2045 */ |
|||
void quotedEncode (string &content); |
|||
/*! side effect to decode base64 */ |
|||
void base64Decode (string &content); |
|||
/*! side effect to encode base64 */ |
|||
void base64Encode (string &content); |
|||
/*! side effect to change charset of content */ |
|||
void iso2utf (string &content); |
|||
/*! side effect to get the encodedWord according rfc2047 */ |
|||
void encodedWord (string &content); |
|||
/*! side effect to get the charsetValue according rfc2184 */ |
|||
void charsetValue (string &content); |
|||
|
|||
// =======================================================================
|
|||
/*! tell whether c can be written as-is (no quoting needed) in quoted-printable content */
inline bool
isQuotedPrintable (const char &c) {
    // space and tab are always kept literally
    if (c == ' ' || c == '\t')
        return true;
    // anything outside the printable range [33, 126] must be quoted
    if (c < 33 || c > 126)
        return false;
    // '=' is the quoting delimiter. '.' would be allowed unquoted (the original
    // note cites rfc2184), but quoting it avoids having to check for a '.'
    // alone in a line :-)
    return c != '=' && c != '.';
}
|||
|
|||
/*! return if the c is in available base64 chars (A-Z, a-z, 0-9, '+' and '/')
 *
 * \param c the char to test.
 * \return true when c belongs to the base64 alphabet ('=' padding is not part of it).
 */
inline bool
isBase64 (const char &c) {
    // Explicit ASCII ranges rather than isalnum(): handing a negative char
    // (any byte >= 0x80 when char is signed) to isalnum is undefined
    // behaviour, and its answer depends on the current locale, whereas the
    // base64 alphabet is fixed.
    return
        (c >= 'A' && c <= 'Z') ||
        (c >= 'a' && c <= 'z') ||
        (c >= '0' && c <= '9') ||
        c == '+' || c == '/';
}
|||
|
|||
/*! get the rank (0..63) of c in the base64 set of values
 *
 * c is not validated here: the result is only meaningful when c is one of
 * A-Z, a-z, 0-9, '+' or '/'.
 */
inline unsigned char
getBase64Val (const char &c) {
    // the two symbols close the alphabet
    switch (c) {
    case '+':
        return 62;
    case '/':
        return 63;
    default:
        break;
    }
    const int code = c;
    // digits map to 52..61, upper case letters to 0..25, lower case letters to 26..51
    const int rank =
        code <= '9' ? (code - '0') + 52 :
        code <= 'Z' ? (code - 'A') :
        (code - 'a') + 26;
    return static_cast<unsigned char> (rank);
}
|||
|
|||
/*! convert the hexa digit c ('0'-'9', 'A'-'F' or 'a'-'f') into its nibble value (0..15)
 *
 * c is not validated here: the result is only meaningful for an hexa digit.
 */
inline unsigned char
getHexaVal (const char &c) {
    const int code = c;
    // pick the offset that brings the digit family down to its numeric value
    int offset;
    if (code > 'F')
        offset = 'a' - 10;   // lower case letters
    else if (code > '9')
        offset = 'A' - 10;   // upper case letters
    else
        offset = '0';        // decimal digits
    return static_cast<unsigned char> (code - offset);
}
|||
|
|||
/*! side effect on upper and lower to get the two upper case hexa digits of the byte c
 *
 * \param c the byte to represent.
 * \param upper receives the hexa digit of the high nibble of c.
 * \param lower receives the hexa digit of the low nibble of c.
 */
inline void
getHexa (const char &c, char &upper, char &lower) {
    const char *const hexaDigits = "0123456789ABCDEF";
    // c is read again for each nibble (no local copy), in the same order as before
    upper = hexaDigits [(c >> 4) & 0xF];
    lower = hexaDigits [c & 0xF];
}
|||
|
|||
// =======================================================================
|
|||
} |
|||
|
|||
#endif // _kaz_misc_hpp |
|
Loading…
Reference in new issue