Compare commits

...

2 Commits

Author SHA1 Message Date
8efa333b27 merge develpp 2022-11-20 15:02:29 +01:00
29480b299c fix boundary 2022-11-20 14:55:44 +01:00
5 changed files with 221 additions and 54 deletions

0
src/bash/filter.sh Normal file → Executable file
View File

131
src/bash/filterTest.sh Executable file
View File

@ -0,0 +1,131 @@
#!/bin/bash
PRG=$(basename $0)
BOLD=''
RED=''
GREEN=''
YELLOW=''
BLUE=''
MAGENTA=''
CYAN=''
NC='' # No Color
NL='
'
TTY=$(tty)
LOG () {
echo "$1" >> "${TTY}"
}
usage () {
echo "Usage: ${PRG} mbox"
exit 1
}
[ "$#" -eq 1 ] || usage
mbox=$(realpath $1)
cd $(dirname $0)
DOMAINNAME="$(cat domainname)"
JIRAFEAU_URL="https://depot.${DOMAINNAME}"
JIRAFEAU_LOCAL="${JIRAFEAU_URL}"
cd ../..
mkdir -p tmp
rm -fr tmp/PJ-name.txt tmp/PJ-Keys.txt tmp/PJ tmp/archive-content.txt tmp/url-to-refresh.txt tmp/new-mbox
echo -e "time: $(date "+%Y-%m-%d-%H:%M:%S")\nid: $(date +%s)" > "tmp/archive-content.txt"
LOG " - ${BLUE}mbox: ${mbox}${NC}"
build/out/eMailShrinker -l "${mbox}"
LOG
build/out/eMailShrinker -u "${mbox}" > "tmp/url-to-refresh.txt" 2>> "${TTY}"
cat "tmp/url-to-refresh.txt" | grep "${JIRAFEAU_URL}" | while read REMOTE_LINK; do
REMOTE_REF=$(echo "${REMOTE_LINK}" | sed -e 's/.*h=\([^&]*\).*/\1/' -e 's/.*http.*//')
[ -z "${REMOTE_REF}" ] && continue
LOG " - ${BLUE}update ${REMOTE_REF}${NC}"
build/out/jirafeauAPI -f "${JIRAFEAU_LOCAL}" -t "month" update "${REMOTE_REF}" 2>> "${TTY}"
LOG
echo "old: ${REMOTE_REF} ${REMOTE_KEY}" >> "tmp/archive-content.txt"
done
build/out/eMailShrinker -s "5ki" -d tmp/PJ "${mbox}" > "tmp/PJ-name.txt"
LOG " - ${BLUE}PJ-name: ${NC}"
cat tmp/PJ-name.txt
LOG
cat "tmp/PJ-name.txt" | {
while read ATTACH_TMP_NAME; do
LOG " - ${BLUE}find ATTACH_TMP_NAME: (${ATTACH_TMP_NAME}) ${NC}"
if [ -d "${ATTACH_TMP_NAME}" ]; then
ATTACH_MEDIA="${ATTACH_TMP_NAME}/media"
ATTACH_NAME=$(grep "^Name: " "${ATTACH_TMP_NAME}/meta" | cut -c 7- )
ATTACH_CONTENT_TYPE=$(grep "^Content-Type: " "${ATTACH_TMP_NAME}/meta" | cut -c 15- )
else
LOG " - ${RED}no ATTACH_TMP_NAME: ${ATTACH_TMP_NAME}${NC}"
# XXX error
continue
fi
LOG " - ${BLUE}find ${ATTACH_NAME} / (${ATTACH_CONTENT_TYPE}) / ${ATTACH_MEDIA} ${NC}"
PASSWORD=$(apg -n 1 -m 12)
PASSWORD_MD5=$(echo -n ${PASSWORD} | ${MD5_CMD} | cut -d \ -f 1)
build/out/jirafeauAPI -f "${JIRAFEAU_LOCAL}" -t "month" -s "1Gi" -c "${ATTACH_CONTENT_TYPE}" -n "${ATTACH_NAME}" send "${ATTACH_MEDIA}" "${PASSWORD}" 2>> "${TTY}" > "tmp/one.txt"
cat "tmp/one.txt" | {
read JIR_TOKEN
read JIR_CODE
case "${JIR_TOKEN}" in
"" | no | *Error* | \<* )
LOG " - ${RED}can't upload ${ATTACH_MEDIA} <${JIR_TOKEN}> <${JIR_CODE}>${NC}"
cat "tmp/one.txt" >> "${TTY}"
echo "url:"
;;
* )
LOG " - ${GREEN} upload ${ATTACH_MEDIA}${NC}"
echo "url: ${JIRAFEAU_URL}/f.php?d=1&h=${JIR_TOKEN}&k=${PASSWORD_MD5}"
echo "new: ${JIR_TOKEN} ${PASSWORD_MD5}" >> "tmp/archive-content.txt"
;;
esac
}
done
NB_ATTACH=$(grep -e "^old: " -e "^new: " "tmp/archive-content.txt" | wc -l)
if [ "${NB_ATTACH}" -gt 1 ]; then
PASSWORD=$(apg -n 1 -m 12)
PASSWORD_MD5=$(echo -n ${PASSWORD} | ${MD5_CMD} | cut -d \ -f 1)
build/out/jirafeauAPI -f "${JIRAFEAU_LOCAL}" -t "month" -s "1Gi" -c "text/kaz_email_archive" -n "archive_content" send "tmp/archive-content.txt" "${PASSWORD}" > "tmp/one.txt" 2>> "${TTY}"
cat "tmp/one.txt" | {
read JIR_TOKEN
read JIR_CODE
case "${JIR_TOKEN}" in
"" | no | *Error* | \<* )
LOG " - ${RED}can't upload tmp/archive-content.txt${NC}"
echo "arch: bad"
;;
* )
LOG " - ${GREEN} upload archive-content.txt${NC}"
echo "arch: ${JIRAFEAU_URL}/a.php?g=${JIR_TOKEN}~${PASSWORD_MD5}"
;;
esac
}
else
LOG " - ${GREEN} no archive${NC}"
echo "arch: none"
fi
} > "tmp/PJ-Keys.txt"
LOG " - ${BLUE}PJ-Keys: ${NC}"
cat tmp/PJ-Keys.txt
LOG
cat "tmp/PJ-Keys.txt" | build/out/eMailShrinker -s "5ki" "${mbox}" "tmp/new-mbox" 2>> "${TTY}"
LOG " - ${BLUE}new-mbox:${NC}"
build/out/eMailShrinker -l "tmp/new-mbox" 2>> "${TTY}"

View File

@ -65,7 +65,14 @@ const string Attachment::ALTERNATIVE ("alternative");
const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)");
const regex Attachment::nameRegEx (".*name=\"([^\"]*)\".*");
const regex Attachment::boundaryRegEx (".*boundary=\"?([^\" ]*)\"?.*");
// boundary="----=_Part_796779_1154936629.1668080348646"
// boundary="------------040709000505010508040808"
// boundary="----------=_1668606031-941125-91"
// boundary="_004_PAVPR10MB6792713B313048E3A259B215B2079PAVPR10MB6792EURP_";
// boundary="_000_PAVPR10MB6792713B313048E3A259B215B2079PAVPR10MB6792EURP_"
// boundary=--boundary_1351_64006126-2b0e-4a3b-98ac-4797d1634188
// boundary=--boundary_1352_7e294c9a-cfab-44a0-bfb3-7310380ac7cb;
const regex Attachment::boundaryRegEx (".*boundary=\"?([^\"; ]*)\"?;?.*");
const regex Attachment::cidDefRegEx (".*<([^>]*)>.*");
const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*");
const regex Attachment::multiRegEx ("\\s*multipart/(mixed|"+RELATED+"|"+ALTERNATIVE+").*");
@ -354,7 +361,11 @@ Attachment::nextBondary (ifstream &mbox, streamoff &curPos) {
++it)
if (couple.find (*it) != string::npos) {
LOG ("find: "+ *it);
LOG ("size: " << subAttachements.size ());
if (subAttachements.size ())
subAttachements.back ().toUpdate = true;
else
LOG_BUG (true, continue, "eMailShrinker: bug A10: boundary format ? " << *this);
}
prev = clearLine;
continue;
@ -489,7 +500,7 @@ kaz::operator << (ostream& os, const Attachment& attachment) {
if (prop.length ())
prop = " ["+prop+"]";
os << setw ((attachment.level % 20)*2) << "" << setw (10) << SizeArg (attachment.getSize ()) << " " << attachment.getContentType ()
os << ("****************************************"+40-(attachment.level % 20)*2) << setw (10) << SizeArg (attachment.getSize ()) << " " << attachment.getContentType ()
<< prop << (attachment.cid.length () ? " id: "+attachment.cid : "")
<< (attachment.boundary.length () ? " boundary: "+attachment.boundary : "")
<< " (" << attachment.beginPos << " / " << attachment.contentPos << " / " << attachment.endPos << ") " << endl;

View File

@ -51,47 +51,54 @@
using namespace std;
using namespace kaz;
static const string KAZ_WEB_SITE = "https://kaz.bzh/";
static const string TMPL_DOWNLOAD = "{{DOWNLOAD}}";
static const string TMPL_FILENAME = "{{FILENAME}}";
static const string CID = "cid:";
static const string KAZ_WEB_SITE ("https://kaz.bzh/");
static const string TMPL_DOWNLOAD ("{{DOWNLOAD}}");
static const string TMPL_FILENAME ("{{FILENAME}}");
static const string CID ("cid:");
// "l=/" => v1 compatibility
static const regex archiveURLSignature (".*(([&?]g=)|([&?]l=/)).*");
static const string KAZ_PLAIN_HR = "______________________________________________________________________________";
static const string KAZ_PLAIN_START = "~~ PJ-KAZ !"; // don't end whith space
static const string KAZ_PLAIN_STOP = KAZ_PLAIN_START+" ~~";
static const string KAZ_PLAIN_DONT_TOUCH = "(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)";
static const string KAZ_PLAIN_WARNING = "Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. Elles seront automatiquement supprimées dans 1 mois. Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. Pour mieux comprendre la politique de nos services visitez kaz.bzh";
static const string KAZ_PLAIN_DOWLOAD_ONE = "Vos pièces jointes sont à télécharger individuellement ici :";
static const string KAZ_PLAIN_DOWLOAD_OTHER = "(Contenu dans des messages précédents)";
static const string KAZ_PLAIN_DOWLOAD_ALL = "Vous pouvez télécharger l'ensemble dans une archive là :";
static const string KAZ_PLAIN_HR ("______________________________________________________________________________");
static const string KAZ_PLAIN_START ("~~ PJ-KAZ !"); // don't end whith space
static const string KAZ_PLAIN_STOP (KAZ_PLAIN_START+" ~~");
static const string KAZ_PLAIN_DONT_TOUCH ("(conservez cette partie intacte dans votre réponse si vous voulez transmettre les documents précédents)");
static const string KAZ_PLAIN_WARNING ("Attention : Kaz a dépollué ce message. Les pièces jointes ont été retirées et placées dans un dépôt provisoire. "
"Elles seront automatiquement supprimées dans 1 mois. "
"Si elles sont importantes et que vous souhaitez les conserver, vous devez utiliser les liens ci-dessous. "
"Pour mieux comprendre la politique de nos services visitez kaz.bzh");
static const string KAZ_PLAIN_DOWLOAD_ONE ("Vos pièces jointes sont à télécharger individuellement ici :");
static const string KAZ_PLAIN_DOWLOAD_OTHER ("(Contenu dans des messages précédents)");
static const string KAZ_PLAIN_DOWLOAD_ALL ("Vous pouvez télécharger l'ensemble dans une archive là :");
static const string HEAD = "<head>";
static const string HEAD_END = "</head>";
static const string KAZ_CSS_URL = "https://kaz.bzh/m/email.css";
static const string KAZ_CSS = "<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>";
static const string A_END = "</a>";
static const string LI_BEGIN = "<li";
static const string CLASS_ONE = "class=\"one\"";
static const string LI_ONE = LI_BEGIN+" "+CLASS_ONE+">";
static const string LI_ALL = LI_BEGIN+" class=\"all\">";
static const string LI_END = "</li>";
static const string HREF_ONE = "href=\"";
static const string BODY_END = "</body>";
static const string HTML_END = "</html>";
static const string HEAD ("<head>");
static const string HEAD_END ("</head>");
static const string KAZ_CSS_URL ("https://kaz.bzh/m/email.css");
static const string KAZ_CSS ("<link rel=\"stylesheet\" type=\"text/css\" charset=\"utf-8\" href=\""+KAZ_CSS_URL+"\"/>");
static const string A_END ("</a>");
static const string LI_BEGIN ("<li");
static const string CLASS_ONE ("class=\"one\"");
static const string LI_ONE (LI_BEGIN+" "+CLASS_ONE+">");
static const string LI_ALL (LI_BEGIN+" class=\"all\">");
static const string LI_END ("</li>");
static const string HREF_ONE ("href=\"");
static const string BODY_END ("</body>");
static const string HTML_END ("</html>");
static const string KAZ_HTML_TAG = "<!--KAZ"; // don't end whith space
static const string KAZ_HTML_START = KAZ_HTML_TAG+" START-->";
static const string KAZ_HTML_STOP = KAZ_HTML_TAG+" STOP-->";
static const string KAZ_HTML_TAG ("<!--KAZ"); // don't end whith space
static const string KAZ_HTML_START (KAZ_HTML_TAG+" START-->");
static const string KAZ_HTML_STOP (KAZ_HTML_TAG+" STOP-->");
// Textes précédents encodés en SGML
static const string KAZ_HTML_DONT_TOUCH = "(conservez cette partie intacte dans votre r&eacute;ponse si vous voulez transmettre les documents pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ONE = "Vos pi&egrave;ces jointes sont &agrave; t&eacute;l&eacute;charger individuellement ici :";
static const string KAZ_HTML_DOWLOAD_OTHER = "(Contenu dans des messages pr&eacute;c&eacute;dents)";
static const string KAZ_HTML_DOWLOAD_ALL = "Vous pouvez t&eacute;l&eacute;charger l'ensemble dans une archive l&agrave; :";
static const string KAZ_HTML_ARCHIVE = "archive";
static const string KAZ_HTML_DONT_TOUCH ("(conservez cette partie intacte dans votre r&eacute;ponse si vous voulez transmettre les documents pr&eacute;c&eacute;dents)");
static const string KAZ_HTML_DOWLOAD_ONE ("Vos pi&egrave;ces jointes sont &agrave; t&eacute;l&eacute;charger individuellement ici :");
static const string KAZ_HTML_DOWLOAD_OTHER ("(Contenu dans des messages pr&eacute;c&eacute;dents)");
static const string KAZ_HTML_DOWLOAD_ALL ("Vous pouvez t&eacute;l&eacute;charger l'ensemble dans une archive l&agrave; :");
static const string KAZ_HTML_ARCHIVE ("archive");
static const string KAZ_EMPTY_TEXT_PLAIN ("Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: base64\n");
// ================================================================================
vector <string>
@ -173,7 +180,7 @@ MainAttachment::addLink (string &plain, string &html, const string &url, const s
string htmlNewOneLink (templateHtmlAddLink);
string codedUrl (url);
// XXX amp ?
replaceAll (codedUrl, "&", "&amp;");
//replaceAll (codedUrl, "&", "&amp;");
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
html += htmlNewOneLink;
@ -228,7 +235,7 @@ MainAttachment::getDisclaim (string &plain, string &html) const {
replaceAll (allHtmlLinks, TMPL_DOWNLOAD, archiveDownloadURL);
html += allHtmlLinks;
}
html += templateHtmlFooter;
html += templateHtmlFooter+"\r\n";
plain += "\r\n\r\n"+KAZ_WEB_SITE+"\r\n"+KAZ_PLAIN_HR+"\r\n"+KAZ_PLAIN_STOP+"\r\n";
// & => &amp; done
LOG ("plain: " << plain);
@ -380,7 +387,8 @@ MainAttachment::removePreviousArchive () {
// ================================================================================
MainAttachment::MainAttachment (ifstream &mbox)
: Attachment (mbox, initTmpLevel (), 0, initTmpPos ()) {
: Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
forceMainText (false) {
DEF_LOG ("MainAttachment::MainAttachment", "");
string line;
for (; getline (mbox, line); )
@ -394,6 +402,7 @@ MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox)
DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
bool plainMarked (false), htmlMarked (false);
markDisclaim (plainMarked, htmlMarked);
forceMainText = ! (plainMarked || htmlMarked);
Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
}
@ -529,7 +538,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
}
if (attachP->cid.length ()) {
string tmp (attachP->downloadUrl);
replaceAll (tmp, "&", "&amp;");
//replaceAll (tmp, "&", "&amp;");
translateHtml.insert (pair<const string, const string> (CID+attachP->cid, tmp));
}
}
@ -546,9 +555,23 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
getDisclaim (plainDisclaim, htmlDisclaim);
// copy email
streamoff curPos = 0;
if (forceMainText) {
cerr << endl << endl << " #################### coucou " << forceMainText << " " << contentPos << " " << *this << endl;
// check no main text
LOG ("Force main text");
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
copy (mbox, outbox, curPos, contentPos);
curPos = contentPos;
cerr << " #################### coucou " << curPos << endl << endl;
string content (plainDisclaim);
base64Encode (content);
outbox << boundary.substr (0, boundary.length () -2) << endl
<< KAZ_EMPTY_TEXT_PLAIN << endl
<< content << endl;
outbox.flush ();
}
for (Attachment *attachP : allMarkedPtrs) {
copy (mbox, outbox, curPos, attachP->beginInParent);
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
if (attachP->toExtract) {

View File

@ -83,6 +83,8 @@ namespace kaz {
bfs::path extractDir;
/*! URL for download archives */
string archiveDownloadURL;
/*! no main text in email can be use to add disclaim */
bool forceMainText;
/*! subset in the tree of all attachments to be consider for extraction or modification */
vector<Attachment *> allMarkedPtrs;