Attachment mode

This commit is contained in:
François 2022-11-26 21:55:05 +01:00
parent 9c07023316
commit d6e167b83b
13 changed files with 2898 additions and 86 deletions

View File

@ -71,7 +71,7 @@ $(OBJ_DIR)/%.o: $(SRC_DIR)/*/%.cpp
$(CC) $< $(IFLAGS) -cpp -c -o $@ $(CC) $< $(IFLAGS) -cpp -c -o $@
## ENTRIES ############################# ## ENTRIES #############################
all: init eMailShrinker jirafeauAPI all: init eMailShrinker jirafeauAPI doc
eMailShrinker: $(KAZ_OUT) eMailShrinker: $(KAZ_OUT)
@ -83,6 +83,9 @@ jirafeauAPI: $(JIR_OUT)
$(JIR_OUT): $(JIR_OBJ) $(JIR_OUT): $(JIR_OBJ)
$(CC) $(JIR_OBJ) $(IFLAGS) -cpp -L$(LIB_DIR) $(LFLAGS) -o $@ $(CC) $(JIR_OBJ) $(IFLAGS) -cpp -L$(LIB_DIR) $(LFLAGS) -o $@
# Generate the documentation by running doxygen with the src/Doxyfile configuration.
doc:
doxygen src/Doxyfile
init: init:
mkdir -p $(OUT_DIR) $(OBJ_DIR) $(LIB_DIR) mkdir -p $(OUT_DIR) $(OBJ_DIR) $(LIB_DIR)

View File

@ -33,7 +33,7 @@ depollueur/
## Compilation ## Compilation
```bash ```bash
sudo apt-get install --fix-missing build-essential make g++ libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libcurl4-gnutls-dev libssl-dev sudo apt-get install --fix-missing build-essential make g++ libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libcurl4-gnutls-dev libssl-dev doxygen
git clone https://git.kaz.bzh/KAZ/depollueur.git git clone https://git.kaz.bzh/KAZ/depollueur.git
# or for contributors : # or for contributors :

2658
src/Doxyfile Normal file

File diff suppressed because it is too large Load Diff

View File

@ -25,9 +25,10 @@ require (JIRAFEAU_ROOT . 'lib/functions.php');
require (JIRAFEAU_ROOT . 'lib/lang.php'); require (JIRAFEAU_ROOT . 'lib/lang.php');
define ('VAR_TOKENS', $cfg ['var_root'].'tokens/'); define ('VAR_TOKENS', $cfg ['var_root'].'tokens/');
define ('VAR_MODE', $cfg ['var_root'].'mode/');
define ('VAR_TRACKS', $cfg ['var_root'].'tracks/'); define ('VAR_TRACKS', $cfg ['var_root'].'tracks/');
define ('VAR_LANG', $cfg ['var_root'].'lang/');
define ('VAR_PERIOD', $cfg ['var_root'].'period/'); define ('VAR_PERIOD', $cfg ['var_root'].'period/');
define ('VAR_LANG', $cfg ['var_root'].'lang/');
define ('VAR_FAKE', $cfg ['var_root'].'fake/'); define ('VAR_FAKE', $cfg ['var_root'].'fake/');
define ('VAR_ADMIN', $cfg ['var_root'].'admin/'); define ('VAR_ADMIN', $cfg ['var_root'].'admin/');
@ -39,6 +40,7 @@ define ('MAX_VALID_UPLOAD_TIME', 60);
define ('TOKEN_USE_LIMIT', "-2 hours"); define ('TOKEN_USE_LIMIT', "-2 hours");
define ('TOKEN_LOGIN_LIMIT', "-15 minutes"); define ('TOKEN_LOGIN_LIMIT', "-15 minutes");
define ('TOKEN_LOGOUT_LIMIT', "-8 hours"); define ('TOKEN_LOGOUT_LIMIT', "-8 hours");
define ('DEFAULT_MODE', "footer");
define ('DEFAULT_PERIOD', "month"); define ('DEFAULT_PERIOD', "month");
define ('DEFAULT_LANG', "fr"); define ('DEFAULT_LANG', "fr");
@ -75,13 +77,14 @@ define ('M_WELCOME', "<p>Informations concernant le compte : <b>___SENDER___</b>
define ('M_INCONSISTENT_DATES', define ('M_INCONSISTENT_DATES',
" (dates incoh&eacute;antes avec ___FILENAME___ : ___DIRTIME___ != ___FILETIME___)"); " (dates incoh&eacute;antes avec ___FILENAME___ : ___DIRTIME___ != ___FILETIME___)");
define ('A_ACTION', 'a'); // action : T_LOGIN, T_LOGOUT, A_RECORD+(on|off), A_LANG(fr|en|br), A_PERIOD(minute|hour|day|week|month|quarter) define ('A_ACTION', 'a'); // action : T_LOGIN, T_LOGOUT, A_MODE(footer|attachment|both), A_RECORD+(on|off), A_PERIOD(minute|hour|day|week|month|quarter), A_LANG(fr|en|br)
define ('A_GET', 'g'); // get archive define ('A_GET', 'g'); // get archive
define ('A_HASH', 'h'); // file to update or delete define ('A_HASH', 'h'); // file to update or delete
define ('A_OPEN_TOKEN', 'o'); // ask token define ('A_OPEN_TOKEN', 'o'); // ask token
define ('A_SENDER', 's'); // session sender define ('A_SENDER', 's'); // session sender
define ('A_TOKEN', 't'); // session token define ('A_TOKEN', 't'); // session token
define ('A_UPDATE', 'u'); // update perriod for file or archive define ('A_UPDATE', 'u'); // update perriod for file or archive
define ('A_MODE', 'm'); // get mode status
define ('A_RECORD', 'r'); // get track status define ('A_RECORD', 'r'); // get track status
define ('A_PERIOD', 'p'); // get period status define ('A_PERIOD', 'p'); // get period status
define ('A_LANG', 'l'); // get lang status define ('A_LANG', 'l'); // get lang status
@ -111,7 +114,8 @@ define ('T_ARCHIVE_TITLE', "archive_content");
define ('T_ARCHIVE_MIME', "text/kaz_email_archive"); define ('T_ARCHIVE_MIME', "text/kaz_email_archive");
$langText = ['fr' => "Francais", 'br' => "Breton", 'en' => "english"]; $modeText = ['footer' => "pied de page", 'attachment' => "pi&egrave;ce jointe", 'both' => "les deux"];
$trackText = ['on' => "oui", 'off' => "non"];
$periodText = ['minute' => "minute", 'hour' => "heure", 'day' => "jour", 'week' => "semaine", 'month' => "mois"]; $periodText = ['minute' => "minute", 'hour' => "heure", 'day' => "jour", 'week' => "semaine", 'month' => "mois"];
// XXX , 'quarter' => "trimestre"]; // XXX , 'quarter' => "trimestre"];
$periodButton = ['hour' => ["&#128341;", ">1 heure"], $periodButton = ['hour' => ["&#128341;", ">1 heure"],
@ -119,7 +123,7 @@ $periodButton = ['hour' => ["&#128341;", ">1 heure"],
'week' => ["&#128349;", "> 1 semaine"], 'week' => ["&#128349;", "> 1 semaine"],
'month' => ["&#128350;", "> 1 mois"]]; 'month' => ["&#128350;", "> 1 mois"]];
// XXX 'quarter' => ["&#128351;", "> 1 trimestre"]]; // XXX 'quarter' => ["&#128351;", "> 1 trimestre"]];
$trackText = ['on' => "oui", 'off' => "non"]; $langText = ['fr' => "Francais", 'br' => "Breton", 'en' => "english"];
$doLogout = ''; $doLogout = '';
$message = ''; $message = '';
@ -130,6 +134,19 @@ $message = '';
/* Remove errors. */ /* Remove errors. */
@error_reporting (0); @error_reporting (0);
// ========================================
// Plain-text query: reply with the attachment mode recorded for the address
// given in A_MODE, or with DEFAULT_MODE when the value is not a well-formed
// e-mail address. The script stops right after the reply.
if (!empty ($_REQUEST [A_MODE])) {
    $askedSender = $_REQUEST [A_MODE];
    $wellFormed = preg_match ("/^([a-z0-9\+_\-]+)(\.[a-z0-9\+_\-]+)*@([a-z0-9\-]+\.)+[a-z]{2,6}$/i", $askedSender);
    $content = ($wellFormed ? getSenderMode ($askedSender) : DEFAULT_MODE).NL;
    header ('HTTP/1.0 200 OK');
    header ('Content-Length: ' . strlen ($content));
    header ('Content-Type: text/plain');
    echo $content;
    exit;
}
// ======================================== // ========================================
if (isset ($_REQUEST [A_RECORD]) && !empty ($_REQUEST [A_RECORD])) { if (isset ($_REQUEST [A_RECORD]) && !empty ($_REQUEST [A_RECORD])) {
if (!preg_match ("/^([a-z0-9\+_\-]+)(\.[a-z0-9\+_\-]+)*@([a-z0-9\-]+\.)+[a-z]{2,6}$/i", $_REQUEST [A_RECORD])) if (!preg_match ("/^([a-z0-9\+_\-]+)(\.[a-z0-9\+_\-]+)*@([a-z0-9\-]+\.)+[a-z]{2,6}$/i", $_REQUEST [A_RECORD]))
@ -193,6 +210,29 @@ function returnError ($msg) {
exit; exit;
} }
// ========================================
// Record the attachment mode chosen by $sender in a file named after the
// sender under VAR_MODE. An empty mode or DEFAULT_MODE removes the file.
// $sender : sender identifier (nothing is done when empty)
// $mode   : mode name; stored in lower case
function setSenderMode ($sender, $mode) {
    if (!$sender)
        return;
    if (!file_exists (VAR_MODE))
        mkdir (VAR_MODE, 0755);
    // The comparison with DEFAULT_MODE below is case sensitive while the
    // requested name may arrive in any case: normalise before comparing and
    // storing so that "BOTH" and "both" are the same setting.
    $mode = strtolower (trim ((string) $mode));
    if (empty ($mode) || DEFAULT_MODE == $mode) {
        rmSenderMode ($sender);
    } else
        file_put_contents (VAR_MODE.$sender, $mode.NL);
}
// Delete the mode file of $sender under VAR_MODE, if there is one.
// Nothing is done for an empty $sender.
function rmSenderMode ($sender) {
    if ($sender && file_exists (VAR_MODE.$sender))
        unlink (VAR_MODE.$sender);
}
// Return the attachment mode recorded for $sender: the trimmed first line of
// the sender's file under VAR_MODE. DEFAULT_MODE is returned when the sender
// is empty, when no file exists, or when the file is unreadable or empty.
function getSenderMode ($sender) {
    if (!$sender || !file_exists (VAR_MODE.$sender))
        return DEFAULT_MODE;
    // file () yields false on failure and an empty array for an empty file:
    // neither has a first line to read.
    $lines = file (VAR_MODE.$sender);
    $mode = (is_array ($lines) && isset ($lines [0])) ? trim ($lines [0]) : '';
    return $mode === '' ? DEFAULT_MODE : $mode;
}
// ======================================== // ========================================
function setSenderTrack ($sender) { function setSenderTrack ($sender) {
if (!$sender) if (!$sender)
@ -211,29 +251,6 @@ function isSenderTrack ($sender) {
return $sender && file_exists (VAR_TRACKS.$sender); return $sender && file_exists (VAR_TRACKS.$sender);
} }
// ========================================
function setSenderLang ($sender, $lang) {
if (!$sender)
return;
if (!file_exists (VAR_LANG))
mkdir (VAR_LANG, 0755);
if (empty ($lang) || DEFAULT_LANG == $lang) {
rmSenderLang ($sender);
} else
file_put_contents (VAR_LANG.$sender, $lang.NL);
}
function rmSenderLang ($sender) {
if (!$sender)
return;
if (file_exists (VAR_LANG.$sender))
unlink (VAR_LANG.$sender);
}
function getSenderLang ($sender) {
if ($sender && file_exists (VAR_LANG.$sender))
return trim (file (VAR_LANG.$sender)[0]);
return DEFAULT_LANG;
}
// ======================================== // ========================================
function setSenderPeriod ($sender, $period) { function setSenderPeriod ($sender, $period) {
if (!$sender) if (!$sender)
@ -286,6 +303,29 @@ function period2seconds ($periodName) {
} }
} }
// ========================================
// Record the language chosen by $sender in a file named after the sender
// under VAR_LANG. An empty language or DEFAULT_LANG removes the file.
// $sender : sender identifier (nothing is done when empty)
// $lang   : language code; stored in lower case
function setSenderLang ($sender, $lang) {
    if (!$sender)
        return;
    if (!file_exists (VAR_LANG))
        mkdir (VAR_LANG, 0755);
    // The comparison with DEFAULT_LANG below is case sensitive while the
    // requested code may arrive in any case: normalise before comparing and
    // storing so that "FR" and "fr" are the same setting.
    $lang = strtolower (trim ((string) $lang));
    if (empty ($lang) || DEFAULT_LANG == $lang) {
        rmSenderLang ($sender);
    } else
        file_put_contents (VAR_LANG.$sender, $lang.NL);
}
// Delete the language file of $sender under VAR_LANG, if there is one.
// Nothing is done for an empty $sender.
function rmSenderLang ($sender) {
    if ($sender && file_exists (VAR_LANG.$sender))
        unlink (VAR_LANG.$sender);
}
// Return the language recorded for $sender: the trimmed first line of the
// sender's file under VAR_LANG. DEFAULT_LANG is returned when the sender is
// empty, when no file exists, or when the file is unreadable or empty.
function getSenderLang ($sender) {
    if (!$sender || !file_exists (VAR_LANG.$sender))
        return DEFAULT_LANG;
    // file () yields false on failure and an empty array for an empty file:
    // neither has a first line to read.
    $lines = file (VAR_LANG.$sender);
    $lang = (is_array ($lines) && isset ($lines [0])) ? trim ($lines [0]) : '';
    return $lang === '' ? DEFAULT_LANG : $lang;
}
// ======================================== // ========================================
function setSenderFake ($error, $sender, $owner, $dirLink, $fileLink) { function setSenderFake ($error, $sender, $owner, $dirLink, $fileLink) {
global $doLogout; global $doLogout;
@ -976,6 +1016,10 @@ if ($doLogout || (isset ($_REQUEST [A_ACTION]) && $_REQUEST [A_ACTION] == T_LOGO
if (isset ($_REQUEST [A_ACTION])) { if (isset ($_REQUEST [A_ACTION])) {
// change track // change track
switch (true) { switch (true) {
case preg_match ("/^".A_MODE."(".implode ("|", array_keys ($modeText)).")$/i", $_REQUEST [A_ACTION], $matches):
setSenderMode ($sender, $matches [1]);
$message .= "Votre mode &agrave; &eacute;t&eacute; mise &agrave; jour.";
break;
case preg_match ("/^".A_RECORD."(on|off)$/i", $_REQUEST [A_ACTION], $matches): case preg_match ("/^".A_RECORD."(on|off)$/i", $_REQUEST [A_ACTION], $matches):
if ($matches [1] == "on") if ($matches [1] == "on")
setSenderTrack ($sender); setSenderTrack ($sender);
@ -983,14 +1027,14 @@ if (isset ($_REQUEST [A_ACTION])) {
rmSenderTrack ($sender); rmSenderTrack ($sender);
$message .= "Votre suivi &agrave; &eacute;t&eacute; mise &agrave; jour."; $message .= "Votre suivi &agrave; &eacute;t&eacute; mise &agrave; jour.";
break; break;
case preg_match ("/^".A_LANG."(".implode ("|", array_keys ($langText)).")$/i", $_REQUEST [A_ACTION], $matches):
setSenderLang ($sender, $matches [1]);
$message .= "Votre lang &agrave; &eacute;t&eacute; mise &agrave; jour.";
break;
case preg_match ("/^".A_PERIOD."(".implode ("|", array_keys ($periodText)).")$/i", $_REQUEST [A_ACTION], $matches): case preg_match ("/^".A_PERIOD."(".implode ("|", array_keys ($periodText)).")$/i", $_REQUEST [A_ACTION], $matches):
setSenderPeriod ($sender, $matches [1]); setSenderPeriod ($sender, $matches [1]);
$message .= "Votre p&eacute;riode &agrave; &eacute;t&eacute; mise &agrave; jour."; $message .= "Votre p&eacute;riode &agrave; &eacute;t&eacute; mise &agrave; jour.";
break; break;
case preg_match ("/^".A_LANG."(".implode ("|", array_keys ($langText)).")$/i", $_REQUEST [A_ACTION], $matches):
setSenderLang ($sender, $matches [1]);
$message .= "Votre lang &agrave; &eacute;t&eacute; mise &agrave; jour.";
break;
} }
} }
@ -1085,10 +1129,20 @@ div.frame {border: 1px; border-style: solid; padding: 1em; margin: 1em;}
--></style> --></style>
<?php <?php
$defaultChecked = []; $defaultChecked = [];
$defaultChecked [getSenderMode ($sender)] = ' selected="selected"';
$defaultChecked [isSenderTrack ($sender) ? "on" : "off"] = ' checked="checked"'; $defaultChecked [isSenderTrack ($sender) ? "on" : "off"] = ' checked="checked"';
$defaultChecked [getSenderPeriod ($sender)] = ' selected="selected"'; $defaultChecked [getSenderPeriod ($sender)] = ' selected="selected"';
$defaultChecked [getSenderLang ($sender)] = ' selected="selected"'; $defaultChecked [getSenderLang ($sender)] = ' selected="selected"';
echo echo
'<form method="post">'.
'Je veux que mes futurs envois soient d&eacute;pollu&eacute; en pla&ccedil;ant les liens de t&eacute;l&eacute;chargements '.
'<select name="'.A_ACTION.'" style="width: auto !important;">';
foreach ($modeText as $item => $text)
echo ' <option value="'.A_MODE.$item.'"'.$defaultChecked [$item].'>'.$text.'</option>';
echo
'</select> '.
'<button type="submit">'."valider".'</button>'.
'</form>'.
'<form method="post">'. '<form method="post">'.
'Je veux que Kaz suive tous mes futurs envois: '. 'Je veux que Kaz suive tous mes futurs envois: '.
'<input type="hidden" name="'.A_SENDER.'" value="'.$sender.'"/>'. '<input type="hidden" name="'.A_SENDER.'" value="'.$sender.'"/>'.

16
src/bash/filter.sh Executable file → Normal file
View File

@ -1,4 +1,4 @@
#!/bin/sh #!/bin/bash
########################################################################## ##########################################################################
# Copyright KAZ 2021 # # Copyright KAZ 2021 #
# # # #
@ -41,6 +41,10 @@
# le roriétaire du script # le roriétaire du script
########################################################################## ##########################################################################
# Fallback settings, used further down when the per-sender lookup does not
# return one of the accepted values.
DEFAULT_MODE="footer"
DEFAULT_PERIOD="month"
DEFAULT_TRACK=""
cd $(dirname $0) cd $(dirname $0)
DOMAINNAME=$(cat domainname) DOMAINNAME=$(cat domainname)
# Exit codes from <sysexits.h> # Exit codes from <sysexits.h>
@ -107,8 +111,14 @@ fi
MAIL_SOURCE=$(echo $@ | awk 'BEGIN{FS=" "} {print $2}') MAIL_SOURCE=$(echo $@ | awk 'BEGIN{FS=" "} {print $2}')
DATE_TEMPS=$(date "+%Y-%m-%d-%H:%M:%S") DATE_TEMPS=$(date "+%Y-%m-%d-%H:%M:%S")
REP_PIECE_JOINTE="${MAILS}/${DATE_TEMPS}_${MAIL_SOURCE}_$$" REP_PIECE_JOINTE="${MAILS}/${DATE_TEMPS}_${MAIL_SOURCE}_$$"
MODE=$(curl "${JIRAFEAU_LOCAL}/a.php?m=${MAIL_SOURCE}" 2>/dev/null )
[[ "${MODE}" =~ ^(footer|attachment|both)$ ]] || MODE="${DEFAULT_MODE}"
TRACK=$(curl "${JIRAFEAU_LOCAL}/a.php?r=${MAIL_SOURCE}" 2>/dev/null ) TRACK=$(curl "${JIRAFEAU_LOCAL}/a.php?r=${MAIL_SOURCE}" 2>/dev/null )
[[ "${TRACK}" =~ ^(|0|1|false|true|FALSE|TRUE|on|off)$ ]] || TRACK="${DEFAULT_TRACK}"
PERIOD=$(curl "${JIRAFEAU_LOCAL}/a.php?p=${MAIL_SOURCE}" 2>/dev/null ) PERIOD=$(curl "${JIRAFEAU_LOCAL}/a.php?p=${MAIL_SOURCE}" 2>/dev/null )
[[ "${PERIOD}" =~ ^(minute|hour|day|week|month|quarter)$ ]] || PERIOD="${DEFAULT_PERIOD}"
if [ -n "$(echo "${PERIOD}" | grep -e minute -e hour -e day -e week -e month -e quarter 2>/dev/null)" ]; then if [ -n "$(echo "${PERIOD}" | grep -e minute -e hour -e day -e week -e month -e quarter 2>/dev/null)" ]; then
JIRAFEAU_TIME="${PERIOD}" JIRAFEAU_TIME="${PERIOD}"
fi fi
@ -237,8 +247,8 @@ LOG_FIC "${CYAN}${SHRINK_CMD} -s ${MAX_KEEP_IN_MAIL} -d ${REP_PIECE_JOINTE} ${IN
echo "arch: none" echo "arch: none"
fi fi
# Etape de substitution # Etape de substitution
LOG_FIC "${CYAN}${SHRINK_CMD} -s \"${MAX_KEEP_IN_MAIL}\" \"${INSPECT_DIR}/in.$$\" \"${INSPECT_DIR}/in.$$.altered\" 2>> \"${FIC_LOG}\"${NC}" LOG_FIC "${CYAN}${SHRINK_CMD} -m \"${MODE}\" -s \"${MAX_KEEP_IN_MAIL}\" \"${INSPECT_DIR}/in.$$\" \"${INSPECT_DIR}/in.$$.altered\" 2>> \"${FIC_LOG}\"${NC}"
} | "${SHRINK_CMD}" -s "${MAX_KEEP_IN_MAIL}" "${INSPECT_DIR}/in.$$" "${INSPECT_DIR}/in.$$.altered" 2>> "${FIC_LOG}" } | "${SHRINK_CMD}" -m "${MODE}" -s "${MAX_KEEP_IN_MAIL}" "${INSPECT_DIR}/in.$$" "${INSPECT_DIR}/in.$$.altered" 2>> "${FIC_LOG}"
[ -n "${DEBUG}" ] && (mkdir -p "${DIR_LOG}/pb/" ; cp "${INSPECT_DIR}/in.$$.altered" "${DIR_LOG}/pb/in.$$.altered") [ -n "${DEBUG}" ] && (mkdir -p "${DIR_LOG}/pb/" ; cp "${INSPECT_DIR}/in.$$.altered" "${DIR_LOG}/pb/in.$$.altered")

7
src/bash/filterTest.sh Executable file → Normal file
View File

@ -1,6 +1,9 @@
#!/bin/bash #!/bin/bash
PRG=$(basename $0) PRG=$(basename $0)
# Attachment-mode option handed to eMailShrinker for the substitution step of this test.
ATTACH_MODE="-m BOTH"
BOLD='' BOLD=''
RED='' RED=''
GREEN='' GREEN=''
@ -124,7 +127,9 @@ LOG " - ${BLUE}PJ-Keys: ${NC}"
cat tmp/PJ-Keys.txt cat tmp/PJ-Keys.txt
LOG LOG
cat "tmp/PJ-Keys.txt" | build/out/eMailShrinker -s "5ki" "${mbox}" "tmp/new-mbox" 2>> "${TTY}" LOG " - ${GREEN}ATTACH_MODE: ${ATTACH_MODE}${NC}"
cat "tmp/PJ-Keys.txt" | build/out/eMailShrinker ${ATTACH_MODE} -s "5ki" "${mbox}" "tmp/new-mbox" 2>> "${TTY}"
LOG " - ${BLUE}new-mbox:${NC}" LOG " - ${BLUE}new-mbox:${NC}"

View File

@ -61,6 +61,7 @@ const string Attachment::PLAIN ("plain");
const string Attachment::HTML ("html"); const string Attachment::HTML ("html");
const string Attachment::RELATED ("related"); const string Attachment::RELATED ("related");
const string Attachment::ALTERNATIVE ("alternative"); const string Attachment::ALTERNATIVE ("alternative");
/*! file name of the HTML attachment generated by Kaz; an HTML part bearing this name is flagged as a Kaz attachment */
const string Attachment::KAZ_ATTACH_NAME (".---KazAttachment---.html");
const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)"); const regex Attachment::nameCharsetRegEx (".*name\\*=(.*)");
@ -273,6 +274,7 @@ Attachment::Attachment (ifstream &mbox, const int &level, const streamoff beginI
toExtract (false), toExtract (false),
toUpdate (false), toUpdate (false),
toDisclaim (false), toDisclaim (false),
isKazAttachment (false),
boundaryMiddleSize (0) { boundaryMiddleSize (0) {
DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level); DEF_LOG ("Attachment::Attachment", "curPos: " << curPos << " level: " << level);
readMime (mbox, curPos); readMime (mbox, curPos);
@ -417,6 +419,9 @@ Attachment::markSignificant (const string &parentMultiProp, const streamoff &min
for (Attachment &sub : subAttachements) for (Attachment &sub : subAttachements)
cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs); cantBeExtract |= sub.markSignificant (multiProp, minAttachSize, mbox, allMarkedPtrs);
if (getProp (contentTypeToken, textRegEx) == HTML) { if (getProp (contentTypeToken, textRegEx) == HTML) {
if (KAZ_ATTACH_NAME == getAttachName ()) {
isKazAttachment = true;
} else {
string content = getContent (mbox); string content = getContent (mbox);
vector<string> imgs; vector<string> imgs;
getSection (content, IMG_BEGIN, IMG_END, imgs); getSection (content, IMG_BEGIN, IMG_END, imgs);
@ -424,10 +429,11 @@ Attachment::markSignificant (const string &parentMultiProp, const streamoff &min
if (embeddedData.size ()) if (embeddedData.size ())
toUpdate = true; toUpdate = true;
} }
}
cantBeExtract |= toUpdate; cantBeExtract |= toUpdate;
if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract) if (boundary.empty () && getSize () >= minAttachSize && !cantBeExtract)
cantBeExtract = toExtract = true; // XXX cantBeExtract ? cantBeExtract = toExtract = true; // XXX cantBeExtract ?
if (toExtract || toUpdate || toDisclaim) if (toExtract || toUpdate || toDisclaim || isKazAttachment)
allMarkedPtrs.push_back (this); allMarkedPtrs.push_back (this);
return cantBeExtract; return cantBeExtract;
} }

View File

@ -42,6 +42,9 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
#include <boost/assign.hpp>
#include <boost/algorithm/string.hpp>
#include "kazDebug.hpp" #include "kazDebug.hpp"
#include "kazMisc.hpp" #include "kazMisc.hpp"
#include "SizeArg.hpp" #include "SizeArg.hpp"
@ -83,6 +86,7 @@ static const string LI_ONE (LI_BEGIN+" "+CLASS_ONE+">");
static const string LI_ALL (LI_BEGIN+" class=\"all\">"); static const string LI_ALL (LI_BEGIN+" class=\"all\">");
static const string LI_END ("</li>"); static const string LI_END ("</li>");
static const string HREF_ONE ("href=\""); static const string HREF_ONE ("href=\"");
/*! opening part (doctype, head with KAZ_CSS, body start) of the HTML page placed in the Kaz attachment; the caller appends the content, BODY_END and HTML_END */
static const string KAZ_HTML_CONTENT ("<!DOCTYPE html><html lang=\"fr\"><head><meta charset=\"utf-8\"><title>KAZ</title>"+KAZ_CSS+"</head><body>");
static const string BODY_END ("</body>"); static const string BODY_END ("</body>");
static const string HTML_END ("</html>"); static const string HTML_END ("</html>");
@ -98,6 +102,9 @@ static const string KAZ_HTML_ARCHIVE ("archive");
static const string KAZ_EMPTY_TEXT_PLAIN ("Content-Type: text/plain; charset=utf-8\n" static const string KAZ_EMPTY_TEXT_PLAIN ("Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: base64\n"); "Content-Transfer-Encoding: base64\n");
/*! MIME header lines of the base64 HTML part attached under the name Attachment::KAZ_ATTACH_NAME */
// NOTE(review): this string is built from Attachment::KAZ_ATTACH_NAME during
// static initialisation; if that constant is defined in another translation
// unit, the relative initialisation order is not guaranteed -- to be confirmed.
static const string KAZ_ATTACHMENT_TEXT_HTML ("Content-Type: text/html; charset=utf-8\n"
"Content-Disposition: attachment; filename=\"" + Attachment::KAZ_ATTACH_NAME + "\"\n"
"Content-Transfer-Encoding: base64\n");
// ================================================================================ // ================================================================================
@ -121,6 +128,35 @@ const string MainAttachment::templateHtmlFooter ("</ul></p>\n"
const regex MainAttachment::whiteSpaceRegEx ("\\s+"); const regex MainAttachment::whiteSpaceRegEx ("\\s+");
// ================================================================================
/*! printable names of the attachment modes; operator << indexes this table with the AttachMode value
    (presumably NONE=0, FOOTER=1, ATTACHMENT=2, BOTH=3 -- confirm against the enum declaration) */
const string
kaz::attachModeLabels[] = {
"None", "Footer", "Attachment", "Both"
};
/*! lower-case spelling of each attachment mode, used to parse a mode name */
const map<string, AttachMode>
kaz::attachModeMap = {
  {"none", NONE},
  {"footer", FOOTER},
  {"attachment", ATTACHMENT},
  {"both", BOTH}
};
/*! write the label of an attachment mode.
    A value that has no entry in attachModeLabels is written as "Unknown(<number>)"
    instead of indexing past the end of the table.
    \param out output stream
    \param attachMode mode to print
    \return out */
ostream &
kaz::operator << (ostream &out, const AttachMode &attachMode) {
  const size_t labelCount (sizeof (attachModeLabels) / sizeof (attachModeLabels [0]));
  // a negative value becomes a huge index and is rejected by the same test
  const size_t index (static_cast<size_t> (attachMode));
  if (index >= labelCount)
    return out << "Unknown(" << static_cast<long> (attachMode) << ")";
  return out << attachModeLabels [index];
}
/*! read one word and convert it, ignoring case, into an attachment mode.
    A word that is not a key of attachModeMap sets failbit on the stream and
    leaves attachMode unchanged.
    \param in input stream
    \param attachMode receives the parsed mode
    \return in */
istream &
kaz::operator >> (istream &in, AttachMode &attachMode) {
  string word;
  in >> word;
  const auto found (attachModeMap.find (boost::algorithm::to_lower_copy (word)));
  if (found != attachModeMap.end ()) {
    attachMode = found->second;
    return in;
  }
  in.setstate (ios_base::failbit);
  return in;
}
// ================================================================================ // ================================================================================
void void
MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) { MainAttachment::copy (ifstream &mbox, ofstream &outbox, const streamoff &begin, const streamoff &end) {
@ -179,7 +215,7 @@ MainAttachment::addLink (string &plain, string &html, const string &url, const s
plain += plainNewOneLink; plain += plainNewOneLink;
string htmlNewOneLink (templateHtmlAddLink); string htmlNewOneLink (templateHtmlAddLink);
string codedUrl (url); string codedUrl (url);
// XXX amp ? // pb &amp;
// replaceAll (codedUrl, "&", "&amp;"); // replaceAll (codedUrl, "&", "&amp;");
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl); replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
replaceAll (htmlNewOneLink, TMPL_FILENAME, name); replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
@ -340,7 +376,7 @@ MainAttachment::extractPreviousKAZ (ifstream &mbox) {
DEF_LOG ("MainAttachment::extractPreviousKAZ", ""); DEF_LOG ("MainAttachment::extractPreviousKAZ", "");
string extractedPlainKAZ, extractedHtmlKAZ; string extractedPlainKAZ, extractedHtmlKAZ;
for (const Attachment *attachP : allMarkedPtrs) { for (const Attachment *attachP : allMarkedPtrs) {
if (!attachP->toUpdate || isBase64Encoding ()) if (!(attachP->toUpdate || attachP->isKazAttachment)) // isKazAttachment => toUpdate
continue; continue;
string textProp = attachP->getProp (contentTypeToken, textRegEx); string textProp = attachP->getProp (contentTypeToken, textRegEx);
if (textProp.empty ()) if (textProp.empty ())
@ -388,7 +424,8 @@ MainAttachment::removePreviousArchive () {
// ================================================================================ // ================================================================================
MainAttachment::MainAttachment (ifstream &mbox) MainAttachment::MainAttachment (ifstream &mbox)
: Attachment (mbox, initTmpLevel (), 0, initTmpPos ()), : Attachment (mbox, initTmpLevel (), 0, initTmpPos ()),
forceMainText (false) { emptyEMail (false),
previousKazAttachment (false) {
DEF_LOG ("MainAttachment::MainAttachment", ""); DEF_LOG ("MainAttachment::MainAttachment", "");
string line; string line;
for (; getline (mbox, line); ) for (; getline (mbox, line); )
@ -402,7 +439,7 @@ MainAttachment::markSignificant (const streamoff &minAttachSize, ifstream &mbox)
DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize); DEF_LOG ("MainAttachment::markSignificant", "minAttachSize: " << minAttachSize);
bool plainMarked (false), htmlMarked (false); bool plainMarked (false), htmlMarked (false);
markDisclaim (plainMarked, htmlMarked); markDisclaim (plainMarked, htmlMarked);
forceMainText = ! (plainMarked || htmlMarked); emptyEMail = ! (plainMarked || htmlMarked);
Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs); Attachment::markSignificant ("", minAttachSize, mbox, allMarkedPtrs);
} }
@ -446,7 +483,7 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
int attachCount (0); int attachCount (0);
string dirName, mediaName; string dirName, mediaName;
for (Attachment *attachP : allMarkedPtrs) { for (Attachment *attachP : allMarkedPtrs) {
if (!attachP->toExtract) if (attachP->isKazAttachment || !attachP->toExtract)
continue; continue;
newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName); newPjEntry (attachCount, attachP->getContentType (), attachP->getAttachName (), dirName, mediaName);
++attachCount; ++attachCount;
@ -521,15 +558,15 @@ MainAttachment::extract (ifstream &mbox, const SizeArg &minSize) const {
// ================================================================================ // ================================================================================
void void
MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize) { MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize, const AttachMode &attachMode) {
DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize); DEF_LOG ("MainAttachment::substitute", "minSize: " << minSize << " AttachMode: " << attachMode);
// preparation // preparation
extractPreviousKAZ (mbox); extractPreviousKAZ (mbox);
removePreviousArchive (); removePreviousArchive ();
map<const string, const string> translateHtml; map<const string, const string> translateHtml;
for (Attachment *attachP : allMarkedPtrs) for (Attachment *attachP : allMarkedPtrs)
if (attachP->toExtract) { if (attachP->toExtract && !attachP->isKazAttachment) {
readDownloadUrl (attachP->downloadUrl); readDownloadUrl (attachP->downloadUrl);
if (attachP->downloadUrl.empty ()) { if (attachP->downloadUrl.empty ()) {
LOG ("no change"); LOG ("no change");
@ -555,14 +592,14 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
getDisclaim (plainDisclaim, htmlDisclaim); getDisclaim (plainDisclaim, htmlDisclaim);
// copy email // copy email
streamoff curPos = 0; streamoff curPos = 0;
if (forceMainText) { copy (mbox, outbox, curPos, contentPos);
cerr << endl << endl << " #################### coucou " << forceMainText << " " << contentPos << " " << *this << endl; curPos = contentPos;
if (plainDisclaim.size ()) {
if (emptyEMail && (attachMode & FOOTER)) {
// check no main text // check no main text
LOG ("Force main text"); LOG ("Force main text");
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this); LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't force add footer M9: : " << *this);
copy (mbox, outbox, curPos, contentPos);
curPos = contentPos;
cerr << " #################### coucou " << curPos << endl << endl;
string content (plainDisclaim); string content (plainDisclaim);
base64Encode (content); base64Encode (content);
outbox << boundary.substr (0, boundary.length () -2) << endl outbox << boundary.substr (0, boundary.length () -2) << endl
@ -570,12 +607,13 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
<< content << endl; << content << endl;
outbox.flush (); outbox.flush ();
} }
}
for (Attachment *attachP : allMarkedPtrs) { for (Attachment *attachP : allMarkedPtrs) {
copy (mbox, outbox, curPos, attachP->beginInParent); copy (mbox, outbox, curPos, attachP->beginInParent);
LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos); LOG_BUG (attachP->toUpdate && attachP->toExtract, /**/, "eMailShrinker: bug M5: update and extract. pos: " << attachP->beginPos);
if (attachP->toExtract) { if (attachP->toExtract || attachP->isKazAttachment) {
LOG ("skip Extracted"); LOG ("skip Extracted or previous attachments");
} else if (attachP->toUpdate) { } else if (attachP->toUpdate) {
string textProp = attachP->getProp (contentTypeToken, textRegEx); string textProp = attachP->getProp (contentTypeToken, textRegEx);
@ -623,7 +661,7 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
} }
if (isPlain) if (isPlain)
removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP); removeSection (content, KAZ_PLAIN_START, KAZ_PLAIN_STOP);
if (isDisclaimer) { if (isDisclaimer && (attachMode & FOOTER)) {
if (isHtml) { if (isHtml) {
for (string endTag : {BODY_END, HTML_END}) { for (string endTag : {BODY_END, HTML_END}) {
LOG ("try tag: " << endTag); LOG ("try tag: " << endTag);
@ -646,6 +684,19 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
outbox.flush (); outbox.flush ();
curPos = attachP->endPos; curPos = attachP->endPos;
} }
if (plainDisclaim.size () && (attachMode & ATTACHMENT)) {
LOG ("Add kaz attachment");
LOG_BUG (boundary.empty () || ! subAttachements.size (), /**/, "eMailShrinker: can't add Kaz attachment M10: : " << *this);
streamoff lastPos = subAttachements.back ().endPos;
copy (mbox, outbox, curPos, lastPos);
curPos = lastPos;
string content (KAZ_HTML_CONTENT+htmlDisclaim+BODY_END+HTML_END);
base64Encode (content);
outbox << boundary.substr (0, boundary.length () -2) << endl
<< KAZ_ATTACHMENT_TEXT_HTML << endl
<< content << endl;
outbox.flush ();
}
copy (mbox, outbox, curPos, endPos); copy (mbox, outbox, curPos, endPos);
outbox.close (); outbox.close ();
} }

View File

@ -32,7 +32,7 @@
// knowledge of the CeCILL-B license and that you accept its terms. // // knowledge of the CeCILL-B license and that you accept its terms. //
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
#define LAST_VERSION "2.2 2022-11-20 eMailShrinker" #define LAST_VERSION "2.3 2022-11-25 eMailShrinker"
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
@ -66,8 +66,8 @@ usage (const string &msg = "", const bool &hidden = false) {
cout << endl cout << endl
<< "Usage: " << endl << "Usage: " << endl
<< " A) " << prog << " -u mbox > url-list" << endl << " A) " << prog << " -u mbox > url-list" << endl
<< " B) " << prog << " [-s size] [-d dirName}] mbox > file-list" << endl << " B) " << prog << " [-s size] [-d dirName] mbox > file-list" << endl
<< " C) " << prog << " [-s size] mbox altered-mbox < url-list" << endl << " C) " << prog << " [-s size] [-m {Footer|Attachment|Both}] mbox altered-mbox < url-list" << endl
<< endl << " filter attachments" << endl << endl << endl << " filter attachments" << endl << endl
<< " A: list previous embded url need to be refresh (no added option)" << endl << " A: list previous embded url need to be refresh (no added option)" << endl
<< " => downloadURL list" << endl << " => downloadURL list" << endl
@ -108,15 +108,16 @@ main (int argc, char** argv) {
DEF_LOG ("main:", ""); DEF_LOG ("main:", "");
prog = argv [0]; prog = argv [0];
bool bool
debugFlag (false),
helpFlag (false), helpFlag (false),
versionFlag (false), versionFlag (false),
updateListFlag (false), updateListFlag (false),
useTheForceLuke (false), useTheForceLuke (false),
listFlag (false); listFlag (false),
debugFlag (false);
string inputName, outputName; string inputName, outputName;
bfs::path extractDir (bfs::temp_directory_path ()); bfs::path extractDir (bfs::temp_directory_path ());
SizeArg minAttachSize ("48 Ki"); SizeArg minAttachSize ("48 Ki");
AttachMode attachMode (FOOTER);
try { try {
mainDescription.add_options () mainDescription.add_options ()
@ -125,6 +126,7 @@ main (int argc, char** argv) {
("size,s", value<SizeArg> (&minAttachSize)->default_value (minAttachSize), "minimum size for extration") ("size,s", value<SizeArg> (&minAttachSize)->default_value (minAttachSize), "minimum size for extration")
("updateList,u", bool_switch (&updateListFlag), "list URL need refresh") ("updateList,u", bool_switch (&updateListFlag), "list URL need refresh")
("extractDir,d", value<bfs::path> (&extractDir)->default_value (extractDir), "set tmp directory name for extraction") ("extractDir,d", value<bfs::path> (&extractDir)->default_value (extractDir), "set tmp directory name for extraction")
("mode,m", boost::program_options::value<AttachMode> (&attachMode)->default_value (attachMode), "set attachment mode")
; ;
hide.add_options () hide.add_options ()
@ -219,9 +221,13 @@ main (int argc, char** argv) {
} }
// case substitute // case substitute
if (attachMode == NONE) {
cerr << endl << prog << ": attachMode can't be NONE (forced FOOTER mode)" << endl;
attachMode = FOOTER;
}
mbox.open (inputName); mbox.open (inputName);
ofstream outbox (outputName); ofstream outbox (outputName);
attachment.substitute (mbox, outbox, minAttachSize); attachment.substitute (mbox, outbox, minAttachSize, attachMode);
showTime ("Substitution"); showTime ("Substitution");
return 0; return 0;
} }

View File

@ -54,7 +54,7 @@ namespace kaz {
//static const vector<const string> stringsToUpdate; //static const vector<const string> stringsToUpdate;
static vector<string> stringsToUpdate; static vector<string> stringsToUpdate;
/*! mime tokens */ /*! mime tokens */
static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, RELATED, ALTERNATIVE; static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, RELATED, ALTERNATIVE, KAZ_ATTACH_NAME;
/*! pattern to extract mime values */ /*! pattern to extract mime values */
static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx; static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx;
@ -100,7 +100,7 @@ namespace kaz {
/*! char position of attachment content */ /*! char position of attachment content */
streamoff contentPos, endPos; streamoff contentPos, endPos;
/*! properties of the attachment */ /*! properties of the attachment */
bool toExtract, toUpdate, toDisclaim; bool toExtract, toUpdate, toDisclaim, isKazAttachment;
/*! id of an image embedded in mbox */ /*! id of an image embedded in mbox */
string cid; string cid;
/*! url to replace the attachment */ /*! url to replace the attachment */

View File

@ -44,6 +44,13 @@ namespace kaz {
namespace bfs = boost::filesystem; namespace bfs = boost::filesystem;
// ================================================================================ // ================================================================================
/*! place to add download link (footer, attachment or both).
 *  Values are bit flags: NONE is 0, and BOTH is the bitwise OR of
 *  FOOTER and ATTACHMENT. */
enum AttachMode { NONE = 0, FOOTER = 1, ATTACHMENT = 2, BOTH = (FOOTER|ATTACHMENT) };
/*! labels of the attach modes (defined elsewhere; presumably indexed by the AttachMode value -- TODO confirm) */
extern const string attachModeLabels[];
/*! lookup table from a textual name to the matching AttachMode (defined elsewhere) */
extern const map<string, AttachMode> attachModeMap;
/*! write an AttachMode to an output stream (presumably using attachModeLabels -- TODO confirm) */
ostream &operator << (ostream &out, const AttachMode &attachMode);
/*! read an AttachMode from an input stream (presumably using attachModeMap -- TODO confirm) */
istream &operator >> (istream &in, AttachMode &attachMode);
/*! root level of e-mail structure */ /*! root level of e-mail structure */
class MainAttachment : public Attachment { class MainAttachment : public Attachment {
public: public:
@ -83,8 +90,10 @@ namespace kaz {
bfs::path extractDir; bfs::path extractDir;
/*! URL for download archives */ /*! URL for download archives */
string archiveDownloadURL; string archiveDownloadURL;
/*! no main text in email can be use to add disclaim */ /*! if no main text in email can be used to add disclaim */
bool forceMainText; bool emptyEMail;
/*! if it contains a previous kaz attachment */
bool previousKazAttachment;
/*! subset in the tree of all attachments to be considered for extraction or modification */ /*! subset in the tree of all attachments to be considered for extraction or modification */
vector<Attachment *> allMarkedPtrs; vector<Attachment *> allMarkedPtrs;
@ -115,7 +124,7 @@ namespace kaz {
/*! extract big attachments in mbox to extractDir and write to stdout the dirname of each extraction */ /*! extract big attachments in mbox to extractDir and write to stdout the dirname of each extraction */
void extract (ifstream &mbox, const SizeArg &minSize) const; void extract (ifstream &mbox, const SizeArg &minSize) const;
/*! substitute big attachments with the URL given on stdin */ /*! substitute big attachments with the URL given on stdin */
void substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize); void substitute (ifstream &mbox, ofstream &outbox, const SizeArg &minSize, const AttachMode &attachMode);
}; };
// ================================================================================ // ================================================================================

10
src/mainpage.md Normal file
View File

@ -0,0 +1,10 @@
**Dépollueur**
Main programs:
* [eMailShrinker](eMailShrinker_8cpp.html)
* [jirafeauAPI](jirafeauAPI_8cpp.html)
Main classes:
* [MainAttachment](classkaz_1_1MainAttachment.html)
* [Attachment](classkaz_1_1Attachment.html)