diff --git a/README.md b/README.md
index 05501f0..e065515 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,13 @@
 # Dépollueur de courriel
 
+## Compilation
+
+```bash
+sudo apt-get install --fix-missing build-essential make g++ libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libcurl4-gnutls-dev
+
+git clone https://git.kaz.bzh/KAZ/depollueur.git
+# or for contributors :
+# git clone git+ssh://git@git.kaz.bzh:2202/KAZ/depollueur.git
+
+make -j $(nproc)
+```
diff --git a/src/Jirafeau/f.php b/src/Jirafeau/f.php
new file mode 100644
index 0000000..26c8ae1
--- /dev/null
+++ b/src/Jirafeau/f.php
@@ -0,0 +1,304 @@
+
+ * Copyright (C) 2015 Jerome Jutteau
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+define('JIRAFEAU_ROOT', dirname(__FILE__) . '/');
+
+require(JIRAFEAU_ROOT . 'lib/settings.php');
+require(JIRAFEAU_ROOT . 'lib/functions.php');
+require(JIRAFEAU_ROOT . 'lib/lang.php');
+
+if (!isset($_GET['h']) || empty($_GET['h'])) {
+    header('Location: ./');
+    exit;
+}
+
+/* Operations may take a long time.
+ * Be sure PHP's safe mode is off.
+ */
+@set_time_limit(0);
+/* Remove errors. */
+@error_reporting(0);
+
+$link_name = $_GET['h'];
+if (!is_string($link_name) || !preg_match('/^[0-9a-zA-Z_-]+\z/', $link_name)) { // the WHOLE token must match: anchored at both ends, so "../x" or "a/b" is rejected
+    require(JIRAFEAU_ROOT.'lib/template/header.php');
+    echo '

' . t('FILE_404') . '

'; + require(JIRAFEAU_ROOT.'lib/template/footer.php'); + exit; +} + +$link = jirafeau_get_link($link_name); +if (count($link) == 0) { + require(JIRAFEAU_ROOT.'lib/template/header.php'); + echo '

' . t('FILE_404') . + '

'; + require(JIRAFEAU_ROOT.'lib/template/footer.php'); + exit; +} + +$delete_code = ''; +if (isset($_GET['d']) && !empty($_GET['d']) && $_GET['d'] != '1') { + $delete_code = $_GET['d']; +} + +$update_period = ''; +if (isset($_GET['u']) && !empty($_GET['u'])) { + $update_period = $_GET['u']; +} + +$crypt_key = ''; +if (isset($_GET['k']) && !empty($_GET['k'])) { + $crypt_key = $_GET['k']; +} + +$do_download = false; +if (isset($_GET['d']) && $_GET['d'] == '1') { + $do_download = true; +} + +$do_preview = false; +if (isset($_GET['p']) && !empty($_GET['p'])) { + $do_preview = true; +} + +// XXX KAZ +$do_update = false; +if (isset($_GET['u']) && !empty($_GET['u'])) { + $do_update = true; +} + +$p = s2p($link['md5']); +if (!file_exists(VAR_FILES . $p . $link['md5'])) { + jirafeau_delete_link($link_name); + require(JIRAFEAU_ROOT.'lib/template/header.php'); + echo '

'.t('FILE_NOT_AVAIL'). + '

'; + require(JIRAFEAU_ROOT.'lib/template/footer.php'); + exit; +} + +if (!empty($delete_code) && $delete_code == $link['link_code']) { + require(JIRAFEAU_ROOT.'lib/template/header.php'); + if (isset($_POST['do_delete'])) { + jirafeau_delete_link($link_name); + echo '

'.t('FILE_DELETED'). + '

'; + } else { ?> +
+
$link['time']) { + jirafeau_delete_link($link_name); + require(JIRAFEAU_ROOT.'lib/template/header.php'); + echo '

'. + t('FILE_EXPIRED') . ' ' . + t('FILE_DELETED') . + '

'; + require(JIRAFEAU_ROOT . 'lib/template/footer.php'); + exit; +} + +if (empty($crypt_key) && $link['crypted']) { + require(JIRAFEAU_ROOT.'lib/template/header.php'); + echo '

' . t('FILE_404') . + '

'; + require(JIRAFEAU_ROOT.'lib/template/footer.php'); + exit; +} + +$password_challenged = false; +if (!empty($link['key'])) { + if (!isset($_POST['key'])) { + if (!empty($crypt_key) && $link['key'] == $crypt_key) + $password_challenged = true; + else { + require(JIRAFEAU_ROOT.'lib/template/header.php'); + echo '
' . + '
'; + require(JIRAFEAU_ROOT.'lib/template/footer.php'); + exit; + } + } else { + if ($link['key'] == md5($_POST['key'])) { + $password_challenged = true; + } else { + sleep(2); + require(JIRAFEAU_ROOT.'lib/template/header.php'); + echo '

' . t('ACCESS_KO') . + '

'; + require(JIRAFEAU_ROOT.'lib/template/footer.php'); + exit; + } + } +} + +if (!$password_challenged && !$do_download && !$do_preview) { + require(JIRAFEAU_ROOT.'lib/template/header.php'); + echo '
' . + '
'; ?> + ' . jirafeau_escape($link['file_name']) . '' . + '' . + ''; + + if ($link['onetime'] == 'O') { + echo ''; + } ?> + '; + echo '
' . + t('NOW_DOWNLOADING') . ' "' . jirafeau_escape($link['file_name']) . '" (' . jirafeau_human_size($link['file_size']) . ').' . + '
' . + t('USING_SERVICE'). ' ' . t('TOS') . '.' . + '
' . + t('AUTO_DESTRUCT') . + '
';
+    require(JIRAFEAU_ROOT.'lib/template/footer.php');
+    exit;
+}
+
+header('HTTP/1.0 200 OK');
+header('Content-Length: ' . $link['file_size']);
+if (!jirafeau_is_viewable($link['mime_type']) || !$cfg['preview'] || $do_download) {
+    header('Content-Disposition: attachment; filename="' . $link['file_name'] . '"');
+} else {
+    header('Content-Disposition: filename="' . $link['file_name'] . '"');
+}
+header('Content-Type: ' . $link['mime_type']);
+header('Content-MD5: ' . hex_to_base64($link['md5']));
+
+/* Read encrypted file. */
+if ($link['crypted']) {
+    /* Init module */
+    $m = mcrypt_module_open('rijndael-256', '', 'ofb', '');
+    /* Extract key and iv. */
+    $md5_key = md5($crypt_key);
+    $iv = jirafeau_crypt_create_iv($md5_key, mcrypt_enc_get_iv_size($m));
+    /* Init module. */
+    mcrypt_generic_init($m, $md5_key, $iv);
+    /* Decrypt file. */
+    $r = fopen(VAR_FILES . $p . $link['md5'], 'r');
+    while (!feof($r)) {
+        $dec = mdecrypt_generic($m, fread($r, 1024));
+        print $dec;
+        ob_flush();
+    }
+    fclose($r);
+    /* Cleanup. */
+    mcrypt_generic_deinit($m);
+    mcrypt_module_close($m);
+}
+/* Read file. */
+else {
+    $r = fopen(VAR_FILES . $p . $link['md5'], 'r');
+    while (!feof($r)) {
+        print fread($r, 1024);
+        ob_flush();
+    }
+    fclose($r);
+}
+
+if ($link['onetime'] == 'O') {
+    jirafeau_delete_link($link_name);
+}
+exit;
+
+?>
diff --git a/src/Jirafeau/t.php b/src/Jirafeau/t.php
new file mode 100644
index 0000000..e8b2355
--- /dev/null
+++ b/src/Jirafeau/t.php
@@ -0,0 +1,111 @@
+open ($tmpFileName.".zip", ZipArchive::CREATE) !== TRUE)
+    die ("can't create tmp");
+
+if ($notFoundCount) {
+    $zip->addFromString ($dirname."-Avertissement.txt", $notFoundCount. ($notFoundCount == 1 ? " fichier est expiré." : " fichiers sont expirés.")); // singular wording only for exactly one missing file
+}
+foreach ($map as $link_name => $crypt_key) {
+    $link = jirafeau_get_link ($link_name);
+    $p = s2p ($link ['md5']);
+
+    // send: encrypted files are decrypted in memory and stored as a string entry
+    if ($link['crypted']) {
+        $m = mcrypt_module_open ('rijndael-256', '', 'ofb', '');
+        $md5_key = md5 ($crypt_key);
+        $iv = jirafeau_crypt_create_iv ($md5_key, mcrypt_enc_get_iv_size ($m));
+        mcrypt_generic_init ($m, $md5_key, $iv);
+        $r = fopen (VAR_FILES . $p . $link['md5'], 'r');
+        $content = "";
+        while (!feof ($r)) {
+            $dec = mdecrypt_generic ($m, fread ($r, 1024));
+            $content .= $dec;
+            ob_flush ();
+        }
+        fclose ($r);
+        $zip->addFromString ($dirname."/".$link['file_name'], $content);
+
+        mcrypt_generic_deinit ($m);
+        mcrypt_module_close ($m);
+        continue;
+    }
+    $zip->addFile (VAR_FILES . $p . $link['md5'], $dirname."/".$link['file_name']);
+}
+$zip->close ();
+
+
+if (!is_file ($tmpFileName.".zip"))
+    die ("can't retreive tmp");
+
+header ("HTTP/1.0 200 OK");
+header ("Content-Type: application/zip");
+header ('Content-Disposition: attachment; filename="'.$dirname.'.zip"'); // explicit disposition type; the stray trailing quote after the file name is gone
+$r = fopen($tmpFileName.".zip", 'r');
+while (!feof ($r)) {
+    print fread ($r, 1024);
+    ob_flush ();
+}
+fclose ($r);
+
+unlink ($tmpFileName.".zip");
+unlink ($tmpFileName);
diff --git a/src/bash/filter.sh b/src/bash/filter.sh
new file mode 100644
index 0000000..9a860b1
--- /dev/null
+++ b/src/bash/filter.sh
@@ -0,0 +1,188 @@
+#!/bin/sh
+##########################################################################
+# Copyright KAZ 2021 #
+# #
+# contact (at) kaz.bzh #
+# #
+# This software is a filter to shrink email by attachment extraction. #
+# #
+# This software is governed by the CeCILL-B license under French law and #
+# abiding by the rules of distribution of free software. You can use, #
+# modify and/or redistribute the software under the terms of the #
+# CeCILL-B license as circulated by CEA, CNRS and INRIA at the following #
+# URL "http://www.cecill.info".
# +# # +# As a counterpart to the access to the source code and rights to copy, # +# modify and redistribute granted by the license, users are provided # +# only with a limited warranty and the software's author, the holder of # +# the economic rights, and the successive licensors have only limited # +# liability. # +# # +# In this respect, the user's attention is drawn to the risks associated # +# with loading, using, modifying and/or developing or reproducing the # +# software by the user in light of its specific status of free software, # +# that may mean that it is complicated to manipulate, and that also # +# therefore means that it is reserved for developers and experienced # +# professionals having in-depth computer knowledge. Users are therefore # +# encouraged to load and test the software's suitability as regards # +# their requirements in conditions enabling the security of their # +# systems and/or data to be ensured and, more generally, to use and # +# operate it in the same conditions as regards security. # +# # +# The fact that you are presently reading this means that you have had # +# knowledge of the CeCILL-B license and that you accept its terms. 
# +########################################################################## + +########################################################################## +# - installer l' utilitaire apg pour génération de mot de passes +# - le contenu de INSPECT_DIR doit être accessible en écriture pour le +# proriétaire du script +# - shrinkEMail et jirafeau.sh doivent être accessible en execution pour +# le roriétaire du script +########################################################################## + +cd $(dirname $0) + +# Exit coINSPECT_DIRdes from +EX_TEMPFAIL=75 +EX_UNAVAILABLE=69 +EX_TOO_LARGE=552 +INSPECT_DIR=/var/spool/filter +FIC_LOG=/var/log/mail/filter.log +SENDMAIL="/usr/sbin/sendmail -G -i" +MAILS=/tmp/FILTER +MAX_KEEP_IN_MAIL=24ki +MAX_UPLOAD_SIZE=100Mi +SHRINK_CMD=/home/filter/eMailShrinker +JIRAFEAU_CMD=/home/filter/jirafeauAPI +JIRAFEAU_OLDURL="https://\(file\|depot\)\.kaz\.bzh" +JIRAFEAU_URL=https://depot.kaz.bzh +JIRAFEAU_TIME=month +MD5_CMD=/usr/bin/md5sum +DISCLAMER_CMD=altermime +MAX_FINAL_SIZE=204800 # 200ki + +#################### FONCTIONS ############################################ +#--------------------- Fichier de LOG ------------------- +LOG_FIC() { + echo "$(date +%d-%m-%Y-%H-%M-%S) : $*" >> "${FIC_LOG}" +} + +#################### MAIN ################################################# +LOG_FIC "------------------------------------------------------" +LOG_FIC "--------------- debut de programme -------------------" + +mkdir -p "${MAILS}" || { LOG_FIC "impossible de creer ce dossier"; exit "${EX_UNAVAILABLE}"; } +MAIL_SOURCE=$(echo $@ | awk 'BEGIN{FS=" "} {print $2}') +DATE_TEMPS=$(date "+%Y-%m-%d-%H:%M:%S") +REP_PIECE_JOINTE=$(echo "${MAILS}/${DATE_TEMPS}_${MAIL_SOURCE}_$$") +cd "${INSPECT_DIR}" || { echo "${INSPECT_DIR} does not exist"; exit "${EX_TEMPFAIL}"; } + +ONE_LINK="${REP_PIECE_JOINTE}/one.txt" +ALL_LINKS="${REP_PIECE_JOINTE}/url-list.txt" +OLD_LINKS="${REP_PIECE_JOINTE}/url-to-refresh.txt" 
+PREV_CODES="${REP_PIECE_JOINTE}/prev-codes.txt" + +# Clean up when done or when aborting. +trap "rm -rf in.$$ in.$$.altered ${REP_PIECE_JOINTE}" 0 1 2 3 15 + +cat > "in.$$" || { LOG_FIC "Cannot save mail to file"; exit "${EX_TEMPFAIL}"; } +# XXX trace +# cp "${INSPECT_DIR}/in.$$" "${INSPECT_DIR}/in.$$.bak" + +mkdir -p "${REP_PIECE_JOINTE}/" +>"${ALL_LINKS}" +>"${OLD_LINKS}" +>"${PREV_CODES}" + +# Etape de rafraichissement des anciens fichiers inclus +OLD_CODES="" +LOG_FIC "${SHRINK_CMD} -u \"${INSPECT_DIR}/in.$$\" 2>> \"${FIC_LOG}\" > \"${OLD_LINKS}\"" +"${SHRINK_CMD}" -u "${INSPECT_DIR}/in.$$" 2>> "${FIC_LOG}" > "${OLD_LINKS}" + +cat "${OLD_LINKS}" | grep "${JIRAFEAU_OLDURL}" | while read REMOTE_LINK +do + REMOTE_REF=$(echo "${REMOTE_LINK}" | sed -e 's/.*h=\([^&]*\).*/\1/' -e 's/.*http.*//') + [ -z "${REMOTE_REF}" ] && continue + REMOTE_KEY=$(echo "${REMOTE_LINK}" | grep "k=" | sed 's%.*k=\([^&]*\).*%\1%') + # update periode for download + LOG_FIC " - \"${JIRAFEAU_CMD}\" -f \"${JIRAFEAU_URL}\" -t \"${JIRAFEAU_TIME}\" update \"${REMOTE_REF}\" 2>&1 >> \"${FIC_LOG}\"" + "${JIRAFEAU_CMD}" -f "${JIRAFEAU_URL}" -t "${JIRAFEAU_TIME}" update "${REMOTE_REF}" 2>&1 >> "${FIC_LOG}" + echo -n "/${REMOTE_REF}~${REMOTE_KEY}" >> "${PREV_CODES}" +done +OLD_CODES=$(cat "${PREV_CODES}") +LOG_FIC " - OLD_CODES=${OLD_CODES}" + +# Etape extraction des pieces jointes +LOG_FIC "${SHRINK_CMD} -s ${MAX_KEEP_IN_MAIL} -d ${REP_PIECE_JOINTE} ${INSPECT_DIR}/in.$$" +"${SHRINK_CMD}" -s "${MAX_KEEP_IN_MAIL}" -d "${REP_PIECE_JOINTE}" "${INSPECT_DIR}/in.$$" 2>> "${FIC_LOG}" | { + while read ATTACH_TMP_NAME + do + if [ -d "${ATTACH_TMP_NAME}" ] + then + ATTACH_MEDIA="${ATTACH_TMP_NAME}/media" + ATTACH_NAME=$(grep "^Name: " "${ATTACH_TMP_NAME}/meta" | cut -c 7- ) + ATTACH_CONTENT_TYPE=$(grep "^Content-Type: " "${ATTACH_TMP_NAME}/meta" | cut -c 15- ) + else + # XXX a virer + ATTACH_MEDIA="${ATTACH_TMP_NAME}" + ATTACH_NAME=$(basename "${ATTACH_MEDIA}") + ATTACH_CONTENT_TYPE="" + fi + # Etape de 
televersement des pieces jointes + PASSWORD=$(apg -n 1 -m 12) + PASSWORD_MD5=$(echo -n ${PASSWORD} | ${MD5_CMD} | cut -d \ -f 1) + actualSize=$(ls -l "${ATTACH_MEDIA}") + LOG_FIC " - \"${JIRAFEAU_CMD}\" -f \"${JIRAFEAU_URL}\" -s \"${MAX_UPLOAD_SIZE}\" -c \"${ATTACH_CONTENT_TYPE}\" -n \"${ATTACH_NAME}\" send \"${ATTACH_MEDIA}\" \"${PASSWORD}\" 2>> \"${FIC_LOG}\" > \"${ONE_LINK}\"" + "${JIRAFEAU_CMD}" -f "${JIRAFEAU_URL}" -s "${MAX_UPLOAD_SIZE}" -c "${ATTACH_CONTENT_TYPE}" -n "${ATTACH_NAME}" send "${ATTACH_MEDIA}" "${PASSWORD}" 2>> "${FIC_LOG}" > "${ONE_LINK}" + cat "${ONE_LINK}" | { + read JIR_TOKEN + read JIR_CODE + LOG_FIC " - Jirafeau envoie ${JIR_TOKEN} et ${JIR_CODE}" + case "${JIR_TOKEN}" in + "" | no | *Error* | \<* ) + LOG_FIC " - impossible de televerser ${ATTACH_TMP_FILE} (${JIR_TOKEN}), il ne sera pas remplace dans le message" + echo "" + ;; + * ) + LOG_FIC " - substitution par ${JIRAFEAU_URL}/f.php?d=1&h=${JIR_TOKEN}&k=${PASSWORD_MD5}" + echo "${JIRAFEAU_URL}/f.php?d=1&h=${JIR_TOKEN}&k=${PASSWORD_MD5} /${JIR_TOKEN}~${PASSWORD_MD5}" + echo "${JIRAFEAU_URL}/f.php?d=1&h=${JIR_TOKEN}&k=${PASSWORD_MD5}" >> "${ALL_LINKS}" + ;; + esac + } + LOG_FIC " - supprimer l'extraction ${ATTACH_TMP_FILE}" + rm -f "${ATTACH_TMP_FILE}" + done + # Etape de substitution + LOG_FIC "${SHRINK_CMD} -a \"${JIRAFEAU_URL}/t.php?n=${MAIL_SOURCE}_${DATE_TEMPS}&l=${OLD_CODES}\" -s \"${MAX_KEEP_IN_MAIL}\" \"${INSPECT_DIR}/in.$$\" \"${INSPECT_DIR}/in.$$.altered\" 2>> \"${FIC_LOG}\"" +} | "${SHRINK_CMD}" -a "${JIRAFEAU_URL}/t.php?n=${MAIL_SOURCE}_${DATE_TEMPS}&l=${OLD_CODES}" -s "${MAX_KEEP_IN_MAIL}" "${INSPECT_DIR}/in.$$" "${INSPECT_DIR}/in.$$.altered" 2>> "${FIC_LOG}" + +# XXX trace +# cp "${INSPECT_DIR}/in.$$" "${INSPECT_DIR}/in.$$.altered" /var/mail/tmp/ + +# Etape choix de modification du message d'origine +if [ -s "${ALL_LINKS}" -o -s "${OLD_LINKS}" ] +then + # verification de taille finale + actualSize=$(wc -c < "${INSPECT_DIR}/in.$$.altered") + if [ ${actualSize} -ge 
$MAX_FINAL_SIZE ]; then + LOG_FIC " - message trop gros apres regime ${INSPECT_DIR}/in.$$.altered (${actualSize})" + exit "${EX_TOO_LARGE}"; + fi + LOG_FIC " - envoi de la commande : ${SENDMAIL} $@ ${INSPECT_DIR}/in.$$.altered" + ${SENDMAIL} "$@" < "${INSPECT_DIR}/in.$$.altered" +else + # verification de taille finale + actualSize=$(wc -c < "${INSPECT_DIR}/in.$$") + if [ ${actualSize} -ge $MAX_FINAL_SIZE ]; then + LOG_FIC " - message trop gros sans regime ${INSPECT_DIR}/in.$$ (${actualSize})" + exit "${EX_TOO_LARGE}"; + fi + LOG_FIC " - pas de piece jointe" + ${SENDMAIL} "$@" < "in.$$" +fi + +exit 0 + +########################################################################## diff --git a/src/cpp/MainAttachment.cpp b/src/cpp/MainAttachment.cpp index 5117e0f..e826d2e 100644 --- a/src/cpp/MainAttachment.cpp +++ b/src/cpp/MainAttachment.cpp @@ -231,11 +231,13 @@ MainAttachment::getDisclaim (string &plain, string &html) const { // ================================================================================ void -MainAttachment::addPrevious (const string &href, const string &name) { +MainAttachment::addPrevious (const string &href, const string &name, const bool &trust) { DEF_LOG ("Attachment::addPrevious", "href: " << href << " name: " << name); const string oldVal = previousLinks [href]; if (name.empty ()) return; + if (oldVal.length () && name.length () && !trust) + return; previousLinks.erase (href); previousLinks [href] = name; LOG ("inserted: " << href << ": " << previousLinks[href]); @@ -252,7 +254,7 @@ MainAttachment::extractLinks (const string &extractedPlainKAZ) { ++stopPos; const string href (extractedPlainKAZ.substr (startPos, stopPos-startPos)); LOG ("plain href: " << href); - + if (extractedPlainKAZ [stopPos] && extractedPlainKAZ [stopPos] != '\n') ++stopPos; startPos = stopPos; diff --git a/src/cpp/eMailShrinker.cpp b/src/cpp/eMailShrinker.cpp index d7f0f62..a5e6513 100644 --- a/src/cpp/eMailShrinker.cpp +++ b/src/cpp/eMailShrinker.cpp @@ -32,7 +32,7 
@@ // knowledge of the CeCILL-B license and that you accept its terms. // //////////////////////////////////////////////////////////////////////////// -#define LAST_VERSION "eMailShrinker 1.3 2021-04-04" +#define LAST_VERSION "eMailShrinker 1.4 2021-05-07" #include #include diff --git a/src/include/MainAttachment.hpp b/src/include/MainAttachment.hpp index 5b2e9c4..8b4d4fd 100644 --- a/src/include/MainAttachment.hpp +++ b/src/include/MainAttachment.hpp @@ -87,8 +87,8 @@ namespace kaz { vector allMarkedPtrs; /*! previous links find in mbox */ map previousLinks; - /*! add link only if no significant value already exist. */ - void addPrevious (const string &href, const string &name); + /*! Record name for href. An empty name is ignored; an existing non-empty name is kept unless trust is true (presumably set by callers for values taken from html). */ + void addPrevious (const string &href, const string &name, const bool &trust = false); /*! extract previous links from plain text. Used by extractPreviousKAZ */ void extractLinks (const string &extractedPlainKAZ);