#!/bin/bash
##########################################################################
# Copyright KAZ 2021							 #
# 									 #
# contact (at) kaz.bzh							 #
# 									 #
# This software is a filter to shrink email by attachment extraction.	 #
# 									 #
# This software is governed by the CeCILL-B license under French law and #
# abiding by  the rules of distribution  of free software. You  can use, #
# modify  and/or  redistribute  the  software under  the  terms  of  the #
# CeCILL-B license as circulated by CEA, CNRS and INRIA at the following #
# URL "http://www.cecill.info".						 #
# 									 #
# As a counterpart to the access to  the source code and rights to copy, #
# modify and  redistribute granted  by the  license, users  are provided #
# only with a limited warranty and  the software's author, the holder of #
# the economic  rights, and the  successive licensors have  only limited #
# liability.								 #
# 									 #
# In this respect, the user's attention is drawn to the risks associated #
# with loading,  using, modifying  and/or developing or  reproducing the #
# software by the user in light of its specific status of free software, #
# that may  mean that  it is  complicated to  manipulate, and  that also #
# therefore means  that it  is reserved  for developers  and experienced #
# professionals having in-depth computer  knowledge. Users are therefore #
# encouraged  to load  and test  the software's  suitability as  regards #
# their  requirements  in  conditions  enabling the  security  of  their #
# systems and/or  data to  be ensured  and, more  generally, to  use and #
# operate it in the same conditions as regards security.		 #
# 									 #
# The fact that  you are presently reading this means  that you have had #
# knowledge of the CeCILL-B license and that you accept its terms.	 #
##########################################################################
#  Kaz addon (see https://git.kaz.bzh/KAZ/depollueur for information)
#  version : 2.25 (2025-05-08)

##########################################################################
# Prerequisites:
# - install the apg utility (used for password generation)
# - install the dos2unix utility
# - the content of INSPECT_DIR must be writable by the owner of this
#   script
# - the shrinker binary (see SHRINK_CMD) must be executable by that owner
# - as this program runs without privileges, root must first run:
#   mkdir -p "${DIR_LOG}/pb/" ; chmod a+rwx "${DIR_LOG}/pb/"
##########################################################################

# Fallback values used when the Jirafeau service gives no usable answer.
DEFAULT_MODE="both"
DEFAULT_PERIOD="month"
DEFAULT_TRACK=""

# Work from the script directory so the relative config/ paths resolve.
# "$0" is quoted so that an install path containing blanks still works.
cd "$(dirname "$0")"
DOMAINEDEPOT="$(cat config/domainedepot)"
# Exit codes from <sysexits.h>
EX_TEMPFAIL=75
EX_UNAVAILABLE=69
# NOTE(review): 552 is not a <sysexits.h> value. A process exit status is
# truncated to 8 bits, so "exit 552" is seen as 40 by the caller — confirm
# which status the mail system should really receive.
EX_TOO_LARGE=552
INSPECT_DIR=/var/spool/filter
DIR_LOG=/var/log/mail
# Shared log file; each run writes to its own TMP_LOG first.
FIC_LOG="${DIR_LOG}/filter.log"
TMP_LOG="$(mktemp)"
# Kept as a single string on purpose: it is expanded unquoted so that the
# options are split into separate words.
SENDMAIL="/usr/sbin/sendmail -G -i"
MAILS=/tmp/FILTER
MAX_KEEP_IN_MAIL=5ki
MAX_UPLOAD_SIZE=1Gi
SHRINK_CMD=/home/filter/eMailShrinker
# Public base URL of the depot; falls back to kaz.bzh when
# config/domainedepot is missing or empty.
JIRAFEAU_URL="https://depot.${DOMAINEDEPOT:-kaz.bzh}"
JIRAFEAU_LOCAL=http://depot
MD5_CMD=/usr/bin/md5sum
DISCLAMER_CMD=altermime
MAX_FINAL_SIZE=2097152 # 2Mi
ARCHIVE_TITLE="archive_content"
ARCHIVE_MIME="text/kaz_email_archive"

FILE_SKIP_DOMAINS="config/file_domaines_non_depollues.txt"

# Print the entries of the skip-domain file $1, one per line, with
# "#" comments, surrounding blanks and empty lines removed.
# Prints nothing when the file cannot be read.
# Each sed script needs its own -e: without it the second script is taken
# as an input file name and silently ignored.
load_skip_domains () {
    sed -e 's/#.*$//' \
        -e 's/^[[:blank:]]*//' \
        -e 's/[[:blank:]]*$//' \
        -e '/^$/d' \
        "$1" 2>/dev/null
}
SKIP_DOMAINS="$(load_skip_domains "${FILE_SKIP_DOMAINS}")"

# Both flags are tested with [ -z ... ]: any non-empty value enables them,
# an empty value disables them.
KEEP_FAILED=true
DEBUG=true

####################  FONCTIONS ############################################
# ANSI colour sequences, stored as literal backslash escapes; they are
# turned into real escape characters by "echo -e" when a line is logged.
NC='\e[0m' # No Color
BOLD='\e[1m'
RED='\e[0;31m'
GREEN='\e[0;32m'
YELLOW='\e[0;33m'
BLUE='\e[0;34m'
MAGENTA='\e[0;35m'
CYAN='\e[0;36m'
# A single newline character.
NL=$'\n'

#--------------------- Log file -------------------
# Append one timestamped line to the per-run log ${TMP_LOG}.
# $*: message parts, joined with single spaces; backslash escapes in the
#     message are interpreted (echo -e).
LOG_FIC () {
    local stamp message
    stamp="$(date +%d-%m-%Y-%H-%M-%S)"
    message="$*"
    echo -e "${BLUE}${stamp}${NC} : ${message}" >> "${TMP_LOG}"
}

# Terminate the filter: log the stop marker, append the per-run log
# ${TMP_LOG} to the shared log ${FIC_LOG}, remove the per-run log and exit.
# $1: exit status of the script (defaults to 0 when omitted)
quitFilter () {
    LOG_FIC "${GREEN}######################################## filter stop${NC}"
    cat "${TMP_LOG}" >> "${FIC_LOG}"
    rm -f "${TMP_LOG}"
    exit "${1:-0}"
}

# Keep a copy of a problematic file under ${DIR_LOG}/pb/.
# $1: file to copy
# Does nothing when KEEP_FAILED is empty.
keepFailed () {
    if [ -n "${KEEP_FAILED}" ]; then
        cp "$1" "${DIR_LOG}/pb/"
    fi
}

########################################
# curl Jirafeau
# POST an update request (fields u=<period>, h=<item reference>) to the
# local Jirafeau a.php endpoint. The command is logged first; curl's
# output is left on stdout/stderr for the caller to redirect.
curlJirafeauUpdate () {
    # $1: period
    # $2: jirafeauItemRef
    LOG_FIC " - ${CYAN}curl -X POST -d \"u=$1\" -d \"h=$2\" \"${JIRAFEAU_LOCAL}/a.php\"${NC}"
    curl -X POST -d "u=$1" -d "h=$2" "${JIRAFEAU_LOCAL}/a.php"
}

# Upload one file to the local Jirafeau s.php endpoint and print the raw
# server answer on stdout.
# The first line of the answer is treated as the upload token: an empty
# line, "no", anything containing "Error" or starting with "<" counts as a
# failure. The upload is tried at most twice, with a 30 s pause between
# the two attempts only (no pause after the last one).
# NOTE(review): the logged command line contains the clear-text key ($5).
curlJirafeauSend () {
    # $1: period
    # $2: filename
    # $3: content-type
    # $4: name
    # $5: password
    local period="$1" file="$2" contentType="$3" name="$4" key="$5"
    local typeField="type=${contentType};"
    local attempt maxAttempts=2
    local output="" token=""

    # No usable content type: let the server decide.
    if [ -z "${contentType}" ] || [ "${contentType}" = "/" ]; then
        typeField=""
    fi
    LOG_FIC " - curl -X POST -F \"time=${period}\" -F \"key=${key}\" -F \"file=@${file};${typeField}filename=\\\"${name}\\\"\" \"${JIRAFEAU_LOCAL}/s.php\""
    for (( attempt = 1; attempt <= maxAttempts; attempt++ )); do
        output=$(curl -X POST -F "time=${period}" -F "key=${key}" -F "file=@${file};${typeField}filename=\"${name}\"" "${JIRAFEAU_LOCAL}/s.php")
        read -r token <<< "${output}"
        case "${token}" in
            "" | no | *Error* | \<* )
                # Failed attempt: wait before retrying, but never after
                # the final attempt.
                if [ "${attempt}" -lt "${maxAttempts}" ]; then
                    sleep 30
                fi
                ;;
            * )
                break
                ;;
        esac
    done
    printf '%s\n' "${output}"
}

# Tell whether at least one address belongs to a domain of the skip list.
# $1: blank/newline separated list of domains to skip
# $2: blank separated list of e-mail addresses
# Prints "yes" when an address equals an entry, or ends with "@entry" or
# ".entry" (sub-domain); prints "no" otherwise. The comparison ignores
# letter case, and a leading "@" in an entry is tolerated.
# The match is anchored on "@" or "." so that "notexample.org" is not
# mistaken for "example.org".
function check_skip_domains() {
    local SKIP_DOMAINS="$1"
    local LIST_EMAILS="$2"
    local email skip

    # Domain names are case-insensitive: compare everything in lower case.
    SKIP_DOMAINS="$(tr '[:upper:]' '[:lower:]' <<< "${SKIP_DOMAINS}")"
    LIST_EMAILS="$(tr '[:upper:]' '[:lower:]' <<< "${LIST_EMAILS}")"

    # Both lists are deliberately expanded unquoted to split them on blanks.
    for email in ${LIST_EMAILS}; do
        for skip in ${SKIP_DOMAINS}; do
            skip="${skip#@}"
            [ -n "${skip}" ] || continue
            case "${email}" in
                "${skip}" | *[@.]"${skip}" )
                    echo "yes"
                    return
                    ;;
            esac
        done
    done

    # No address matched any skip domain.
    echo "no"
}

# Choose the longest retention period among the addresses' preferences.
# $1: blank separated list of e-mail addresses
# Each address is looked up through the local Jirafeau a.php endpoint
# (query parameter "p"); the answers are collected and the first period
# found in the order semester, quarter, month, week, day, hour, minute is
# printed. Prints ${DEFAULT_PERIOD} when no answer names a known period.
function check_max_period() {
    local LIST_EMAILS="$1"
    local LPERIOD=""
    local email p

    # The list is deliberately expanded unquoted to split it on blanks.
    for email in ${LIST_EMAILS}; do
        # -G + --data-urlencode: send the address as a URL-encoded query
        # string, so characters such as "+" or "&" reach the server intact.
        LPERIOD="${LPERIOD} $(curl -G --data-urlencode "p=${email}" "${JIRAFEAU_LOCAL}/a.php" 2>/dev/null)"
    done

    for p in semester quarter month week day hour minute; do
        # Whole-word match only.
        if [[ "${LPERIOD}" =~ (^|[[:space:]])$p($|[[:space:]]) ]]; then
            echo "${p}"
            return
        fi
    done
    echo "${DEFAULT_PERIOD}"
}

####################  MAIN #################################################
# Write the start marker straight into the shared log (it names the
# per-run log), then open the per-run log with the same reference.
{
    echo -e "${NL}${BLUE}$(date +%d-%m-%Y-%H-%M-%S)${NC} : ${GREEN}######################################## filter start (log in ${TMP_LOG})${NC}"
} >> "${FIC_LOG}"
LOG_FIC "${GREEN}######################################## ${TMP_LOG} ${NC}"

# The work directory for extracted attachments must exist.
mkdir -p "${MAILS}" || {
    LOG_FIC "${RED}Can't mkdir ${MAILS} ${NC}"
    quitFilter "${EX_UNAVAILABLE}"
}

# "$@" holds the sender and the recipients,
# in the form "-f sender -- dest1 [...dest(i)]"

# Split the filter arguments into the global address lists.
#   MAIL_SOURCE     : second argument (the sender in the form above)
#   LIST_EMAILS     : every argument except the "-f" and "--" markers
#   LIST_DST_EMAILS : the arguments after the last "--" (or, when there is
#                     no "--", the same content as LIST_EMAILS)
# Arguments are compared as whole words, so an address that merely
# contains "-f" or "--" (e.g. jean-francois@...) is left untouched.
parseFilterArgs () {
    local arg
    LIST_EMAILS=""
    LIST_DST_EMAILS=""
    MAIL_SOURCE="${2:-}"
    for arg in "$@"; do
        case "${arg}" in
            -f )
                ;;
            -- )
                # Recipients start after the separator: drop what was
                # collected so far.
                LIST_DST_EMAILS=""
                ;;
            * )
                LIST_EMAILS="${LIST_EMAILS} ${arg}"
                LIST_DST_EMAILS="${LIST_DST_EMAILS} ${arg}"
                ;;
        esac
    done
}
parseFilterArgs "$@"

DATE_TEMPS=$(date "+%Y-%m-%d-%H:%M:%S")
# Per-message work directory. The sender comes from outside: every
# character that is not alphanumeric or one of "@._+-" is replaced by "_"
# so that it cannot introduce path separators or blanks.
REP_PIECE_JOINTE="${MAILS}/${DATE_TEMPS}_${MAIL_SOURCE//[^[:alnum:]@._+-]/_}_$$"

# Ask the local Jirafeau service for the sender's preferences. Any answer
# outside the accepted values is replaced by the corresponding default.
# -G + --data-urlencode: the address is sent as a URL-encoded query string
# so that characters such as "+" or "&" reach the server intact.
MODE=$(curl -G --data-urlencode "m=${MAIL_SOURCE}" "${JIRAFEAU_LOCAL}/a.php" 2>/dev/null )
[[ "${MODE}" =~ ^(none|footer|attachment|both)$ ]] || MODE="${DEFAULT_MODE}"
TRACK=$(curl -G --data-urlencode "r=${MAIL_SOURCE}" "${JIRAFEAU_LOCAL}/a.php" 2>/dev/null )
[[ "${TRACK}" =~ ^(|0|1|false|true|FALSE|TRUE|on|off)$ ]] || TRACK="${DEFAULT_TRACK}"
# Longest period requested by the sender or any recipient.
PERIOD=$(check_max_period "${LIST_EMAILS}")
[[ "${PERIOD}" =~ ^(minute|hour|day|week|month|quarter|semester)$ ]] || PERIOD="${DEFAULT_PERIOD}"

LOG_FIC "${NL}" \
	"    MAIL_SOURCE    : ${YELLOW}${MAIL_SOURCE}${NC}${NL}" \
	"    DATE_TEMPS     : ${YELLOW}${DATE_TEMPS}${NC}${NL}" \
	"    MODE           : ${YELLOW}${MODE}${NC}${NL}" \
	"    TRACK          : ${YELLOW}${TRACK}${NC}${NL}" \
	"    PERIOD         : ${YELLOW}${PERIOD}${NC}${NL}"

# All in.$$ work files live in INSPECT_DIR.
if ! cd "${INSPECT_DIR}"; then
    echo "${INSPECT_DIR} does not exist"
    quitFilter "${EX_TEMPFAIL}"
fi

# answer returned by the last upload
ONE_LINK="${REP_PIECE_JOINTE}/one.txt"
# old links to refresh
OLD_LINKS="${REP_PIECE_JOINTE}/url-to-refresh.txt"
# content of the archive
ARCHIVE_CONTENT="${REP_PIECE_JOINTE}/archive-content.txt"
# created only when an upload fails
JIRAFEAU_ERROR="${REP_PIECE_JOINTE}/jirafeau-error.txt"

# Clean up when done or when aborting (only when DEBUG is empty).
# The trap body is single-quoted and every path is quoted: the variables
# are expanded when the trap fires and are never re-parsed as shell code,
# which matters because REP_PIECE_JOINTE is built from the sender address.
[ -z "${DEBUG}" ] && trap 'cd "${INSPECT_DIR}"; rm -rf "in.$$" "in.$$.altered" "${REP_PIECE_JOINTE}"' 0 1 2 3 15

# Store the message received on stdin as in.$$.
cat > "${INSPECT_DIR}/in.$$" || {
    LOG_FIC "${RED}Cannot save mail to file${NC}"
    quitFilter "${EX_TEMPFAIL}"
}
# Normalise line endings in place; dos2unix diagnostics are discarded.
dos2unix "${INSPECT_DIR}/in.$$" 2> /dev/null

LOG_FIC "${NL}" \
	"             size: ${YELLOW}$(wc -c < "${INSPECT_DIR}/in.$$")${NC}"
# In debug mode keep a copy of the original message.
if [ -n "${DEBUG}" ]; then
    (cp "${INSPECT_DIR}/in.$$" "${DIR_LOG}/pb/in.$$.orig")
fi

# Create the per-message work directory and start with empty list files.
mkdir -p "${REP_PIECE_JOINTE}/"
: > "${OLD_LINKS}"
: > "${ARCHIVE_CONTENT}"

# Pass-through cases: the message is re-injected unchanged.
# ${SENDMAIL} is deliberately unquoted so that its options are split into
# separate words. The status of sendmail becomes the status of the
# filter: exiting 0 after a failed re-injection would make the mail
# system drop a message that was never delivered.
if [ "${MODE}" = "none" ]; then
    LOG_FIC " - ${GREEN}send without change (MODE=none)${NC}"
    ${SENDMAIL} "$@" < "${INSPECT_DIR}/in.$$"
    quitFilter $?
fi


if [ "$(check_skip_domains "${SKIP_DOMAINS}" "${LIST_DST_EMAILS}")" = "yes" ]; then
    LOG_FIC " - ${GREEN}send without change (skip domain)${NC}"
    ${SENDMAIL} "$@" < "${INSPECT_DIR}/in.$$"
    quitFilter $?
fi

########################################
# Cloud preparation
rm -f "${REP_PIECE_JOINTE}/last.txt"

# Refresh step for previously uploaded files referenced by the message.
# The archive description starts with a timestamp and an id, plus the
# sender when TRACK is non-empty.
# NOTE(review): TRACK may hold "0", "false" or "off"; those values are
# non-empty and therefore also add the sender line — confirm intent.
echo "time: ${DATE_TEMPS}${NL}id: $(date +%s)" > "${ARCHIVE_CONTENT}"
[ -n "${TRACK}" ] && echo "sender: ${MAIL_SOURCE}" >> "${ARCHIVE_CONTENT}"

LOG_FIC "${CYAN}${SHRINK_CMD} -u \"${INSPECT_DIR}/in.$$\" 2>> \"${TMP_LOG}\" > \"${OLD_LINKS}\"${NC}"
"${SHRINK_CMD}" -u "${INSPECT_DIR}/in.$$" 2>> "${TMP_LOG}" > "${OLD_LINKS}"

# Keep only the links pointing to our depot (fixed-string match: the URL
# contains dots that must not act as regex wildcards).
grep -F -- "${JIRAFEAU_URL}" "${OLD_LINKS}" | while read -r REMOTE_LINK; do
    # Item reference: value of the h= parameter; lines without one end up
    # empty and are skipped.
    REMOTE_REF=$(echo "${REMOTE_LINK}" | sed -e 's/.*h=\([^&]*\).*/\1/' -e 's/.*http.*//')
    [ -z "${REMOTE_REF}" ] && continue
    REMOTE_KEY=$(echo "${REMOTE_LINK}" | grep "k=" | sed 's%.*k=\([^&]*\).*%\1%')
    # update the download period; both stdout and stderr go to the log
    # (the file redirection must come before 2>&1 for that).
    curlJirafeauUpdate "${PERIOD}" "${REMOTE_REF}" >> "${TMP_LOG}" 2>&1
    echo "old: ${REMOTE_REF} ${REMOTE_KEY}" >> "${ARCHIVE_CONTENT}"
    echo "h=${REMOTE_REF}~${REMOTE_KEY}" > "${REP_PIECE_JOINTE}/last.txt"
done
LOG_FIC " - archive starts with: ${NL}${YELLOW}$(cat "${ARCHIVE_CONTENT}")${NC}"

# Attachment extraction step.
# One pipeline with three stages:
#   1. the shrinker (-s/-d) prints one path per line on stdout;
#   2. the { ... } group reads those paths, uploads each attachment and
#      prints "url:", "arch:" and "cloud:" lines on its own stdout;
#   3. the shrinker (-m) reads these lines on stdin and writes
#      in.$$.altered.
# The group runs as part of a pipeline, so variables set inside it are not
# visible after the pipeline; results are passed on through files
# (ARCHIVE_CONTENT, last.txt, JIRAFEAU_ERROR).
LOG_FIC "${CYAN}${SHRINK_CMD} -s \"${MAX_KEEP_IN_MAIL}\" -d \"${REP_PIECE_JOINTE}\" \"${INSPECT_DIR}/in.$$\"${NC}"
"${SHRINK_CMD}" -s "${MAX_KEEP_IN_MAIL}" -d "${REP_PIECE_JOINTE}" "${INSPECT_DIR}/in.$$"  2>> "${TMP_LOG}" | {
    while read ATTACH_TMP_NAME; do
	# Each line is expected to name a directory holding the files
	# "media" (content) and "meta" ("Name: " / "Content-Type: " lines).
	if [ -d "${ATTACH_TMP_NAME}" ]; then
	    ATTACH_MEDIA="${ATTACH_TMP_NAME}/media"
	    ATTACH_NAME=$(grep "^Name: " "${ATTACH_TMP_NAME}/meta" | cut -c 7- )
	    ATTACH_CONTENT_TYPE=$(grep "^Content-Type: " "${ATTACH_TMP_NAME}/meta" | cut -c 15- )
	else
	    LOG_FIC " - ${RED}no ATTACH_TMP_NAME (${ATTACH_TMP_NAME}) ${NC}"
	    # XXX error
	    continue
	fi

	# Attachment upload step: a fresh password per attachment; its MD5
	# is the key placed in the download link.
	PASSWORD=$(apg -n 1 -m 12 -M cln)
	PASSWORD_MD5=$(echo -n ${PASSWORD} | ${MD5_CMD} | cut -d \  -f 1)
	curlJirafeauSend "${PERIOD}" "${ATTACH_MEDIA}" "${ATTACH_CONTENT_TYPE}" "${ATTACH_NAME}" "${PASSWORD}" 2>> "${TMP_LOG}" > "${ONE_LINK}"
	# First two lines of the server answer: token, then code.
	cat "${ONE_LINK}"  | {
	    read JIR_TOKEN
	    read JIR_CODE
	    LOG_FIC " - return code ${YELLOW}${JIR_TOKEN}${NC} / ${YELLOW}${JIR_CODE}${NC}"
	    case "${JIR_TOKEN}" in
		# Same failure patterns as in curlJirafeauSend.
		"" | no | *Error* | \<* )
		    LOG_FIC " - ${RED}Can't upload <${ATTACH_NAME}> <${ATTACH_CONTENT_TYPE}> <$(wc -c < "${ATTACH_MEDIA}")> (${JIR_TOKEN}) <in.$$.bak>. It will be not change in e-mail.${NC}"
		    # Empty "url:" line for this attachment.
		    echo "url:"
		    keepFailed "${INSPECT_DIR}/in.$$"
		    # Recorded in a file so the failure is visible after the pipeline.
		    echo "UPLOAD_FAIL" >> "${JIRAFEAU_ERROR}"
		    ;;
		* )
		    LOG_FIC " - change by link ${YELLOW}${JIRAFEAU_URL}/f.php?d=0&h=${JIR_TOKEN}&k=${PASSWORD_MD5}${NC}"
		    echo "url: ${JIRAFEAU_URL}/f.php?d=0&h=${JIR_TOKEN}&k=${PASSWORD_MD5}"
		    echo "new: ${JIR_TOKEN} ${PASSWORD_MD5}" >> "${ARCHIVE_CONTENT}"
		    # last.txt always describes the most recent successful item.
		    echo "h=${JIR_TOKEN}~${PASSWORD_MD5}" > "${REP_PIECE_JOINTE}/last.txt"
		    ;;
	    esac
	}
    done

    # Archive creation: count the refreshed ("old: ") and uploaded
    # ("new: ") entries recorded so far.
    NB_ATTACH=$(grep -e "^old: " -e "^new: " "${ARCHIVE_CONTENT}" | wc -l)
    # The archive description is uploaded when there are at least two
    # entries, or at least one entry and TRACK is non-empty.
    # NOTE(review): with TRACK set and exactly one entry the archive is
    # uploaded here but its answer is not checked below (that branch
    # requires more than one entry) — confirm this is intended.
    if [ \( -n "${TRACK}" -a "${NB_ATTACH}" -gt 0 \) -o "${NB_ATTACH}" -gt 1 ]; then
	PASSWORD=$(apg -n 1 -m 12 -M cln)
	PASSWORD_MD5=$(echo -n ${PASSWORD} | ${MD5_CMD} | cut -d \  -f 1)
	LOG_FIC " - ${MAGENTA}upload archive${NC}"

	curlJirafeauSend "${PERIOD}" "${ARCHIVE_CONTENT}" "${ARCHIVE_MIME}" "${ARCHIVE_TITLE}" "${PASSWORD}" 2>> "${TMP_LOG}" > "${ONE_LINK}"
    fi
    LOG_FIC " - final archive content: ${NL}${YELLOW}$(cat ${ARCHIVE_CONTENT})${NC}"
    if  [ "${NB_ATTACH}" -gt 1 ]; then
	# Same answer handling as for a single attachment, but for the archive.
	cat "${ONE_LINK}"  | {
	    read JIR_TOKEN
	    read JIR_CODE
	    LOG_FIC " - return code ${YELLOW}${JIR_TOKEN}${NC} / ${YELLOW}${JIR_CODE}${NC}"
	    case "${JIR_TOKEN}" in
		"" | no | *Error* | \<* )
		    LOG_FIC " - ${RED}can't upload archive (${JIR_TOKEN}) <in.$$.bak>, substitution couldn't be done${NC}"
		    echo "arch: bad"
		    keepFailed "${INSPECT_DIR}/in.$$"
		    echo "ARCHIVE_FAIL" >> "${JIRAFEAU_ERROR}"
		    ;;
		* )
		    LOG_FIC " - add archive ${YELLOW}${JIRAFEAU_URL}/a.php?g=${JIR_TOKEN}~${PASSWORD_MD5}${NC}"
		    echo "arch: ${JIRAFEAU_URL}/a.php?g=${JIR_TOKEN}~${PASSWORD_MD5}"
		    # The archive replaces the last single item as cloud target.
		    echo "g=${JIR_TOKEN}~${PASSWORD_MD5}" > "${REP_PIECE_JOINTE}/last.txt"
		    ;;
	    esac
	}
    else
	LOG_FIC " - no archive (less than 2 attach file)"
	echo "arch: none"
    fi
    # Cloud link built from the last recorded item (single file or archive).
    if [ -s "${REP_PIECE_JOINTE}/last.txt" ]; then
	echo "cloud: ${JIRAFEAU_URL}/c.php?$(cat "${REP_PIECE_JOINTE}/last.txt")"
    else
	echo "cloud: none"
    fi

    # Substitution step: performed by the last stage of the pipeline, which
    # reads the lines printed above.
    LOG_FIC "${CYAN}${SHRINK_CMD} -m \"${MODE}\" -s \"${MAX_KEEP_IN_MAIL}\" \"${INSPECT_DIR}/in.$$\" \"${INSPECT_DIR}/in.$$.altered\" 2>> \"${TMP_LOG}\"${NC}"
} | "${SHRINK_CMD}" -m "${MODE}" -s "${MAX_KEEP_IN_MAIL}" "${INSPECT_DIR}/in.$$" "${INSPECT_DIR}/in.$$.altered" 2>> "${TMP_LOG}"

# In debug mode keep a copy of the altered message.
if [ -n "${DEBUG}" ]; then
    cp "${INSPECT_DIR}/in.$$.altered" "${DIR_LOG}/pb/in.$$.altered"
fi

# Any failed upload recorded during the pipeline: defer the message.
if [ -s "${JIRAFEAU_ERROR}" ]; then
    LOG_FIC " - ${RED}upload fail${NC}"
    quitFilter "${EX_TEMPFAIL}"
fi

# Choose between the altered and the original message.
# The archive description always starts with two lines (time, id); three
# lines or more means something was added to it.
# NOTE(review): when TRACK is non-empty the "sender:" line alone reaches
# three lines, so the altered message is sent even without any attachment
# entry — confirm this is intended.
if [ "$(wc -l < "${ARCHIVE_CONTENT}")" -ge 3 ]; then
    # Never re-inject a missing or empty altered message: defer instead so
    # that the mail system keeps the original in its queue.
    if [ ! -s "${INSPECT_DIR}/in.$$.altered" ]; then
        LOG_FIC " - ${RED}missing or empty ${INSPECT_DIR}/in.$$.altered${NC}"
        keepFailed "${INSPECT_DIR}/in.$$"
        quitFilter "${EX_TEMPFAIL}"
    fi
    # final size check
    actualSize=$(wc -c < "${INSPECT_DIR}/in.$$.altered")
    if [ "${actualSize}" -ge "${MAX_FINAL_SIZE}" ]; then
        LOG_FIC " - ${RED}too big even after diet ${INSPECT_DIR}/in.$$.altered (${actualSize})${NC}"
        keepFailed "${INSPECT_DIR}/in.$$"
        quitFilter "${EX_TOO_LARGE}"
    fi
    LOG_FIC " - ${GREEN}send with : ${SENDMAIL} $* ${INSPECT_DIR}/in.$$.altered ${NC}"
    # ${SENDMAIL} is deliberately unquoted (command plus options).
    ${SENDMAIL} "$@" < "${INSPECT_DIR}/in.$$.altered"
    SEND_STATUS=$?
else
    # final size check
    actualSize=$(wc -c < "${INSPECT_DIR}/in.$$")
    if [ "${actualSize}" -ge "${MAX_FINAL_SIZE}" ]; then
        LOG_FIC " - ${RED}too big without diet ${INSPECT_DIR}/in.$$ (${actualSize}) ${NC}"
        keepFailed "${INSPECT_DIR}/in.$$"
        quitFilter "${EX_TOO_LARGE}"
    fi
    LOG_FIC " - ${GREEN}send without attach file${NC}"
    ${SENDMAIL} "$@" < "${INSPECT_DIR}/in.$$"
    SEND_STATUS=$?
fi

# Hand sendmail's status back to the mail system: reporting success after
# a failed re-injection would lose the message.
if [ "${SEND_STATUS}" -ne 0 ]; then
    LOG_FIC " - ${RED}sendmail failed (status ${SEND_STATUS})${NC}"
    keepFailed "${INSPECT_DIR}/in.$$"
fi
quitFilter "${SEND_STATUS}"

##########################################################################