2021-05-06 09:58:16 +02:00
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Copyright KAZ 2021 //
|
|
|
|
// //
|
|
|
|
// contact (at) kaz.bzh //
|
|
|
|
// //
|
|
|
|
// This software is a filter to shrink email by attachment extraction. //
|
|
|
|
// //
|
|
|
|
// This software is governed by the CeCILL-B license under French law and //
|
|
|
|
// abiding by the rules of distribution of free software. You can use, //
|
|
|
|
// modify and/or redistribute the software under the terms of the //
|
|
|
|
// CeCILL-B license as circulated by CEA, CNRS and INRIA at the following //
|
|
|
|
// URL "http://www.cecill.info". //
|
|
|
|
// //
|
|
|
|
// As a counterpart to the access to the source code and rights to copy, //
|
|
|
|
// modify and redistribute granted by the license, users are provided //
|
|
|
|
// only with a limited warranty and the software's author, the holder of //
|
|
|
|
// the economic rights, and the successive licensors have only limited //
|
|
|
|
// liability. //
|
|
|
|
// //
|
|
|
|
// In this respect, the user's attention is drawn to the risks associated //
|
|
|
|
// with loading, using, modifying and/or developing or reproducing the //
|
|
|
|
// software by the user in light of its specific status of free software, //
|
|
|
|
// that may mean that it is complicated to manipulate, and that also //
|
|
|
|
// therefore means that it is reserved for developers and experienced //
|
|
|
|
// professionals having in-depth computer knowledge. Users are therefore //
|
|
|
|
// encouraged to load and test the software's suitability as regards //
|
|
|
|
// their requirements in conditions enabling the security of their //
|
|
|
|
// systems and/or data to be ensured and, more generally, to use and //
|
|
|
|
// operate it in the same conditions as regards security. //
|
|
|
|
// //
|
|
|
|
// The fact that you are presently reading this means that you have had //
|
|
|
|
// knowledge of the CeCILL-B license and that you accept its terms. //
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
2022-02-10 01:37:43 +01:00
|
|
|
// Version banner (release number, release date, program name) printed by version ().
#define LAST_VERSION "2.0 2022-02-08 eMailShrinker"
|
2021-05-06 09:58:16 +02:00
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <fstream>
|
|
|
|
#include <string>
|
|
|
|
#include <chrono>
|
|
|
|
#include <boost/program_options.hpp>
|
|
|
|
#include <boost/filesystem.hpp>
|
|
|
|
|
|
|
|
#include "kazDebug.hpp"
|
|
|
|
#include "kazMisc.hpp"
|
|
|
|
#include "SizeArg.hpp"
|
|
|
|
#include "MainAttachment.hpp"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
using namespace boost;
|
|
|
|
using namespace boost::program_options;
|
|
|
|
using namespace kaz;
|
|
|
|
|
|
|
|
// ================================================================================
|
|
|
|
// Option group always printed by usage (); getCols () is handed to
// boost::program_options as the line length of the generated help text.
static options_description mainDescription ("Main options", getCols ());

// Option group printed by usage () only when its `hidden' argument is true.
static options_description hide ("Hidded options", getCols ());

// Program name shown in the usage text; main () sets it from argv [0].
static char *prog = nullptr;
|
|
|
|
|
|
|
|
// ================================================================================
|
|
|
|
void
|
|
|
|
usage (const string &msg = "", const bool &hidden = false) {
|
|
|
|
if (!msg.empty ()) {
|
|
|
|
cout << msg << endl;
|
|
|
|
exit (1);
|
|
|
|
}
|
|
|
|
cout << endl
|
|
|
|
<< "Usage: " << endl
|
|
|
|
<< " A) " << prog << " -u mbox > url-list" << endl
|
|
|
|
<< " B) " << prog << " [-s size] [-d dirName}] mbox > file-list" << endl
|
2022-02-10 01:37:43 +01:00
|
|
|
<< " C) " << prog << " [-s size] mbox altered-mbox < url-list" << endl
|
2021-05-06 09:58:16 +02:00
|
|
|
<< endl << " filter attachments" << endl << endl
|
|
|
|
<< " A: list previous embded url need to be refresh (no added option)" << endl
|
|
|
|
<< " => downloadURL list" << endl
|
|
|
|
<< " B: attachment extraction (options : s, d)" << endl
|
|
|
|
<< " => list of (filename)" << endl
|
|
|
|
<< " C: attachment replace with url (options : s) " << endl
|
|
|
|
<< " <= list of (downloadURL [id])" << endl
|
|
|
|
<< endl << mainDescription
|
|
|
|
<< endl;
|
|
|
|
if (hidden)
|
|
|
|
cout << hide << endl;
|
|
|
|
exit (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
version () {
|
|
|
|
cout << LAST_VERSION << " KAZ team production (https://kaz.bzh/)" << endl;
|
|
|
|
exit (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static auto startPrg = std::chrono::high_resolution_clock::now ();
|
|
|
|
void
|
|
|
|
showTime (string msg) {
|
|
|
|
using namespace std::chrono;
|
|
|
|
static auto stopPrg = high_resolution_clock::now ();
|
|
|
|
|
|
|
|
cerr << msg << " done in " << ns2string (duration_cast<duration<double> > (stopPrg-startPrg).count ()) << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ================================================================================
|
|
|
|
static const string inputFile = "input-file";
|
|
|
|
static const char *const inputFileC = inputFile.c_str ();
|
|
|
|
|
|
|
|
int
|
|
|
|
main (int argc, char** argv) {
|
|
|
|
// XXX debug before parse options
|
|
|
|
// Log::debug = true;
|
|
|
|
DEF_LOG ("main:", "");
|
|
|
|
prog = argv [0];
|
|
|
|
bool
|
|
|
|
debugFlag (false),
|
|
|
|
helpFlag (false),
|
|
|
|
versionFlag (false),
|
|
|
|
updateListFlag (false),
|
|
|
|
useTheForceLuke (false),
|
|
|
|
listFlag (false);
|
2022-02-10 01:37:43 +01:00
|
|
|
string inputName, outputName;
|
2021-05-06 09:58:16 +02:00
|
|
|
bfs::path extractDir (bfs::temp_directory_path ());
|
|
|
|
SizeArg minAttachSize ("48 Ki");
|
|
|
|
|
|
|
|
try {
|
|
|
|
mainDescription.add_options ()
|
|
|
|
("help,h", bool_switch (&helpFlag), "produce this help message")
|
|
|
|
("version,v", bool_switch (&versionFlag), "display version information")
|
|
|
|
("size,s", value<SizeArg> (&minAttachSize)->default_value (minAttachSize), "minimum size for extration")
|
|
|
|
("updateList,u", bool_switch (&updateListFlag), "list URL need refresh")
|
|
|
|
("extractDir,d", value<bfs::path> (&extractDir)->default_value (extractDir), "set tmp directory name for extraction")
|
|
|
|
;
|
|
|
|
|
|
|
|
hide.add_options ()
|
|
|
|
("useTheForceLuke", bool_switch (&useTheForceLuke), "display hidded options")
|
|
|
|
("list,l", bool_switch (&listFlag), "get attachment list")
|
|
|
|
("debug,g", bool_switch (&debugFlag), "debug mode")
|
|
|
|
;
|
|
|
|
|
|
|
|
options_description cmd ("All options");
|
|
|
|
cmd.add (mainDescription).add (hide).add_options ()
|
|
|
|
(inputFileC, value<vector<string> > (), "input")
|
|
|
|
;
|
|
|
|
positional_options_description p;
|
|
|
|
p.add (inputFileC, -1);
|
|
|
|
variables_map vm;
|
|
|
|
basic_parsed_options<char> parsed = command_line_parser (argc, argv).options (cmd).positional (p).run ();
|
|
|
|
store (parsed, vm);
|
|
|
|
notify (vm);
|
|
|
|
|
|
|
|
if (debugFlag) {
|
|
|
|
#ifdef DISABLE_LOG
|
|
|
|
cerr << "No debug option available (was compiled with -DDISABLE_LOG)" << endl;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
Log::debug = debugFlag;
|
|
|
|
|
|
|
|
if (useTheForceLuke)
|
|
|
|
usage ("", true);
|
|
|
|
if (versionFlag)
|
|
|
|
version ();
|
|
|
|
if (helpFlag)
|
|
|
|
usage ();
|
|
|
|
|
|
|
|
if (vm.count (inputFileC)) {
|
|
|
|
vector<string> var = vm[inputFileC].as<vector<string> > ();
|
|
|
|
int nbArgs = vm[inputFileC].as<vector<string> > ().size ();
|
|
|
|
if (!nbArgs)
|
|
|
|
usage ("No input file(s)");
|
|
|
|
inputName = var [0];
|
|
|
|
if (nbArgs > 1)
|
|
|
|
outputName = var [1];
|
|
|
|
if (nbArgs > 2)
|
|
|
|
usage ("Too much arguments");
|
|
|
|
}
|
|
|
|
} catch (std::exception &e) {
|
|
|
|
cerr << "error: " << e.what() << endl;
|
|
|
|
usage ();
|
|
|
|
return 1;
|
|
|
|
} catch (...) {
|
|
|
|
cerr << "Exception of unknown type!" << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
LOG ("minAttachSize: " << minAttachSize);
|
|
|
|
|
|
|
|
if (inputName.empty ())
|
|
|
|
usage ("no input file");
|
|
|
|
|
|
|
|
// input mbox file
|
|
|
|
ifstream mbox (inputName);
|
|
|
|
MainAttachment attachment (mbox);
|
|
|
|
mbox.close ();
|
|
|
|
|
|
|
|
if (attachment.getBoundary ().empty ()) {
|
|
|
|
cerr << "no attachment" << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
// parse structure
|
|
|
|
mbox.open (inputName);
|
|
|
|
attachment.markSignificant (minAttachSize, mbox);
|
|
|
|
mbox.close ();
|
|
|
|
|
|
|
|
if (listFlag)
|
|
|
|
// debug
|
|
|
|
cerr << attachment;
|
|
|
|
|
|
|
|
if (updateListFlag) {
|
2022-02-10 01:37:43 +01:00
|
|
|
// case update
|
2021-05-06 09:58:16 +02:00
|
|
|
mbox.open (inputName);
|
|
|
|
attachment.getUpdatedURL (mbox);
|
|
|
|
showTime ("Find old links");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (outputName.empty ()) {
|
2022-02-10 01:37:43 +01:00
|
|
|
// case extract
|
2021-05-06 09:58:16 +02:00
|
|
|
attachment.setExtractDir (extractDir);
|
|
|
|
mbox.open (inputName);
|
|
|
|
attachment.extract (mbox, minAttachSize);
|
|
|
|
showTime ("Extraction");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-02-10 01:37:43 +01:00
|
|
|
// case substitute
|
2021-05-06 09:58:16 +02:00
|
|
|
mbox.open (inputName);
|
|
|
|
ofstream outbox (outputName);
|
|
|
|
attachment.substitute (mbox, outbox, minAttachSize);
|
|
|
|
showTime ("Substitution");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ================================================================================
|