fix synchro plain and html

This commit is contained in:
2022-10-31 07:08:19 +01:00
parent 99e888f5d1
commit b4ea2185f0
7 changed files with 94 additions and 49 deletions

View File

@ -32,14 +32,15 @@
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <vector>
#include <set>
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <math.h>
#include <algorithm>
#include <regex>
#include <set>
#include <unistd.h>
#include <vector>
#include "kazDebug.hpp"
#include "kazMisc.hpp"
@ -55,6 +56,10 @@ static const string TMPL_DOWNLOAD = "{{DOWNLOAD}}";
// Placeholder substituted with the attachment name in the link templates.
static const string TMPL_FILENAME = "{{FILENAME}}";
// Prefix which, preceded by a double quote, is listed in Attachment::stringsToUpdate.
static const string CID = "cid:";
// Pattern used with regex_match to recognize archive URLs: the key must
// contain "g=" or "l=/" right after '&' or '?'.
// "l=/" => v1 compatibility
static const regex archiveURLSignature (".*(([&?]g=)|([&?]l=/)).*");
// Line of underscores written before KAZ_PLAIN_STOP at the end of the plain-text disclaimer.
static const string KAZ_PLAIN_HR = "______________________________________________________________________________";
// Opening marker of the KAZ plain-text section.
static const string KAZ_PLAIN_START = "~~ PJ-KAZ !"; // must not end with a space
// Closing marker: the opening marker followed by " ~~".
static const string KAZ_PLAIN_STOP = KAZ_PLAIN_START+" ~~";
@ -93,8 +98,8 @@ vector <string>
Attachment::stringsToUpdate ({KAZ_PLAIN_START, "\""+CID});
// ================================================================================
const string MainAttachment::templatePlainAddLink (" - "+TMPL_DOWNLOAD+" "+TMPL_FILENAME+"\r\n");
const string MainAttachment::templatePlainAllLink ("\r\n"+KAZ_PLAIN_DOWLOAD_ALL+"\r\n * "+TMPL_DOWNLOAD+"\r\n");
const string MainAttachment::templatePlainAddLink (" * "+TMPL_FILENAME+" <"+TMPL_DOWNLOAD+">\r\n");
const string MainAttachment::templatePlainAllLink ("\r\n * "+KAZ_PLAIN_DOWLOAD_ALL+" <"+TMPL_DOWNLOAD+">\r\n");
const string MainAttachment::templateHtmlHeader (KAZ_HTML_START+"<p style=\"clear: left; padding: 1pc 0 0 0; font-size:10px; color:#969696;\">"+KAZ_PLAIN_START+"</p><hr>\n"
"<div class=\"kaz\">"
@ -167,6 +172,7 @@ MainAttachment::addLink (string &plain, string &html, const string &url, const s
plain += plainNewOneLink;
string htmlNewOneLink (templateHtmlAddLink);
string codedUrl (url);
// XXX amp ?
replaceAll (codedUrl, "&", "&amp;");
replaceAll (htmlNewOneLink, TMPL_DOWNLOAD, codedUrl);
replaceAll (htmlNewOneLink, TMPL_FILENAME, name);
@ -218,9 +224,9 @@ MainAttachment::getDisclaim (string &plain, string &html) const {
string allPlainLinks (templatePlainAllLink);
replaceAll (allPlainLinks, TMPL_DOWNLOAD, archiveDownloadURL);
plain += allPlainLinks;
string allLinks (templateHtmlAllLink);
replaceAll (allLinks, TMPL_DOWNLOAD, archiveDownloadURL);
html += allLinks;
string allHtmlLinks (templateHtmlAllLink);
replaceAll (allHtmlLinks, TMPL_DOWNLOAD, archiveDownloadURL);
html += allHtmlLinks;
}
html += templateHtmlFooter;
plain += "\r\n\r\n"+KAZ_WEB_SITE+"\r\n"+KAZ_PLAIN_HR+"\r\n"+KAZ_PLAIN_STOP+"\r\n";
@ -245,6 +251,7 @@ MainAttachment::addPrevious (const string &href, const string &name, const bool
void
MainAttachment::extractLinks (const string &extractedPlainKAZ) {
// plain text => "* name <url>"
DEF_LOG ("Attachment::extractedPlainKAZ", "extractedPlainKAZ: " << extractedPlainKAZ);
for (string::size_type startPos (0);
(startPos = extractedPlainKAZ.find ("http", startPos)) != string::npos;
@ -257,22 +264,30 @@ MainAttachment::extractLinks (const string &extractedPlainKAZ) {
if (extractedPlainKAZ [stopPos])
++stopPos;
streamoff stopName = startPos;
startPos = stopPos;
// keep every href except KAZ_WEB_SITE
// (the archive link is skipped by filter.sh)
if (href == KAZ_WEB_SITE)
continue;
string::size_type nextPos = extractedPlainKAZ.find ("http", startPos);
string name (extractedPlainKAZ, startPos, (nextPos == string::npos ? extractedPlainKAZ.length () : nextPos) - startPos);
// skip [> \r\n\t]
nextPos = name.find_first_not_of ("[> \t\r\n\"]");
if (nextPos != string::npos)
name.erase (0, nextPos);
// stop before [>\r\n\t]
nextPos = name.find_first_of ("[\"]");
if (nextPos != string::npos)
name.erase (nextPos);
// backward find name
string::size_type startName = extractedPlainKAZ.rfind ("* ", stopName);
string name;
if (startName != string::npos) {
name = string (extractedPlainKAZ, startName+3, stopName - startName - 3);
// skip [> \r\n\t]
string::size_type nextPos = name.find_first_not_of ("[>< \t\r\n\"]");
if (nextPos != string::npos)
name.erase (0, nextPos);
// stop before [>\r\n\t]
nextPos = name.find_first_of ("[\"<]");
if (nextPos != string::npos)
name.erase (nextPos);
}
LOG ("plain name: " << name);
addPrevious (href, name);
}
@ -281,6 +296,7 @@ MainAttachment::extractLinks (const string &extractedPlainKAZ) {
// ================================================================================
void
MainAttachment::extractLinks (const vector<string> &liOne) {
// html text => "<li ...><a href="url">name</a>"
DEF_LOG ("Attachment::extractedPlainKAZ", "liOne.size: " << liOne.size ());
for (const string &one : liOne) {
if (caseInsensitiveFind (one, CLASS_ONE) == string::npos)
@ -353,10 +369,11 @@ MainAttachment::extractPreviousKAZ (ifstream &mbox) {
void
MainAttachment::removePreviousArchive () {
vector<string> toRemove;
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it)
if (it->first.find ("&g=") != string::npos ||
it->first.find ("&l=/") != string::npos) // v1 compatibility
toRemove.push_back (it->first);
for (map <string, string>::const_iterator it = previousLinks.begin (); it != previousLinks.end (); ++it) {
const string key (it->first);
if (regex_match (key, archiveURLSignature))
toRemove.push_back (key);
}
for (string old : toRemove)
previousLinks.erase (old);
}
@ -521,8 +538,10 @@ MainAttachment::substitute (ifstream &mbox, ofstream &outbox, const SizeArg &min
continue;
for (EmbeddedData &embedded : attachP->embeddedData)
readDownloadUrl (embedded.downloadUrl);
// XXX test empty ?
}
readArchiveUrl ();
removePreviousArchive ();
string plainDisclaim, htmlDisclaim;
getDisclaim (plainDisclaim, htmlDisclaim);
// copy email

View File

@ -32,7 +32,7 @@
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#define LAST_VERSION "2.0 2022-02-08 eMailShrinker"
#define LAST_VERSION "2.1 2022-10-30 eMailShrinker"
#include <iostream>
#include <fstream>
@ -188,10 +188,10 @@ main (int argc, char** argv) {
MainAttachment attachment (mbox);
mbox.close ();
if (attachment.getBoundary ().empty ()) {
cerr << "no attachment" << endl;
return 1;
}
// if (attachment.getBoundary ().empty ()) {
// cerr << "no attachment" << endl;
// return 1;
// }
// parse structure
mbox.open (inputName);
attachment.markSignificant (minAttachSize, mbox);

View File

@ -32,7 +32,7 @@
// knowledge of the CeCILL-B license and that you accept its terms. //
////////////////////////////////////////////////////////////////////////////
#define LAST_VERSION "1.0 2021-02-21 jirafeauAPI"
#define LAST_VERSION "1.1 2022-10-30 jirafeauAPI"
#include <iostream>
#include <string>
@ -281,12 +281,13 @@ main (int argc, char** argv) {
}
curl_easy_setopt (easyhandle, CURLOPT_MIMEPOST, multipart);
curl_easy_perform (easyhandle);
CURLcode res (curl_easy_perform (easyhandle));
curl_easy_cleanup (easyhandle);
cout << readBuffer << endl;
showTime ("Upload");
if (res != CURLE_OK)
cerr << prog << " failed: " << curl_easy_strerror (res) << endl;
return 0;
}