From 430317c1b905852ceb4818b495ea1c7afd89eb47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Sun, 11 Jan 2026 18:03:17 +0100 Subject: [PATCH] managed rfc2047 and rfc2184 for attachment name manage name= / name*= / name*[0-9]= / name*[0-9]*= --- src/cpp/Attachment.cpp | 70 +++++++++++++++----------------------- src/cpp/kazMisc.cpp | 20 ++++++++--- src/include/Attachment.hpp | 3 +- src/include/kazMisc.hpp | 6 ++-- 4 files changed, 49 insertions(+), 50 deletions(-) diff --git a/src/cpp/Attachment.cpp b/src/cpp/Attachment.cpp index d9de07f..0f9a092 100644 --- a/src/cpp/Attachment.cpp +++ b/src/cpp/Attachment.cpp @@ -65,8 +65,8 @@ const string Attachment::ALTERNATIVE ("alternative"); const string Attachment::KAZ_ATTACH_NAME ("vos-pieces-jointes-kaz-ici.html"); const string Attachment::MULTIPART ("multipart/"); -const regex Attachment::nameCharsetRegEx ( ".*name\\*=\\s*([; \t]*)"); -const regex Attachment::nameRegEx ( ".*name=\\s*((\"(\\\\.|[^\\\\])*\")|[^; \t]*).*"); +const string Attachment::nameLeftToken (".*name"); +const string Attachment::nameRightToken ("=\\s*((\"(\\\\.|[^\\\\])*\")|[^; \t]*).*"); const regex Attachment::boundaryRegEx (".*boundary=\\s*((\"(\\\\.|[^\\\\])*\")|[^; \t]*).*"); const regex Attachment::cidDefRegEx (".*<([^>]*)>.*"); const regex Attachment::textRegEx (".*text/("+PLAIN+"|"+HTML+").*"); @@ -179,47 +179,31 @@ Attachment::getAttachName () const { static string tokens [] = {contentTypeToken, contentDispositionToken}; DEF_LOG ("Attachment::getAttachName", ""); for (string token : tokens) { - // name= - string result = getProp (token, nameRegEx); - removeQuote (result); - if (result.length ()) { - LOG ("name=: " << result); - encodedWordDecode (result); - return result; - } - // name*x= - for (int id = 0; ; ++id) { - string item = getProp (token, regex (".*name\\*"+to_string (id)+"=\\s*((\"(\\\\.|[^\\\\])*\")|[; \t]*).*")); - if (item.empty ()) - break; - result += item; - } - removeQuote (result); - if (result.length ()) { - LOG 
("name*x=: " << result); - encodedWordDecode (result); - return result; - } - // name*= - result = getProp (token, nameCharsetRegEx); - removeQuote (result); - if (result.length ()) { - LOG ("name*=: " << result); - charsetValueDecode (result); - return result; - } - // name*x*= - for (int id = 0; ; ++id) { - string item = getProp (token, regex (".*name\\*"+to_string (id)+"\\*=\\s*([^; ]*)")); - if (item.empty ()) - break; - result += item; - } - removeQuote (result); - if (result.length ()) { - LOG ("name*x*=: " << result); - encodedWordDecode (result); - return result; + for (string star : {"", "\\*"}) { + + // name= | name*= (single, non-continued parameter) + regex nameAloneRegEx (nameLeftToken+star+nameRightToken); + string result = getProp (token, nameAloneRegEx); + removeQuote (result); + if (result.length ()) { + LOG (("name"+star+"=: ") << result); + charsetDecode (result); + return result; + } + + // name*[0-9]= | name*[0-9]*= (continuations appended from index 0 until the first missing one) + for (int id = 0; ; ++id) { + string item = getProp (token, regex (nameLeftToken+"\\*"+to_string (id)+star+nameRightToken)); + if (item.empty ()) + break; + result += item; + } + removeQuote (result); + if (result.length ()) { + LOG (("name*x"+star+"=: ") << result); + charsetDecode (result); + return result; + } } } return getUnknown (getContentType ()); diff --git a/src/cpp/kazMisc.cpp b/src/cpp/kazMisc.cpp index 39baf42..c812dff 100644 --- a/src/cpp/kazMisc.cpp +++ b/src/cpp/kazMisc.cpp @@ -439,16 +439,17 @@ kaz::encodedWordDecode (string &content) { } // ================================================================================ -void +bool kaz::charsetValueDecode (string &content) { // rfc2184 DEF_LOG ("kazMisc::charsetValueDecode", "content: " << content.substr (0, 100) << "..."); string::size_type langPos = content.find ("'"); - - LOG_BUG (langPos == string::npos, return, "kazMisc::charsetValueDecode bug: no '. 
(content: " << content.substr (0, 100) << "...)"); + if (langPos == string::npos) + return false; string::size_type contentPos = content.find ("'", langPos+1); + if (contentPos == string::npos) + return false; - LOG_BUG (contentPos == string::npos, return, "kazMisc::charsetValueDecode bug: no double '. (content: " << content.substr (0, 100) << "...)"); string tmp (content.substr (contentPos+1)); quotedDecode<'%'> (tmp); LOG ("tmp: " << tmp.substr (0, 100) << "..."); @@ -458,6 +459,17 @@ kaz::charsetValueDecode (string &content) { iso2utf (tmp); content = tmp; LOG ("content: " << content.substr (0, 100) << "..."); + return true; +} + +// ================================================================================ +void +kaz::charsetDecode (string &content) { + // rfc2047 | rfc2184: try the rfc2184 charset value form first, otherwise fall back to the rfc2047 encoded word decoding + DEF_LOG ("kazMisc::charsetDecode", "content: " << content.substr (0, 100) << "..."); + if (charsetValueDecode (content)) + return; + encodedWordDecode (content); } // ================================================================================ diff --git a/src/include/Attachment.hpp b/src/include/Attachment.hpp index 758b009..05a1bc4 100644 --- a/src/include/Attachment.hpp +++ b/src/include/Attachment.hpp @@ -55,8 +55,9 @@ namespace kaz { static vector stringsToUpdate; /*! mime tokens */ static const string contentTypeToken, contentDispositionToken, contentTransferEncodingToken, base64Token, quotedPrintableToken, contentIDToken, PLAIN, HTML, MULTIPART, RELATED, ALTERNATIVE, SIGNED, KAZ_ATTACH_NAME; + static const string nameLeftToken, nameRightToken; /*! pattern to extract mime values */ - static const regex nameRegEx, nameCharsetRegEx, boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx; + static const regex boundaryRegEx, cidDefRegEx, textRegEx, multiRegEx; /*! 
get uniq filename */ static string getUnknown (const string &ext = ""); diff --git a/src/include/kazMisc.hpp b/src/include/kazMisc.hpp index 112cd17..a7f4423 100644 --- a/src/include/kazMisc.hpp +++ b/src/include/kazMisc.hpp @@ -36,7 +36,7 @@ #define _kaz_misc_hpp #include -#include +#include #include #include @@ -91,7 +91,9 @@ namespace kaz { /*! side effect to get the encoded word according rfc2047 rfc5987 rfc2978 */ void encodedWordDecode (string &content); /*! side effect to get the charsetValue according rfc2184 */ - void charsetValueDecode (string &content); + bool charsetValueDecode (string &content); + /*! side effect to decode the content as a rfc2184 charsetValue, or as a rfc2047 encoded word when it is not one */ + void charsetDecode (string &content); /*! side effect to remove quote */ void removeQuote (string &content);