first commit

2024-06-03 18:43:35 +02:00
parent 2da01a3f6e
commit f501d519af
883 changed files with 71550 additions and 2 deletions
--- a/dockers/apikaz/source/Sympa/Tools/Data.pm
+++ b/dockers/apikaz/source/Sympa/Tools/Data.pm
@@ -0,0 +1,506 @@
+# -*- indent-tabs-mode: nil; -*-
+# vim:ft=perl:et:sw=4
+# $Id$
+
+# Sympa - SYsteme de Multi-Postage Automatique
+#
+# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
+# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
+# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
+# Copyright 2018 The Sympa Community. See the AUTHORS.md file at the
+# top-level directory of this distribution and at
+# <https://github.com/sympa-community/sympa.git>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Sympa::Tools::Data;
+
+use strict;
+use warnings;
+use Encode qw();
+use English qw(-no_match_vars);
+use POSIX qw();
+use XML::LibXML qw();
+BEGIN { eval 'use Clone qw()'; }
+
+use Sympa::Tools::Text;
+
+## This applies recursively to a data structure
+## The transformation subroutine is passed as a ref
+sub recursive_transformation {
+    my ($var, $subref) = @_;
+
+    return unless (ref($var));
+
+    if (ref($var) eq 'ARRAY') {
+        foreach my $index (0 .. $#{$var}) {
+            if (ref($var->[$index])) {
+                recursive_transformation($var->[$index], $subref);
+            } else {
+                $var->[$index] = &{$subref}($var->[$index]);
+            }
+        }
+    } elsif (ref($var) eq 'HASH') {
+        foreach my $key (keys %{$var}) {
+            if (ref($var->{$key})) {
+                recursive_transformation($var->{$key}, $subref);
+            } else {
+                $var->{$key} = &{$subref}($var->{$key});
+            }
+        }
+    }
+
+    return;
+}
+
+## Dump a variable's content
+sub dump_var {
+    my ($var, $level, $fd) = @_;
+
+    return undef unless ($fd);
+
+    if (ref($var)) {
+        if (ref($var) eq 'ARRAY') {
+            foreach my $index (0 .. $#{$var}) {
+                print $fd "\t" x $level . $index . "\n";
+                dump_var($var->[$index], $level + 1, $fd);
+            }
+        } elsif (ref($var) eq 'HASH'
+            || ref($var) eq 'Sympa::Scenario'
+            || ref($var) eq 'Sympa::List'
+            || ref($var) eq 'CGI::Fast') {
+            foreach my $key (sort keys %{$var}) {
+                print $fd "\t" x $level . '_' . $key . '_' . "\n";
+                dump_var($var->{$key}, $level + 1, $fd);
+            }
+        } else {
+            printf $fd "\t" x $level . "'%s'" . "\n", ref($var);
+        }
+    } else {
+        if (defined $var) {
+            print $fd "\t" x $level . "'$var'" . "\n";
+        } else {
+            print $fd "\t" x $level . "UNDEF\n";
+        }
+    }
+}
+
+## Dump a variable's content
+sub dump_html_var {
+    my ($var) = shift;
+    my $html = '';
+
+    if (ref($var)) {
+
+        if (ref($var) eq 'ARRAY') {
+            $html .= '<ul>';
+            foreach my $index (0 .. $#{$var}) {
+                $html .= '<li> ' . $index . ':';
+                $html .= dump_html_var($var->[$index]);
+                $html .= '</li>';
+            }
+            $html .= '</ul>';
+        } elsif (ref($var) eq 'HASH'
+            || ref($var) eq 'Sympa::Scenario'
+            || ref($var) eq 'Sympa::List') {
+            $html .= '<ul>';
+            foreach my $key (sort keys %{$var}) {
+                $html .= '<li>' . $key . '=';
+                $html .= dump_html_var($var->{$key});
+                $html .= '</li>';
+            }
+            $html .= '</ul>';
+        } else {
+            $html .= 'EEEEEEEEEEEEEEEEEEEEE' . ref($var);
+        }
+    } else {
+        if (defined $var) {
+            $html .= Sympa::Tools::Text::encode_html($var);
+        } else {
+            $html .= 'UNDEF';
+        }
+    }
+    return $html;
+}
+
+# Duplicates a complex variable (faster).
+# CAUTION: This duplicates blessed elements even if they are
+# singleton/multiton; this breaks subroutine references.
+sub clone_var {
+    return Clone::clone($_[0]) if $Clone::VERSION;
+    goto &dup_var;    # '&' needed
+}
+
+## Duplictate a complex variable
+sub dup_var {
+    my ($var) = @_;
+
+    if (ref($var)) {
+        if (ref($var) eq 'ARRAY') {
+            my $new_var = [];
+            foreach my $index (0 .. $#{$var}) {
+                $new_var->[$index] = dup_var($var->[$index]);
+            }
+            return $new_var;
+        } elsif (ref($var) eq 'HASH') {
+            my $new_var = {};
+            foreach my $key (sort keys %{$var}) {
+                $new_var->{$key} = dup_var($var->{$key});
+            }
+            return $new_var;
+        }
+    }
+
+    return $var;
+}
+
+####################################################
+# get_array_from_splitted_string
+####################################################
+# return an array made on a string splited by ','.
+# It removes spaces.
+#
+#
+# IN : -$string (+): string to split
+#
+# OUT : -ref(ARRAY)
+#
+######################################################
+# Note: This is used only by Sympa::List.
+sub get_array_from_splitted_string {
+    my ($string) = @_;
+    my @array;
+
+    foreach my $word (split /,/, $string) {
+        $word =~ s/^\s+//;
+        $word =~ s/\s+$//;
+        push @array, $word;
+    }
+
+    return \@array;
+}
+
+####################################################
+# diff_on_arrays
+####################################################
+# Makes set operation on arrays (seen as set, with no double) :
+#  - deleted : A \ B
+#  - added : B \ A
+#  - intersection : A /\ B
+#  - union : A \/ B
+#
+# IN : -$setA : ref(ARRAY) - set
+#      -$setB : ref(ARRAY) - set
+#
+# OUT : -ref(HASH) with keys :
+#          deleted, added, intersection, union
+#
+#######################################################
+sub diff_on_arrays {
+    my ($setA, $setB) = @_;
+    my $result = {
+        'intersection' => [],
+        'union'        => [],
+        'added'        => [],
+        'deleted'      => []
+    };
+    my %deleted;
+    my %added;
+    my %intersection;
+    my %union;
+
+    my %hashA;
+    my %hashB;
+
+    foreach my $eltA (@$setA) {
+        $hashA{$eltA}   = 1;
+        $deleted{$eltA} = 1;
+        $union{$eltA}   = 1;
+    }
+
+    foreach my $eltB (@$setB) {
+        $hashB{$eltB} = 1;
+        $added{$eltB} = 1;
+
+        if ($hashA{$eltB}) {
+            $intersection{$eltB} = 1;
+            $deleted{$eltB}      = 0;
+        } else {
+            $union{$eltB} = 1;
+        }
+    }
+
+    foreach my $eltA (@$setA) {
+        if ($hashB{$eltA}) {
+            $added{$eltA} = 0;
+        }
+    }
+
+    foreach my $elt (keys %deleted) {
+        next unless $elt;
+        push @{$result->{'deleted'}}, $elt if ($deleted{$elt});
+    }
+    foreach my $elt (keys %added) {
+        next unless $elt;
+        push @{$result->{'added'}}, $elt if ($added{$elt});
+    }
+    foreach my $elt (keys %intersection) {
+        next unless $elt;
+        push @{$result->{'intersection'}}, $elt if ($intersection{$elt});
+    }
+    foreach my $elt (keys %union) {
+        next unless $elt;
+        push @{$result->{'union'}}, $elt if ($union{$elt});
+    }
+
+    return $result;
+
+}
+
+####################################################
+# is_in_array
+####################################################
+# Test if a value is on an array
+#
+# IN : -$setA : ref(ARRAY) - set
+#      -$value : a serached value
+#
+# OUT : boolean
+#######################################################
+sub is_in_array {
+    my $set = shift;
+    die 'missing parameter "$value"' unless @_;
+    my $value = shift;
+
+    if (defined $value) {
+        foreach my $elt (@{$set || []}) {
+            next unless defined $elt;
+            return 1 if $elt eq $value;
+        }
+    } else {
+        foreach my $elt (@{$set || []}) {
+            return 1 unless defined $elt;
+        }
+    }
+
+    return undef;
+}
+
+=over
+
+=item smart_eq ( $a, $b )
+
+I<Function>.
+Check if two strings are identical.
+
+Parameters:
+
+=over
+
+=item $a, $b
+
+Operands.
+
+If both of them are undefined, they are equal.
+If only one of them is undefined, the are not equal.
+If C<$b> is a L<Regexp> object and it matches to C<$a>, they are equal.
+Otherwise, they are compared as strings.
+
+=back
+
+Returns:
+
+If arguments matched, true value.  Otherwise false value.
+
+=back
+
+=cut
+
+sub smart_eq {
+    die 'missing argument' if scalar @_ < 2;
+    my ($a, $b) = @_;
+
+    if (defined $a and defined $b) {
+        if (ref $b eq 'Regexp') {
+            return 1 if $a =~ $b;
+        } else {
+            return 1 if $a eq $b;
+        }
+    } elsif (!defined $a and !defined $b) {
+        return 1;
+    }
+
+    return undef;
+}
+
+## convert a string formated as var1="value1";var2="value2"; into a hash.
+## Used when extracting from session table some session properties or when
+## extracting users preference from user table
+## Current encoding is NOT compatible with encoding of values with '"'
+##
+sub string_2_hash {
+    my $data = shift;
+    my %hash;
+
+    pos($data) = 0;
+    while ($data =~ /\G;?(\w+)\=\"((\\[\"\\]|[^\"])*)\"(?=(;|\z))/g) {
+        my ($var, $val) = ($1, $2);
+        $val =~ s/\\([\"\\])/$1/g;
+        $hash{$var} = $val;
+    }
+
+    return (%hash);
+
+}
+## convert a hash into a string formated as var1="value1";var2="value2"; into
+## a hash
+sub hash_2_string {
+    my $refhash = shift;
+
+    return undef unless ref $refhash eq 'HASH';
+
+    my $data_string;
+    foreach my $var (keys %$refhash) {
+        next unless length $var;
+        my $val = $refhash->{$var};
+        $val = '' unless defined $val;
+
+        $val =~ s/([\"\\])/\\$1/g;
+        $data_string .= ';' . $var . '="' . $val . '"';
+    }
+    return ($data_string);
+}
+
+## compare 2 scalars, string/numeric independant
+sub smart_lessthan {
+    my ($stra, $strb) = @_;
+    $stra =~ s/^\s+//;
+    $stra =~ s/\s+$//;
+    $strb =~ s/^\s+//;
+    $strb =~ s/\s+$//;
+    $ERRNO = 0;
+    my ($numa, $unparsed) = POSIX::strtod($stra);
+    my $numb;
+    $numb = POSIX::strtod($strb)
+        unless ($ERRNO || $unparsed != 0);
+
+    if (($stra eq '') || ($strb eq '') || ($unparsed != 0) || $ERRNO) {
+        return $stra lt $strb;
+    } else {
+        return $stra < $strb;
+    }
+}
+
+=over
+
+=item sort_uniq ( [ \&comp ], @items )
+
+Returns sorted array of unique elements in the list.
+
+Parameters:
+
+=over
+
+=item \&comp
+
+Optional subroutine reference to compare each pairs of elements.
+It should take two arguments and return negative, zero or positive result.
+
+=item @items
+
+Items to be sorted.
+
+=back
+
+This function was added on Sympa 6.2.16.
+
+=back
+
+=cut
+
+sub sort_uniq {
+    my $comp;
+    if (ref $_[0] eq 'CODE') {
+        $comp = shift;
+    }
+
+    my %items;
+    @items{@_} = ();
+
+    if ($comp) {
+        return sort { $comp->($a, $b) } keys %items;
+    } else {
+        return sort keys %items;
+    }
+}
+
+# Create a custom attribute from an XML description
+# IN : A string, XML formed data as stored in database
+# OUT : HASH data storing custome attributes.
+# Old name: Sympa::List::parseCustomAttribute().
+sub decode_custom_attribute {
+    my $xmldoc = shift;
+    return undef unless defined $xmldoc and length $xmldoc;
+
+    my $parser = XML::LibXML->new();
+    my $tree;
+
+    ## We should use eval to parse to prevent the program to crash if it fails
+    if (ref($xmldoc) eq 'GLOB') {
+        $tree = eval { $parser->parse_fh($xmldoc) };
+    } else {
+        $tree = eval { $parser->parse_string($xmldoc) };
+    }
+
+    return undef unless defined $tree;
+
+    my $doc = $tree->getDocumentElement;
+
+    my @custom_attr = $doc->getChildrenByTagName('custom_attribute');
+    my %ca;
+    foreach my $ca (@custom_attr) {
+        my $id    = Encode::encode_utf8($ca->getAttribute('id'));
+        my $value = Encode::encode_utf8($ca->getElementsByTagName('value'));
+        $ca{$id} = {value => $value};
+    }
+    return \%ca;
+}
+
+# Create an XML Custom attribute to be stored into data base.
+# IN : HASH data storing custome attributes
+# OUT : string, XML formed data to be stored in database
+# Old name: Sympa::List::createXMLCustomAttribute().
+sub encode_custom_attribute {
+    my $custom_attr = shift;
+    return
+        '<?xml version="1.0" encoding="UTF-8" ?><custom_attributes></custom_attributes>'
+        if (not defined $custom_attr);
+    my $XMLstr = '<?xml version="1.0" encoding="UTF-8" ?><custom_attributes>';
+    foreach my $k (sort keys %{$custom_attr}) {
+        my $value = $custom_attr->{$k}{value};
+        $value = '' unless defined $value;
+
+        $XMLstr .=
+              "<custom_attribute id=\"$k\"><value>"
+            . Sympa::Tools::Text::encode_html($value, '\000-\037')
+            . "</value></custom_attribute>";
+    }
+    $XMLstr .= "</custom_attributes>";
+    $XMLstr =~ s/\s*\n\s*/ /g;
+
+    return $XMLstr;
+}
+
+1;
--- a/dockers/apikaz/source/Sympa/Tools/Text.pm
+++ b/dockers/apikaz/source/Sympa/Tools/Text.pm
@@ -0,0 +1,942 @@
+# -*- indent-tabs-mode: nil; -*-
+# vim:ft=perl:et:sw=4
+# $Id$
+
+# Sympa - SYsteme de Multi-Postage Automatique
+#
+# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
+# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
+# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
+# Copyright 2018, 2020 The Sympa Community. See the AUTHORS.md
+# file at the top-level directory of this distribution and at
+# <https://github.com/sympa-community/sympa.git>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Sympa::Tools::Text;
+
+use strict;
+use warnings;
+use Encode qw();
+use English qw(-no_match_vars);
+use Encode::MIME::Header;    # 'MIME-Q' encoding.
+use HTML::Entities qw();
+use MIME::EncWords;
+use Text::LineFold;
+use Unicode::GCString;
+use URI::Escape qw();
+use if ($] < 5.016), qw(Unicode::CaseFold fc);
+use if (5.016 <= $]), qw(feature fc);
+BEGIN { eval 'use Unicode::Normalize qw()'; }
+BEGIN { eval 'use Unicode::UTF8 qw()'; }
+
+use Sympa::Language;
+use Sympa::Regexps;
+
+# Old name: tools::addrencode().
+sub addrencode {
+    my $addr    = shift;
+    my $phrase  = (shift || '');
+    my $charset = (shift || 'utf8');
+    my $comment = (shift || '');
+
+    return undef unless $addr =~ /\S/;
+
+    if ($phrase =~ /[^\s\x21-\x7E]/) {
+        $phrase = MIME::EncWords::encode_mimewords(
+            Encode::decode('utf8', $phrase),
+            'Encoding'    => 'A',
+            'Charset'     => $charset,
+            'Replacement' => 'FALLBACK',
+            'Field'       => 'Resent-Sender', # almost longest
+            'Minimal'     => 'DISPNAME',      # needs MIME::EncWords >= 1.012.
+        );
+    } elsif ($phrase =~ /\S/) {
+        $phrase =~ s/([\\\"])/\\$1/g;
+        $phrase = '"' . $phrase . '"';
+    }
+    if ($comment =~ /[^\s\x21-\x27\x2A-\x5B\x5D-\x7E]/) {
+        $comment = MIME::EncWords::encode_mimewords(
+            Encode::decode('utf8', $comment),
+            'Encoding'    => 'A',
+            'Charset'     => $charset,
+            'Replacement' => 'FALLBACK',
+            'Minimal'     => 'DISPNAME',
+        );
+    } elsif ($comment =~ /\S/) {
+        $comment =~ s/([\\\"])/\\$1/g;
+    }
+
+    return
+          ($phrase =~ /\S/  ? "$phrase "    : '')
+        . ($comment =~ /\S/ ? "($comment) " : '')
+        . "<$addr>";
+}
+
+# Old names: tools::clean_email(), tools::get_canonical_email().
+sub canonic_email {
+    my $email = shift;
+
+    return undef unless defined $email;
+
+    # Remove leading and trailing white spaces.
+    $email =~ s/\A\s+//;
+    $email =~ s/\s+\z//;
+
+    # Lower-case.
+    $email =~ tr/A-Z/a-z/;
+
+    return (length $email) ? $email : undef;
+}
+
+# Old name: tools::clean_msg_id().
+sub canonic_message_id {
+    my $msg_id = shift;
+
+    return $msg_id unless defined $msg_id;
+
+    chomp $msg_id;
+
+    if ($msg_id =~ /\<(.+)\>/) {
+        $msg_id = $1;
+    }
+
+    return $msg_id;
+}
+
+sub canonic_text {
+    my $text = shift;
+
+    return undef unless defined $text;
+
+    # Normalize text. See also discussion on
+    # https://listes.renater.fr/sympa/arc/sympa-developpers/2018-03/thrd1.html
+    #
+    # N.B.: Corresponding modules are optional by now, and should be
+    # mandatory in the future.
+    my $utext;
+    if (Encode::is_utf8($text)) {
+        $utext = $text;
+    } elsif ($Unicode::UTF8::VERSION) {
+        no warnings 'utf8';
+        $utext = Unicode::UTF8::decode_utf8($text);
+    } else {
+        $utext = Encode::decode_utf8($text);
+    }
+    if ($Unicode::Normalize::VERSION) {
+        $utext = Unicode::Normalize::normalize('NFC', $utext);
+    }
+
+    # Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
+    # and EIMS:
+    $utext =~ s/\r\n|\r/\n/g;
+
+    if (Encode::is_utf8($text)) {
+        return $utext;
+    } else {
+        return Encode::encode_utf8($utext);
+    }
+}
+
+sub slurp {
+    my $path = shift;
+
+    my $ifh;
+    return undef unless open $ifh, '<', $path;
+    my $text = do { local $RS; <$ifh> };
+    close $ifh;
+
+    return canonic_text($text);
+}
+
+sub wrap_text {
+    my $text = shift;
+    my $init = shift;
+    my $subs = shift;
+    my $cols = shift;
+
+    $init //= '';
+    $subs //= '';
+    $cols //= 78;
+    return $text unless $cols;
+
+    my $email_re = Sympa::Regexps::email();
+    my $linefold = Text::LineFold->new(
+        Language   => Sympa::Language->instance->get_lang,
+        Prep       => 'NONBREAKURI',
+        prep       => [$email_re, sub { shift; @_ }],
+        ColumnsMax => $cols,
+        Format     => sub {
+            shift;
+            my $event = shift;
+            my $str   = shift;
+            if ($event =~ /^eo/)     { return "\n"; }
+            if ($event =~ /^so[tp]/) { return $init . $str; }
+            if ($event eq 'sol')     { return $subs . $str; }
+            undef;
+        },
+    );
+
+    my $t = Encode::is_utf8($text) ? $text : Encode::decode_utf8($text);
+
+    my $ret = '';
+    while (1000 < length $t) {
+        my $s = substr $t, 0, 1000;
+        $ret .= $linefold->break_partial($s);
+        $t = substr $t, 1000;
+    }
+    $ret .= $linefold->break_partial($t) if length $t;
+    $ret .= $linefold->break_partial(undef);
+
+    return Encode::is_utf8($text) ? $ret : Encode::encode_utf8($ret);
+}
+
+sub decode_filesystem_safe {
+    my $str = shift;
+    return '' unless defined $str and length $str;
+
+    $str = Encode::encode_utf8($str) if Encode::is_utf8($str);
+    # On case-insensitive filesystem "_XX" along with "_xx" should be decoded.
+    $str =~ s/_([0-9A-Fa-f]{2})/chr hex "0x$1"/eg;
+    return $str;
+}
+
+sub decode_html {
+    my $str = shift;
+
+    Encode::encode_utf8(
+        HTML::Entities::decode_entities(Encode::decode_utf8($str)));
+}
+
+sub encode_filesystem_safe {
+    my $str = shift;
+    return '' unless defined $str and length $str;
+
+    $str = Encode::encode_utf8($str) if Encode::is_utf8($str);
+    $str =~ s/([^-+.0-9\@A-Za-z])/sprintf '_%02x', ord $1/eg;
+    return $str;
+}
+
+sub encode_html {
+    my $str = shift;
+    my $additional_unsafe = shift || '';
+
+    HTML::Entities::encode_entities($str, '<>&"' . $additional_unsafe);
+}
+
+sub encode_uri {
+    my $str     = shift;
+    my %options = @_;
+
+    # Note: URI-1.35 (URI::Escape 3.28) or later is required.
+    return Encode::encode_utf8(
+        URI::Escape::uri_escape_utf8(
+            Encode::decode_utf8($str),
+            '^-A-Za-z0-9._~' . (exists $options{omit} ? $options{omit} : '')
+        )
+    );
+}
+
+# Old name: tools::escape_chars().
+sub escape_chars {
+    my $s          = shift;
+    my $except     = shift;                            ## Exceptions
+    my $ord_except = ord $except if defined $except;
+
+    ## Escape chars
+    ##  !"#$%&'()+,:;<=>?[] AND accented chars
+    ## escape % first
+    foreach my $i (
+        0x25,
+        0x20 .. 0x24,
+        0x26 .. 0x2c,
+        0x3a .. 0x3f,
+        0x5b, 0x5d,
+        0x80 .. 0x9f,
+        0xa0 .. 0xff
+    ) {
+        next if defined $ord_except and $i == $ord_except;
+        my $hex_i = sprintf "%lx", $i;
+        $s =~ s/\x$hex_i/%$hex_i/g;
+    }
+    ## Special traetment for '/'
+    $s =~ s/\//%a5/g unless defined $except and $except eq '/';
+
+    return $s;
+}
+
+# Old name: tt2::escape_url().
+# DEPRECATED.  Use Sympa::Tools::Text::escape_uri() or
+# Sympa::Tools::Text::mailtourl().
+#sub escape_url;
+
+sub foldcase {
+    my $str = shift;
+
+    return '' unless defined $str and length $str;
+    # Perl 5.16.0 and later have built-in fc(). Earlier uses Unicode::CaseFold.
+    return Encode::encode_utf8(fc(Encode::decode_utf8($str)));
+}
+
+my %legacy_charsets = (
+    'ar'    => [qw(iso-8859-6)],
+    'bs'    => [qw(iso-8859-2)],
+    'cs'    => [qw(iso-8859-2)],
+    'eo'    => [qw(iso-8859-3)],
+    'et'    => [qw(iso-8859-4)],
+    'he'    => [qw(iso-8859-8)],
+    'hr'    => [qw(iso-8859-2)],
+    'hu'    => [qw(iso-8859-2)],
+    'ja'    => [qw(euc-jp cp932 MacJapanese)],
+    'kl'    => [qw(iso-8859-4)],
+    'ko'    => [qw(cp949)],
+    'lt'    => [qw(iso-8859-4)],
+    'lv'    => [qw(iso-8859-4)],
+    'mt'    => [qw(iso-8859-3)],
+    'pl'    => [qw(iso-8859-2)],
+    'ro'    => [qw(iso-8859-2)],
+    'ru'    => [qw(koi8-r cp1251)],               # cp866? MacCyrillic?
+    'sk'    => [qw(iso-8859-2)],
+    'sl'    => [qw(iso-8859-2)],
+    'th'    => [qw(iso-8859-11 cp874 MacThai)],
+    'tr'    => [qw(iso-8859-9)],
+    'uk'    => [qw(koi8-u)],                      # MacUkrainian?
+    'zh-CN' => [qw(euc-cn)],
+    'zh-TW' => [qw(big5-eten)],
+);
+
+sub guessed_to_utf8 {
+    my $text  = shift;
+    my @langs = @_;
+
+    return Encode::encode_utf8($text) if Encode::is_utf8($text);
+    return $text
+        unless defined $text
+        and length $text
+        and $text =~ /[^\x00-\x7F]/;
+
+    my $utf8;
+    if ($Unicode::UTF8::VERSION) {
+        $utf8 =
+            eval { Unicode::UTF8::decode_utf8($text, Encode::FB_CROAK()) };
+    }
+    unless (defined $utf8) {
+        foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
+            $utf8 =
+                eval { Encode::decode($charset, $text, Encode::FB_CROAK()) };
+            last if defined $utf8;
+        }
+    }
+    unless (defined $utf8) {
+        $utf8 = Encode::decode('iso-8859-1', $text);
+    }
+
+    # Apply NFC: e.g. for modified-NFD by Mac OS X.
+    $utf8 = Unicode::Normalize::normalize('NFC', $utf8)
+        if $Unicode::Normalize::VERSION;
+
+    return Encode::encode_utf8($utf8);
+}
+
+sub mailtourl {
+    my $text    = shift;
+    my %options = @_;
+
+    my $dtext =
+          (not defined $text)   ? ''
+        : $options{decode_html} ? Sympa::Tools::Text::decode_html($text)
+        :                         $text;
+    $dtext =~ s/\A\s+//;
+    $dtext =~ s/\s+\z//;
+    $dtext =~ s/(?:\r\n|\r|\n)(?=[ \t])//g;
+    $dtext =~ s/\r\n|\r|\n/ /g;
+
+    # The ``@'' in email address should not be encoded because some MUAs
+    # aren't able to decode ``%40'' in e-mail address of mailto: URL.
+    # Contrary, ``@'' in query component should be encoded because some
+    # MUAs take it for a delimiter to separate URL from the rest.
+    my ($format, $utext, $qsep);
+    if ($dtext =~ /[()<>\[\]:;,\"\s]/) {
+        # Use "to" header if source text includes any of RFC 5322
+        # "specials", minus ``@'' and ``\'', plus whitespaces.
+        $format = 'mailto:?to=%s%s';
+        $utext  = Sympa::Tools::Text::encode_uri($dtext);
+        $qsep   = '&';
+    } else {
+        $format = 'mailto:%s%s';
+        $utext  = Sympa::Tools::Text::encode_uri($dtext, omit => '@');
+        $qsep   = '?';
+    }
+    my $qstring = _url_query_string(
+        $options{query},
+        decode_html => $options{decode_html},
+        leadchar    => $qsep,
+        sepchar     => '&',
+        trim_values => 1,
+    );
+
+    return sprintf $format, $utext, $qstring;
+}
+
+sub _url_query_string {
+    my $query   = shift;
+    my %options = @_;
+
+    unless (ref $query eq 'HASH' and %$query) {
+        return '';
+    } else {
+        my $decode_html = $options{decode_html};
+        my $trim_values = $options{trim_values};
+        return ($options{leadchar} || '?') . join(
+            ($options{sepchar} || ';'),
+            map {
+                my ($dkey, $dval) = map {
+                          (not defined $_) ? ''
+                        : $decode_html ? Sympa::Tools::Text::decode_html($_)
+                        :                $_;
+                } ($_, $query->{$_});
+                if ($trim_values and lc $dkey ne 'body') {
+                    $dval =~ s/\A\s+//;
+                    $dval =~ s/\s+\z//;
+                    $dval =~ s/(?:\r\n|\r|\n)(?=[ \t])//g;
+                    $dval =~ s/\r\n|\r|\n/ /g;
+                }
+
+                sprintf '%s=%s',
+                    Sympa::Tools::Text::encode_uri($dkey),
+                    Sympa::Tools::Text::encode_uri($dval);
+            } sort keys %$query
+        );
+    }
+}
+
+sub pad {
+    my $str   = shift;
+    my $width = shift;
+
+    return $str unless $width and defined $str;
+
+    my $ustr = Encode::is_utf8($str) ? $str : Encode::decode_utf8($str);
+    my $cols = Unicode::GCString->new($ustr)->columns;
+
+    unless ($cols < abs $width) {
+        return $str;
+    } elsif ($width < 0) {
+        return $str . (' ' x (-$width - $cols));
+    } else {
+        return (' ' x ($width - $cols)) . $str;
+    }
+}
+
+# Old name: tools::qdecode_filename().
+sub qdecode_filename {
+    my $filename = shift;
+
+    ## We don't use MIME::Words here because it does not encode properly
+    ## Unicode
+    ## Check if string is already Q-encoded first
+    #if ($filename =~ /\=\?UTF-8\?/) {
+    $filename = Encode::encode_utf8(Encode::decode('MIME-Q', $filename));
+    #}
+
+    return $filename;
+}
+
+# Old name: tools::qencode_filename().
+sub qencode_filename {
+    my $filename = shift;
+
+    ## We don't use MIME::Words here because it does not encode properly
+    ## Unicode
+    ## Check if string is already Q-encoded first
+    ## Also check if the string contains 8bit chars
+    unless ($filename =~ /\=\?UTF-8\?/
+        || $filename =~ /^[\x00-\x7f]*$/) {
+
+        ## Don't encode elements such as .desc. or .url or .moderate
+        ## or .extension
+        my $part = $filename;
+        my ($leading, $trailing);
+        $leading  = $1 if ($part =~ s/^(\.desc\.)//);    ## leading .desc
+        $trailing = $1 if ($part =~ s/((\.\w+)+)$//);    ## trailing .xx
+
+        my $encoded_part = MIME::EncWords::encode_mimewords(
+            $part,
+            Charset    => 'utf8',
+            Encoding   => 'q',
+            MaxLineLen => 1000,
+            Minimal    => 'NO'
+        );
+
+        $filename = $leading . $encoded_part . $trailing;
+    }
+
+    return $filename;
+}
+
+# Old name: tools::unescape_chars().
+sub unescape_chars {
+    my $s = shift;
+
+    $s =~ s/%a5/\//g;    ## Special traetment for '/'
+    foreach my $i (0x20 .. 0x2c, 0x3a .. 0x3f, 0x5b, 0x5d, 0x80 .. 0x9f,
+        0xa0 .. 0xff) {
+        my $hex_i = sprintf "%lx", $i;
+        my $hex_s = sprintf "%c",  $i;
+        $s =~ s/%$hex_i/$hex_s/g;
+    }
+
+    return $s;
+}
+
+# Old name: tools::valid_email().
+sub valid_email {
+    my $email = shift;
+
+    my $email_re = Sympa::Regexps::email();
+    return undef unless $email =~ /^${email_re}$/;
+
+    # Forbidden characters.
+    return undef if $email =~ /[\|\$\*\?\!]/;
+
+    return 1;
+}
+
+sub weburl {
+    my $base    = shift;
+    my $paths   = shift;
+    my %options = @_;
+
+    my @paths = map {
+        Sympa::Tools::Text::encode_uri(
+              (not defined $_)      ? ''
+            : $options{decode_html} ? Sympa::Tools::Text::decode_html($_)
+            :                         $_
+        );
+    } @{$paths || []};
+
+    my $qstring = _url_query_string(
+        $options{query},
+        decode_html => $options{decode_html},
+        sepchar     => '&',
+    );
+
+    my $fstring;
+    my $fragment = $options{fragment};
+    if (defined $fragment) {
+        $fstring = '#'
+            . Sympa::Tools::Text::encode_uri(
+            $options{decode_html}
+            ? Sympa::Tools::Text::decode_html($fragment)
+            : $fragment
+            );
+    } else {
+        $fstring = '';
+    }
+
+    return sprintf '%s%s%s', join('/', grep { defined $_ } ($base, @paths)),
+        $qstring, $fstring;
+}
+
+1;
+__END__
+
+=encoding utf-8
+
+=head1 NAME
+
+Sympa::Tools::Text - Text-related functions
+
+=head1 DESCRIPTION
+
+This package provides some text-related functions.
+
+=head2 Functions
+
+=over
+
+=item addrencode ( $addr, [ $phrase, [ $charset, [ $comment ] ] ] )
+
+Returns formatted (and encoded) name-addr as RFC5322 3.4.
+
+=item canonic_email ( $email )
+
+I<Function>.
+Returns canonical form of e-mail address.
+
+Leading and trailing white spaces are removed.
+Latin letters without accents are lower-cased.
+
+For malformed inputs returns C<undef>.
+
+=item canonic_message_id ( $message_id )
+
+Returns canonical form of message ID without trailing or leading whitespaces
+or C<E<lt>>, C<E<gt>>.
+
+=item canonic_text ( $text )
+
+Canonicalizes text.
+C<$text> should be a binary string encoded by UTF-8 character set or
+a Unicode string.
+Forbidden sequences in binary string will be replaced by
+U+FFFD REPLACEMENT CHARACTERs, and Normalization Form C (NFC) will be applied.
+
+=item decode_filesystem_safe ( $str )
+
+I<Function>.
+Decodes a string encoded by encode_filesystem_safe().
+
+Parameter:
+
+=over
+
+=item $str
+
+String to be decoded.
+
+=back
+
+Returns:
+
+Decoded string, stripped C<utf8> flag if any.
+
+=item decode_html ( $str )
+
+I<Function>.
+Decodes HTML entities in a string encoded by UTF-8 or a Unicode string.
+
+Parameter:
+
+=over
+
+=item $str
+
+String to be decoded.
+
+=back
+
+Returns:
+
+Decoded string, stripped C<utf8> flag if any.
+
+=item encode_filesystem_safe ( $str )
+
+I<Function>.
+Encodes a string $str to be suitable for filesystem.
+
+Parameter:
+
+=over
+
+=item $str
+
+String to be encoded.
+
+=back
+
+Returns:
+
+Encoded string, stripped C<utf8> flag if any.
+All bytes except C<'-'>, C<'+'>, C<'.'>, C<'@'>
+and alphanumeric characters are encoded to sequences C<'_'> followed by
+two hexdigits.
+
+Note that C<'/'> will also be encoded.
+
+=item encode_html ( $str, [ $additional_unsafe ] )
+
+I<Function>.
+Encodes characters in a string $str to HTML entities.
+By default
+C<'E<lt>'>, C<'E<gt>'>, C<'E<amp>'> and C<'E<quot>'> are encoded.
+
+Parameter:
+
+=over
+
+=item $str
+
+String to be encoded.
+
+=item $additional_unsafe
+
+Character or range of characters additionally encoded as entity references.
+
+This optional parameter was introduced on Sympa 6.2.37b.3.
+
+=back
+
+Returns:
+
+Encoded string, I<not> stripping utf8 flag if any.
+
+=item encode_uri ( $str, [ omit => $chars ] )
+
+I<Function>.
+Encodes potentially unsafe characters in the string using "percent" encoding
+suitable for URIs.
+
+Parameters:
+
+=over
+
+=item $str
+
+String to be encoded.
+
+=item omit =E<gt> $chars
+
+By default, all characters except those defined as "unreserved" in RFC 3986
+are encoded, that is, C<[^-A-Za-z0-9._~]>.
+If this parameter is given, it will prevent encoding additional characters.
+
+=back
+
+Returns:
+
+Encoded string, stripped C<utf8> flag if any.
+
+=item escape_chars ( $str )
+
+Escape weird characters.
+
+ToDo: This should be obsoleted in the future release: Would be better to use
+L</encode_filesystem_safe>.
+
+=item escape_url ( $str )
+
+DEPRECATED.
+Would be better to use L</"encode_uri"> or L</"mailtourl">.
+
+=item foldcase ( $str )
+
+I<Function>.
+Returns "fold-case" string suitable for case-insensitive match.
+For example, a code below looks for a needle in haystack not regarding case,
+even if they are non-ASCII UTF-8 strings.
+
+  $haystack = Sympa::Tools::Text::foldcase($HayStack);
+  $needle   = Sympa::Tools::Text::foldcase($NeedLe);
+  if (index $haystack, $needle >= 0) {
+      ...
+  }
+
+Parameter:
+
+=over
+
+=item $str
+
+A string.
+
+=back
+
+=item guessed_to_utf8( $text, [ lang, ... ] )
+
+I<Function>.
+Guesses text charset considering language context
+and returns the text reencoded by UTF-8.
+
+Parameters:
+
+=over
+
+=item $text
+
+Text to be reencoded.
+
+=item lang, ...
+
+Language tag(s) which may be given by L<Sympa::Language/"implicated_langs">.
+
+=back
+
+Returns:
+
+Reencoded text.
+If any charsets could not be guessed, C<iso-8859-1> will be used
+as the last resort, just because it covers full range of 8-bit.
+
+=item mailtourl ( $email, [ decode_html =E<gt> 1 ],
+[ query =E<gt> {key =E<gt> val, ...} ] )
+
+I<Function>.
+Constructs a C<mailto:> URL for given e-mail.
+
+Parameters:
+
+=over
+
+=item $email
+
+E-mail address.
+
+=item decode_html =E<gt> 1
+
+If set, arguments are assumed to include HTML entities.
+
+=item query =E<gt> {key =E<gt> val, ...}
+
+Optional query.
+
+=back
+
+Returns:
+
+Constructed URL.
+
+=item pad ( $str, $width )
+
+Pads space a string so that result will not be narrower than given width.
+
+Parameters:
+
+=over
+
+=item $str
+
+A string.
+
+=item $width
+
+If $width is false value or width of $str is not less than $width,
+does nothing.
+If $width is less than C<0>, pads right.
+Otherwise, pads left.
+
+=back
+
+Returns:
+
+Padded string.
+
+=item qdecode_filename ( $filename )
+
+Q-Decodes web file name.
+
+ToDo:
+This should be obsoleted in the future release: Would be better to use
+L</decode_filesystem_safe>.
+
+=item qencode_filename ( $filename )
+
+Q-Encodes web file name.
+
+ToDo:
+This should be obsoleted in the future release: Would be better to use
+L</encode_filesystem_safe>.
+
+=item slurp ( $file )
+
+Get entire content of the file.
+Normalization by canonic_text() is applied.
+C<$file> is the path to text file.
+
+=item unescape_chars ( $str )
+
+Unescape weird characters.
+
+ToDo: This should be obsoleted in the future release: Would be better to use
+L</decode_filesystem_safe>.
+
+=item valid_email ( $string )
+
+Basic check of an email address.
+
+=item weburl ( $base, \@paths, [ decode_html =E<gt> 1 ],
+[ fragment =E<gt> $fragment ], [ query =E<gt> \%query ] )
+
+Constructs a C<http:> or C<https:> URL under given base URI.
+
+Parameters:
+
+=over
+
+=item $base
+
+Base URI.
+
+=item \@paths
+
+Additional path components.
+
+=item decode_html =E<gt> 1
+
+If set, arguments are assumed to include HTML entities.
+Exception is $base:
+It is assumed not to include entities.
+
+=item fragment =E<gt> $fragment
+
+Optional fragment.
+
+=item query =E<gt> \%query
+
+Optional query.
+
+=back
+
+Returns:
+
+A URI.
+
+=item wrap_text ( $text, [ $init_tab, [ $subsequent_tab, [ $cols ] ] ] )
+
+I<Function>.
+Returns line-wrapped text.
+
+Parameters:
+
+=over
+
+=item $text
+
+The text to be folded.
+
+=item $init_tab
+
+Indentation prepended to the first line of paragraph.
+Default is C<''>, no indentation.
+
+=item $subsequent_tab
+
+Indentation prepended to each subsequent line of folded paragraph.
+Default is C<''>, no indentation.
+
+=item $cols
+
+Max number of columns of folded text.
+Default is C<78>.
+
+=back
+
+=back
+
+=head1 HISTORY
+
+L<Sympa::Tools::Text> appeared on Sympa 6.2a.41.
+
+decode_filesystem_safe() and encode_filesystem_safe() were added
+on Sympa 6.2.10.
+
+decode_html(), encode_html(), encode_uri() and mailtourl()
+were added on Sympa 6.2.14, and escape_url() was deprecated.
+
+guessed_to_utf8() and pad() were added on Sympa 6.2.17.
+
+canonic_text() and slurp() were added on Sympa 6.2.53b.
+
+=cut