# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id$

# Sympa - SYsteme de Multi-Postage Automatique
#
# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
# Copyright 2017, 2018 The Sympa Community. See the AUTHORS.md file at the
# top-level directory of this distribution and at
# <https://github.com/sympa-community/sympa.git>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

package Sympa::Language;

use strict;
use warnings;
use base qw(Class::Singleton);

use Encode qw();
use Locale::Messages;
use POSIX qw();

use Sympa::Constants;

BEGIN {
    ## Using the Pure Perl implementation of gettext
    ## This is required on Solaris : native implementation of gettext does not
    ## map ll_RR with ll.
    # libintl-perl 1.22 (virtually 1.23) or later is recommended to use
    # 'gettext_dumb' package which is independent from POSIX locale.  If older
    # version is used.  falls back to 'gettext_pp'.
    my $package = Locale::Messages->select_package('gettext_dumb');
    Locale::Messages->select_package('gettext_pp')
        unless $package and $package eq 'gettext_dumb';
    ## Workaround: Prevent from searching catalogs in /usr/share/locale.
    undef $Locale::gettext_pp::__gettext_pp_default_dir;

    ## Define what catalogs are used
    Locale::Messages::bindtextdomain(sympa    => Sympa::Constants::LOCALEDIR);
    Locale::Messages::bindtextdomain(web_help => Sympa::Constants::LOCALEDIR);
    Locale::Messages::textdomain('sympa');
    ## Get translations by internal encoding.
    Locale::Messages::bind_textdomain_codeset(sympa    => 'utf-8');
    Locale::Messages::bind_textdomain_codeset(web_help => 'utf-8');
}

# Constructor for Class::Singleton.
sub _new_instance {
    my $class = shift;
    my $self  = $class->SUPER::_new_instance();

    ## Initialize lang/locale.
    $self->set_lang('en');
    return $self;
}

## The map to get language tag from older non-POSIX locale naming.
my %language_equiv = (
    'cn' => 'zh-CN',
    'tw' => 'zh-TW',
    'cz' => 'cs',
    'us' => 'en-US',
);

## The map to get appropriate POSIX locale name from language code.
## Why this is required is that on many systems locales often have canonic
## "ll_RR.ENCODING" names only.  n.b. This format can not express all
## languages in proper way, e.g. Common Arabic ("ar"), Esperanto ("eo").
##
## This map is also used to convert old-style Sympa "locales" to language
## tags ('en' is special case. cf. set_lang()).
my %lang2oldlocale = (
    'af' => 'af_ZA',
    'ar' => 'ar_SY',
    'br' => 'br_FR',
    'bg' => 'bg_BG',
    'ca' => 'ca_ES',
    'cs' => 'cs_CZ',
    'de' => 'de_DE',
    'el' => 'el_GR',
    'es' => 'es_ES',
    'et' => 'et_EE',
    'eu' => 'eu_ES',
    'fi' => 'fi_FI',
    'fr' => 'fr_FR',
    'gl' => 'gl_ES',
    'hu' => 'hu_HU',
    'id' => 'id_ID',
    'it' => 'it_IT',
    'ja' => 'ja_JP',
    'ko' => 'ko_KR',
    'la' => 'la_VA',    # from OpenOffice.org
    'ml' => 'ml_IN',
    'nb' => 'nb_NO',
    'nn' => 'nn_NO',
    'nl' => 'nl_NL',
    'oc' => 'oc_FR',
    'pl' => 'pl_PL',
    'pt' => 'pt_PT',
    'rm' => 'rm_CH',    # CLDR
    'ro' => 'ro_RO',
    'ru' => 'ru_RU',
    'sv' => 'sv_SE',
    'tr' => 'tr_TR',
    'vi' => 'vi_VN',
);

## Regexp for old style canonical locale used by Sympa-6.2a or earlier.
my $oldlocale_re = qr/^([a-z]{2})_([A-Z]{2})(?![A-Z])/i;

## Regexp for IETF language tag described in RFC 5646 (BCP 47), modified.
my $language_tag_re = qr/^
    ([a-z]{2}(?:-[a-z]{3}){1,3} | [a-z]{2,3})       # language (and ext.)
    (?:-([a-z]{4}))?                                # script
    (?:-([a-z]{2}))?                                # region (no UN M.49)
    (?:-(                                           # variant
	(?:[a-z0-9]{5,} | [0-9][a-z0-9]{3,})
	(?:-[a-z0-9]{5,} | -[0-9][a-z0-9]{3,})*
    ))?
$/ix;

## A tiny subset of script codes and gettext modifier names.
## Keys are ISO 15924 script codes (Titlecased, four characters).
## Values are property value aliases standardised by Unicode Consortium
## (lowercased).  cf. <http://www.unicode.org/iso15924/iso15924-codes.html>.
my %script2modifier = (
    'Arab' => 'arabic',
    'Cyrl' => 'cyrillic',
    'Deva' => 'devanagari',
    'Dsrt' => 'deseret',
    'Glag' => 'glagolitic',
    'Grek' => 'greek',
    'Guru' => 'gurmukhi',
    'Hebr' => 'hebrew',
    'Latn' => 'latin',
    'Mong' => 'mongolian',
    'Shaw' => 'shaw',         # found in Debian "en@shaw" locale.
    'Tfng' => 'tifinagh',
);

sub canonic_lang {
    my $lang = shift;
    return unless $lang;

    ## Compatibility: older non-POSIX locale names.
    if ($language_equiv{$lang}) {
        $lang = $language_equiv{$lang};
    }
    ## Compatibility: names used as "lang" or "locale" by Sympa <= 6.2a.
    elsif ($lang =~ $oldlocale_re) {
        $lang = _oldlocale2lang(lc($1) . '_' . uc($2));
    }

    my @subtags;

    # unknown format.
    return unless @subtags = ($lang =~ $language_tag_re);

    ## Canonicalize cases of subtags: ll-ext-Scri-RR-variant-...
    $subtags[0] = lc $subtags[0];
    $subtags[1] =~ s/^(\w)(\w+)/uc($1) . lc($2)/e if $subtags[1];
    $subtags[2] = uc $subtags[2] if $subtags[2];
    $subtags[3] = lc $subtags[3] if $subtags[3];

    ##XXX Maybe more canonicalizations here.

    ## Check subtags,
    # won't support language extension subtags.
    return unless $subtags[0] =~ /^[a-z]{2,3}$/;

    # won't allow multiple variant subtags.
    $subtags[3] =~ s/-.+// if $subtags[3];

    ##XXX Maybe more checks here.

    return @subtags if wantarray;
    return join '-', grep {$_} @subtags;
}

sub implicated_langs {
    my @langs = @_;
    die 'missing langs parameter' unless @langs;

    my @implicated_langs = ();

    foreach my $lang (@langs) {
        my @subtags = canonic_lang($lang);
        while (@subtags) {
            my $l = join '-', grep {$_} @subtags;
            @implicated_langs = ((grep { $_ ne $l } @implicated_langs), $l);

            ## Workaround:
            ## - "zh-Hans-CN", "zh-Hant-TW", ... may occasionally be
            ##   identified with "zh-CN", "zh-TW" etc.  Add them to
            ##   implication list.
            if ($l =~ /^zh-(Hans|Hant)-[A-Z]{2}\b/) {
                $l = join '-', grep {$_} @subtags[0, 2 .. $#subtags];
                @implicated_langs =
                    ((grep { $_ ne $l } @implicated_langs), $l);
            }

            1 until pop @subtags;
        }
    }

    return @implicated_langs;
}

## Parses content of HTTP 1.1 Accept-Charset, Accept-Encoding or
## Accept-Language request header field.
## Returns an array of arrayrefs [ITEM, WEIGHT].
##
## NOTE: This might be moved to utility package such as tools.pm.
sub parse_http_accept_string {
    my $accept_string = shift || '';

    $accept_string =~ s/^\s+//;
    $accept_string =~ s/\s+$//;
    $accept_string ||= '*';
    my @pairs = split /\s*,\s*/, $accept_string;

    my @ret = ();
    foreach my $pair (@pairs) {
        my ($item, $weight) = split /\s*;\s*/, $pair, 2;
        if (    defined $weight
            and $weight =~ s/^q\s*=\s*//i
            and $weight =~ /^(\d+(\.\d*)?|\.\d+)$/) {
            $weight += 0.0;
        } else {
            $weight = 1.0;
        }
        push @ret, [$item => $weight];
    }
    return @ret;
}

sub negotiate_lang {
    my $accept_string = shift || '*';
    my @supported_languages = grep {$_} map { split /[\s,]+/, $_ } @_;

    ## parse Accept-Language: header field.
    ## unknown languages are ignored.
    my @accept_languages =
        grep { $_->[0] eq '*' or $_->[0] = canonic_lang($_->[0]) }
        parse_http_accept_string($accept_string);
    return unless @accept_languages;

    ## try to find the best language.
    my $best_lang   = undef;
    my $best_weight = 0.0;
    foreach my $supported_lang (@supported_languages) {
        my @supported_pfxs = implicated_langs($supported_lang);
        foreach my $pair (@accept_languages) {
            my ($accept_lang, $weight) = @$pair;
            if ($accept_lang eq '*'
                or grep { $accept_lang eq $_ } @supported_pfxs) {
                unless ($best_lang and $weight <= $best_weight) {
                    $best_lang   = $supported_pfxs[0];    # canonic form
                    $best_weight = $weight;
                }
            }
        }
    }

    return $best_lang;
}

##sub GetSupportedLanguages {
##DEPRECATED: use Sympa::get_supported_languages().
## Supported languages are defined by 'supported_lang' sympa.conf parameter.

## Old name: PushLang()
sub push_lang {
    my $self  = shift;
    my @langs = @_;

    push @{$self->{previous_lang}}, $self->get_lang;
    $self->set_lang(@langs);

    return 1;
}

## Old name: PopLang()
sub pop_lang {
    my $self = shift;

    die 'calling pop_lang() without push_lang()'
        unless @{$self->{previous_lang}};
    my $lang = pop @{$self->{previous_lang}};
    $self->set_lang($lang);

    return 1;
}

## Old name: SetLang()
sub set_lang {
    my $self  = shift;
    my @langs = @_;
    my $locale;

    foreach my $lang (@langs) {
        # Canonicalize lang.
        # Note: 'en' is always allowed.  Use 'en-US' and so on to provide NLS
        # for English.
        next unless $lang = canonic_lang($lang);

        # Try to set POSIX locale and gettext locale, and get lang actually
        # set.
        # Note: Macrolanguage 'zh', 'zh-Hans' or 'zh-Hant' may fallback to
        # lang with available region.
        if ($locale = _resolve_gettext_locale(lang2locale($lang))) {
            ($lang) =
                grep { lang2locale($_) eq $locale } implicated_langs($lang);
        } elsif ($lang =~ /^zh\b/) {
            my @rr;
            if ($lang =~ /^zh-Hans\b/) {
                @rr = qw(CN SG  HK MO TW);    # try simp. first
            } elsif ($lang =~ /^zh-Hant\b/) {
                @rr = qw(HK MO TW  CN SG);    # try trad. first
            } else {
                @rr = qw(CN HK MO SG TW);
            }
            foreach my $rr (@rr) {
                $lang = "zh-$rr";
                last if $locale = _resolve_gettext_locale(lang2locale($lang));
            }
        }

        next unless $locale and $lang;

        # The locale is the gettext catalog name; lang is the IETF language
        # tag.  Ex: locale = pt_BR ; lang = pt-BR
        # locale_numeric and locale_time are POSIX locales for LC_NUMERIC and
        # POSIX::LC_TIME catogories, respectively.  As of 6.2b, they became
        # optional:
        # If setting each of them failed, 'C' locale will be set.
        $self->{lang}   = $lang;
        $self->{locale} = $locale;
        $self->{locale_numeric} =
            _find_posix_locale(POSIX::LC_NUMERIC(), $locale)
            || 'C';
        $self->{locale_time} = _find_posix_locale(POSIX::LC_TIME(), $locale)
            || 'C';

        return $lang;
    }

    return;
}

## Trys to set gettext locale and returns actually set locale.
## Mandatory parameter is gettext locale name.
sub _resolve_gettext_locale {
    my $locale = shift or die 'missing locale parameter';

    # 'en' is always allowed.
    return $locale if $locale eq 'en';

    # Workaround:
    # - "nb" and "nn" are recommended not to have "_NO" region suffix:
    #   Both of them are official languages in Norway.  However, current Sympa
    #   provides "nb_NO" NLS catalog.
    $locale =~ s/^(nb|nn)\b/${1}_NO/;

    ## Check if catalog is loaded.
    local %ENV;
    $ENV{'LANGUAGE'} = $locale;
    my $metadata = Locale::Messages::gettext('');    # get header

    unless ($metadata) {
        ## If a sub-locale of 'en' (en-CA, en@shaw, ...) failed, fallback to
        ## 'en'.  Otherwise fails.
        if ($locale =~ /^en(?![a-z])/) {
            $locale = 'en';
        } else {
            return;
        }
    } elsif ($metadata =~ /(?:\A|\n)Language:\s*([\@\w]+)/i) {
        ## Get precise name of gettext locale if possible.
        $locale = $1;
    }

    ## Workaround for "nb" and "nn": See above.
    $locale =~ s/^(nb|nn)_NO\b/$1/;

    return $locale;
}

# Trys to set POSIX locale which affects to strftime, sprintf etc.
sub _find_posix_locale {
    my $type   = shift;
    my $locale = shift;

    # Special case: 'en' is an alias of 'C' locale.  Use 'en_US' and so on for
    # real English.
    return 'C' if $locale eq 'en';

    my $orig_locale = POSIX::setlocale($type);

    ## From "ll@modifier", gets "ll", "ll_RR" and "@modifier".
    my ($loc, $mod) = split /(?=\@)/, $locale, 2;
    my $machloc = $loc;
    $machloc =~ s/^([a-z]{2,3})(?!_)/$lang2oldlocale{$1} || $1/e;
    $mod ||= '';

    ## Set POSIX locale
    my $posix_locale;
    my @try;

    ## Add codeset.
    ## UpperCase required for FreeBSD; dashless required on HP-UX;
    ## null codeset is last resort.
    foreach my $cs ('.utf-8', '.UTF-8', '.utf8', '') {
        ## Truncate locale similarly in gettext: full locale, and omit
        ## region then modifier.
        push @try,
            map { sprintf $_, $cs }
            ("$machloc%s$mod", "$loc%s$mod", "$loc%s");
    }
    foreach my $try (@try) {
        if (POSIX::setlocale($type, $try)) {
            $posix_locale = $try;
            last;
        }
    }

    POSIX::setlocale($type, $orig_locale);

    return $posix_locale;
}

## Old name: GetLangName()
## Note: Optional $lang argument was deprecated.
sub native_name {
    my $self = shift;
    die 'extra argument(s)' if @_;
    my $name;

    unless ($self->{lang} and $self->{lang} ne 'en') {
        $name = 'English';
    } else {
        ## Workaround for nb/nn.
        my $locale = $self->{locale};
        $locale =~ s/^(nb|nn)\b/${1}_NO/;

        local %ENV;
        $ENV{'LANGUAGE'} = $locale;
        my $metadata = Locale::Messages::gettext('');    # get header

        if ($metadata =~ /(?:\A|\n)Language-Team:\s*(.+)/i) {
            $name = $1;
            $name =~ s/\s*\<\S+\>//;
        }
    }

    return (defined $name and $name =~ /\S/) ? $name : '';
}

## Old name: GetLang()
sub get_lang {
    my $self = shift;
    return $self->{lang} || 'en';    # the last resort
}

# DEPRECATED: use Conf::lang2charset().
# sub GetCharset;

## DEPRECATED: Use canonic_lang().
## sub Locale2Lang;

# Internal function.
# Convert language tag to gettext locale name.
sub lang2locale {
    my $lang = shift;
    my $locale;
    my @subtags;

    ## unknown format.
    return unless @subtags = canonic_lang($lang);

    ## convert from "ll-Scri-RR" to "ll_RR@scriptname", or
    ## from "ll-RR-variant" to "ll_RR@variant".
    $locale = $subtags[0];
    if ($subtags[2]) {
        $locale .= '_' . $subtags[2];
    }
    if ($subtags[1]) {
        $locale .= '@' . ($script2modifier{$subtags[1]} || $subtags[1]);
    } elsif ($subtags[3]) {
        $locale .= '@' . $subtags[3];
    }

    return $locale;
}

# Internal function.
# Get language tag from old-style "locale".
# Note: Old name is Locale2Lang().
# Note: Use canonic_lang().
sub _oldlocale2lang {
    my $oldlocale = shift;
    my @parts = split /[\W_]/, $oldlocale;
    my $lang;

    if ($lang = {reverse %lang2oldlocale}->{$oldlocale}) {
        return $lang;
    } elsif (scalar @parts > 1 and length $parts[1]) {
        return join '-', lc $parts[0], uc $parts[1];
    } else {
        return lc $parts[0];
    }
}

# Convert language tag to old style "locale".
# Note: This function in earlier releases was named Lang2Locale().
sub lang2oldlocale {
    my $lang = shift;
    my $oldlocale;
    my @subtags;

    ## unknown format.
    return unless @subtags = canonic_lang($lang);

    ## 'zh-Hans' and 'zh-Hant' cannot map to useful POSIX locale.  Map them to
    ## 'zh_CN' and 'zh_TW'.
    ## 'zh' cannot map.
    if ($subtags[0] eq 'zh' and $subtags[1] and not $subtags[2]) {
        if ($subtags[1] eq 'Hans') {
            $subtags[2] = 'CN';
        } elsif ($subtags[1] eq 'Hant') {
            $subtags[2] = 'TW';
        }
    }

    unless ($subtags[2]) {
        if ($lang2oldlocale{$subtags[0]}) {
            return $lang2oldlocale{$subtags[0]};
        }
    } else {
        return join '_', $subtags[0], $subtags[2];
    }
    ## unconvertible locale name
    return;
}

# Note: older name is sympa_dgettext().
sub dgettext {
    my $self       = shift;
    my $textdomain = shift;
    my $msgid      = shift;

    # Returns meta information on the catalog.
    # Note: currently, charset is always 'utf-8'; encoding won't be used.
    unless (defined $msgid) {
        return;
    } elsif ($msgid eq '') {    # prevents meta information to be returned
        return '';
    } elsif ($msgid eq '_language_') {
        return $self->native_name;
    } elsif ($msgid eq '_charset_') {
        return 'UTF-8';
    } elsif ($msgid eq '_encoding_') {
        return '8bit';
    }

    my $gettext_locale;
    unless ($self->{lang} and $self->{lang} ne 'en') {
        $gettext_locale = 'en_US';
    } else {
        $gettext_locale = $self->{locale};

        # Workaround for nb/nn.
        $gettext_locale =~ s/^(nb|nn)\b/${1}_NO/;
    }

    local %ENV;
    $ENV{'LANGUAGE'} = $gettext_locale;
    return Locale::Messages::dgettext($textdomain, $msgid);
}

sub gettext {
    my $self  = shift;
    my $msgid = shift;

    return $self->dgettext('', $msgid);
}

sub gettext_sprintf {
    my $self   = shift;
    my $format = shift;
    my @args   = @_;

    my $orig_locale = POSIX::setlocale(POSIX::LC_NUMERIC());

    ## if lang has not been set or 'en' is set, fallback to native sprintf().
    unless ($self->{lang} and $self->{lang} ne 'en') {
        POSIX::setlocale(POSIX::LC_NUMERIC(), 'C');
    } else {
        $format = $self->gettext($format);
        POSIX::setlocale(POSIX::LC_NUMERIC(), $self->{locale_numeric});
    }
    my $ret = sprintf($format, @args);

    POSIX::setlocale(POSIX::LC_NUMERIC(), $orig_locale);
    return $ret;
}

my %date_part_names = (
    '%a' => {
        'index'      => 6,
        'gettext_id' => 'Sun:Mon:Tue:Wed:Thu:Fri:Sat'
    },
    '%A' => {
        'index' => 6,
        'gettext_id' =>
            'Sunday:Monday:Tuesday:Wednesday:Thursday:Friday:Saturday'
    },
    '%b' => {
        'index'      => 4,
        'gettext_id' => 'Jan:Feb:Mar:Apr:May:Jun:Jul:Aug:Sep:Oct:Nov:Dec'
    },
    '%B' => {
        'index' => 4,
        'gettext_id' =>
            'January:February:March:April:May:June:July:August:September:October:November:December'
    },
    '%p' => {
        'index'      => 2,
        'gettext_id' => 'AM:PM'
    },
);

sub gettext_strftime {
    my $self   = shift;
    my $format = shift;
    my @args   = @_;

    my $orig_locale = POSIX::setlocale(POSIX::LC_TIME());

    ## if lang has not been set or 'en' is set, fallback to native
    ## POSIX::strftime().
    unless ($self->{lang} and $self->{lang} ne 'en') {
        POSIX::setlocale(POSIX::LC_TIME(), 'C');
    } else {
        $format = $self->gettext($format);

        ## If POSIX locale was not set, emulate format strings.
        unless ($self->{locale_time}
            and $self->{locale_time} ne 'C'
            and $self->{locale_time} ne 'POSIX') {
            my %names;
            foreach my $k (keys %date_part_names) {
                $names{$k} = [
                    split /:/,
                    $self->gettext($date_part_names{$k}->{'gettext_id'})
                ];
            }
            $format =~ s{(\%[EO]?.)}{
                my $index;
                if (    $names{$1}
                    and defined(
                        $index = $args[$date_part_names{$1}->{'index'}]
                    )
                    ) {
                    $index = ($index < 12) ? 0 : 1
                        if $1 eq '%p';
                    $names{$1}->[$index];
                } else {
                    $1;
                }
            }eg;
        }

        POSIX::setlocale(POSIX::LC_TIME(), $self->{locale_time});
    }
    my $ret = POSIX::strftime($format, @args);
    Encode::_utf8_off($ret);

    POSIX::setlocale(POSIX::LC_TIME(), $orig_locale);
    return $ret;
}

sub maketext {
    my $self       = shift;
    my $textdomain = shift;
    my $template   = shift;
    my @args       = @_;

    my $orig_locale = POSIX::setlocale(POSIX::LC_NUMERIC());

    unless ($self->{lang} and $self->{lang} ne 'en') {
        POSIX::setlocale(POSIX::LC_NUMERIC(), 'C');
    } else {
        $template = $self->dgettext($textdomain, $template);
        POSIX::setlocale(POSIX::LC_NUMERIC(), $self->{locale_numeric});
    }
    my $ret = $template;
    # replace parameters in string
    $ret =~ s/[%]([%]|\d+)/($1 eq '%') ? '%' : $args[$1 - 1]/eg;

    POSIX::setlocale(POSIX::LC_NUMERIC(), $orig_locale);
    return $ret;
}

1;
__END__

=encoding utf-8

=head1 NAME

Sympa::Language - Handling languages and locales

=head1 SYNOPSIS

  use Sympa::Language;
  my $language = Sympa::Language->instance;
  $language->set_lang('zh-TW', 'zh', 'en');
  
  print $language->gettext('Lorem ipsum dolor sit amet.');

=head1 DESCRIPTION

This package provides interfaces for i18n (internationalization) of Sympa.

The language tags are used to determine each language.
A language tag consists of one or more subtags: language, script, region and
variant.  Below are some examples.

=over 4

=item *

C<ar> - Arabic language

=item *

C<ain> - Ainu language

=item *

C<pt-BR> - Portuguese language in Brazil

=item *

C<be-Latn> - Belarusian language in Latin script

=item *

C<ca-ES-valencia> - Valencian variant of Catalan

=back

Other two sorts of identifiers are derived from language tags:
gettext locales and POSIX locales.

The gettext locales determine each translation catalog.
It consists of one to three parts: language, territory and modifier.
For example, their equivalents of language tags above are C<ar>, C<ain>,
C<pt_BR>, C<be@latin> and C<ca_ES@valencia>, respectively.

The POSIX locales determine each I<locale>.  They have similar forms to
gettext locales and are used by this package internally.

=head2 Functions

=head3 Manipulating language tags

=over 4

=item canonic_lang ( $lang )

I<Function>.
Canonicalizes language tag according to RFC 5646 (BCP 47) and returns it.

Parameter:

=over

=item $lang

Language tag or similar thing.
Old style "locale" by Sympa (see also L</Compatibility>) will also be
accepted.

=back

Returns:

Canonicalized language tag.
In array context, returns an array
C<(I<language>, I<script>, I<region>, I<variant>)>.
For malformed inputs, returns C<undef> or empty array.

See L</CAVEATS> about details on format.

=item implicated_langs ( $lang, ... )

I<Function>.
Gets a list of each language $lang itself and its "super" languages.
For example:
If C<'tyv-Latn-MN'> is given, this function returns
C<('tyv-Latn-MN', 'tyv-Latn', 'tyv')>.

Parameters:

=over

=item $lang, ...

Language tags or similar things.
They will be canonicalized by L</canonic_lang>()
and malformed inputs will be ignored.

=back

Returns:

A list of implicated languages, if any.
If no $lang arguments were given, this function will die.

=item lang2locale ( $lang )

I<Function>, I<internal use>.
Convert language tag to gettext locale name
(see also L</"Native language support (NLS)">).
This function may be useful if you want to know internal information such as
name of catalog file.

Parameter:

=over

=item $lang

Language tag or similar thing.

=back

Returns:

The gettext locale name.
For malformed inputs returns C<undef>.

=item negotiate_lang ( $string, $lang, ... )

I<Function>.
Get the best language according to the content of C<Accept-Language:> HTTP
request header field.

Parameters:

=over

=item $string

Content of the header.  If it is false value, C<'*'> is assumed.

=item $lang, ...

Acceptable languages.

=back

Returns:

The best language or, if negotiation failed, C<undef>.

=back

=head3 Compatibility

As of Sympa 6.2b, language tags are used to specify languages along with
locales.  Earlier releases used POSIX locale names.

These functions are used to migrate data structures and configurations of
earlier versions.

=over 4

=item lang2oldlocale ( $lang )

I<Function>.
Convert language tag to old-style "locale".

Parameter:

=over

=item $lang

Language tag or similar thing.

=back

Returns:

Old-style "locale".
If corresponding locale could not be determined, returns C<undef>.

Note:
In earlier releases this function was named Lang2Locale()
(don't confuse with L</lang2locale>()).

=back

=head2 Methods

=over 4

=item instance ( )

I<Constructor>.
Gets the singleton instance of L<Sympa::Language> class.

=back

=head3 Getting/setting language context

=over 4

=item push_lang ( [ $lang, ... ] )

I<Instance method>.
Set current language by L</set_lang>() keeping the previous one;
it can be restored with L</pop_lang>().

Parameter:

=over

=item $lang, ...

Language tags or similar things.

=back

Returns:

Always C<1>.

=item pop_lang

I<Instance method>.
Restores previous language.

Parameters:

None.

Returns:

Always C<1>.

=item set_lang ( [ $lang, ... ] )

I<Instance method>.
Sets current language along with translation catalog,
and POSIX locale if possible.

Parameter:

=over

=item $lang, ...

Language tags or similar things.
Old style "locale" by Sympa (see also L</Compatibility>) will also be
accepted.
If multiple tags are specified, this function tries each of them in order.

Note that C<'en'> will always succeed.  Thus, putting it at the end of
argument list may be useful.

=back

Returns:

Canonic language tag actually set or, if no usable catalogs were found,
C<undef>.  If no arguments are given, do nothing and returns C<undef>.

Note that the language actually set may not be identical to the parameter
$lang, even when latter has been canonicalized.

The language tag C<'en'> is special:
It is used to set C<'C'> locale and will succeed always.

Note:
This function of Sympa 6.2a or earlier returned old style "locale" names.

=item native_name ( )

I<Instance method>.
Get the name of the language, i.e. the one defined in the catalog.

Parameters:

None.

Returns:

Name of the language in native notation.
If it was not found, returns an empty string C<''>.

Note:
The name is the content of C<Language-Team:> field in the header of catalog.

=item get_lang ()

I<Instance method>.
Get current language tag.

Parameters:

None.

Returns:

Current language.
If it is not known, returns default language tag.

=back

=head3 Native language support (NLS)

=over 4

=item dgettext ( $domain, $msgid )

I<Instance method>.
Returns the translation of given string using NLS catalog in domain $domain.
Note that L</set_lang>() must be called in advance.

Parameter:

=over

=item $domain

gettext domain.

=item $msgid

gettext message ID.

=back

Returns:

Translated string or, if it wasn't found, original string.

=item gettext ( $msgid )

I<Instance method>.
Returns the translation of given string using current NLS catalog.
Note that L</set_lang>() must be called in advance.

Parameter:

=over

=item $msgid

gettext message ID.

=back

Returns:

Translated string or, if it wasn't found, original string.

If special argument C<'_language_'> is given,
returns the name of language in native form (See L<native_name>()).
For argument C<''> returns empty string.

=item gettext_sprintf ( $format, $args, ... )

I<Instance method>.
Internationalized L<sprintf>().
At first, translates $format argument using L</gettext>().
Then returns formatted string by remainder of arguments.

This is equivalent to C<sprintf( gettext($format), $args, ... )>
with appropriate POSIX locale if possible.

Parameters:

=over

=item $format

Format string.
See also L<perlfunc/sprintf>.

=item $args, ...

Arguments fed to sprintf().

=back

Returns:

Translated and formatted string.

=item gettext_strftime ( $format, $args, ... )

I<Instance method>.
Internationalized L<strftime|POSIX/strftime>().
At first, translates $format argument using L</gettext>().
Then returns formatted date/time by remainder of arguments.

If appropriate POSIX locale is not available, parts of result (names of days,
months etc.) will be taken from the catalog.

Parameters:

=over

=item $format

Format string.
See also L<POSIX/strftime>.

=item $args, ...

Arguments fed to POSIX::strftime().

=back

Returns:

Translated and formatted string.

=item maketext ( $textdomain, $template, $args, ... )

I<Instance method>.
At first, translates $template argument using L</gettext>().
Then replaces placeholders (C<%1>, C<%2>, ...) in template with arguments.

Numeric arguments will be formatted using appropriate locale, if any:
Typically, the decimal point specific to each locale may be used.

Parameters:

=over

=item $textdomain

NLS domain to be used for searching catalogs.

=item $template

Template string which may include placeholders.

=item $args, ...

Arguments corresponding to placeholders.

=back

Returns:

Translated and replaced string.

=back


B<Note>:

Calls of L</gettext>(), L</gettext_sprintf>() and L</gettext_strftime>() are 
extracted during packaging process and are added to translation catalog.

=head1 CAVEATS

=over

=item *

We impose some restrictions and modifications to the format described in
BCP 47:
language extension subtags won't be supported;
if script and variant subtags co-exist, latter will be ignored;
the first one of multiple variant subtags will be used;
each variant subtag may be longer than eight characters;
extension subtags are not supported.

=item *

Since catalogs for C<zh>, C<zh-Hans> or C<zh-Hant> may not be provided,
L</set_lang>() will choose approximate catalogs for these tags.

=back

=head1 SEE ALSO

RFC 5646 I<Tags for Identifying Languages>.
L<http://tools.ietf.org/html/rfc5646>.

I<Translating Sympa>.
L<https://translate.sympa.org/pages/help>.

=head1 HISTORY

L<Language> module supporting multiple languages by single installation
and using NLS catalog in msgcat format appeared on Sympa 3.0a.

Sympa 4.2b.3 adopted gettext portable object (PO) catalog and POSIX locale.

On Sympa 6.2, rewritten module L<Sympa::Language> adopted BCP 47 language tag
to determine language context, and installing POSIX locale became optional.