1226 lines
30 KiB
Perl
1226 lines
30 KiB
Perl
# -*- indent-tabs-mode: nil; -*-
|
|
# vim:ft=perl:et:sw=4
|
|
# $Id$
|
|
|
|
# Sympa - SYsteme de Multi-Postage Automatique
|
|
#
|
|
# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
|
|
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
|
|
# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
|
|
# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
|
|
# Copyright 2017, 2018 The Sympa Community. See the AUTHORS.md file at the
|
|
# top-level directory of this distribution and at
|
|
# <https://github.com/sympa-community/sympa.git>.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package Sympa::Language;
|
|
|
|
use strict;
|
|
use warnings;
|
|
use base qw(Class::Singleton);
|
|
|
|
use Encode qw();
|
|
use Locale::Messages;
|
|
use POSIX qw();
|
|
|
|
use Sympa::Constants;
|
|
|
|
BEGIN {
|
|
## Using the Pure Perl implementation of gettext
|
|
## This is required on Solaris : native implementation of gettext does not
|
|
## map ll_RR with ll.
|
|
# libintl-perl 1.22 (virtually 1.23) or later is recommended to use
|
|
# 'gettext_dumb' package which is independent from POSIX locale. If older
|
|
# version is used. falls back to 'gettext_pp'.
|
|
my $package = Locale::Messages->select_package('gettext_dumb');
|
|
Locale::Messages->select_package('gettext_pp')
|
|
unless $package and $package eq 'gettext_dumb';
|
|
## Workaround: Prevent from searching catalogs in /usr/share/locale.
|
|
undef $Locale::gettext_pp::__gettext_pp_default_dir;
|
|
|
|
## Define what catalogs are used
|
|
Locale::Messages::bindtextdomain(sympa => Sympa::Constants::LOCALEDIR);
|
|
Locale::Messages::bindtextdomain(web_help => Sympa::Constants::LOCALEDIR);
|
|
Locale::Messages::textdomain('sympa');
|
|
## Get translations by internal encoding.
|
|
Locale::Messages::bind_textdomain_codeset(sympa => 'utf-8');
|
|
Locale::Messages::bind_textdomain_codeset(web_help => 'utf-8');
|
|
}
|
|
|
|
# Constructor for Class::Singleton.
|
|
sub _new_instance {
|
|
my $class = shift;
|
|
my $self = $class->SUPER::_new_instance();
|
|
|
|
## Initialize lang/locale.
|
|
$self->set_lang('en');
|
|
return $self;
|
|
}
|
|
|
|
## The map to get language tag from older non-POSIX locale naming.
|
|
my %language_equiv = (
|
|
'cn' => 'zh-CN',
|
|
'tw' => 'zh-TW',
|
|
'cz' => 'cs',
|
|
'us' => 'en-US',
|
|
);
|
|
|
|
## The map to get appropriate POSIX locale name from language code.
|
|
## Why this is required is that on many systems locales often have canonic
|
|
## "ll_RR.ENCODING" names only. n.b. This format can not express all
|
|
## languages in proper way, e.g. Common Arabic ("ar"), Esperanto ("eo").
|
|
##
|
|
## This map is also used to convert old-style Sympa "locales" to language
|
|
## tags ('en' is special case. cf. set_lang()).
|
|
my %lang2oldlocale = (
|
|
'af' => 'af_ZA',
|
|
'ar' => 'ar_SY',
|
|
'br' => 'br_FR',
|
|
'bg' => 'bg_BG',
|
|
'ca' => 'ca_ES',
|
|
'cs' => 'cs_CZ',
|
|
'de' => 'de_DE',
|
|
'el' => 'el_GR',
|
|
'es' => 'es_ES',
|
|
'et' => 'et_EE',
|
|
'eu' => 'eu_ES',
|
|
'fi' => 'fi_FI',
|
|
'fr' => 'fr_FR',
|
|
'gl' => 'gl_ES',
|
|
'hu' => 'hu_HU',
|
|
'id' => 'id_ID',
|
|
'it' => 'it_IT',
|
|
'ja' => 'ja_JP',
|
|
'ko' => 'ko_KR',
|
|
'la' => 'la_VA', # from OpenOffice.org
|
|
'ml' => 'ml_IN',
|
|
'nb' => 'nb_NO',
|
|
'nn' => 'nn_NO',
|
|
'nl' => 'nl_NL',
|
|
'oc' => 'oc_FR',
|
|
'pl' => 'pl_PL',
|
|
'pt' => 'pt_PT',
|
|
'rm' => 'rm_CH', # CLDR
|
|
'ro' => 'ro_RO',
|
|
'ru' => 'ru_RU',
|
|
'sv' => 'sv_SE',
|
|
'tr' => 'tr_TR',
|
|
'vi' => 'vi_VN',
|
|
);
|
|
|
|
## Regexp for old style canonical locale used by Sympa-6.2a or earlier.
|
|
my $oldlocale_re = qr/^([a-z]{2})_([A-Z]{2})(?![A-Z])/i;
|
|
|
|
## Regexp for IETF language tag described in RFC 5646 (BCP 47), modified.
|
|
my $language_tag_re = qr/^
|
|
([a-z]{2}(?:-[a-z]{3}){1,3} | [a-z]{2,3}) # language (and ext.)
|
|
(?:-([a-z]{4}))? # script
|
|
(?:-([a-z]{2}))? # region (no UN M.49)
|
|
(?:-( # variant
|
|
(?:[a-z0-9]{5,} | [0-9][a-z0-9]{3,})
|
|
(?:-[a-z0-9]{5,} | -[0-9][a-z0-9]{3,})*
|
|
))?
|
|
$/ix;
|
|
|
|
## A tiny subset of script codes and gettext modifier names.
|
|
## Keys are ISO 15924 script codes (Titlecased, four characters).
|
|
## Values are property value aliases standardised by Unicode Consortium
|
|
## (lowercased). cf. <http://www.unicode.org/iso15924/iso15924-codes.html>.
|
|
my %script2modifier = (
|
|
'Arab' => 'arabic',
|
|
'Cyrl' => 'cyrillic',
|
|
'Deva' => 'devanagari',
|
|
'Dsrt' => 'deseret',
|
|
'Glag' => 'glagolitic',
|
|
'Grek' => 'greek',
|
|
'Guru' => 'gurmukhi',
|
|
'Hebr' => 'hebrew',
|
|
'Latn' => 'latin',
|
|
'Mong' => 'mongolian',
|
|
'Shaw' => 'shaw', # found in Debian "en@shaw" locale.
|
|
'Tfng' => 'tifinagh',
|
|
);
|
|
|
|
sub canonic_lang {
|
|
my $lang = shift;
|
|
return unless $lang;
|
|
|
|
## Compatibility: older non-POSIX locale names.
|
|
if ($language_equiv{$lang}) {
|
|
$lang = $language_equiv{$lang};
|
|
}
|
|
## Compatibility: names used as "lang" or "locale" by Sympa <= 6.2a.
|
|
elsif ($lang =~ $oldlocale_re) {
|
|
$lang = _oldlocale2lang(lc($1) . '_' . uc($2));
|
|
}
|
|
|
|
my @subtags;
|
|
|
|
# unknown format.
|
|
return unless @subtags = ($lang =~ $language_tag_re);
|
|
|
|
## Canonicalize cases of subtags: ll-ext-Scri-RR-variant-...
|
|
$subtags[0] = lc $subtags[0];
|
|
$subtags[1] =~ s/^(\w)(\w+)/uc($1) . lc($2)/e if $subtags[1];
|
|
$subtags[2] = uc $subtags[2] if $subtags[2];
|
|
$subtags[3] = lc $subtags[3] if $subtags[3];
|
|
|
|
##XXX Maybe more canonicalizations here.
|
|
|
|
## Check subtags,
|
|
# won't support language extension subtags.
|
|
return unless $subtags[0] =~ /^[a-z]{2,3}$/;
|
|
|
|
# won't allow multiple variant subtags.
|
|
$subtags[3] =~ s/-.+// if $subtags[3];
|
|
|
|
##XXX Maybe more checks here.
|
|
|
|
return @subtags if wantarray;
|
|
return join '-', grep {$_} @subtags;
|
|
}
|
|
|
|
sub implicated_langs {
|
|
my @langs = @_;
|
|
die 'missing langs parameter' unless @langs;
|
|
|
|
my @implicated_langs = ();
|
|
|
|
foreach my $lang (@langs) {
|
|
my @subtags = canonic_lang($lang);
|
|
while (@subtags) {
|
|
my $l = join '-', grep {$_} @subtags;
|
|
@implicated_langs = ((grep { $_ ne $l } @implicated_langs), $l);
|
|
|
|
## Workaround:
|
|
## - "zh-Hans-CN", "zh-Hant-TW", ... may occasionally be
|
|
## identified with "zh-CN", "zh-TW" etc. Add them to
|
|
## implication list.
|
|
if ($l =~ /^zh-(Hans|Hant)-[A-Z]{2}\b/) {
|
|
$l = join '-', grep {$_} @subtags[0, 2 .. $#subtags];
|
|
@implicated_langs =
|
|
((grep { $_ ne $l } @implicated_langs), $l);
|
|
}
|
|
|
|
1 until pop @subtags;
|
|
}
|
|
}
|
|
|
|
return @implicated_langs;
|
|
}
|
|
|
|
## Parses content of HTTP 1.1 Accept-Charset, Accept-Encoding or
|
|
## Accept-Language request header field.
|
|
## Returns an array of arrayrefs [ITEM, WEIGHT].
|
|
##
|
|
## NOTE: This might be moved to utility package such as tools.pm.
|
|
sub parse_http_accept_string {
|
|
my $accept_string = shift || '';
|
|
|
|
$accept_string =~ s/^\s+//;
|
|
$accept_string =~ s/\s+$//;
|
|
$accept_string ||= '*';
|
|
my @pairs = split /\s*,\s*/, $accept_string;
|
|
|
|
my @ret = ();
|
|
foreach my $pair (@pairs) {
|
|
my ($item, $weight) = split /\s*;\s*/, $pair, 2;
|
|
if ( defined $weight
|
|
and $weight =~ s/^q\s*=\s*//i
|
|
and $weight =~ /^(\d+(\.\d*)?|\.\d+)$/) {
|
|
$weight += 0.0;
|
|
} else {
|
|
$weight = 1.0;
|
|
}
|
|
push @ret, [$item => $weight];
|
|
}
|
|
return @ret;
|
|
}
|
|
|
|
sub negotiate_lang {
|
|
my $accept_string = shift || '*';
|
|
my @supported_languages = grep {$_} map { split /[\s,]+/, $_ } @_;
|
|
|
|
## parse Accept-Language: header field.
|
|
## unknown languages are ignored.
|
|
my @accept_languages =
|
|
grep { $_->[0] eq '*' or $_->[0] = canonic_lang($_->[0]) }
|
|
parse_http_accept_string($accept_string);
|
|
return unless @accept_languages;
|
|
|
|
## try to find the best language.
|
|
my $best_lang = undef;
|
|
my $best_weight = 0.0;
|
|
foreach my $supported_lang (@supported_languages) {
|
|
my @supported_pfxs = implicated_langs($supported_lang);
|
|
foreach my $pair (@accept_languages) {
|
|
my ($accept_lang, $weight) = @$pair;
|
|
if ($accept_lang eq '*'
|
|
or grep { $accept_lang eq $_ } @supported_pfxs) {
|
|
unless ($best_lang and $weight <= $best_weight) {
|
|
$best_lang = $supported_pfxs[0]; # canonic form
|
|
$best_weight = $weight;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return $best_lang;
|
|
}
|
|
|
|
##sub GetSupportedLanguages {
|
|
##DEPRECATED: use Sympa::get_supported_languages().
|
|
## Supported languages are defined by 'supported_lang' sympa.conf parameter.
|
|
|
|
## Old name: PushLang()
|
|
sub push_lang {
|
|
my $self = shift;
|
|
my @langs = @_;
|
|
|
|
push @{$self->{previous_lang}}, $self->get_lang;
|
|
$self->set_lang(@langs);
|
|
|
|
return 1;
|
|
}
|
|
|
|
## Old name: PopLang()
|
|
sub pop_lang {
|
|
my $self = shift;
|
|
|
|
die 'calling pop_lang() without push_lang()'
|
|
unless @{$self->{previous_lang}};
|
|
my $lang = pop @{$self->{previous_lang}};
|
|
$self->set_lang($lang);
|
|
|
|
return 1;
|
|
}
|
|
|
|
## Old name: SetLang()
|
|
sub set_lang {
|
|
my $self = shift;
|
|
my @langs = @_;
|
|
my $locale;
|
|
|
|
foreach my $lang (@langs) {
|
|
# Canonicalize lang.
|
|
# Note: 'en' is always allowed. Use 'en-US' and so on to provide NLS
|
|
# for English.
|
|
next unless $lang = canonic_lang($lang);
|
|
|
|
# Try to set POSIX locale and gettext locale, and get lang actually
|
|
# set.
|
|
# Note: Macrolanguage 'zh', 'zh-Hans' or 'zh-Hant' may fallback to
|
|
# lang with available region.
|
|
if ($locale = _resolve_gettext_locale(lang2locale($lang))) {
|
|
($lang) =
|
|
grep { lang2locale($_) eq $locale } implicated_langs($lang);
|
|
} elsif ($lang =~ /^zh\b/) {
|
|
my @rr;
|
|
if ($lang =~ /^zh-Hans\b/) {
|
|
@rr = qw(CN SG HK MO TW); # try simp. first
|
|
} elsif ($lang =~ /^zh-Hant\b/) {
|
|
@rr = qw(HK MO TW CN SG); # try trad. first
|
|
} else {
|
|
@rr = qw(CN HK MO SG TW);
|
|
}
|
|
foreach my $rr (@rr) {
|
|
$lang = "zh-$rr";
|
|
last if $locale = _resolve_gettext_locale(lang2locale($lang));
|
|
}
|
|
}
|
|
|
|
next unless $locale and $lang;
|
|
|
|
# The locale is the gettext catalog name; lang is the IETF language
|
|
# tag. Ex: locale = pt_BR ; lang = pt-BR
|
|
# locale_numeric and locale_time are POSIX locales for LC_NUMERIC and
|
|
# POSIX::LC_TIME catogories, respectively. As of 6.2b, they became
|
|
# optional:
|
|
# If setting each of them failed, 'C' locale will be set.
|
|
$self->{lang} = $lang;
|
|
$self->{locale} = $locale;
|
|
$self->{locale_numeric} =
|
|
_find_posix_locale(POSIX::LC_NUMERIC(), $locale)
|
|
|| 'C';
|
|
$self->{locale_time} = _find_posix_locale(POSIX::LC_TIME(), $locale)
|
|
|| 'C';
|
|
|
|
return $lang;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
## Trys to set gettext locale and returns actually set locale.
|
|
## Mandatory parameter is gettext locale name.
|
|
sub _resolve_gettext_locale {
|
|
my $locale = shift or die 'missing locale parameter';
|
|
|
|
# 'en' is always allowed.
|
|
return $locale if $locale eq 'en';
|
|
|
|
# Workaround:
|
|
# - "nb" and "nn" are recommended not to have "_NO" region suffix:
|
|
# Both of them are official languages in Norway. However, current Sympa
|
|
# provides "nb_NO" NLS catalog.
|
|
$locale =~ s/^(nb|nn)\b/${1}_NO/;
|
|
|
|
## Check if catalog is loaded.
|
|
local %ENV;
|
|
$ENV{'LANGUAGE'} = $locale;
|
|
my $metadata = Locale::Messages::gettext(''); # get header
|
|
|
|
unless ($metadata) {
|
|
## If a sub-locale of 'en' (en-CA, en@shaw, ...) failed, fallback to
|
|
## 'en'. Otherwise fails.
|
|
if ($locale =~ /^en(?![a-z])/) {
|
|
$locale = 'en';
|
|
} else {
|
|
return;
|
|
}
|
|
} elsif ($metadata =~ /(?:\A|\n)Language:\s*([\@\w]+)/i) {
|
|
## Get precise name of gettext locale if possible.
|
|
$locale = $1;
|
|
}
|
|
|
|
## Workaround for "nb" and "nn": See above.
|
|
$locale =~ s/^(nb|nn)_NO\b/$1/;
|
|
|
|
return $locale;
|
|
}
|
|
|
|
# Trys to set POSIX locale which affects to strftime, sprintf etc.
|
|
sub _find_posix_locale {
|
|
my $type = shift;
|
|
my $locale = shift;
|
|
|
|
# Special case: 'en' is an alias of 'C' locale. Use 'en_US' and so on for
|
|
# real English.
|
|
return 'C' if $locale eq 'en';
|
|
|
|
my $orig_locale = POSIX::setlocale($type);
|
|
|
|
## From "ll@modifier", gets "ll", "ll_RR" and "@modifier".
|
|
my ($loc, $mod) = split /(?=\@)/, $locale, 2;
|
|
my $machloc = $loc;
|
|
$machloc =~ s/^([a-z]{2,3})(?!_)/$lang2oldlocale{$1} || $1/e;
|
|
$mod ||= '';
|
|
|
|
## Set POSIX locale
|
|
my $posix_locale;
|
|
my @try;
|
|
|
|
## Add codeset.
|
|
## UpperCase required for FreeBSD; dashless required on HP-UX;
|
|
## null codeset is last resort.
|
|
foreach my $cs ('.utf-8', '.UTF-8', '.utf8', '') {
|
|
## Truncate locale similarly in gettext: full locale, and omit
|
|
## region then modifier.
|
|
push @try,
|
|
map { sprintf $_, $cs }
|
|
("$machloc%s$mod", "$loc%s$mod", "$loc%s");
|
|
}
|
|
foreach my $try (@try) {
|
|
if (POSIX::setlocale($type, $try)) {
|
|
$posix_locale = $try;
|
|
last;
|
|
}
|
|
}
|
|
|
|
POSIX::setlocale($type, $orig_locale);
|
|
|
|
return $posix_locale;
|
|
}
|
|
|
|
## Old name: GetLangName()
|
|
## Note: Optional $lang argument was deprecated.
|
|
sub native_name {
|
|
my $self = shift;
|
|
die 'extra argument(s)' if @_;
|
|
my $name;
|
|
|
|
unless ($self->{lang} and $self->{lang} ne 'en') {
|
|
$name = 'English';
|
|
} else {
|
|
## Workaround for nb/nn.
|
|
my $locale = $self->{locale};
|
|
$locale =~ s/^(nb|nn)\b/${1}_NO/;
|
|
|
|
local %ENV;
|
|
$ENV{'LANGUAGE'} = $locale;
|
|
my $metadata = Locale::Messages::gettext(''); # get header
|
|
|
|
if ($metadata =~ /(?:\A|\n)Language-Team:\s*(.+)/i) {
|
|
$name = $1;
|
|
$name =~ s/\s*\<\S+\>//;
|
|
}
|
|
}
|
|
|
|
return (defined $name and $name =~ /\S/) ? $name : '';
|
|
}
|
|
|
|
## Old name: GetLang()
|
|
sub get_lang {
|
|
my $self = shift;
|
|
return $self->{lang} || 'en'; # the last resort
|
|
}
|
|
|
|
# DEPRECATED: use Conf::lang2charset().
|
|
# sub GetCharset;
|
|
|
|
## DEPRECATED: Use canonic_lang().
|
|
## sub Locale2Lang;
|
|
|
|
# Internal function.
|
|
# Convert language tag to gettext locale name.
|
|
sub lang2locale {
|
|
my $lang = shift;
|
|
my $locale;
|
|
my @subtags;
|
|
|
|
## unknown format.
|
|
return unless @subtags = canonic_lang($lang);
|
|
|
|
## convert from "ll-Scri-RR" to "ll_RR@scriptname", or
|
|
## from "ll-RR-variant" to "ll_RR@variant".
|
|
$locale = $subtags[0];
|
|
if ($subtags[2]) {
|
|
$locale .= '_' . $subtags[2];
|
|
}
|
|
if ($subtags[1]) {
|
|
$locale .= '@' . ($script2modifier{$subtags[1]} || $subtags[1]);
|
|
} elsif ($subtags[3]) {
|
|
$locale .= '@' . $subtags[3];
|
|
}
|
|
|
|
return $locale;
|
|
}
|
|
|
|
# Internal function.
|
|
# Get language tag from old-style "locale".
|
|
# Note: Old name is Locale2Lang().
|
|
# Note: Use canonic_lang().
|
|
sub _oldlocale2lang {
|
|
my $oldlocale = shift;
|
|
my @parts = split /[\W_]/, $oldlocale;
|
|
my $lang;
|
|
|
|
if ($lang = {reverse %lang2oldlocale}->{$oldlocale}) {
|
|
return $lang;
|
|
} elsif (scalar @parts > 1 and length $parts[1]) {
|
|
return join '-', lc $parts[0], uc $parts[1];
|
|
} else {
|
|
return lc $parts[0];
|
|
}
|
|
}
|
|
|
|
# Convert language tag to old style "locale".
|
|
# Note: This function in earlier releases was named Lang2Locale().
|
|
sub lang2oldlocale {
|
|
my $lang = shift;
|
|
my $oldlocale;
|
|
my @subtags;
|
|
|
|
## unknown format.
|
|
return unless @subtags = canonic_lang($lang);
|
|
|
|
## 'zh-Hans' and 'zh-Hant' cannot map to useful POSIX locale. Map them to
|
|
## 'zh_CN' and 'zh_TW'.
|
|
## 'zh' cannot map.
|
|
if ($subtags[0] eq 'zh' and $subtags[1] and not $subtags[2]) {
|
|
if ($subtags[1] eq 'Hans') {
|
|
$subtags[2] = 'CN';
|
|
} elsif ($subtags[1] eq 'Hant') {
|
|
$subtags[2] = 'TW';
|
|
}
|
|
}
|
|
|
|
unless ($subtags[2]) {
|
|
if ($lang2oldlocale{$subtags[0]}) {
|
|
return $lang2oldlocale{$subtags[0]};
|
|
}
|
|
} else {
|
|
return join '_', $subtags[0], $subtags[2];
|
|
}
|
|
## unconvertible locale name
|
|
return;
|
|
}
|
|
|
|
# Note: older name is sympa_dgettext().
|
|
sub dgettext {
|
|
my $self = shift;
|
|
my $textdomain = shift;
|
|
my $msgid = shift;
|
|
|
|
# Returns meta information on the catalog.
|
|
# Note: currently, charset is always 'utf-8'; encoding won't be used.
|
|
unless (defined $msgid) {
|
|
return;
|
|
} elsif ($msgid eq '') { # prevents meta information to be returned
|
|
return '';
|
|
} elsif ($msgid eq '_language_') {
|
|
return $self->native_name;
|
|
} elsif ($msgid eq '_charset_') {
|
|
return 'UTF-8';
|
|
} elsif ($msgid eq '_encoding_') {
|
|
return '8bit';
|
|
}
|
|
|
|
my $gettext_locale;
|
|
unless ($self->{lang} and $self->{lang} ne 'en') {
|
|
$gettext_locale = 'en_US';
|
|
} else {
|
|
$gettext_locale = $self->{locale};
|
|
|
|
# Workaround for nb/nn.
|
|
$gettext_locale =~ s/^(nb|nn)\b/${1}_NO/;
|
|
}
|
|
|
|
local %ENV;
|
|
$ENV{'LANGUAGE'} = $gettext_locale;
|
|
return Locale::Messages::dgettext($textdomain, $msgid);
|
|
}
|
|
|
|
sub gettext {
|
|
my $self = shift;
|
|
my $msgid = shift;
|
|
|
|
return $self->dgettext('', $msgid);
|
|
}
|
|
|
|
sub gettext_sprintf {
|
|
my $self = shift;
|
|
my $format = shift;
|
|
my @args = @_;
|
|
|
|
my $orig_locale = POSIX::setlocale(POSIX::LC_NUMERIC());
|
|
|
|
## if lang has not been set or 'en' is set, fallback to native sprintf().
|
|
unless ($self->{lang} and $self->{lang} ne 'en') {
|
|
POSIX::setlocale(POSIX::LC_NUMERIC(), 'C');
|
|
} else {
|
|
$format = $self->gettext($format);
|
|
POSIX::setlocale(POSIX::LC_NUMERIC(), $self->{locale_numeric});
|
|
}
|
|
my $ret = sprintf($format, @args);
|
|
|
|
POSIX::setlocale(POSIX::LC_NUMERIC(), $orig_locale);
|
|
return $ret;
|
|
}
|
|
|
|
my %date_part_names = (
|
|
'%a' => {
|
|
'index' => 6,
|
|
'gettext_id' => 'Sun:Mon:Tue:Wed:Thu:Fri:Sat'
|
|
},
|
|
'%A' => {
|
|
'index' => 6,
|
|
'gettext_id' =>
|
|
'Sunday:Monday:Tuesday:Wednesday:Thursday:Friday:Saturday'
|
|
},
|
|
'%b' => {
|
|
'index' => 4,
|
|
'gettext_id' => 'Jan:Feb:Mar:Apr:May:Jun:Jul:Aug:Sep:Oct:Nov:Dec'
|
|
},
|
|
'%B' => {
|
|
'index' => 4,
|
|
'gettext_id' =>
|
|
'January:February:March:April:May:June:July:August:September:October:November:December'
|
|
},
|
|
'%p' => {
|
|
'index' => 2,
|
|
'gettext_id' => 'AM:PM'
|
|
},
|
|
);
|
|
|
|
sub gettext_strftime {
|
|
my $self = shift;
|
|
my $format = shift;
|
|
my @args = @_;
|
|
|
|
my $orig_locale = POSIX::setlocale(POSIX::LC_TIME());
|
|
|
|
## if lang has not been set or 'en' is set, fallback to native
|
|
## POSIX::strftime().
|
|
unless ($self->{lang} and $self->{lang} ne 'en') {
|
|
POSIX::setlocale(POSIX::LC_TIME(), 'C');
|
|
} else {
|
|
$format = $self->gettext($format);
|
|
|
|
## If POSIX locale was not set, emulate format strings.
|
|
unless ($self->{locale_time}
|
|
and $self->{locale_time} ne 'C'
|
|
and $self->{locale_time} ne 'POSIX') {
|
|
my %names;
|
|
foreach my $k (keys %date_part_names) {
|
|
$names{$k} = [
|
|
split /:/,
|
|
$self->gettext($date_part_names{$k}->{'gettext_id'})
|
|
];
|
|
}
|
|
$format =~ s{(\%[EO]?.)}{
|
|
my $index;
|
|
if ( $names{$1}
|
|
and defined(
|
|
$index = $args[$date_part_names{$1}->{'index'}]
|
|
)
|
|
) {
|
|
$index = ($index < 12) ? 0 : 1
|
|
if $1 eq '%p';
|
|
$names{$1}->[$index];
|
|
} else {
|
|
$1;
|
|
}
|
|
}eg;
|
|
}
|
|
|
|
POSIX::setlocale(POSIX::LC_TIME(), $self->{locale_time});
|
|
}
|
|
my $ret = POSIX::strftime($format, @args);
|
|
Encode::_utf8_off($ret);
|
|
|
|
POSIX::setlocale(POSIX::LC_TIME(), $orig_locale);
|
|
return $ret;
|
|
}
|
|
|
|
sub maketext {
|
|
my $self = shift;
|
|
my $textdomain = shift;
|
|
my $template = shift;
|
|
my @args = @_;
|
|
|
|
my $orig_locale = POSIX::setlocale(POSIX::LC_NUMERIC());
|
|
|
|
unless ($self->{lang} and $self->{lang} ne 'en') {
|
|
POSIX::setlocale(POSIX::LC_NUMERIC(), 'C');
|
|
} else {
|
|
$template = $self->dgettext($textdomain, $template);
|
|
POSIX::setlocale(POSIX::LC_NUMERIC(), $self->{locale_numeric});
|
|
}
|
|
my $ret = $template;
|
|
# replace parameters in string
|
|
$ret =~ s/[%]([%]|\d+)/($1 eq '%') ? '%' : $args[$1 - 1]/eg;
|
|
|
|
POSIX::setlocale(POSIX::LC_NUMERIC(), $orig_locale);
|
|
return $ret;
|
|
}
|
|
|
|
1;
|
|
__END__
|
|
|
|
=encoding utf-8
|
|
|
|
=head1 NAME
|
|
|
|
Sympa::Language - Handling languages and locales
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Sympa::Language;
|
|
my $language = Sympa::Language->instance;
|
|
$language->set_lang('zh-TW', 'zh', 'en');
|
|
|
|
print $language->gettext('Lorem ipsum dolor sit amet.');
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This package provides interfaces for i18n (internationalization) of Sympa.
|
|
|
|
The language tags are used to determine each language.
|
|
A language tag consists of one or more subtags: language, script, region and
|
|
variant. Below are some examples.
|
|
|
|
=over 4
|
|
|
|
=item *
|
|
|
|
C<ar> - Arabic language
|
|
|
|
=item *
|
|
|
|
C<ain> - Ainu language
|
|
|
|
=item *
|
|
|
|
C<pt-BR> - Portuguese language in Brazil
|
|
|
|
=item *
|
|
|
|
C<be-Latn> - Belarusian language in Latin script
|
|
|
|
=item *
|
|
|
|
C<ca-ES-valencia> - Valencian variant of Catalan
|
|
|
|
=back
|
|
|
|
Other two sorts of identifiers are derived from language tags:
|
|
gettext locales and POSIX locales.
|
|
|
|
The gettext locales determine each translation catalog.
|
|
It consists of one to three parts: language, territory and modifier.
|
|
For example, their equivalents of language tags above are C<ar>, C<ain>,
|
|
C<pt_BR>, C<be@latin> and C<ca_ES@valencia>, respectively.
|
|
|
|
The POSIX locales determine each I<locale>. They have similar forms to
|
|
gettext locales and are used by this package internally.
|
|
|
|
=head2 Functions
|
|
|
|
=head3 Manipulating language tags
|
|
|
|
=over 4
|
|
|
|
=item canonic_lang ( $lang )
|
|
|
|
I<Function>.
|
|
Canonicalizes language tag according to RFC 5646 (BCP 47) and returns it.
|
|
|
|
Parameter:
|
|
|
|
=over
|
|
|
|
=item $lang
|
|
|
|
Language tag or similar thing.
|
|
Old style "locale" by Sympa (see also L</Compatibility>) will also be
|
|
accepted.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Canonicalized language tag.
|
|
In array context, returns an array
|
|
C<(I<language>, I<script>, I<region>, I<variant>)>.
|
|
For malformed inputs, returns C<undef> or empty array.
|
|
|
|
See L</CAVEATS> about details on format.
|
|
|
|
=item implicated_langs ( $lang, ... )
|
|
|
|
I<Function>.
|
|
Gets a list of each language $lang itself and its "super" languages.
|
|
For example:
|
|
If C<'tyv-Latn-MN'> is given, this function returns
|
|
C<('tyv-Latn-MN', 'tyv-Latn', 'tyv')>.
|
|
|
|
Parameters:
|
|
|
|
=over
|
|
|
|
=item $lang, ...
|
|
|
|
Language tags or similar things.
|
|
They will be canonicalized by L</canonic_lang>()
|
|
and malformed inputs will be ignored.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
A list of implicated languages, if any.
|
|
If no $lang arguments were given, this function will die.
|
|
|
|
=item lang2locale ( $lang )
|
|
|
|
I<Function>, I<internal use>.
|
|
Convert language tag to gettext locale name
|
|
(see also L</"Native language support (NLS)">).
|
|
This function may be useful if you want to know internal information such as
|
|
name of catalog file.
|
|
|
|
Parameter:
|
|
|
|
=over
|
|
|
|
=item $lang
|
|
|
|
Language tag or similar thing.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
The gettext locale name.
|
|
For malformed inputs returns C<undef>.
|
|
|
|
=item negotiate_lang ( $string, $lang, ... )
|
|
|
|
I<Function>.
|
|
Get the best language according to the content of C<Accept-Language:> HTTP
|
|
request header field.
|
|
|
|
Parameters:
|
|
|
|
=over
|
|
|
|
=item $string
|
|
|
|
Content of the header. If it is false value, C<'*'> is assumed.
|
|
|
|
=item $lang, ...
|
|
|
|
Acceptable languages.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
The best language or, if negotiation failed, C<undef>.
|
|
|
|
=back
|
|
|
|
=head3 Compatibility
|
|
|
|
As of Sympa 6.2b, language tags are used to specify languages along with
|
|
locales. Earlier releases used POSIX locale names.
|
|
|
|
These functions are used to migrate data structures and configurations of
|
|
earlier versions.
|
|
|
|
=over 4
|
|
|
|
=item lang2oldlocale ( $lang )
|
|
|
|
I<Function>.
|
|
Convert language tag to old-style "locale".
|
|
|
|
Parameter:
|
|
|
|
=over
|
|
|
|
=item $lang
|
|
|
|
Language tag or similar thing.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Old-style "locale".
|
|
If corresponding locale could not be determined, returns C<undef>.
|
|
|
|
Note:
|
|
In earlier releases this function was named Lang2Locale()
|
|
(don't confuse with L</lang2locale>()).
|
|
|
|
=back
|
|
|
|
=head2 Methods
|
|
|
|
=over 4
|
|
|
|
=item instance ( )
|
|
|
|
I<Constructor>.
|
|
Gets the singleton instance of L<Sympa::Language> class.
|
|
|
|
=back
|
|
|
|
=head3 Getting/setting language context
|
|
|
|
=over 4
|
|
|
|
=item push_lang ( [ $lang, ... ] )
|
|
|
|
I<Instance method>.
|
|
Set current language by L</set_lang>() keeping the previous one;
|
|
it can be restored with L</pop_lang>().
|
|
|
|
Parameter:
|
|
|
|
=over
|
|
|
|
=item $lang, ...
|
|
|
|
Language tags or similar things.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Always C<1>.
|
|
|
|
=item pop_lang
|
|
|
|
I<Instance method>.
|
|
Restores previous language.
|
|
|
|
Parameters:
|
|
|
|
None.
|
|
|
|
Returns:
|
|
|
|
Always C<1>.
|
|
|
|
=item set_lang ( [ $lang, ... ] )
|
|
|
|
I<Instance method>.
|
|
Sets current language along with translation catalog,
|
|
and POSIX locale if possible.
|
|
|
|
Parameter:
|
|
|
|
=over
|
|
|
|
=item $lang, ...
|
|
|
|
Language tags or similar things.
|
|
Old style "locale" by Sympa (see also L</Compatibility>) will also be
|
|
accepted.
|
|
If multiple tags are specified, this function tries each of them in order.
|
|
|
|
Note that C<'en'> will always succeed. Thus, putting it at the end of
|
|
argument list may be useful.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Canonic language tag actually set or, if no usable catalogs were found,
|
|
C<undef>. If no arguments are given, do nothing and returns C<undef>.
|
|
|
|
Note that the language actually set may not be identical to the parameter
|
|
$lang, even when latter has been canonicalized.
|
|
|
|
The language tag C<'en'> is special:
|
|
It is used to set C<'C'> locale and will succeed always.
|
|
|
|
Note:
|
|
This function of Sympa 6.2a or earlier returned old style "locale" names.
|
|
|
|
=item native_name ( )
|
|
|
|
I<Instance method>.
|
|
Get the name of the language, i.e. the one defined in the catalog.
|
|
|
|
Parameters:
|
|
|
|
None.
|
|
|
|
Returns:
|
|
|
|
Name of the language in native notation.
|
|
If it was not found, returns an empty string C<''>.
|
|
|
|
Note:
|
|
The name is the content of C<Language-Team:> field in the header of catalog.
|
|
|
|
=item get_lang ()
|
|
|
|
I<Instance method>.
|
|
Get current language tag.
|
|
|
|
Parameters:
|
|
|
|
None.
|
|
|
|
Returns:
|
|
|
|
Current language.
|
|
If it is not known, returns default language tag.
|
|
|
|
=back
|
|
|
|
=head3 Native language support (NLS)
|
|
|
|
=over 4
|
|
|
|
=item dgettext ( $domain, $msgid )
|
|
|
|
I<Instance method>.
|
|
Returns the translation of given string using NLS catalog in domain $domain.
|
|
Note that L</set_lang>() must be called in advance.
|
|
|
|
Parameter:
|
|
|
|
=over
|
|
|
|
=item $domain
|
|
|
|
gettext domain.
|
|
|
|
=item $msgid
|
|
|
|
gettext message ID.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Translated string or, if it wasn't found, original string.
|
|
|
|
=item gettext ( $msgid )
|
|
|
|
I<Instance method>.
|
|
Returns the translation of given string using current NLS catalog.
|
|
Note that L</set_lang>() must be called in advance.
|
|
|
|
Parameter:
|
|
|
|
=over
|
|
|
|
=item $msgid
|
|
|
|
gettext message ID.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Translated string or, if it wasn't found, original string.
|
|
|
|
If special argument C<'_language_'> is given,
|
|
returns the name of language in native form (See L<native_name>()).
|
|
For argument C<''> returns empty string.
|
|
|
|
=item gettext_sprintf ( $format, $args, ... )
|
|
|
|
I<Instance method>.
|
|
Internationalized L<sprintf>().
|
|
At first, translates $format argument using L</gettext>().
|
|
Then returns formatted string by remainder of arguments.
|
|
|
|
This is equivalent to C<sprintf( gettext($format), $args, ... )>
|
|
with appropriate POSIX locale if possible.
|
|
|
|
Parameters:
|
|
|
|
=over
|
|
|
|
=item $format
|
|
|
|
Format string.
|
|
See also L<perlfunc/sprintf>.
|
|
|
|
=item $args, ...
|
|
|
|
Arguments fed to sprintf().
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Translated and formatted string.
|
|
|
|
=item gettext_strftime ( $format, $args, ... )
|
|
|
|
I<Instance method>.
|
|
Internationalized L<strftime|POSIX/strftime>().
|
|
At first, translates $format argument using L</gettext>().
|
|
Then returns formatted date/time by remainder of arguments.
|
|
|
|
If appropriate POSIX locale is not available, parts of result (names of days,
|
|
months etc.) will be taken from the catalog.
|
|
|
|
Parameters:
|
|
|
|
=over
|
|
|
|
=item $format
|
|
|
|
Format string.
|
|
See also L<POSIX/strftime>.
|
|
|
|
=item $args, ...
|
|
|
|
Arguments fed to POSIX::strftime().
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Translated and formatted string.
|
|
|
|
=item maketext ( $textdomain, $template, $args, ... )
|
|
|
|
I<Instance method>.
|
|
At first, translates $template argument using L</gettext>().
|
|
Then replaces placeholders (C<%1>, C<%2>, ...) in template with arguments.
|
|
|
|
Numeric arguments will be formatted using appropriate locale, if any:
|
|
Typically, the decimal point specific to each locale may be used.
|
|
|
|
Parameters:
|
|
|
|
=over
|
|
|
|
=item $textdomain
|
|
|
|
NLS domain to be used for searching catalogs.
|
|
|
|
=item $template
|
|
|
|
Template string which may include placeholders.
|
|
|
|
=item $args, ...
|
|
|
|
Arguments corresponding to placeholders.
|
|
|
|
=back
|
|
|
|
Returns:
|
|
|
|
Translated and replaced string.
|
|
|
|
=back
|
|
|
|
|
|
B<Note>:
|
|
|
|
Calls of L</gettext>(), L</gettext_sprintf>() and L</gettext_strftime>() are
|
|
extracted during packaging process and are added to translation catalog.
|
|
|
|
=head1 CAVEATS
|
|
|
|
=over
|
|
|
|
=item *
|
|
|
|
We impose some restrictions and modifications to the format described in
|
|
BCP 47:
|
|
language extension subtags won't be supported;
|
|
if script and variant subtags co-exist, latter will be ignored;
|
|
the first one of multiple variant subtags will be used;
|
|
each variant subtag may be longer than eight characters;
|
|
extension subtags are not supported.
|
|
|
|
=item *
|
|
|
|
Since catalogs for C<zh>, C<zh-Hans> or C<zh-Hant> may not be provided,
|
|
L</set_lang>() will choose approximate catalogs for these tags.
|
|
|
|
=back
|
|
|
|
=head1 SEE ALSO
|
|
|
|
RFC 5646 I<Tags for Identifying Languages>.
|
|
L<http://tools.ietf.org/html/rfc5646>.
|
|
|
|
I<Translating Sympa>.
|
|
L<https://translate.sympa.org/pages/help>.
|
|
|
|
=head1 HISTORY
|
|
|
|
L<Language> module supporting multiple languages by single installation
|
|
and using NLS catalog in msgcat format appeared on Sympa 3.0a.
|
|
|
|
Sympa 4.2b.3 adopted gettext portable object (PO) catalog and POSIX locale.
|
|
|
|
On Sympa 6.2, rewritten module L<Sympa::Language> adopted BCP 47 language tag
|
|
to determine language context, and installing POSIX locale became optional.
|