Date: Thu, 16 Jul 2009 22:30:12 +0000 (UTC) From: Edwin Groothuis <edwin@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r195730 - user/edwin/locale/tools Message-ID: <200907162230.n6GMUCMo044652@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: edwin Date: Thu Jul 16 22:30:11 2009 New Revision: 195730 URL: http://svn.freebsd.org/changeset/base/195730 Log: For in src/tools/tools/locale: tools/charmaps.xml - datafile with the languages, countries and encodings. tools/cldr2def.pl - convertor from the CLDR data. tools/charmaps.pm - interface between the XML data file and the perl script. Added: user/edwin/locale/tools/ user/edwin/locale/tools/charmaps.pm user/edwin/locale/tools/charmaps.xml user/edwin/locale/tools/cldr2def.pl (contents, props changed) Added: user/edwin/locale/tools/charmaps.pm ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/edwin/locale/tools/charmaps.pm Thu Jul 16 22:30:11 2009 (r195730) @@ -0,0 +1,99 @@
+#!/usr/local/bin/perl -w
+
+use strict;
+use XML::Parser;
+use Data::Dumper;
+
+my %data = ();
+my %d = ();
+my $index = -1;
+
+# get_xmldata([$file])
+#
+# Parse the charmaps XML file and return the collected data as a hash
+# (flattened key/value list).  The Start handler h_start fills the
+# file-scoped %d while the parser walks the document:
+#   $d{L}  - entries built from <language> elements
+#   $d{T}  - entries built from <translation> elements
+#
+# $file is the path of the XML file; when omitted, "charmaps.xml" in the
+# current directory is read.
+#
+# Dies if the file cannot be opened.
+sub get_xmldata {
+    my $file = shift;
+    $file = "charmaps.xml" if (!defined $file);
+
+    open(my $fin, "<", $file) or die("Cannot open $file: $!\n");
+    my @xml = <$fin>;
+    chomp(@xml);
+    close($fin);
+
+    my $parser = XML::Parser->new(Handlers => {
+        Start => \&h_start,
+        End   => \&h_end,
+        Char  => \&h_char
+    });
+    # Line endings were stripped above, so the parser receives the whole
+    # document as a single line.
+    $parser->parse(join("", @xml));
+    return %d;
+}
+
+sub h_start { + my $expat = shift; + my $element = shift; + my @attrs = @_; + my %attrs = (); + + + while ($#attrs >= 0) { + $attrs{$attrs[0]} = $attrs[1]; + shift(@attrs); + shift(@attrs); + } + + $data{element}{++$index} = $element; + + if ($element eq "language") { + my $name = $attrs{name}; + my $encoding = $attrs{encoding}; + my $countries = $attrs{countries}; + my $family = $attrs{family}; + my $f = defined $attrs{family} ? 
$attrs{family} : "x"; + my $link = $attrs{link}; + my $fallback = $attrs{fallback}; + + $d{L}{$name}{$f}{fallback} = $fallback; + $d{L}{$name}{$f}{link} = $link; + $d{L}{$name}{$f}{family} = $family; + $d{L}{$name}{$f}{encoding} = $encoding; + $d{L}{$name}{$f}{countries} = $countries; + foreach my $c (split(" ", $countries)) { + if (defined $encoding) { + foreach my $e (split(" ", $encoding)) { + $d{L}{$name}{$f}{data}{$c}{$e} = undef; + } + } + $d{L}{$name}{$f}{data}{$c}{"UTF-8"} = undef; + } + return; + } + + if ($element eq "translation") { + if (defined $attrs{hex}) { + my $k = "<" . $attrs{cldr} . ">"; + my $hs = $attrs{hex}; + $d{T}{$attrs{encoding}}{$k} = ""; + while ($hs ne "") { + $d{T}{$attrs{encoding}}{$k} .= + chr(hex(substr($hs, 0, 2))); + $hs = substr($hs, 2); + } + } + if (defined $attrs{string}) { + $d{T}{$attrs{encoding}}{"<" . $attrs{cldr} . ">"} = + $attrs{string}; + } + return; + } +} + +sub h_end { + my $expat = shift; + my $element = shift; + $index--; +} + +sub h_char { + my $expat = shift; + my $string = shift; +} + +#use Data::Dumper; +#my %D = get_xmldata(); +#print Dumper(%D); +1; Added: user/edwin/locale/tools/charmaps.xml ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/edwin/locale/tools/charmaps.xml Thu Jul 16 22:30:11 2009 (r195730) @@ -0,0 +1,237 @@ +<data> +<languages> + <!-- Attributes known: + name = language name - only one + encoding = encodings to be done - separated by space + countries = countries to create - separated by space + family = which font family - only one + link = only with family, create this original file too - only one + fallback = read this file if name_countries doesn't exist - only one + + By default, the name of the input file is name_countries. + If family is defined, the name of the input file will be name_family_countries. + If fallback is defined, the name of the input file will be that. 
+ + By default, the name of the output file is name_countries. + If family is defined, the name of the output file will be name_family_countries. + For backwards compatibility you use link which is name_countries. + + --> + <language name="af" + encoding="ISO8859-1 ISO8859-15" + countries="ZA" /> + <language name="am" + countries="ET" /> <!-- UTF-8 only --> + <language name="be" + encoding="CP1131 CP1251 ISO8859-5" + countries="BY" /> + <language name="bg" + encoding="CP1251" + countries="BG" /> + <language name="ca" + fallback="ca_ES" + encoding="ISO8859-1 ISO8859-15" + countries="AD ES FR IT" /> <!-- Not defined for anything else --> + <language name="cs" + encoding="ISO8859-2" + countries="CZ" /> + <language name="da" + encoding="ISO8859-1 ISO8859-15" + countries="DK" /> + <language name="de" + encoding="ISO8859-1 ISO8859-15" + countries="AT CH DE" /> + <language name="el" + encoding="ISO8859-7" + countries="GR" /> + <language name="en" + encoding="ISO8859-1 ISO8859-15 US-ASCII" + countries="AU CA GB NZ US" /> + <language name="en" + countries="IE" /> + <language name="es" + encoding="ISO8859-1 ISO8859-15" + countries="ES" /> + <language name="et" + encoding="ISO8859-15" + countries="EE" /> + <language name="eu" + encoding="ISO8859-1 ISO8859-15" + countries="ES" /> + <language name="fi" + encoding="ISO8859-1 ISO8859-15" + countries="FI" /> + <language name="fr" + encoding="ISO8859-1 ISO8859-15" + countries="BE CA CH FR" /> + <language name="he" + countries="IL" /> + <language name="hi" + encoding="ISCII-DEV" + countries="IN" /> + <language name="hr" + encoding="ISO8859-2" + countries="HR" /> + <language name="hu" + encoding="ISO8859-2" + countries="HU" /> + <language name="hy" + encoding="ARMSCII-8" + countries="AM" /> + <language name="is" + encoding="ISO8859-1 ISO8859-15" + countries="IS" /> + <language name="it" + encoding="ISO8859-1 ISO8859-15" + countries="CH IT" /> + <language name="ja" + link="jp_JP" + encoding="SJIS eucJP" + countries="JP" /> + <language 
name="kk" + family="Cyrl" + link="kk_KZ" + encoding="PT154" + countries="KZ" /> + <language name="ko" + encoding="CP949 eucKR" + countries="KR" /> + <language name="la" + encoding="ISO8859-1 ISO8859-2 ISO8859-4 ISO8859-15 US-ASCII" + countries="LN" /> + <language name="lt" + encoding="ISO8859-4 ISO8859-13" + countries="LT" /> + <language name="mn" + family="Cyrl" + link="mn_MN" + countries="MN" /> + <language name="nb" + link="no_NO" + encoding="ISO8859-1 ISO8859-15" + countries="NO" /> + <language name="nl" + encoding="ISO8859-1 ISO8859-15" + countries="BE NL" /> + <language name="nn" + encoding="ISO8859-1 ISO8859-15" + countries="NO" /> + <language name="pl" + encoding="ISO8859-2" + countries="PL" /> + <language name="pt" + encoding="ISO8859-1 ISO8859-15" + countries="PT" /> + <language name="ro" + encoding="ISO8859-2" + countries="RO" /> + <language name="ru" + encoding="CP1251 CP866 ISO8859-5 KOI8-R" + countries="RU" /> + <language name="sk" + encoding="ISO8859-2" + countries="SK" /> + <language name="sl" + encoding="ISO8859-2" + countries="SI" /> + <language name="sr" + family="Latn" + link="sr_YU" + encoding="ISO8859-2" + countries="RS" /> + <language name="sr" + family="Cyrl" + link="sr_YU" + encoding="ISO8859-5" + countries="RS" /> + <language name="sv" + encoding="ISO8859-1 ISO8859-15" + countries="SE" /> + <language name="tr" + encoding="ISO8859-9" + countries="TR" /> + <language name="uk" + encoding="CP1251 ISO8859-5 KOI8-U" + countries="UA" /> + <language name="zh" + family="Hans" + link="zh_CN" + encoding="GB18030 GB2312 GBK eucCN" + countries="CN" /> + <language name="zh" + family="Hant" + link="zh_HK" + encoding="Big5HKSCS" + countries="HK" /> + <language name="zh" + family="Hant" + link="zh_TW" + encoding="Big5" + countries="TW" /> +</languages> + +<translations> + <!-- These don't have a special Euro sign so just use Eu for it --> + <translation encoding="ISO8859-1" cldr="EURO_SIGN" string="Eu" /> + <translation encoding="ISO8859-2" 
cldr="EURO_SIGN" string="Eu" /> + + <!-- These don't have a special Won sign so just use byte 0x5C for it --> + <translation encoding="CP949" cldr="WON_SIGN" hex="5C" /> + <translation encoding="eucKR" cldr="WON_SIGN" hex="5C" /> + + <!-- Minus and dashes --> + <translation encoding="ISO8859-1" cldr="MINUS_SIGN" string="-" /> + <translation encoding="ISO8859-4" cldr="MINUS_SIGN" string="-" /> + <translation encoding="ISO8859-13" cldr="MINUS_SIGN" string="-" /> + <translation encoding="ISO8859-15" cldr="MINUS_SIGN" string="-" /> + <translation encoding="ISO8859-2" cldr="EN_DASH" string="-" /> + + <!-- Copied from the original FreeBSD src/share/monetdef --> + <translation encoding="CP1251" cldr="HRYVNIA_SIGN" hex="E3F0ED" /> + <translation encoding="ISO8859-5" cldr="HRYVNIA_SIGN" hex="D3E0DD" /> + <translation encoding="KOI8-U" cldr="HRYVNIA_SIGN" hex="C7D2CE" /> + + <!-- Value found in http://en.wikipedia.org/wiki/Pound_sign --> + <translation encoding="US-ASCII" cldr="POUND_SIGN" hex="A3" /> + + <!-- Values found in http://en.wikipedia.org/wiki/Ya_(Cyrillic) --> + <translation encoding="CP1251" cldr="CYRILLIC_SMALL_LETTER_YA" hex="FF" /> + <translation encoding="ISO8859-5" cldr="CYRILLIC_SMALL_LETTER_YA" hex="EF" /> + <translation encoding="KOI8-U" cldr="CYRILLIC_SMALL_LETTER_YA" hex="D1" /> + <!-- Values found in http://en.wikipedia.org/wiki/Cyrillic_characters_in_Unicode --> + <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_A" string="A" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_A" string="a" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_KA" string="k" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_O" string="o" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_DE" string="D" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_DE" string="d" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_IE" string="E" /> + <translation 
encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_IE" string="e" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_CAPITAL_LETTER_EN" string="N" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_EN" string="n" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_BE" string="b" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_ER" string="r" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_JE" string="j" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_EL" string="l" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_GHE" string="g" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_PE" string="p" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_TE" string="t" /> + <translation encoding="ISO8859-2" cldr="CYRILLIC_SMALL_LETTER_VE" string="v" /> + + <!-- Not sure why these ones aren't known by libiconv --> + <translation encoding="ISO8859-2" cldr="a" string="a" /> + <translation encoding="ISO8859-2" cldr="d" string="d" /> + <translation encoding="ISO8859-2" cldr="e" string="e" /> + <translation encoding="ISO8859-2" cldr="i" string="i" /> + <translation encoding="ISO8859-2" cldr="n" string="n" /> + <translation encoding="ISO8859-2" cldr="r" string="r" /> + + <translation encoding="ISO8859-5" cldr="t" string="t" /> + <translation encoding="ISO8859-5" cldr="k" string="k" /> + + <!-- Just a . ? --> + <translation encoding="ISO8859-2" cldr="FULL_STOP" string="." /> + <translation encoding="ARMSCII-8" cldr="ONE_DOT_LEADER" string="." 
/> + +</translations> +</data> Added: user/edwin/locale/tools/cldr2def.pl ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/edwin/locale/tools/cldr2def.pl Thu Jul 16 22:30:11 2009 (r195730) @@ -0,0 +1,527 @@ +#!/usr/bin/perl -wC + +use strict; +use XML::Parser; +use Text::Iconv; +use Tie::IxHash; +use Data::Dumper; +use Digest::SHA qw(sha1_hex); +require "charmaps.pm"; + +if ($#ARGV < 2) { + print "Usage: $0 <cldrdir> <charmaps> <type> [la_CC]\n"; + exit(1); +} + +my $DEFENCODING = "UTF-8"; +my $DIR = shift(@ARGV); +my $CHARMAPS = shift(@ARGV); +my $TYPE = shift(@ARGV); +my $doonly = shift(@ARGV); +my @filter = (); + +my %convertors = (); + +my %values = (); +my %hashtable = (); +my %languages = (); +my %translations = (); +get_languages(); + +my %cm = (); +get_utf8map(); + +my %keys = (); +tie(%keys, "Tie::IxHash"); +tie(%hashtable, "Tie::IxHash"); + +my %FILESNAMES = ( + "monetdef" => "LC_MONETARY", + "timedef" => "LC_TIME", + "msgdef" => "LC_MESSAGES", + "numericdef" => "LC_NUMERIC" +); + +my %callback = ( + mdorder => \&callback_mdorder, +); + +my %DESC = ( + + # numericdef + "decimal_point" => "decimal_point", + "thousands_sep" => "thousands_sep", + "grouping" => "grouping", + + # monetdef + "int_curr_symbol" => "int_curr_symbol (last character always " . 
+ "SPACE)", + "currency_symbol" => "currency_symbol", + "mon_decimal_point" => "mon_decimal_point", + "mon_thousands_sep" => "mon_thousands_sep", + "mon_grouping" => "mon_grouping", + "positive_sign" => "positive_sign", + "negative_sign" => "negative_sign", + "int_frac_digits" => "int_frac_digits", + "frac_digits" => "frac_digits", + "p_cs_precedes" => "p_cs_precedes", + "p_sep_by_space" => "p_sep_by_space", + "n_cs_precedes" => "n_cs_precedes", + "n_sep_by_space" => "n_sep_by_space", + "p_sign_posn" => "p_sign_posn", + "n_sign_posn" => "n_sign_posn", + + # msgdef + "yesexpr" => "yesexpr", + "noexpr" => "noexpr", + "yesstr" => "yesstr", + "nostr" => "nostr", + + # timedef + "abmon" => "Short month names", + "mon" => "Long month names (as in a date)", + "abday" => "Short weekday names", + "day" => "Long weekday names", + "t_fmt" => "X_fmt", + "d_fmt" => "x_fmt", + "XXX" => "c_fmt", + "am_pm" => "AM/PM", + "d_t_fmt" => "date_fmt", + "mon2" => "Long month names (without case ending)", + "md_order" => "md_order", + "t_fmt_ampm" => "ampm_fmt", + +); + +if ($TYPE eq "numericdef") { + %keys = ( + "decimal_point" => "s", + "thousands_sep" => "s", + "grouping" => "ai", + ); + get_fields(); + print_fields(); + make_makefile(); +} + +if ($TYPE eq "monetdef") { + %keys = ( + "int_curr_symbol" => "s", + "currency_symbol" => "s", + "mon_decimal_point" => "s", + "mon_thousands_sep" => "s", + "mon_grouping" => "ai", + "positive_sign" => "s", + "negative_sign" => "s", + "int_frac_digits" => "i", + "frac_digits" => "i", + "p_cs_precedes" => "i", + "p_sep_by_space" => "i", + "n_cs_precedes" => "i", + "n_sep_by_space" => "i", + "p_sign_posn" => "i", + "n_sign_posn" => "i" + ); + get_fields(); + print_fields(); + make_makefile(); +} + +if ($TYPE eq "msgdef") { + %keys = ( + "yesexpr" => "s", + "noexpr" => "s", + "yesstr" => "s", + "nostr" => "s" + ); + get_fields(); + print_fields(); + make_makefile(); +} + +if ($TYPE eq "timedef") { + %keys = ( + "abmon" => "as", + "mon" => "as", 
"abday" => "as", + "day" => "as", + "t_fmt" => "s", + "d_fmt" => "s", + "XXX" => "s", + "am_pm" => "as", + "d_fmt" => "s", + "d_t_fmt" => "s", + "mon2" => ">mon", # repeat them for now + "md_order" => "<mdorder<d_fmt<s", + "t_fmt_ampm" => "s", + ); + get_fields(); + print_fields(); + make_makefile(); +}
+
+# callback_mdorder($fmt)
+#
+# Callback registered as "mdorder" in %callback.  Returns $fmt with every
+# character other than "d" and "m" removed (e.g. "%d.%m.%Y" gives "dm"),
+# or undef when $fmt is undefined.
+sub callback_mdorder {
+    my $s = shift;
+    return undef if (!defined $s);
+    $s =~ s/[^dm]//g;
+    return $s;
+}
+
+############################
+
+# get_utf8map()
+#
+# Load "$DIR/posix/$DEFENCODING.cm" into %cm.  Only the lines between
+# "CHARMAP" and "END CHARMAP" are used; comment lines (leading "#") and
+# empty lines are skipped.  For each remaining line the first
+# whitespace-delimited token becomes the key and the rest of the line,
+# with every "\x" removed, becomes the value.
+#
+# Dies if the charmap file cannot be opened.
+sub get_utf8map {
+    my $file = "$DIR/posix/$DEFENCODING.cm";
+    open(my $fin, "<", $file) or die("Cannot open $file: $!\n");
+    my @lines = <$fin>;
+    close($fin);
+    chomp(@lines);
+
+    my $incharmap = 0;
+    foreach my $line (@lines) {
+        $line =~ s/\r//;
+        next if ($line =~ /^\#/);
+        next if ($line eq "");
+        if ($line eq "CHARMAP") {
+            $incharmap = 1;
+            next;
+        }
+        next if (!$incharmap);
+        last if ($line eq "END CHARMAP");
+
+        # Skip lines without a "name value" pair instead of reusing the
+        # captures left over from the previous match.
+        next if ($line !~ /^(\S+)\s+(.*)/);
+        my ($k, $v) = ($1, $2);
+        $v =~ s/\\x//g;
+        $cm{$k} = $v;
+    }
+}
+
+# get_languages()
+#
+# Read the XML data via get_xmldata($CHARMAPS) and copy its "L" and "T"
+# entries into %languages and %translations.  When a locale was given on
+# the command line ($doonly), split it on "_" into @filter as
+# (language, family, country); a two-part name gets the family "x".
+#
+# Dies when $doonly is non-empty but has neither two nor three parts, so
+# that a mistyped locale does not silently select every locale.
+sub get_languages {
+    my %data = get_xmldata($CHARMAPS);
+    %languages = %{$data{L}};
+    %translations = %{$data{T}};
+
+    return if (!defined $doonly || $doonly eq "");
+
+    my @a = split(/_/, $doonly);
+    if ($#a == 1) {
+        @filter = ($a[0], "x", $a[1]);
+    } elsif ($#a == 2) {
+        @filter = @a;
+    } else {
+        die("Invalid locale '$doonly': expected la_CC or la_Family_CC\n");
+    }
+
+    print Dumper(@filter);
+    return;
+}
+
+# get_fields()
+#
+# For every language/family/country in %languages (restricted to @filter
+# when it is set) read "$DIR/posix/<name>.$DEFENCODING.src", where <name>
+# is la_CC or la_Family_CC, falling back to the language's "fallback" file
+# when the first open fails.  The entry
+# $languages{$l}{$f}{data}{$c}{$DEFENCODING} is set to 0 before the
+# attempt and to 1 once a file has been opened.
+#
+# For each key of %keys the first line starting with that key followed by
+# whitespace is appended, without the key, to $values{$l}{$c}{$key}.  A
+# line ending in "/" continues on the next line, whose leading whitespace
+# is dropped.
+#
+# NOTE(review): %values is keyed by language and country only, so two
+# families of one language that share a country (charmaps.xml lists "sr"
+# with Latn and Cyrl for RS) append into the same entries - confirm
+# whether the family should be part of the key.
+sub get_fields {
+    foreach my $l (sort keys(%languages)) {
+    foreach my $f (sort keys(%{$languages{$l}})) {
+    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
+        next if ($#filter == 2 && ($filter[0] ne $l
+            || $filter[1] ne $f || $filter[2] ne $c));
+
+        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
+        my $file = $l . "_";
+        $file .= $f . "_" if ($f ne "x");
+        $file .= $c;
+
+        my $fin;
+        if (!open($fin, "<", "$DIR/posix/$file.$DEFENCODING.src")) {
+            if (!defined $languages{$l}{$f}{fallback}) {
+                print STDERR
+                    "Cannot open $file.$DEFENCODING.src\n";
+                next;
+            }
+            $file = $languages{$l}{$f}{fallback};
+            if (!open($fin, "<", "$DIR/posix/$file.$DEFENCODING.src")) {
+                print STDERR
+                    "Cannot open fallback " .
+                    "$file.$DEFENCODING.src\n";
+                next;
+            }
+        }
+        print "Reading from $file.$DEFENCODING.src for ${l}_${f}_${c}\n";
+        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
+        my @lines = <$fin>;
+        chomp(@lines);
+        close($fin);
+
+        foreach my $k (keys(%keys)) {
+            # Reset per key so an unfinished continuation at the end of
+            # the file cannot spill into the next key.
+            my $continue = 0;
+            foreach my $raw (@lines) {
+                # Work on a copy; @lines is scanned again for each key.
+                my $line = $raw;
+                $line =~ s/\r//;
+                next if (!$continue && $line !~ /^\Q$k\E\s/);
+                if ($continue) {
+                    $line =~ s/^\s+//;
+                } else {
+                    $line =~ s/^\Q$k\E\s+//;
+                }
+
+                $values{$l}{$c}{$k} = ""
+                    if (!defined $values{$l}{$c}{$k});
+
+                $continue = ($line =~ /\/$/);
+                $line =~ s/\/$// if ($continue);
+                $values{$l}{$c}{$k} .= $line;
+
+                last if (!$continue);
+            }
+        }
+    }
+    }
+    }
+}
+
+# decodecldr($symbol)
+#
+# Return the byte string for a charmap symbol: the hex digits stored in
+# $cm{$symbol} are packed two digits per byte, for any number of bytes.
+#
+# When the symbol is unknown or its value is not an even-length run of hex
+# digits, the text "length = N" (N = length of the stored value, 0 when
+# there is none) is returned instead.  That marker contains no "<...>", so
+# the substitution loop in print_fields still terminates.
+sub decodecldr {
+    my $s = shift;
+    my $v = $cm{$s};
+
+    return "length = 0" if (!defined $v);
+    return "length = " . length($v)
+        if ($v !~ /\A(?:[0-9A-Fa-f]{2})+\z/);
+    return pack("H*", $v);
+}
+
+# translate($enc, $symbol)
+#
+# Return the replacement from %translations for $symbol in encoding $enc,
+# or undef when there is none.  The lookup does not create an entry for
+# an encoding that has no translations.
+sub translate {
+    my $enc = shift;
+    my $v = shift;
+
+    return exists $translations{$enc} ? $translations{$enc}{$v} : undef;
+}
+
+sub print_fields { + foreach my $l (sort keys(%languages)) { + foreach my $f (sort keys(%{$languages{$l}})) { + foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { + next if ($#filter == 2 && ($filter[0] ne $l + || $filter[1] ne $f || $filter[2] ne $c)); + foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { + if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") { + print "Skipping ${l}_" . + ($f eq "x" ? "" : "${f}_") . 
+ "${c} - not read\n"; + next; + } + my $file = $l; + $file .= "_" . $f if ($f ne "x"); + $file .= "_" . $c; + print "Writing to $file in $enc\n"; + + eval { + $convertors{$enc} = + Text::Iconv->new($DEFENCODING, $enc); + } if (!defined $convertors{$enc}); + if (!defined $convertors{$enc}) { + print "Failed! Cannot convert between " . + "$DEFENCODING and $enc.\n"; + next; + }; + + open(FOUT, ">$TYPE/$file.$enc.new"); + my $okay = 1; + my $output = ""; + print FOUT <<EOF; +# \$FreeBSD\$ +# +# Warning: Do not edit. This file is automatically generated from the +# tools in /usr/src/tools/tools/locale. The data is obtained from the +# CLDR project, obtained from http://cldr.unicode.org/ +# +# ${l}_$c in $enc +# +# ----------------------------------------------------------------------------- +EOF + foreach my $k (keys(%keys)) { + my $f = $keys{$k}; + + die("Unknown $k in \%DESC") + if (!defined $DESC{$k}); + + $output .= "#\n# $DESC{$k}\n"; + + if ($f =~ /^>/) { + $k = substr($f, 1); + $f = $keys{$k}; + } + if ($f =~ /^\</) { + my @a = split(/\</, substr($f, 1)); + my $rv = + &{$callback{$a[0]}}($values{$l}{$c}{$a[1]}); + $values{$l}{$c}{$k} = $rv; + $f = $a[2]; + } + + my $v = $values{$l}{$c}{$k}; + $v = "undef" if (!defined $v); + + if ($f eq "i") { + $output .= "$v\n"; + next; + } + if ($f eq "ai") { + $output .= "$v\n"; + next; + } + if ($f eq "s") { + $v =~ s/^"//; + $v =~ s/"$//; + my $cm = ""; + while ($v =~ /^(.*?)(<.*?>)(.*)/) { + $cm = $2; + $v = $1 . decodecldr($2) . $3; + } + my $fv = + $convertors{$enc}->convert("$v"); + $fv = translate($enc, $cm) + if (!defined $fv); + if (!defined $fv) { + print STDERR + "Could not convert $k " . + "($cm) from $DEFENCODING " . + "to $enc\n"; + $okay = 0; + next; + } + $output .= "$fv\n"; + next; + } + if ($f eq "as") { + foreach my $v (split(/;/, $v)) { + $v =~ s/^"//; + $v =~ s/"$//; + my $cm = ""; + while ($v =~ /^(.*?)(<.*?>)(.*)/) { + $cm = $2; + $v = $1 . + decodecldr($2) . 
$3; + } + my $fv = + $convertors{$enc}->convert("$v"); + $fv = translate($enc, $cm) + if (!defined $fv); + if (!defined $fv) { + print STDERR + "Could not " . + "convert $k ($cm)" . + " from " . + "$DEFENCODING to " . + "$enc\n"; + $okay = 0; + next; + } + $output .= "$fv\n"; + } + next; + } + + die("$k is '$f'"); + + } + + $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output); + $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1; + print FOUT "$output# EOF\n"; + close(FOUT); + + if ($okay) { + rename("$TYPE/$file.$enc.new", + "$TYPE/$file.$enc.src"); + } else { + rename("$TYPE/$file.$enc.new", + "$TYPE/$file.$enc.failed"); + } + } + } + } + } +} + +sub make_makefile { + return if ($#filter > -1); + print "Creating Makefile for $TYPE\n"; + open(FOUT, ">$TYPE/Makefile"); + print FOUT <<EOF; +# +# \$FreeBSD\$ +# +# Warning: Do not edit. This file is automatically generated from the +# tools in /usr/src/tools/tools/locale. +# + +LOCALEDIR= /usr/share/locale +FILESNAME= $FILESNAMES{$TYPE} +.SUFFIXES: .src .out + +.src.out: + grep -v '^\#' < \${.IMPSRC} > \${.TARGET} + +EOF + + foreach my $hash (keys(%hashtable)) { + my @files = sort(keys(%{$hashtable{$hash}})); + if ($#files > 0) { + my $link = shift(@files); + $link =~ s/_x_/_/; # strip family if none there + foreach my $file (@files) { + my @a = split(/_/, $file); + my @b = split(/\./, $a[-1]); + $file =~ s/_x_/_/; + print FOUT "SAME+=\t\t$link:$file\t#hash\n"; + undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]}); + } + } + } + + foreach my $l (sort keys(%languages)) { + foreach my $f (sort keys(%{$languages{$l}})) { + foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { + next if ($#filter == 2 && ($filter[0] ne $l + || $filter[1] ne $f || $filter[2] ne $c)); + foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { + my $file = $l . "_"; + $file .= $f . 
"_" if ($f ne "x"); + $file .= $c; + next if (!defined $languages{$l}{$f}{data}{$c}{$e}); + print FOUT "LOCALES+=\t$file.$e\n"; + } + + if (defined $languages{$l}{$f}{link}) { + foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { + my $file = $l . "_"; + $file .= $f . "_" if ($f ne "x"); + $file .= $c; + print FOUT "SAME+=\t\t$file.$e:$languages{$l}{$f}{link}.$e\t# legacy\n"; + + } + + } + + } + } + } + + print FOUT <<EOF; + +FILES= \${LOCALES:S/\$/.out/} +CLEANFILES= \${FILES} + +.for f in \${SAME} +SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://} +.endfor + +.for f in \${LOCALES} +FILESDIR_\${f}.out= \${LOCALEDIR}/\${f} +.endfor + + +src: + ./cldr2def.pl /home/edwin/cldr/1.7.0/ charmaps.xml timedef nl_NL + +.include <bsd.prog.mk> +EOF + + close(FOUT); +}
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200907162230.n6GMUCMo044652>