Date: Wed, 29 Jul 2009 21:54:34 +0000 (UTC) From: Edwin Groothuis <edwin@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r195954 - user/edwin/locale/tools Message-ID: <200907292154.n6TLsYnT083423@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
# Author: edwin
# Date: Wed Jul 29 21:54:34 2009
# New Revision: 195954
# URL: http://svn.freebsd.org/changeset/base/195954
#
# Log:
#   Add small tool to convert UTF-8 encoded strings back into CLDR "markup"
#   language.
#
# Added:
#   user/edwin/locale/tools/UTF82encoding.pl   (contents, props changed)
#
# ==============================================================================
# user/edwin/locale/tools/UTF82encoding.pl (first committed in r195954)
# ==============================================================================
#!/usr/bin/perl -w
#
# Usage: UTF82encoding.pl <charmap-dir> <input-file>
#
# Reads the character map <charmap-dir>/posix/UTF-8.cm and builds a lookup
# table from byte sequences (as upper-case hex, two digits per byte) to the
# symbolic names found in the first column of the charmap.  Then reads
# <input-file> as raw bytes and writes it to STDOUT with every byte sequence
# that is present in the table replaced by its symbolic name.
#
# Input lines starting with '#' are copied through unchanged.  A byte that
# does not start any known sequence is reported inline as "--HEX--" on a line
# of its own, and processing resumes at the next byte.

use strict;
use warnings;
use Data::Dumper;

# Longest byte sequence that is looked up in the charmap.  UTF-8 encodes a
# character in at most four bytes.
my $MAX_SEQ_BYTES = 4;

die "usage: $0 <charmap-dir> <input-file>\n" if (@ARGV < 2);
my ($cmdir, $input) = @ARGV;

#
# Load the charmap.  Only lines that start with '<' and consist of exactly
# two whitespace-separated fields are used: the symbolic name and the byte
# sequence written as "\xHH\xHH...".
#
my $cmfile = "$cmdir/posix/UTF-8.cm";
open(my $cmfh, "<", $cmfile)
    or die "$0: cannot open $cmfile: $!\n";

my %cm = ();
while (my $line = <$cmfh>) {
    chomp($line);

    # Comments and empty lines do not start with '<' either, so this one
    # test skips everything that is not a mapping line.
    next if ($line !~ /^</);

    my @a = split(" ", $line);
    next if ($#a != 1);

    # "\xC3\xA9" -> "C3A9".  Upper-case the key so that it compares equal
    # to the "%02X" formatted bytes of the input regardless of the case
    # used in the charmap file.
    $a[1] =~ s/\\x//g;
    $cm{uc($a[1])} = $a[0];
}
close($cmfh);

# Sanity check of the loaded table.  Sent to STDERR so that it does not end
# up in the converted output on STDOUT.
print STDERR Dumper($cm{"4D"}), "\n";

#
# Convert the input.  It is read with the :raw layer so that ord() below
# always yields byte values, independent of any default I/O layers.
#
open(my $infh, "<:raw", $input)
    or die "$0: cannot open $input: $!\n";

while (my $line = <$infh>) {
    chomp($line);

    if ($line =~ /^#/) {
        print "$line\n";
        next;
    }

    # Two hex digits per byte: without the zero padding, bytes below 0x10
    # would never match their charmap key (e.g. "9" versus "09").
    my @hex = map { sprintf("%02X", ord($_)) } split(//, $line);

    my $i = 0;
    BYTE: while ($i <= $#hex) {
        # Never look past the end of the line.
        my $max = $#hex - $i + 1;
        $max = $MAX_SEQ_BYTES if ($max > $MAX_SEQ_BYTES);

        # Try the shortest sequence first and grow it one byte at a time.
        my $seq = "";
        for my $len (1 .. $max) {
            $seq .= $hex[$i + $len - 1];
            if (defined $cm{$seq}) {
                print $cm{$seq};
                $i += $len;
                next BYTE;
            }
        }

        # Nothing matched: report the longest sequence that was tried and
        # continue with the next byte.
        print "\n--$seq--\n";
        $i++;
    }
    print "\n";
}
close($infh);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200907292154.n6TLsYnT083423>