#!/pro/bin/perl use strict; use warnings; our $VERSION = "2.6-20120401"; binmode STDOUT, ":utf8"; use Module::CoreList; sub usage { my $err = shift and select STDERR; (my $pv = "$]0000000") =~ s{^([0-9]+)\.([0-9]{3})([0-9]{3})[0-9]*} {sprintf "%d.%d.%d",$1,$2,$3}e; (my $cv = $]) =~ s/0+$//; (my $uv = $Module::CoreList::version{$cv}{Unicode} // "unknown"); print "usage: uchar [-v] [-m base[:count] [ -m base[:count] ] ...\n", " uchar [-v] [-f] char ...\n", " perl $pv with Unicode $uv\n", "\n", " -m show map[s]\n", " -v verbosity\n", " -l list GBA characters\n", " -f find\n", " -F find (only chars supported in current font)\n", " -k show matching key combo(s)\n", " -d apply random diacricals\n", " -e show character encodings (uchar -e -f u_BREVE)\n", " -o also show octal version of encoding\n", " -E show character decodings (uchar -E fc)\n", " -b strip to base\n", " -h also show html entity if available\n"; @_ and print join "\n", @_, ""; exit $err; } # usage use Getopt::Long qw(:config nopermute bundling noignorecase); my @opt_m; my $opt_v = 0; my $opt_f = 0; my $opt_d = 0; my $opt_b = 0; my $opt_e = 0; my $opt_E = 0; my $opt_k = 0; my $opt_h = 0; my $opt_l = 0; my $opt_o = 0; GetOptions ( "help|?" => sub { usage (0); }, "v|version" => sub { print "$0: $VERSION\n"; exit 0; }, "m|map:s" => \@opt_m, "v|verbose:1" => \$opt_v, "f|find" => \$opt_f, "F|find-font" => sub { $opt_f = 3 }, "k|key|c|combo" => \$opt_k, "d|diac|random" => \$opt_d, "e|encodings" => \$opt_e, "o|oct|octal!" => \$opt_o, "E|decodings" => \$opt_E, "b|base|strip-base" => \$opt_b, "h|html" => \$opt_h, ) or usage (1); $opt_e and $opt_v ||= 1; use HTML::Entities; use charnames ":full"; use Encode qw(encode decode from_to); $ENV{LANG} //= $ENV{LC_ALL} // undef; my %compose; if ($ENV{LANG} && $^O eq "linux") { chomp (my @compose_files = grep { m{/Compose$} } `locate Compose`); my $compose_file = "/usr/share/X11/locale/$ENV{LANG}/Compose"; $compose_file && -s $compose_file or ($compose_file) = grep { m{/$ENV{LANG}/Compose$} } @compose_files; $compose_file && -s $compose_file or ($compose_file) = grep { m{/$ENV{LANG}.(?i:utf-?8)/Compose$} } @compose_files; foreach my $cfn ($compose_file, "$ENV{HOME}/.XCompose") { $cfn && open my $cf, "<", $cfn or next; while (<$cf>) { # Format 1 includes the Uxxxx notation if (my ($key, $ucp) = m/^\s*(.*?)\s*:\s*(?:".*?"\s+)[Uu]([0-9A-Fa-f]+)/) { $compose{sprintf "%05x", hex $ucp}{$key}++; next; } # Format 2 only has the string value if (my ($key, $str) = m/^\s*(.*?)\s*:\s*"([^"]+)"/) { $str = decode ("utf8", $str); $compose{sprintf "%05x", ord $str}{$key}++; next; } } close $cf; } } else { delete $ENV{LANG}; } my %xlat = ( ":)" => "\N{WHITE SMILING FACE}", ":(" => "\N{WHITE FROWNING FACE}", "->" => "\N{WHITE RIGHT POINTING INDEX}", "<-" => "\N{WHITE LEFT POINTING INDEX}", "<3" => "\N{BLACK HEART SUIT}", phone => "\N{WHITE TELEPHONE}", death => "\N{SKULL AND CROSSBONES}", euro => "\N{EURO SIGN}", heart => "\N{BLACK HEART SUIT}", ); @opt_m == 1 && !$opt_m[0] and @opt_m = qw( 00a0:df 2000:3f 20a0:1f 2140:1f 2190:1f 21c0:1f 2630:1f ); sub Names () { do "unicore/Name.pl"; } # Names my (%name, %cp, $n); for (split m/\n/ => Names ()) { s/\s+$//; my @n = split m/\t/ => $_, 3; @n == 2 and splice @n, 1, 0, 0; my ($cp, $cp2, $name) = @n; $name =~ m/[a-z]/ and next; # Non-character ($cp, $cp2) = map { hex "0$_" } ((split m/\s+/ => $cp), $cp2); $name{$cp} = $name; $cp{$name} //= $cp; } my %fcp = map { $_ => 1 } 0x20 .. 0x7f; if ($opt_d || $opt_f > 1) { if (my $font = (grep m{^ xterm \* (?:vt100\*)? (?:font|facename): \s* (.*) }ix => sort `xrdb -query` )[-1] ) { $font =~ s/^\S+:\s+(\S.*\S)\s*/$1/; $font =~ m/-.*-/ or $font = lc "*-$font-*"; #print STDERR "xlsfonts -lll -fn '$font'\n"; local @ARGV = ("xlsfonts -lll -fn '$font' |"); while (<>) { my ($cp, $m) = m/^\s+0x\w+\s+\((\d+)\)((?:\s+\d+)+)\s+0x\w+/ or next; $m =~ m/[1-9]/ and $fcp{$cp}++; } } } sub show_cp ($$$$) { my ($cp, $c, $pro, $name) = @_; my $encoded = $opt_v ? sprintf "%-9s ", join " ", map { sprintf "%02X", $_ } unpack "C*", $c : ""; my $chr_h = ""; if ($opt_h) { $chr_h = encode_entities ($c); $chr_h eq $c || $chr_h =~ m/^&#x/ and $chr_h = ""; $chr_h =~ s/^&// and chop $chr_h; $chr_h = sprintf "%-8.7s", $chr_h; } printf "%06x %s %s%-15s %s%s\n", $cp, $c, $chr_h, $pro && $pro->[1] ? $pro->[2] : "", $encoded, $name; $opt_k or return; my $h = sprintf "%05x", $cp; exists $compose{$h} or return; print "\t $_\n" for sort keys %{$compose{$h}}; } # show_cp sub show_enc ($) { my $uc = shift; foreach my $e (grep { !m/^mime/i } Encode->encodings (":all")) { my $x = encode ("utf8", $uc); from_to ($x, "utf8", $e); $x eq "?" && $uc ne "?" and next; printf " %-30s %-10s", $e, unpack "H*", $x; print " ", map { sprintf "\\%03o", hex $_ } unpack "(A2)*", unpack "H*", $x if $opt_o; print "\n" } exit; } # show_enc if ($opt_E) { my $c = pack "H*", shift; foreach my $e (grep { !m/^mime/i } Encode->encodings (":all")) { my $x = eval { decode ($e, $c) }; !defined $x || $x =~ m/^(?:$|\x{fffd})/ and next; $e eq "utf8" and $x .= "\t(".(join " " => map { sprintf "U+%05X", ord $_ } split // => $x) . ")"; printf " %-30s %s\n", $e, $x; } exit; } for (@ARGV) { s{\\x(?:\{([\dA-Fa-f]+)\}|([\dA-Fa-f]{2}))}{encode ("utf8", chr hex ($1//$2))}gie + s{&#(\d+);}{encode ("utf8", chr ($1))}ge + s{\\?U\+?([\dA-Fa-f]{4,6})}{encode ("utf8", chr hex $1)}gie and ($opt_f, $opt_v) = (0, 1); } if ($opt_f) { my ($found, $pro, $c) = (0); foreach my $w (['\b', '\b'], ['\b', ''], ['', '']) { my $pat = join ".*", map { "$w->[0]$_$w->[1]" } map { split m/_/ } @ARGV; foreach my $ipat (qr{$pat}, qr{$pat}i) { print STDERR "Searching for $ipat\n"; foreach my $name (sort {$cp{$a} <=> $cp{$b} } grep m/$ipat/ => keys %cp) { my $cp = $cp{$name}; $opt_f > 1 && !exists $fcp{$cp} and next; my $c = chr $cp; $pro = undef; $opt_v > 3 and print STDERR Dumper $pro; $name =~ m/^COMBINING / and $c = " $c"; show_cp ($cp, $c, $pro, $name); $found++; } $found and last; } $found and last; } exit; } if ($opt_b) { foreach (unpack "U*", decode ("utf8", join " ", @ARGV)) { my $n = charnames::viacode ($_); my $c = $n =~ m/\b(SMALL|CAPITAL)\s+LETTER\s+(.)\s+WITH/ ? $1 eq "SMALL" ? lc $2 : $2 : chr $_; print $c; } print "\n"; exit; } if ($opt_d) { my %ll; for (keys %cp) { m{^LATIN (SMALL|CAPITAL) LETTER (.) WITH (.*)} or next; my $cp = $cp{$_}; exists $fcp{$cp} or next; # Not in this font my $bc = $1 eq "SMALL" ? lc $2 : $2; push @{$ll{$bc}}, $cp; } if (@ARGV && -f $ARGV[0]) { open my $fh, "<:encoding(utf-8)", $ARGV[0]; @ARGV = <$fh>; } elsif (@ARGV == 1 && $ARGV[0] eq "-" or @ARGV == 0) { @ARGV = ; } my ($p, $c) = (""); foreach (unpack "U*", decode ("utf8", join " ", @ARGV)) { $c = chr $_; $c eq " " && $p eq "\n" and next; if ($c =~ m/[A-Za-z]/) { exists $ll{$c} and $c = chr $ll{$c}[int rand scalar @{$ll{$c}}]; } print $c; } continue { $p = $c; } print "\n"; exit; } if (@opt_m) { @opt_m == 1 and push @opt_m, @ARGV; @opt_m == 1 && $opt_m[0] =~ m/^(0|all|\*)$/ and @opt_m = ("a0:5f", map { sprintf "%x", 0x100 * $_ } 1..0x2e); for (@opt_m) { my ($base, $count) = map { m/^0?x?([\da-f]+)$/i ? hex $1 : 0 } split m/:/, "$_:7f"; $count += $base; print " 0123456789abcdef 0123456789abcdef\n"; while ($base <= $count) { printf "0x%05x: ", $base; print chr ($base + $_) for 0 .. 15; print " "; print chr ($base + $_) for 16 .. 31; print "\n"; $base += 32; } print "\n"; } exit; } my $c; if ($opt_v || $opt_k) { @ARGV = map { chr $_ } unpack "U*", decode ("utf8", join " ", @ARGV); } for (@ARGV) { exists $xlat{$_} and $_ = $xlat{$_}, next; s/^(?:0?x)?([a-f\d]+)$/chr hex $1/e and next; $c = charnames::vianame ($_) and $_ = chr $c, next; $c = charnames::vianame (uc $_) and $_ = chr $c; } my $uc = @ARGV == 1 ? $ARGV[0] : ""; if ($opt_k) { for (@ARGV) { my $cp = ord $_; my $h = sprintf "%05x", $cp; $_ .= "\t". join "\n\t", map { sprintf "%-20s", $_ } @{$compose{$h}//[]}; } } if ($opt_v) { for (@ARGV) { my $cp = ord $_; $_ .= sprintf " U%05x \\N{%s}\n", $cp, charnames::viacode $cp; } } print join "", @ARGV, "\n"; $opt_e && $uc and show_enc ($uc);