| 1 | BEGIN {
|
|---|
| 2 | chdir 't' if -d 't';
|
|---|
| 3 | @INC = qw(../lib .);
|
|---|
| 4 | require "test.pl";
|
|---|
| 5 | }
|
|---|
| 6 |
|
|---|
| 7 | plan tests => 4670;
|
|---|
| 8 |
|
|---|
| 9 | sub MyUniClass {
|
|---|
| 10 | <<END;
|
|---|
| 11 | 0030 004F
|
|---|
| 12 | END
|
|---|
| 13 | }
|
|---|
| 14 |
|
|---|
| 15 | sub Other::Class {
|
|---|
| 16 | <<END;
|
|---|
| 17 | 0040 005F
|
|---|
| 18 | END
|
|---|
| 19 | }
|
|---|
| 20 |
|
|---|
| 21 | sub A::B::Intersection {
|
|---|
| 22 | <<END;
|
|---|
| 23 | +main::MyUniClass
|
|---|
| 24 | &Other::Class
|
|---|
| 25 | END
|
|---|
| 26 | }
|
|---|
| 27 |
|
|---|
| 28 | sub test_regexp ($$) {
|
|---|
| 29 | # test that given string consists of N-1 chars matching $qr1, and 1
|
|---|
| 30 | # char matching $qr2
|
|---|
| 31 | my ($str, $blk) = @_;
|
|---|
| 32 |
|
|---|
| 33 | # constructing these objects here makes the last test loop go much faster
|
|---|
| 34 | my $qr1 = qr/(\p{$blk}+)/;
|
|---|
| 35 | if ($str =~ $qr1) {
|
|---|
| 36 | is($1, substr($str, 0, -1)); # all except last char
|
|---|
| 37 | }
|
|---|
| 38 | else {
|
|---|
| 39 | fail('first N-1 chars did not match');
|
|---|
| 40 | }
|
|---|
| 41 |
|
|---|
| 42 | my $qr2 = qr/(\P{$blk}+)/;
|
|---|
| 43 | if ($str =~ $qr2) {
|
|---|
| 44 | is($1, substr($str, -1)); # only last char
|
|---|
| 45 | }
|
|---|
| 46 | else {
|
|---|
| 47 | fail('last char did not match');
|
|---|
| 48 | }
|
|---|
| 49 | }
|
|---|
| 50 |
|
|---|
| 51 | use strict;
|
|---|
| 52 |
|
|---|
| 53 | my $str = join "", map chr($_), 0x20 .. 0x6F;
|
|---|
| 54 |
|
|---|
| 55 | # make sure it finds built-in class
|
|---|
| 56 | is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
|
|---|
| 57 | is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
|
|---|
| 58 |
|
|---|
| 59 | # make sure it finds user-defined class
|
|---|
| 60 | is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
|
|---|
| 61 |
|
|---|
| 62 | # make sure it finds class in other package
|
|---|
| 63 | is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
|
|---|
| 64 |
|
|---|
| 65 | # make sure it finds class in other OTHER package
|
|---|
| 66 | is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
|
|---|
| 67 |
|
|---|
| 68 | # all of these should look in lib/unicore/bc/AL.pl
|
|---|
| 69 | $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
|
|---|
| 70 | is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
|
|---|
| 71 | is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
|
|---|
| 72 | is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
|
|---|
| 73 | is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
|
|---|
| 74 |
|
|---|
| 75 | # make sure InGreek works
|
|---|
| 76 | $str = "[\x{038B}\x{038C}\x{038D}]";
|
|---|
| 77 |
|
|---|
| 78 | is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
|---|
| 79 | is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
|---|
| 80 | is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
|---|
| 81 | is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
|---|
| 82 | is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
|---|
| 83 |
|
|---|
| 84 | use File::Spec;
|
|---|
| 85 | my $updir = File::Spec->updir;
|
|---|
| 86 |
|
|---|
| 87 | # the %utf8::... hashes are already in existence
|
|---|
| 88 | # because utf8_pva.pl was run by utf8_heavy.pl
|
|---|
| 89 |
|
|---|
| 90 | *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
|
|---|
| 91 |
|
|---|
| 92 | no warnings 'utf8'; # we do not want warnings about surrogates etc
|
|---|
| 93 |
|
|---|
| 94 | # non-General Category and non-Script
|
|---|
| 95 | while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
|
|---|
| 96 | my $prop_name = $utf8::PropertyAlias{$abbrev};
|
|---|
| 97 | next unless $prop_name;
|
|---|
| 98 | next if $abbrev eq "gc_sc";
|
|---|
| 99 |
|
|---|
| 100 | for (sort keys %$files) {
|
|---|
| 101 | my $filename = File::Spec->catfile(
|
|---|
| 102 | $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
|
|---|
| 103 | );
|
|---|
| 104 |
|
|---|
| 105 | next unless -e $filename;
|
|---|
| 106 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
|---|
| 107 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
|---|
| 108 |
|
|---|
| 109 | for my $p ($prop_name, $abbrev) {
|
|---|
| 110 | for my $c ($files->{$_}, $_) {
|
|---|
| 111 | is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
|
|---|
| 112 | is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
|
|---|
| 113 | }
|
|---|
| 114 | }
|
|---|
| 115 | }
|
|---|
| 116 | }
|
|---|
| 117 |
|
|---|
| 118 | # General Category and Script
|
|---|
| 119 | for my $p ('gc', 'sc') {
|
|---|
| 120 | while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
|
|---|
| 121 | my $filename = File::Spec->catfile(
|
|---|
| 122 | $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
|
|---|
| 123 | );
|
|---|
| 124 |
|
|---|
| 125 | next unless -e $filename;
|
|---|
| 126 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
|---|
| 127 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
|---|
| 128 |
|
|---|
| 129 | for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
|
|---|
| 130 | for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
|
|---|
| 131 | is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
|
|---|
| 132 | is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
|
|---|
| 133 | test_regexp ($str, $y);
|
|---|
| 134 | }
|
|---|
| 135 | }
|
|---|
| 136 | }
|
|---|
| 137 | }
|
|---|
| 138 |
|
|---|
| 139 | # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
|
|---|
| 140 | SKIP:
|
|---|
| 141 | {
|
|---|
| 142 | skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';
|
|---|
| 143 |
|
|---|
| 144 | # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
|
|---|
| 145 | # return true. Try to work around this by reading the filenames explicitly
|
|---|
| 146 | # to get a case sensitive test. N.B. This will fail if filename case is
|
|---|
| 147 | # not preserved because you might go looking for a class name of CF or cf
|
|---|
| 148 | # when you really want Cf. Storing case sensitive data in filenames is
|
|---|
| 149 | # simply not portable.
|
|---|
| 150 |
|
|---|
| 151 | my %files;
|
|---|
| 152 |
|
|---|
| 153 | my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
|
|---|
| 154 | opendir D, $dirname or die $!;
|
|---|
| 155 | @files{readdir(D)} = ();
|
|---|
| 156 | closedir D;
|
|---|
| 157 |
|
|---|
| 158 | for (keys %utf8::PA_reverse) {
|
|---|
| 159 | my $leafname = "$utf8::PA_reverse{$_}.pl";
|
|---|
| 160 | next unless exists $files{$leafname};
|
|---|
| 161 |
|
|---|
| 162 | my $filename = File::Spec->catfile($dirname, $leafname);
|
|---|
| 163 |
|
|---|
| 164 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
|---|
| 165 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
|---|
| 166 |
|
|---|
| 167 | for my $x ('gc', 'General Category') {
|
|---|
| 168 | print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
|
|---|
| 169 | for my $y ($_, $utf8::PA_reverse{$_}) {
|
|---|
| 170 | is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
|
|---|
| 171 | is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
|
|---|
| 172 | test_regexp ($str, $y);
|
|---|
| 173 | }
|
|---|
| 174 | }
|
|---|
| 175 | }
|
|---|
| 176 | }
|
|---|
| 177 |
|
|---|
| 178 | # test the blocks (InFoobar)
|
|---|
| 179 | for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
|
|---|
| 180 | my $filename = File::Spec->catfile(
|
|---|
| 181 | $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
|
|---|
| 182 | );
|
|---|
| 183 |
|
|---|
| 184 | next unless -e $filename;
|
|---|
| 185 |
|
|---|
| 186 | print "# In$_ $filename\n";
|
|---|
| 187 |
|
|---|
| 188 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
|---|
| 189 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
|---|
| 190 |
|
|---|
| 191 | my $blk = $_;
|
|---|
| 192 |
|
|---|
| 193 | test_regexp ($str, $blk);
|
|---|
| 194 | $blk =~ s/^In/Block:/;
|
|---|
| 195 | test_regexp ($str, $blk);
|
|---|
| 196 | }
|
|---|
| 197 |
|
|---|