1 | BEGIN {
|
---|
2 | chdir 't' if -d 't';
|
---|
3 | @INC = qw(../lib .);
|
---|
4 | require "test.pl";
|
---|
5 | }
|
---|
6 |
|
---|
7 | plan tests => 4670;
|
---|
8 |
|
---|
9 | sub MyUniClass {
|
---|
10 | <<END;
|
---|
11 | 0030 004F
|
---|
12 | END
|
---|
13 | }
|
---|
14 |
|
---|
15 | sub Other::Class {
|
---|
16 | <<END;
|
---|
17 | 0040 005F
|
---|
18 | END
|
---|
19 | }
|
---|
20 |
|
---|
21 | sub A::B::Intersection {
|
---|
22 | <<END;
|
---|
23 | +main::MyUniClass
|
---|
24 | &Other::Class
|
---|
25 | END
|
---|
26 | }
|
---|
27 |
|
---|
28 | sub test_regexp ($$) {
|
---|
29 | # test that given string consists of N-1 chars matching $qr1, and 1
|
---|
30 | # char matching $qr2
|
---|
31 | my ($str, $blk) = @_;
|
---|
32 |
|
---|
33 | # constructing these objects here makes the last test loop go much faster
|
---|
34 | my $qr1 = qr/(\p{$blk}+)/;
|
---|
35 | if ($str =~ $qr1) {
|
---|
36 | is($1, substr($str, 0, -1)); # all except last char
|
---|
37 | }
|
---|
38 | else {
|
---|
39 | fail('first N-1 chars did not match');
|
---|
40 | }
|
---|
41 |
|
---|
42 | my $qr2 = qr/(\P{$blk}+)/;
|
---|
43 | if ($str =~ $qr2) {
|
---|
44 | is($1, substr($str, -1)); # only last char
|
---|
45 | }
|
---|
46 | else {
|
---|
47 | fail('last char did not match');
|
---|
48 | }
|
---|
49 | }
|
---|
50 |
|
---|
51 | use strict;
|
---|
52 |
|
---|
53 | my $str = join "", map chr($_), 0x20 .. 0x6F;
|
---|
54 |
|
---|
55 | # make sure it finds built-in class
|
---|
56 | is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
|
---|
57 | is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
|
---|
58 |
|
---|
59 | # make sure it finds user-defined class
|
---|
60 | is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
|
---|
61 |
|
---|
62 | # make sure it finds class in other package
|
---|
63 | is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
|
---|
64 |
|
---|
65 | # make sure it finds class in other OTHER package
|
---|
66 | is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
|
---|
67 |
|
---|
68 | # all of these should look in lib/unicore/bc/AL.pl
|
---|
69 | $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
|
---|
70 | is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
|
---|
71 | is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
|
---|
72 | is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
|
---|
73 | is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
|
---|
74 |
|
---|
75 | # make sure InGreek works
|
---|
76 | $str = "[\x{038B}\x{038C}\x{038D}]";
|
---|
77 |
|
---|
78 | is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
---|
79 | is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
---|
80 | is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
---|
81 | is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
---|
82 | is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
|
---|
83 |
|
---|
84 | use File::Spec;
|
---|
85 | my $updir = File::Spec->updir;
|
---|
86 |
|
---|
87 | # the %utf8::... hashes are already in existence
|
---|
88 | # because utf8_pva.pl was run by utf8_heavy.pl
|
---|
89 |
|
---|
90 | *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
|
---|
91 |
|
---|
92 | no warnings 'utf8'; # we do not want warnings about surrogates etc
|
---|
93 |
|
---|
94 | # non-General Category and non-Script
|
---|
95 | while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
|
---|
96 | my $prop_name = $utf8::PropertyAlias{$abbrev};
|
---|
97 | next unless $prop_name;
|
---|
98 | next if $abbrev eq "gc_sc";
|
---|
99 |
|
---|
100 | for (sort keys %$files) {
|
---|
101 | my $filename = File::Spec->catfile(
|
---|
102 | $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
|
---|
103 | );
|
---|
104 |
|
---|
105 | next unless -e $filename;
|
---|
106 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
---|
107 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
---|
108 |
|
---|
109 | for my $p ($prop_name, $abbrev) {
|
---|
110 | for my $c ($files->{$_}, $_) {
|
---|
111 | is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
|
---|
112 | is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
|
---|
113 | }
|
---|
114 | }
|
---|
115 | }
|
---|
116 | }
|
---|
117 |
|
---|
118 | # General Category and Script
|
---|
119 | for my $p ('gc', 'sc') {
|
---|
120 | while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
|
---|
121 | my $filename = File::Spec->catfile(
|
---|
122 | $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
|
---|
123 | );
|
---|
124 |
|
---|
125 | next unless -e $filename;
|
---|
126 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
---|
127 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
---|
128 |
|
---|
129 | for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
|
---|
130 | for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
|
---|
131 | is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
|
---|
132 | is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
|
---|
133 | test_regexp ($str, $y);
|
---|
134 | }
|
---|
135 | }
|
---|
136 | }
|
---|
137 | }
|
---|
138 |
|
---|
139 | # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
|
---|
140 | SKIP:
|
---|
141 | {
|
---|
142 | skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';
|
---|
143 |
|
---|
144 | # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
|
---|
145 | # return true. Try to work around this by reading the filenames explicitly
|
---|
146 | # to get a case sensitive test. N.B. This will fail if filename case is
|
---|
147 | # not preserved because you might go looking for a class name of CF or cf
|
---|
148 | # when you really want Cf. Storing case sensitive data in filenames is
|
---|
149 | # simply not portable.
|
---|
150 |
|
---|
151 | my %files;
|
---|
152 |
|
---|
153 | my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
|
---|
154 | opendir D, $dirname or die $!;
|
---|
155 | @files{readdir(D)} = ();
|
---|
156 | closedir D;
|
---|
157 |
|
---|
158 | for (keys %utf8::PA_reverse) {
|
---|
159 | my $leafname = "$utf8::PA_reverse{$_}.pl";
|
---|
160 | next unless exists $files{$leafname};
|
---|
161 |
|
---|
162 | my $filename = File::Spec->catfile($dirname, $leafname);
|
---|
163 |
|
---|
164 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
---|
165 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
---|
166 |
|
---|
167 | for my $x ('gc', 'General Category') {
|
---|
168 | print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
|
---|
169 | for my $y ($_, $utf8::PA_reverse{$_}) {
|
---|
170 | is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
|
---|
171 | is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
|
---|
172 | test_regexp ($str, $y);
|
---|
173 | }
|
---|
174 | }
|
---|
175 | }
|
---|
176 | }
|
---|
177 |
|
---|
178 | # test the blocks (InFoobar)
|
---|
179 | for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
|
---|
180 | my $filename = File::Spec->catfile(
|
---|
181 | $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
|
---|
182 | );
|
---|
183 |
|
---|
184 | next unless -e $filename;
|
---|
185 |
|
---|
186 | print "# In$_ $filename\n";
|
---|
187 |
|
---|
188 | my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
|
---|
189 | my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
|
---|
190 |
|
---|
191 | my $blk = $_;
|
---|
192 |
|
---|
193 | test_regexp ($str, $blk);
|
---|
194 | $blk =~ s/^In/Block:/;
|
---|
195 | test_regexp ($str, $blk);
|
---|
196 | }
|
---|
197 |
|
---|