Context Navigation

utf8.t

Visit:

Last change on this file was 3181, checked in by bird, 18 years ago
perl 5.8.8
File size: 7.8 KB

Line
1	#!./perl
2
# Compile-time setup: move into t/ when it exists, point @INC at ../lib,
# and emit a "skip everything" plan when the perlio layer cannot be found.
BEGIN {
    chdir 't' if -d 't';
    @INC = ('../lib');
    if (!PerlIO::Layer->find('perlio')) {
        print "1..0 # Skip: not perlio\n";
        exit 0;
    }
}
11
no utf8; # needed for use utf8 not griping about the raw octets

# The helpers used throughout (plan, is, isnt, ok, like, cmp_ok, skip) are not
# defined in this file; presumably they all come from test.pl.
require "./test.pl";

# Must match the number of assertions executed below.
plan(tests => 55);

# Autoflush the selected output handle so test output is not held in a buffer.
$| = 1;
19
# Round-trip one wide character and one Latin-1 character through a :utf8
# handle on the scratch file 'a', checking byte offsets, per-character reads
# and the raw encoded bytes.
# NOTE(review): the byte counts asserted here only hold if the pound sign
# literals are stored as a single raw octet (see the "no utf8" remark near the
# top of the file) -- confirm the file encoding before editing them.
open(F, "+>:utf8", 'a') or die $!;
print F chr(0x100).'£';
cmp_ok( tell(F), '==', 4, tell(F) );
print F "\n";
cmp_ok( tell(F), '>=', 5, tell(F) );

# Read the three characters back through the :utf8 layer.
seek(F,0,0);
is( getc(F), chr(0x100) );
is( getc(F), "£" );
is( getc(F), "\n" );

# The same data seen as raw bytes.  Each pair is [ ASCII octet, EBCDIC octet ].
# $chr stays a file-scoped lexical because later sections reuse it.
seek(F,0,0);
binmode(F,":bytes");
my $chr;
for my $octet_pair ( [ 0xc4, 0x8c ], [ 0x80, 0x41 ], [ 0xc2, 0x80 ], [ 0xa3, 0x44 ] ) {
    my ($ascii, $ebcdic) = @$octet_pair;
    $chr = chr( ord('A') == 193 ? $ebcdic : $ascii );
    is( getc(F), $chr );
}
is( getc(F), "\n" );

# Back to characters: a whole line at once.
seek(F,0,0);
binmode(F,":utf8");
is( scalar(<F>), "\x{100}£\n" );

# read() with an OFFSET of 1 keeps the first character already in the buffer
# and places the two characters read after it.
seek(F,0,0);
my $buf   = chr(0x200);
my $count = read(F,$buf,2,1);
cmp_ok( $count, '==', 2 );
is( $buf, "\x{200}\x{100}£" );
close(F);
53
# Mix an upgraded string ($a) and a non-upgraded one ($b) on :utf8 and :bytes
# handles and check what ends up in the file, what tell() reports, and that
# printing a wide character to a non-UTF-8 handle warns.
# $a, $b, $x and $chr are deliberately not lexical to this block: later
# top-level code reuses $a and $chr.
{
    $a = chr(300); # This is UTF-encoded
    $b = chr(130); # This is not.

    # Write the wide character through :utf8 ...
    open F, ">:utf8", 'a' or die $!;
    print F $a,"\n";
    close F;

    # ... read it back as one character ...
    open F, "<:utf8", 'a' or die $!;
    $x = <F>;
    chomp($x);
    is( $x, chr(300) );

    # ... and as the raw encoded bytes.
    open F, "<", "a" or die $!; # Not UTF
    binmode(F, ":bytes");
    $x = <F>;
    chomp($x);
    $chr = (ord('A') == 193) ? chr(141).chr(83)      # EBCDIC
                             : chr(196).chr(172);
    is( $x, $chr );
    close F;

    open F, ">:utf8", 'a' or die $!;
    binmode(F);  # we write a "\n" and then tell() - avoid CRLF issues.
    binmode(F,":utf8"); # turn UTF-8-ness back on
    print F $a;

    # $y tracks the expected file offset in bytes.
    my $y;
    {
        my $x = tell(F);
        { use bytes; $y = length($a); }
        cmp_ok( $x, '==', $y );
    }

    { # Check byte length of $b
        use bytes;
        my $y = length($b);
        cmp_ok( $y, '==', 1 );
    }

    print F $b,"\n"; # Must not upgrade $b

    { # Check byte length of $b again: printing must have left it alone
        use bytes;
        my $y = length($b);
        cmp_ok( $y, '==', 1 );
    }

    {
        my $x = tell(F);
        # $b plus the newline add 2 bytes on EBCDIC, 3 on ASCII platforms.
        $y += (ord('A') == 193) ? 2 : 3;
        cmp_ok( $x, '==', $y );
    }

    close F;

    open F, "<", "a" or die $!; # Not UTF
    binmode(F, ":bytes");
    $x = <F>;
    chomp($x);
    $chr = (ord('A') == 193) ? v141.83.130           # EBCDIC
                             : v196.172.194.130;
    is( $x, $chr, sprintf('(%vd)', $x) );

    open F, "<:utf8", "a" or die $!;
    $x = <F>;
    chomp($x);
    close F;
    is( $x, chr(300).chr(130), sprintf('(%vd)', $x) );

    open F, ">", "a" or die $!;
    if (${^OPEN} =~ /:utf8/) {
        binmode(F, ":bytes:");
    }

    # Now let's make it suffer.
    my $w;
    {
        use warnings 'utf8';
        local $SIG{__WARN__} = sub { $w = $_[0] };
        # Run the print inside eval so that $@ describes this statement
        # rather than whatever an earlier eval happened to leave behind.
        eval { print F $a; };
        ok( !$@ );
        like($w, qr/Wide character in print/i );
    }
}
135
136	# Hm. Time to get more evil.
# Print $a (set earlier in the file) through :utf8, then flip the same handle
# to :bytes and append a lone high octet plus a newline.
open F, ">:utf8", "a" or die $!;
print F $a;
binmode(F, ":bytes");
print F chr(130)."\n";
close F;

# Viewed as raw bytes, the line must be the encoded character followed by the
# octet exactly as it was printed.
open F, "<", "a" or die $!;
binmode(F, ":bytes");
chomp($x = <F>);
$chr = (ord('A') == 193) ? v141.83.130    # EBCDIC
                         : v196.172.130;
is( $x, $chr );
149
150	# Right.
# Same content as the previous section, but built with two separate opens:
# first the character through :utf8, then the high octet appended via :bytes.
open F, ">:utf8", "a" or die $!;
print F $a;
close F;

open F, ">>", "a" or die $!;
binmode(F, ":bytes");
print F chr(130)."\n";
close F;

open F, "<", "a" or die $!;
binmode(F, ":bytes");
chomp($x = <F>);

# $chr still holds the expected bytes computed by the previous section.
# NOTE(review): $UTF8_OUTPUT is never assigned anywhere in this file, so the
# skip looks unreachable unless something outside sets it -- confirm.
SKIP: {
    if ($UTF8_OUTPUT) {
        skip("Defaulting to UTF-8 output means that we can't generate a mangled file");
    }
    is( $x, $chr );
}
167
168	# Now we have a deformed file.
169
# Reading the deformed file through :utf8 and then formatting the line with
# %vd is expected to produce exactly one "Malformed UTF-8" warning.
SKIP: {
    skip("EBCDIC doesn't complain", 2) if ord('A') == 193;

    my @warnings;
    open F, "<:utf8", "a" or die $!;
    chomp($x = <F>);

    # The handler is installed only now, so it records warnings raised by the
    # sprintf below and not by the read above.
    local $SIG{__WARN__} = sub { push @warnings, $_[0]; };
    eval { sprintf "%vd\n", $x };

    is (scalar @warnings, 1);
    like ($warnings[0], qr/Malformed UTF-8 character \(unexpected continuation byte 0x82, with no preceding start byte/);
}

close F;
unlink('a');
186
# Write characters whose encodings have different byte lengths, then read them
# back one at a time: read() must return whole characters while tell() keeps
# advancing by the encoded byte length.
open F, ">:utf8", "a" or die $!;
@a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000
unshift @a, chr(0); # ... and a null byte in front just for fun
print F @a;
close F;

my $c;

# read() should work on characters, not bytes
require bytes;    # make sure bytes::length() is available to this section
open F, "<:utf8", "a" or die $!;
$a = 0;           # running byte offset expected from tell()
my $failed;
for (@a) {
    # The assignment is parenthesised so that $c holds read()'s return value
    # (not the result of the comparison) for the diagnostics below.
    unless (($c = read(F, $b, 1)) == 1       &&
            length($b)            == 1       &&
            ord($b)               == ord($_) &&
            tell(F)               == ($a += bytes::length($b))) {
        print '# ord($_)           == ', ord($_), "\n";
        print '# ord($b)           == ', ord($b), "\n";
        print '# length($b)        == ', length($b), "\n";
        print '# bytes::length($b) == ', bytes::length($b), "\n";
        print '# tell(F)           == ', tell(F), "\n";
        print '# $a                == ', $a, "\n";
        print '# $c                == ', $c, "\n";
        $failed++;
        last;
    }
}
close F;
is($failed, undef);
217
{
    # Check that warnings are on for I/O, and that they can be muffled.
    # Every section resets $warning, writes chr(0x100) to the scratch file
    # and then checks whether the handler below was called.

    my $warning;
    local $SIG{__WARN__} = sub { $warning = shift };

    # :bytes handle, default warnings: must warn.
    undef $warning;
    open F, ">", "a" or die $!;
    binmode(F, ":bytes");
    print F chr(0x100);
    close(F);

    like( $warning, qr/Wide character in print/ );

    # Handle opened with :utf8: nothing to warn about.
    undef $warning;
    open F, ">:utf8", "a" or die $!;
    print F chr(0x100);
    close(F);

    # A one-argument isnt() can never fail, so assert "no warning" directly.
    ok( !defined $warning, 'no warning on a handle opened with :utf8' );

    # :utf8 pushed by binmode: nothing to warn about either.
    undef $warning;
    open F, ">", "a" or die $!;
    binmode(F, ":utf8");
    print F chr(0x100);
    close(F);

    ok( !defined $warning, 'no warning after binmode :utf8' );

    no warnings 'utf8';

    # The utf8 warning category is disabled: must stay quiet.
    undef $warning;
    open F, ">", "a" or die $!;
    print F chr(0x100);
    close(F);

    ok( !defined $warning, "no warning under no warnings 'utf8'" );

    use warnings 'utf8';

    # Category enabled again: the :bytes handle must warn once more.
    undef $warning;
    open F, ">", "a" or die $!;
    binmode(F, ":bytes");
    print F chr(0x100);
    close(F);

    like( $warning, qr/Wide character in print/ );
}
265
{
    # Append a line read from a :bytes handle onto a string that already
    # holds a wide character.
    open F, ">:bytes", "a";
    print F "\xde";
    close F;

    open F, "<:bytes", "a";
    my $joined = chr 0x100;
    $joined .= <F>;
    my $expected = chr(0x100).chr(0xde);
    is( $joined, $expected, "21395 '.= <>' utf8 vs. bytes" );
    close F;
}
275
{
    # The reverse case: append a line read from a :utf8 handle onto a string
    # that starts out as a single high octet.
    open F, ">:utf8", "a";
    print F chr 0x100;
    close F;

    open F, "<:utf8", "a";
    my $joined = "\xde";
    $joined .= <F>;
    my $expected = chr(0xde).chr(0x100);
    is( $joined, $expected, "21395 '.= <>' bytes vs. utf8" );
    close F;
}
285
{
    # Exercise '.= <F>' for every combination of file content and existing
    # string.  Each case is [ code point, "bytes" | "utf8" ]:
    #   $u - the character written to the file and the layer used for both
    #        writing and reading it;
    #   $v - the character the target string starts with, upgraded first when
    #        its second element is "utf8".
    # 4 x 4 cases, one test each.
    my @cases = ( [ 0x007F, "bytes" ],
                  [ 0x0080, "bytes" ],
                  [ 0x0080, "utf8"  ],
                  [ 0x0100, "utf8"  ] );
    for my $u (@cases) {
        for my $v (@cases) {
            # print "# @$u - @$v\n";
            open F, ">", "a" or die $!;
            binmode(F, ":" . $u->[1]);
            print F chr($u->[0]);
            close F;

            open F, "<", "a" or die $!;
            binmode(F, ":" . $u->[1]);

            my $s = chr($v->[0]);
            utf8::upgrade($s) if $v->[1] eq "utf8";

            $s .= <F>;
            is( $s, chr($v->[0]) . chr($u->[0]), 'rcatline utf8' );
            close F;
        }
    }
}
314
{
    # [perl #23428] Somethings rotten in unicode semantics
    # After syswrite() to a :utf8 handle, $a must still hold the wide
    # character and still match \w.
    my $label = '23428 syswrite should not downgrade scalar';

    open F, ">", "a";
    binmode F, ":utf8";
    syswrite(F, $a = chr(0x100));
    close F;

    is( ord($a), 0x100, $label );
    like( $a, qr/^\w+/, $label );
}
324
325	# sysread() and syswrite() tested in lib/open.t since Fcntl is used
326
{
    # <FH> on a :utf8 stream should complain immediately with -w
    # if it finds bad UTF-8 (:encoding(utf8) works this way)
    use warnings 'utf8';
    my $warning;
    local $SIG{__WARN__} = sub { $warning = shift };

    # Write two lines in binary mode, each with a high octet right before
    # the newline.
    open F, ">", "a" or die $!;
    binmode F;
    my ($chrE4, $chrF6) = (chr(0xE4), chr(0xF6));
    if (ord('A') == 193) { ($chrE4, $chrF6) = (chr(0x43), chr(0xEC)); } # EBCDIC
    print F "foo", $chrE4, "\n";
    print F "foo", $chrF6, "\n";
    close F;

    # Hex spellings of the same octets as they appear in the warning text.
    # They get their own names so the octet variables above are not masked.
    my ($hexE4, $hexF6) = ("E4", "F6");
    if (ord('A') == 193) { ($hexE4, $hexF6) = ("43", "EC"); } # EBCDIC

    open F, "<:utf8", "a" or die $!;

    # Plain readline.
    undef $warning;
    my $line = <F>;
    like( $warning, qr/utf8 "\\x$hexE4" does not map to Unicode .+ <F> line 1/,
          "<:utf8 readline must warn about bad utf8");

    # Appending readline ('.= <F>').
    undef $warning;
    $line .= <F>;
    like( $warning, qr/utf8 "\\x$hexF6" does not map to Unicode .+ <F> line 2/,
          "<:utf8 rcatline must warn about bad utf8");
    close F;
}
354
# Remove the scratch files at exit; unlink is repeated for each name until it
# reports that nothing was deleted.
END {
    for my $scratch (qw(a b)) {
        1 while unlink $scratch;
    }
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/dev-lang/perl/t/io/utf8.t

Download in other formats: