#!./perl

# Tests for the chr() builtin: first the values that fit in a single byte,
# then (further down) the byte sequences produced for wider code points.

BEGIN {
    # Run from the t/ directory when started from the top of the source tree.
    chdir 't' if -d 't';
    @INC = qw(. ../lib); # ../lib needed for test.deparse
    require "test.pl";
}

plan tests => 26;

# Note that t/op/ord.t already tests for chr() <-> ord() roundtripping.

# Don't assume ASCII.

is(chr(ord("A")), "A", 'chr(ord("A")) gives back "A"');

# Ordinals 0..255 must map onto the character with the same native code.
is(chr(  0), "\x00", 'chr(0) is "\x00"');
is(chr(127), "\x7F", 'chr(127) is "\x7F"');
is(chr(128), "\x80", 'chr(128) is "\x80"');
is(chr(255), "\xFF", 'chr(255) is "\xFF"');

# is(chr(-1), undef); # Shouldn't it be?

# Check UTF-8.

# hexes(CODEPOINT)
#
# Returns the UTF-8 encoding of chr(CODEPOINT) as lowercase two-digit hex
# bytes separated by single spaces, e.g. hexes(0x100) is "c4 80".
#
# The character is encoded explicitly with utf8::encode() before the bytes
# are unpacked.  Since perl 5.10, unpack("C*") processes a string character
# by character instead of exposing the bytes of its internal encoding, so
# unpacking the result of chr() directly does not yield the encoded bytes.
#
# Note that code points 0x80..0xFF are therefore reported in their
# multi-byte encoded form as well, not as a single byte.
sub hexes {
    # Callers deliberately probe surrogates and code points beyond U+10FFFF;
    # keep those quiet should the script be run with warnings enabled.
    no warnings 'utf8';

    my $octets = chr($_[0]);
    utf8::encode($octets);      # in place: characters -> UTF-8 octets

    return join(" ", map { sprintf("%02x", $_) } unpack("C*", $octets));
}

# The following code points are some interesting steps in UTF-8: the first
# and last value of each encoded length, plus the edges of the surrogate
# range and of the Unicode code space.  Each row pairs a code point with
# the space-separated hex bytes that hexes() must report for it.
my @utf8_steps = (
    [ 0x100,    "c4 80"          ],
    [ 0x7FF,    "df bf"          ],
    [ 0x800,    "e0 a0 80"       ],
    [ 0xFFF,    "e0 bf bf"       ],
    [ 0x1000,   "e1 80 80"       ],
    [ 0xCFFF,   "ec bf bf"       ],
    [ 0xD000,   "ed 80 80"       ],
    [ 0xD7FF,   "ed 9f bf"       ],
    [ 0xD800,   "ed a0 80"       ], # not strict utf-8 (surrogate area begin)
    [ 0xDFFF,   "ed bf bf"       ], # not strict utf-8 (surrogate area end)
    [ 0xE000,   "ee 80 80"       ],
    [ 0xFFFF,   "ef bf bf"       ],
    [ 0x10000,  "f0 90 80 80"    ],
    [ 0x3FFFF,  "f0 bf bf bf"    ],
    [ 0x40000,  "f1 80 80 80"    ],
    [ 0xFFFFF,  "f3 bf bf bf"    ],
    [ 0x100000, "f4 80 80 80"    ],
    [ 0x10FFFF, "f4 8f bf bf"    ], # Unicode (4.1) last code point
    [ 0x110000, "f4 90 80 80"    ],
    [ 0x1FFFFF, "f7 bf bf bf"    ], # last four byte encoding
    [ 0x200000, "f8 88 80 80 80" ],
);

for my $step (@utf8_steps) {
    my ($code_point, $expected) = @$step;

    # Name every test after its code point: all rows share one call site,
    # so the line number alone would not identify a failing row.
    is(hexes($code_point), $expected,
       sprintf("chr(0x%X) is encoded as '%s'", $code_point, $expected));
}