1 | #!./perl -w
|
---|
2 |
|
---|
3 | BEGIN {
|
---|
4 | if ($ENV{'PERL_CORE'}){
|
---|
5 | chdir 't';
|
---|
6 | @INC = '../lib';
|
---|
7 | }
|
---|
8 | require Config; import Config;
|
---|
9 | if ($Config{'extensions'} !~ /\bEncode\b/) {
|
---|
10 | print "1..0 # Skip: Encode was not built\n";
|
---|
11 | exit 0;
|
---|
12 | }
|
---|
13 | if (ord("A") == 193) {
|
---|
14 | print "1..0 # Skip: EBCDIC\n";
|
---|
15 | exit 0;
|
---|
16 | }
|
---|
17 | unless (PerlIO::Layer->find('perlio')){
|
---|
18 | print "1..0 # Skip: PerlIO required\n";
|
---|
19 | exit 0;
|
---|
20 | }
|
---|
21 | if ($ENV{PERL_CORE_MINITEST}) {
|
---|
22 | print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
|
---|
23 | exit 0;
|
---|
24 | }
|
---|
25 | }
|
---|
26 |
|
---|
27 | use Encode;
|
---|
28 | use strict;
|
---|
29 | use Test::More;
|
---|
30 |
|
---|
31 | # %mbchars = (encoding => { bytes => utf8, ... }, ...);
|
---|
32 | # * pack('C*') is expected to return bytes even if ${^ENCODING} is true.
|
---|
33 | our %mbchars = (
|
---|
34 | 'big-5' => {
|
---|
35 | pack('C*', 0x40) => pack('U*', 0x40), # COMMERCIAL AT
|
---|
36 | pack('C*', 0xA4, 0x40) => "\x{4E00}", # CJK-4E00
|
---|
37 | },
|
---|
38 | 'euc-jp' => {
|
---|
39 | pack('C*', 0xB0, 0xA1) => "\x{4E9C}", # CJK-4E9C
|
---|
40 | pack('C*', 0x8F, 0xB0, 0xA1) => "\x{4E02}", # CJK-4E02
|
---|
41 | },
|
---|
42 | 'shift-jis' => {
|
---|
43 | pack('C*', 0xA9) => "\x{FF69}", # halfwidth katakana small U
|
---|
44 | pack('C*', 0x82, 0xA9) => "\x{304B}", # hiragana KA
|
---|
45 | },
|
---|
46 | );
|
---|
47 |
|
---|
48 | # 4 == @char; paired tests inside 3 nested loops,
|
---|
49 | # plus extra pair of tests in a loop, plus extra pair of tests.
|
---|
50 | plan tests => 2 * (4 ** 3 + 4 + 1) * (keys %mbchars);
|
---|
51 |
|
---|
52 | for my $enc (sort keys %mbchars) {
|
---|
53 | local ${^ENCODING} = find_encoding($enc);
|
---|
54 | my @char = (sort(keys %{ $mbchars{$enc} }),
|
---|
55 | sort(values %{ $mbchars{$enc} }));
|
---|
56 |
|
---|
57 | for my $rs (@char) {
|
---|
58 | local $/ = $rs;
|
---|
59 | for my $start (@char) {
|
---|
60 | for my $end (@char) {
|
---|
61 | my $string = $start.$end;
|
---|
62 | my ($expect, $return);
|
---|
63 | if ($end eq $rs) {
|
---|
64 | $expect = $start;
|
---|
65 | # The answer will always be a length in utf8, even if the
|
---|
66 | # scalar was encoded with a different length
|
---|
67 | $return = length ($end . "\x{100}") - 1;
|
---|
68 | } else {
|
---|
69 | $expect = $string;
|
---|
70 | $return = 0;
|
---|
71 | }
|
---|
72 | is (chomp ($string), $return);
|
---|
73 | is ($string, $expect); # "$enc \$/=$rs $start $end"
|
---|
74 | }
|
---|
75 | }
|
---|
76 | # chomp should not stringify references unless it decides to modify
|
---|
77 | # them
|
---|
78 | $_ = [];
|
---|
79 | my $got = chomp();
|
---|
80 | is ($got, 0);
|
---|
81 | is (ref($_), "ARRAY", "chomp ref (no modify)");
|
---|
82 | }
|
---|
83 |
|
---|
84 | $/ = ")"; # the last char of something like "ARRAY(0x80ff6e4)"
|
---|
85 | my $got = chomp();
|
---|
86 | is ($got, 1);
|
---|
87 | ok (!ref($_), "chomp ref (modify)");
|
---|
88 | }
|
---|