#!/bin/sh

# Test GNU extension "\u" and "\U" (uppercase conversion)
# in "s///" command.
# This is an adaptation of the old utf8-1/2/3/4 tests.

# Copyright (C) 2017-2022 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# Load the shared test framework (init.sh under $srcdir, defaulting to ".")
# and register ./sed via the framework's path_prepend_ helper
# (presumably so the freshly built sed is found first in PATH).
. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
print_ver_ sed

# The checks below run sed under LC_ALL=ru_RU.UTF-8; this framework helper
# presumably skips the test when that locale is not available.
require_ru_utf8_locale_

# The letters used in these tests are:
#     UTF8:Octal  UTF8:HEX   CodePoint  Name
#  А  \320\220    \xD0\x90   U+0410     \N{CYRILLIC CAPITAL LETTER A}
#  Д  \320\224    \xD0\x94   U+0414     \N{CYRILLIC CAPITAL LETTER DE}
#  а  \320\260    \xD0\xB0   U+0430     \N{CYRILLIC SMALL LETTER A}
#  д  \320\264    \xD0\xB4   U+0434     \N{CYRILLIC SMALL LETTER DE}

# Using octal values, as these are the most portable across various printfs.


# Input: Same input for all tests (all lower case letters)
#    да д
printf '\320\264\320\260 \320\264\n' > utf8-inp || framework_failure_


# Each test N below consists of a sed script (utf8-N.sed) and the output
# expected when case conversion works (utf8-N-exp).

# Test 1: Convert "small DE" to upper case (with \U)
#     s/д/\U&/g
printf 's/\320\264/\\U&/g' > utf8-1.sed || framework_failure_

# Test 1: Expected output - two capital DE letters.
#     Да Д
printf '\320\224\320\260 \320\224\n' > utf8-1-exp || framework_failure_


# Test 2: Convert "small DE" to upper case (with \u - next character only)
#     s/д/\u&/g
printf 's/\320\264/\\u&/g\n' > utf8-2.sed || framework_failure_

# The expected output of test 2 is identical to test 1.
# We create the file to make the test loop (below) simpler.
cp utf8-1-exp utf8-2-exp || framework_failure_



# Test 3: Capitalize only the next character (\u)
# Only the first "DE" should be capitalized.
#     s/д.*/\u&/g
printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_

# Test 3: Expected output - First DE capitalized, second DE not.
#     Да д
printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_


# Test 4: Capitalize all matched characters
#     s/д.*/\U&/g
printf 's/\320\264.*/\\U&/g' > utf8-4.sed || framework_failure_


# Test 4: Expected output - All capital letters:
#     ДА Д
printf '\320\224\320\220 \320\224\n' > utf8-4-exp || framework_failure_

# Step 1: force Russian UTF8 locale.
# The case-conversion should either work, or not modify the input.
for i in 1 2 3 4; do
  LC_ALL=ru_RU.UTF-8 \
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-ru-out" || fail=1

  # Framework helper; presumably strips carriage returns from the output
  # file so the byte-wise comparisons below are not affected by them.
  remove_cr_inplace "utf8-$i-ru-out"

  # If we have the expected output - continue to next test
  compare "utf8-$i-exp" "utf8-$i-ru-out" && continue

  # Otherwise, ensure the input wasn't modified
  # (i.e. sed did not modify partial octets resulting in
  # invalid multibyte sequences).
  # All four tests share the single input file utf8-inp; there are no
  # per-test "utf8-N-inp" files, so compare against the shared one.
  compare utf8-inp "utf8-$i-ru-out" || fail=1
done


# Step 2: If the current locale supports UTF8, repeat the above tests.
# Extract the LC_CTYPE value from the output of locale(1), dropping the
# 'LC_CTYPE="' prefix and the closing quote when they are present.
l=$(locale | grep '^LC_CTYPE=' | sed 's/^.*="// ; s/"$//')

# Decide based on the codeset suffix of the locale name just extracted.
case "$l" in
  *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;;
  *) utf8=no;;
esac

if test "$utf8" = yes ; then
  for i in 1 2 3 4; do
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-out" || fail=1

    # Framework helper; presumably strips carriage returns from the output
    # file so the byte-wise comparisons below are not affected by them.
    remove_cr_inplace "utf8-$i-out"

    # If we have the expected output - continue to next test
    compare "utf8-$i-exp" "utf8-$i-out" && continue

    # Otherwise, ensure the input wasn't modified
    # (i.e. sed did not modify partial octets resulting in
    # invalid multibyte sequences).
    # All four tests share the single input file utf8-inp; there are no
    # per-test "utf8-N-inp" files, so compare against the shared one.
    compare utf8-inp "utf8-$i-out" || fail=1
  done
fi


# Finish through the test framework's Exit helper, passing the accumulated
# status ($fail is set to 1 by any failed check in this script).
Exit $fail