source: vendor/sed/current/testsuite/utf8-ru.sh

Last change on this file was 3611, checked in by bird, 11 months ago

vendor/sed/current: GNU sed 4.9 (sed-4.9.tar.xz sha256:6e226b732e1cd739464ad6862bd1a1aba42d7982922da7a53519631d24975181)

File size: 4.0 KB
Line 
1#!/bin/sh
2
3# Test GNU extension "\u" and "\U" (uppercase conversion)
4# in "s///" command.
5# This is an adaptation of the old utf8-1/2/3/4 tests.
6
7# Copyright (C) 2017-2022 Free Software Foundation, Inc.
8
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Load the shared test framework; srcdir defaults to "." when it is unset.
# path_prepend_ is a framework helper (defined outside this file) --
# presumably it puts ./sed first in PATH so the freshly built binary is tested.
. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
print_ver_ sed

# Framework helper defined outside this file; by its name it gates the test
# on the availability of a Russian UTF-8 locale.
require_ru_utf8_locale_
25
26# The letters used in these tests are:
27# UTF8:Octal UTF8:HEX CodePoint Name
28# А \320\220 \xD0\x90 U+0410 \N{CYRILLIC CAPITAL LETTER A}
29# Д \320\224 \xD0\x94 U+0414 \N{CYRILLIC CAPITAL LETTER DE}
30# а \320\260 \xD0\xB0 U+0430 \N{CYRILLIC SMALL LETTER A}
31# д \320\264 \xD0\xB4 U+0434 \N{CYRILLIC SMALL LETTER DE}
32
33# Using octal values, as these are the most portable across various printfs.
34
35
# Fixture files shared by all four tests. Every file is written with octal
# escapes so the bytes do not depend on the encoding of this script.

# Input: Same input for all tests (all lower case letters)
#   "да д"  (small DE, small A, space, small DE)
printf '\320\264\320\260 \320\264\n' > utf8-inp || framework_failure_


# Test 1: Convert "small DE" to upper case (with \U)
#   s/д/\U&/g
printf 's/\320\264/\\U&/g' > utf8-1.sed || framework_failure_

# Test 1: Expected output - two capital DE letters.
#   "Да Д"
printf '\320\224\320\260 \320\224\n' > utf8-1-exp || framework_failure_


# Test 2: Convert "small DE" to upper case (with \u - next character only)
#   s/д/\u&/g
printf 's/\320\264/\\u&/g\n' > utf8-2.sed || framework_failure_

# The expected output of test 2 is identical to test 1.
# We create the file to make the test loops simpler.
cp utf8-1-exp utf8-2-exp || framework_failure_


# Test 3: Capitalize only the next character (\u)
# Only the first "DE" should be capitalized.
#   s/д.*/\u&/g
printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_

# Test 3: Expected output - first DE capitalized, second DE not.
#   "Да д"
printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_


# Test 4: Capitalize all matched characters
#   s/д.*/\U&/g
printf 's/\320\264.*/\\U&/g' > utf8-4.sed || framework_failure_

# Test 4: Expected output - all capital letters:
#   "ДА Д"
printf '\320\224\320\220 \320\224\n' > utf8-4-exp || framework_failure_
78
# Step 1: force Russian UTF8 locale.
# The case-conversion should either work, or not modify the input.
for i in 1 2 3 4; do
  LC_ALL=ru_RU.UTF-8 \
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-ru-out" || fail=1

  remove_cr_inplace "utf8-$i-ru-out"

  # If we have the expected output - continue to the next test
  compare "utf8-$i-exp" "utf8-$i-ru-out" && continue

  # Otherwise, ensure the input wasn't modified
  # (i.e. sed did not modify partial octets resulting in
  # invalid multibyte sequences).
  # Compare against utf8-inp, the same file that was fed to sed above;
  # a per-test "utf8-$i-inp" is never created, so comparing against it
  # would flag every fallback as a failure.
  compare utf8-inp "utf8-$i-ru-out" || fail=1
done
96
97
# Step 2: If the current locale supports UTF8, repeat the above tests.
# `locale` prints LC_CTYPE="value" (quoted) or LC_CTYPE=value (unquoted),
# so strip the variable name and any surrounding quotes.
l=$(locale | grep '^LC_CTYPE=' | sed 's/^LC_CTYPE=// ; s/^"// ; s/"$//')

# Test the value captured in $l; matching against any other (unset)
# variable would always select utf8=no and silently skip this step.
case "$l" in
  *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;;
  *) utf8=no;;
esac

if test "$utf8" = yes ; then
  for i in 1 2 3 4; do
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-out" || fail=1

    remove_cr_inplace "utf8-$i-out"

    # If we have the expected output - continue to the next test
    compare "utf8-$i-exp" "utf8-$i-out" && continue

    # Otherwise, ensure the input wasn't modified
    # (i.e. sed did not modify partial octets resulting in
    # invalid multibyte sequences).
    # Compare against utf8-inp, the file that was fed to sed above;
    # a per-test "utf8-$i-inp" is never created.
    compare utf8-inp "utf8-$i-out" || fail=1
  done
fi


# Exit (capital E) is not defined in this file; NOTE(review): presumably the
# test framework's exit wrapper -- confirm before changing it to `exit`.
Exit $fail
Note: See TracBrowser for help on using the repository browser.