Context Navigation

utf8-ru.sh

Last change on this file was 3611, checked in by bird, 11 months ago
vendor/sed/current: GNU sed 4.9 (sed-4.9.tar.xz sha256:6e226b732e1cd739464ad6862bd1a1aba42d7982922da7a53519631d24975181)
File size: 4.0 KB

Line
#!/bin/sh

# Test GNU extension "\u" and "\U" (uppercase conversion)
# in "s///" command.
# This is an adaptation of the old utf8-1/2/3/4 tests.

# Copyright (C) 2017-2022 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Source the shared test framework; srcdir defaults to "." when unset.
# NOTE(review): the helper functions used throughout this script are not
# defined here -- presumably they all come from init.sh; confirm there.
. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
print_ver_ sed

# NOTE(review): presumably skips the test when no Russian UTF-8 locale
# is available -- confirm against the framework.
require_ru_utf8_locale_

# The letters used in these tests are:
#     UTF8:Octal  UTF8:HEX  CodePoint  Name
#  А  \320\220    \xD0\x90  U+0410     \N{CYRILLIC CAPITAL LETTER A}
#  Д  \320\224    \xD0\x94  U+0414     \N{CYRILLIC CAPITAL LETTER DE}
#  а  \320\260    \xD0\xB0  U+0430     \N{CYRILLIC SMALL LETTER A}
#  д  \320\264    \xD0\xB4  U+0434     \N{CYRILLIC SMALL LETTER DE}

# Using octal values, as these are the most portable across various
# printf implementations.


# Input: Same input for all tests (all lower case letters)
#   да д
printf '\320\264\320\260 \320\264\n' > utf8-inp || framework_failure_

# Test 1: Convert "small DE" to upper case (with \U)
#   s/д/\U&/g
# The doubled backslash in the printf format emits one literal backslash,
# so the generated sed script contains "\U".
printf 's/\320\264/\\U&/g' > utf8-1.sed || framework_failure_

# Test 1: Expected output - two capital DE letters.
#   Да Д
printf '\320\224\320\260 \320\224\n' > utf8-1-exp || framework_failure_

# Test 2: Convert "small DE" to upper case (with \u - next character only)
#   s/д/\u&/g
# Each match is a single letter, so \u upper-cases the whole match.
printf 's/\320\264/\\u&/g\n' > utf8-2.sed || framework_failure_

# The expected output of test 2 is identical to test 1.
# We create the file to make the test loop (below) simpler.
cp utf8-1-exp utf8-2-exp || framework_failure_

# Test 3: Capitalize only the next character (\u)
# Only the first "DE" should be capitalized.
#   s/д.*/\u&/g
printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_

# Test 3: Expected output - First DE capitalized, second DE not.
#   Да д
printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_

# Test 4: Capitalize all matched characters
#   s/д.*/\U&/g
printf 's/\320\264.*/\\U&/g' > utf8-4.sed || framework_failure_


# Test 4: Expected output - All capital letters:
#   ДА Д
printf '\320\224\320\220 \320\224\n' > utf8-4-exp || framework_failure_

# Step 1: force Russian UTF8 locale.
# The case-conversion should either work, or not modify the input.
for i in 1 2 3 4; do
  LC_ALL=ru_RU.UTF-8 \
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-ru-out" || fail=1

  remove_cr_inplace "utf8-$i-ru-out"

  # If we have the expected output - continue to the next test
  compare "utf8-$i-exp" "utf8-$i-ru-out" && continue

  # Otherwise, ensure the input wasn't modified
  # (i.e. sed did not modify partial octets resulting in
  # invalid multibyte sequences).
  # All four tests share the single input file utf8-inp; no per-test
  # "utf8-$i-inp" file is ever created, so comparing against one could
  # never succeed.
  compare utf8-inp "utf8-$i-ru-out" || fail=1
done


# Step 2: If the current locale supports UTF8, repeat the above tests.
# Take the LC_CTYPE value reported by locale(1), stripping the
# surrounding double quotes when present.
l=$(locale | grep '^LC_CTYPE=' | sed 's/^.*="// ; s/"$//')

# Match on the codeset suffix of the value stored in $l.  The leading '*'
# is required because the value carries a language/territory prefix
# (e.g. "ru_RU.UTF-8").
case "$l" in
  *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;;
  *) utf8=no;;
esac

if test "$utf8" = yes ; then
  for i in 1 2 3 4; do
    sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-out" || fail=1

    remove_cr_inplace "utf8-$i-out"

    # If we have the expected output - continue to the next test
    compare "utf8-$i-exp" "utf8-$i-out" && continue

    # Otherwise, ensure the input wasn't modified
    # (i.e. sed did not modify partial octets resulting in
    # invalid multibyte sequences).
    # The shared input file is utf8-inp; there is no per-test input file.
    compare utf8-inp "utf8-$i-out" || fail=1
  done
fi


# Finish with the accumulated failure status.
# NOTE(review): Exit is not defined in this script -- presumably provided
# by init.sh; confirm.
Exit $fail

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/sed/current/testsuite/utf8-ru.sh

Download in other formats: