| 1 | /* Test of wcwidth() function.
|
|---|
| 2 | Copyright (C) 2007-2021 Free Software Foundation, Inc.
|
|---|
| 3 |
|
|---|
| 4 | This program is free software: you can redistribute it and/or modify
|
|---|
| 5 | it under the terms of the GNU General Public License as published by
|
|---|
| 6 | the Free Software Foundation; either version 3 of the License, or
|
|---|
| 7 | (at your option) any later version.
|
|---|
| 8 |
|
|---|
| 9 | This program is distributed in the hope that it will be useful,
|
|---|
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 12 | GNU General Public License for more details.
|
|---|
| 13 |
|
|---|
| 14 | You should have received a copy of the GNU General Public License
|
|---|
| 15 | along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
|---|
| 16 |
|
|---|
| 17 | /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
|
|---|
| 18 |
|
|---|
| 19 | #include <config.h>
|
|---|
| 20 |
|
|---|
| 21 | #include <wchar.h>
|
|---|
| 22 |
|
|---|
| 23 | #include "signature.h"
|
|---|
| 24 | SIGNATURE_CHECK (wcwidth, int, (wchar_t));
|
|---|
| 25 |
|
|---|
| 26 | #include <locale.h>
|
|---|
| 27 | #include <string.h>
|
|---|
| 28 |
|
|---|
| 29 | #include "c-ctype.h"
|
|---|
| 30 | #include "localcharset.h"
|
|---|
| 31 | #include "macros.h"
|
|---|
| 32 |
|
|---|
/* Exercise wcwidth(): first in the locale the program starts in (only when
   the preprocessor guards below allow it), then in the "fr_FR.UTF-8" locale
   if that locale can be selected. Every expectation is checked with the
   ASSERT macro from "macros.h". If the UTF-8 locale cannot be selected, the
   UTF-8 checks are skipped without any diagnostic.
   Returns 0 when control reaches the end of the function. */
int
main (void)
{
  wchar_t wc;

  /* NOTE(review): GNULIB_WCHAR_SINGLE_LOCALE and C_CTYPE_ASCII are defined
     outside this file (presumably by <config.h> and "c-ctype.h"); the
     initial-locale check only runs when the former is false and the latter
     is defined. */
#if !GNULIB_WCHAR_SINGLE_LOCALE
# ifdef C_CTYPE_ASCII
  /* Test width of ASCII characters. */
  for (wc = 0x20; wc < 0x7F; wc++)
    ASSERT (wcwidth (wc) == 1);
# endif
#endif

  /* Switch to an UTF-8 locale. */
  if (setlocale (LC_ALL, "fr_FR.UTF-8") != NULL
      /* Check whether it's really an UTF-8 locale.
         On OpenBSD 4.0, the setlocale call succeeds only for the LC_CTYPE
         category and therefore returns "C/fr_FR.UTF-8/C/C/C/C", but the
         LC_CTYPE category is effectively set to an ASCII LC_CTYPE category;
         in particular, locale_charset() returns "ASCII". */
      && strcmp (locale_charset (), "UTF-8") == 0)
    {
      /* Test width of ASCII characters (printable range U+0020..U+007E). */
      for (wc = 0x20; wc < 0x7F; wc++)
        ASSERT (wcwidth (wc) == 1);

      /* Test width of some non-spacing characters. */
      ASSERT (wcwidth (0x0301) == 0);
      ASSERT (wcwidth (0x05B0) == 0);

      /* Test width of some format control characters.
         "<= 0" accepts both a width of 0 and the -1 that an implementation
         may return for characters it treats as non-printable. */
      ASSERT (wcwidth (0x200E) <= 0);
      ASSERT (wcwidth (0x2060) <= 0);
#if 0 /* wchar_t may be only 16 bits. */
      ASSERT (wcwidth (0xE0001) <= 0);
      ASSERT (wcwidth (0xE0044) <= 0);
#endif

      /* Test width of some zero width characters. */
      /* While it is desirable that U+200B, U+200C, U+200D have width 0,
         because this makes wcswidth work better on strings that contain these
         characters, it is acceptable if an implementation treats these
         characters like control characters. */
      ASSERT (wcwidth (0x200B) <= 0);
      ASSERT (wcwidth (0xFEFF) <= 0);

      /* Test width of some math symbols.
         U+2202 is marked as having ambiguous width (A) in EastAsianWidth.txt
         (see <https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>).
         The Unicode Standard Annex 11
           <https://www.unicode.org/reports/tr11/tr11-36.html>
         says
           "Ambiguous characters behave like wide or narrow characters
            depending on the context (language tag, script identification,
            associated font, source of data, or explicit markup; all can
            provide the context). If the context cannot be established
            reliably, they should be treated as narrow characters by default."
         For wcwidth(), the only available context information is the locale.
         "fr_FR.UTF-8" is a Western locale, not an East Asian locale, therefore
         U+2202 should be treated like a narrow character. */
      ASSERT (wcwidth (0x2202) == 1);

      /* Test width of some CJK characters. */
      ASSERT (wcwidth (0x3000) == 2);
      ASSERT (wcwidth (0xB250) == 2);
      ASSERT (wcwidth (0xFF1A) == 2);
#if 0 /* wchar_t may be only 16 bits. */
      ASSERT (wcwidth (0x20369) == 2);
      ASSERT (wcwidth (0x2F876) == 2);
#endif
    }

  return 0;
}
|
|---|