/* Test of character set conversion.
   Copyright (C) 2007-2021 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <https://www.gnu.org/licenses/>. */

/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
|
|---|
| 18 |
|
|---|
| 19 | #include <config.h>
|
|---|
| 20 |
|
|---|
| 21 | #include "striconv.h"
|
|---|
| 22 |
|
|---|
| 23 | #if HAVE_ICONV
|
|---|
| 24 | # include <iconv.h>
|
|---|
| 25 | #endif
|
|---|
| 26 |
|
|---|
| 27 | #include <errno.h>
|
|---|
| 28 | #include <stdlib.h>
|
|---|
| 29 | #include <string.h>
|
|---|
| 30 |
|
|---|
| 31 | #include "macros.h"
|
|---|
| 32 |
|
|---|
/* Test driver for the striconv conversion functions.

   When HAVE_ICONV is set, mem_cd_iconv(), str_cd_iconv() and str_iconv()
   are each run through the same four cases:
     - ISO-8859-1 to UTF-8, expected to succeed;
     - UTF-8 to ISO-8859-1, expected to succeed;
     - UTF-8 input containing EURO SIGN, expected to fail with EILSEQ;
     - a UTF-8 input consisting of a single lead byte, for which the
       checks expect success with an empty result.

   errno is cleared before every call whose errno value is checked
   afterwards, so that an EILSEQ left over from an earlier case cannot
   satisfy a later check.

   ASSERT comes from "macros.h", which is not shown here; presumably it
   terminates the program when its condition is false.

   Returns 0 once all checks have run, or immediately when HAVE_ICONV is
   not set.  */
int
main (void)
{
#if HAVE_ICONV
  /* One sample sentence, encoded in ISO-8859-1 and in UTF-8.  Every
     "no errors" case converts one of these into the other.  */
  static const char latin1_text[] =
    "\304rger mit b\366sen B\374bchen ohne Augenma\337";
  static const char utf8_text[] =
    "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";

  /* UTF-8 inputs ending in EURO SIGN; the checks below expect their
     conversion to ISO-8859-1 to fail with EILSEQ.  */
  static const char utf8_euro[] = "\342\202\254"; /* EURO SIGN */
  static const char utf8_costs_euro[] = "Costs: 27 \342\202\254"; /* EURO SIGN */

  /* Only the first byte of a three-byte UTF-8 sequence.  */
  static const char utf8_truncated[] = "\342";

  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     and UTF-8.  */
  iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
  iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");

  ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
  ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));

  /* ------------------------- Test mem_cd_iconv() ------------------------- */

  /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
  {
    char *result = NULL;
    size_t length = 0;
    int retval = mem_cd_iconv (latin1_text, strlen (latin1_text),
                               cd_88591_to_utf8, &result, &length);
    ASSERT (retval == 0);
    ASSERT (length == strlen (utf8_text));
    ASSERT (result != NULL
            && memcmp (result, utf8_text, strlen (utf8_text)) == 0);
    free (result);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  {
    char *result = NULL;
    size_t length = 0;
    int retval = mem_cd_iconv (utf8_text, strlen (utf8_text),
                               cd_utf8_to_88591, &result, &length);
    ASSERT (retval == 0);
    ASSERT (length == strlen (latin1_text));
    ASSERT (result != NULL
            && memcmp (result, latin1_text, strlen (latin1_text)) == 0);
    free (result);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  {
    char *result = NULL;
    size_t length = 0;
    int retval;

    errno = 0; /* Make sure EILSEQ below was set by this very call.  */
    retval = mem_cd_iconv (utf8_euro, strlen (utf8_euro),
                           cd_utf8_to_88591, &result, &length);
    ASSERT (retval == -1 && errno == EILSEQ);
    ASSERT (result == NULL);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.
     The checks expect a successful return with zero-length output.  */
  {
    char *result = NULL;
    size_t length = 0;
    int retval = mem_cd_iconv (utf8_truncated, strlen (utf8_truncated),
                               cd_utf8_to_88591, &result, &length);
    ASSERT (retval == 0);
    ASSERT (length == 0);
    free (result);
  }

  /* ------------------------- Test str_cd_iconv() ------------------------- */

  /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
  {
    char *result = str_cd_iconv (latin1_text, cd_88591_to_utf8);
    ASSERT (result != NULL);
    ASSERT (strcmp (result, utf8_text) == 0);
    free (result);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  {
    char *result = str_cd_iconv (utf8_text, cd_utf8_to_88591);
    ASSERT (result != NULL);
    ASSERT (strcmp (result, latin1_text) == 0);
    free (result);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  {
    char *result;

    errno = 0; /* Do not let the EILSEQ from an earlier case leak in.  */
    result = str_cd_iconv (utf8_costs_euro, cd_utf8_to_88591);
    ASSERT (result == NULL && errno == EILSEQ);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.
     The checks expect a non-NULL, empty string.  */
  {
    char *result = str_cd_iconv (utf8_truncated, cd_utf8_to_88591);
    ASSERT (result != NULL);
    ASSERT (strcmp (result, "") == 0);
    free (result);
  }

  iconv_close (cd_88591_to_utf8);
  iconv_close (cd_utf8_to_88591);

  /* -------------------------- Test str_iconv() -------------------------- */

  /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
  {
    char *result = str_iconv (latin1_text, "ISO-8859-1", "UTF-8");
    ASSERT (result != NULL);
    ASSERT (strcmp (result, utf8_text) == 0);
    free (result);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  {
    char *result = str_iconv (utf8_text, "UTF-8", "ISO-8859-1");
    ASSERT (result != NULL);
    ASSERT (strcmp (result, latin1_text) == 0);
    free (result);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  {
    char *result;

    errno = 0; /* Do not let the EILSEQ from an earlier case leak in.  */
    result = str_iconv (utf8_costs_euro, "UTF-8", "ISO-8859-1");
    ASSERT (result == NULL && errno == EILSEQ);
  }

  /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.
     The checks expect a non-NULL, empty string.  */
  {
    char *result = str_iconv (utf8_truncated, "UTF-8", "ISO-8859-1");
    ASSERT (result != NULL);
    ASSERT (strcmp (result, "") == 0);
    free (result);
  }

#endif

  return 0;
}
|
|---|