1 | /*
|
---|
2 | Unix SMB/CIFS implementation.
|
---|
3 |
|
---|
4 | local testing of iconv routines. This tests the system iconv code against
|
---|
5 | the built-in iconv code
|
---|
6 |
|
---|
7 | Copyright (C) Andrew Tridgell 2004
|
---|
8 |
|
---|
9 | This program is free software; you can redistribute it and/or modify
|
---|
10 | it under the terms of the GNU General Public License as published by
|
---|
11 | the Free Software Foundation; either version 3 of the License, or
|
---|
12 | (at your option) any later version.
|
---|
13 |
|
---|
14 | This program is distributed in the hope that it will be useful,
|
---|
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
17 | GNU General Public License for more details.
|
---|
18 |
|
---|
19 | You should have received a copy of the GNU General Public License
|
---|
20 | along with this program. If not, see <http://www.gnu.org/licenses/>.
|
---|
21 | */
|
---|
22 |
|
---|
23 | #include "includes.h"
|
---|
24 | #include "torture/torture.h"
|
---|
25 | #include "system/iconv.h"
|
---|
26 | #include "system/time.h"
|
---|
27 | #include "libcli/raw/libcliraw.h"
|
---|
28 | #include "param/param.h"
|
---|
29 | #include "torture/util.h"
|
---|
30 | #include "talloc.h"
|
---|
31 |
|
---|
32 | #if HAVE_NATIVE_ICONV
|
---|
33 |
|
---|
34 | static bool iconv_untestable(struct torture_context *tctx)
|
---|
35 | {
|
---|
36 | iconv_t cd;
|
---|
37 |
|
---|
38 | if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
|
---|
39 | torture_skip(tctx, "system iconv disabled - skipping test");
|
---|
40 |
|
---|
41 | cd = iconv_open("UTF-16LE", "UCS-4LE");
|
---|
42 | if (cd == (iconv_t)-1)
|
---|
43 | torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
|
---|
44 | iconv_close(cd);
|
---|
45 |
|
---|
46 | cd = iconv_open("UTF-16LE", "CP850");
|
---|
47 | if (cd == (iconv_t)-1)
|
---|
48 | torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
|
---|
49 | iconv_close(cd);
|
---|
50 |
|
---|
51 | return false;
|
---|
52 | }
|
---|
53 |
|
---|
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  The codepoint is laid out as four little-endian bytes (UCS-4LE) and
  pushed through the system iconv. The conversion descriptor is opened
  on the first call and kept in a function-local static afterwards.

  buf must have room for 8 bytes; *size receives the number of bytes
  iconv() wrote. Returns -1 if the descriptor cannot be opened,
  otherwise the iconv() return value converted to int.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	enum { OUT_CAPACITY = 8 };
	static iconv_t cd;
	uint8_t ucs4[4];
	char *src = (char *)ucs4;
	size_t src_left = sizeof(ucs4);
	size_t dst_left = OUT_CAPACITY;
	size_t rc;
	unsigned int i;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* keep the static "unopened" so a later call retries */
			cd = NULL;
			return -1;
		}
	}

	/* least significant byte first */
	for (i = 0; i < sizeof(ucs4); i++) {
		ucs4[i] = (codepoint >> (8 * i)) & 0xFF;
	}

	rc = iconv(cd, &src, &src_left, &buf, &dst_left);

	*size = OUT_CAPACITY - dst_left;

	return rc;
}
|
---|
87 |
|
---|
88 |
|
---|
/*
  work out the unicode codepoint of the first character in the buffer

  buf holds size bytes encoded in "charset". The bytes are converted to
  UCS-4LE with the system iconv into a 4 byte output buffer, so at most
  one character is produced; the iconv() result itself is ignored.

  Returns the decoded codepoint. Returns 0 when no conversion from
  "charset" can be opened, or when iconv() wrote nothing (the output
  buffer is zeroed beforehand).
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* do not hand the failure sentinel to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	/*
	  the return value is intentionally unused: running out of output
	  space after the first character is the normal case here
	*/
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so the top byte never lands in a sign bit */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
|
---|
112 |
|
---|
/*
  display a buffer with name prefix

  Prints name, then each of the size bytes in buf as two lowercase hex
  digits followed by a space, then a newline, all on stdout.
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	size_t i;	/* same type as size: no signed/unsigned comparison */

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
|
---|
125 |
|
---|
126 | /*
|
---|
127 | given a UTF-16LE buffer, test the system and built-in iconv code to
|
---|
128 | make sure they do exactly the same thing in converting the buffer to
|
---|
129 | "charset", then convert it back again and ensure we get the same
|
---|
130 | buffer back
|
---|
131 | */
|
---|
132 | static bool test_buffer(struct torture_context *test,
|
---|
133 | uint8_t *inbuf, size_t size, const char *charset)
|
---|
134 | {
|
---|
135 | uint8_t buf1[1000], buf2[1000], buf3[1000];
|
---|
136 | size_t outsize1, outsize2, outsize3;
|
---|
137 | const char *ptr_in;
|
---|
138 | char *ptr_out;
|
---|
139 | size_t size_in1, size_in2, size_in3;
|
---|
140 | size_t ret1, ret2, ret3, len1, len2;
|
---|
141 | int errno1, errno2;
|
---|
142 | static iconv_t cd;
|
---|
143 | static smb_iconv_t cd2, cd3;
|
---|
144 | static const char *last_charset;
|
---|
145 |
|
---|
146 | if (cd && last_charset) {
|
---|
147 | iconv_close(cd);
|
---|
148 | smb_iconv_close(cd2);
|
---|
149 | smb_iconv_close(cd3);
|
---|
150 | cd = NULL;
|
---|
151 | }
|
---|
152 |
|
---|
153 | if (!cd) {
|
---|
154 | cd = iconv_open(charset, "UTF-16LE");
|
---|
155 | if (cd == (iconv_t)-1) {
|
---|
156 | torture_fail(test,
|
---|
157 | talloc_asprintf(test,
|
---|
158 | "failed to open %s to UTF-16LE",
|
---|
159 | charset));
|
---|
160 | }
|
---|
161 | cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
|
---|
162 | cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
|
---|
163 | last_charset = charset;
|
---|
164 | }
|
---|
165 |
|
---|
166 | /* internal convert to charset - placing result in buf1 */
|
---|
167 | ptr_in = (const char *)inbuf;
|
---|
168 | ptr_out = (char *)buf1;
|
---|
169 | size_in1 = size;
|
---|
170 | outsize1 = sizeof(buf1);
|
---|
171 |
|
---|
172 | memset(ptr_out, 0, outsize1);
|
---|
173 | errno = 0;
|
---|
174 | ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
|
---|
175 | errno1 = errno;
|
---|
176 |
|
---|
177 | /* system convert to charset - placing result in buf2 */
|
---|
178 | ptr_in = (const char *)inbuf;
|
---|
179 | ptr_out = (char *)buf2;
|
---|
180 | size_in2 = size;
|
---|
181 | outsize2 = sizeof(buf2);
|
---|
182 |
|
---|
183 | memset(ptr_out, 0, outsize2);
|
---|
184 | errno = 0;
|
---|
185 | ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
|
---|
186 | errno2 = errno;
|
---|
187 |
|
---|
188 | len1 = sizeof(buf1) - outsize1;
|
---|
189 | len2 = sizeof(buf2) - outsize2;
|
---|
190 |
|
---|
191 | /* codepoints above 1M are not interesting for now */
|
---|
192 | if (len2 > len1 &&
|
---|
193 | memcmp(buf1, buf2, len1) == 0 &&
|
---|
194 | get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
|
---|
195 | return true;
|
---|
196 | }
|
---|
197 | if (len1 > len2 &&
|
---|
198 | memcmp(buf1, buf2, len2) == 0 &&
|
---|
199 | get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
|
---|
200 | return true;
|
---|
201 | }
|
---|
202 |
|
---|
203 | torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
|
---|
204 |
|
---|
205 | if (errno1 != errno2) {
|
---|
206 | show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
|
---|
207 | show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
|
---|
208 | torture_fail(test, talloc_asprintf(test,
|
---|
209 | "e1=%d/%s e2=%d/%s",
|
---|
210 | errno1, strerror(errno1),
|
---|
211 | errno2, strerror(errno2)));
|
---|
212 | }
|
---|
213 |
|
---|
214 | torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
|
---|
215 |
|
---|
216 | torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
|
---|
217 |
|
---|
218 | if (len1 != len2 ||
|
---|
219 | memcmp(buf1, buf2, len1) != 0) {
|
---|
220 | torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
|
---|
221 | show_buf(" IN1:", inbuf, size-size_in1);
|
---|
222 | show_buf(" IN2:", inbuf, size-size_in2);
|
---|
223 | show_buf("OUT1:", buf1, len1);
|
---|
224 | show_buf("OUT2:", buf2, len2);
|
---|
225 | if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
|
---|
226 | torture_comment(test, "next codepoint is %u",
|
---|
227 | get_codepoint((char *)(buf2+len1), len2-len1, charset));
|
---|
228 | }
|
---|
229 | if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
|
---|
230 | torture_comment(test, "next codepoint is %u",
|
---|
231 | get_codepoint((char *)(buf1+len2),len1-len2, charset));
|
---|
232 | }
|
---|
233 |
|
---|
234 | torture_fail(test, "failed");
|
---|
235 | }
|
---|
236 |
|
---|
237 | /* convert back to UTF-16, putting result in buf3 */
|
---|
238 | size = size - size_in1;
|
---|
239 | ptr_in = (const char *)buf1;
|
---|
240 | ptr_out = (char *)buf3;
|
---|
241 | size_in3 = len1;
|
---|
242 | outsize3 = sizeof(buf3);
|
---|
243 |
|
---|
244 | memset(ptr_out, 0, outsize3);
|
---|
245 | ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
|
---|
246 |
|
---|
247 | /* we only internally support the first 1M codepoints */
|
---|
248 | if (outsize3 != sizeof(buf3) - size &&
|
---|
249 | get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
|
---|
250 | size - (sizeof(buf3) - outsize3),
|
---|
251 | "UTF-16LE") >= (1<<20)) {
|
---|
252 | return true;
|
---|
253 | }
|
---|
254 |
|
---|
255 | torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
|
---|
256 | "pull failed - %s", strerror(errno)));
|
---|
257 |
|
---|
258 | if (strncmp(charset, "UTF", 3) != 0) {
|
---|
259 | /* don't expect perfect mappings for non UTF charsets */
|
---|
260 | return true;
|
---|
261 | }
|
---|
262 |
|
---|
263 |
|
---|
264 | torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
|
---|
265 | "wrong outsize3");
|
---|
266 |
|
---|
267 | if (memcmp(buf3, inbuf, size) != 0) {
|
---|
268 | torture_comment(test, "pull bytes mismatch:");
|
---|
269 | show_buf("inbuf", inbuf, size);
|
---|
270 | show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
|
---|
271 | torture_comment(test, "next codepoint is %u\n",
|
---|
272 | get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
|
---|
273 | size - (sizeof(buf3) - outsize3),
|
---|
274 | "UTF-16LE"));
|
---|
275 | torture_fail(test, "");
|
---|
276 | }
|
---|
277 |
|
---|
278 | return true;
|
---|
279 | }
|
---|
280 |
|
---|
281 |
|
---|
282 | /*
|
---|
283 | test the push_codepoint() and next_codepoint() functions for a given
|
---|
284 | codepoint
|
---|
285 | */
|
---|
286 | static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
|
---|
287 | {
|
---|
288 | uint8_t buf[10];
|
---|
289 | size_t size, size2;
|
---|
290 | codepoint_t c;
|
---|
291 |
|
---|
292 | size = push_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
|
---|
293 | torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
|
---|
294 | "Invalid Codepoint range");
|
---|
295 |
|
---|
296 | if (size == -1) return true;
|
---|
297 |
|
---|
298 | buf[size] = random();
|
---|
299 | buf[size+1] = random();
|
---|
300 | buf[size+2] = random();
|
---|
301 | buf[size+3] = random();
|
---|
302 |
|
---|
303 | c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
|
---|
304 |
|
---|
305 | torture_assert(tctx, c == codepoint,
|
---|
306 | talloc_asprintf(tctx,
|
---|
307 | "next_codepoint(%u) failed - gave %u", codepoint, c));
|
---|
308 |
|
---|
309 | torture_assert(tctx, size2 == size,
|
---|
310 | talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
|
---|
311 | codepoint, (int)size2, (int)size));
|
---|
312 |
|
---|
313 | return true;
|
---|
314 | }
|
---|
315 |
|
---|
/*
  run test_codepoint() over every codepoint below 1<<20

  Returns true straight away when iconv_untestable() reports that the
  test cannot run, false as soon as one codepoint fails, true otherwise.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	const unsigned int limit = 1<<20;
	unsigned int codepoint = 0;

	if (iconv_untestable(tctx)) {
		return true;
	}

	while (codepoint < limit) {
		if (!test_codepoint(tctx, codepoint)) {
			return false;
		}
		codepoint++;
	}

	return true;
}
|
---|
328 |
|
---|
/*
  compare system and built-in conversion to UTF-8 for each of the first
  1<<20 codepoints

  Codepoints for which gen_codepoint_utf16() returns non-zero are
  skipped. Every 1000th codepoint a progress line is emitted when the
  "progress" torture setting is true. Returns false on the first
  test_buffer() failure.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	size_t size;
	unsigned int codepoint;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (codepoint = 0; codepoint < (1<<20); codepoint++) {
		bool at_progress_step;

		if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
			/* the system iconv would not encode this codepoint */
			continue;
		}

		at_progress_step = (codepoint % 1000 == 0);
		if (at_progress_step && torture_setting_bool(tctx, "progress", true)) {
			torture_comment(tctx, "codepoint=%u \r", codepoint);
			fflush(stdout);
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			return false;
		}
	}

	return true;
}
|
---|
355 |
|
---|
/*
  feed random byte sequences through test_buffer() for UTF-8 and CP850

  Each iteration builds a buffer of 0..99 random bytes and checks it
  against both charsets. Returns false on the first failure, after
  printing the iteration number.

  NOTE(review): the loop runs 500000 iterations although the function
  name and the suite description say "5M" - confirm which is intended.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			/* roughly 80% of the bytes are kept below 128 */
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			/*
			  0xd8 and 0xdc are OR-ed in about one time in ten
			  each - presumably to make UTF-16 surrogate values
			  more frequent in the input
			*/
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			/* i is unsigned, so print it with %u */
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
|
---|
401 |
|
---|
402 |
|
---|
403 | static bool test_string2key(struct torture_context *tctx)
|
---|
404 | {
|
---|
405 | uint16_t *buf;
|
---|
406 | char *dest = NULL;
|
---|
407 | TALLOC_CTX *mem_ctx = talloc_new(tctx);
|
---|
408 | size_t len = (random()%1000)+1;
|
---|
409 | const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
|
---|
410 | uint8_t le1[20];
|
---|
411 | uint8_t *munged1;
|
---|
412 | uint8_t *out1;
|
---|
413 | size_t ret;
|
---|
414 | int i;
|
---|
415 | const char *correct = "a\357\277\275b\357\277\275c\001defg";
|
---|
416 |
|
---|
417 | buf = talloc_size(mem_ctx, len*2);
|
---|
418 | generate_random_buffer((uint8_t *)buf, len*2);
|
---|
419 |
|
---|
420 | torture_comment(tctx, "converting random buffer\n");
|
---|
421 |
|
---|
422 | if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) {
|
---|
423 | torture_fail(tctx, "Failed to convert random buffer\n");
|
---|
424 | }
|
---|
425 |
|
---|
426 | for (i=0;i<10;i++) {
|
---|
427 | SSVAL(&le1[2*i], 0, in1[i]);
|
---|
428 | }
|
---|
429 |
|
---|
430 | torture_comment(tctx, "converting fixed buffer to UTF16\n");
|
---|
431 |
|
---|
432 | if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) {
|
---|
433 | torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
|
---|
434 | }
|
---|
435 |
|
---|
436 | torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
|
---|
437 |
|
---|
438 | torture_comment(tctx, "converting fixed buffer to UTF8\n");
|
---|
439 |
|
---|
440 | if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) {
|
---|
441 | torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
|
---|
442 | }
|
---|
443 |
|
---|
444 | torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
|
---|
445 | "conversion gave incorrect result\n");
|
---|
446 |
|
---|
447 | talloc_free(mem_ctx);
|
---|
448 |
|
---|
449 | return true;
|
---|
450 | }
|
---|
451 |
|
---|
452 | struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
|
---|
453 | {
|
---|
454 | struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
|
---|
455 |
|
---|
456 | torture_suite_add_simple_test(suite, "string2key",
|
---|
457 | test_string2key);
|
---|
458 |
|
---|
459 | torture_suite_add_simple_test(suite, "next_codepoint()",
|
---|
460 | test_next_codepoint);
|
---|
461 |
|
---|
462 | torture_suite_add_simple_test(suite, "first 1M codepoints",
|
---|
463 | test_first_1m);
|
---|
464 |
|
---|
465 | torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
|
---|
466 | test_random_5m);
|
---|
467 |
|
---|
468 | torture_suite_add_simple_test(suite, "string2key",
|
---|
469 | test_string2key);
|
---|
470 | return suite;
|
---|
471 | }
|
---|
472 |
|
---|
473 | #else
|
---|
474 |
|
---|
475 | struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
|
---|
476 | {
|
---|
477 | printf("No native iconv library - can't run iconv test\n");
|
---|
478 | return NULL;
|
---|
479 | }
|
---|
480 |
|
---|
481 | #endif
|
---|