source: branches/samba-3.5.x/lib/util/charset/tests/iconv.c

Last change on this file was 414, checked in by Herwig Bauernfeind, 15 years ago

Samba 3.5.0: Initial import

File size: 12.3 KB
Line 
/*
   Unix SMB/CIFS implementation.

   local testing of iconv routines. This tests the system iconv code against
   the built-in iconv code

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
22
23#include "includes.h"
24#include "torture/torture.h"
25#include "system/iconv.h"
26#include "system/time.h"
27#include "libcli/raw/libcliraw.h"
28#include "param/param.h"
29#include "torture/util.h"
30#include "talloc.h"
31
32#if HAVE_NATIVE_ICONV
33
34static bool iconv_untestable(struct torture_context *tctx)
35{
36 iconv_t cd;
37
38 if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
39 torture_skip(tctx, "system iconv disabled - skipping test");
40
41 cd = iconv_open("UTF-16LE", "UCS-4LE");
42 if (cd == (iconv_t)-1)
43 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
44 iconv_close(cd);
45
46 cd = iconv_open("UTF-16LE", "CP850");
47 if (cd == (iconv_t)-1)
48 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
49 iconv_close(cd);
50
51 return false;
52}
53
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  codepoint - value to encode; it is fed to iconv as one UCS-4LE unit
  buf       - output buffer, must have room for 8 bytes
  size      - receives the number of bytes written to buf

  returns the iconv() result converted to int (0 when the codepoint was
  converted cleanly), or -1 if the conversion descriptor cannot be opened.

  The descriptor is opened on first use and kept in a static variable
  for the following calls.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	static iconv_t ucs4_to_utf16;
	uint8_t ucs4[4];
	char *src;
	size_t src_left, dst_left, rc;
	int i;

	if (!ucs4_to_utf16) {
		ucs4_to_utf16 = iconv_open("UTF-16LE", "UCS-4LE");
		if (ucs4_to_utf16 == (iconv_t)-1) {
			/* forget the failed handle so the next call retries */
			ucs4_to_utf16 = NULL;
			return -1;
		}
	}

	/* serialise the codepoint least significant byte first */
	for (i = 0; i < 4; i++) {
		ucs4[i] = (codepoint >> (8 * i)) & 0xFF;
	}

	src = (char *)ucs4;
	src_left = 4;
	dst_left = 8;

	rc = iconv(ucs4_to_utf16, &src, &src_left, &buf, &dst_left);

	/* bytes produced = capacity offered minus capacity left over */
	*size = 8 - dst_left;

	return rc;
}
87
88
/*
  work out the unicode codepoint of the first character in the buffer

  buf     - input bytes encoded in "charset"
  size    - number of bytes available in buf
  charset - name of the encoding of buf, as understood by iconv_open()

  returns the codepoint assembled from the 4 UCS-4LE bytes iconv wrote,
  or 0 when nothing could be converted (including the case where the
  system iconv does not know "charset").
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	/*
	 * The return value is deliberately ignored: the output only has
	 * room for a single UCS-4 unit, and whatever was written into
	 * out[] (all zero if nothing) is what gets reported.
	 */
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so a high byte is never shifted into the
	   sign bit of a promoted int */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
112
/*
  display a buffer with name prefix

  name - label printed before the bytes
  buf  - bytes to dump
  size - number of bytes of buf to print

  Output goes to stdout as "<name> xx xx ... \n" with two lowercase hex
  digits per byte.
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	size_t i;	/* same type as size: no signed/unsigned comparison */

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
125
126/*
127 given a UTF-16LE buffer, test the system and built-in iconv code to
128 make sure they do exactly the same thing in converting the buffer to
129 "charset", then convert it back again and ensure we get the same
130 buffer back
131*/
132static bool test_buffer(struct torture_context *test,
133 uint8_t *inbuf, size_t size, const char *charset)
134{
135 uint8_t buf1[1000], buf2[1000], buf3[1000];
136 size_t outsize1, outsize2, outsize3;
137 const char *ptr_in;
138 char *ptr_out;
139 size_t size_in1, size_in2, size_in3;
140 size_t ret1, ret2, ret3, len1, len2;
141 int errno1, errno2;
142 static iconv_t cd;
143 static smb_iconv_t cd2, cd3;
144 static const char *last_charset;
145
146 if (cd && last_charset) {
147 iconv_close(cd);
148 smb_iconv_close(cd2);
149 smb_iconv_close(cd3);
150 cd = NULL;
151 }
152
153 if (!cd) {
154 cd = iconv_open(charset, "UTF-16LE");
155 if (cd == (iconv_t)-1) {
156 torture_fail(test,
157 talloc_asprintf(test,
158 "failed to open %s to UTF-16LE",
159 charset));
160 }
161 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
162 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
163 last_charset = charset;
164 }
165
166 /* internal convert to charset - placing result in buf1 */
167 ptr_in = (const char *)inbuf;
168 ptr_out = (char *)buf1;
169 size_in1 = size;
170 outsize1 = sizeof(buf1);
171
172 memset(ptr_out, 0, outsize1);
173 errno = 0;
174 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
175 errno1 = errno;
176
177 /* system convert to charset - placing result in buf2 */
178 ptr_in = (const char *)inbuf;
179 ptr_out = (char *)buf2;
180 size_in2 = size;
181 outsize2 = sizeof(buf2);
182
183 memset(ptr_out, 0, outsize2);
184 errno = 0;
185 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
186 errno2 = errno;
187
188 len1 = sizeof(buf1) - outsize1;
189 len2 = sizeof(buf2) - outsize2;
190
191 /* codepoints above 1M are not interesting for now */
192 if (len2 > len1 &&
193 memcmp(buf1, buf2, len1) == 0 &&
194 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
195 return true;
196 }
197 if (len1 > len2 &&
198 memcmp(buf1, buf2, len2) == 0 &&
199 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
200 return true;
201 }
202
203 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
204
205 if (errno1 != errno2) {
206 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
207 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
208 torture_fail(test, talloc_asprintf(test,
209 "e1=%d/%s e2=%d/%s",
210 errno1, strerror(errno1),
211 errno2, strerror(errno2)));
212 }
213
214 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
215
216 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
217
218 if (len1 != len2 ||
219 memcmp(buf1, buf2, len1) != 0) {
220 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
221 show_buf(" IN1:", inbuf, size-size_in1);
222 show_buf(" IN2:", inbuf, size-size_in2);
223 show_buf("OUT1:", buf1, len1);
224 show_buf("OUT2:", buf2, len2);
225 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
226 torture_comment(test, "next codepoint is %u",
227 get_codepoint((char *)(buf2+len1), len2-len1, charset));
228 }
229 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
230 torture_comment(test, "next codepoint is %u",
231 get_codepoint((char *)(buf1+len2),len1-len2, charset));
232 }
233
234 torture_fail(test, "failed");
235 }
236
237 /* convert back to UTF-16, putting result in buf3 */
238 size = size - size_in1;
239 ptr_in = (const char *)buf1;
240 ptr_out = (char *)buf3;
241 size_in3 = len1;
242 outsize3 = sizeof(buf3);
243
244 memset(ptr_out, 0, outsize3);
245 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
246
247 /* we only internally support the first 1M codepoints */
248 if (outsize3 != sizeof(buf3) - size &&
249 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
250 size - (sizeof(buf3) - outsize3),
251 "UTF-16LE") >= (1<<20)) {
252 return true;
253 }
254
255 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
256 "pull failed - %s", strerror(errno)));
257
258 if (strncmp(charset, "UTF", 3) != 0) {
259 /* don't expect perfect mappings for non UTF charsets */
260 return true;
261 }
262
263
264 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
265 "wrong outsize3");
266
267 if (memcmp(buf3, inbuf, size) != 0) {
268 torture_comment(test, "pull bytes mismatch:");
269 show_buf("inbuf", inbuf, size);
270 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
271 torture_comment(test, "next codepoint is %u\n",
272 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
273 size - (sizeof(buf3) - outsize3),
274 "UTF-16LE"));
275 torture_fail(test, "");
276 }
277
278 return true;
279}
280
281
282/*
283 test the push_codepoint() and next_codepoint() functions for a given
284 codepoint
285*/
286static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
287{
288 uint8_t buf[10];
289 size_t size, size2;
290 codepoint_t c;
291
292 size = push_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
293 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
294 "Invalid Codepoint range");
295
296 if (size == -1) return true;
297
298 buf[size] = random();
299 buf[size+1] = random();
300 buf[size+2] = random();
301 buf[size+3] = random();
302
303 c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
304
305 torture_assert(tctx, c == codepoint,
306 talloc_asprintf(tctx,
307 "next_codepoint(%u) failed - gave %u", codepoint, c));
308
309 torture_assert(tctx, size2 == size,
310 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
311 codepoint, (int)size2, (int)size));
312
313 return true;
314}
315
/*
  run test_codepoint() over every codepoint from 0 up to (but not
  including) 1<<20, stopping at the first failure

  returns true straight away when iconv_untestable() says the system
  iconv cannot be used.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	unsigned int cp = 0;

	if (iconv_untestable(tctx)) {
		return true;
	}

	while (cp < (1<<20)) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
328
/*
  push each of the first 1<<20 codepoints through test_buffer() with
  "UTF-8" as the target charset

  Codepoints for which gen_codepoint_utf16() returns non-zero are
  skipped. A progress line is emitted for every 1000th codepoint that
  was generated successfully, if the "progress" setting is enabled.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned char utf16[1000];
	size_t utf16_len;
	unsigned int cp;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (cp = 0; cp < (1<<20); cp++) {
		if (gen_codepoint_utf16(cp, (char *)utf16, &utf16_len) != 0) {
			/* the system iconv could not encode this one */
			continue;
		}

		if (cp % 1000 == 0 &&
		    torture_setting_bool(tctx, "progress", true)) {
			torture_comment(tctx, "codepoint=%u \r", cp);
			fflush(stdout);
		}

		if (!test_buffer(tctx, utf16, utf16_len, "UTF-8")) {
			return false;
		}
	}

	return true;
}
355
/*
  feed randomly generated byte sequences through test_buffer(), once
  with "UTF-8" and once with "CP850" as the target charset

  Each iteration builds a buffer of 0..99 random bytes. The loop runs
  500000 iterations; the first failing iteration is reported on stdout
  and ends the test.

  returns true when every iteration passed, or when iconv_untestable()
  says the system iconv cannot be used.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx))
		return true;

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			/* roughly 80% of the bytes are kept below 128 */
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			/*
			 * occasionally OR in the bit patterns 0xd8 / 0xdc
			 * NOTE(review): presumably to provoke UTF-16
			 * surrogate handling - confirm intent
			 */
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			/* i is unsigned, so print it with %u */
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
401
402
403static bool test_string2key(struct torture_context *tctx)
404{
405 uint16_t *buf;
406 char *dest = NULL;
407 TALLOC_CTX *mem_ctx = talloc_new(tctx);
408 size_t len = (random()%1000)+1;
409 const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
410 uint8_t le1[20];
411 uint8_t *munged1;
412 uint8_t *out1;
413 size_t ret;
414 int i;
415 const char *correct = "a\357\277\275b\357\277\275c\001defg";
416
417 buf = talloc_size(mem_ctx, len*2);
418 generate_random_buffer((uint8_t *)buf, len*2);
419
420 torture_comment(tctx, "converting random buffer\n");
421
422 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) {
423 torture_fail(tctx, "Failed to convert random buffer\n");
424 }
425
426 for (i=0;i<10;i++) {
427 SSVAL(&le1[2*i], 0, in1[i]);
428 }
429
430 torture_comment(tctx, "converting fixed buffer to UTF16\n");
431
432 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) {
433 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
434 }
435
436 torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
437
438 torture_comment(tctx, "converting fixed buffer to UTF8\n");
439
440 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) {
441 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
442 }
443
444 torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
445 "conversion gave incorrect result\n");
446
447 talloc_free(mem_ctx);
448
449 return true;
450}
451
452struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
453{
454 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
455
456 torture_suite_add_simple_test(suite, "string2key",
457 test_string2key);
458
459 torture_suite_add_simple_test(suite, "next_codepoint()",
460 test_next_codepoint);
461
462 torture_suite_add_simple_test(suite, "first 1M codepoints",
463 test_first_1m);
464
465 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
466 test_random_5m);
467
468 torture_suite_add_simple_test(suite, "string2key",
469 test_string2key);
470 return suite;
471}
472
473#else
474
475struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
476{
477 printf("No native iconv library - can't run iconv test\n");
478 return NULL;
479}
480
481#endif
Note: See TracBrowser for help on using the repository browser.