source: branches/samba-3.0/source/lib/charcnv.c@ 312

Last change on this file since 312 was 236, checked in by Herwig Bauernfeind, 16 years ago

Fix for Ticket #85 (by diver)

File size: 40.5 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
23*/
24#include "includes.h"
25
26/* We can parameterize this if someone complains.... JRA. */
27
/**
 * Return the character written to the output in place of a character
 * that could not be converted.
 **/
char lp_failed_convert_char(void)
{
	static const char substitute = '_';

	return substitute;
}
32
33/**
34 * @file
35 *
36 * @brief Character-set conversion routines built on our iconv.
37 *
38 * @note Samba's internal character set (at least in the 3.0 series)
39 * is always the same as the one for the Unix filesystem. It is
40 * <b>not</b> necessarily UTF-8 and may be different on machines that
41 * need i18n filenames to be compatible with Unix software. It does
42 * have to be a superset of ASCII. All multibyte sequences must start
43 * with a byte with the high bit set.
44 *
45 * @sa lib/iconv.c
46 */
47
48
49static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
50static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57 const char *ret = NULL;
58#ifndef __OS2__
59 if (ch == CH_UTF16LE) ret = "UTF-16LE";
60 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61#else
62 if (ch == CH_UTF16LE) ret = "IBM-1200";
63 else if (ch == CH_UTF16BE) ret = "IBM-1200";
64#endif
65 else if (ch == CH_UNIX) ret = lp_unix_charset();
66 else if (ch == CH_DOS) ret = lp_dos_charset();
67 else if (ch == CH_DISPLAY) ret = lp_display_charset();
68 else if (ch == CH_UTF8) ret = "UTF8";
69
70#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
71 if (ret && !strcmp(ret, "LOCALE")) {
72 const char *ln = NULL;
73
74#ifdef HAVE_SETLOCALE
75 setlocale(LC_ALL, "");
76#endif
77 ln = nl_langinfo(CODESET);
78 if (ln) {
79 /* Check whether the charset name is supported
80 by iconv */
81 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
82 if (handle == (smb_iconv_t) -1) {
83 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
84 ln = NULL;
85 } else {
86 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
87 smb_iconv_close(handle);
88 }
89 }
90 ret = ln;
91 }
92#endif
93
94 if (!ret || !*ret) ret = "ASCII";
95 DEBUG(10, ("codepage: %s\n",ret));
96 return ret;
97}
98
99void lazy_initialize_conv(void)
100{
101 static int initialized = False;
102
103 if (!initialized) {
104 initialized = True;
105 load_case_tables();
106 init_iconv();
107 }
108}
109
110/**
111 * Destroy global objects allocated by init_iconv()
112 **/
113void gfree_charcnv(void)
114{
115 int c1, c2;
116
117 for (c1=0;c1<NUM_CHARSETS;c1++) {
118 for (c2=0;c2<NUM_CHARSETS;c2++) {
119 if ( conv_handles[c1][c2] ) {
120 smb_iconv_close( conv_handles[c1][c2] );
121 conv_handles[c1][c2] = 0;
122 }
123 }
124 }
125}
126
127/**
128 * Initialize iconv conversion descriptors.
129 *
130 * This is called the first time it is needed, and also called again
131 * every time the configuration is reloaded, because the charset or
132 * codepage might have changed.
133 **/
134void init_iconv(void)
135{
136 int c1, c2;
137 BOOL did_reload = False;
138
139 /* so that charset_name() works we need to get the UNIX<->UCS2 going
140 first */
141 if (!conv_handles[CH_UNIX][CH_UTF16LE])
142 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
143
144 if (!conv_handles[CH_UTF16LE][CH_UNIX])
145 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
146
147 for (c1=0;c1<NUM_CHARSETS;c1++) {
148 for (c2=0;c2<NUM_CHARSETS;c2++) {
149 const char *n1 = charset_name((charset_t)c1);
150 const char *n2 = charset_name((charset_t)c2);
151 if (conv_handles[c1][c2] &&
152 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
153 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
154 continue;
155
156 did_reload = True;
157
158 if (conv_handles[c1][c2])
159 smb_iconv_close(conv_handles[c1][c2]);
160
161 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
162 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
163 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
164 charset_name((charset_t)c1), charset_name((charset_t)c2)));
165 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
166 n1 = "ASCII";
167 }
168 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
169 n2 = "ASCII";
170 }
171 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
172 n1, n2 ));
173 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
174 if (!conv_handles[c1][c2]) {
175 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
176 smb_panic("init_iconv: conv_handle initialization failed.");
177 }
178 }
179 }
180 }
181
182 if (did_reload) {
183 /* XXX: Does this really get called every time the dos
184 * codepage changes? */
185 /* XXX: Is the did_reload test too strict? */
186 conv_silent = True;
187 init_doschar_table();
188 init_valid_table();
189 conv_silent = False;
190 }
191}
192
193/**
194 * Convert string from one encoding to another, making error checking etc
195 * Slow path version - uses (slow) iconv.
196 *
197 * @param src pointer to source string (multibyte or singlebyte)
198 * @param srclen length of the source string in bytes
199 * @param dest pointer to destination string (multibyte or singlebyte)
200 * @param destlen maximal length allowed for string
201 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
202 * @returns the number of bytes occupied in the destination
203 *
204 * Ensure the srclen contains the terminating zero.
205 *
206 **/
207
208static size_t convert_string_internal(charset_t from, charset_t to,
209 void const *src, size_t srclen,
210 void *dest, size_t destlen, BOOL allow_bad_conv)
211{
212 size_t i_len, o_len;
213 size_t retval;
214 const char* inbuf = (const char*)src;
215 char* outbuf = (char*)dest;
216 smb_iconv_t descriptor;
217
218 lazy_initialize_conv();
219
220 descriptor = conv_handles[from][to];
221
222 if (srclen == (size_t)-1) {
223 if (from == CH_UTF16LE || from == CH_UTF16BE) {
224 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
225 } else {
226 srclen = strlen((const char *)src)+1;
227 }
228 }
229
230
231 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
232 if (!conv_silent)
233 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
234 return (size_t)-1;
235 }
236
237 i_len=srclen;
238 o_len=destlen;
239
240 again:
241
242 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
243 if(retval==(size_t)-1) {
244 const char *reason="unknown error";
245 switch(errno) {
246 case EINVAL:
247 reason="Incomplete multibyte sequence";
248 if (!conv_silent)
249 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
250 if (allow_bad_conv)
251 goto use_as_is;
252 break;
253 case E2BIG:
254 reason="No more room";
255 if (!conv_silent) {
256 if (from == CH_UNIX) {
257 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
258 charset_name(from), charset_name(to),
259 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
260 } else {
261 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
262 charset_name(from), charset_name(to),
263 (unsigned int)srclen, (unsigned int)destlen));
264 }
265 }
266 break;
267 case EILSEQ:
268 reason="Illegal multibyte sequence";
269 if (!conv_silent)
270 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
271 if (allow_bad_conv)
272 goto use_as_is;
273 break;
274 default:
275 if (!conv_silent)
276 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
277 break;
278 }
279 /* smb_panic(reason); */
280 }
281 return destlen-o_len;
282
283 use_as_is:
284
285 /*
286 * Conversion not supported. This is actually an error, but there are so
287 * many misconfigured iconv systems and smb.conf's out there we can't just
288 * fail. Do a very bad conversion instead.... JRA.
289 */
290
291 {
292 if (o_len == 0 || i_len == 0)
293 return destlen - o_len;
294
295 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
296 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
297 /* Can't convert from utf16 any endian to multibyte.
298 Replace with the default fail char.
299 */
300 if (i_len < 2)
301 return destlen - o_len;
302 if (i_len >= 2) {
303 *outbuf = lp_failed_convert_char();
304
305 outbuf++;
306 o_len--;
307
308 inbuf += 2;
309 i_len -= 2;
310 }
311
312 if (o_len == 0 || i_len == 0)
313 return destlen - o_len;
314
315 /* Keep trying with the next char... */
316 goto again;
317
318 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
319 /* Can't convert to UTF16LE - just widen by adding the
320 default fail char then zero.
321 */
322 if (o_len < 2)
323 return destlen - o_len;
324
325 outbuf[0] = lp_failed_convert_char();
326 outbuf[1] = '\0';
327
328 inbuf++;
329 i_len--;
330
331 outbuf += 2;
332 o_len -= 2;
333
334 if (o_len == 0 || i_len == 0)
335 return destlen - o_len;
336
337 /* Keep trying with the next char... */
338 goto again;
339
340 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
341 to != CH_UTF16LE && to != CH_UTF16BE) {
342 /* Failed multibyte to multibyte. Just copy the default fail char and
343 try again. */
344 outbuf[0] = lp_failed_convert_char();
345
346 inbuf++;
347 i_len--;
348
349 outbuf++;
350 o_len--;
351
352 if (o_len == 0 || i_len == 0)
353 return destlen - o_len;
354
355 /* Keep trying with the next char... */
356 goto again;
357
358 } else {
359 /* Keep compiler happy.... */
360 return destlen - o_len;
361 }
362 }
363}
364
365/**
366 * Convert string from one encoding to another, making error checking etc
367 * Fast path version - handles ASCII first.
368 *
369 * @param src pointer to source string (multibyte or singlebyte)
370 * @param srclen length of the source string in bytes, or -1 for nul terminated.
371 * @param dest pointer to destination string (multibyte or singlebyte)
372 * @param destlen maximal length allowed for string - *NEVER* -1.
373 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
374 * @returns the number of bytes occupied in the destination
375 *
376 * Ensure the srclen contains the terminating zero.
377 *
378 * This function has been hand-tuned to provide a fast path.
379 * Don't change unless you really know what you are doing. JRA.
380 **/
381
382size_t convert_string(charset_t from, charset_t to,
383 void const *src, size_t srclen,
384 void *dest, size_t destlen, BOOL allow_bad_conv)
385{
386 /*
387 * NB. We deliberately don't do a strlen here if srclen == -1.
388 * This is very expensive over millions of calls and is taken
389 * care of in the slow path in convert_string_internal. JRA.
390 */
391
392#ifdef DEVELOPER
393 SMB_ASSERT(destlen != (size_t)-1);
394#endif
395
396 if (srclen == 0)
397 return 0;
398
399// DEBUG(10, ("convert_string: 1"));
400
401 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
402 const unsigned char *p = (const unsigned char *)src;
403 unsigned char *q = (unsigned char *)dest;
404 size_t slen = srclen;
405 size_t dlen = destlen;
406 unsigned char lastp = '\0';
407 size_t retval = 0;
408
409// DEBUG(10, ("convert_string: 2"));
410
411 /* If all characters are ascii, fast path here. */
412 while (slen && dlen) {
413 if ((lastp = *p) <= 0x7f) {
414 *q++ = *p++;
415 if (slen != (size_t)-1) {
416 slen--;
417 }
418 dlen--;
419 retval++;
420 if (!lastp)
421 break;
422 } else {
423#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
424 goto general_case;
425#else
426 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
427#endif
428 }
429 }
430 if (!dlen) {
431 /* Even if we fast path we should note if we ran out of room. */
432 if (((slen != (size_t)-1) && slen) ||
433 ((slen == (size_t)-1) && lastp)) {
434 errno = E2BIG;
435 }
436 }
437 return retval;
438// DEBUG(10, ("convert_string: 3"));
439
440 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
441 const unsigned char *p = (const unsigned char *)src;
442 unsigned char *q = (unsigned char *)dest;
443 size_t retval = 0;
444 size_t slen = srclen;
445 size_t dlen = destlen;
446 unsigned char lastp = '\0';
447
448 /* If all characters are ascii, fast path here. */
449 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
450 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
451 *q++ = *p;
452 if (slen != (size_t)-1) {
453 slen -= 2;
454 }
455 p += 2;
456 dlen--;
457 retval++;
458 if (!lastp)
459 break;
460 } else {
461#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
462 goto general_case;
463#else
464 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
465#endif
466 }
467 }
468 if (!dlen) {
469 /* Even if we fast path we should note if we ran out of room. */
470 if (((slen != (size_t)-1) && slen) ||
471 ((slen == (size_t)-1) && lastp)) {
472 errno = E2BIG;
473 }
474 }
475 return retval;
476// DEBUG(10, ("convert_string: 4"));
477
478 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
479 const unsigned char *p = (const unsigned char *)src;
480 unsigned char *q = (unsigned char *)dest;
481 size_t retval = 0;
482 size_t slen = srclen;
483 size_t dlen = destlen;
484 unsigned char lastp = '\0';
485
486 /* If all characters are ascii, fast path here. */
487 while (slen && (dlen >= 2)) {
488 if ((lastp = *p) <= 0x7F) {
489 *q++ = *p++;
490 *q++ = '\0';
491 if (slen != (size_t)-1) {
492 slen--;
493 }
494 dlen -= 2;
495 retval += 2;
496 if (!lastp)
497 break;
498 } else {
499#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
500 goto general_case;
501#else
502 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
503#endif
504 }
505 }
506 if (!dlen) {
507 /* Even if we fast path we should note if we ran out of room. */
508 if (((slen != (size_t)-1) && slen) ||
509 ((slen == (size_t)-1) && lastp)) {
510 errno = E2BIG;
511 }
512 }
513 return retval;
514 }
515
516#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
517 general_case:
518#endif
519 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
520}
521
522/**
523 * Convert between character sets, allocating a new buffer for the result.
524 *
525 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
526 * @param srclen length of source buffer.
527 * @param dest always set at least to NULL
528 * @note -1 is not accepted for srclen.
529 *
530 * @returns Size in bytes of the converted string; or -1 in case of error.
531 *
532 * Ensure the srclen contains the terminating zero.
533 *
534 * I hate the goto's in this function. It's embarressing.....
535 * There has to be a cleaner way to do this. JRA.
536 **/
537
538size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
539 void const *src, size_t srclen, void *dst, BOOL allow_bad_conv)
540{
541 size_t i_len, o_len, destlen = (srclen * 3) / 2;
542 size_t retval;
543 const char *inbuf = (const char *)src;
544 char *outbuf = NULL, *ob = NULL;
545 smb_iconv_t descriptor;
546 void **dest = (void **)dst;
547
548 *dest = NULL;
549
550 if (src == NULL || srclen == (size_t)-1)
551 return (size_t)-1;
552 if (srclen == 0)
553 return 0;
554
555 lazy_initialize_conv();
556
557 descriptor = conv_handles[from][to];
558
559 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
560 if (!conv_silent)
561 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
562 return (size_t)-1;
563 }
564
565 convert:
566
567 /* +2 is for ucs2 null termination. */
568 if ((destlen*2)+2 < destlen) {
569 /* wrapped ! abort. */
570 if (!conv_silent)
571 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
572 if (!ctx)
573 SAFE_FREE(outbuf);
574 return (size_t)-1;
575 } else {
576 destlen = destlen * 2;
577 }
578
579 /* +2 is for ucs2 null termination. */
580 if (ctx) {
581 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
582 } else {
583 ob = (char *)SMB_REALLOC(ob, destlen + 2);
584 }
585
586 if (!ob) {
587 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
588 return (size_t)-1;
589 }
590 outbuf = ob;
591 i_len = srclen;
592 o_len = destlen;
593
594 again:
595 DEBUG(10,("convert_string_internal: convert_string(%s,%s): srclen=%u destlen=%u\n",
596 charset_name(from), charset_name(to),
597 (unsigned int)srclen, (unsigned int)destlen));
598
599 retval = smb_iconv(descriptor,
600 &inbuf, &i_len,
601 &outbuf, &o_len);
602 if(retval == (size_t)-1) {
603 const char *reason="unknown error";
604 switch(errno) {
605 case EINVAL:
606 reason="Incomplete multibyte sequence";
607 if (!conv_silent)
608 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
609 if (allow_bad_conv)
610 goto use_as_is;
611 break;
612 case E2BIG:
613 goto convert;
614 case EILSEQ:
615 reason="Illegal multibyte sequence";
616 if (!conv_silent)
617 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
618 if (allow_bad_conv)
619 goto use_as_is;
620 break;
621 }
622 if (!conv_silent)
623 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
624 /* smb_panic(reason); */
625 return (size_t)-1;
626 }
627
628 out:
629
630 destlen = destlen - o_len;
631 if (ctx) {
632 /* We're shrinking here so we know the +2 is safe from wrap. */
633 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
634 } else {
635 ob = (char *)SMB_REALLOC(ob,destlen + 2);
636 }
637
638 if (destlen && !ob) {
639 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
640 return (size_t)-1;
641 }
642
643 *dest = ob;
644
645 /* Must ucs2 null terminate in the extra space we allocated. */
646 ob[destlen] = '\0';
647 ob[destlen+1] = '\0';
648
649 return destlen;
650
651 use_as_is:
652
653 /*
654 * Conversion not supported. This is actually an error, but there are so
655 * many misconfigured iconv systems and smb.conf's out there we can't just
656 * fail. Do a very bad conversion instead.... JRA.
657 */
658
659 {
660 if (o_len == 0 || i_len == 0)
661 goto out;
662
663 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
664 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
665 /* Can't convert from utf16 any endian to multibyte.
666 Replace with the default fail char.
667 */
668
669 if (i_len < 2)
670 goto out;
671
672 if (i_len >= 2) {
673 *outbuf = lp_failed_convert_char();
674
675 outbuf++;
676 o_len--;
677
678 inbuf += 2;
679 i_len -= 2;
680 }
681
682 if (o_len == 0 || i_len == 0)
683 goto out;
684
685 /* Keep trying with the next char... */
686 goto again;
687
688 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
689 /* Can't convert to UTF16LE - just widen by adding the
690 default fail char then zero.
691 */
692 if (o_len < 2)
693 goto out;
694
695 outbuf[0] = lp_failed_convert_char();
696 outbuf[1] = '\0';
697
698 inbuf++;
699 i_len--;
700
701 outbuf += 2;
702 o_len -= 2;
703
704 if (o_len == 0 || i_len == 0)
705 goto out;
706
707 /* Keep trying with the next char... */
708 goto again;
709
710 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
711 to != CH_UTF16LE && to != CH_UTF16BE) {
712 /* Failed multibyte to multibyte. Just copy the default fail char and
713 try again. */
714 outbuf[0] = lp_failed_convert_char();
715
716 inbuf++;
717 i_len--;
718
719 outbuf++;
720 o_len--;
721
722 if (o_len == 0 || i_len == 0)
723 goto out;
724
725 /* Keep trying with the next char... */
726 goto again;
727
728 } else {
729 /* Keep compiler happy.... */
730 goto out;
731 }
732 }
733}
734
735/**
736 * Convert between character sets, allocating a new buffer using talloc for the result.
737 *
738 * @param srclen length of source buffer.
739 * @param dest always set at least to NULL
740 * @note -1 is not accepted for srclen.
741 *
742 * @returns Size in bytes of the converted string; or -1 in case of error.
743 **/
744size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
745 void const *src, size_t srclen, void *dst,
746 BOOL allow_bad_conv)
747{
748 void **dest = (void **)dst;
749 size_t dest_len;
750
751 *dest = NULL;
752 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
753 if (dest_len == (size_t)-1)
754 return (size_t)-1;
755 if (*dest == NULL)
756 return (size_t)-1;
757 return dest_len;
758}
759
760size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
761{
762 size_t size;
763 smb_ucs2_t *buffer;
764
765 size = push_ucs2_allocate(&buffer, src);
766 if (size == (size_t)-1) {
767 smb_panic("failed to create UCS2 buffer");
768 }
769 if (!strupper_w(buffer) && (dest == src)) {
770 free(buffer);
771 return srclen;
772 }
773
774 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
775 free(buffer);
776 return size;
777}
778
779/**
780 strdup() a unix string to upper case.
781 Max size is pstring.
782**/
783
784char *strdup_upper(const char *s)
785{
786 pstring out_buffer;
787 const unsigned char *p = (const unsigned char *)s;
788 unsigned char *q = (unsigned char *)out_buffer;
789
790 /* this is quite a common operation, so we want it to be
791 fast. We optimise for the ascii case, knowing that all our
792 supported multi-byte character sets are ascii-compatible
793 (ie. they match for the first 128 chars) */
794
795 while (1) {
796 if (*p & 0x80)
797 break;
798 *q++ = toupper_ascii(*p);
799 if (!*p)
800 break;
801 p++;
802 if (p - ( const unsigned char *)s >= sizeof(pstring))
803 break;
804 }
805
806 if (*p) {
807 /* MB case. */
808 size_t size;
809 wpstring buffer;
810 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
811 if (size == (size_t)-1) {
812 return NULL;
813 }
814
815 strupper_w(buffer);
816
817 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
818 if (size == (size_t)-1) {
819 return NULL;
820 }
821 }
822
823 return SMB_STRDUP(out_buffer);
824}
825
826size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
827{
828 size_t size;
829 smb_ucs2_t *buffer = NULL;
830
831 size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
832 (void **)(void *)&buffer, True);
833 if (size == (size_t)-1 || !buffer) {
834 smb_panic("failed to create UCS2 buffer");
835 }
836 if (!strlower_w(buffer) && (dest == src)) {
837 SAFE_FREE(buffer);
838 return srclen;
839 }
840 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
841 SAFE_FREE(buffer);
842 return size;
843}
844
845/**
846 strdup() a unix string to lower case.
847**/
848
849char *strdup_lower(const char *s)
850{
851 size_t size;
852 smb_ucs2_t *buffer = NULL;
853 char *out_buffer;
854
855 size = push_ucs2_allocate(&buffer, s);
856 if (size == -1 || !buffer) {
857 return NULL;
858 }
859
860 strlower_w(buffer);
861
862 size = pull_ucs2_allocate(&out_buffer, buffer);
863 SAFE_FREE(buffer);
864
865 if (size == (size_t)-1) {
866 return NULL;
867 }
868
869 return out_buffer;
870}
871
872static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
873{
874 if (flags & (STR_NOALIGN|STR_ASCII))
875 return 0;
876 return PTR_DIFF(p, base_ptr) & 1;
877}
878
879
880/**
881 * Copy a string from a char* unix src to a dos codepage string destination.
882 *
883 * @return the number of bytes occupied by the string in the destination.
884 *
885 * @param flags can include
886 * <dl>
887 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
888 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
889 * </dl>
890 *
891 * @param dest_len the maximum length in bytes allowed in the
892 * destination. If @p dest_len is -1 then no maximum is used.
893 **/
894size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
895{
896 size_t src_len = strlen(src);
897 pstring tmpbuf;
898 size_t ret;
899
900 /* No longer allow a length of -1 */
901 if (dest_len == (size_t)-1)
902 smb_panic("push_ascii - dest_len == -1");
903
904 if (flags & STR_UPPER) {
905 pstrcpy(tmpbuf, src);
906 strupper_m(tmpbuf);
907 src = tmpbuf;
908 }
909
910 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
911 src_len++;
912
913 ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
914 if (ret == (size_t)-1 &&
915 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
916 && dest_len > 0) {
917 ((char *)dest)[0] = '\0';
918 }
919 return ret;
920}
921
922size_t push_ascii_fstring(void *dest, const char *src)
923{
924 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
925}
926
927size_t push_ascii_pstring(void *dest, const char *src)
928{
929 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
930}
931
932/********************************************************************
933 Push an nstring - ensure null terminated. Written by
934 moriyama@miraclelinux.com (MORIYAMA Masayuki).
935********************************************************************/
936
937size_t push_ascii_nstring(void *dest, const char *src)
938{
939 size_t i, buffer_len, dest_len;
940 smb_ucs2_t *buffer;
941
942 conv_silent = True;
943 buffer_len = push_ucs2_allocate(&buffer, src);
944 if (buffer_len == (size_t)-1) {
945 smb_panic("failed to create UCS2 buffer");
946 }
947
948 /* We're using buffer_len below to count ucs2 characters, not bytes. */
949 buffer_len /= sizeof(smb_ucs2_t);
950
951 dest_len = 0;
952 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
953 unsigned char mb[10];
954 /* Convert one smb_ucs2_t character at a time. */
955 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
956 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
957 memcpy((char *)dest + dest_len, mb, mb_len);
958 dest_len += mb_len;
959 } else {
960 errno = E2BIG;
961 break;
962 }
963 }
964 ((char *)dest)[dest_len] = '\0';
965
966 SAFE_FREE(buffer);
967 conv_silent = False;
968 return dest_len;
969}
970
971/**
972 * Copy a string from a dos codepage source to a unix char* destination.
973 *
974 * The resulting string in "dest" is always null terminated.
975 *
976 * @param flags can have:
977 * <dl>
978 * <dt>STR_TERMINATE</dt>
979 * <dd>STR_TERMINATE means the string in @p src
980 * is null terminated, and src_len is ignored.</dd>
981 * </dl>
982 *
983 * @param src_len is the length of the source area in bytes.
984 * @returns the number of bytes occupied by the string in @p src.
985 **/
986size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
987{
988 size_t ret;
989
990 if (dest_len == (size_t)-1)
991 dest_len = sizeof(pstring);
992
993 if (flags & STR_TERMINATE) {
994 if (src_len == (size_t)-1) {
995 src_len = strlen((const char *)src) + 1;
996 } else {
997 size_t len = strnlen((const char *)src, src_len);
998 if (len < src_len)
999 len++;
1000 src_len = len;
1001 }
1002 }
1003
1004 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1005 if (ret == (size_t)-1) {
1006 ret = 0;
1007 dest_len = 0;
1008 }
1009
1010 if (dest_len && ret) {
1011 /* Did we already process the terminating zero ? */
1012 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1013 dest[MIN(ret, dest_len-1)] = 0;
1014 }
1015 } else {
1016 dest[0] = 0;
1017 }
1018
1019 return src_len;
1020}
1021
1022size_t pull_ascii_pstring(char *dest, const void *src)
1023{
1024 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1025}
1026
1027size_t pull_ascii_fstring(char *dest, const void *src)
1028{
1029 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1030}
1031
1032/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1033
1034size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1035{
1036 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1037}
1038
1039/**
1040 * Copy a string from a char* src to a unicode destination.
1041 *
1042 * @returns the number of bytes occupied by the string in the destination.
1043 *
1044 * @param flags can have:
1045 *
1046 * <dl>
1047 * <dt>STR_TERMINATE <dd>means include the null termination.
1048 * <dt>STR_UPPER <dd>means uppercase in the destination.
1049 * <dt>STR_NOALIGN <dd>means don't do alignment.
1050 * </dl>
1051 *
1052 * @param dest_len is the maximum length allowed in the
1053 * destination. If dest_len is -1 then no maxiumum is used.
1054 **/
1055
1056size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1057{
1058 size_t len=0;
1059 size_t src_len;
1060 size_t ret;
1061
1062 /* treat a pstring as "unlimited" length */
1063 if (dest_len == (size_t)-1)
1064 dest_len = sizeof(pstring);
1065
1066 if (flags & STR_TERMINATE)
1067 src_len = (size_t)-1;
1068 else
1069 src_len = strlen(src);
1070
1071 if (ucs2_align(base_ptr, dest, flags)) {
1072 *(char *)dest = 0;
1073 dest = (void *)((char *)dest + 1);
1074 if (dest_len)
1075 dest_len--;
1076 len++;
1077 }
1078
1079 /* ucs2 is always a multiple of 2 bytes */
1080 dest_len &= ~1;
1081
1082 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1083 if (ret == (size_t)-1) {
1084 return 0;
1085 }
1086
1087 len += ret;
1088
1089 if (flags & STR_UPPER) {
1090 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1091 size_t i;
1092
1093 /* We check for i < (ret / 2) below as the dest string isn't null
1094 terminated if STR_TERMINATE isn't set. */
1095
1096 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1097 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1098 if (v != dest_ucs2[i]) {
1099 dest_ucs2[i] = v;
1100 }
1101 }
1102 }
1103
1104 return len;
1105}
1106
1107
1108/**
1109 * Copy a string from a unix char* src to a UCS2 destination,
1110 * allocating a buffer using talloc().
1111 *
1112 * @param dest always set at least to NULL
1113 *
1114 * @returns The number of bytes occupied by the string in the destination
1115 * or -1 in case of error.
1116 **/
1117size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1118{
1119 size_t src_len = strlen(src)+1;
1120
1121 *dest = NULL;
1122 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1123}
1124
1125
1126/**
1127 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1128 *
1129 * @param dest always set at least to NULL
1130 *
1131 * @returns The number of bytes occupied by the string in the destination
1132 * or -1 in case of error.
1133 **/
1134
1135size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1136{
1137 size_t src_len = strlen(src)+1;
1138
1139 *dest = NULL;
1140 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1141}
1142
1143/**
1144 Copy a string from a char* src to a UTF-8 destination.
1145 Return the number of bytes occupied by the string in the destination
1146 Flags can have:
1147 STR_TERMINATE means include the null termination
1148 STR_UPPER means uppercase in the destination
1149 dest_len is the maximum length allowed in the destination. If dest_len
1150 is -1 then no maxiumum is used.
1151**/
1152
1153static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1154{
1155 size_t src_len = strlen(src);
1156 pstring tmpbuf;
1157
1158 /* treat a pstring as "unlimited" length */
1159 if (dest_len == (size_t)-1)
1160 dest_len = sizeof(pstring);
1161
1162 if (flags & STR_UPPER) {
1163 pstrcpy(tmpbuf, src);
1164 strupper_m(tmpbuf);
1165 src = tmpbuf;
1166 }
1167
1168 if (flags & STR_TERMINATE)
1169 src_len++;
1170
1171 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1172}
1173
1174size_t push_utf8_fstring(void *dest, const char *src)
1175{
1176 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1177}
1178
1179/**
1180 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1181 *
1182 * @param dest always set at least to NULL
1183 *
1184 * @returns The number of bytes occupied by the string in the destination
1185 **/
1186
1187size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1188{
1189 size_t src_len = strlen(src)+1;
1190
1191 *dest = NULL;
1192 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1193}
1194
1195/**
1196 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1197 *
1198 * @param dest always set at least to NULL
1199 *
1200 * @returns The number of bytes occupied by the string in the destination
1201 **/
1202
1203size_t push_utf8_allocate(char **dest, const char *src)
1204{
1205 size_t src_len = strlen(src)+1;
1206
1207 *dest = NULL;
1208 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1209}
1210
1211/**
1212 Copy a string from a ucs2 source to a unix char* destination.
1213 Flags can have:
1214 STR_TERMINATE means the string in src is null terminated.
1215 STR_NOALIGN means don't try to align.
1216 if STR_TERMINATE is set then src_len is ignored if it is -1.
1217 src_len is the length of the source area in bytes
1218 Return the number of bytes occupied by the string in src.
1219 The resulting string in "dest" is always null terminated.
1220**/
1221
1222size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1223{
1224 size_t ret;
1225
1226 if (dest_len == (size_t)-1)
1227 dest_len = sizeof(pstring);
1228
1229 if (ucs2_align(base_ptr, src, flags)) {
1230 src = (const void *)((const char *)src + 1);
1231 if (src_len != (size_t)-1)
1232 src_len--;
1233 }
1234
1235 if (flags & STR_TERMINATE) {
1236 /* src_len -1 is the default for null terminated strings. */
1237 if (src_len != (size_t)-1) {
1238 size_t len = strnlen_w((const smb_ucs2_t *)src,
1239 src_len/2);
1240 if (len < src_len/2)
1241 len++;
1242 src_len = len*2;
1243 }
1244 }
1245
1246 /* ucs2 is always a multiple of 2 bytes */
1247 if (src_len != (size_t)-1)
1248 src_len &= ~1;
1249
1250 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1251 if (ret == (size_t)-1) {
1252 return 0;
1253 }
1254
1255 if (src_len == (size_t)-1)
1256 src_len = ret*2;
1257
1258 if (dest_len && ret) {
1259 /* Did we already process the terminating zero ? */
1260 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1261 dest[MIN(ret, dest_len-1)] = 0;
1262 }
1263 } else {
1264 dest[0] = 0;
1265 }
1266
1267 return src_len;
1268}
1269
1270size_t pull_ucs2_pstring(char *dest, const void *src)
1271{
1272 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1273}
1274
1275size_t pull_ucs2_fstring(char *dest, const void *src)
1276{
1277 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1278}
1279
1280/**
1281 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1282 *
1283 * @param dest always set at least to NULL
1284 *
1285 * @returns The number of bytes occupied by the string in the destination
1286 **/
1287
1288size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1289{
1290 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1291 *dest = NULL;
1292 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1293}
1294
1295/**
1296 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1297 *
1298 * @param dest always set at least to NULL
1299 *
1300 * @returns The number of bytes occupied by the string in the destination
1301 **/
1302
1303size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1304{
1305 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1306 *dest = NULL;
1307 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1308}
1309
1310/**
1311 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1312 *
1313 * @param dest always set at least to NULL
1314 *
1315 * @returns The number of bytes occupied by the string in the destination
1316 **/
1317
1318size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1319{
1320 size_t src_len = strlen(src)+1;
1321 *dest = NULL;
1322 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1323}
1324
1325/**
1326 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1327 *
1328 * @param dest always set at least to NULL
1329 *
1330 * @returns The number of bytes occupied by the string in the destination
1331 **/
1332
1333size_t pull_utf8_allocate(char **dest, const char *src)
1334{
1335 size_t src_len = strlen(src)+1;
1336 *dest = NULL;
1337 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1338}
1339
1340/**
1341 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1342 *
1343 * @param dest always set at least to NULL
1344 *
1345 * @returns The number of bytes occupied by the string in the destination
1346 **/
1347
1348size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1349{
1350 size_t src_len = strlen(src)+1;
1351 *dest = NULL;
1352 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1353}
1354
1355/**
1356 Copy a string from a char* src to a unicode or ascii
1357 dos codepage destination choosing unicode or ascii based on the
1358 flags in the SMB buffer starting at base_ptr.
1359 Return the number of bytes occupied by the string in the destination.
1360 flags can have:
1361 STR_TERMINATE means include the null termination.
1362 STR_UPPER means uppercase in the destination.
1363 STR_ASCII use ascii even with unicode packet.
1364 STR_NOALIGN means don't do alignment.
1365 dest_len is the maximum length allowed in the destination. If dest_len
1366 is -1 then no maxiumum is used.
1367**/
1368
1369size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1370{
1371#ifdef DEVELOPER
1372 /* We really need to zero fill here, not clobber
1373 * region, as we want to ensure that valgrind thinks
1374 * all of the outgoing buffer has been written to
1375 * so a send() or write() won't trap an error.
1376 * JRA.
1377 */
1378#if 0
1379 if (dest_len != (size_t)-1)
1380 clobber_region(function, line, dest, dest_len);
1381#else
1382 if (dest_len != (size_t)-1)
1383 memset(dest, '\0', dest_len);
1384#endif
1385#endif
1386
1387 if (!(flags & STR_ASCII) && \
1388 ((flags & STR_UNICODE || \
1389 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1390 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1391 }
1392 return push_ascii(dest, src, dest_len, flags);
1393}
1394
1395
1396/**
1397 Copy a string from a unicode or ascii source (depending on
1398 the packet flags) to a char* destination.
1399 Flags can have:
1400 STR_TERMINATE means the string in src is null terminated.
1401 STR_UNICODE means to force as unicode.
1402 STR_ASCII use ascii even with unicode packet.
1403 STR_NOALIGN means don't do alignment.
1404 if STR_TERMINATE is set then src_len is ignored is it is -1
1405 src_len is the length of the source area in bytes.
1406 Return the number of bytes occupied by the string in src.
1407 The resulting string in "dest" is always null terminated.
1408**/
1409
1410size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1411{
1412#ifdef DEVELOPER
1413 if (dest_len != (size_t)-1)
1414 clobber_region(function, line, dest, dest_len);
1415#endif
1416
1417 if (!(flags & STR_ASCII) && \
1418 ((flags & STR_UNICODE || \
1419 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1420 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1421 }
1422 return pull_ascii(dest, src, dest_len, src_len, flags);
1423}
1424
1425size_t align_string(const void *base_ptr, const char *p, int flags)
1426{
1427 if (!(flags & STR_ASCII) && \
1428 ((flags & STR_UNICODE || \
1429 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1430 return ucs2_align(base_ptr, p, flags);
1431 }
1432 return 0;
1433}
1434
1435/*
1436 Return the unicode codepoint for the next multi-byte CH_UNIX character
1437 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1438
1439 Also return the number of bytes consumed (which tells the caller
1440 how many bytes to skip to get to the next CH_UNIX character).
1441
1442 Return INVALID_CODEPOINT if the next character cannot be converted.
1443*/
1444
1445codepoint_t next_codepoint(const char *str, size_t *size)
1446{
1447 /* It cannot occupy more than 4 bytes in UTF16 format */
1448 uint8_t buf[4];
1449 smb_iconv_t descriptor;
1450#ifdef __OS2__
1451 size_t ilen_max;
1452 size_t olen_orig;
1453 const char *inbuf;
1454#endif
1455 size_t ilen_orig;
1456 size_t ilen;
1457 size_t olen;
1458
1459 char *outbuf;
1460
1461#ifdef __OS2__
1462 *size = 1;
1463#endif
1464
1465 if ((str[0] & 0x80) == 0) {
1466#ifndef __OS2__
1467 *size = 1;
1468#endif
1469 return (codepoint_t)str[0];
1470 }
1471
1472 lazy_initialize_conv();
1473
1474 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1475 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1476#ifndef __OS2__
1477 *size = 1;
1478#endif
1479 return INVALID_CODEPOINT;
1480 }
1481#ifdef __OS2__
1482 /* We assume that no multi-byte character can take
1483 more than 5 bytes. This is OK as we only
1484 support codepoints up to 1M */
1485
1486 ilen_max = strnlen( str, 5 );
1487#else
1488 *size = 1;
1489#endif
1490 ilen_orig = 1;
1491 olen_orig = 2;
1492 while( 1 )
1493 {
1494 ilen = ilen_orig;
1495 olen = olen_orig;
1496 inbuf = str;
1497 outbuf = ( char * )buf;
1498 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1499 break;
1500
1501 switch( errno )
1502 {
1503 case E2BIG :
1504 if( olen_orig == 2 )
1505 olen_orig = 4;
1506 else
1507 return INVALID_CODEPOINT;
1508 break;
1509
1510
1511 case EINVAL :
1512#ifndef __OS2__
1513 /* We assume that no multi-byte character can take
1514 more than 5 bytes. This is OK as we only
1515 support codepoints up to 1M */
1516 if( ilen_orig < 5 )
1517#else
1518 if( ilen_orig < ilen_max )
1519#endif
1520 ilen_orig++;
1521 else
1522 return INVALID_CODEPOINT;
1523 break;
1524
1525 case EILSEQ :
1526 default :
1527 return INVALID_CODEPOINT;
1528 }
1529 }
1530
1531 olen = olen_orig - olen;
1532
1533 *size = ilen_orig - ilen;
1534
1535 if (olen == 2) {
1536 /* 2 byte, UTF16-LE encoded value. */
1537 return (codepoint_t)SVAL(buf, 0);
1538 }
1539 if (olen == 4) {
1540 /* Decode a 4 byte UTF16-LE character manually.
1541 See RFC2871 for the encoding machanism.
1542 */
1543 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1544 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1545
1546 return (codepoint_t)0x10000 +
1547 (w1 << 10) + w2;
1548 }
1549
1550 /* no other length is valid */
1551 return INVALID_CODEPOINT;
1552}
Note: See TracBrowser for help on using the repository browser.