source: branches/samba-3.0/source/lib/charcnv.c@ 770

Last change on this file since 770 was 337, checked in by Herwig Bauernfeind, 16 years ago

divers fix for Ticket #68 in 3.0

File size: 40.0 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
23*/
24#include "includes.h"
25
/**
 * Return the substitute character written in place of a character
 * that could not be converted.
 *
 * The value is fixed; we can parameterize this if someone
 * complains.... JRA.
 **/
char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
32
33/**
34 * @file
35 *
36 * @brief Character-set conversion routines built on our iconv.
37 *
38 * @note Samba's internal character set (at least in the 3.0 series)
39 * is always the same as the one for the Unix filesystem. It is
40 * <b>not</b> necessarily UTF-8 and may be different on machines that
41 * need i18n filenames to be compatible with Unix software. It does
42 * have to be a superset of ASCII. All multibyte sequences must start
43 * with a byte with the high bit set.
44 *
45 * @sa lib/iconv.c
46 */
47
48
/* Table of iconv descriptors, looked up as conv_handles[from][to]. */
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* While set, the conversion routines skip their DEBUG() output on failure. */
static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57 const char *ret = NULL;
58 if (ch == CH_UTF16LE) ret = "UTF-16LE";
59 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60 else if (ch == CH_UNIX) ret = lp_unix_charset();
61 else if (ch == CH_DOS) ret = lp_dos_charset();
62 else if (ch == CH_DISPLAY) ret = lp_display_charset();
63 else if (ch == CH_UTF8) ret = "UTF8";
64
65#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66 if (ret && !strcmp(ret, "LOCALE")) {
67 const char *ln = NULL;
68
69#ifdef HAVE_SETLOCALE
70 setlocale(LC_ALL, "");
71#endif
72 ln = nl_langinfo(CODESET);
73 if (ln) {
74 /* Check whether the charset name is supported
75 by iconv */
76 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77 if (handle == (smb_iconv_t) -1) {
78 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79 ln = NULL;
80 } else {
81 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82 smb_iconv_close(handle);
83 }
84 }
85 ret = ln;
86 }
87#endif
88
89 if (!ret || !*ret) ret = "ASCII";
90 return ret;
91}
92
93void lazy_initialize_conv(void)
94{
95 static int initialized = False;
96
97 if (!initialized) {
98 initialized = True;
99 load_case_tables();
100 init_iconv();
101 }
102}
103
104/**
105 * Destroy global objects allocated by init_iconv()
106 **/
107void gfree_charcnv(void)
108{
109 int c1, c2;
110
111 for (c1=0;c1<NUM_CHARSETS;c1++) {
112 for (c2=0;c2<NUM_CHARSETS;c2++) {
113 if ( conv_handles[c1][c2] ) {
114 smb_iconv_close( conv_handles[c1][c2] );
115 conv_handles[c1][c2] = 0;
116 }
117 }
118 }
119}
120
121/**
122 * Initialize iconv conversion descriptors.
123 *
124 * This is called the first time it is needed, and also called again
125 * every time the configuration is reloaded, because the charset or
126 * codepage might have changed.
127 **/
128void init_iconv(void)
129{
130 int c1, c2;
131 BOOL did_reload = False;
132
133 /* so that charset_name() works we need to get the UNIX<->UCS2 going
134 first */
135 if (!conv_handles[CH_UNIX][CH_UTF16LE])
136 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
137
138 if (!conv_handles[CH_UTF16LE][CH_UNIX])
139 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
140
141 for (c1=0;c1<NUM_CHARSETS;c1++) {
142 for (c2=0;c2<NUM_CHARSETS;c2++) {
143 const char *n1 = charset_name((charset_t)c1);
144 const char *n2 = charset_name((charset_t)c2);
145 if (conv_handles[c1][c2] &&
146 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148 continue;
149
150 did_reload = True;
151
152 if (conv_handles[c1][c2])
153 smb_iconv_close(conv_handles[c1][c2]);
154
155 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158 charset_name((charset_t)c1), charset_name((charset_t)c2)));
159 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160 n1 = "ASCII";
161 }
162 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163 n2 = "ASCII";
164 }
165 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166 n1, n2 ));
167 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168 if (!conv_handles[c1][c2]) {
169 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170 smb_panic("init_iconv: conv_handle initialization failed.");
171 }
172 }
173 }
174 }
175
176 if (did_reload) {
177 /* XXX: Does this really get called every time the dos
178 * codepage changes? */
179 /* XXX: Is the did_reload test too strict? */
180 conv_silent = True;
181 init_doschar_table();
182 init_valid_table();
183 conv_silent = False;
184 }
185}
186
187/**
188 * Convert string from one encoding to another, making error checking etc
189 * Slow path version - uses (slow) iconv.
190 *
191 * @param src pointer to source string (multibyte or singlebyte)
192 * @param srclen length of the source string in bytes
193 * @param dest pointer to destination string (multibyte or singlebyte)
194 * @param destlen maximal length allowed for string
195 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
196 * @returns the number of bytes occupied in the destination
197 *
198 * Ensure the srclen contains the terminating zero.
199 *
200 **/
201
202static size_t convert_string_internal(charset_t from, charset_t to,
203 void const *src, size_t srclen,
204 void *dest, size_t destlen, BOOL allow_bad_conv)
205{
206 size_t i_len, o_len;
207 size_t retval;
208 const char* inbuf = (const char*)src;
209 char* outbuf = (char*)dest;
210 smb_iconv_t descriptor;
211
212 lazy_initialize_conv();
213
214 descriptor = conv_handles[from][to];
215
216 if (srclen == (size_t)-1) {
217 if (from == CH_UTF16LE || from == CH_UTF16BE) {
218 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
219 } else {
220 srclen = strlen((const char *)src)+1;
221 }
222 }
223
224
225 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
226 if (!conv_silent)
227 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
228 return (size_t)-1;
229 }
230
231 i_len=srclen;
232 o_len=destlen;
233
234 again:
235
236 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
237 if(retval==(size_t)-1) {
238 const char *reason="unknown error";
239 switch(errno) {
240 case EINVAL:
241 reason="Incomplete multibyte sequence";
242 if (!conv_silent)
243 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
244 if (allow_bad_conv)
245 goto use_as_is;
246 break;
247 case E2BIG:
248 reason="No more room";
249 if (!conv_silent) {
250 if (from == CH_UNIX) {
251 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
252 charset_name(from), charset_name(to),
253 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
254 } else {
255 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
256 charset_name(from), charset_name(to),
257 (unsigned int)srclen, (unsigned int)destlen));
258 }
259 }
260 break;
261 case EILSEQ:
262 reason="Illegal multibyte sequence";
263 if (!conv_silent)
264 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
265 if (allow_bad_conv)
266 goto use_as_is;
267 break;
268 default:
269 if (!conv_silent)
270 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
271 break;
272 }
273 /* smb_panic(reason); */
274 }
275 return destlen-o_len;
276
277 use_as_is:
278
279 /*
280 * Conversion not supported. This is actually an error, but there are so
281 * many misconfigured iconv systems and smb.conf's out there we can't just
282 * fail. Do a very bad conversion instead.... JRA.
283 */
284
285 {
286 if (o_len == 0 || i_len == 0)
287 return destlen - o_len;
288
289 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
290 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
291 /* Can't convert from utf16 any endian to multibyte.
292 Replace with the default fail char.
293 */
294 if (i_len < 2)
295 return destlen - o_len;
296 if (i_len >= 2) {
297 *outbuf = lp_failed_convert_char();
298
299 outbuf++;
300 o_len--;
301
302 inbuf += 2;
303 i_len -= 2;
304 }
305
306 if (o_len == 0 || i_len == 0)
307 return destlen - o_len;
308
309 /* Keep trying with the next char... */
310 goto again;
311
312 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
313 /* Can't convert to UTF16LE - just widen by adding the
314 default fail char then zero.
315 */
316 if (o_len < 2)
317 return destlen - o_len;
318
319 outbuf[0] = lp_failed_convert_char();
320 outbuf[1] = '\0';
321
322 inbuf++;
323 i_len--;
324
325 outbuf += 2;
326 o_len -= 2;
327
328 if (o_len == 0 || i_len == 0)
329 return destlen - o_len;
330
331 /* Keep trying with the next char... */
332 goto again;
333
334 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
335 to != CH_UTF16LE && to != CH_UTF16BE) {
336 /* Failed multibyte to multibyte. Just copy the default fail char and
337 try again. */
338 outbuf[0] = lp_failed_convert_char();
339
340 inbuf++;
341 i_len--;
342
343 outbuf++;
344 o_len--;
345
346 if (o_len == 0 || i_len == 0)
347 return destlen - o_len;
348
349 /* Keep trying with the next char... */
350 goto again;
351
352 } else {
353 /* Keep compiler happy.... */
354 return destlen - o_len;
355 }
356 }
357}
358
359/**
360 * Convert string from one encoding to another, making error checking etc
361 * Fast path version - handles ASCII first.
362 *
363 * @param src pointer to source string (multibyte or singlebyte)
364 * @param srclen length of the source string in bytes, or -1 for nul terminated.
365 * @param dest pointer to destination string (multibyte or singlebyte)
366 * @param destlen maximal length allowed for string - *NEVER* -1.
367 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
368 * @returns the number of bytes occupied in the destination
369 *
370 * Ensure the srclen contains the terminating zero.
371 *
372 * This function has been hand-tuned to provide a fast path.
373 * Don't change unless you really know what you are doing. JRA.
374 **/
375
376size_t convert_string(charset_t from, charset_t to,
377 void const *src, size_t srclen,
378 void *dest, size_t destlen, BOOL allow_bad_conv)
379{
380 /*
381 * NB. We deliberately don't do a strlen here if srclen == -1.
382 * This is very expensive over millions of calls and is taken
383 * care of in the slow path in convert_string_internal. JRA.
384 */
385
386#ifdef DEVELOPER
387 SMB_ASSERT(destlen != (size_t)-1);
388#endif
389
390 if (srclen == 0)
391 return 0;
392
393 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
394 const unsigned char *p = (const unsigned char *)src;
395 unsigned char *q = (unsigned char *)dest;
396 size_t slen = srclen;
397 size_t dlen = destlen;
398 unsigned char lastp = '\0';
399 size_t retval = 0;
400
401 /* If all characters are ascii, fast path here. */
402 while (slen && dlen) {
403 if ((lastp = *p) <= 0x7f) {
404 *q++ = *p++;
405 if (slen != (size_t)-1) {
406 slen--;
407 }
408 dlen--;
409 retval++;
410 if (!lastp)
411 break;
412 } else {
413#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
414 goto general_case;
415#else
416 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
417#endif
418 }
419 }
420 if (!dlen) {
421 /* Even if we fast path we should note if we ran out of room. */
422 if (((slen != (size_t)-1) && slen) ||
423 ((slen == (size_t)-1) && lastp)) {
424 errno = E2BIG;
425 }
426 }
427 return retval;
428
429 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
430 const unsigned char *p = (const unsigned char *)src;
431 unsigned char *q = (unsigned char *)dest;
432 size_t retval = 0;
433 size_t slen = srclen;
434 size_t dlen = destlen;
435 unsigned char lastp = '\0';
436
437 /* If all characters are ascii, fast path here. */
438 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
439 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
440 *q++ = *p;
441 if (slen != (size_t)-1) {
442 slen -= 2;
443 }
444 p += 2;
445 dlen--;
446 retval++;
447 if (!lastp)
448 break;
449 } else {
450#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
451 goto general_case;
452#else
453 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
454#endif
455 }
456 }
457 if (!dlen) {
458 /* Even if we fast path we should note if we ran out of room. */
459 if (((slen != (size_t)-1) && slen) ||
460 ((slen == (size_t)-1) && lastp)) {
461 errno = E2BIG;
462 }
463 }
464 return retval;
465
466 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
467 const unsigned char *p = (const unsigned char *)src;
468 unsigned char *q = (unsigned char *)dest;
469 size_t retval = 0;
470 size_t slen = srclen;
471 size_t dlen = destlen;
472 unsigned char lastp = '\0';
473
474 /* If all characters are ascii, fast path here. */
475 while (slen && (dlen >= 2)) {
476 if ((lastp = *p) <= 0x7F) {
477 *q++ = *p++;
478 *q++ = '\0';
479 if (slen != (size_t)-1) {
480 slen--;
481 }
482 dlen -= 2;
483 retval += 2;
484 if (!lastp)
485 break;
486 } else {
487#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
488 goto general_case;
489#else
490 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
491#endif
492 }
493 }
494 if (!dlen) {
495 /* Even if we fast path we should note if we ran out of room. */
496 if (((slen != (size_t)-1) && slen) ||
497 ((slen == (size_t)-1) && lastp)) {
498 errno = E2BIG;
499 }
500 }
501 return retval;
502 }
503
504#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
505 general_case:
506#endif
507
508 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
509}
510
511/**
512 * Convert between character sets, allocating a new buffer for the result.
513 *
514 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
515 * @param srclen length of source buffer.
516 * @param dest always set at least to NULL
517 * @note -1 is not accepted for srclen.
518 *
519 * @returns Size in bytes of the converted string; or -1 in case of error.
520 *
521 * Ensure the srclen contains the terminating zero.
522 *
523 * I hate the goto's in this function. It's embarressing.....
524 * There has to be a cleaner way to do this. JRA.
525 **/
526
527size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
528 void const *src, size_t srclen, void *dst, BOOL allow_bad_conv)
529{
530 size_t i_len, o_len, destlen = (srclen * 3) / 2;
531 size_t retval;
532 const char *inbuf = (const char *)src;
533 char *outbuf = NULL, *ob = NULL;
534 smb_iconv_t descriptor;
535 void **dest = (void **)dst;
536
537 *dest = NULL;
538
539 if (src == NULL || srclen == (size_t)-1)
540 return (size_t)-1;
541 if (srclen == 0)
542 return 0;
543
544 lazy_initialize_conv();
545
546 descriptor = conv_handles[from][to];
547
548 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
549 if (!conv_silent)
550 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
551 return (size_t)-1;
552 }
553
554 convert:
555
556 /* +2 is for ucs2 null termination. */
557 if ((destlen*2)+2 < destlen) {
558 /* wrapped ! abort. */
559 if (!conv_silent)
560 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
561 if (!ctx)
562 SAFE_FREE(outbuf);
563 return (size_t)-1;
564 } else {
565 destlen = destlen * 2;
566 }
567
568 /* +2 is for ucs2 null termination. */
569 if (ctx) {
570 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
571 } else {
572 ob = (char *)SMB_REALLOC(ob, destlen + 2);
573 }
574
575 if (!ob) {
576 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
577 return (size_t)-1;
578 }
579 outbuf = ob;
580 i_len = srclen;
581 o_len = destlen;
582
583 again:
584
585 retval = smb_iconv(descriptor,
586 &inbuf, &i_len,
587 &outbuf, &o_len);
588 if(retval == (size_t)-1) {
589 const char *reason="unknown error";
590 switch(errno) {
591 case EINVAL:
592 reason="Incomplete multibyte sequence";
593 if (!conv_silent)
594 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
595 if (allow_bad_conv)
596 goto use_as_is;
597 break;
598 case E2BIG:
599 goto convert;
600 case EILSEQ:
601 reason="Illegal multibyte sequence";
602 if (!conv_silent)
603 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
604 if (allow_bad_conv)
605 goto use_as_is;
606 break;
607 }
608 if (!conv_silent)
609 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
610 /* smb_panic(reason); */
611 return (size_t)-1;
612 }
613
614 out:
615
616 destlen = destlen - o_len;
617 if (ctx) {
618 /* We're shrinking here so we know the +2 is safe from wrap. */
619 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
620 } else {
621 ob = (char *)SMB_REALLOC(ob,destlen + 2);
622 }
623
624 if (destlen && !ob) {
625 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
626 return (size_t)-1;
627 }
628
629 *dest = ob;
630
631 /* Must ucs2 null terminate in the extra space we allocated. */
632 ob[destlen] = '\0';
633 ob[destlen+1] = '\0';
634
635 return destlen;
636
637 use_as_is:
638
639 /*
640 * Conversion not supported. This is actually an error, but there are so
641 * many misconfigured iconv systems and smb.conf's out there we can't just
642 * fail. Do a very bad conversion instead.... JRA.
643 */
644
645 {
646 if (o_len == 0 || i_len == 0)
647 goto out;
648
649 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
650 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
651 /* Can't convert from utf16 any endian to multibyte.
652 Replace with the default fail char.
653 */
654
655 if (i_len < 2)
656 goto out;
657
658 if (i_len >= 2) {
659 *outbuf = lp_failed_convert_char();
660
661 outbuf++;
662 o_len--;
663
664 inbuf += 2;
665 i_len -= 2;
666 }
667
668 if (o_len == 0 || i_len == 0)
669 goto out;
670
671 /* Keep trying with the next char... */
672 goto again;
673
674 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
675 /* Can't convert to UTF16LE - just widen by adding the
676 default fail char then zero.
677 */
678 if (o_len < 2)
679 goto out;
680
681 outbuf[0] = lp_failed_convert_char();
682 outbuf[1] = '\0';
683
684 inbuf++;
685 i_len--;
686
687 outbuf += 2;
688 o_len -= 2;
689
690 if (o_len == 0 || i_len == 0)
691 goto out;
692
693 /* Keep trying with the next char... */
694 goto again;
695
696 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
697 to != CH_UTF16LE && to != CH_UTF16BE) {
698 /* Failed multibyte to multibyte. Just copy the default fail char and
699 try again. */
700 outbuf[0] = lp_failed_convert_char();
701
702 inbuf++;
703 i_len--;
704
705 outbuf++;
706 o_len--;
707
708 if (o_len == 0 || i_len == 0)
709 goto out;
710
711 /* Keep trying with the next char... */
712 goto again;
713
714 } else {
715 /* Keep compiler happy.... */
716 goto out;
717 }
718 }
719}
720
721/**
722 * Convert between character sets, allocating a new buffer using talloc for the result.
723 *
724 * @param srclen length of source buffer.
725 * @param dest always set at least to NULL
726 * @note -1 is not accepted for srclen.
727 *
728 * @returns Size in bytes of the converted string; or -1 in case of error.
729 **/
730size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
731 void const *src, size_t srclen, void *dst,
732 BOOL allow_bad_conv)
733{
734 void **dest = (void **)dst;
735 size_t dest_len;
736
737 *dest = NULL;
738 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
739 if (dest_len == (size_t)-1)
740 return (size_t)-1;
741 if (*dest == NULL)
742 return (size_t)-1;
743 return dest_len;
744}
745
746size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
747{
748 size_t size;
749 smb_ucs2_t *buffer;
750
751 size = push_ucs2_allocate(&buffer, src);
752 if (size == (size_t)-1) {
753 smb_panic("failed to create UCS2 buffer");
754 }
755 if (!strupper_w(buffer) && (dest == src)) {
756 free(buffer);
757 return srclen;
758 }
759
760 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
761 free(buffer);
762 return size;
763}
764
765/**
766 strdup() a unix string to upper case.
767 Max size is pstring.
768**/
769
770char *strdup_upper(const char *s)
771{
772 pstring out_buffer;
773 const unsigned char *p = (const unsigned char *)s;
774 unsigned char *q = (unsigned char *)out_buffer;
775
776 /* this is quite a common operation, so we want it to be
777 fast. We optimise for the ascii case, knowing that all our
778 supported multi-byte character sets are ascii-compatible
779 (ie. they match for the first 128 chars) */
780
781 while (1) {
782 if (*p & 0x80)
783 break;
784 *q++ = toupper_ascii(*p);
785 if (!*p)
786 break;
787 p++;
788 if (p - ( const unsigned char *)s >= sizeof(pstring))
789 break;
790 }
791
792 if (*p) {
793 /* MB case. */
794 size_t size;
795 wpstring buffer;
796 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
797 if (size == (size_t)-1) {
798 return NULL;
799 }
800
801 strupper_w(buffer);
802
803 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
804 if (size == (size_t)-1) {
805 return NULL;
806 }
807 }
808
809 return SMB_STRDUP(out_buffer);
810}
811
812size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
813{
814 size_t size;
815 smb_ucs2_t *buffer = NULL;
816
817 size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
818 (void **)(void *)&buffer, True);
819 if (size == (size_t)-1 || !buffer) {
820 smb_panic("failed to create UCS2 buffer");
821 }
822 if (!strlower_w(buffer) && (dest == src)) {
823 SAFE_FREE(buffer);
824 return srclen;
825 }
826 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
827 SAFE_FREE(buffer);
828 return size;
829}
830
831/**
832 strdup() a unix string to lower case.
833**/
834
835char *strdup_lower(const char *s)
836{
837 size_t size;
838 smb_ucs2_t *buffer = NULL;
839 char *out_buffer;
840
841 size = push_ucs2_allocate(&buffer, s);
842 if (size == -1 || !buffer) {
843 return NULL;
844 }
845
846 strlower_w(buffer);
847
848 size = pull_ucs2_allocate(&out_buffer, buffer);
849 SAFE_FREE(buffer);
850
851 if (size == (size_t)-1) {
852 return NULL;
853 }
854
855 return out_buffer;
856}
857
858static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
859{
860 if (flags & (STR_NOALIGN|STR_ASCII))
861 return 0;
862 return PTR_DIFF(p, base_ptr) & 1;
863}
864
865
866/**
867 * Copy a string from a char* unix src to a dos codepage string destination.
868 *
869 * @return the number of bytes occupied by the string in the destination.
870 *
871 * @param flags can include
872 * <dl>
873 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
874 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
875 * </dl>
876 *
877 * @param dest_len the maximum length in bytes allowed in the
878 * destination. If @p dest_len is -1 then no maximum is used.
879 **/
880size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
881{
882 size_t src_len = strlen(src);
883 pstring tmpbuf;
884 size_t ret;
885
886 /* No longer allow a length of -1 */
887 if (dest_len == (size_t)-1)
888 smb_panic("push_ascii - dest_len == -1");
889
890 if (flags & STR_UPPER) {
891 pstrcpy(tmpbuf, src);
892 strupper_m(tmpbuf);
893 src = tmpbuf;
894 }
895
896 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
897 src_len++;
898
899 ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
900 if (ret == (size_t)-1 &&
901 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
902 && dest_len > 0) {
903 ((char *)dest)[0] = '\0';
904 }
905 return ret;
906}
907
908size_t push_ascii_fstring(void *dest, const char *src)
909{
910 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
911}
912
913size_t push_ascii_pstring(void *dest, const char *src)
914{
915 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
916}
917
918/********************************************************************
919 Push an nstring - ensure null terminated. Written by
920 moriyama@miraclelinux.com (MORIYAMA Masayuki).
921********************************************************************/
922
923size_t push_ascii_nstring(void *dest, const char *src)
924{
925 size_t i, buffer_len, dest_len;
926 smb_ucs2_t *buffer;
927
928 conv_silent = True;
929 buffer_len = push_ucs2_allocate(&buffer, src);
930 if (buffer_len == (size_t)-1) {
931 smb_panic("failed to create UCS2 buffer");
932 }
933
934 /* We're using buffer_len below to count ucs2 characters, not bytes. */
935 buffer_len /= sizeof(smb_ucs2_t);
936
937 dest_len = 0;
938 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
939 unsigned char mb[10];
940 /* Convert one smb_ucs2_t character at a time. */
941 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
942 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
943 memcpy((char *)dest + dest_len, mb, mb_len);
944 dest_len += mb_len;
945 } else {
946 errno = E2BIG;
947 break;
948 }
949 }
950 ((char *)dest)[dest_len] = '\0';
951
952 SAFE_FREE(buffer);
953 conv_silent = False;
954 return dest_len;
955}
956
957/**
958 * Copy a string from a dos codepage source to a unix char* destination.
959 *
960 * The resulting string in "dest" is always null terminated.
961 *
962 * @param flags can have:
963 * <dl>
964 * <dt>STR_TERMINATE</dt>
965 * <dd>STR_TERMINATE means the string in @p src
966 * is null terminated, and src_len is ignored.</dd>
967 * </dl>
968 *
969 * @param src_len is the length of the source area in bytes.
970 * @returns the number of bytes occupied by the string in @p src.
971 **/
972size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
973{
974 size_t ret;
975
976 if (dest_len == (size_t)-1)
977 dest_len = sizeof(pstring);
978
979 if (flags & STR_TERMINATE) {
980 if (src_len == (size_t)-1) {
981 src_len = strlen((const char *)src) + 1;
982 } else {
983 size_t len = strnlen((const char *)src, src_len);
984 if (len < src_len)
985 len++;
986 src_len = len;
987 }
988 }
989
990 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
991 if (ret == (size_t)-1) {
992 ret = 0;
993 dest_len = 0;
994 }
995
996 if (dest_len && ret) {
997 /* Did we already process the terminating zero ? */
998 if (dest[MIN(ret-1, dest_len-1)] != 0) {
999 dest[MIN(ret, dest_len-1)] = 0;
1000 }
1001 } else {
1002 dest[0] = 0;
1003 }
1004
1005 return src_len;
1006}
1007
1008size_t pull_ascii_pstring(char *dest, const void *src)
1009{
1010 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1011}
1012
1013size_t pull_ascii_fstring(char *dest, const void *src)
1014{
1015 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1016}
1017
1018/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1019
1020size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1021{
1022 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1023}
1024
1025/**
1026 * Copy a string from a char* src to a unicode destination.
1027 *
1028 * @returns the number of bytes occupied by the string in the destination.
1029 *
1030 * @param flags can have:
1031 *
1032 * <dl>
1033 * <dt>STR_TERMINATE <dd>means include the null termination.
1034 * <dt>STR_UPPER <dd>means uppercase in the destination.
1035 * <dt>STR_NOALIGN <dd>means don't do alignment.
1036 * </dl>
1037 *
1038 * @param dest_len is the maximum length allowed in the
1039 * destination. If dest_len is -1 then no maxiumum is used.
1040 **/
1041
1042size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1043{
1044 size_t len=0;
1045 size_t src_len;
1046 size_t ret;
1047
1048 /* treat a pstring as "unlimited" length */
1049 if (dest_len == (size_t)-1)
1050 dest_len = sizeof(pstring);
1051
1052 if (flags & STR_TERMINATE)
1053 src_len = (size_t)-1;
1054 else
1055 src_len = strlen(src);
1056
1057 if (ucs2_align(base_ptr, dest, flags)) {
1058 *(char *)dest = 0;
1059 dest = (void *)((char *)dest + 1);
1060 if (dest_len)
1061 dest_len--;
1062 len++;
1063 }
1064
1065 /* ucs2 is always a multiple of 2 bytes */
1066 dest_len &= ~1;
1067
1068 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1069 if (ret == (size_t)-1) {
1070 return 0;
1071 }
1072
1073 len += ret;
1074
1075 if (flags & STR_UPPER) {
1076 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1077 size_t i;
1078
1079 /* We check for i < (ret / 2) below as the dest string isn't null
1080 terminated if STR_TERMINATE isn't set. */
1081
1082 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1083 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1084 if (v != dest_ucs2[i]) {
1085 dest_ucs2[i] = v;
1086 }
1087 }
1088 }
1089
1090 return len;
1091}
1092
1093
1094/**
1095 * Copy a string from a unix char* src to a UCS2 destination,
1096 * allocating a buffer using talloc().
1097 *
1098 * @param dest always set at least to NULL
1099 *
1100 * @returns The number of bytes occupied by the string in the destination
1101 * or -1 in case of error.
1102 **/
1103size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1104{
1105 size_t src_len = strlen(src)+1;
1106
1107 *dest = NULL;
1108 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1109}
1110
1111
1112/**
1113 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1114 *
1115 * @param dest always set at least to NULL
1116 *
1117 * @returns The number of bytes occupied by the string in the destination
1118 * or -1 in case of error.
1119 **/
1120
1121size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1122{
1123 size_t src_len = strlen(src)+1;
1124
1125 *dest = NULL;
1126 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1127}
1128
1129/**
1130 Copy a string from a char* src to a UTF-8 destination.
1131 Return the number of bytes occupied by the string in the destination
1132 Flags can have:
1133 STR_TERMINATE means include the null termination
1134 STR_UPPER means uppercase in the destination
1135 dest_len is the maximum length allowed in the destination. If dest_len
1136 is -1 then no maxiumum is used.
1137**/
1138
1139static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1140{
1141 size_t src_len = strlen(src);
1142 pstring tmpbuf;
1143
1144 /* treat a pstring as "unlimited" length */
1145 if (dest_len == (size_t)-1)
1146 dest_len = sizeof(pstring);
1147
1148 if (flags & STR_UPPER) {
1149 pstrcpy(tmpbuf, src);
1150 strupper_m(tmpbuf);
1151 src = tmpbuf;
1152 }
1153
1154 if (flags & STR_TERMINATE)
1155 src_len++;
1156
1157 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1158}
1159
1160size_t push_utf8_fstring(void *dest, const char *src)
1161{
1162 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1163}
1164
1165/**
1166 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1167 *
1168 * @param dest always set at least to NULL
1169 *
1170 * @returns The number of bytes occupied by the string in the destination
1171 **/
1172
1173size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1174{
1175 size_t src_len = strlen(src)+1;
1176
1177 *dest = NULL;
1178 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1179}
1180
1181/**
1182 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1183 *
1184 * @param dest always set at least to NULL
1185 *
1186 * @returns The number of bytes occupied by the string in the destination
1187 **/
1188
1189size_t push_utf8_allocate(char **dest, const char *src)
1190{
1191 size_t src_len = strlen(src)+1;
1192
1193 *dest = NULL;
1194 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1195}
1196
1197/**
1198 Copy a string from a ucs2 source to a unix char* destination.
1199 Flags can have:
1200 STR_TERMINATE means the string in src is null terminated.
1201 STR_NOALIGN means don't try to align.
1202 if STR_TERMINATE is set then src_len is ignored if it is -1.
1203 src_len is the length of the source area in bytes
1204 Return the number of bytes occupied by the string in src.
1205 The resulting string in "dest" is always null terminated.
1206**/
1207
1208size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1209{
1210 size_t ret;
1211
1212 if (dest_len == (size_t)-1)
1213 dest_len = sizeof(pstring);
1214
1215 if (ucs2_align(base_ptr, src, flags)) {
1216 src = (const void *)((const char *)src + 1);
1217 if (src_len != (size_t)-1)
1218 src_len--;
1219 }
1220
1221 if (flags & STR_TERMINATE) {
1222 /* src_len -1 is the default for null terminated strings. */
1223 if (src_len != (size_t)-1) {
1224 size_t len = strnlen_w((const smb_ucs2_t *)src,
1225 src_len/2);
1226 if (len < src_len/2)
1227 len++;
1228 src_len = len*2;
1229 }
1230 }
1231
1232 /* ucs2 is always a multiple of 2 bytes */
1233 if (src_len != (size_t)-1)
1234 src_len &= ~1;
1235
1236 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1237 if (ret == (size_t)-1) {
1238 return 0;
1239 }
1240
1241 if (src_len == (size_t)-1)
1242 src_len = ret*2;
1243
1244 if (dest_len && ret) {
1245 /* Did we already process the terminating zero ? */
1246 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1247 dest[MIN(ret, dest_len-1)] = 0;
1248 }
1249 } else {
1250 dest[0] = 0;
1251 }
1252
1253 return src_len;
1254}
1255
1256size_t pull_ucs2_pstring(char *dest, const void *src)
1257{
1258 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1259}
1260
1261size_t pull_ucs2_fstring(char *dest, const void *src)
1262{
1263 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1264}
1265
1266/**
1267 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1268 *
1269 * @param dest always set at least to NULL
1270 *
1271 * @returns The number of bytes occupied by the string in the destination
1272 **/
1273
1274size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1275{
1276 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1277 *dest = NULL;
1278 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1279}
1280
1281/**
1282 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1283 *
1284 * @param dest always set at least to NULL
1285 *
1286 * @returns The number of bytes occupied by the string in the destination
1287 **/
1288
1289size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1290{
1291 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1292 *dest = NULL;
1293 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1294}
1295
1296/**
1297 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1298 *
1299 * @param dest always set at least to NULL
1300 *
1301 * @returns The number of bytes occupied by the string in the destination
1302 **/
1303
1304size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1305{
1306 size_t src_len = strlen(src)+1;
1307 *dest = NULL;
1308 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1309}
1310
1311/**
1312 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1313 *
1314 * @param dest always set at least to NULL
1315 *
1316 * @returns The number of bytes occupied by the string in the destination
1317 **/
1318
1319size_t pull_utf8_allocate(char **dest, const char *src)
1320{
1321 size_t src_len = strlen(src)+1;
1322 *dest = NULL;
1323 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1324}
1325
1326/**
1327 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1328 *
1329 * @param dest always set at least to NULL
1330 *
1331 * @returns The number of bytes occupied by the string in the destination
1332 **/
1333
1334size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1335{
1336 size_t src_len = strlen(src)+1;
1337 *dest = NULL;
1338 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1339}
1340
1341/**
1342 Copy a string from a char* src to a unicode or ascii
1343 dos codepage destination choosing unicode or ascii based on the
1344 flags in the SMB buffer starting at base_ptr.
1345 Return the number of bytes occupied by the string in the destination.
1346 flags can have:
1347 STR_TERMINATE means include the null termination.
1348 STR_UPPER means uppercase in the destination.
1349 STR_ASCII use ascii even with unicode packet.
1350 STR_NOALIGN means don't do alignment.
1351 dest_len is the maximum length allowed in the destination. If dest_len
1352 is -1 then no maxiumum is used.
1353**/
1354
1355size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1356{
1357#ifdef DEVELOPER
1358 /* We really need to zero fill here, not clobber
1359 * region, as we want to ensure that valgrind thinks
1360 * all of the outgoing buffer has been written to
1361 * so a send() or write() won't trap an error.
1362 * JRA.
1363 */
1364#if 0
1365 if (dest_len != (size_t)-1)
1366 clobber_region(function, line, dest, dest_len);
1367#else
1368 if (dest_len != (size_t)-1)
1369 memset(dest, '\0', dest_len);
1370#endif
1371#endif
1372
1373 if (!(flags & STR_ASCII) && \
1374 ((flags & STR_UNICODE || \
1375 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1376 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1377 }
1378 return push_ascii(dest, src, dest_len, flags);
1379}
1380
1381
1382/**
1383 Copy a string from a unicode or ascii source (depending on
1384 the packet flags) to a char* destination.
1385 Flags can have:
1386 STR_TERMINATE means the string in src is null terminated.
1387 STR_UNICODE means to force as unicode.
1388 STR_ASCII use ascii even with unicode packet.
1389 STR_NOALIGN means don't do alignment.
1390 if STR_TERMINATE is set then src_len is ignored is it is -1
1391 src_len is the length of the source area in bytes.
1392 Return the number of bytes occupied by the string in src.
1393 The resulting string in "dest" is always null terminated.
1394**/
1395
1396size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1397{
1398#ifdef DEVELOPER
1399 if (dest_len != (size_t)-1)
1400 clobber_region(function, line, dest, dest_len);
1401#endif
1402
1403 if (!(flags & STR_ASCII) && \
1404 ((flags & STR_UNICODE || \
1405 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1406 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1407 }
1408 return pull_ascii(dest, src, dest_len, src_len, flags);
1409}
1410
1411size_t align_string(const void *base_ptr, const char *p, int flags)
1412{
1413 if (!(flags & STR_ASCII) && \
1414 ((flags & STR_UNICODE || \
1415 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1416 return ucs2_align(base_ptr, p, flags);
1417 }
1418 return 0;
1419}
1420
1421/*
1422 Return the unicode codepoint for the next multi-byte CH_UNIX character
1423 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1424
1425 Also return the number of bytes consumed (which tells the caller
1426 how many bytes to skip to get to the next CH_UNIX character).
1427
1428 Return INVALID_CODEPOINT if the next character cannot be converted.
1429*/
1430
1431codepoint_t next_codepoint(const char *str, size_t *size)
1432{
1433 /* It cannot occupy more than 4 bytes in UTF16 format */
1434 uint8_t buf[4];
1435 smb_iconv_t descriptor;
1436#ifdef __OS2__
1437 size_t ilen_max;
1438 size_t olen_orig;
1439 const char *inbuf;
1440#endif
1441 size_t ilen_orig;
1442 size_t ilen;
1443 size_t olen;
1444
1445 char *outbuf;
1446
1447#ifdef __OS2__
1448 *size = 1;
1449#endif
1450
1451 if ((str[0] & 0x80) == 0) {
1452#ifndef __OS2__
1453 *size = 1;
1454#endif
1455 return (codepoint_t)str[0];
1456 }
1457
1458 lazy_initialize_conv();
1459
1460 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1461 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1462#ifndef __OS2__
1463 *size = 1;
1464#endif
1465 return INVALID_CODEPOINT;
1466 }
1467#ifdef __OS2__
1468 /* We assume that no multi-byte character can take
1469 more than 5 bytes. This is OK as we only
1470 support codepoints up to 1M */
1471
1472 ilen_max = strnlen( str, 5 );
1473#else
1474 *size = 1;
1475#endif
1476 ilen_orig = 1;
1477 olen_orig = 2;
1478 while( 1 )
1479 {
1480 ilen = ilen_orig;
1481 olen = olen_orig;
1482 inbuf = str;
1483 outbuf = ( char * )buf;
1484 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1485 break;
1486
1487 switch( errno )
1488 {
1489 case E2BIG :
1490 if( olen_orig == 2 )
1491 olen_orig = 4;
1492 else
1493 return INVALID_CODEPOINT;
1494 break;
1495
1496
1497 case EINVAL :
1498#ifndef __OS2__
1499 /* We assume that no multi-byte character can take
1500 more than 5 bytes. This is OK as we only
1501 support codepoints up to 1M */
1502 if( ilen_orig < 5 )
1503#else
1504 if( ilen_orig < ilen_max )
1505#endif
1506 ilen_orig++;
1507 else
1508 return INVALID_CODEPOINT;
1509 break;
1510
1511 case EILSEQ :
1512 default :
1513 return INVALID_CODEPOINT;
1514 }
1515 }
1516
1517 olen = olen_orig - olen;
1518
1519 *size = ilen_orig - ilen;
1520
1521 if (olen == 2) {
1522 /* 2 byte, UTF16-LE encoded value. */
1523 return (codepoint_t)SVAL(buf, 0);
1524 }
1525 if (olen == 4) {
1526 /* Decode a 4 byte UTF16-LE character manually.
1527 See RFC2871 for the encoding machanism.
1528 */
1529 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1530 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1531
1532 return (codepoint_t)0x10000 +
1533 (w1 << 10) + w2;
1534 }
1535
1536 /* no other length is valid */
1537 return INVALID_CODEPOINT;
1538}
Note: See TracBrowser for help on using the repository browser.