source: branches/samba-3.3.x/source/lib/charcnv.c@ 796

Last change on this file since 796 was 339, checked in by Herwig Bauernfeind, 16 years ago

divers fix for Ticket #68 in 3.3

File size: 51.1 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22*/
23#include "includes.h"
24
/*
 * Character substituted for anything that cannot be converted.
 * Hard-coded for now; it can be made a parameter if someone complains. JRA.
 */
char lp_failed_convert_char(void)
{
	const char substitute = '_';

	return substitute;
}
31
32/**
33 * @file
34 *
35 * @brief Character-set conversion routines built on our iconv.
36 *
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
43 *
44 * @sa lib/iconv.c
45 */
46
47
48static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50static bool initialized;
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57 const char *ret = NULL;
58 if (ch == CH_UTF16LE) ret = "UTF-16LE";
59 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60 else if (ch == CH_UNIX) ret = lp_unix_charset();
61 else if (ch == CH_DOS) ret = lp_dos_charset();
62 else if (ch == CH_DISPLAY) ret = lp_display_charset();
63 else if (ch == CH_UTF8) ret = "UTF8";
64
65#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66 if (ret && !strcmp(ret, "LOCALE")) {
67 const char *ln = NULL;
68
69#ifdef HAVE_SETLOCALE
70 setlocale(LC_ALL, "");
71#endif
72 ln = nl_langinfo(CODESET);
73 if (ln) {
74 /* Check whether the charset name is supported
75 by iconv */
76 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77 if (handle == (smb_iconv_t) -1) {
78 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79 ln = NULL;
80 } else {
81 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82 smb_iconv_close(handle);
83 }
84 }
85 ret = ln;
86 }
87#endif
88
89 if (!ret || !*ret) ret = "ASCII";
90 return ret;
91}
92
93void lazy_initialize_conv(void)
94{
95 if (!initialized) {
96 load_case_tables();
97 init_iconv();
98 initialized = true;
99 }
100}
101
102/**
103 * Destroy global objects allocated by init_iconv()
104 **/
105void gfree_charcnv(void)
106{
107 int c1, c2;
108
109 for (c1=0;c1<NUM_CHARSETS;c1++) {
110 for (c2=0;c2<NUM_CHARSETS;c2++) {
111 if ( conv_handles[c1][c2] ) {
112 smb_iconv_close( conv_handles[c1][c2] );
113 conv_handles[c1][c2] = 0;
114 }
115 }
116 }
117 initialized = false;
118}
119
120/**
121 * Initialize iconv conversion descriptors.
122 *
123 * This is called the first time it is needed, and also called again
124 * every time the configuration is reloaded, because the charset or
125 * codepage might have changed.
126 **/
127void init_iconv(void)
128{
129 int c1, c2;
130 bool did_reload = False;
131
132 /* so that charset_name() works we need to get the UNIX<->UCS2 going
133 first */
134 if (!conv_handles[CH_UNIX][CH_UTF16LE])
135 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
136
137 if (!conv_handles[CH_UTF16LE][CH_UNIX])
138 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
139
140 for (c1=0;c1<NUM_CHARSETS;c1++) {
141 for (c2=0;c2<NUM_CHARSETS;c2++) {
142 const char *n1 = charset_name((charset_t)c1);
143 const char *n2 = charset_name((charset_t)c2);
144 if (conv_handles[c1][c2] &&
145 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
146 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
147 continue;
148
149 did_reload = True;
150
151 if (conv_handles[c1][c2])
152 smb_iconv_close(conv_handles[c1][c2]);
153
154 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
155 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
156 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
157 charset_name((charset_t)c1), charset_name((charset_t)c2)));
158 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
159 n1 = "ASCII";
160 }
161 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
162 n2 = "ASCII";
163 }
164 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
165 n1, n2 ));
166 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
167 if (!conv_handles[c1][c2]) {
168 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
169 smb_panic("init_iconv: conv_handle initialization failed");
170 }
171 }
172 }
173 }
174
175 if (did_reload) {
176 /* XXX: Does this really get called every time the dos
177 * codepage changes? */
178 /* XXX: Is the did_reload test too strict? */
179 conv_silent = True;
180 init_valid_table();
181 conv_silent = False;
182 }
183}
184
185/**
186 * Convert string from one encoding to another, making error checking etc
187 * Slow path version - uses (slow) iconv.
188 *
189 * @param src pointer to source string (multibyte or singlebyte)
190 * @param srclen length of the source string in bytes
191 * @param dest pointer to destination string (multibyte or singlebyte)
192 * @param destlen maximal length allowed for string
193 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
194 * @returns the number of bytes occupied in the destination
195 *
196 * Ensure the srclen contains the terminating zero.
197 *
198 **/
199
200static size_t convert_string_internal(charset_t from, charset_t to,
201 void const *src, size_t srclen,
202 void *dest, size_t destlen, bool allow_bad_conv)
203{
204 size_t i_len, o_len;
205 size_t retval;
206 const char* inbuf = (const char*)src;
207 char* outbuf = (char*)dest;
208 smb_iconv_t descriptor;
209
210 lazy_initialize_conv();
211
212 descriptor = conv_handles[from][to];
213
214 if (srclen == (size_t)-1) {
215 if (from == CH_UTF16LE || from == CH_UTF16BE) {
216 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
217 } else {
218 srclen = strlen((const char *)src)+1;
219 }
220 }
221
222
223 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
224 if (!conv_silent)
225 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
226 return (size_t)-1;
227 }
228
229 i_len=srclen;
230 o_len=destlen;
231
232 again:
233
234 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
235 if(retval==(size_t)-1) {
236 const char *reason="unknown error";
237 switch(errno) {
238 case EINVAL:
239 reason="Incomplete multibyte sequence";
240 if (!conv_silent)
241 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
242 if (allow_bad_conv)
243 goto use_as_is;
244 return (size_t)-1;
245 case E2BIG:
246 reason="No more room";
247 if (!conv_silent) {
248 if (from == CH_UNIX) {
249 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
250 charset_name(from), charset_name(to),
251 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
252 } else {
253 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
254 charset_name(from), charset_name(to),
255 (unsigned int)srclen, (unsigned int)destlen));
256 }
257 }
258 break;
259 case EILSEQ:
260 reason="Illegal multibyte sequence";
261 if (!conv_silent)
262 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
263 if (allow_bad_conv)
264 goto use_as_is;
265
266 return (size_t)-1;
267 default:
268 if (!conv_silent)
269 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270 return (size_t)-1;
271 }
272 /* smb_panic(reason); */
273 }
274 return destlen-o_len;
275
276 use_as_is:
277
278 /*
279 * Conversion not supported. This is actually an error, but there are so
280 * many misconfigured iconv systems and smb.conf's out there we can't just
281 * fail. Do a very bad conversion instead.... JRA.
282 */
283
284 {
285 if (o_len == 0 || i_len == 0)
286 return destlen - o_len;
287
288 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
289 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
290 /* Can't convert from utf16 any endian to multibyte.
291 Replace with the default fail char.
292 */
293 if (i_len < 2)
294 return destlen - o_len;
295 if (i_len >= 2) {
296 *outbuf = lp_failed_convert_char();
297
298 outbuf++;
299 o_len--;
300
301 inbuf += 2;
302 i_len -= 2;
303 }
304
305 if (o_len == 0 || i_len == 0)
306 return destlen - o_len;
307
308 /* Keep trying with the next char... */
309 goto again;
310
311 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312 /* Can't convert to UTF16LE - just widen by adding the
313 default fail char then zero.
314 */
315 if (o_len < 2)
316 return destlen - o_len;
317
318 outbuf[0] = lp_failed_convert_char();
319 outbuf[1] = '\0';
320
321 inbuf++;
322 i_len--;
323
324 outbuf += 2;
325 o_len -= 2;
326
327 if (o_len == 0 || i_len == 0)
328 return destlen - o_len;
329
330 /* Keep trying with the next char... */
331 goto again;
332
333 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334 to != CH_UTF16LE && to != CH_UTF16BE) {
335 /* Failed multibyte to multibyte. Just copy the default fail char and
336 try again. */
337 outbuf[0] = lp_failed_convert_char();
338
339 inbuf++;
340 i_len--;
341
342 outbuf++;
343 o_len--;
344
345 if (o_len == 0 || i_len == 0)
346 return destlen - o_len;
347
348 /* Keep trying with the next char... */
349 goto again;
350
351 } else {
352 /* Keep compiler happy.... */
353 return destlen - o_len;
354 }
355 }
356}
357
358/**
359 * Convert string from one encoding to another, making error checking etc
360 * Fast path version - handles ASCII first.
361 *
362 * @param src pointer to source string (multibyte or singlebyte)
363 * @param srclen length of the source string in bytes, or -1 for nul terminated.
364 * @param dest pointer to destination string (multibyte or singlebyte)
365 * @param destlen maximal length allowed for string - *NEVER* -1.
366 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367 * @returns the number of bytes occupied in the destination
368 *
369 * Ensure the srclen contains the terminating zero.
370 *
371 * This function has been hand-tuned to provide a fast path.
372 * Don't change unless you really know what you are doing. JRA.
373 **/
374
375size_t convert_string(charset_t from, charset_t to,
376 void const *src, size_t srclen,
377 void *dest, size_t destlen, bool allow_bad_conv)
378{
379 /*
380 * NB. We deliberately don't do a strlen here if srclen == -1.
381 * This is very expensive over millions of calls and is taken
382 * care of in the slow path in convert_string_internal. JRA.
383 */
384
385#ifdef DEVELOPER
386 SMB_ASSERT(destlen != (size_t)-1);
387#endif
388
389 if (srclen == 0)
390 return 0;
391
392 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393 const unsigned char *p = (const unsigned char *)src;
394 unsigned char *q = (unsigned char *)dest;
395 size_t slen = srclen;
396 size_t dlen = destlen;
397 unsigned char lastp = '\0';
398 size_t retval = 0;
399
400 /* If all characters are ascii, fast path here. */
401 while (slen && dlen) {
402 if ((lastp = *p) <= 0x7f) {
403 *q++ = *p++;
404 if (slen != (size_t)-1) {
405 slen--;
406 }
407 dlen--;
408 retval++;
409 if (!lastp)
410 break;
411 } else {
412#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413 goto general_case;
414#else
415 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416 if (ret == (size_t)-1) {
417 return ret;
418 }
419 return retval + ret;
420#endif
421 }
422 }
423 if (!dlen) {
424 /* Even if we fast path we should note if we ran out of room. */
425 if (((slen != (size_t)-1) && slen) ||
426 ((slen == (size_t)-1) && lastp)) {
427 errno = E2BIG;
428 }
429 }
430 return retval;
431
432 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433 const unsigned char *p = (const unsigned char *)src;
434 unsigned char *q = (unsigned char *)dest;
435 size_t retval = 0;
436 size_t slen = srclen;
437 size_t dlen = destlen;
438 unsigned char lastp = '\0';
439
440 /* If all characters are ascii, fast path here. */
441 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
442 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
443 *q++ = *p;
444 if (slen != (size_t)-1) {
445 slen -= 2;
446 }
447 p += 2;
448 dlen--;
449 retval++;
450 if (!lastp)
451 break;
452 } else {
453#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
454 goto general_case;
455#else
456 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457 if (ret == (size_t)-1) {
458 return ret;
459 }
460 return retval + ret;
461#endif
462 }
463 }
464 if (!dlen) {
465 /* Even if we fast path we should note if we ran out of room. */
466 if (((slen != (size_t)-1) && slen) ||
467 ((slen == (size_t)-1) && lastp)) {
468 errno = E2BIG;
469 }
470 }
471 return retval;
472
473 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
474 const unsigned char *p = (const unsigned char *)src;
475 unsigned char *q = (unsigned char *)dest;
476 size_t retval = 0;
477 size_t slen = srclen;
478 size_t dlen = destlen;
479 unsigned char lastp = '\0';
480
481 /* If all characters are ascii, fast path here. */
482 while (slen && (dlen >= 2)) {
483 if ((lastp = *p) <= 0x7F) {
484 *q++ = *p++;
485 *q++ = '\0';
486 if (slen != (size_t)-1) {
487 slen--;
488 }
489 dlen -= 2;
490 retval += 2;
491 if (!lastp)
492 break;
493 } else {
494#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
495 goto general_case;
496#else
497 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
498 if (ret == (size_t)-1) {
499 return ret;
500 }
501 return retval + ret;
502#endif
503 }
504 }
505 if (!dlen) {
506 /* Even if we fast path we should note if we ran out of room. */
507 if (((slen != (size_t)-1) && slen) ||
508 ((slen == (size_t)-1) && lastp)) {
509 errno = E2BIG;
510 }
511 }
512 return retval;
513 }
514
515#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
516 general_case:
517#endif
518 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
519}
520
521/**
522 * Convert between character sets, allocating a new buffer for the result.
523 *
524 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
525 * (this is a bad interface and needs fixing. JRA).
526 * @param srclen length of source buffer.
527 * @param dest always set at least to NULL
528 * @param converted_size set to the size of the allocated buffer on return
529 * true
530 * @note -1 is not accepted for srclen.
531 *
532 * @return true if new buffer was correctly allocated, and string was
533 * converted.
534 *
535 * Ensure the srclen contains the terminating zero.
536 *
537 * I hate the goto's in this function. It's embarressing.....
538 * There has to be a cleaner way to do this. JRA.
539 **/
540
541bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
542 void const *src, size_t srclen, void *dst,
543 size_t *converted_size, bool allow_bad_conv)
544{
545 size_t i_len, o_len, destlen = (srclen * 3) / 2;
546 size_t retval;
547 const char *inbuf = (const char *)src;
548 char *outbuf = NULL, *ob = NULL;
549 smb_iconv_t descriptor;
550 void **dest = (void **)dst;
551
552 *dest = NULL;
553
554 if (!converted_size) {
555 errno = EINVAL;
556 return false;
557 }
558
559 if (src == NULL || srclen == (size_t)-1) {
560 errno = EINVAL;
561 return false;
562 }
563 if (srclen == 0) {
564 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
565 if (ob == NULL) {
566 errno = ENOMEM;
567 return false;
568 }
569 *dest = ob;
570 *converted_size = 0;
571 return true;
572 }
573
574 lazy_initialize_conv();
575
576 descriptor = conv_handles[from][to];
577
578 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
579 if (!conv_silent)
580 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
581 errno = EOPNOTSUPP;
582 return false;
583 }
584
585 convert:
586
587 /* +2 is for ucs2 null termination. */
588 if ((destlen*2)+2 < destlen) {
589 /* wrapped ! abort. */
590 if (!conv_silent)
591 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
592 if (!ctx)
593 SAFE_FREE(outbuf);
594 errno = EOPNOTSUPP;
595 return false;
596 } else {
597 destlen = destlen * 2;
598 }
599
600 /* +2 is for ucs2 null termination. */
601 if (ctx) {
602 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
603 } else {
604 ob = (char *)SMB_REALLOC(ob, destlen + 2);
605 }
606
607 if (!ob) {
608 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
609 errno = ENOMEM;
610 return false;
611 }
612 outbuf = ob;
613 i_len = srclen;
614 o_len = destlen;
615
616 again:
617
618
619 retval = smb_iconv(descriptor,
620 &inbuf, &i_len,
621 &outbuf, &o_len);
622 if(retval == (size_t)-1) {
623 const char *reason="unknown error";
624 switch(errno) {
625 case EINVAL:
626 reason="Incomplete multibyte sequence";
627 if (!conv_silent)
628 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
629 if (allow_bad_conv)
630 goto use_as_is;
631 break;
632 case E2BIG:
633 goto convert;
634 case EILSEQ:
635 reason="Illegal multibyte sequence";
636 if (!conv_silent)
637 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
638 if (allow_bad_conv)
639 goto use_as_is;
640 break;
641 }
642 if (!conv_silent)
643 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
644 /* smb_panic(reason); */
645 if (ctx) {
646 TALLOC_FREE(ob);
647 } else {
648 SAFE_FREE(ob);
649 }
650 return false;
651 }
652
653 out:
654
655 destlen = destlen - o_len;
656 /* Don't shrink unless we're reclaiming a lot of
657 * space. This is in the hot codepath and these
658 * reallocs *cost*. JRA.
659 */
660 if (o_len > 1024) {
661 /* We're shrinking here so we know the +2 is safe from wrap. */
662 if (ctx) {
663 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
664 } else {
665 ob = (char *)SMB_REALLOC(ob,destlen + 2);
666 }
667 }
668
669 if (destlen && !ob) {
670 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
671 errno = ENOMEM;
672 return false;
673 }
674
675 *dest = ob;
676
677 /* Must ucs2 null terminate in the extra space we allocated. */
678 ob[destlen] = '\0';
679 ob[destlen+1] = '\0';
680
681 *converted_size = destlen;
682 return true;
683
684 use_as_is:
685
686 /*
687 * Conversion not supported. This is actually an error, but there are so
688 * many misconfigured iconv systems and smb.conf's out there we can't just
689 * fail. Do a very bad conversion instead.... JRA.
690 */
691
692 {
693 if (o_len == 0 || i_len == 0)
694 goto out;
695
696 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
697 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
698 /* Can't convert from utf16 any endian to multibyte.
699 Replace with the default fail char.
700 */
701
702 if (i_len < 2)
703 goto out;
704
705 if (i_len >= 2) {
706 *outbuf = lp_failed_convert_char();
707
708 outbuf++;
709 o_len--;
710
711 inbuf += 2;
712 i_len -= 2;
713 }
714
715 if (o_len == 0 || i_len == 0)
716 goto out;
717
718 /* Keep trying with the next char... */
719 goto again;
720
721 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
722 /* Can't convert to UTF16LE - just widen by adding the
723 default fail char then zero.
724 */
725 if (o_len < 2)
726 goto out;
727
728 outbuf[0] = lp_failed_convert_char();
729 outbuf[1] = '\0';
730
731 inbuf++;
732 i_len--;
733
734 outbuf += 2;
735 o_len -= 2;
736
737 if (o_len == 0 || i_len == 0)
738 goto out;
739
740 /* Keep trying with the next char... */
741 goto again;
742
743 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
744 to != CH_UTF16LE && to != CH_UTF16BE) {
745 /* Failed multibyte to multibyte. Just copy the default fail char and
746 try again. */
747 outbuf[0] = lp_failed_convert_char();
748
749 inbuf++;
750 i_len--;
751
752 outbuf++;
753 o_len--;
754
755 if (o_len == 0 || i_len == 0)
756 goto out;
757
758 /* Keep trying with the next char... */
759 goto again;
760
761 } else {
762 /* Keep compiler happy.... */
763 goto out;
764 }
765 }
766}
767
768/**
769 * Convert between character sets, allocating a new buffer using talloc for the result.
770 *
771 * @param srclen length of source buffer.
772 * @param dest always set at least to NULL
773 * @parm converted_size set to the number of bytes occupied by the string in
774 * the destination on success.
775 * @note -1 is not accepted for srclen.
776 *
777 * @return true if new buffer was correctly allocated, and string was
778 * converted.
779 */
780bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
781 void const *src, size_t srclen, void *dst,
782 size_t *converted_size, bool allow_bad_conv)
783{
784 void **dest = (void **)dst;
785
786 *dest = NULL;
787 return convert_string_allocate(ctx, from, to, src, srclen, dest,
788 converted_size, allow_bad_conv);
789}
790
791size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
792{
793 size_t size;
794 smb_ucs2_t *buffer;
795
796 if (!push_ucs2_allocate(&buffer, src, &size)) {
797 return (size_t)-1;
798 }
799
800 if (!strupper_w(buffer) && (dest == src)) {
801 free(buffer);
802 return srclen;
803 }
804
805 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
806 free(buffer);
807 return size;
808}
809
810/**
811 strdup() a unix string to upper case.
812**/
813
814char *strdup_upper(const char *s)
815{
816 char *out_buffer = SMB_STRDUP(s);
817 const unsigned char *p = (const unsigned char *)s;
818 unsigned char *q = (unsigned char *)out_buffer;
819
820 if (!q) {
821 return NULL;
822 }
823
824 /* this is quite a common operation, so we want it to be
825 fast. We optimise for the ascii case, knowing that all our
826 supported multi-byte character sets are ascii-compatible
827 (ie. they match for the first 128 chars) */
828
829 while (*p) {
830 if (*p & 0x80)
831 break;
832 *q++ = toupper_ascii_fast(*p);
833 p++;
834 }
835
836 if (*p) {
837 /* MB case. */
838 size_t converted_size, converted_size2;
839 smb_ucs2_t *buffer = NULL;
840
841 SAFE_FREE(out_buffer);
842 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
843 strlen(s) + 1,
844 (void **)(void *)&buffer,
845 &converted_size, True))
846 {
847 return NULL;
848 }
849
850 strupper_w(buffer);
851
852 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
853 converted_size,
854 (void **)(void *)&out_buffer,
855 &converted_size2, True))
856 {
857 TALLOC_FREE(buffer);
858 return NULL;
859 }
860
861 /* Don't need the intermediate buffer
862 * anymore.
863 */
864 TALLOC_FREE(buffer);
865 }
866
867 return out_buffer;
868}
869
870/**
871 talloc_strdup() a unix string to upper case.
872**/
873
874char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
875{
876 char *out_buffer = talloc_strdup(ctx,s);
877 const unsigned char *p = (const unsigned char *)s;
878 unsigned char *q = (unsigned char *)out_buffer;
879
880 if (!q) {
881 return NULL;
882 }
883
884 /* this is quite a common operation, so we want it to be
885 fast. We optimise for the ascii case, knowing that all our
886 supported multi-byte character sets are ascii-compatible
887 (ie. they match for the first 128 chars) */
888
889 while (*p) {
890 if (*p & 0x80)
891 break;
892 *q++ = toupper_ascii_fast(*p);
893 p++;
894 }
895
896 if (*p) {
897 /* MB case. */
898 size_t converted_size, converted_size2;
899 smb_ucs2_t *ubuf = NULL;
900
901 /* We're not using the ascii buffer above. */
902 TALLOC_FREE(out_buffer);
903
904 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
905 strlen(s)+1, (void *)&ubuf,
906 &converted_size, True))
907 {
908 return NULL;
909 }
910
911 strupper_w(ubuf);
912
913 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
914 converted_size, (void *)&out_buffer,
915 &converted_size2, True))
916 {
917 TALLOC_FREE(ubuf);
918 return NULL;
919 }
920
921 /* Don't need the intermediate buffer
922 * anymore.
923 */
924 TALLOC_FREE(ubuf);
925 }
926
927 return out_buffer;
928}
929
930size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
931{
932 size_t size;
933 smb_ucs2_t *buffer = NULL;
934
935 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
936 (void **)(void *)&buffer, &size,
937 True))
938 {
939 smb_panic("failed to create UCS2 buffer");
940 }
941 if (!strlower_w(buffer) && (dest == src)) {
942 SAFE_FREE(buffer);
943 return srclen;
944 }
945 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
946 SAFE_FREE(buffer);
947 return size;
948}
949
950/**
951 strdup() a unix string to lower case.
952**/
953
954char *strdup_lower(const char *s)
955{
956 size_t converted_size;
957 smb_ucs2_t *buffer = NULL;
958 char *out_buffer;
959
960 if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
961 return NULL;
962 }
963
964 strlower_w(buffer);
965
966 if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
967 SAFE_FREE(buffer);
968 return NULL;
969 }
970
971 SAFE_FREE(buffer);
972
973 return out_buffer;
974}
975
976char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
977{
978 size_t converted_size;
979 smb_ucs2_t *buffer = NULL;
980 char *out_buffer;
981
982 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
983 return NULL;
984 }
985
986 strlower_w(buffer);
987
988 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
989 TALLOC_FREE(buffer);
990 return NULL;
991 }
992
993 TALLOC_FREE(buffer);
994
995 return out_buffer;
996}
997
998
999size_t ucs2_align(const void *base_ptr, const void *p, int flags)
1000{
1001 if (flags & (STR_NOALIGN|STR_ASCII))
1002 return 0;
1003 return PTR_DIFF(p, base_ptr) & 1;
1004}
1005
1006
1007/**
1008 * Copy a string from a char* unix src to a dos codepage string destination.
1009 *
1010 * @return the number of bytes occupied by the string in the destination.
1011 *
1012 * @param flags can include
1013 * <dl>
1014 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1015 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1016 * </dl>
1017 *
1018 * @param dest_len the maximum length in bytes allowed in the
1019 * destination.
1020 **/
1021size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1022{
1023 size_t src_len = strlen(src);
1024 char *tmpbuf = NULL;
1025 size_t ret;
1026
1027 /* No longer allow a length of -1. */
1028 if (dest_len == (size_t)-1) {
1029 smb_panic("push_ascii - dest_len == -1");
1030 }
1031
1032 if (flags & STR_UPPER) {
1033 tmpbuf = SMB_STRDUP(src);
1034 if (!tmpbuf) {
1035 smb_panic("malloc fail");
1036 }
1037 strupper_m(tmpbuf);
1038 src = tmpbuf;
1039 }
1040
1041 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1042 src_len++;
1043 }
1044
1045 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1046 if (ret == (size_t)-1 &&
1047 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1048 && dest_len > 0) {
1049 ((char *)dest)[0] = '\0';
1050 }
1051 SAFE_FREE(tmpbuf);
1052 return ret;
1053}
1054
1055size_t push_ascii_fstring(void *dest, const char *src)
1056{
1057 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1058}
1059
1060/********************************************************************
1061 Push an nstring - ensure null terminated. Written by
1062 moriyama@miraclelinux.com (MORIYAMA Masayuki).
1063********************************************************************/
1064
1065size_t push_ascii_nstring(void *dest, const char *src)
1066{
1067 size_t i, buffer_len, dest_len;
1068 smb_ucs2_t *buffer;
1069
1070 conv_silent = True;
1071 if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1072 smb_panic("failed to create UCS2 buffer");
1073 }
1074
1075 /* We're using buffer_len below to count ucs2 characters, not bytes. */
1076 buffer_len /= sizeof(smb_ucs2_t);
1077
1078 dest_len = 0;
1079 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1080 unsigned char mb[10];
1081 /* Convert one smb_ucs2_t character at a time. */
1082 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1083 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1084 memcpy((char *)dest + dest_len, mb, mb_len);
1085 dest_len += mb_len;
1086 } else {
1087 errno = E2BIG;
1088 break;
1089 }
1090 }
1091 ((char *)dest)[dest_len] = '\0';
1092
1093 SAFE_FREE(buffer);
1094 conv_silent = False;
1095 return dest_len;
1096}
1097
1098/********************************************************************
1099 Push and malloc an ascii string. src and dest null terminated.
1100********************************************************************/
1101
1102bool push_ascii_allocate(char **dest, const char *src, size_t *converted_size)
1103{
1104 size_t src_len = strlen(src)+1;
1105
1106 *dest = NULL;
1107 return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1108 (void **)dest, converted_size, True);
1109}
1110
1111/**
1112 * Copy a string from a dos codepage source to a unix char* destination.
1113 *
1114 * The resulting string in "dest" is always null terminated.
1115 *
1116 * @param flags can have:
1117 * <dl>
1118 * <dt>STR_TERMINATE</dt>
1119 * <dd>STR_TERMINATE means the string in @p src
1120 * is null terminated, and src_len is ignored.</dd>
1121 * </dl>
1122 *
1123 * @param src_len is the length of the source area in bytes.
1124 * @returns the number of bytes occupied by the string in @p src.
1125 **/
1126size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1127{
1128 size_t ret;
1129
1130 if (dest_len == (size_t)-1) {
1131 /* No longer allow dest_len of -1. */
1132 smb_panic("pull_ascii - invalid dest_len of -1");
1133 }
1134
1135 if (flags & STR_TERMINATE) {
1136 if (src_len == (size_t)-1) {
1137 src_len = strlen((const char *)src) + 1;
1138 } else {
1139 size_t len = strnlen((const char *)src, src_len);
1140 if (len < src_len)
1141 len++;
1142 src_len = len;
1143 }
1144 }
1145
1146 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1147 if (ret == (size_t)-1) {
1148 ret = 0;
1149 dest_len = 0;
1150 }
1151
1152 if (dest_len && ret) {
1153 /* Did we already process the terminating zero ? */
1154 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1155 dest[MIN(ret, dest_len-1)] = 0;
1156 }
1157 } else {
1158 dest[0] = 0;
1159 }
1160
1161 return src_len;
1162}
1163
1164/**
1165 * Copy a string from a dos codepage source to a unix char* destination.
1166 Talloc version.
1167 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1168 needs fixing. JRA).
1169 *
1170 * The resulting string in "dest" is always null terminated.
1171 *
1172 * @param flags can have:
1173 * <dl>
1174 * <dt>STR_TERMINATE</dt>
1175 * <dd>STR_TERMINATE means the string in @p src
1176 * is null terminated, and src_len is ignored.</dd>
1177 * </dl>
1178 *
1179 * @param src_len is the length of the source area in bytes.
1180 * @returns the number of bytes occupied by the string in @p src.
1181 **/
1182
1183static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1184 char **ppdest,
1185 const void *src,
1186 size_t src_len,
1187 int flags)
1188{
1189 char *dest = NULL;
1190 size_t dest_len;
1191
1192#ifdef DEVELOPER
1193 /* Ensure we never use the braindead "malloc" varient. */
1194 if (ctx == NULL) {
1195 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1196 }
1197#endif
1198
1199 *ppdest = NULL;
1200
1201 if (!src_len) {
1202 return 0;
1203 }
1204
1205 if (flags & STR_TERMINATE) {
1206 if (src_len == (size_t)-1) {
1207 src_len = strlen((const char *)src) + 1;
1208 } else {
1209 size_t len = strnlen((const char *)src, src_len);
1210 if (len < src_len)
1211 len++;
1212 src_len = len;
1213 }
1214 /* Ensure we don't use an insane length from the client. */
1215 if (src_len >= 1024*1024) {
1216 char *msg = talloc_asprintf(ctx,
1217 "Bad src length (%u) in "
1218 "pull_ascii_base_talloc",
1219 (unsigned int)src_len);
1220 smb_panic(msg);
1221 }
1222 } else {
1223 /* Can't have an unlimited length
1224 * non STR_TERMINATE'd.
1225 */
1226 if (src_len == (size_t)-1) {
1227 errno = EINVAL;
1228 return 0;
1229 }
1230 }
1231
1232 /* src_len != -1 here. */
1233
1234 if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1235 &dest_len, True)) {
1236 dest_len = 0;
1237 }
1238
1239 if (dest_len && dest) {
1240 /* Did we already process the terminating zero ? */
1241 if (dest[dest_len-1] != 0) {
1242 size_t size = talloc_get_size(dest);
1243 /* Have we got space to append the '\0' ? */
1244 if (size <= dest_len) {
1245 /* No, realloc. */
1246 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1247 dest_len+1);
1248 if (!dest) {
1249 /* talloc fail. */
1250 dest_len = (size_t)-1;
1251 return 0;
1252 }
1253 }
1254 /* Yay - space ! */
1255 dest[dest_len] = '\0';
1256 dest_len++;
1257 }
1258 } else if (dest) {
1259 dest[0] = 0;
1260 }
1261
1262 *ppdest = dest;
1263 return src_len;
1264}
1265
1266size_t pull_ascii_fstring(char *dest, const void *src)
1267{
1268 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1269}
1270
1271/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1272
1273size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1274{
1275 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1276}
1277
1278/**
1279 * Copy a string from a char* src to a unicode destination.
1280 *
1281 * @returns the number of bytes occupied by the string in the destination.
1282 *
1283 * @param flags can have:
1284 *
1285 * <dl>
1286 * <dt>STR_TERMINATE <dd>means include the null termination.
1287 * <dt>STR_UPPER <dd>means uppercase in the destination.
1288 * <dt>STR_NOALIGN <dd>means don't do alignment.
1289 * </dl>
1290 *
1291 * @param dest_len is the maximum length allowed in the
1292 * destination.
1293 **/
1294
1295size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1296{
1297 size_t len=0;
1298 size_t src_len;
1299 size_t ret;
1300
1301 if (dest_len == (size_t)-1) {
1302 /* No longer allow dest_len of -1. */
1303 smb_panic("push_ucs2 - invalid dest_len of -1");
1304 }
1305
1306 if (flags & STR_TERMINATE)
1307 src_len = (size_t)-1;
1308 else
1309 src_len = strlen(src);
1310
1311 if (ucs2_align(base_ptr, dest, flags)) {
1312 *(char *)dest = 0;
1313 dest = (void *)((char *)dest + 1);
1314 if (dest_len)
1315 dest_len--;
1316 len++;
1317 }
1318
1319 /* ucs2 is always a multiple of 2 bytes */
1320 dest_len &= ~1;
1321
1322 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1323 if (ret == (size_t)-1) {
1324 if ((flags & STR_TERMINATE) &&
1325 dest &&
1326 dest_len) {
1327 *(char *)dest = 0;
1328 }
1329 return len;
1330 }
1331
1332 len += ret;
1333
1334 if (flags & STR_UPPER) {
1335 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1336 size_t i;
1337
1338 /* We check for i < (ret / 2) below as the dest string isn't null
1339 terminated if STR_TERMINATE isn't set. */
1340
1341 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1342 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1343 if (v != dest_ucs2[i]) {
1344 dest_ucs2[i] = v;
1345 }
1346 }
1347 }
1348
1349 return len;
1350}
1351
1352
1353/**
1354 * Copy a string from a unix char* src to a UCS2 destination,
1355 * allocating a buffer using talloc().
1356 *
1357 * @param dest always set at least to NULL
1358 * @parm converted_size set to the number of bytes occupied by the string in
1359 * the destination on success.
1360 *
1361 * @return true if new buffer was correctly allocated, and string was
1362 * converted.
1363 **/
1364bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1365 size_t *converted_size)
1366{
1367 size_t src_len = strlen(src)+1;
1368
1369 *dest = NULL;
1370 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1371 (void **)dest, converted_size, True);
1372}
1373
1374
1375/**
1376 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1377 *
1378 * @param dest always set at least to NULL
1379 * @parm converted_size set to the number of bytes occupied by the string in
1380 * the destination on success.
1381 *
1382 * @return true if new buffer was correctly allocated, and string was
1383 * converted.
1384 **/
1385
1386bool push_ucs2_allocate(smb_ucs2_t **dest, const char *src,
1387 size_t *converted_size)
1388{
1389 size_t src_len = strlen(src)+1;
1390
1391 *dest = NULL;
1392 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1393 (void **)dest, converted_size, True);
1394}
1395
1396/**
1397 Copy a string from a char* src to a UTF-8 destination.
1398 Return the number of bytes occupied by the string in the destination
1399 Flags can have:
1400 STR_TERMINATE means include the null termination
1401 STR_UPPER means uppercase in the destination
1402 dest_len is the maximum length allowed in the destination. If dest_len
1403 is -1 then no maxiumum is used.
1404**/
1405
1406static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1407{
1408 size_t src_len = 0;
1409 size_t ret;
1410 char *tmpbuf = NULL;
1411
1412 if (dest_len == (size_t)-1) {
1413 /* No longer allow dest_len of -1. */
1414 smb_panic("push_utf8 - invalid dest_len of -1");
1415 }
1416
1417 if (flags & STR_UPPER) {
1418 tmpbuf = strdup_upper(src);
1419 if (!tmpbuf) {
1420 return (size_t)-1;
1421 }
1422 src = tmpbuf;
1423 src_len = strlen(src);
1424 }
1425
1426 src_len = strlen(src);
1427 if (flags & STR_TERMINATE) {
1428 src_len++;
1429 }
1430
1431 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1432 SAFE_FREE(tmpbuf);
1433 return ret;
1434}
1435
1436size_t push_utf8_fstring(void *dest, const char *src)
1437{
1438 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1439}
1440
1441/**
1442 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1443 *
1444 * @param dest always set at least to NULL
1445 * @parm converted_size set to the number of bytes occupied by the string in
1446 * the destination on success.
1447 *
1448 * @return true if new buffer was correctly allocated, and string was
1449 * converted.
1450 **/
1451
1452bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1453 size_t *converted_size)
1454{
1455 size_t src_len = strlen(src)+1;
1456
1457 *dest = NULL;
1458 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1459 (void**)dest, converted_size, True);
1460}
1461
1462/**
1463 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1464 *
1465 * @param dest always set at least to NULL
1466 * @parm converted_size set to the number of bytes occupied by the string in
1467 * the destination on success.
1468 *
1469 * @return true if new buffer was correctly allocated, and string was
1470 * converted.
1471 **/
1472
1473bool push_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1474{
1475 size_t src_len = strlen(src)+1;
1476
1477 *dest = NULL;
1478 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1479 (void **)dest, converted_size, True);
1480}
1481
1482/**
1483 Copy a string from a ucs2 source to a unix char* destination.
1484 Flags can have:
1485 STR_TERMINATE means the string in src is null terminated.
1486 STR_NOALIGN means don't try to align.
1487 if STR_TERMINATE is set then src_len is ignored if it is -1.
1488 src_len is the length of the source area in bytes
1489 Return the number of bytes occupied by the string in src.
1490 The resulting string in "dest" is always null terminated.
1491**/
1492
1493size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1494{
1495 size_t ret;
1496
1497 if (dest_len == (size_t)-1) {
1498 /* No longer allow dest_len of -1. */
1499 smb_panic("pull_ucs2 - invalid dest_len of -1");
1500 }
1501
1502 if (!src_len) {
1503 if (dest && dest_len > 0) {
1504 dest[0] = '\0';
1505 }
1506 return 0;
1507 }
1508
1509 if (ucs2_align(base_ptr, src, flags)) {
1510 src = (const void *)((const char *)src + 1);
1511 if (src_len != (size_t)-1)
1512 src_len--;
1513 }
1514
1515 if (flags & STR_TERMINATE) {
1516 /* src_len -1 is the default for null terminated strings. */
1517 if (src_len != (size_t)-1) {
1518 size_t len = strnlen_w((const smb_ucs2_t *)src,
1519 src_len/2);
1520 if (len < src_len/2)
1521 len++;
1522 src_len = len*2;
1523 }
1524 }
1525
1526 /* ucs2 is always a multiple of 2 bytes */
1527 if (src_len != (size_t)-1)
1528 src_len &= ~1;
1529
1530 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1531 if (ret == (size_t)-1) {
1532 ret = 0;
1533 dest_len = 0;
1534 }
1535
1536 if (src_len == (size_t)-1)
1537 src_len = ret*2;
1538
1539 if (dest_len && ret) {
1540 /* Did we already process the terminating zero ? */
1541 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1542 dest[MIN(ret, dest_len-1)] = 0;
1543 }
1544 } else {
1545 dest[0] = 0;
1546 }
1547
1548 return src_len;
1549}
1550
1551/**
1552 Copy a string from a ucs2 source to a unix char* destination.
1553 Talloc version with a base pointer.
1554 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1555 needs fixing. JRA).
1556 Flags can have:
1557 STR_TERMINATE means the string in src is null terminated.
1558 STR_NOALIGN means don't try to align.
1559 if STR_TERMINATE is set then src_len is ignored if it is -1.
1560 src_len is the length of the source area in bytes
1561 Return the number of bytes occupied by the string in src.
1562 The resulting string in "dest" is always null terminated.
1563**/
1564
1565size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1566 const void *base_ptr,
1567 char **ppdest,
1568 const void *src,
1569 size_t src_len,
1570 int flags)
1571{
1572 char *dest;
1573 size_t dest_len;
1574
1575 *ppdest = NULL;
1576
1577#ifdef DEVELOPER
1578 /* Ensure we never use the braindead "malloc" varient. */
1579 if (ctx == NULL) {
1580 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1581 }
1582#endif
1583
1584 if (!src_len) {
1585 return 0;
1586 }
1587
1588 if (ucs2_align(base_ptr, src, flags)) {
1589 src = (const void *)((const char *)src + 1);
1590 if (src_len != (size_t)-1)
1591 src_len--;
1592 }
1593
1594 if (flags & STR_TERMINATE) {
1595 /* src_len -1 is the default for null terminated strings. */
1596 if (src_len != (size_t)-1) {
1597 size_t len = strnlen_w((const smb_ucs2_t *)src,
1598 src_len/2);
1599 if (len < src_len/2)
1600 len++;
1601 src_len = len*2;
1602 } else {
1603 /*
1604 * src_len == -1 - alloc interface won't take this
1605 * so we must calculate.
1606 */
1607 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1608 }
1609 /* Ensure we don't use an insane length from the client. */
1610 if (src_len >= 1024*1024) {
1611 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1612 }
1613 } else {
1614 /* Can't have an unlimited length
1615 * non STR_TERMINATE'd.
1616 */
1617 if (src_len == (size_t)-1) {
1618 errno = EINVAL;
1619 return 0;
1620 }
1621 }
1622
1623 /* src_len != -1 here. */
1624
1625 /* ucs2 is always a multiple of 2 bytes */
1626 src_len &= ~1;
1627
1628 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1629 (void *)&dest, &dest_len, True)) {
1630 dest_len = 0;
1631 }
1632
1633 if (dest_len) {
1634 /* Did we already process the terminating zero ? */
1635 if (dest[dest_len-1] != 0) {
1636 size_t size = talloc_get_size(dest);
1637 /* Have we got space to append the '\0' ? */
1638 if (size <= dest_len) {
1639 /* No, realloc. */
1640 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1641 dest_len+1);
1642 if (!dest) {
1643 /* talloc fail. */
1644 dest_len = (size_t)-1;
1645 return 0;
1646 }
1647 }
1648 /* Yay - space ! */
1649 dest[dest_len] = '\0';
1650 dest_len++;
1651 }
1652 } else if (dest) {
1653 dest[0] = 0;
1654 }
1655
1656 *ppdest = dest;
1657 return src_len;
1658}
1659
1660size_t pull_ucs2_fstring(char *dest, const void *src)
1661{
1662 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1663}
1664
1665/**
1666 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1667 *
1668 * @param dest always set at least to NULL
1669 * @parm converted_size set to the number of bytes occupied by the string in
1670 * the destination on success.
1671 *
1672 * @return true if new buffer was correctly allocated, and string was
1673 * converted.
1674 **/
1675
1676bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1677 size_t *converted_size)
1678{
1679 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1680
1681 *dest = NULL;
1682 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1683 (void **)dest, converted_size, True);
1684}
1685
1686/**
1687 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1688 *
1689 * @param dest always set at least to NULL
1690 * @parm converted_size set to the number of bytes occupied by the string in
1691 * the destination on success.
1692 * @return true if new buffer was correctly allocated, and string was
1693 * converted.
1694 **/
1695
1696bool pull_ucs2_allocate(char **dest, const smb_ucs2_t *src,
1697 size_t *converted_size)
1698{
1699 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1700
1701 *dest = NULL;
1702 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1703 (void **)dest, converted_size, True);
1704}
1705
1706/**
1707 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1708 *
1709 * @param dest always set at least to NULL
1710 * @parm converted_size set to the number of bytes occupied by the string in
1711 * the destination on success.
1712 *
1713 * @return true if new buffer was correctly allocated, and string was
1714 * converted.
1715 **/
1716
1717bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1718 size_t *converted_size)
1719{
1720 size_t src_len = strlen(src)+1;
1721
1722 *dest = NULL;
1723 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1724 (void **)dest, converted_size, True);
1725}
1726
1727/**
1728 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1729 *
1730 * @param dest always set at least to NULL
1731 * @parm converted_size set to the number of bytes occupied by the string in
1732 * the destination on success.
1733 *
1734 * @return true if new buffer was correctly allocated, and string was
1735 * converted.
1736 **/
1737
1738bool pull_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1739{
1740 size_t src_len = strlen(src)+1;
1741
1742 *dest = NULL;
1743 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1744 (void **)dest, converted_size, True);
1745}
1746
1747/**
1748 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1749 *
1750 * @param dest always set at least to NULL
1751 * @parm converted_size set to the number of bytes occupied by the string in
1752 * the destination on success.
1753 *
1754 * @return true if new buffer was correctly allocated, and string was
1755 * converted.
1756 **/
1757
1758bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1759 size_t *converted_size)
1760{
1761 size_t src_len = strlen(src)+1;
1762
1763 *dest = NULL;
1764 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1765 (void **)dest, converted_size, True);
1766}
1767
1768/**
1769 Copy a string from a char* src to a unicode or ascii
1770 dos codepage destination choosing unicode or ascii based on the
1771 flags in the SMB buffer starting at base_ptr.
1772 Return the number of bytes occupied by the string in the destination.
1773 flags can have:
1774 STR_TERMINATE means include the null termination.
1775 STR_UPPER means uppercase in the destination.
1776 STR_ASCII use ascii even with unicode packet.
1777 STR_NOALIGN means don't do alignment.
1778 dest_len is the maximum length allowed in the destination. If dest_len
1779 is -1 then no maxiumum is used.
1780**/
1781
1782size_t push_string_fn(const char *function, unsigned int line,
1783 const void *base_ptr, uint16 flags2,
1784 void *dest, const char *src,
1785 size_t dest_len, int flags)
1786{
1787#ifdef DEVELOPER
1788 /* We really need to zero fill here, not clobber
1789 * region, as we want to ensure that valgrind thinks
1790 * all of the outgoing buffer has been written to
1791 * so a send() or write() won't trap an error.
1792 * JRA.
1793 */
1794#if 0
1795 clobber_region(function, line, dest, dest_len);
1796#else
1797 memset(dest, '\0', dest_len);
1798#endif
1799#endif
1800
1801 if (!(flags & STR_ASCII) && \
1802 ((flags & STR_UNICODE || \
1803 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1804 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1805 }
1806 return push_ascii(dest, src, dest_len, flags);
1807}
1808
1809
1810/**
1811 Copy a string from a unicode or ascii source (depending on
1812 the packet flags) to a char* destination.
1813 Flags can have:
1814 STR_TERMINATE means the string in src is null terminated.
1815 STR_UNICODE means to force as unicode.
1816 STR_ASCII use ascii even with unicode packet.
1817 STR_NOALIGN means don't do alignment.
1818 if STR_TERMINATE is set then src_len is ignored is it is -1
1819 src_len is the length of the source area in bytes.
1820 Return the number of bytes occupied by the string in src.
1821 The resulting string in "dest" is always null terminated.
1822**/
1823
1824size_t pull_string_fn(const char *function,
1825 unsigned int line,
1826 const void *base_ptr,
1827 uint16 smb_flags2,
1828 char *dest,
1829 const void *src,
1830 size_t dest_len,
1831 size_t src_len,
1832 int flags)
1833{
1834#ifdef DEVELOPER
1835 clobber_region(function, line, dest, dest_len);
1836#endif
1837
1838 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1839 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1840 "UNICODE defined");
1841 }
1842
1843 if (!(flags & STR_ASCII) && \
1844 ((flags & STR_UNICODE || \
1845 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1846 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1847 }
1848 return pull_ascii(dest, src, dest_len, src_len, flags);
1849}
1850
1851/**
1852 Copy a string from a unicode or ascii source (depending on
1853 the packet flags) to a char* destination.
1854 Variant that uses talloc.
1855 Flags can have:
1856 STR_TERMINATE means the string in src is null terminated.
1857 STR_UNICODE means to force as unicode.
1858 STR_ASCII use ascii even with unicode packet.
1859 STR_NOALIGN means don't do alignment.
1860 if STR_TERMINATE is set then src_len is ignored is it is -1
1861 src_len is the length of the source area in bytes.
1862 Return the number of bytes occupied by the string in src.
1863 The resulting string in "dest" is always null terminated.
1864**/
1865
1866size_t pull_string_talloc_fn(const char *function,
1867 unsigned int line,
1868 TALLOC_CTX *ctx,
1869 const void *base_ptr,
1870 uint16 smb_flags2,
1871 char **ppdest,
1872 const void *src,
1873 size_t src_len,
1874 int flags)
1875{
1876 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1877 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1878 "UNICODE defined");
1879 }
1880
1881 if (!(flags & STR_ASCII) && \
1882 ((flags & STR_UNICODE || \
1883 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1884 return pull_ucs2_base_talloc(ctx,
1885 base_ptr,
1886 ppdest,
1887 src,
1888 src_len,
1889 flags);
1890 }
1891 return pull_ascii_base_talloc(ctx,
1892 ppdest,
1893 src,
1894 src_len,
1895 flags);
1896}
1897
1898
1899size_t align_string(const void *base_ptr, const char *p, int flags)
1900{
1901 if (!(flags & STR_ASCII) && \
1902 ((flags & STR_UNICODE || \
1903 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1904 return ucs2_align(base_ptr, p, flags);
1905 }
1906 return 0;
1907}
1908
1909/*
1910 Return the unicode codepoint for the next multi-byte CH_UNIX character
1911 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1912
1913 Also return the number of bytes consumed (which tells the caller
1914 how many bytes to skip to get to the next CH_UNIX character).
1915
1916 Return INVALID_CODEPOINT if the next character cannot be converted.
1917*/
1918
1919codepoint_t next_codepoint(const char *str, size_t *size)
1920{
1921 /* It cannot occupy more than 4 bytes in UTF16 format */
1922 uint8_t buf[4];
1923 smb_iconv_t descriptor;
1924#ifdef __OS2__
1925 size_t ilen_max;
1926 size_t olen_orig;
1927 const char *inbuf;
1928#endif
1929 size_t ilen_orig;
1930 size_t ilen;
1931 size_t olen;
1932
1933 char *outbuf;
1934
1935#ifdef __OS2__
1936 *size = 1;
1937#endif
1938
1939 if ((str[0] & 0x80) == 0) {
1940#ifndef __OS2__
1941 *size = 1;
1942#endif
1943 return (codepoint_t)str[0];
1944 }
1945
1946 lazy_initialize_conv();
1947
1948 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1949 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1950#ifndef __OS2__
1951 *size = 1;
1952#endif
1953 return INVALID_CODEPOINT;
1954 }
1955#ifdef __OS2__
1956 /* We assume that no multi-byte character can take
1957 more than 5 bytes. This is OK as we only
1958 support codepoints up to 1M */
1959
1960 ilen_max = strnlen( str, 5 );
1961#else
1962 *size = 1;
1963#endif
1964 ilen_orig = 1;
1965 olen_orig = 2;
1966 while( 1 )
1967 {
1968 ilen = ilen_orig;
1969 olen = olen_orig;
1970 inbuf = str;
1971 outbuf = ( char * )buf;
1972 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1973 break;
1974
1975 switch( errno )
1976 {
1977 case E2BIG :
1978 if( olen_orig == 2 )
1979 olen_orig = 4;
1980 else
1981 return INVALID_CODEPOINT;
1982 break;
1983
1984
1985 case EINVAL :
1986#ifndef __OS2__
1987 /* We assume that no multi-byte character can take
1988 more than 5 bytes. This is OK as we only
1989 support codepoints up to 1M */
1990 if( ilen_orig < 5 )
1991#else
1992 if( ilen_orig < ilen_max )
1993#endif
1994 ilen_orig++;
1995 else
1996 return INVALID_CODEPOINT;
1997 break;
1998
1999 case EILSEQ :
2000 default :
2001 return INVALID_CODEPOINT;
2002 }
2003 }
2004
2005 olen = olen_orig - olen;
2006
2007 *size = ilen_orig - ilen;
2008
2009 if (olen == 2) {
2010 /* 2 byte, UTF16-LE encoded value. */
2011 return (codepoint_t)SVAL(buf, 0);
2012 }
2013 if (olen == 4) {
2014 /* Decode a 4 byte UTF16-LE character manually.
2015 See RFC2871 for the encoding machanism.
2016 */
2017 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2018 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2019
2020 return (codepoint_t)0x10000 +
2021 (w1 << 10) + w2;
2022 }
2023
2024 /* no other length is valid */
2025 return INVALID_CODEPOINT;
2026}
Note: See TracBrowser for help on using the repository browser.