source: branches/samba-3.2.x/source/lib/charcnv.c@ 1001

Last change on this file since 1001 was 341, checked in by Herwig Bauernfeind, 16 years ago

divers fix for Ticket #68 in 3.2

File size: 49.5 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22*/
23#include "includes.h"
24
/*
 * Replacement character written in place of input that cannot be
 * converted. Fixed for now; it could be turned into a configuration
 * parameter if anyone asks for that. JRA.
 */

char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
31
32/**
33 * @file
34 *
35 * @brief Character-set conversion routines built on our iconv.
36 *
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
43 *
44 * @sa lib/iconv.c
45 */
46
47
48static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50static bool initialized;
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57 const char *ret = NULL;
58 if (ch == CH_UTF16LE) ret = "UTF-16LE";
59 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60 else if (ch == CH_UNIX) ret = lp_unix_charset();
61 else if (ch == CH_DOS) ret = lp_dos_charset();
62 else if (ch == CH_DISPLAY) ret = lp_display_charset();
63 else if (ch == CH_UTF8) ret = "UTF8";
64
65#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66 if (ret && !strcmp(ret, "LOCALE")) {
67 const char *ln = NULL;
68
69#ifdef HAVE_SETLOCALE
70 setlocale(LC_ALL, "");
71#endif
72 ln = nl_langinfo(CODESET);
73 if (ln) {
74 /* Check whether the charset name is supported
75 by iconv */
76 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77 if (handle == (smb_iconv_t) -1) {
78 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79 ln = NULL;
80 } else {
81 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82 smb_iconv_close(handle);
83 }
84 }
85 ret = ln;
86 }
87#endif
88
89 if (!ret || !*ret) ret = "ASCII";
90 return ret;
91}
92
93void lazy_initialize_conv(void)
94{
95 if (!initialized) {
96 load_case_tables();
97 init_iconv();
98 initialized = true;
99 }
100}
101
102/**
103 * Destroy global objects allocated by init_iconv()
104 **/
105void gfree_charcnv(void)
106{
107 int c1, c2;
108
109 for (c1=0;c1<NUM_CHARSETS;c1++) {
110 for (c2=0;c2<NUM_CHARSETS;c2++) {
111 if ( conv_handles[c1][c2] ) {
112 smb_iconv_close( conv_handles[c1][c2] );
113 conv_handles[c1][c2] = 0;
114 }
115 }
116 }
117 initialized = false;
118}
119
120/**
121 * Initialize iconv conversion descriptors.
122 *
123 * This is called the first time it is needed, and also called again
124 * every time the configuration is reloaded, because the charset or
125 * codepage might have changed.
126 **/
127void init_iconv(void)
128{
129 int c1, c2;
130 bool did_reload = False;
131
132 /* so that charset_name() works we need to get the UNIX<->UCS2 going
133 first */
134 if (!conv_handles[CH_UNIX][CH_UTF16LE])
135 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
136
137 if (!conv_handles[CH_UTF16LE][CH_UNIX])
138 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
139
140 for (c1=0;c1<NUM_CHARSETS;c1++) {
141 for (c2=0;c2<NUM_CHARSETS;c2++) {
142 const char *n1 = charset_name((charset_t)c1);
143 const char *n2 = charset_name((charset_t)c2);
144 if (conv_handles[c1][c2] &&
145 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
146 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
147 continue;
148
149 did_reload = True;
150
151 if (conv_handles[c1][c2])
152 smb_iconv_close(conv_handles[c1][c2]);
153
154 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
155 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
156 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
157 charset_name((charset_t)c1), charset_name((charset_t)c2)));
158 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
159 n1 = "ASCII";
160 }
161 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
162 n2 = "ASCII";
163 }
164 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
165 n1, n2 ));
166 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
167 if (!conv_handles[c1][c2]) {
168 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
169 smb_panic("init_iconv: conv_handle initialization failed");
170 }
171 }
172 }
173 }
174
175 if (did_reload) {
176 /* XXX: Does this really get called every time the dos
177 * codepage changes? */
178 /* XXX: Is the did_reload test too strict? */
179 conv_silent = True;
180 init_valid_table();
181 conv_silent = False;
182 }
183}
184
185/**
186 * Convert string from one encoding to another, making error checking etc
187 * Slow path version - uses (slow) iconv.
188 *
189 * @param src pointer to source string (multibyte or singlebyte)
190 * @param srclen length of the source string in bytes
191 * @param dest pointer to destination string (multibyte or singlebyte)
192 * @param destlen maximal length allowed for string
193 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
194 * @returns the number of bytes occupied in the destination
195 *
196 * Ensure the srclen contains the terminating zero.
197 *
198 **/
199
200static size_t convert_string_internal(charset_t from, charset_t to,
201 void const *src, size_t srclen,
202 void *dest, size_t destlen, bool allow_bad_conv)
203{
204 size_t i_len, o_len;
205 size_t retval;
206 const char* inbuf = (const char*)src;
207 char* outbuf = (char*)dest;
208 smb_iconv_t descriptor;
209
210 lazy_initialize_conv();
211
212 descriptor = conv_handles[from][to];
213
214 if (srclen == (size_t)-1) {
215 if (from == CH_UTF16LE || from == CH_UTF16BE) {
216 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
217 } else {
218 srclen = strlen((const char *)src)+1;
219 }
220 }
221
222
223 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
224 if (!conv_silent)
225 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
226 return (size_t)-1;
227 }
228
229 i_len=srclen;
230 o_len=destlen;
231
232 again:
233
234 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
235 if(retval==(size_t)-1) {
236 const char *reason="unknown error";
237 switch(errno) {
238 case EINVAL:
239 reason="Incomplete multibyte sequence";
240 if (!conv_silent)
241 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
242 if (allow_bad_conv)
243 goto use_as_is;
244 return (size_t)-1;
245 case E2BIG:
246 reason="No more room";
247 if (!conv_silent) {
248 if (from == CH_UNIX) {
249 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
250 charset_name(from), charset_name(to),
251 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
252 } else {
253 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
254 charset_name(from), charset_name(to),
255 (unsigned int)srclen, (unsigned int)destlen));
256 }
257 }
258 break;
259 case EILSEQ:
260 reason="Illegal multibyte sequence";
261 if (!conv_silent)
262 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
263 if (allow_bad_conv)
264 goto use_as_is;
265
266 return (size_t)-1;
267 default:
268 if (!conv_silent)
269 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270 return (size_t)-1;
271 }
272 /* smb_panic(reason); */
273 }
274 return destlen-o_len;
275
276 use_as_is:
277
278 /*
279 * Conversion not supported. This is actually an error, but there are so
280 * many misconfigured iconv systems and smb.conf's out there we can't just
281 * fail. Do a very bad conversion instead.... JRA.
282 */
283
284 {
285 if (o_len == 0 || i_len == 0)
286 return destlen - o_len;
287
288 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
289 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
290 /* Can't convert from utf16 any endian to multibyte.
291 Replace with the default fail char.
292 */
293 if (i_len < 2)
294 return destlen - o_len;
295 if (i_len >= 2) {
296 *outbuf = lp_failed_convert_char();
297
298 outbuf++;
299 o_len--;
300
301 inbuf += 2;
302 i_len -= 2;
303 }
304
305 if (o_len == 0 || i_len == 0)
306 return destlen - o_len;
307
308 /* Keep trying with the next char... */
309 goto again;
310
311 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312 /* Can't convert to UTF16LE - just widen by adding the
313 default fail char then zero.
314 */
315 if (o_len < 2)
316 return destlen - o_len;
317
318 outbuf[0] = lp_failed_convert_char();
319 outbuf[1] = '\0';
320
321 inbuf++;
322 i_len--;
323
324 outbuf += 2;
325 o_len -= 2;
326
327 if (o_len == 0 || i_len == 0)
328 return destlen - o_len;
329
330 /* Keep trying with the next char... */
331 goto again;
332
333 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334 to != CH_UTF16LE && to != CH_UTF16BE) {
335 /* Failed multibyte to multibyte. Just copy the default fail char and
336 try again. */
337 outbuf[0] = lp_failed_convert_char();
338
339 inbuf++;
340 i_len--;
341
342 outbuf++;
343 o_len--;
344
345 if (o_len == 0 || i_len == 0)
346 return destlen - o_len;
347
348 /* Keep trying with the next char... */
349 goto again;
350
351 } else {
352 /* Keep compiler happy.... */
353 return destlen - o_len;
354 }
355 }
356}
357
358/**
359 * Convert string from one encoding to another, making error checking etc
360 * Fast path version - handles ASCII first.
361 *
362 * @param src pointer to source string (multibyte or singlebyte)
363 * @param srclen length of the source string in bytes, or -1 for nul terminated.
364 * @param dest pointer to destination string (multibyte or singlebyte)
365 * @param destlen maximal length allowed for string - *NEVER* -1.
366 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367 * @returns the number of bytes occupied in the destination
368 *
369 * Ensure the srclen contains the terminating zero.
370 *
371 * This function has been hand-tuned to provide a fast path.
372 * Don't change unless you really know what you are doing. JRA.
373 **/
374
375size_t convert_string(charset_t from, charset_t to,
376 void const *src, size_t srclen,
377 void *dest, size_t destlen, bool allow_bad_conv)
378{
379 /*
380 * NB. We deliberately don't do a strlen here if srclen == -1.
381 * This is very expensive over millions of calls and is taken
382 * care of in the slow path in convert_string_internal. JRA.
383 */
384
385#ifdef DEVELOPER
386 SMB_ASSERT(destlen != (size_t)-1);
387#endif
388
389 if (srclen == 0)
390 return 0;
391
392 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393 const unsigned char *p = (const unsigned char *)src;
394 unsigned char *q = (unsigned char *)dest;
395 size_t slen = srclen;
396 size_t dlen = destlen;
397 unsigned char lastp = '\0';
398 size_t retval = 0;
399
400 /* If all characters are ascii, fast path here. */
401 while (slen && dlen) {
402 if ((lastp = *p) <= 0x7f) {
403 *q++ = *p++;
404 if (slen != (size_t)-1) {
405 slen--;
406 }
407 dlen--;
408 retval++;
409 if (!lastp)
410 break;
411 } else {
412#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413 goto general_case;
414#else
415 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416 if (ret == (size_t)-1) {
417 return ret;
418 }
419 return retval + ret;
420#endif
421 }
422 }
423 if (!dlen) {
424 /* Even if we fast path we should note if we ran out of room. */
425 if (((slen != (size_t)-1) && slen) ||
426 ((slen == (size_t)-1) && lastp)) {
427 errno = E2BIG;
428 }
429 }
430 return retval;
431
432 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433 const unsigned char *p = (const unsigned char *)src;
434 unsigned char *q = (unsigned char *)dest;
435 size_t retval = 0;
436 size_t slen = srclen;
437 size_t dlen = destlen;
438 unsigned char lastp = '\0';
439
440 /* If all characters are ascii, fast path here. */
441 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
442 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
443 *q++ = *p;
444 if (slen != (size_t)-1) {
445 slen -= 2;
446 }
447 p += 2;
448 dlen--;
449 retval++;
450 if (!lastp)
451 break;
452 } else {
453#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
454 goto general_case;
455#else
456 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457 if (ret == (size_t)-1) {
458 return ret;
459 }
460 return retval + ret;
461#endif
462 }
463 }
464 if (!dlen) {
465 /* Even if we fast path we should note if we ran out of room. */
466 if (((slen != (size_t)-1) && slen) ||
467 ((slen == (size_t)-1) && lastp)) {
468 errno = E2BIG;
469 }
470 }
471 return retval;
472
473 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
474 const unsigned char *p = (const unsigned char *)src;
475 unsigned char *q = (unsigned char *)dest;
476 size_t retval = 0;
477 size_t slen = srclen;
478 size_t dlen = destlen;
479 unsigned char lastp = '\0';
480
481 /* If all characters are ascii, fast path here. */
482 while (slen && (dlen >= 2)) {
483 if ((lastp = *p) <= 0x7F) {
484 *q++ = *p++;
485 *q++ = '\0';
486 if (slen != (size_t)-1) {
487 slen--;
488 }
489 dlen -= 2;
490 retval += 2;
491 if (!lastp)
492 break;
493 } else {
494#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
495 goto general_case;
496#else
497 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
498 if (ret == (size_t)-1) {
499 return ret;
500 }
501 return retval + ret;
502#endif
503 }
504 }
505 if (!dlen) {
506 /* Even if we fast path we should note if we ran out of room. */
507 if (((slen != (size_t)-1) && slen) ||
508 ((slen == (size_t)-1) && lastp)) {
509 errno = E2BIG;
510 }
511 }
512 return retval;
513 }
514
515#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
516 general_case:
517#endif
518 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
519}
520
521/**
522 * Convert between character sets, allocating a new buffer for the result.
523 *
524 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
525 * (this is a bad interface and needs fixing. JRA).
526 * @param srclen length of source buffer.
527 * @param dest always set at least to NULL
528 * @param converted_size set to the size of the allocated buffer on return
529 * true
530 * @note -1 is not accepted for srclen.
531 *
532 * @return True if new buffer was correctly allocated, and string was
533 * converted.
534 *
535 * Ensure the srclen contains the terminating zero.
536 *
537 * I hate the goto's in this function. It's embarressing.....
538 * There has to be a cleaner way to do this. JRA.
539 **/
540
541bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
542 void const *src, size_t srclen, void *dst,
543 size_t *converted_size, bool allow_bad_conv)
544{
545 size_t i_len, o_len, destlen = (srclen * 3) / 2;
546 size_t retval;
547 const char *inbuf = (const char *)src;
548 char *outbuf = NULL, *ob = NULL;
549 smb_iconv_t descriptor;
550 void **dest = (void **)dst;
551
552 *dest = NULL;
553
554 if (!converted_size) {
555 errno = EINVAL;
556 return false;
557 }
558
559 if (src == NULL || srclen == (size_t)-1) {
560 errno = EINVAL;
561 return false;
562 }
563 if (srclen == 0) {
564 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
565 if (ob == NULL) {
566 errno = ENOMEM;
567 return false;
568 }
569 *dest = ob;
570 *converted_size = 0;
571 return true;
572 }
573
574 lazy_initialize_conv();
575
576 descriptor = conv_handles[from][to];
577
578 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
579 if (!conv_silent)
580 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
581 errno = EOPNOTSUPP;
582 return false;
583 }
584
585 convert:
586
587 /* +2 is for ucs2 null termination. */
588 if ((destlen*2)+2 < destlen) {
589 /* wrapped ! abort. */
590 if (!conv_silent)
591 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
592 if (!ctx)
593 SAFE_FREE(outbuf);
594 errno = EOPNOTSUPP;
595 return false;
596 } else {
597 destlen = destlen * 2;
598 }
599
600 /* +2 is for ucs2 null termination. */
601 if (ctx) {
602 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
603 } else {
604 ob = (char *)SMB_REALLOC(ob, destlen + 2);
605 }
606
607 if (!ob) {
608 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
609 errno = ENOMEM;
610 return false;
611 }
612 outbuf = ob;
613 i_len = srclen;
614 o_len = destlen;
615
616 again:
617
618
619 retval = smb_iconv(descriptor,
620 &inbuf, &i_len,
621 &outbuf, &o_len);
622 if(retval == (size_t)-1) {
623 const char *reason="unknown error";
624 switch(errno) {
625 case EINVAL:
626 reason="Incomplete multibyte sequence";
627 if (!conv_silent)
628 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
629 if (allow_bad_conv)
630 goto use_as_is;
631 break;
632 case E2BIG:
633 goto convert;
634 case EILSEQ:
635 reason="Illegal multibyte sequence";
636 if (!conv_silent)
637 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
638 if (allow_bad_conv)
639 goto use_as_is;
640 break;
641 }
642 if (!conv_silent)
643 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
644 /* smb_panic(reason); */
645 if (ctx) {
646 TALLOC_FREE(ob);
647 } else {
648 SAFE_FREE(ob);
649 }
650 return false;
651 }
652
653 out:
654
655 destlen = destlen - o_len;
656 /* Don't shrink unless we're reclaiming a lot of
657 * space. This is in the hot codepath and these
658 * reallocs *cost*. JRA.
659 */
660 if (o_len > 1024) {
661 /* We're shrinking here so we know the +2 is safe from wrap. */
662 if (ctx) {
663 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
664 } else {
665 ob = (char *)SMB_REALLOC(ob,destlen + 2);
666 }
667 }
668
669 if (destlen && !ob) {
670 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
671 errno = ENOMEM;
672 return false;
673 }
674
675 *dest = ob;
676
677 /* Must ucs2 null terminate in the extra space we allocated. */
678 ob[destlen] = '\0';
679 ob[destlen+1] = '\0';
680
681 *converted_size = destlen;
682 return true;
683
684 use_as_is:
685
686 /*
687 * Conversion not supported. This is actually an error, but there are so
688 * many misconfigured iconv systems and smb.conf's out there we can't just
689 * fail. Do a very bad conversion instead.... JRA.
690 */
691
692 {
693 if (o_len == 0 || i_len == 0)
694 goto out;
695
696 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
697 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
698 /* Can't convert from utf16 any endian to multibyte.
699 Replace with the default fail char.
700 */
701
702 if (i_len < 2)
703 goto out;
704
705 if (i_len >= 2) {
706 *outbuf = lp_failed_convert_char();
707
708 outbuf++;
709 o_len--;
710
711 inbuf += 2;
712 i_len -= 2;
713 }
714
715 if (o_len == 0 || i_len == 0)
716 goto out;
717
718 /* Keep trying with the next char... */
719 goto again;
720
721 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
722 /* Can't convert to UTF16LE - just widen by adding the
723 default fail char then zero.
724 */
725 if (o_len < 2)
726 goto out;
727
728 outbuf[0] = lp_failed_convert_char();
729 outbuf[1] = '\0';
730
731 inbuf++;
732 i_len--;
733
734 outbuf += 2;
735 o_len -= 2;
736
737 if (o_len == 0 || i_len == 0)
738 goto out;
739
740 /* Keep trying with the next char... */
741 goto again;
742
743 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
744 to != CH_UTF16LE && to != CH_UTF16BE) {
745 /* Failed multibyte to multibyte. Just copy the default fail char and
746 try again. */
747 outbuf[0] = lp_failed_convert_char();
748
749 inbuf++;
750 i_len--;
751
752 outbuf++;
753 o_len--;
754
755 if (o_len == 0 || i_len == 0)
756 goto out;
757
758 /* Keep trying with the next char... */
759 goto again;
760
761 } else {
762 /* Keep compiler happy.... */
763 goto out;
764 }
765 }
766}
767
768/**
769 * Convert between character sets, allocating a new buffer using talloc for the result.
770 *
771 * @param srclen length of source buffer.
772 * @param dest always set at least to NULL
773 * @note -1 is not accepted for srclen.
774 *
775 * @returns Size in bytes of the converted string; or -1 in case of error.
776 **/
777size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
778 void const *src, size_t srclen, void *dst,
779 bool allow_bad_conv)
780{
781 void **dest = (void **)dst;
782 size_t dest_len;
783
784 *dest = NULL;
785 if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
786 &dest_len, allow_bad_conv))
787 return (size_t)-1;
788 if (*dest == NULL)
789 return (size_t)-1;
790 return dest_len;
791}
792
793size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
794{
795 size_t size;
796 smb_ucs2_t *buffer;
797
798 size = push_ucs2_allocate(&buffer, src);
799 if (size == (size_t)-1) {
800 return (size_t)-1;
801 }
802 if (!strupper_w(buffer) && (dest == src)) {
803 free(buffer);
804 return srclen;
805 }
806
807 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
808 free(buffer);
809 return size;
810}
811
812/**
813 strdup() a unix string to upper case.
814**/
815
816char *strdup_upper(const char *s)
817{
818 char *out_buffer = SMB_STRDUP(s);
819 const unsigned char *p = (const unsigned char *)s;
820 unsigned char *q = (unsigned char *)out_buffer;
821
822 if (!q) {
823 return NULL;
824 }
825
826 /* this is quite a common operation, so we want it to be
827 fast. We optimise for the ascii case, knowing that all our
828 supported multi-byte character sets are ascii-compatible
829 (ie. they match for the first 128 chars) */
830
831 while (*p) {
832 if (*p & 0x80)
833 break;
834 *q++ = toupper_ascii_fast(*p);
835 p++;
836 }
837
838 if (*p) {
839 /* MB case. */
840 size_t size, size2;
841 smb_ucs2_t *buffer = NULL;
842
843 SAFE_FREE(out_buffer);
844 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
845 strlen(s) + 1, (void **)(void *)&buffer, &size,
846 True)) {
847 return NULL;
848 }
849
850 strupper_w(buffer);
851
852 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
853 size, (void **)(void *)&out_buffer, &size2, True)) {
854 TALLOC_FREE(buffer);
855 return NULL;
856 }
857
858 /* Don't need the intermediate buffer
859 * anymore.
860 */
861 TALLOC_FREE(buffer);
862 }
863
864 return out_buffer;
865}
866
867/**
868 talloc_strdup() a unix string to upper case.
869**/
870
871char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
872{
873 char *out_buffer = talloc_strdup(ctx,s);
874 const unsigned char *p = (const unsigned char *)s;
875 unsigned char *q = (unsigned char *)out_buffer;
876
877 if (!q) {
878 return NULL;
879 }
880
881 /* this is quite a common operation, so we want it to be
882 fast. We optimise for the ascii case, knowing that all our
883 supported multi-byte character sets are ascii-compatible
884 (ie. they match for the first 128 chars) */
885
886 while (*p) {
887 if (*p & 0x80)
888 break;
889 *q++ = toupper_ascii_fast(*p);
890 p++;
891 }
892
893 if (*p) {
894 /* MB case. */
895 size_t size;
896 smb_ucs2_t *ubuf = NULL;
897
898 /* We're not using the ascii buffer above. */
899 TALLOC_FREE(out_buffer);
900
901 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
902 s, strlen(s)+1,
903 (void *)&ubuf,
904 True);
905 if (size == (size_t)-1) {
906 return NULL;
907 }
908
909 strupper_w(ubuf);
910
911 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
912 ubuf, size,
913 (void *)&out_buffer,
914 True);
915
916 /* Don't need the intermediate buffer
917 * anymore.
918 */
919
920 TALLOC_FREE(ubuf);
921
922 if (size == (size_t)-1) {
923 return NULL;
924 }
925 }
926
927 return out_buffer;
928}
929
930size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
931{
932 size_t size;
933 smb_ucs2_t *buffer = NULL;
934
935 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
936 (void **)(void *)&buffer, &size, True)) {
937 smb_panic("failed to create UCS2 buffer");
938 }
939 if (!strlower_w(buffer) && (dest == src)) {
940 SAFE_FREE(buffer);
941 return srclen;
942 }
943 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
944 SAFE_FREE(buffer);
945 return size;
946}
947
948/**
949 strdup() a unix string to lower case.
950**/
951
952char *strdup_lower(const char *s)
953{
954 size_t size;
955 smb_ucs2_t *buffer = NULL;
956 char *out_buffer;
957
958 size = push_ucs2_allocate(&buffer, s);
959 if (size == -1 || !buffer) {
960 return NULL;
961 }
962
963 strlower_w(buffer);
964
965 size = pull_ucs2_allocate(&out_buffer, buffer);
966 SAFE_FREE(buffer);
967
968 if (size == (size_t)-1) {
969 return NULL;
970 }
971
972 return out_buffer;
973}
974
975char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
976{
977 size_t size;
978 smb_ucs2_t *buffer = NULL;
979 char *out_buffer;
980
981 size = push_ucs2_talloc(ctx, &buffer, s);
982 if (size == -1 || !buffer) {
983 TALLOC_FREE(buffer);
984 return NULL;
985 }
986
987 strlower_w(buffer);
988
989 size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
990 TALLOC_FREE(buffer);
991
992 if (size == (size_t)-1) {
993 TALLOC_FREE(out_buffer);
994 return NULL;
995 }
996
997 return out_buffer;
998}
999
1000
1001size_t ucs2_align(const void *base_ptr, const void *p, int flags)
1002{
1003 if (flags & (STR_NOALIGN|STR_ASCII))
1004 return 0;
1005 return PTR_DIFF(p, base_ptr) & 1;
1006}
1007
1008
1009/**
1010 * Copy a string from a char* unix src to a dos codepage string destination.
1011 *
1012 * @return the number of bytes occupied by the string in the destination.
1013 *
1014 * @param flags can include
1015 * <dl>
1016 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1017 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1018 * </dl>
1019 *
1020 * @param dest_len the maximum length in bytes allowed in the
1021 * destination.
1022 **/
1023size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1024{
1025 size_t src_len = strlen(src);
1026 char *tmpbuf = NULL;
1027 size_t ret;
1028
1029 /* No longer allow a length of -1. */
1030 if (dest_len == (size_t)-1) {
1031 smb_panic("push_ascii - dest_len == -1");
1032 }
1033
1034 if (flags & STR_UPPER) {
1035 tmpbuf = SMB_STRDUP(src);
1036 if (!tmpbuf) {
1037 smb_panic("malloc fail");
1038 }
1039 strupper_m(tmpbuf);
1040 src = tmpbuf;
1041 }
1042
1043 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1044 src_len++;
1045 }
1046
1047 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1048 if (ret == (size_t)-1 &&
1049 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1050 && dest_len > 0) {
1051 ((char *)dest)[0] = '\0';
1052 }
1053 SAFE_FREE(tmpbuf);
1054 return ret;
1055}
1056
1057size_t push_ascii_fstring(void *dest, const char *src)
1058{
1059 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1060}
1061
1062/********************************************************************
1063 Push an nstring - ensure null terminated. Written by
1064 moriyama@miraclelinux.com (MORIYAMA Masayuki).
1065********************************************************************/
1066
1067size_t push_ascii_nstring(void *dest, const char *src)
1068{
1069 size_t i, buffer_len, dest_len;
1070 smb_ucs2_t *buffer;
1071
1072 conv_silent = True;
1073 buffer_len = push_ucs2_allocate(&buffer, src);
1074 if (buffer_len == (size_t)-1) {
1075 smb_panic("failed to create UCS2 buffer");
1076 }
1077
1078 /* We're using buffer_len below to count ucs2 characters, not bytes. */
1079 buffer_len /= sizeof(smb_ucs2_t);
1080
1081 dest_len = 0;
1082 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1083 unsigned char mb[10];
1084 /* Convert one smb_ucs2_t character at a time. */
1085 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1086 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1087 memcpy((char *)dest + dest_len, mb, mb_len);
1088 dest_len += mb_len;
1089 } else {
1090 errno = E2BIG;
1091 break;
1092 }
1093 }
1094 ((char *)dest)[dest_len] = '\0';
1095
1096 SAFE_FREE(buffer);
1097 conv_silent = False;
1098 return dest_len;
1099}
1100
1101/********************************************************************
1102 Push and malloc an ascii string. src and dest null terminated.
1103********************************************************************/
1104
1105size_t push_ascii_allocate(char **dest, const char *src)
1106{
1107 size_t dest_len, src_len = strlen(src)+1;
1108
1109 *dest = NULL;
1110 if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1111 (void **)dest, &dest_len, True))
1112 return (size_t)-1;
1113 else
1114 return dest_len;
1115}
1116
1117/**
1118 * Copy a string from a dos codepage source to a unix char* destination.
1119 *
1120 * The resulting string in "dest" is always null terminated.
1121 *
1122 * @param flags can have:
1123 * <dl>
1124 * <dt>STR_TERMINATE</dt>
1125 * <dd>STR_TERMINATE means the string in @p src
1126 * is null terminated, and src_len is ignored.</dd>
1127 * </dl>
1128 *
1129 * @param src_len is the length of the source area in bytes.
1130 * @returns the number of bytes occupied by the string in @p src.
1131 **/
1132size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1133{
1134 size_t ret;
1135
1136 if (dest_len == (size_t)-1) {
1137 /* No longer allow dest_len of -1. */
1138 smb_panic("pull_ascii - invalid dest_len of -1");
1139 }
1140
1141 if (flags & STR_TERMINATE) {
1142 if (src_len == (size_t)-1) {
1143 src_len = strlen((const char *)src) + 1;
1144 } else {
1145 size_t len = strnlen((const char *)src, src_len);
1146 if (len < src_len)
1147 len++;
1148 src_len = len;
1149 }
1150 }
1151
1152 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1153 if (ret == (size_t)-1) {
1154 ret = 0;
1155 dest_len = 0;
1156 }
1157
1158 if (dest_len && ret) {
1159 /* Did we already process the terminating zero ? */
1160 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1161 dest[MIN(ret, dest_len-1)] = 0;
1162 }
1163 } else {
1164 dest[0] = 0;
1165 }
1166
1167 return src_len;
1168}
1169
1170/**
1171 * Copy a string from a dos codepage source to a unix char* destination.
1172 Talloc version.
1173 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1174 needs fixing. JRA).
1175 *
1176 * The resulting string in "dest" is always null terminated.
1177 *
1178 * @param flags can have:
1179 * <dl>
1180 * <dt>STR_TERMINATE</dt>
1181 * <dd>STR_TERMINATE means the string in @p src
1182 * is null terminated, and src_len is ignored.</dd>
1183 * </dl>
1184 *
1185 * @param src_len is the length of the source area in bytes.
1186 * @returns the number of bytes occupied by the string in @p src.
1187 **/
1188
1189static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1190 char **ppdest,
1191 const void *src,
1192 size_t src_len,
1193 int flags)
1194{
1195 char *dest = NULL;
1196 size_t dest_len = 0;
1197
1198#ifdef DEVELOPER
1199 /* Ensure we never use the braindead "malloc" varient. */
1200 if (ctx == NULL) {
1201 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1202 }
1203#endif
1204
1205 *ppdest = NULL;
1206
1207 if (!src_len) {
1208 return 0;
1209 }
1210
1211 if (flags & STR_TERMINATE) {
1212 if (src_len == (size_t)-1) {
1213 src_len = strlen((const char *)src) + 1;
1214 } else {
1215 size_t len = strnlen((const char *)src, src_len);
1216 if (len < src_len)
1217 len++;
1218 src_len = len;
1219 }
1220 /* Ensure we don't use an insane length from the client. */
1221 if (src_len >= 1024*1024) {
1222 char *msg = talloc_asprintf(ctx,
1223 "Bad src length (%u) in "
1224 "pull_ascii_base_talloc",
1225 (unsigned int)src_len);
1226 smb_panic(msg);
1227 }
1228 } else {
1229 /* Can't have an unlimited length
1230 * non STR_TERMINATE'd.
1231 */
1232 if (src_len == (size_t)-1) {
1233 errno = EINVAL;
1234 return 0;
1235 }
1236 }
1237
1238 /* src_len != -1 here. */
1239
1240 if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1241 &dest_len, True)) {
1242 dest_len = 0;
1243 }
1244
1245 if (dest_len && dest) {
1246 /* Did we already process the terminating zero ? */
1247 if (dest[dest_len-1] != 0) {
1248 size_t size = talloc_get_size(dest);
1249 /* Have we got space to append the '\0' ? */
1250 if (size <= dest_len) {
1251 /* No, realloc. */
1252 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1253 dest_len+1);
1254 if (!dest) {
1255 /* talloc fail. */
1256 dest_len = (size_t)-1;
1257 return 0;
1258 }
1259 }
1260 /* Yay - space ! */
1261 dest[dest_len] = '\0';
1262 dest_len++;
1263 }
1264 } else if (dest) {
1265 dest[0] = 0;
1266 }
1267
1268 *ppdest = dest;
1269 return src_len;
1270}
1271
1272size_t pull_ascii_fstring(char *dest, const void *src)
1273{
1274 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1275}
1276
1277/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1278
1279size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1280{
1281 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1282}
1283
1284/**
1285 * Copy a string from a char* src to a unicode destination.
1286 *
1287 * @returns the number of bytes occupied by the string in the destination.
1288 *
1289 * @param flags can have:
1290 *
1291 * <dl>
1292 * <dt>STR_TERMINATE <dd>means include the null termination.
1293 * <dt>STR_UPPER <dd>means uppercase in the destination.
1294 * <dt>STR_NOALIGN <dd>means don't do alignment.
1295 * </dl>
1296 *
1297 * @param dest_len is the maximum length allowed in the
1298 * destination.
1299 **/
1300
1301size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1302{
1303 size_t len=0;
1304 size_t src_len;
1305 size_t ret;
1306
1307 if (dest_len == (size_t)-1) {
1308 /* No longer allow dest_len of -1. */
1309 smb_panic("push_ucs2 - invalid dest_len of -1");
1310 }
1311
1312 if (flags & STR_TERMINATE)
1313 src_len = (size_t)-1;
1314 else
1315 src_len = strlen(src);
1316
1317 if (ucs2_align(base_ptr, dest, flags)) {
1318 *(char *)dest = 0;
1319 dest = (void *)((char *)dest + 1);
1320 if (dest_len)
1321 dest_len--;
1322 len++;
1323 }
1324
1325 /* ucs2 is always a multiple of 2 bytes */
1326 dest_len &= ~1;
1327
1328 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1329 if (ret == (size_t)-1) {
1330 if ((flags & STR_TERMINATE) &&
1331 dest &&
1332 dest_len) {
1333 *(char *)dest = 0;
1334 }
1335 return len;
1336 }
1337
1338 len += ret;
1339
1340 if (flags & STR_UPPER) {
1341 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1342 size_t i;
1343
1344 /* We check for i < (ret / 2) below as the dest string isn't null
1345 terminated if STR_TERMINATE isn't set. */
1346
1347 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1348 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1349 if (v != dest_ucs2[i]) {
1350 dest_ucs2[i] = v;
1351 }
1352 }
1353 }
1354
1355 return len;
1356}
1357
1358
1359/**
1360 * Copy a string from a unix char* src to a UCS2 destination,
1361 * allocating a buffer using talloc().
1362 *
1363 * @param dest always set at least to NULL
1364 *
1365 * @returns The number of bytes occupied by the string in the destination
1366 * or -1 in case of error.
1367 **/
1368size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1369{
1370 size_t src_len = strlen(src)+1;
1371
1372 *dest = NULL;
1373 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1374}
1375
1376
1377/**
1378 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1379 *
1380 * @param dest always set at least to NULL
1381 *
1382 * @returns The number of bytes occupied by the string in the destination
1383 * or -1 in case of error.
1384 **/
1385
1386size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1387{
1388 size_t dest_len, src_len = strlen(src)+1;
1389
1390 *dest = NULL;
1391 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1392 (void **)dest, &dest_len, True))
1393 return (size_t)-1;
1394 else
1395 return dest_len;
1396}
1397
1398/**
1399 Copy a string from a char* src to a UTF-8 destination.
1400 Return the number of bytes occupied by the string in the destination
1401 Flags can have:
1402 STR_TERMINATE means include the null termination
1403 STR_UPPER means uppercase in the destination
1404 dest_len is the maximum length allowed in the destination. If dest_len
1405 is -1 then no maxiumum is used.
1406**/
1407
1408static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1409{
1410 size_t src_len = 0;
1411 size_t ret;
1412 char *tmpbuf = NULL;
1413
1414 if (dest_len == (size_t)-1) {
1415 /* No longer allow dest_len of -1. */
1416 smb_panic("push_utf8 - invalid dest_len of -1");
1417 }
1418
1419 if (flags & STR_UPPER) {
1420 tmpbuf = strdup_upper(src);
1421 if (!tmpbuf) {
1422 return (size_t)-1;
1423 }
1424 src = tmpbuf;
1425 src_len = strlen(src);
1426 }
1427
1428 src_len = strlen(src);
1429 if (flags & STR_TERMINATE) {
1430 src_len++;
1431 }
1432
1433 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1434 SAFE_FREE(tmpbuf);
1435 return ret;
1436}
1437
1438size_t push_utf8_fstring(void *dest, const char *src)
1439{
1440 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1441}
1442
1443/**
1444 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1445 *
1446 * @param dest always set at least to NULL
1447 *
1448 * @returns The number of bytes occupied by the string in the destination
1449 **/
1450
1451size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1452{
1453 size_t src_len = strlen(src)+1;
1454
1455 *dest = NULL;
1456 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1457}
1458
1459/**
1460 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1461 *
1462 * @param dest always set at least to NULL
1463 *
1464 * @returns The number of bytes occupied by the string in the destination
1465 **/
1466
1467size_t push_utf8_allocate(char **dest, const char *src)
1468{
1469 size_t dest_len, src_len = strlen(src)+1;
1470
1471 *dest = NULL;
1472 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1473 (void **)dest, &dest_len, True))
1474 return (size_t)-1;
1475 else
1476 return dest_len;
1477}
1478
1479/**
1480 Copy a string from a ucs2 source to a unix char* destination.
1481 Flags can have:
1482 STR_TERMINATE means the string in src is null terminated.
1483 STR_NOALIGN means don't try to align.
1484 if STR_TERMINATE is set then src_len is ignored if it is -1.
1485 src_len is the length of the source area in bytes
1486 Return the number of bytes occupied by the string in src.
1487 The resulting string in "dest" is always null terminated.
1488**/
1489
1490size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1491{
1492 size_t ret;
1493
1494 if (dest_len == (size_t)-1) {
1495 /* No longer allow dest_len of -1. */
1496 smb_panic("pull_ucs2 - invalid dest_len of -1");
1497 }
1498
1499 if (!src_len) {
1500 if (dest && dest_len > 0) {
1501 dest[0] = '\0';
1502 }
1503 return 0;
1504 }
1505
1506 if (ucs2_align(base_ptr, src, flags)) {
1507 src = (const void *)((const char *)src + 1);
1508 if (src_len != (size_t)-1)
1509 src_len--;
1510 }
1511
1512 if (flags & STR_TERMINATE) {
1513 /* src_len -1 is the default for null terminated strings. */
1514 if (src_len != (size_t)-1) {
1515 size_t len = strnlen_w((const smb_ucs2_t *)src,
1516 src_len/2);
1517 if (len < src_len/2)
1518 len++;
1519 src_len = len*2;
1520 }
1521 }
1522
1523 /* ucs2 is always a multiple of 2 bytes */
1524 if (src_len != (size_t)-1)
1525 src_len &= ~1;
1526
1527 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1528 if (ret == (size_t)-1) {
1529 ret = 0;
1530 dest_len = 0;
1531 }
1532
1533 if (src_len == (size_t)-1)
1534 src_len = ret*2;
1535
1536 if (dest_len && ret) {
1537 /* Did we already process the terminating zero ? */
1538 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1539 dest[MIN(ret, dest_len-1)] = 0;
1540 }
1541 } else {
1542 dest[0] = 0;
1543 }
1544
1545 return src_len;
1546}
1547
1548/**
1549 Copy a string from a ucs2 source to a unix char* destination.
1550 Talloc version with a base pointer.
1551 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1552 needs fixing. JRA).
1553 Flags can have:
1554 STR_TERMINATE means the string in src is null terminated.
1555 STR_NOALIGN means don't try to align.
1556 if STR_TERMINATE is set then src_len is ignored if it is -1.
1557 src_len is the length of the source area in bytes
1558 Return the number of bytes occupied by the string in src.
1559 The resulting string in "dest" is always null terminated.
1560**/
1561
1562size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1563 const void *base_ptr,
1564 char **ppdest,
1565 const void *src,
1566 size_t src_len,
1567 int flags)
1568{
1569 char *dest;
1570 size_t dest_len;
1571
1572 *ppdest = NULL;
1573
1574#ifdef DEVELOPER
1575 /* Ensure we never use the braindead "malloc" varient. */
1576 if (ctx == NULL) {
1577 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1578 }
1579#endif
1580
1581 if (!src_len) {
1582 return 0;
1583 }
1584
1585 if (ucs2_align(base_ptr, src, flags)) {
1586 src = (const void *)((const char *)src + 1);
1587 if (src_len != (size_t)-1)
1588 src_len--;
1589 }
1590
1591 if (flags & STR_TERMINATE) {
1592 /* src_len -1 is the default for null terminated strings. */
1593 if (src_len != (size_t)-1) {
1594 size_t len = strnlen_w((const smb_ucs2_t *)src,
1595 src_len/2);
1596 if (len < src_len/2)
1597 len++;
1598 src_len = len*2;
1599 } else {
1600 /*
1601 * src_len == -1 - alloc interface won't take this
1602 * so we must calculate.
1603 */
1604 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1605 }
1606 /* Ensure we don't use an insane length from the client. */
1607 if (src_len >= 1024*1024) {
1608 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1609 }
1610 } else {
1611 /* Can't have an unlimited length
1612 * non STR_TERMINATE'd.
1613 */
1614 if (src_len == (size_t)-1) {
1615 errno = EINVAL;
1616 return 0;
1617 }
1618 }
1619
1620 /* src_len != -1 here. */
1621
1622 /* ucs2 is always a multiple of 2 bytes */
1623 src_len &= ~1;
1624
1625 dest_len = convert_string_talloc(ctx,
1626 CH_UTF16LE,
1627 CH_UNIX,
1628 src,
1629 src_len,
1630 (void *)&dest,
1631 True);
1632 if (dest_len == (size_t)-1) {
1633 dest_len = 0;
1634 }
1635
1636 if (dest_len) {
1637 /* Did we already process the terminating zero ? */
1638 if (dest[dest_len-1] != 0) {
1639 size_t size = talloc_get_size(dest);
1640 /* Have we got space to append the '\0' ? */
1641 if (size <= dest_len) {
1642 /* No, realloc. */
1643 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1644 dest_len+1);
1645 if (!dest) {
1646 /* talloc fail. */
1647 dest_len = (size_t)-1;
1648 return 0;
1649 }
1650 }
1651 /* Yay - space ! */
1652 dest[dest_len] = '\0';
1653 dest_len++;
1654 }
1655 } else if (dest) {
1656 dest[0] = 0;
1657 }
1658
1659 *ppdest = dest;
1660 return src_len;
1661}
1662
1663size_t pull_ucs2_fstring(char *dest, const void *src)
1664{
1665 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1666}
1667
1668/**
1669 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1670 *
1671 * @param dest always set at least to NULL
1672 *
1673 * @returns The number of bytes occupied by the string in the destination
1674 **/
1675
1676size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1677{
1678 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1679 *dest = NULL;
1680 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1681}
1682
1683/**
1684 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1685 *
1686 * @param dest always set at least to NULL
1687 *
1688 * @returns The number of bytes occupied by the string in the destination
1689 **/
1690
1691size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1692{
1693 size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1694 *dest = NULL;
1695 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1696 (void **)dest, &dest_len, True))
1697 return (size_t)-1;
1698 else
1699 return dest_len;
1700}
1701
1702/**
1703 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1704 *
1705 * @param dest always set at least to NULL
1706 *
1707 * @returns The number of bytes occupied by the string in the destination
1708 **/
1709
1710size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1711{
1712 size_t src_len = strlen(src)+1;
1713 *dest = NULL;
1714 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1715}
1716
1717/**
1718 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1719 *
1720 * @param dest always set at least to NULL
1721 *
1722 * @returns The number of bytes occupied by the string in the destination
1723 **/
1724
1725size_t pull_utf8_allocate(char **dest, const char *src)
1726{
1727 size_t dest_len, src_len = strlen(src)+1;
1728 *dest = NULL;
1729 if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1730 (void **)dest, &dest_len, True))
1731 return (size_t)-1;
1732 else
1733 return dest_len;
1734}
1735
1736/**
1737 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1738 *
1739 * @param dest always set at least to NULL
1740 *
1741 * @returns The number of bytes occupied by the string in the destination
1742 **/
1743
1744size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1745{
1746 size_t src_len = strlen(src)+1;
1747 *dest = NULL;
1748 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1749}
1750
1751/**
1752 Copy a string from a char* src to a unicode or ascii
1753 dos codepage destination choosing unicode or ascii based on the
1754 flags in the SMB buffer starting at base_ptr.
1755 Return the number of bytes occupied by the string in the destination.
1756 flags can have:
1757 STR_TERMINATE means include the null termination.
1758 STR_UPPER means uppercase in the destination.
1759 STR_ASCII use ascii even with unicode packet.
1760 STR_NOALIGN means don't do alignment.
1761 dest_len is the maximum length allowed in the destination. If dest_len
1762 is -1 then no maxiumum is used.
1763**/
1764
1765size_t push_string_fn(const char *function, unsigned int line,
1766 const void *base_ptr, uint16 flags2,
1767 void *dest, const char *src,
1768 size_t dest_len, int flags)
1769{
1770#ifdef DEVELOPER
1771 /* We really need to zero fill here, not clobber
1772 * region, as we want to ensure that valgrind thinks
1773 * all of the outgoing buffer has been written to
1774 * so a send() or write() won't trap an error.
1775 * JRA.
1776 */
1777#if 0
1778 clobber_region(function, line, dest, dest_len);
1779#else
1780 memset(dest, '\0', dest_len);
1781#endif
1782#endif
1783
1784 if (!(flags & STR_ASCII) && \
1785 ((flags & STR_UNICODE || \
1786 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1787 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1788 }
1789 return push_ascii(dest, src, dest_len, flags);
1790}
1791
1792
1793/**
1794 Copy a string from a unicode or ascii source (depending on
1795 the packet flags) to a char* destination.
1796 Flags can have:
1797 STR_TERMINATE means the string in src is null terminated.
1798 STR_UNICODE means to force as unicode.
1799 STR_ASCII use ascii even with unicode packet.
1800 STR_NOALIGN means don't do alignment.
1801 if STR_TERMINATE is set then src_len is ignored is it is -1
1802 src_len is the length of the source area in bytes.
1803 Return the number of bytes occupied by the string in src.
1804 The resulting string in "dest" is always null terminated.
1805**/
1806
1807size_t pull_string_fn(const char *function,
1808 unsigned int line,
1809 const void *base_ptr,
1810 uint16 smb_flags2,
1811 char *dest,
1812 const void *src,
1813 size_t dest_len,
1814 size_t src_len,
1815 int flags)
1816{
1817#ifdef DEVELOPER
1818 clobber_region(function, line, dest, dest_len);
1819#endif
1820
1821 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1822 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1823 "UNICODE defined");
1824 }
1825
1826 if (!(flags & STR_ASCII) && \
1827 ((flags & STR_UNICODE || \
1828 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1829 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1830 }
1831 return pull_ascii(dest, src, dest_len, src_len, flags);
1832}
1833
1834/**
1835 Copy a string from a unicode or ascii source (depending on
1836 the packet flags) to a char* destination.
1837 Variant that uses talloc.
1838 Flags can have:
1839 STR_TERMINATE means the string in src is null terminated.
1840 STR_UNICODE means to force as unicode.
1841 STR_ASCII use ascii even with unicode packet.
1842 STR_NOALIGN means don't do alignment.
1843 if STR_TERMINATE is set then src_len is ignored is it is -1
1844 src_len is the length of the source area in bytes.
1845 Return the number of bytes occupied by the string in src.
1846 The resulting string in "dest" is always null terminated.
1847**/
1848
1849size_t pull_string_talloc_fn(const char *function,
1850 unsigned int line,
1851 TALLOC_CTX *ctx,
1852 const void *base_ptr,
1853 uint16 smb_flags2,
1854 char **ppdest,
1855 const void *src,
1856 size_t src_len,
1857 int flags)
1858{
1859 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1860 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1861 "UNICODE defined");
1862 }
1863
1864 if (!(flags & STR_ASCII) && \
1865 ((flags & STR_UNICODE || \
1866 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1867 return pull_ucs2_base_talloc(ctx,
1868 base_ptr,
1869 ppdest,
1870 src,
1871 src_len,
1872 flags);
1873 }
1874 return pull_ascii_base_talloc(ctx,
1875 ppdest,
1876 src,
1877 src_len,
1878 flags);
1879}
1880
1881
1882size_t align_string(const void *base_ptr, const char *p, int flags)
1883{
1884 if (!(flags & STR_ASCII) && \
1885 ((flags & STR_UNICODE || \
1886 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1887 return ucs2_align(base_ptr, p, flags);
1888 }
1889 return 0;
1890}
1891
1892/*
1893 Return the unicode codepoint for the next multi-byte CH_UNIX character
1894 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1895
1896 Also return the number of bytes consumed (which tells the caller
1897 how many bytes to skip to get to the next CH_UNIX character).
1898
1899 Return INVALID_CODEPOINT if the next character cannot be converted.
1900*/
1901
1902codepoint_t next_codepoint(const char *str, size_t *size)
1903{
1904 /* It cannot occupy more than 4 bytes in UTF16 format */
1905 uint8_t buf[4];
1906 smb_iconv_t descriptor;
1907 size_t ilen_orig;
1908 size_t ilen;
1909 size_t olen;
1910 char *outbuf;
1911
1912 if ((str[0] & 0x80) == 0) {
1913 *size = 1;
1914 return (codepoint_t)str[0];
1915 }
1916
1917 /* We assume that no multi-byte character can take
1918 more than 5 bytes. This is OK as we only
1919 support codepoints up to 1M */
1920
1921 ilen_orig = strnlen(str, 5);
1922 ilen = ilen_orig;
1923
1924 lazy_initialize_conv();
1925
1926 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1927 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1928 *size = 1;
1929 return INVALID_CODEPOINT;
1930 }
1931
1932 /* This looks a little strange, but it is needed to cope
1933 with codepoints above 64k which are encoded as per RFC2781. */
1934 olen = 2;
1935 outbuf = (char *)buf;
1936 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1937 if (olen == 2) {
1938 /* We failed to convert to a 2 byte character.
1939 See if we can convert to a 4 UTF16-LE byte char encoding.
1940 */
1941 olen = 4;
1942 outbuf = (char *)buf;
1943 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1944 if (olen == 4) {
1945 /* We didn't convert any bytes */
1946 *size = 1;
1947 return INVALID_CODEPOINT;
1948 }
1949 olen = 4 - olen;
1950 } else {
1951 olen = 2 - olen;
1952 }
1953
1954 *size = ilen_orig - ilen;
1955
1956 if (olen == 2) {
1957 /* 2 byte, UTF16-LE encoded value. */
1958 return (codepoint_t)SVAL(buf, 0);
1959 }
1960 if (olen == 4) {
1961 /* Decode a 4 byte UTF16-LE character manually.
1962 See RFC2871 for the encoding machanism.
1963 */
1964 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1965 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1966
1967 return (codepoint_t)0x10000 +
1968 (w1 << 10) + w2;
1969 }
1970
1971 /* no other length is valid */
1972 return INVALID_CODEPOINT;
1973}
Note: See TracBrowser for help on using the repository browser.