source: branches/samba-3.3.x/source/lib/charcnv.c@ 285

Last change on this file since 285 was 239, checked in by Herwig Bauernfeind, 16 years ago

Fix for Ticket #85 (by diver) in 3.3 branch

File size: 51.6 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22*/
23#include "includes.h"
24
/*
 * Character substituted for input that cannot be converted.
 * Fixed for now; it can be made configurable if anyone asks. JRA.
 */

char lp_failed_convert_char(void)
{
	const char fail_char = '_';

	return fail_char;
}
31
32/**
33 * @file
34 *
35 * @brief Character-set conversion routines built on our iconv.
36 *
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
43 *
44 * @sa lib/iconv.c
45 */
46
47
48static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50static bool initialized;
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57 const char *ret = NULL;
58#ifndef __OS2__
59 if (ch == CH_UTF16LE) ret = "UTF-16LE";
60 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61#else
62 if (ch == CH_UTF16LE) ret = "IBM-1200";
63 else if (ch == CH_UTF16BE) ret = "IBM-1200";
64#endif
65 else if (ch == CH_UNIX) ret = lp_unix_charset();
66 else if (ch == CH_DOS) ret = lp_dos_charset();
67 else if (ch == CH_DISPLAY) ret = lp_display_charset();
68 else if (ch == CH_UTF8) ret = "UTF8";
69
70#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
71 if (ret && !strcmp(ret, "LOCALE")) {
72 const char *ln = NULL;
73
74#ifdef HAVE_SETLOCALE
75 setlocale(LC_ALL, "");
76#endif
77 ln = nl_langinfo(CODESET);
78 if (ln) {
79 /* Check whether the charset name is supported
80 by iconv */
81 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
82 if (handle == (smb_iconv_t) -1) {
83 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
84 ln = NULL;
85 } else {
86 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
87 smb_iconv_close(handle);
88 }
89 }
90 ret = ln;
91 }
92#endif
93
94 if (!ret || !*ret) ret = "ASCII";
95 DEBUG(10, ("codepage: %s\n",ret));
96 return ret;
97}
98
99void lazy_initialize_conv(void)
100{
101 if (!initialized) {
102 load_case_tables();
103 init_iconv();
104 initialized = true;
105 }
106}
107
108/**
109 * Destroy global objects allocated by init_iconv()
110 **/
111void gfree_charcnv(void)
112{
113 int c1, c2;
114
115 for (c1=0;c1<NUM_CHARSETS;c1++) {
116 for (c2=0;c2<NUM_CHARSETS;c2++) {
117 if ( conv_handles[c1][c2] ) {
118 smb_iconv_close( conv_handles[c1][c2] );
119 conv_handles[c1][c2] = 0;
120 }
121 }
122 }
123 initialized = false;
124}
125
126/**
127 * Initialize iconv conversion descriptors.
128 *
129 * This is called the first time it is needed, and also called again
130 * every time the configuration is reloaded, because the charset or
131 * codepage might have changed.
132 **/
133void init_iconv(void)
134{
135 int c1, c2;
136 bool did_reload = False;
137
138 /* so that charset_name() works we need to get the UNIX<->UCS2 going
139 first */
140 if (!conv_handles[CH_UNIX][CH_UTF16LE])
141 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
142
143 if (!conv_handles[CH_UTF16LE][CH_UNIX])
144 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
145
146 for (c1=0;c1<NUM_CHARSETS;c1++) {
147 for (c2=0;c2<NUM_CHARSETS;c2++) {
148 const char *n1 = charset_name((charset_t)c1);
149 const char *n2 = charset_name((charset_t)c2);
150 if (conv_handles[c1][c2] &&
151 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
152 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
153 continue;
154
155 did_reload = True;
156
157 if (conv_handles[c1][c2])
158 smb_iconv_close(conv_handles[c1][c2]);
159
160 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
161 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
162 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
163 charset_name((charset_t)c1), charset_name((charset_t)c2)));
164 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
165 n1 = "ASCII";
166 }
167 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
168 n2 = "ASCII";
169 }
170 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
171 n1, n2 ));
172 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
173 if (!conv_handles[c1][c2]) {
174 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
175 smb_panic("init_iconv: conv_handle initialization failed");
176 }
177 }
178 }
179 }
180
181 if (did_reload) {
182 /* XXX: Does this really get called every time the dos
183 * codepage changes? */
184 /* XXX: Is the did_reload test too strict? */
185 conv_silent = True;
186 init_valid_table();
187 conv_silent = False;
188 }
189}
190
191/**
192 * Convert string from one encoding to another, making error checking etc
193 * Slow path version - uses (slow) iconv.
194 *
195 * @param src pointer to source string (multibyte or singlebyte)
196 * @param srclen length of the source string in bytes
197 * @param dest pointer to destination string (multibyte or singlebyte)
198 * @param destlen maximal length allowed for string
199 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
200 * @returns the number of bytes occupied in the destination
201 *
202 * Ensure the srclen contains the terminating zero.
203 *
204 **/
205
206static size_t convert_string_internal(charset_t from, charset_t to,
207 void const *src, size_t srclen,
208 void *dest, size_t destlen, bool allow_bad_conv)
209{
210 size_t i_len, o_len;
211 size_t retval;
212 const char* inbuf = (const char*)src;
213 char* outbuf = (char*)dest;
214 smb_iconv_t descriptor;
215
216 lazy_initialize_conv();
217
218 descriptor = conv_handles[from][to];
219
220 if (srclen == (size_t)-1) {
221 if (from == CH_UTF16LE || from == CH_UTF16BE) {
222 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
223 } else {
224 srclen = strlen((const char *)src)+1;
225 }
226 }
227
228
229 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
230 if (!conv_silent)
231 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
232 return (size_t)-1;
233 }
234
235 i_len=srclen;
236 o_len=destlen;
237
238 again:
239
240 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
241 if(retval==(size_t)-1) {
242 const char *reason="unknown error";
243 switch(errno) {
244 case EINVAL:
245 reason="Incomplete multibyte sequence";
246 if (!conv_silent)
247 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
248 if (allow_bad_conv)
249 goto use_as_is;
250 return (size_t)-1;
251 case E2BIG:
252 reason="No more room";
253 if (!conv_silent) {
254 if (from == CH_UNIX) {
255 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
256 charset_name(from), charset_name(to),
257 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
258 } else {
259 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
260 charset_name(from), charset_name(to),
261 (unsigned int)srclen, (unsigned int)destlen));
262 }
263 }
264 break;
265 case EILSEQ:
266 reason="Illegal multibyte sequence";
267 if (!conv_silent)
268 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
269 if (allow_bad_conv)
270 goto use_as_is;
271
272 return (size_t)-1;
273 default:
274 if (!conv_silent)
275 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
276 return (size_t)-1;
277 }
278 /* smb_panic(reason); */
279 }
280 return destlen-o_len;
281
282 use_as_is:
283
284 /*
285 * Conversion not supported. This is actually an error, but there are so
286 * many misconfigured iconv systems and smb.conf's out there we can't just
287 * fail. Do a very bad conversion instead.... JRA.
288 */
289
290 {
291 if (o_len == 0 || i_len == 0)
292 return destlen - o_len;
293
294 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
295 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
296 /* Can't convert from utf16 any endian to multibyte.
297 Replace with the default fail char.
298 */
299 if (i_len < 2)
300 return destlen - o_len;
301 if (i_len >= 2) {
302 *outbuf = lp_failed_convert_char();
303
304 outbuf++;
305 o_len--;
306
307 inbuf += 2;
308 i_len -= 2;
309 }
310
311 if (o_len == 0 || i_len == 0)
312 return destlen - o_len;
313
314 /* Keep trying with the next char... */
315 goto again;
316
317 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
318 /* Can't convert to UTF16LE - just widen by adding the
319 default fail char then zero.
320 */
321 if (o_len < 2)
322 return destlen - o_len;
323
324 outbuf[0] = lp_failed_convert_char();
325 outbuf[1] = '\0';
326
327 inbuf++;
328 i_len--;
329
330 outbuf += 2;
331 o_len -= 2;
332
333 if (o_len == 0 || i_len == 0)
334 return destlen - o_len;
335
336 /* Keep trying with the next char... */
337 goto again;
338
339 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
340 to != CH_UTF16LE && to != CH_UTF16BE) {
341 /* Failed multibyte to multibyte. Just copy the default fail char and
342 try again. */
343 outbuf[0] = lp_failed_convert_char();
344
345 inbuf++;
346 i_len--;
347
348 outbuf++;
349 o_len--;
350
351 if (o_len == 0 || i_len == 0)
352 return destlen - o_len;
353
354 /* Keep trying with the next char... */
355 goto again;
356
357 } else {
358 /* Keep compiler happy.... */
359 return destlen - o_len;
360 }
361 }
362}
363
364/**
365 * Convert string from one encoding to another, making error checking etc
366 * Fast path version - handles ASCII first.
367 *
368 * @param src pointer to source string (multibyte or singlebyte)
369 * @param srclen length of the source string in bytes, or -1 for nul terminated.
370 * @param dest pointer to destination string (multibyte or singlebyte)
371 * @param destlen maximal length allowed for string - *NEVER* -1.
372 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
373 * @returns the number of bytes occupied in the destination
374 *
375 * Ensure the srclen contains the terminating zero.
376 *
377 * This function has been hand-tuned to provide a fast path.
378 * Don't change unless you really know what you are doing. JRA.
379 **/
380
381size_t convert_string(charset_t from, charset_t to,
382 void const *src, size_t srclen,
383 void *dest, size_t destlen, bool allow_bad_conv)
384{
385 /*
386 * NB. We deliberately don't do a strlen here if srclen == -1.
387 * This is very expensive over millions of calls and is taken
388 * care of in the slow path in convert_string_internal. JRA.
389 */
390
391#ifdef DEVELOPER
392 SMB_ASSERT(destlen != (size_t)-1);
393#endif
394
395 if (srclen == 0)
396 return 0;
397
398// DEBUG(10, ("convert_string: 1"));
399
400 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
401 const unsigned char *p = (const unsigned char *)src;
402 unsigned char *q = (unsigned char *)dest;
403 size_t slen = srclen;
404 size_t dlen = destlen;
405 unsigned char lastp = '\0';
406 size_t retval = 0;
407
408// DEBUG(10, ("convert_string: 2"));
409
410 /* If all characters are ascii, fast path here. */
411 while (slen && dlen) {
412 if ((lastp = *p) <= 0x7f) {
413 *q++ = *p++;
414 if (slen != (size_t)-1) {
415 slen--;
416 }
417 dlen--;
418 retval++;
419 if (!lastp)
420 break;
421 } else {
422#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
423 goto general_case;
424#else
425 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
426 if (ret == (size_t)-1) {
427 return ret;
428 }
429 return retval + ret;
430#endif
431 }
432 }
433 if (!dlen) {
434 /* Even if we fast path we should note if we ran out of room. */
435 if (((slen != (size_t)-1) && slen) ||
436 ((slen == (size_t)-1) && lastp)) {
437 errno = E2BIG;
438 }
439 }
440 return retval;
441// DEBUG(10, ("convert_string: 3"));
442
443 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
444 const unsigned char *p = (const unsigned char *)src;
445 unsigned char *q = (unsigned char *)dest;
446 size_t retval = 0;
447 size_t slen = srclen;
448 size_t dlen = destlen;
449 unsigned char lastp = '\0';
450
451 /* If all characters are ascii, fast path here. */
452 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
453 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
454 *q++ = *p;
455 if (slen != (size_t)-1) {
456 slen -= 2;
457 }
458 p += 2;
459 dlen--;
460 retval++;
461 if (!lastp)
462 break;
463 } else {
464#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
465 goto general_case;
466#else
467 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
468 if (ret == (size_t)-1) {
469 return ret;
470 }
471 return retval + ret;
472#endif
473 }
474 }
475 if (!dlen) {
476 /* Even if we fast path we should note if we ran out of room. */
477 if (((slen != (size_t)-1) && slen) ||
478 ((slen == (size_t)-1) && lastp)) {
479 errno = E2BIG;
480 }
481 }
482 return retval;
483// DEBUG(10, ("convert_string: 4"));
484
485 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
486 const unsigned char *p = (const unsigned char *)src;
487 unsigned char *q = (unsigned char *)dest;
488 size_t retval = 0;
489 size_t slen = srclen;
490 size_t dlen = destlen;
491 unsigned char lastp = '\0';
492
493 /* If all characters are ascii, fast path here. */
494 while (slen && (dlen >= 2)) {
495 if ((lastp = *p) <= 0x7F) {
496 *q++ = *p++;
497 *q++ = '\0';
498 if (slen != (size_t)-1) {
499 slen--;
500 }
501 dlen -= 2;
502 retval += 2;
503 if (!lastp)
504 break;
505 } else {
506#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
507 goto general_case;
508#else
509 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
510 if (ret == (size_t)-1) {
511 return ret;
512 }
513 return retval + ret;
514#endif
515 }
516 }
517 if (!dlen) {
518 /* Even if we fast path we should note if we ran out of room. */
519 if (((slen != (size_t)-1) && slen) ||
520 ((slen == (size_t)-1) && lastp)) {
521 errno = E2BIG;
522 }
523 }
524 return retval;
525 }
526
527#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
528 general_case:
529#endif
530 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
531}
532
533/**
534 * Convert between character sets, allocating a new buffer for the result.
535 *
536 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
537 * (this is a bad interface and needs fixing. JRA).
538 * @param srclen length of source buffer.
539 * @param dest always set at least to NULL
540 * @param converted_size set to the size of the allocated buffer on return
541 * true
542 * @note -1 is not accepted for srclen.
543 *
544 * @return true if new buffer was correctly allocated, and string was
545 * converted.
546 *
547 * Ensure the srclen contains the terminating zero.
548 *
549 * I hate the goto's in this function. It's embarressing.....
550 * There has to be a cleaner way to do this. JRA.
551 **/
552
553bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
554 void const *src, size_t srclen, void *dst,
555 size_t *converted_size, bool allow_bad_conv)
556{
557 size_t i_len, o_len, destlen = (srclen * 3) / 2;
558 size_t retval;
559 const char *inbuf = (const char *)src;
560 char *outbuf = NULL, *ob = NULL;
561 smb_iconv_t descriptor;
562 void **dest = (void **)dst;
563
564 *dest = NULL;
565
566 if (!converted_size) {
567 errno = EINVAL;
568 return false;
569 }
570
571 if (src == NULL || srclen == (size_t)-1) {
572 errno = EINVAL;
573 return false;
574 }
575 if (srclen == 0) {
576 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
577 if (ob == NULL) {
578 errno = ENOMEM;
579 return false;
580 }
581 *dest = ob;
582 *converted_size = 0;
583 return true;
584 }
585
586 lazy_initialize_conv();
587
588 descriptor = conv_handles[from][to];
589
590 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
591 if (!conv_silent)
592 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
593 errno = EOPNOTSUPP;
594 return false;
595 }
596
597 convert:
598
599 /* +2 is for ucs2 null termination. */
600 if ((destlen*2)+2 < destlen) {
601 /* wrapped ! abort. */
602 if (!conv_silent)
603 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
604 if (!ctx)
605 SAFE_FREE(outbuf);
606 errno = EOPNOTSUPP;
607 return false;
608 } else {
609 destlen = destlen * 2;
610 }
611
612 /* +2 is for ucs2 null termination. */
613 if (ctx) {
614 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
615 } else {
616 ob = (char *)SMB_REALLOC(ob, destlen + 2);
617 }
618
619 if (!ob) {
620 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
621 errno = ENOMEM;
622 return false;
623 }
624 outbuf = ob;
625 i_len = srclen;
626 o_len = destlen;
627
628 again:
629 DEBUG(10,("convert_string_internal: convert_string(%s,%s): srclen=%u destlen=%u\n",
630 charset_name(from), charset_name(to),
631 (unsigned int)srclen, (unsigned int)destlen));
632
633 retval = smb_iconv(descriptor,
634 &inbuf, &i_len,
635 &outbuf, &o_len);
636 if(retval == (size_t)-1) {
637 const char *reason="unknown error";
638 switch(errno) {
639 case EINVAL:
640 reason="Incomplete multibyte sequence";
641 if (!conv_silent)
642 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
643 if (allow_bad_conv)
644 goto use_as_is;
645 break;
646 case E2BIG:
647 goto convert;
648 case EILSEQ:
649 reason="Illegal multibyte sequence";
650 if (!conv_silent)
651 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
652 if (allow_bad_conv)
653 goto use_as_is;
654 break;
655 }
656 if (!conv_silent)
657 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
658 /* smb_panic(reason); */
659 if (ctx) {
660 TALLOC_FREE(ob);
661 } else {
662 SAFE_FREE(ob);
663 }
664 return false;
665 }
666
667 out:
668
669 destlen = destlen - o_len;
670 /* Don't shrink unless we're reclaiming a lot of
671 * space. This is in the hot codepath and these
672 * reallocs *cost*. JRA.
673 */
674 if (o_len > 1024) {
675 /* We're shrinking here so we know the +2 is safe from wrap. */
676 if (ctx) {
677 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
678 } else {
679 ob = (char *)SMB_REALLOC(ob,destlen + 2);
680 }
681 }
682
683 if (destlen && !ob) {
684 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
685 errno = ENOMEM;
686 return false;
687 }
688
689 *dest = ob;
690
691 /* Must ucs2 null terminate in the extra space we allocated. */
692 ob[destlen] = '\0';
693 ob[destlen+1] = '\0';
694
695 *converted_size = destlen;
696 return true;
697
698 use_as_is:
699
700 /*
701 * Conversion not supported. This is actually an error, but there are so
702 * many misconfigured iconv systems and smb.conf's out there we can't just
703 * fail. Do a very bad conversion instead.... JRA.
704 */
705
706 {
707 if (o_len == 0 || i_len == 0)
708 goto out;
709
710 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
711 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
712 /* Can't convert from utf16 any endian to multibyte.
713 Replace with the default fail char.
714 */
715
716 if (i_len < 2)
717 goto out;
718
719 if (i_len >= 2) {
720 *outbuf = lp_failed_convert_char();
721
722 outbuf++;
723 o_len--;
724
725 inbuf += 2;
726 i_len -= 2;
727 }
728
729 if (o_len == 0 || i_len == 0)
730 goto out;
731
732 /* Keep trying with the next char... */
733 goto again;
734
735 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
736 /* Can't convert to UTF16LE - just widen by adding the
737 default fail char then zero.
738 */
739 if (o_len < 2)
740 goto out;
741
742 outbuf[0] = lp_failed_convert_char();
743 outbuf[1] = '\0';
744
745 inbuf++;
746 i_len--;
747
748 outbuf += 2;
749 o_len -= 2;
750
751 if (o_len == 0 || i_len == 0)
752 goto out;
753
754 /* Keep trying with the next char... */
755 goto again;
756
757 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
758 to != CH_UTF16LE && to != CH_UTF16BE) {
759 /* Failed multibyte to multibyte. Just copy the default fail char and
760 try again. */
761 outbuf[0] = lp_failed_convert_char();
762
763 inbuf++;
764 i_len--;
765
766 outbuf++;
767 o_len--;
768
769 if (o_len == 0 || i_len == 0)
770 goto out;
771
772 /* Keep trying with the next char... */
773 goto again;
774
775 } else {
776 /* Keep compiler happy.... */
777 goto out;
778 }
779 }
780}
781
782/**
783 * Convert between character sets, allocating a new buffer using talloc for the result.
784 *
785 * @param srclen length of source buffer.
786 * @param dest always set at least to NULL
787 * @parm converted_size set to the number of bytes occupied by the string in
788 * the destination on success.
789 * @note -1 is not accepted for srclen.
790 *
791 * @return true if new buffer was correctly allocated, and string was
792 * converted.
793 */
794bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
795 void const *src, size_t srclen, void *dst,
796 size_t *converted_size, bool allow_bad_conv)
797{
798 void **dest = (void **)dst;
799
800 *dest = NULL;
801 return convert_string_allocate(ctx, from, to, src, srclen, dest,
802 converted_size, allow_bad_conv);
803}
804
805size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
806{
807 size_t size;
808 smb_ucs2_t *buffer;
809
810 if (!push_ucs2_allocate(&buffer, src, &size)) {
811 return (size_t)-1;
812 }
813
814 if (!strupper_w(buffer) && (dest == src)) {
815 free(buffer);
816 return srclen;
817 }
818
819 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
820 free(buffer);
821 return size;
822}
823
824/**
825 strdup() a unix string to upper case.
826**/
827
828char *strdup_upper(const char *s)
829{
830 char *out_buffer = SMB_STRDUP(s);
831 const unsigned char *p = (const unsigned char *)s;
832 unsigned char *q = (unsigned char *)out_buffer;
833
834 if (!q) {
835 return NULL;
836 }
837
838 /* this is quite a common operation, so we want it to be
839 fast. We optimise for the ascii case, knowing that all our
840 supported multi-byte character sets are ascii-compatible
841 (ie. they match for the first 128 chars) */
842
843 while (*p) {
844 if (*p & 0x80)
845 break;
846 *q++ = toupper_ascii_fast(*p);
847 p++;
848 }
849
850 if (*p) {
851 /* MB case. */
852 size_t converted_size, converted_size2;
853 smb_ucs2_t *buffer = NULL;
854
855 SAFE_FREE(out_buffer);
856 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
857 strlen(s) + 1,
858 (void **)(void *)&buffer,
859 &converted_size, True))
860 {
861 return NULL;
862 }
863
864 strupper_w(buffer);
865
866 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
867 converted_size,
868 (void **)(void *)&out_buffer,
869 &converted_size2, True))
870 {
871 TALLOC_FREE(buffer);
872 return NULL;
873 }
874
875 /* Don't need the intermediate buffer
876 * anymore.
877 */
878 TALLOC_FREE(buffer);
879 }
880
881 return out_buffer;
882}
883
884/**
885 talloc_strdup() a unix string to upper case.
886**/
887
888char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
889{
890 char *out_buffer = talloc_strdup(ctx,s);
891 const unsigned char *p = (const unsigned char *)s;
892 unsigned char *q = (unsigned char *)out_buffer;
893
894 if (!q) {
895 return NULL;
896 }
897
898 /* this is quite a common operation, so we want it to be
899 fast. We optimise for the ascii case, knowing that all our
900 supported multi-byte character sets are ascii-compatible
901 (ie. they match for the first 128 chars) */
902
903 while (*p) {
904 if (*p & 0x80)
905 break;
906 *q++ = toupper_ascii_fast(*p);
907 p++;
908 }
909
910 if (*p) {
911 /* MB case. */
912 size_t converted_size, converted_size2;
913 smb_ucs2_t *ubuf = NULL;
914
915 /* We're not using the ascii buffer above. */
916 TALLOC_FREE(out_buffer);
917
918 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
919 strlen(s)+1, (void *)&ubuf,
920 &converted_size, True))
921 {
922 return NULL;
923 }
924
925 strupper_w(ubuf);
926
927 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
928 converted_size, (void *)&out_buffer,
929 &converted_size2, True))
930 {
931 TALLOC_FREE(ubuf);
932 return NULL;
933 }
934
935 /* Don't need the intermediate buffer
936 * anymore.
937 */
938 TALLOC_FREE(ubuf);
939 }
940
941 return out_buffer;
942}
943
944size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
945{
946 size_t size;
947 smb_ucs2_t *buffer = NULL;
948
949 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
950 (void **)(void *)&buffer, &size,
951 True))
952 {
953 smb_panic("failed to create UCS2 buffer");
954 }
955 if (!strlower_w(buffer) && (dest == src)) {
956 SAFE_FREE(buffer);
957 return srclen;
958 }
959 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
960 SAFE_FREE(buffer);
961 return size;
962}
963
964/**
965 strdup() a unix string to lower case.
966**/
967
968char *strdup_lower(const char *s)
969{
970 size_t converted_size;
971 smb_ucs2_t *buffer = NULL;
972 char *out_buffer;
973
974 if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
975 return NULL;
976 }
977
978 strlower_w(buffer);
979
980 if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
981 SAFE_FREE(buffer);
982 return NULL;
983 }
984
985 SAFE_FREE(buffer);
986
987 return out_buffer;
988}
989
990char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
991{
992 size_t converted_size;
993 smb_ucs2_t *buffer = NULL;
994 char *out_buffer;
995
996 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
997 return NULL;
998 }
999
1000 strlower_w(buffer);
1001
1002 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
1003 TALLOC_FREE(buffer);
1004 return NULL;
1005 }
1006
1007 TALLOC_FREE(buffer);
1008
1009 return out_buffer;
1010}
1011
1012
1013size_t ucs2_align(const void *base_ptr, const void *p, int flags)
1014{
1015 if (flags & (STR_NOALIGN|STR_ASCII))
1016 return 0;
1017 return PTR_DIFF(p, base_ptr) & 1;
1018}
1019
1020
1021/**
1022 * Copy a string from a char* unix src to a dos codepage string destination.
1023 *
1024 * @return the number of bytes occupied by the string in the destination.
1025 *
1026 * @param flags can include
1027 * <dl>
1028 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1029 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1030 * </dl>
1031 *
1032 * @param dest_len the maximum length in bytes allowed in the
1033 * destination.
1034 **/
1035size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1036{
1037 size_t src_len = strlen(src);
1038 char *tmpbuf = NULL;
1039 size_t ret;
1040
1041 /* No longer allow a length of -1. */
1042 if (dest_len == (size_t)-1) {
1043 smb_panic("push_ascii - dest_len == -1");
1044 }
1045
1046 if (flags & STR_UPPER) {
1047 tmpbuf = SMB_STRDUP(src);
1048 if (!tmpbuf) {
1049 smb_panic("malloc fail");
1050 }
1051 strupper_m(tmpbuf);
1052 src = tmpbuf;
1053 }
1054
1055 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1056 src_len++;
1057 }
1058
1059 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1060 if (ret == (size_t)-1 &&
1061 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1062 && dest_len > 0) {
1063 ((char *)dest)[0] = '\0';
1064 }
1065 SAFE_FREE(tmpbuf);
1066 return ret;
1067}
1068
1069size_t push_ascii_fstring(void *dest, const char *src)
1070{
1071 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1072}
1073
1074/********************************************************************
1075 Push an nstring - ensure null terminated. Written by
1076 moriyama@miraclelinux.com (MORIYAMA Masayuki).
1077********************************************************************/
1078
1079size_t push_ascii_nstring(void *dest, const char *src)
1080{
1081 size_t i, buffer_len, dest_len;
1082 smb_ucs2_t *buffer;
1083
1084 conv_silent = True;
1085 if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1086 smb_panic("failed to create UCS2 buffer");
1087 }
1088
1089 /* We're using buffer_len below to count ucs2 characters, not bytes. */
1090 buffer_len /= sizeof(smb_ucs2_t);
1091
1092 dest_len = 0;
1093 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1094 unsigned char mb[10];
1095 /* Convert one smb_ucs2_t character at a time. */
1096 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1097 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1098 memcpy((char *)dest + dest_len, mb, mb_len);
1099 dest_len += mb_len;
1100 } else {
1101 errno = E2BIG;
1102 break;
1103 }
1104 }
1105 ((char *)dest)[dest_len] = '\0';
1106
1107 SAFE_FREE(buffer);
1108 conv_silent = False;
1109 return dest_len;
1110}
1111
1112/********************************************************************
1113 Push and malloc an ascii string. src and dest null terminated.
1114********************************************************************/
1115
1116bool push_ascii_allocate(char **dest, const char *src, size_t *converted_size)
1117{
1118 size_t src_len = strlen(src)+1;
1119
1120 *dest = NULL;
1121 return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1122 (void **)dest, converted_size, True);
1123}
1124
1125/**
1126 * Copy a string from a dos codepage source to a unix char* destination.
1127 *
1128 * The resulting string in "dest" is always null terminated.
1129 *
1130 * @param flags can have:
1131 * <dl>
1132 * <dt>STR_TERMINATE</dt>
1133 * <dd>STR_TERMINATE means the string in @p src
1134 * is null terminated, and src_len is ignored.</dd>
1135 * </dl>
1136 *
1137 * @param src_len is the length of the source area in bytes.
1138 * @returns the number of bytes occupied by the string in @p src.
1139 **/
1140size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1141{
1142 size_t ret;
1143
1144 if (dest_len == (size_t)-1) {
1145 /* No longer allow dest_len of -1. */
1146 smb_panic("pull_ascii - invalid dest_len of -1");
1147 }
1148
1149 if (flags & STR_TERMINATE) {
1150 if (src_len == (size_t)-1) {
1151 src_len = strlen((const char *)src) + 1;
1152 } else {
1153 size_t len = strnlen((const char *)src, src_len);
1154 if (len < src_len)
1155 len++;
1156 src_len = len;
1157 }
1158 }
1159
1160 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1161 if (ret == (size_t)-1) {
1162 ret = 0;
1163 dest_len = 0;
1164 }
1165
1166 if (dest_len && ret) {
1167 /* Did we already process the terminating zero ? */
1168 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1169 dest[MIN(ret, dest_len-1)] = 0;
1170 }
1171 } else {
1172 dest[0] = 0;
1173 }
1174
1175 return src_len;
1176}
1177
1178/**
1179 * Copy a string from a dos codepage source to a unix char* destination.
1180 Talloc version.
1181 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1182 needs fixing. JRA).
1183 *
1184 * The resulting string in "dest" is always null terminated.
1185 *
1186 * @param flags can have:
1187 * <dl>
1188 * <dt>STR_TERMINATE</dt>
1189 * <dd>STR_TERMINATE means the string in @p src
1190 * is null terminated, and src_len is ignored.</dd>
1191 * </dl>
1192 *
1193 * @param src_len is the length of the source area in bytes.
1194 * @returns the number of bytes occupied by the string in @p src.
1195 **/
1196
1197static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1198 char **ppdest,
1199 const void *src,
1200 size_t src_len,
1201 int flags)
1202{
1203 char *dest = NULL;
1204 size_t dest_len;
1205
1206#ifdef DEVELOPER
1207 /* Ensure we never use the braindead "malloc" varient. */
1208 if (ctx == NULL) {
1209 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1210 }
1211#endif
1212
1213 *ppdest = NULL;
1214
1215 if (!src_len) {
1216 return 0;
1217 }
1218
1219 if (flags & STR_TERMINATE) {
1220 if (src_len == (size_t)-1) {
1221 src_len = strlen((const char *)src) + 1;
1222 } else {
1223 size_t len = strnlen((const char *)src, src_len);
1224 if (len < src_len)
1225 len++;
1226 src_len = len;
1227 }
1228 /* Ensure we don't use an insane length from the client. */
1229 if (src_len >= 1024*1024) {
1230 char *msg = talloc_asprintf(ctx,
1231 "Bad src length (%u) in "
1232 "pull_ascii_base_talloc",
1233 (unsigned int)src_len);
1234 smb_panic(msg);
1235 }
1236 } else {
1237 /* Can't have an unlimited length
1238 * non STR_TERMINATE'd.
1239 */
1240 if (src_len == (size_t)-1) {
1241 errno = EINVAL;
1242 return 0;
1243 }
1244 }
1245
1246 /* src_len != -1 here. */
1247
1248 if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1249 &dest_len, True)) {
1250 dest_len = 0;
1251 }
1252
1253 if (dest_len && dest) {
1254 /* Did we already process the terminating zero ? */
1255 if (dest[dest_len-1] != 0) {
1256 size_t size = talloc_get_size(dest);
1257 /* Have we got space to append the '\0' ? */
1258 if (size <= dest_len) {
1259 /* No, realloc. */
1260 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1261 dest_len+1);
1262 if (!dest) {
1263 /* talloc fail. */
1264 dest_len = (size_t)-1;
1265 return 0;
1266 }
1267 }
1268 /* Yay - space ! */
1269 dest[dest_len] = '\0';
1270 dest_len++;
1271 }
1272 } else if (dest) {
1273 dest[0] = 0;
1274 }
1275
1276 *ppdest = dest;
1277 return src_len;
1278}
1279
1280size_t pull_ascii_fstring(char *dest, const void *src)
1281{
1282 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1283}
1284
1285/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1286
1287size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1288{
1289 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1290}
1291
1292/**
1293 * Copy a string from a char* src to a unicode destination.
1294 *
1295 * @returns the number of bytes occupied by the string in the destination.
1296 *
1297 * @param flags can have:
1298 *
1299 * <dl>
1300 * <dt>STR_TERMINATE <dd>means include the null termination.
1301 * <dt>STR_UPPER <dd>means uppercase in the destination.
1302 * <dt>STR_NOALIGN <dd>means don't do alignment.
1303 * </dl>
1304 *
1305 * @param dest_len is the maximum length allowed in the
1306 * destination.
1307 **/
1308
1309size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1310{
1311 size_t len=0;
1312 size_t src_len;
1313 size_t ret;
1314
1315 if (dest_len == (size_t)-1) {
1316 /* No longer allow dest_len of -1. */
1317 smb_panic("push_ucs2 - invalid dest_len of -1");
1318 }
1319
1320 if (flags & STR_TERMINATE)
1321 src_len = (size_t)-1;
1322 else
1323 src_len = strlen(src);
1324
1325 if (ucs2_align(base_ptr, dest, flags)) {
1326 *(char *)dest = 0;
1327 dest = (void *)((char *)dest + 1);
1328 if (dest_len)
1329 dest_len--;
1330 len++;
1331 }
1332
1333 /* ucs2 is always a multiple of 2 bytes */
1334 dest_len &= ~1;
1335
1336 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1337 if (ret == (size_t)-1) {
1338 if ((flags & STR_TERMINATE) &&
1339 dest &&
1340 dest_len) {
1341 *(char *)dest = 0;
1342 }
1343 return len;
1344 }
1345
1346 len += ret;
1347
1348 if (flags & STR_UPPER) {
1349 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1350 size_t i;
1351
1352 /* We check for i < (ret / 2) below as the dest string isn't null
1353 terminated if STR_TERMINATE isn't set. */
1354
1355 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1356 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1357 if (v != dest_ucs2[i]) {
1358 dest_ucs2[i] = v;
1359 }
1360 }
1361 }
1362
1363 return len;
1364}
1365
1366
1367/**
1368 * Copy a string from a unix char* src to a UCS2 destination,
1369 * allocating a buffer using talloc().
1370 *
1371 * @param dest always set at least to NULL
1372 * @parm converted_size set to the number of bytes occupied by the string in
1373 * the destination on success.
1374 *
1375 * @return true if new buffer was correctly allocated, and string was
1376 * converted.
1377 **/
1378bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1379 size_t *converted_size)
1380{
1381 size_t src_len = strlen(src)+1;
1382
1383 *dest = NULL;
1384 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1385 (void **)dest, converted_size, True);
1386}
1387
1388
1389/**
1390 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1391 *
1392 * @param dest always set at least to NULL
1393 * @parm converted_size set to the number of bytes occupied by the string in
1394 * the destination on success.
1395 *
1396 * @return true if new buffer was correctly allocated, and string was
1397 * converted.
1398 **/
1399
1400bool push_ucs2_allocate(smb_ucs2_t **dest, const char *src,
1401 size_t *converted_size)
1402{
1403 size_t src_len = strlen(src)+1;
1404
1405 *dest = NULL;
1406 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1407 (void **)dest, converted_size, True);
1408}
1409
1410/**
1411 Copy a string from a char* src to a UTF-8 destination.
1412 Return the number of bytes occupied by the string in the destination
1413 Flags can have:
1414 STR_TERMINATE means include the null termination
1415 STR_UPPER means uppercase in the destination
1416 dest_len is the maximum length allowed in the destination. If dest_len
1417 is -1 then no maxiumum is used.
1418**/
1419
1420static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1421{
1422 size_t src_len = 0;
1423 size_t ret;
1424 char *tmpbuf = NULL;
1425
1426 if (dest_len == (size_t)-1) {
1427 /* No longer allow dest_len of -1. */
1428 smb_panic("push_utf8 - invalid dest_len of -1");
1429 }
1430
1431 if (flags & STR_UPPER) {
1432 tmpbuf = strdup_upper(src);
1433 if (!tmpbuf) {
1434 return (size_t)-1;
1435 }
1436 src = tmpbuf;
1437 src_len = strlen(src);
1438 }
1439
1440 src_len = strlen(src);
1441 if (flags & STR_TERMINATE) {
1442 src_len++;
1443 }
1444
1445 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1446 SAFE_FREE(tmpbuf);
1447 return ret;
1448}
1449
1450size_t push_utf8_fstring(void *dest, const char *src)
1451{
1452 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1453}
1454
1455/**
1456 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1457 *
1458 * @param dest always set at least to NULL
1459 * @parm converted_size set to the number of bytes occupied by the string in
1460 * the destination on success.
1461 *
1462 * @return true if new buffer was correctly allocated, and string was
1463 * converted.
1464 **/
1465
1466bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1467 size_t *converted_size)
1468{
1469 size_t src_len = strlen(src)+1;
1470
1471 *dest = NULL;
1472 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1473 (void**)dest, converted_size, True);
1474}
1475
1476/**
1477 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1478 *
1479 * @param dest always set at least to NULL
1480 * @parm converted_size set to the number of bytes occupied by the string in
1481 * the destination on success.
1482 *
1483 * @return true if new buffer was correctly allocated, and string was
1484 * converted.
1485 **/
1486
1487bool push_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1488{
1489 size_t src_len = strlen(src)+1;
1490
1491 *dest = NULL;
1492 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1493 (void **)dest, converted_size, True);
1494}
1495
1496/**
1497 Copy a string from a ucs2 source to a unix char* destination.
1498 Flags can have:
1499 STR_TERMINATE means the string in src is null terminated.
1500 STR_NOALIGN means don't try to align.
1501 if STR_TERMINATE is set then src_len is ignored if it is -1.
1502 src_len is the length of the source area in bytes
1503 Return the number of bytes occupied by the string in src.
1504 The resulting string in "dest" is always null terminated.
1505**/
1506
1507size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1508{
1509 size_t ret;
1510
1511 if (dest_len == (size_t)-1) {
1512 /* No longer allow dest_len of -1. */
1513 smb_panic("pull_ucs2 - invalid dest_len of -1");
1514 }
1515
1516 if (!src_len) {
1517 if (dest && dest_len > 0) {
1518 dest[0] = '\0';
1519 }
1520 return 0;
1521 }
1522
1523 if (ucs2_align(base_ptr, src, flags)) {
1524 src = (const void *)((const char *)src + 1);
1525 if (src_len != (size_t)-1)
1526 src_len--;
1527 }
1528
1529 if (flags & STR_TERMINATE) {
1530 /* src_len -1 is the default for null terminated strings. */
1531 if (src_len != (size_t)-1) {
1532 size_t len = strnlen_w((const smb_ucs2_t *)src,
1533 src_len/2);
1534 if (len < src_len/2)
1535 len++;
1536 src_len = len*2;
1537 }
1538 }
1539
1540 /* ucs2 is always a multiple of 2 bytes */
1541 if (src_len != (size_t)-1)
1542 src_len &= ~1;
1543
1544 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1545 if (ret == (size_t)-1) {
1546 ret = 0;
1547 dest_len = 0;
1548 }
1549
1550 if (src_len == (size_t)-1)
1551 src_len = ret*2;
1552
1553 if (dest_len && ret) {
1554 /* Did we already process the terminating zero ? */
1555 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1556 dest[MIN(ret, dest_len-1)] = 0;
1557 }
1558 } else {
1559 dest[0] = 0;
1560 }
1561
1562 return src_len;
1563}
1564
1565/**
1566 Copy a string from a ucs2 source to a unix char* destination.
1567 Talloc version with a base pointer.
1568 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1569 needs fixing. JRA).
1570 Flags can have:
1571 STR_TERMINATE means the string in src is null terminated.
1572 STR_NOALIGN means don't try to align.
1573 if STR_TERMINATE is set then src_len is ignored if it is -1.
1574 src_len is the length of the source area in bytes
1575 Return the number of bytes occupied by the string in src.
1576 The resulting string in "dest" is always null terminated.
1577**/
1578
1579size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1580 const void *base_ptr,
1581 char **ppdest,
1582 const void *src,
1583 size_t src_len,
1584 int flags)
1585{
1586 char *dest;
1587 size_t dest_len;
1588
1589 *ppdest = NULL;
1590
1591#ifdef DEVELOPER
1592 /* Ensure we never use the braindead "malloc" varient. */
1593 if (ctx == NULL) {
1594 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1595 }
1596#endif
1597
1598 if (!src_len) {
1599 return 0;
1600 }
1601
1602 if (ucs2_align(base_ptr, src, flags)) {
1603 src = (const void *)((const char *)src + 1);
1604 if (src_len != (size_t)-1)
1605 src_len--;
1606 }
1607
1608 if (flags & STR_TERMINATE) {
1609 /* src_len -1 is the default for null terminated strings. */
1610 if (src_len != (size_t)-1) {
1611 size_t len = strnlen_w((const smb_ucs2_t *)src,
1612 src_len/2);
1613 if (len < src_len/2)
1614 len++;
1615 src_len = len*2;
1616 } else {
1617 /*
1618 * src_len == -1 - alloc interface won't take this
1619 * so we must calculate.
1620 */
1621 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1622 }
1623 /* Ensure we don't use an insane length from the client. */
1624 if (src_len >= 1024*1024) {
1625 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1626 }
1627 } else {
1628 /* Can't have an unlimited length
1629 * non STR_TERMINATE'd.
1630 */
1631 if (src_len == (size_t)-1) {
1632 errno = EINVAL;
1633 return 0;
1634 }
1635 }
1636
1637 /* src_len != -1 here. */
1638
1639 /* ucs2 is always a multiple of 2 bytes */
1640 src_len &= ~1;
1641
1642 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1643 (void *)&dest, &dest_len, True)) {
1644 dest_len = 0;
1645 }
1646
1647 if (dest_len) {
1648 /* Did we already process the terminating zero ? */
1649 if (dest[dest_len-1] != 0) {
1650 size_t size = talloc_get_size(dest);
1651 /* Have we got space to append the '\0' ? */
1652 if (size <= dest_len) {
1653 /* No, realloc. */
1654 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1655 dest_len+1);
1656 if (!dest) {
1657 /* talloc fail. */
1658 dest_len = (size_t)-1;
1659 return 0;
1660 }
1661 }
1662 /* Yay - space ! */
1663 dest[dest_len] = '\0';
1664 dest_len++;
1665 }
1666 } else if (dest) {
1667 dest[0] = 0;
1668 }
1669
1670 *ppdest = dest;
1671 return src_len;
1672}
1673
1674size_t pull_ucs2_fstring(char *dest, const void *src)
1675{
1676 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1677}
1678
1679/**
1680 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1681 *
1682 * @param dest always set at least to NULL
1683 * @parm converted_size set to the number of bytes occupied by the string in
1684 * the destination on success.
1685 *
1686 * @return true if new buffer was correctly allocated, and string was
1687 * converted.
1688 **/
1689
1690bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1691 size_t *converted_size)
1692{
1693 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1694
1695 *dest = NULL;
1696 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1697 (void **)dest, converted_size, True);
1698}
1699
1700/**
1701 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1702 *
1703 * @param dest always set at least to NULL
1704 * @parm converted_size set to the number of bytes occupied by the string in
1705 * the destination on success.
1706 * @return true if new buffer was correctly allocated, and string was
1707 * converted.
1708 **/
1709
1710bool pull_ucs2_allocate(char **dest, const smb_ucs2_t *src,
1711 size_t *converted_size)
1712{
1713 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1714
1715 *dest = NULL;
1716 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1717 (void **)dest, converted_size, True);
1718}
1719
1720/**
1721 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1722 *
1723 * @param dest always set at least to NULL
1724 * @parm converted_size set to the number of bytes occupied by the string in
1725 * the destination on success.
1726 *
1727 * @return true if new buffer was correctly allocated, and string was
1728 * converted.
1729 **/
1730
1731bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1732 size_t *converted_size)
1733{
1734 size_t src_len = strlen(src)+1;
1735
1736 *dest = NULL;
1737 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1738 (void **)dest, converted_size, True);
1739}
1740
1741/**
1742 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1743 *
1744 * @param dest always set at least to NULL
1745 * @parm converted_size set to the number of bytes occupied by the string in
1746 * the destination on success.
1747 *
1748 * @return true if new buffer was correctly allocated, and string was
1749 * converted.
1750 **/
1751
1752bool pull_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1753{
1754 size_t src_len = strlen(src)+1;
1755
1756 *dest = NULL;
1757 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1758 (void **)dest, converted_size, True);
1759}
1760
1761/**
1762 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1763 *
1764 * @param dest always set at least to NULL
1765 * @parm converted_size set to the number of bytes occupied by the string in
1766 * the destination on success.
1767 *
1768 * @return true if new buffer was correctly allocated, and string was
1769 * converted.
1770 **/
1771
1772bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1773 size_t *converted_size)
1774{
1775 size_t src_len = strlen(src)+1;
1776
1777 *dest = NULL;
1778 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1779 (void **)dest, converted_size, True);
1780}
1781
1782/**
1783 Copy a string from a char* src to a unicode or ascii
1784 dos codepage destination choosing unicode or ascii based on the
1785 flags in the SMB buffer starting at base_ptr.
1786 Return the number of bytes occupied by the string in the destination.
1787 flags can have:
1788 STR_TERMINATE means include the null termination.
1789 STR_UPPER means uppercase in the destination.
1790 STR_ASCII use ascii even with unicode packet.
1791 STR_NOALIGN means don't do alignment.
1792 dest_len is the maximum length allowed in the destination. If dest_len
1793 is -1 then no maxiumum is used.
1794**/
1795
1796size_t push_string_fn(const char *function, unsigned int line,
1797 const void *base_ptr, uint16 flags2,
1798 void *dest, const char *src,
1799 size_t dest_len, int flags)
1800{
1801#ifdef DEVELOPER
1802 /* We really need to zero fill here, not clobber
1803 * region, as we want to ensure that valgrind thinks
1804 * all of the outgoing buffer has been written to
1805 * so a send() or write() won't trap an error.
1806 * JRA.
1807 */
1808#if 0
1809 clobber_region(function, line, dest, dest_len);
1810#else
1811 memset(dest, '\0', dest_len);
1812#endif
1813#endif
1814
1815 if (!(flags & STR_ASCII) && \
1816 ((flags & STR_UNICODE || \
1817 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1818 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1819 }
1820 return push_ascii(dest, src, dest_len, flags);
1821}
1822
1823
1824/**
1825 Copy a string from a unicode or ascii source (depending on
1826 the packet flags) to a char* destination.
1827 Flags can have:
1828 STR_TERMINATE means the string in src is null terminated.
1829 STR_UNICODE means to force as unicode.
1830 STR_ASCII use ascii even with unicode packet.
1831 STR_NOALIGN means don't do alignment.
1832 if STR_TERMINATE is set then src_len is ignored is it is -1
1833 src_len is the length of the source area in bytes.
1834 Return the number of bytes occupied by the string in src.
1835 The resulting string in "dest" is always null terminated.
1836**/
1837
1838size_t pull_string_fn(const char *function,
1839 unsigned int line,
1840 const void *base_ptr,
1841 uint16 smb_flags2,
1842 char *dest,
1843 const void *src,
1844 size_t dest_len,
1845 size_t src_len,
1846 int flags)
1847{
1848#ifdef DEVELOPER
1849 clobber_region(function, line, dest, dest_len);
1850#endif
1851
1852 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1853 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1854 "UNICODE defined");
1855 }
1856
1857 if (!(flags & STR_ASCII) && \
1858 ((flags & STR_UNICODE || \
1859 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1860 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1861 }
1862 return pull_ascii(dest, src, dest_len, src_len, flags);
1863}
1864
1865/**
1866 Copy a string from a unicode or ascii source (depending on
1867 the packet flags) to a char* destination.
1868 Variant that uses talloc.
1869 Flags can have:
1870 STR_TERMINATE means the string in src is null terminated.
1871 STR_UNICODE means to force as unicode.
1872 STR_ASCII use ascii even with unicode packet.
1873 STR_NOALIGN means don't do alignment.
1874 if STR_TERMINATE is set then src_len is ignored is it is -1
1875 src_len is the length of the source area in bytes.
1876 Return the number of bytes occupied by the string in src.
1877 The resulting string in "dest" is always null terminated.
1878**/
1879
1880size_t pull_string_talloc_fn(const char *function,
1881 unsigned int line,
1882 TALLOC_CTX *ctx,
1883 const void *base_ptr,
1884 uint16 smb_flags2,
1885 char **ppdest,
1886 const void *src,
1887 size_t src_len,
1888 int flags)
1889{
1890 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1891 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1892 "UNICODE defined");
1893 }
1894
1895 if (!(flags & STR_ASCII) && \
1896 ((flags & STR_UNICODE || \
1897 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1898 return pull_ucs2_base_talloc(ctx,
1899 base_ptr,
1900 ppdest,
1901 src,
1902 src_len,
1903 flags);
1904 }
1905 return pull_ascii_base_talloc(ctx,
1906 ppdest,
1907 src,
1908 src_len,
1909 flags);
1910}
1911
1912
1913size_t align_string(const void *base_ptr, const char *p, int flags)
1914{
1915 if (!(flags & STR_ASCII) && \
1916 ((flags & STR_UNICODE || \
1917 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1918 return ucs2_align(base_ptr, p, flags);
1919 }
1920 return 0;
1921}
1922
1923/*
1924 Return the unicode codepoint for the next multi-byte CH_UNIX character
1925 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1926
1927 Also return the number of bytes consumed (which tells the caller
1928 how many bytes to skip to get to the next CH_UNIX character).
1929
1930 Return INVALID_CODEPOINT if the next character cannot be converted.
1931*/
1932
1933codepoint_t next_codepoint(const char *str, size_t *size)
1934{
1935 /* It cannot occupy more than 4 bytes in UTF16 format */
1936 uint8_t buf[4];
1937 smb_iconv_t descriptor;
1938#ifdef __OS2__
1939 size_t ilen_max;
1940 size_t olen_orig;
1941 const char *inbuf;
1942#endif
1943 size_t ilen_orig;
1944 size_t ilen;
1945 size_t olen;
1946
1947 char *outbuf;
1948
1949#ifdef __OS2__
1950 *size = 1;
1951#endif
1952
1953 if ((str[0] & 0x80) == 0) {
1954#ifndef __OS2__
1955 *size = 1;
1956#endif
1957 return (codepoint_t)str[0];
1958 }
1959
1960 lazy_initialize_conv();
1961
1962 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1963 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1964#ifndef __OS2__
1965 *size = 1;
1966#endif
1967 return INVALID_CODEPOINT;
1968 }
1969#ifdef __OS2__
1970 /* We assume that no multi-byte character can take
1971 more than 5 bytes. This is OK as we only
1972 support codepoints up to 1M */
1973
1974 ilen_max = strnlen( str, 5 );
1975#else
1976 *size = 1;
1977#endif
1978 ilen_orig = 1;
1979 olen_orig = 2;
1980 while( 1 )
1981 {
1982 ilen = ilen_orig;
1983 olen = olen_orig;
1984 inbuf = str;
1985 outbuf = ( char * )buf;
1986 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1987 break;
1988
1989 switch( errno )
1990 {
1991 case E2BIG :
1992 if( olen_orig == 2 )
1993 olen_orig = 4;
1994 else
1995 return INVALID_CODEPOINT;
1996 break;
1997
1998
1999 case EINVAL :
2000#ifndef __OS2__
2001 /* We assume that no multi-byte character can take
2002 more than 5 bytes. This is OK as we only
2003 support codepoints up to 1M */
2004 if( ilen_orig < 5 )
2005#else
2006 if( ilen_orig < ilen_max )
2007#endif
2008 ilen_orig++;
2009 else
2010 return INVALID_CODEPOINT;
2011 break;
2012
2013 case EILSEQ :
2014 default :
2015 return INVALID_CODEPOINT;
2016 }
2017 }
2018
2019 olen = olen_orig - olen;
2020
2021 *size = ilen_orig - ilen;
2022
2023 if (olen == 2) {
2024 /* 2 byte, UTF16-LE encoded value. */
2025 return (codepoint_t)SVAL(buf, 0);
2026 }
2027 if (olen == 4) {
2028 /* Decode a 4 byte UTF16-LE character manually.
2029 See RFC2871 for the encoding machanism.
2030 */
2031 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2032 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2033
2034 return (codepoint_t)0x10000 +
2035 (w1 << 10) + w2;
2036 }
2037
2038 /* no other length is valid */
2039 return INVALID_CODEPOINT;
2040}
Note: See TracBrowser for help on using the repository browser.