source: branches/samba-3.2.x/source/lib/charcnv.c@ 237

Last change on this file since 237 was 237, checked in by Herwig Bauernfeind, 16 years ago

Fix for Ticket #85 (by diver) in 3.2 branch

File size: 50.0 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22*/
23#include "includes.h"
24
/*
 * Substitute character written in place of input that could not be
 * converted. Fixed for now; could become a parameter if anyone asks. JRA.
 */
char lp_failed_convert_char(void)
{
	const char fallback = '_';

	return fallback;
}
31
32/**
33 * @file
34 *
35 * @brief Character-set conversion routines built on our iconv.
36 *
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
43 *
44 * @sa lib/iconv.c
45 */
46
47
48static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50static bool initialized;
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57 const char *ret = NULL;
58#ifndef __OS2__
59 if (ch == CH_UTF16LE) ret = "UTF-16LE";
60 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61#else
62 if (ch == CH_UTF16LE) ret = "IBM-1200";
63 else if (ch == CH_UTF16BE) ret = "IBM-1200";
64#endif
65 else if (ch == CH_UNIX) ret = lp_unix_charset();
66 else if (ch == CH_DOS) ret = lp_dos_charset();
67 else if (ch == CH_DISPLAY) ret = lp_display_charset();
68 else if (ch == CH_UTF8) ret = "UTF8";
69
70#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
71 if (ret && !strcmp(ret, "LOCALE")) {
72 const char *ln = NULL;
73
74#ifdef HAVE_SETLOCALE
75 setlocale(LC_ALL, "");
76#endif
77 ln = nl_langinfo(CODESET);
78 if (ln) {
79 /* Check whether the charset name is supported
80 by iconv */
81 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
82 if (handle == (smb_iconv_t) -1) {
83 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
84 ln = NULL;
85 } else {
86 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
87 smb_iconv_close(handle);
88 }
89 }
90 ret = ln;
91 }
92#endif
93
94 if (!ret || !*ret) ret = "ASCII";
95 DEBUG(10, ("codepage: %s\n",ret));
96 return ret;
97}
98
99void lazy_initialize_conv(void)
100{
101 if (!initialized) {
102 load_case_tables();
103 init_iconv();
104 initialized = true;
105 }
106}
107
108/**
109 * Destroy global objects allocated by init_iconv()
110 **/
111void gfree_charcnv(void)
112{
113 int c1, c2;
114
115 for (c1=0;c1<NUM_CHARSETS;c1++) {
116 for (c2=0;c2<NUM_CHARSETS;c2++) {
117 if ( conv_handles[c1][c2] ) {
118 smb_iconv_close( conv_handles[c1][c2] );
119 conv_handles[c1][c2] = 0;
120 }
121 }
122 }
123 initialized = false;
124}
125
126/**
127 * Initialize iconv conversion descriptors.
128 *
129 * This is called the first time it is needed, and also called again
130 * every time the configuration is reloaded, because the charset or
131 * codepage might have changed.
132 **/
133void init_iconv(void)
134{
135 int c1, c2;
136 bool did_reload = False;
137
138 /* so that charset_name() works we need to get the UNIX<->UCS2 going
139 first */
140 if (!conv_handles[CH_UNIX][CH_UTF16LE])
141 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
142
143 if (!conv_handles[CH_UTF16LE][CH_UNIX])
144 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
145
146 for (c1=0;c1<NUM_CHARSETS;c1++) {
147 for (c2=0;c2<NUM_CHARSETS;c2++) {
148 const char *n1 = charset_name((charset_t)c1);
149 const char *n2 = charset_name((charset_t)c2);
150 if (conv_handles[c1][c2] &&
151 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
152 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
153 continue;
154
155 did_reload = True;
156
157 if (conv_handles[c1][c2])
158 smb_iconv_close(conv_handles[c1][c2]);
159
160 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
161 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
162 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
163 charset_name((charset_t)c1), charset_name((charset_t)c2)));
164 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
165 n1 = "ASCII";
166 }
167 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
168 n2 = "ASCII";
169 }
170 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
171 n1, n2 ));
172 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
173 if (!conv_handles[c1][c2]) {
174 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
175 smb_panic("init_iconv: conv_handle initialization failed");
176 }
177 }
178 }
179 }
180
181 if (did_reload) {
182 /* XXX: Does this really get called every time the dos
183 * codepage changes? */
184 /* XXX: Is the did_reload test too strict? */
185 conv_silent = True;
186 init_valid_table();
187 conv_silent = False;
188 }
189}
190
191/**
192 * Convert string from one encoding to another, making error checking etc
193 * Slow path version - uses (slow) iconv.
194 *
195 * @param src pointer to source string (multibyte or singlebyte)
196 * @param srclen length of the source string in bytes
197 * @param dest pointer to destination string (multibyte or singlebyte)
198 * @param destlen maximal length allowed for string
199 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
200 * @returns the number of bytes occupied in the destination
201 *
202 * Ensure the srclen contains the terminating zero.
203 *
204 **/
205
206static size_t convert_string_internal(charset_t from, charset_t to,
207 void const *src, size_t srclen,
208 void *dest, size_t destlen, bool allow_bad_conv)
209{
210 size_t i_len, o_len;
211 size_t retval;
212 const char* inbuf = (const char*)src;
213 char* outbuf = (char*)dest;
214 smb_iconv_t descriptor;
215
216 lazy_initialize_conv();
217
218 descriptor = conv_handles[from][to];
219
220 if (srclen == (size_t)-1) {
221 if (from == CH_UTF16LE || from == CH_UTF16BE) {
222 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
223 } else {
224 srclen = strlen((const char *)src)+1;
225 }
226 }
227
228
229 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
230 if (!conv_silent)
231 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
232 return (size_t)-1;
233 }
234
235 i_len=srclen;
236 o_len=destlen;
237
238 again:
239
240 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
241 if(retval==(size_t)-1) {
242 const char *reason="unknown error";
243 switch(errno) {
244 case EINVAL:
245 reason="Incomplete multibyte sequence";
246 if (!conv_silent)
247 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
248 if (allow_bad_conv)
249 goto use_as_is;
250 return (size_t)-1;
251 case E2BIG:
252 reason="No more room";
253 if (!conv_silent) {
254 if (from == CH_UNIX) {
255 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
256 charset_name(from), charset_name(to),
257 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
258 } else {
259 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
260 charset_name(from), charset_name(to),
261 (unsigned int)srclen, (unsigned int)destlen));
262 }
263 }
264 break;
265 case EILSEQ:
266 reason="Illegal multibyte sequence";
267 if (!conv_silent)
268 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
269 if (allow_bad_conv)
270 goto use_as_is;
271
272 return (size_t)-1;
273 default:
274 if (!conv_silent)
275 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
276 return (size_t)-1;
277 }
278 /* smb_panic(reason); */
279 }
280 return destlen-o_len;
281
282 use_as_is:
283
284 /*
285 * Conversion not supported. This is actually an error, but there are so
286 * many misconfigured iconv systems and smb.conf's out there we can't just
287 * fail. Do a very bad conversion instead.... JRA.
288 */
289
290 {
291 if (o_len == 0 || i_len == 0)
292 return destlen - o_len;
293
294 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
295 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
296 /* Can't convert from utf16 any endian to multibyte.
297 Replace with the default fail char.
298 */
299 if (i_len < 2)
300 return destlen - o_len;
301 if (i_len >= 2) {
302 *outbuf = lp_failed_convert_char();
303
304 outbuf++;
305 o_len--;
306
307 inbuf += 2;
308 i_len -= 2;
309 }
310
311 if (o_len == 0 || i_len == 0)
312 return destlen - o_len;
313
314 /* Keep trying with the next char... */
315 goto again;
316
317 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
318 /* Can't convert to UTF16LE - just widen by adding the
319 default fail char then zero.
320 */
321 if (o_len < 2)
322 return destlen - o_len;
323
324 outbuf[0] = lp_failed_convert_char();
325 outbuf[1] = '\0';
326
327 inbuf++;
328 i_len--;
329
330 outbuf += 2;
331 o_len -= 2;
332
333 if (o_len == 0 || i_len == 0)
334 return destlen - o_len;
335
336 /* Keep trying with the next char... */
337 goto again;
338
339 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
340 to != CH_UTF16LE && to != CH_UTF16BE) {
341 /* Failed multibyte to multibyte. Just copy the default fail char and
342 try again. */
343 outbuf[0] = lp_failed_convert_char();
344
345 inbuf++;
346 i_len--;
347
348 outbuf++;
349 o_len--;
350
351 if (o_len == 0 || i_len == 0)
352 return destlen - o_len;
353
354 /* Keep trying with the next char... */
355 goto again;
356
357 } else {
358 /* Keep compiler happy.... */
359 return destlen - o_len;
360 }
361 }
362}
363
364/**
365 * Convert string from one encoding to another, making error checking etc
366 * Fast path version - handles ASCII first.
367 *
368 * @param src pointer to source string (multibyte or singlebyte)
369 * @param srclen length of the source string in bytes, or -1 for nul terminated.
370 * @param dest pointer to destination string (multibyte or singlebyte)
371 * @param destlen maximal length allowed for string - *NEVER* -1.
372 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
373 * @returns the number of bytes occupied in the destination
374 *
375 * Ensure the srclen contains the terminating zero.
376 *
377 * This function has been hand-tuned to provide a fast path.
378 * Don't change unless you really know what you are doing. JRA.
379 **/
380
381size_t convert_string(charset_t from, charset_t to,
382 void const *src, size_t srclen,
383 void *dest, size_t destlen, bool allow_bad_conv)
384{
385 /*
386 * NB. We deliberately don't do a strlen here if srclen == -1.
387 * This is very expensive over millions of calls and is taken
388 * care of in the slow path in convert_string_internal. JRA.
389 */
390
391#ifdef DEVELOPER
392 SMB_ASSERT(destlen != (size_t)-1);
393#endif
394
395 if (srclen == 0)
396 return 0;
397
398// DEBUG(10, ("convert_string: 1"));
399
400 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
401 const unsigned char *p = (const unsigned char *)src;
402 unsigned char *q = (unsigned char *)dest;
403 size_t slen = srclen;
404 size_t dlen = destlen;
405 unsigned char lastp = '\0';
406 size_t retval = 0;
407
408// DEBUG(10, ("convert_string: 2"));
409
410 /* If all characters are ascii, fast path here. */
411 while (slen && dlen) {
412 if ((lastp = *p) <= 0x7f) {
413 *q++ = *p++;
414 if (slen != (size_t)-1) {
415 slen--;
416 }
417 dlen--;
418 retval++;
419 if (!lastp)
420 break;
421 } else {
422#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
423 goto general_case;
424#else
425 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
426 if (ret == (size_t)-1) {
427 return ret;
428 }
429 return retval + ret;
430#endif
431 }
432 }
433 if (!dlen) {
434 /* Even if we fast path we should note if we ran out of room. */
435 if (((slen != (size_t)-1) && slen) ||
436 ((slen == (size_t)-1) && lastp)) {
437 errno = E2BIG;
438 }
439 }
440 return retval;
441// DEBUG(10, ("convert_string: 3"));
442
443 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
444 const unsigned char *p = (const unsigned char *)src;
445 unsigned char *q = (unsigned char *)dest;
446 size_t retval = 0;
447 size_t slen = srclen;
448 size_t dlen = destlen;
449 unsigned char lastp = '\0';
450
451 /* If all characters are ascii, fast path here. */
452 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
453 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
454 *q++ = *p;
455 if (slen != (size_t)-1) {
456 slen -= 2;
457 }
458 p += 2;
459 dlen--;
460 retval++;
461 if (!lastp)
462 break;
463 } else {
464#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
465 goto general_case;
466#else
467 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
468 if (ret == (size_t)-1) {
469 return ret;
470 }
471 return retval + ret;
472#endif
473 }
474 }
475 if (!dlen) {
476 /* Even if we fast path we should note if we ran out of room. */
477 if (((slen != (size_t)-1) && slen) ||
478 ((slen == (size_t)-1) && lastp)) {
479 errno = E2BIG;
480 }
481 }
482 return retval;
483// DEBUG(10, ("convert_string: 4"));
484
485 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
486 const unsigned char *p = (const unsigned char *)src;
487 unsigned char *q = (unsigned char *)dest;
488 size_t retval = 0;
489 size_t slen = srclen;
490 size_t dlen = destlen;
491 unsigned char lastp = '\0';
492
493 /* If all characters are ascii, fast path here. */
494 while (slen && (dlen >= 2)) {
495 if ((lastp = *p) <= 0x7F) {
496 *q++ = *p++;
497 *q++ = '\0';
498 if (slen != (size_t)-1) {
499 slen--;
500 }
501 dlen -= 2;
502 retval += 2;
503 if (!lastp)
504 break;
505 } else {
506#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
507 goto general_case;
508#else
509 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
510 if (ret == (size_t)-1) {
511 return ret;
512 }
513 return retval + ret;
514#endif
515 }
516 }
517 if (!dlen) {
518 /* Even if we fast path we should note if we ran out of room. */
519 if (((slen != (size_t)-1) && slen) ||
520 ((slen == (size_t)-1) && lastp)) {
521 errno = E2BIG;
522 }
523 }
524 return retval;
525 }
526
527#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
528 general_case:
529#endif
530 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
531}
532
533/**
534 * Convert between character sets, allocating a new buffer for the result.
535 *
536 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
537 * (this is a bad interface and needs fixing. JRA).
538 * @param srclen length of source buffer.
539 * @param dest always set at least to NULL
540 * @param converted_size set to the size of the allocated buffer on return
541 * true
542 * @note -1 is not accepted for srclen.
543 *
544 * @return True if new buffer was correctly allocated, and string was
545 * converted.
546 *
547 * Ensure the srclen contains the terminating zero.
548 *
549 * I hate the goto's in this function. It's embarressing.....
550 * There has to be a cleaner way to do this. JRA.
551 **/
552
553bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
554 void const *src, size_t srclen, void *dst,
555 size_t *converted_size, bool allow_bad_conv)
556{
557 size_t i_len, o_len, destlen = (srclen * 3) / 2;
558 size_t retval;
559 const char *inbuf = (const char *)src;
560 char *outbuf = NULL, *ob = NULL;
561 smb_iconv_t descriptor;
562 void **dest = (void **)dst;
563
564 *dest = NULL;
565
566 if (!converted_size) {
567 errno = EINVAL;
568 return false;
569 }
570
571 if (src == NULL || srclen == (size_t)-1) {
572 errno = EINVAL;
573 return false;
574 }
575 if (srclen == 0) {
576 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
577 if (ob == NULL) {
578 errno = ENOMEM;
579 return false;
580 }
581 *dest = ob;
582 *converted_size = 0;
583 return true;
584 }
585
586 lazy_initialize_conv();
587
588 descriptor = conv_handles[from][to];
589
590 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
591 if (!conv_silent)
592 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
593 errno = EOPNOTSUPP;
594 return false;
595 }
596
597 convert:
598
599 /* +2 is for ucs2 null termination. */
600 if ((destlen*2)+2 < destlen) {
601 /* wrapped ! abort. */
602 if (!conv_silent)
603 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
604 if (!ctx)
605 SAFE_FREE(outbuf);
606 errno = EOPNOTSUPP;
607 return false;
608 } else {
609 destlen = destlen * 2;
610 }
611
612 /* +2 is for ucs2 null termination. */
613 if (ctx) {
614 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
615 } else {
616 ob = (char *)SMB_REALLOC(ob, destlen + 2);
617 }
618
619 if (!ob) {
620 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
621 errno = ENOMEM;
622 return false;
623 }
624 outbuf = ob;
625 i_len = srclen;
626 o_len = destlen;
627
628 again:
629 DEBUG(10,("convert_string_internal: convert_string(%s,%s): srclen=%u destlen=%u\n",
630 charset_name(from), charset_name(to),
631 (unsigned int)srclen, (unsigned int)destlen));
632
633 retval = smb_iconv(descriptor,
634 &inbuf, &i_len,
635 &outbuf, &o_len);
636 if(retval == (size_t)-1) {
637 const char *reason="unknown error";
638 switch(errno) {
639 case EINVAL:
640 reason="Incomplete multibyte sequence";
641 if (!conv_silent)
642 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
643 if (allow_bad_conv)
644 goto use_as_is;
645 break;
646 case E2BIG:
647 goto convert;
648 case EILSEQ:
649 reason="Illegal multibyte sequence";
650 if (!conv_silent)
651 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
652 if (allow_bad_conv)
653 goto use_as_is;
654 break;
655 }
656 if (!conv_silent)
657 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
658 /* smb_panic(reason); */
659 if (ctx) {
660 TALLOC_FREE(ob);
661 } else {
662 SAFE_FREE(ob);
663 }
664 return false;
665 }
666
667 out:
668
669 destlen = destlen - o_len;
670 /* Don't shrink unless we're reclaiming a lot of
671 * space. This is in the hot codepath and these
672 * reallocs *cost*. JRA.
673 */
674 if (o_len > 1024) {
675 /* We're shrinking here so we know the +2 is safe from wrap. */
676 if (ctx) {
677 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
678 } else {
679 ob = (char *)SMB_REALLOC(ob,destlen + 2);
680 }
681 }
682
683 if (destlen && !ob) {
684 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
685 errno = ENOMEM;
686 return false;
687 }
688
689 *dest = ob;
690
691 /* Must ucs2 null terminate in the extra space we allocated. */
692 ob[destlen] = '\0';
693 ob[destlen+1] = '\0';
694
695 *converted_size = destlen;
696 return true;
697
698 use_as_is:
699
700 /*
701 * Conversion not supported. This is actually an error, but there are so
702 * many misconfigured iconv systems and smb.conf's out there we can't just
703 * fail. Do a very bad conversion instead.... JRA.
704 */
705
706 {
707 if (o_len == 0 || i_len == 0)
708 goto out;
709
710 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
711 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
712 /* Can't convert from utf16 any endian to multibyte.
713 Replace with the default fail char.
714 */
715
716 if (i_len < 2)
717 goto out;
718
719 if (i_len >= 2) {
720 *outbuf = lp_failed_convert_char();
721
722 outbuf++;
723 o_len--;
724
725 inbuf += 2;
726 i_len -= 2;
727 }
728
729 if (o_len == 0 || i_len == 0)
730 goto out;
731
732 /* Keep trying with the next char... */
733 goto again;
734
735 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
736 /* Can't convert to UTF16LE - just widen by adding the
737 default fail char then zero.
738 */
739 if (o_len < 2)
740 goto out;
741
742 outbuf[0] = lp_failed_convert_char();
743 outbuf[1] = '\0';
744
745 inbuf++;
746 i_len--;
747
748 outbuf += 2;
749 o_len -= 2;
750
751 if (o_len == 0 || i_len == 0)
752 goto out;
753
754 /* Keep trying with the next char... */
755 goto again;
756
757 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
758 to != CH_UTF16LE && to != CH_UTF16BE) {
759 /* Failed multibyte to multibyte. Just copy the default fail char and
760 try again. */
761 outbuf[0] = lp_failed_convert_char();
762
763 inbuf++;
764 i_len--;
765
766 outbuf++;
767 o_len--;
768
769 if (o_len == 0 || i_len == 0)
770 goto out;
771
772 /* Keep trying with the next char... */
773 goto again;
774
775 } else {
776 /* Keep compiler happy.... */
777 goto out;
778 }
779 }
780}
781
782/**
783 * Convert between character sets, allocating a new buffer using talloc for the result.
784 *
785 * @param srclen length of source buffer.
786 * @param dest always set at least to NULL
787 * @note -1 is not accepted for srclen.
788 *
789 * @returns Size in bytes of the converted string; or -1 in case of error.
790 **/
791size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
792 void const *src, size_t srclen, void *dst,
793 bool allow_bad_conv)
794{
795 void **dest = (void **)dst;
796 size_t dest_len;
797
798 *dest = NULL;
799 if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
800 &dest_len, allow_bad_conv))
801 return (size_t)-1;
802 if (*dest == NULL)
803 return (size_t)-1;
804 return dest_len;
805}
806
807size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
808{
809 size_t size;
810 smb_ucs2_t *buffer;
811
812 size = push_ucs2_allocate(&buffer, src);
813 if (size == (size_t)-1) {
814 return (size_t)-1;
815 }
816 if (!strupper_w(buffer) && (dest == src)) {
817 free(buffer);
818 return srclen;
819 }
820
821 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
822 free(buffer);
823 return size;
824}
825
826/**
827 strdup() a unix string to upper case.
828**/
829
830char *strdup_upper(const char *s)
831{
832 char *out_buffer = SMB_STRDUP(s);
833 const unsigned char *p = (const unsigned char *)s;
834 unsigned char *q = (unsigned char *)out_buffer;
835
836 if (!q) {
837 return NULL;
838 }
839
840 /* this is quite a common operation, so we want it to be
841 fast. We optimise for the ascii case, knowing that all our
842 supported multi-byte character sets are ascii-compatible
843 (ie. they match for the first 128 chars) */
844
845 while (*p) {
846 if (*p & 0x80)
847 break;
848 *q++ = toupper_ascii_fast(*p);
849 p++;
850 }
851
852 if (*p) {
853 /* MB case. */
854 size_t size, size2;
855 smb_ucs2_t *buffer = NULL;
856
857 SAFE_FREE(out_buffer);
858 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
859 strlen(s) + 1, (void **)(void *)&buffer, &size,
860 True)) {
861 return NULL;
862 }
863
864 strupper_w(buffer);
865
866 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
867 size, (void **)(void *)&out_buffer, &size2, True)) {
868 TALLOC_FREE(buffer);
869 return NULL;
870 }
871
872 /* Don't need the intermediate buffer
873 * anymore.
874 */
875 TALLOC_FREE(buffer);
876 }
877
878 return out_buffer;
879}
880
881/**
882 talloc_strdup() a unix string to upper case.
883**/
884
885char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
886{
887 char *out_buffer = talloc_strdup(ctx,s);
888 const unsigned char *p = (const unsigned char *)s;
889 unsigned char *q = (unsigned char *)out_buffer;
890
891 if (!q) {
892 return NULL;
893 }
894
895 /* this is quite a common operation, so we want it to be
896 fast. We optimise for the ascii case, knowing that all our
897 supported multi-byte character sets are ascii-compatible
898 (ie. they match for the first 128 chars) */
899
900 while (*p) {
901 if (*p & 0x80)
902 break;
903 *q++ = toupper_ascii_fast(*p);
904 p++;
905 }
906
907 if (*p) {
908 /* MB case. */
909 size_t size;
910 smb_ucs2_t *ubuf = NULL;
911
912 /* We're not using the ascii buffer above. */
913 TALLOC_FREE(out_buffer);
914
915 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
916 s, strlen(s)+1,
917 (void *)&ubuf,
918 True);
919 if (size == (size_t)-1) {
920 return NULL;
921 }
922
923 strupper_w(ubuf);
924
925 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
926 ubuf, size,
927 (void *)&out_buffer,
928 True);
929
930 /* Don't need the intermediate buffer
931 * anymore.
932 */
933
934 TALLOC_FREE(ubuf);
935
936 if (size == (size_t)-1) {
937 return NULL;
938 }
939 }
940
941 return out_buffer;
942}
943
944size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
945{
946 size_t size;
947 smb_ucs2_t *buffer = NULL;
948
949 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
950 (void **)(void *)&buffer, &size, True)) {
951 smb_panic("failed to create UCS2 buffer");
952 }
953 if (!strlower_w(buffer) && (dest == src)) {
954 SAFE_FREE(buffer);
955 return srclen;
956 }
957 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
958 SAFE_FREE(buffer);
959 return size;
960}
961
962/**
963 strdup() a unix string to lower case.
964**/
965
966char *strdup_lower(const char *s)
967{
968 size_t size;
969 smb_ucs2_t *buffer = NULL;
970 char *out_buffer;
971
972 size = push_ucs2_allocate(&buffer, s);
973 if (size == -1 || !buffer) {
974 return NULL;
975 }
976
977 strlower_w(buffer);
978
979 size = pull_ucs2_allocate(&out_buffer, buffer);
980 SAFE_FREE(buffer);
981
982 if (size == (size_t)-1) {
983 return NULL;
984 }
985
986 return out_buffer;
987}
988
989char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
990{
991 size_t size;
992 smb_ucs2_t *buffer = NULL;
993 char *out_buffer;
994
995 size = push_ucs2_talloc(ctx, &buffer, s);
996 if (size == -1 || !buffer) {
997 TALLOC_FREE(buffer);
998 return NULL;
999 }
1000
1001 strlower_w(buffer);
1002
1003 size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
1004 TALLOC_FREE(buffer);
1005
1006 if (size == (size_t)-1) {
1007 TALLOC_FREE(out_buffer);
1008 return NULL;
1009 }
1010
1011 return out_buffer;
1012}
1013
1014
1015size_t ucs2_align(const void *base_ptr, const void *p, int flags)
1016{
1017 if (flags & (STR_NOALIGN|STR_ASCII))
1018 return 0;
1019 return PTR_DIFF(p, base_ptr) & 1;
1020}
1021
1022
1023/**
1024 * Copy a string from a char* unix src to a dos codepage string destination.
1025 *
1026 * @return the number of bytes occupied by the string in the destination.
1027 *
1028 * @param flags can include
1029 * <dl>
1030 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1031 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1032 * </dl>
1033 *
1034 * @param dest_len the maximum length in bytes allowed in the
1035 * destination.
1036 **/
1037size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1038{
1039 size_t src_len = strlen(src);
1040 char *tmpbuf = NULL;
1041 size_t ret;
1042
1043 /* No longer allow a length of -1. */
1044 if (dest_len == (size_t)-1) {
1045 smb_panic("push_ascii - dest_len == -1");
1046 }
1047
1048 if (flags & STR_UPPER) {
1049 tmpbuf = SMB_STRDUP(src);
1050 if (!tmpbuf) {
1051 smb_panic("malloc fail");
1052 }
1053 strupper_m(tmpbuf);
1054 src = tmpbuf;
1055 }
1056
1057 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1058 src_len++;
1059 }
1060
1061 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1062 if (ret == (size_t)-1 &&
1063 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1064 && dest_len > 0) {
1065 ((char *)dest)[0] = '\0';
1066 }
1067 SAFE_FREE(tmpbuf);
1068 return ret;
1069}
1070
1071size_t push_ascii_fstring(void *dest, const char *src)
1072{
1073 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1074}
1075
1076/********************************************************************
1077 Push an nstring - ensure null terminated. Written by
1078 moriyama@miraclelinux.com (MORIYAMA Masayuki).
1079********************************************************************/
1080
1081size_t push_ascii_nstring(void *dest, const char *src)
1082{
1083 size_t i, buffer_len, dest_len;
1084 smb_ucs2_t *buffer;
1085
1086 conv_silent = True;
1087 buffer_len = push_ucs2_allocate(&buffer, src);
1088 if (buffer_len == (size_t)-1) {
1089 smb_panic("failed to create UCS2 buffer");
1090 }
1091
1092 /* We're using buffer_len below to count ucs2 characters, not bytes. */
1093 buffer_len /= sizeof(smb_ucs2_t);
1094
1095 dest_len = 0;
1096 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1097 unsigned char mb[10];
1098 /* Convert one smb_ucs2_t character at a time. */
1099 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1100 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1101 memcpy((char *)dest + dest_len, mb, mb_len);
1102 dest_len += mb_len;
1103 } else {
1104 errno = E2BIG;
1105 break;
1106 }
1107 }
1108 ((char *)dest)[dest_len] = '\0';
1109
1110 SAFE_FREE(buffer);
1111 conv_silent = False;
1112 return dest_len;
1113}
1114
1115/********************************************************************
1116 Push and malloc an ascii string. src and dest null terminated.
1117********************************************************************/
1118
1119size_t push_ascii_allocate(char **dest, const char *src)
1120{
1121 size_t dest_len, src_len = strlen(src)+1;
1122
1123 *dest = NULL;
1124 if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1125 (void **)dest, &dest_len, True))
1126 return (size_t)-1;
1127 else
1128 return dest_len;
1129}
1130
1131/**
1132 * Copy a string from a dos codepage source to a unix char* destination.
1133 *
1134 * The resulting string in "dest" is always null terminated.
1135 *
1136 * @param flags can have:
1137 * <dl>
1138 * <dt>STR_TERMINATE</dt>
1139 * <dd>STR_TERMINATE means the string in @p src
1140 * is null terminated, and src_len is ignored.</dd>
1141 * </dl>
1142 *
1143 * @param src_len is the length of the source area in bytes.
1144 * @returns the number of bytes occupied by the string in @p src.
1145 **/
1146size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1147{
1148 size_t ret;
1149
1150 if (dest_len == (size_t)-1) {
1151 /* No longer allow dest_len of -1. */
1152 smb_panic("pull_ascii - invalid dest_len of -1");
1153 }
1154
1155 if (flags & STR_TERMINATE) {
1156 if (src_len == (size_t)-1) {
1157 src_len = strlen((const char *)src) + 1;
1158 } else {
1159 size_t len = strnlen((const char *)src, src_len);
1160 if (len < src_len)
1161 len++;
1162 src_len = len;
1163 }
1164 }
1165
1166 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1167 if (ret == (size_t)-1) {
1168 ret = 0;
1169 dest_len = 0;
1170 }
1171
1172 if (dest_len && ret) {
1173 /* Did we already process the terminating zero ? */
1174 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1175 dest[MIN(ret, dest_len-1)] = 0;
1176 }
1177 } else {
1178 dest[0] = 0;
1179 }
1180
1181 return src_len;
1182}
1183
1184/**
1185 * Copy a string from a dos codepage source to a unix char* destination.
1186 Talloc version.
1187 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1188 needs fixing. JRA).
1189 *
1190 * The resulting string in "dest" is always null terminated.
1191 *
1192 * @param flags can have:
1193 * <dl>
1194 * <dt>STR_TERMINATE</dt>
1195 * <dd>STR_TERMINATE means the string in @p src
1196 * is null terminated, and src_len is ignored.</dd>
1197 * </dl>
1198 *
1199 * @param src_len is the length of the source area in bytes.
1200 * @returns the number of bytes occupied by the string in @p src.
1201 **/
1202
1203static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1204 char **ppdest,
1205 const void *src,
1206 size_t src_len,
1207 int flags)
1208{
1209 char *dest = NULL;
1210 size_t dest_len = 0;
1211
1212#ifdef DEVELOPER
1213 /* Ensure we never use the braindead "malloc" varient. */
1214 if (ctx == NULL) {
1215 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1216 }
1217#endif
1218
1219 *ppdest = NULL;
1220
1221 if (!src_len) {
1222 return 0;
1223 }
1224
1225 if (flags & STR_TERMINATE) {
1226 if (src_len == (size_t)-1) {
1227 src_len = strlen((const char *)src) + 1;
1228 } else {
1229 size_t len = strnlen((const char *)src, src_len);
1230 if (len < src_len)
1231 len++;
1232 src_len = len;
1233 }
1234 /* Ensure we don't use an insane length from the client. */
1235 if (src_len >= 1024*1024) {
1236 char *msg = talloc_asprintf(ctx,
1237 "Bad src length (%u) in "
1238 "pull_ascii_base_talloc",
1239 (unsigned int)src_len);
1240 smb_panic(msg);
1241 }
1242 } else {
1243 /* Can't have an unlimited length
1244 * non STR_TERMINATE'd.
1245 */
1246 if (src_len == (size_t)-1) {
1247 errno = EINVAL;
1248 return 0;
1249 }
1250 }
1251
1252 /* src_len != -1 here. */
1253
1254 if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1255 &dest_len, True)) {
1256 dest_len = 0;
1257 }
1258
1259 if (dest_len && dest) {
1260 /* Did we already process the terminating zero ? */
1261 if (dest[dest_len-1] != 0) {
1262 size_t size = talloc_get_size(dest);
1263 /* Have we got space to append the '\0' ? */
1264 if (size <= dest_len) {
1265 /* No, realloc. */
1266 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1267 dest_len+1);
1268 if (!dest) {
1269 /* talloc fail. */
1270 dest_len = (size_t)-1;
1271 return 0;
1272 }
1273 }
1274 /* Yay - space ! */
1275 dest[dest_len] = '\0';
1276 dest_len++;
1277 }
1278 } else if (dest) {
1279 dest[0] = 0;
1280 }
1281
1282 *ppdest = dest;
1283 return src_len;
1284}
1285
1286size_t pull_ascii_fstring(char *dest, const void *src)
1287{
1288 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1289}
1290
1291/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1292
1293size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1294{
1295 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1296}
1297
1298/**
1299 * Copy a string from a char* src to a unicode destination.
1300 *
1301 * @returns the number of bytes occupied by the string in the destination.
1302 *
1303 * @param flags can have:
1304 *
1305 * <dl>
1306 * <dt>STR_TERMINATE <dd>means include the null termination.
1307 * <dt>STR_UPPER <dd>means uppercase in the destination.
1308 * <dt>STR_NOALIGN <dd>means don't do alignment.
1309 * </dl>
1310 *
1311 * @param dest_len is the maximum length allowed in the
1312 * destination.
1313 **/
1314
1315size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1316{
1317 size_t len=0;
1318 size_t src_len;
1319 size_t ret;
1320
1321 if (dest_len == (size_t)-1) {
1322 /* No longer allow dest_len of -1. */
1323 smb_panic("push_ucs2 - invalid dest_len of -1");
1324 }
1325
1326 if (flags & STR_TERMINATE)
1327 src_len = (size_t)-1;
1328 else
1329 src_len = strlen(src);
1330
1331 if (ucs2_align(base_ptr, dest, flags)) {
1332 *(char *)dest = 0;
1333 dest = (void *)((char *)dest + 1);
1334 if (dest_len)
1335 dest_len--;
1336 len++;
1337 }
1338
1339 /* ucs2 is always a multiple of 2 bytes */
1340 dest_len &= ~1;
1341
1342 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1343 if (ret == (size_t)-1) {
1344 if ((flags & STR_TERMINATE) &&
1345 dest &&
1346 dest_len) {
1347 *(char *)dest = 0;
1348 }
1349 return len;
1350 }
1351
1352 len += ret;
1353
1354 if (flags & STR_UPPER) {
1355 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1356 size_t i;
1357
1358 /* We check for i < (ret / 2) below as the dest string isn't null
1359 terminated if STR_TERMINATE isn't set. */
1360
1361 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1362 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1363 if (v != dest_ucs2[i]) {
1364 dest_ucs2[i] = v;
1365 }
1366 }
1367 }
1368
1369 return len;
1370}
1371
1372
1373/**
1374 * Copy a string from a unix char* src to a UCS2 destination,
1375 * allocating a buffer using talloc().
1376 *
1377 * @param dest always set at least to NULL
1378 *
1379 * @returns The number of bytes occupied by the string in the destination
1380 * or -1 in case of error.
1381 **/
1382size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1383{
1384 size_t src_len = strlen(src)+1;
1385
1386 *dest = NULL;
1387 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1388}
1389
1390
1391/**
1392 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1393 *
1394 * @param dest always set at least to NULL
1395 *
1396 * @returns The number of bytes occupied by the string in the destination
1397 * or -1 in case of error.
1398 **/
1399
1400size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1401{
1402 size_t dest_len, src_len = strlen(src)+1;
1403
1404 *dest = NULL;
1405 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1406 (void **)dest, &dest_len, True))
1407 return (size_t)-1;
1408 else
1409 return dest_len;
1410}
1411
1412/**
1413 Copy a string from a char* src to a UTF-8 destination.
1414 Return the number of bytes occupied by the string in the destination
1415 Flags can have:
1416 STR_TERMINATE means include the null termination
1417 STR_UPPER means uppercase in the destination
1418 dest_len is the maximum length allowed in the destination. If dest_len
1419 is -1 then no maxiumum is used.
1420**/
1421
1422static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1423{
1424 size_t src_len = 0;
1425 size_t ret;
1426 char *tmpbuf = NULL;
1427
1428 if (dest_len == (size_t)-1) {
1429 /* No longer allow dest_len of -1. */
1430 smb_panic("push_utf8 - invalid dest_len of -1");
1431 }
1432
1433 if (flags & STR_UPPER) {
1434 tmpbuf = strdup_upper(src);
1435 if (!tmpbuf) {
1436 return (size_t)-1;
1437 }
1438 src = tmpbuf;
1439 src_len = strlen(src);
1440 }
1441
1442 src_len = strlen(src);
1443 if (flags & STR_TERMINATE) {
1444 src_len++;
1445 }
1446
1447 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1448 SAFE_FREE(tmpbuf);
1449 return ret;
1450}
1451
1452size_t push_utf8_fstring(void *dest, const char *src)
1453{
1454 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1455}
1456
1457/**
1458 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1459 *
1460 * @param dest always set at least to NULL
1461 *
1462 * @returns The number of bytes occupied by the string in the destination
1463 **/
1464
1465size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1466{
1467 size_t src_len = strlen(src)+1;
1468
1469 *dest = NULL;
1470 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1471}
1472
1473/**
1474 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1475 *
1476 * @param dest always set at least to NULL
1477 *
1478 * @returns The number of bytes occupied by the string in the destination
1479 **/
1480
1481size_t push_utf8_allocate(char **dest, const char *src)
1482{
1483 size_t dest_len, src_len = strlen(src)+1;
1484
1485 *dest = NULL;
1486 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1487 (void **)dest, &dest_len, True))
1488 return (size_t)-1;
1489 else
1490 return dest_len;
1491}
1492
1493/**
1494 Copy a string from a ucs2 source to a unix char* destination.
1495 Flags can have:
1496 STR_TERMINATE means the string in src is null terminated.
1497 STR_NOALIGN means don't try to align.
1498 if STR_TERMINATE is set then src_len is ignored if it is -1.
1499 src_len is the length of the source area in bytes
1500 Return the number of bytes occupied by the string in src.
1501 The resulting string in "dest" is always null terminated.
1502**/
1503
1504size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1505{
1506 size_t ret;
1507
1508 if (dest_len == (size_t)-1) {
1509 /* No longer allow dest_len of -1. */
1510 smb_panic("pull_ucs2 - invalid dest_len of -1");
1511 }
1512
1513 if (!src_len) {
1514 if (dest && dest_len > 0) {
1515 dest[0] = '\0';
1516 }
1517 return 0;
1518 }
1519
1520 if (ucs2_align(base_ptr, src, flags)) {
1521 src = (const void *)((const char *)src + 1);
1522 if (src_len != (size_t)-1)
1523 src_len--;
1524 }
1525
1526 if (flags & STR_TERMINATE) {
1527 /* src_len -1 is the default for null terminated strings. */
1528 if (src_len != (size_t)-1) {
1529 size_t len = strnlen_w((const smb_ucs2_t *)src,
1530 src_len/2);
1531 if (len < src_len/2)
1532 len++;
1533 src_len = len*2;
1534 }
1535 }
1536
1537 /* ucs2 is always a multiple of 2 bytes */
1538 if (src_len != (size_t)-1)
1539 src_len &= ~1;
1540
1541 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1542 if (ret == (size_t)-1) {
1543 ret = 0;
1544 dest_len = 0;
1545 }
1546
1547 if (src_len == (size_t)-1)
1548 src_len = ret*2;
1549
1550 if (dest_len && ret) {
1551 /* Did we already process the terminating zero ? */
1552 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1553 dest[MIN(ret, dest_len-1)] = 0;
1554 }
1555 } else {
1556 dest[0] = 0;
1557 }
1558
1559 return src_len;
1560}
1561
1562/**
1563 Copy a string from a ucs2 source to a unix char* destination.
1564 Talloc version with a base pointer.
1565 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1566 needs fixing. JRA).
1567 Flags can have:
1568 STR_TERMINATE means the string in src is null terminated.
1569 STR_NOALIGN means don't try to align.
1570 if STR_TERMINATE is set then src_len is ignored if it is -1.
1571 src_len is the length of the source area in bytes
1572 Return the number of bytes occupied by the string in src.
1573 The resulting string in "dest" is always null terminated.
1574**/
1575
1576size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1577 const void *base_ptr,
1578 char **ppdest,
1579 const void *src,
1580 size_t src_len,
1581 int flags)
1582{
1583 char *dest;
1584 size_t dest_len;
1585
1586 *ppdest = NULL;
1587
1588#ifdef DEVELOPER
1589 /* Ensure we never use the braindead "malloc" varient. */
1590 if (ctx == NULL) {
1591 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1592 }
1593#endif
1594
1595 if (!src_len) {
1596 return 0;
1597 }
1598
1599 if (ucs2_align(base_ptr, src, flags)) {
1600 src = (const void *)((const char *)src + 1);
1601 if (src_len != (size_t)-1)
1602 src_len--;
1603 }
1604
1605 if (flags & STR_TERMINATE) {
1606 /* src_len -1 is the default for null terminated strings. */
1607 if (src_len != (size_t)-1) {
1608 size_t len = strnlen_w((const smb_ucs2_t *)src,
1609 src_len/2);
1610 if (len < src_len/2)
1611 len++;
1612 src_len = len*2;
1613 } else {
1614 /*
1615 * src_len == -1 - alloc interface won't take this
1616 * so we must calculate.
1617 */
1618 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1619 }
1620 /* Ensure we don't use an insane length from the client. */
1621 if (src_len >= 1024*1024) {
1622 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1623 }
1624 } else {
1625 /* Can't have an unlimited length
1626 * non STR_TERMINATE'd.
1627 */
1628 if (src_len == (size_t)-1) {
1629 errno = EINVAL;
1630 return 0;
1631 }
1632 }
1633
1634 /* src_len != -1 here. */
1635
1636 /* ucs2 is always a multiple of 2 bytes */
1637 src_len &= ~1;
1638
1639 dest_len = convert_string_talloc(ctx,
1640 CH_UTF16LE,
1641 CH_UNIX,
1642 src,
1643 src_len,
1644 (void *)&dest,
1645 True);
1646 if (dest_len == (size_t)-1) {
1647 dest_len = 0;
1648 }
1649
1650 if (dest_len) {
1651 /* Did we already process the terminating zero ? */
1652 if (dest[dest_len-1] != 0) {
1653 size_t size = talloc_get_size(dest);
1654 /* Have we got space to append the '\0' ? */
1655 if (size <= dest_len) {
1656 /* No, realloc. */
1657 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1658 dest_len+1);
1659 if (!dest) {
1660 /* talloc fail. */
1661 dest_len = (size_t)-1;
1662 return 0;
1663 }
1664 }
1665 /* Yay - space ! */
1666 dest[dest_len] = '\0';
1667 dest_len++;
1668 }
1669 } else if (dest) {
1670 dest[0] = 0;
1671 }
1672
1673 *ppdest = dest;
1674 return src_len;
1675}
1676
1677size_t pull_ucs2_fstring(char *dest, const void *src)
1678{
1679 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1680}
1681
1682/**
1683 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1684 *
1685 * @param dest always set at least to NULL
1686 *
1687 * @returns The number of bytes occupied by the string in the destination
1688 **/
1689
1690size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1691{
1692 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1693 *dest = NULL;
1694 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1695}
1696
1697/**
1698 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1699 *
1700 * @param dest always set at least to NULL
1701 *
1702 * @returns The number of bytes occupied by the string in the destination
1703 **/
1704
1705size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1706{
1707 size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1708 *dest = NULL;
1709 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1710 (void **)dest, &dest_len, True))
1711 return (size_t)-1;
1712 else
1713 return dest_len;
1714}
1715
1716/**
1717 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1718 *
1719 * @param dest always set at least to NULL
1720 *
1721 * @returns The number of bytes occupied by the string in the destination
1722 **/
1723
1724size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1725{
1726 size_t src_len = strlen(src)+1;
1727 *dest = NULL;
1728 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1729}
1730
1731/**
1732 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1733 *
1734 * @param dest always set at least to NULL
1735 *
1736 * @returns The number of bytes occupied by the string in the destination
1737 **/
1738
1739size_t pull_utf8_allocate(char **dest, const char *src)
1740{
1741 size_t dest_len, src_len = strlen(src)+1;
1742 *dest = NULL;
1743 if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1744 (void **)dest, &dest_len, True))
1745 return (size_t)-1;
1746 else
1747 return dest_len;
1748}
1749
1750/**
1751 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1752 *
1753 * @param dest always set at least to NULL
1754 *
1755 * @returns The number of bytes occupied by the string in the destination
1756 **/
1757
1758size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1759{
1760 size_t src_len = strlen(src)+1;
1761 *dest = NULL;
1762 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1763}
1764
1765/**
1766 Copy a string from a char* src to a unicode or ascii
1767 dos codepage destination choosing unicode or ascii based on the
1768 flags in the SMB buffer starting at base_ptr.
1769 Return the number of bytes occupied by the string in the destination.
1770 flags can have:
1771 STR_TERMINATE means include the null termination.
1772 STR_UPPER means uppercase in the destination.
1773 STR_ASCII use ascii even with unicode packet.
1774 STR_NOALIGN means don't do alignment.
1775 dest_len is the maximum length allowed in the destination. If dest_len
1776 is -1 then no maxiumum is used.
1777**/
1778
1779size_t push_string_fn(const char *function, unsigned int line,
1780 const void *base_ptr, uint16 flags2,
1781 void *dest, const char *src,
1782 size_t dest_len, int flags)
1783{
1784#ifdef DEVELOPER
1785 /* We really need to zero fill here, not clobber
1786 * region, as we want to ensure that valgrind thinks
1787 * all of the outgoing buffer has been written to
1788 * so a send() or write() won't trap an error.
1789 * JRA.
1790 */
1791#if 0
1792 clobber_region(function, line, dest, dest_len);
1793#else
1794 memset(dest, '\0', dest_len);
1795#endif
1796#endif
1797
1798 if (!(flags & STR_ASCII) && \
1799 ((flags & STR_UNICODE || \
1800 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1801 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1802 }
1803 return push_ascii(dest, src, dest_len, flags);
1804}
1805
1806
1807/**
1808 Copy a string from a unicode or ascii source (depending on
1809 the packet flags) to a char* destination.
1810 Flags can have:
1811 STR_TERMINATE means the string in src is null terminated.
1812 STR_UNICODE means to force as unicode.
1813 STR_ASCII use ascii even with unicode packet.
1814 STR_NOALIGN means don't do alignment.
1815 if STR_TERMINATE is set then src_len is ignored is it is -1
1816 src_len is the length of the source area in bytes.
1817 Return the number of bytes occupied by the string in src.
1818 The resulting string in "dest" is always null terminated.
1819**/
1820
1821size_t pull_string_fn(const char *function,
1822 unsigned int line,
1823 const void *base_ptr,
1824 uint16 smb_flags2,
1825 char *dest,
1826 const void *src,
1827 size_t dest_len,
1828 size_t src_len,
1829 int flags)
1830{
1831#ifdef DEVELOPER
1832 clobber_region(function, line, dest, dest_len);
1833#endif
1834
1835 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1836 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1837 "UNICODE defined");
1838 }
1839
1840 if (!(flags & STR_ASCII) && \
1841 ((flags & STR_UNICODE || \
1842 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1843 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1844 }
1845 return pull_ascii(dest, src, dest_len, src_len, flags);
1846}
1847
1848/**
1849 Copy a string from a unicode or ascii source (depending on
1850 the packet flags) to a char* destination.
1851 Variant that uses talloc.
1852 Flags can have:
1853 STR_TERMINATE means the string in src is null terminated.
1854 STR_UNICODE means to force as unicode.
1855 STR_ASCII use ascii even with unicode packet.
1856 STR_NOALIGN means don't do alignment.
1857 if STR_TERMINATE is set then src_len is ignored is it is -1
1858 src_len is the length of the source area in bytes.
1859 Return the number of bytes occupied by the string in src.
1860 The resulting string in "dest" is always null terminated.
1861**/
1862
1863size_t pull_string_talloc_fn(const char *function,
1864 unsigned int line,
1865 TALLOC_CTX *ctx,
1866 const void *base_ptr,
1867 uint16 smb_flags2,
1868 char **ppdest,
1869 const void *src,
1870 size_t src_len,
1871 int flags)
1872{
1873 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1874 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1875 "UNICODE defined");
1876 }
1877
1878 if (!(flags & STR_ASCII) && \
1879 ((flags & STR_UNICODE || \
1880 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1881 return pull_ucs2_base_talloc(ctx,
1882 base_ptr,
1883 ppdest,
1884 src,
1885 src_len,
1886 flags);
1887 }
1888 return pull_ascii_base_talloc(ctx,
1889 ppdest,
1890 src,
1891 src_len,
1892 flags);
1893}
1894
1895
1896size_t align_string(const void *base_ptr, const char *p, int flags)
1897{
1898 if (!(flags & STR_ASCII) && \
1899 ((flags & STR_UNICODE || \
1900 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1901 return ucs2_align(base_ptr, p, flags);
1902 }
1903 return 0;
1904}
1905
1906/*
1907 Return the unicode codepoint for the next multi-byte CH_UNIX character
1908 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1909
1910 Also return the number of bytes consumed (which tells the caller
1911 how many bytes to skip to get to the next CH_UNIX character).
1912
1913 Return INVALID_CODEPOINT if the next character cannot be converted.
1914*/
1915
1916codepoint_t next_codepoint(const char *str, size_t *size)
1917{
1918 /* It cannot occupy more than 4 bytes in UTF16 format */
1919 uint8_t buf[4];
1920 smb_iconv_t descriptor;
1921 size_t ilen_orig;
1922 size_t ilen;
1923 size_t olen;
1924 char *outbuf;
1925
1926 if ((str[0] & 0x80) == 0) {
1927 *size = 1;
1928 return (codepoint_t)str[0];
1929 }
1930
1931 /* We assume that no multi-byte character can take
1932 more than 5 bytes. This is OK as we only
1933 support codepoints up to 1M */
1934
1935 ilen_orig = strnlen(str, 5);
1936 ilen = ilen_orig;
1937
1938 lazy_initialize_conv();
1939
1940 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1941 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1942 *size = 1;
1943 return INVALID_CODEPOINT;
1944 }
1945
1946 /* This looks a little strange, but it is needed to cope
1947 with codepoints above 64k which are encoded as per RFC2781. */
1948 olen = 2;
1949 outbuf = (char *)buf;
1950 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1951 if (olen == 2) {
1952 /* We failed to convert to a 2 byte character.
1953 See if we can convert to a 4 UTF16-LE byte char encoding.
1954 */
1955 olen = 4;
1956 outbuf = (char *)buf;
1957 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1958 if (olen == 4) {
1959 /* We didn't convert any bytes */
1960 *size = 1;
1961 return INVALID_CODEPOINT;
1962 }
1963 olen = 4 - olen;
1964 } else {
1965 olen = 2 - olen;
1966 }
1967
1968 *size = ilen_orig - ilen;
1969
1970 if (olen == 2) {
1971 /* 2 byte, UTF16-LE encoded value. */
1972 return (codepoint_t)SVAL(buf, 0);
1973 }
1974 if (olen == 4) {
1975 /* Decode a 4 byte UTF16-LE character manually.
1976 See RFC2871 for the encoding machanism.
1977 */
1978 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1979 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1980
1981 return (codepoint_t)0x10000 +
1982 (w1 << 10) + w2;
1983 }
1984
1985 /* no other length is valid */
1986 return INVALID_CODEPOINT;
1987}
Note: See TracBrowser for help on using the repository browser.