source: branches/samba-3.3.x/source/lib/charcnv33.c@ 374

Last change on this file since 374 was 374, checked in by Herwig Bauernfeind, 16 years ago

Update Samba 3.3 to 3.3.10 (new files)

File size: 51.2 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22*/
23#include "includes.h"
24
25/* We can parameterize this if someone complains.... JRA. */
26
char lp_failed_convert_char(void)
{
	/* Character substituted for anything that cannot be converted. */
	const char fallback = '_';

	return fallback;
}
31
32/**
33 * @file
34 *
35 * @brief Character-set conversion routines built on our iconv.
36 *
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
43 *
44 * @sa lib/iconv.c
45 */
46
47
48static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50static bool initialized;
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57 const char *ret = NULL;
58 if (ch == CH_UTF16LE) ret = "UTF-16LE";
59 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60 else if (ch == CH_UNIX) ret = lp_unix_charset();
61 else if (ch == CH_DOS) ret = lp_dos_charset();
62 else if (ch == CH_DISPLAY) ret = lp_display_charset();
63 else if (ch == CH_UTF8) ret = "UTF8";
64
65#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66 if (ret && !strcmp(ret, "LOCALE")) {
67 const char *ln = NULL;
68
69#ifdef HAVE_SETLOCALE
70 setlocale(LC_ALL, "");
71#endif
72 ln = nl_langinfo(CODESET);
73 if (ln) {
74 /* Check whether the charset name is supported
75 by iconv */
76 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77 if (handle == (smb_iconv_t) -1) {
78 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79 ln = NULL;
80 } else {
81 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82 smb_iconv_close(handle);
83 }
84 }
85 ret = ln;
86 }
87#endif
88
89 if (!ret || !*ret) ret = "ASCII";
90 return ret;
91}
92
93void lazy_initialize_conv(void)
94{
95 if (!initialized) {
96 load_case_tables();
97 init_iconv();
98 initialized = true;
99 }
100}
101
102/**
103 * Destroy global objects allocated by init_iconv()
104 **/
105void gfree_charcnv(void)
106{
107 int c1, c2;
108
109 for (c1=0;c1<NUM_CHARSETS;c1++) {
110 for (c2=0;c2<NUM_CHARSETS;c2++) {
111 if ( conv_handles[c1][c2] ) {
112 smb_iconv_close( conv_handles[c1][c2] );
113 conv_handles[c1][c2] = 0;
114 }
115 }
116 }
117 initialized = false;
118}
119
120/**
121 * Initialize iconv conversion descriptors.
122 *
123 * This is called the first time it is needed, and also called again
124 * every time the configuration is reloaded, because the charset or
125 * codepage might have changed.
126 **/
127void init_iconv(void)
128{
129 int c1, c2;
130 bool did_reload = False;
131
132 /* so that charset_name() works we need to get the UNIX<->UCS2 going
133 first */
134 if (!conv_handles[CH_UNIX][CH_UTF16LE])
135 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
136
137 if (!conv_handles[CH_UTF16LE][CH_UNIX])
138 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
139
140 for (c1=0;c1<NUM_CHARSETS;c1++) {
141 for (c2=0;c2<NUM_CHARSETS;c2++) {
142 const char *n1 = charset_name((charset_t)c1);
143 const char *n2 = charset_name((charset_t)c2);
144 if (conv_handles[c1][c2] &&
145 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
146 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
147 continue;
148
149 did_reload = True;
150
151 if (conv_handles[c1][c2])
152 smb_iconv_close(conv_handles[c1][c2]);
153
154 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
155 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
156 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
157 charset_name((charset_t)c1), charset_name((charset_t)c2)));
158 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
159 n1 = "ASCII";
160 }
161 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
162 n2 = "ASCII";
163 }
164 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
165 n1, n2 ));
166 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
167 if (!conv_handles[c1][c2]) {
168 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
169 smb_panic("init_iconv: conv_handle initialization failed");
170 }
171 }
172 }
173 }
174
175 if (did_reload) {
176 /* XXX: Does this really get called every time the dos
177 * codepage changes? */
178 /* XXX: Is the did_reload test too strict? */
179 conv_silent = True;
180 init_valid_table();
181 conv_silent = False;
182 }
183}
184
185/**
186 * Convert string from one encoding to another, making error checking etc
187 * Slow path version - uses (slow) iconv.
188 *
189 * @param src pointer to source string (multibyte or singlebyte)
190 * @param srclen length of the source string in bytes
191 * @param dest pointer to destination string (multibyte or singlebyte)
192 * @param destlen maximal length allowed for string
193 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
194 * @returns the number of bytes occupied in the destination
195 *
196 * Ensure the srclen contains the terminating zero.
197 *
198 **/
199
200static size_t convert_string_internal(charset_t from, charset_t to,
201 void const *src, size_t srclen,
202 void *dest, size_t destlen, bool allow_bad_conv)
203{
204 size_t i_len, o_len;
205 size_t retval;
206 const char* inbuf = (const char*)src;
207 char* outbuf = (char*)dest;
208 smb_iconv_t descriptor;
209
210 lazy_initialize_conv();
211
212 descriptor = conv_handles[from][to];
213
214 if (srclen == (size_t)-1) {
215 if (from == CH_UTF16LE || from == CH_UTF16BE) {
216 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
217 } else {
218 srclen = strlen((const char *)src)+1;
219 }
220 }
221
222
223 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
224 if (!conv_silent)
225 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
226 return (size_t)-1;
227 }
228
229 i_len=srclen;
230 o_len=destlen;
231
232 again:
233
234 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
235 if(retval==(size_t)-1) {
236 const char *reason="unknown error";
237 switch(errno) {
238 case EINVAL:
239 reason="Incomplete multibyte sequence";
240 if (!conv_silent)
241 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
242 if (allow_bad_conv)
243 goto use_as_is;
244 return (size_t)-1;
245 case E2BIG:
246 reason="No more room";
247 if (!conv_silent) {
248 if (from == CH_UNIX) {
249 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
250 charset_name(from), charset_name(to),
251 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
252 } else {
253 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
254 charset_name(from), charset_name(to),
255 (unsigned int)srclen, (unsigned int)destlen));
256 }
257 }
258 break;
259 case EILSEQ:
260 reason="Illegal multibyte sequence";
261 if (!conv_silent)
262 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
263 if (allow_bad_conv)
264 goto use_as_is;
265
266 return (size_t)-1;
267 default:
268 if (!conv_silent)
269 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270 return (size_t)-1;
271 }
272 /* smb_panic(reason); */
273 }
274 return destlen-o_len;
275
276 use_as_is:
277
278 /*
279 * Conversion not supported. This is actually an error, but there are so
280 * many misconfigured iconv systems and smb.conf's out there we can't just
281 * fail. Do a very bad conversion instead.... JRA.
282 */
283
284 {
285 if (o_len == 0 || i_len == 0)
286 return destlen - o_len;
287
288 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
289 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
290 /* Can't convert from utf16 any endian to multibyte.
291 Replace with the default fail char.
292 */
293 if (i_len < 2)
294 return destlen - o_len;
295 if (i_len >= 2) {
296 *outbuf = lp_failed_convert_char();
297
298 outbuf++;
299 o_len--;
300
301 inbuf += 2;
302 i_len -= 2;
303 }
304
305 if (o_len == 0 || i_len == 0)
306 return destlen - o_len;
307
308 /* Keep trying with the next char... */
309 goto again;
310
311 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312 /* Can't convert to UTF16LE - just widen by adding the
313 default fail char then zero.
314 */
315 if (o_len < 2)
316 return destlen - o_len;
317
318 outbuf[0] = lp_failed_convert_char();
319 outbuf[1] = '\0';
320
321 inbuf++;
322 i_len--;
323
324 outbuf += 2;
325 o_len -= 2;
326
327 if (o_len == 0 || i_len == 0)
328 return destlen - o_len;
329
330 /* Keep trying with the next char... */
331 goto again;
332
333 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334 to != CH_UTF16LE && to != CH_UTF16BE) {
335 /* Failed multibyte to multibyte. Just copy the default fail char and
336 try again. */
337 outbuf[0] = lp_failed_convert_char();
338
339 inbuf++;
340 i_len--;
341
342 outbuf++;
343 o_len--;
344
345 if (o_len == 0 || i_len == 0)
346 return destlen - o_len;
347
348 /* Keep trying with the next char... */
349 goto again;
350
351 } else {
352 /* Keep compiler happy.... */
353 return destlen - o_len;
354 }
355 }
356}
357
358/**
359 * Convert string from one encoding to another, making error checking etc
360 * Fast path version - handles ASCII first.
361 *
362 * @param src pointer to source string (multibyte or singlebyte)
363 * @param srclen length of the source string in bytes, or -1 for nul terminated.
364 * @param dest pointer to destination string (multibyte or singlebyte)
365 * @param destlen maximal length allowed for string - *NEVER* -1.
366 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367 * @returns the number of bytes occupied in the destination
368 *
369 * Ensure the srclen contains the terminating zero.
370 *
371 * This function has been hand-tuned to provide a fast path.
372 * Don't change unless you really know what you are doing. JRA.
373 **/
374
375size_t convert_string(charset_t from, charset_t to,
376 void const *src, size_t srclen,
377 void *dest, size_t destlen, bool allow_bad_conv)
378{
379 /*
380 * NB. We deliberately don't do a strlen here if srclen == -1.
381 * This is very expensive over millions of calls and is taken
382 * care of in the slow path in convert_string_internal. JRA.
383 */
384
385#ifdef DEVELOPER
386 SMB_ASSERT(destlen != (size_t)-1);
387#endif
388
389 if (srclen == 0)
390 return 0;
391
392 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393 const unsigned char *p = (const unsigned char *)src;
394 unsigned char *q = (unsigned char *)dest;
395 size_t slen = srclen;
396 size_t dlen = destlen;
397 unsigned char lastp = '\0';
398 size_t retval = 0;
399
400 /* If all characters are ascii, fast path here. */
401 while (slen && dlen) {
402 if ((lastp = *p) <= 0x7f) {
403 *q++ = *p++;
404 if (slen != (size_t)-1) {
405 slen--;
406 }
407 dlen--;
408 retval++;
409 if (!lastp)
410 break;
411 } else {
412#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413 goto general_case;
414#else
415 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416 if (ret == (size_t)-1) {
417 return ret;
418 }
419 return retval + ret;
420#endif
421 }
422 }
423 if (!dlen) {
424 /* Even if we fast path we should note if we ran out of room. */
425 if (((slen != (size_t)-1) && slen) ||
426 ((slen == (size_t)-1) && lastp)) {
427 errno = E2BIG;
428 }
429 }
430 return retval;
431// DEBUG(10, ("convert_string: 3"));
432
433 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
434 const unsigned char *p = (const unsigned char *)src;
435 unsigned char *q = (unsigned char *)dest;
436 size_t retval = 0;
437 size_t slen = srclen;
438 size_t dlen = destlen;
439 unsigned char lastp = '\0';
440
441 /* If all characters are ascii, fast path here. */
442 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
443 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
444 *q++ = *p;
445 if (slen != (size_t)-1) {
446 slen -= 2;
447 }
448 p += 2;
449 dlen--;
450 retval++;
451 if (!lastp)
452 break;
453 } else {
454#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
455 goto general_case;
456#else
457 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
458 if (ret == (size_t)-1) {
459 return ret;
460 }
461 return retval + ret;
462#endif
463 }
464 }
465 if (!dlen) {
466 /* Even if we fast path we should note if we ran out of room. */
467 if (((slen != (size_t)-1) && slen) ||
468 ((slen == (size_t)-1) && lastp)) {
469 errno = E2BIG;
470 }
471 }
472 return retval;
473
474 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
475 const unsigned char *p = (const unsigned char *)src;
476 unsigned char *q = (unsigned char *)dest;
477 size_t retval = 0;
478 size_t slen = srclen;
479 size_t dlen = destlen;
480 unsigned char lastp = '\0';
481
482 /* If all characters are ascii, fast path here. */
483 while (slen && (dlen >= 2)) {
484 if ((lastp = *p) <= 0x7F) {
485 *q++ = *p++;
486 *q++ = '\0';
487 if (slen != (size_t)-1) {
488 slen--;
489 }
490 dlen -= 2;
491 retval += 2;
492 if (!lastp)
493 break;
494 } else {
495#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
496 goto general_case;
497#else
498 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
499 if (ret == (size_t)-1) {
500 return ret;
501 }
502 return retval + ret;
503#endif
504 }
505 }
506 if (!dlen) {
507 /* Even if we fast path we should note if we ran out of room. */
508 if (((slen != (size_t)-1) && slen) ||
509 ((slen == (size_t)-1) && lastp)) {
510 errno = E2BIG;
511 }
512 }
513 return retval;
514 }
515
516#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
517 general_case:
518#endif
519 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
520}
521
522/**
523 * Convert between character sets, allocating a new buffer for the result.
524 *
525 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
526 * (this is a bad interface and needs fixing. JRA).
527 * @param srclen length of source buffer.
528 * @param dest always set at least to NULL
529 * @param converted_size set to the size of the allocated buffer on return
530 * true
531 * @note -1 is not accepted for srclen.
532 *
533 * @return true if new buffer was correctly allocated, and string was
534 * converted.
535 *
536 * Ensure the srclen contains the terminating zero.
537 *
538 * I hate the goto's in this function. It's embarrassing.....
539 * There has to be a cleaner way to do this. JRA.
540 **/
541
542bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
543 void const *src, size_t srclen, void *dst,
544 size_t *converted_size, bool allow_bad_conv)
545{
546 size_t i_len, o_len, destlen = (srclen * 3) / 2;
547 size_t retval;
548 const char *inbuf = (const char *)src;
549 char *outbuf = NULL, *ob = NULL;
550 smb_iconv_t descriptor;
551 void **dest = (void **)dst;
552
553 *dest = NULL;
554
555 if (!converted_size) {
556 errno = EINVAL;
557 return false;
558 }
559
560 if (src == NULL || srclen == (size_t)-1) {
561 errno = EINVAL;
562 return false;
563 }
564 if (srclen == 0) {
565 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
566 if (ob == NULL) {
567 errno = ENOMEM;
568 return false;
569 }
570 *dest = ob;
571 *converted_size = 0;
572 return true;
573 }
574
575 lazy_initialize_conv();
576
577 descriptor = conv_handles[from][to];
578
579 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
580 if (!conv_silent)
581 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
582 errno = EOPNOTSUPP;
583 return false;
584 }
585
586 convert:
587
588 /* +2 is for ucs2 null termination. */
589 if ((destlen*2)+2 < destlen) {
590 /* wrapped ! abort. */
591 if (!conv_silent)
592 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
593 if (!ctx)
594 SAFE_FREE(outbuf);
595 errno = EOPNOTSUPP;
596 return false;
597 } else {
598 destlen = destlen * 2;
599 }
600
601 /* +2 is for ucs2 null termination. */
602 if (ctx) {
603 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
604 } else {
605 ob = (char *)SMB_REALLOC(ob, destlen + 2);
606 }
607
608 if (!ob) {
609 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
610 errno = ENOMEM;
611 return false;
612 }
613 outbuf = ob;
614 i_len = srclen;
615 o_len = destlen;
616
617 again:
618
619
620 retval = smb_iconv(descriptor,
621 &inbuf, &i_len,
622 &outbuf, &o_len);
623 if(retval == (size_t)-1) {
624 const char *reason="unknown error";
625 switch(errno) {
626 case EINVAL:
627 reason="Incomplete multibyte sequence";
628 if (!conv_silent)
629 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
630 if (allow_bad_conv)
631 goto use_as_is;
632 break;
633 case E2BIG:
634 goto convert;
635 case EILSEQ:
636 reason="Illegal multibyte sequence";
637 if (!conv_silent)
638 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
639 if (allow_bad_conv)
640 goto use_as_is;
641 break;
642 }
643 if (!conv_silent)
644 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
645 /* smb_panic(reason); */
646 if (ctx) {
647 TALLOC_FREE(ob);
648 } else {
649 SAFE_FREE(ob);
650 }
651 return false;
652 }
653
654 out:
655
656 destlen = destlen - o_len;
657 /* Don't shrink unless we're reclaiming a lot of
658 * space. This is in the hot codepath and these
659 * reallocs *cost*. JRA.
660 */
661 if (o_len > 1024) {
662 /* We're shrinking here so we know the +2 is safe from wrap. */
663 if (ctx) {
664 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
665 } else {
666 ob = (char *)SMB_REALLOC(ob,destlen + 2);
667 }
668 }
669
670 if (destlen && !ob) {
671 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
672 errno = ENOMEM;
673 return false;
674 }
675
676 *dest = ob;
677
678 /* Must ucs2 null terminate in the extra space we allocated. */
679 ob[destlen] = '\0';
680 ob[destlen+1] = '\0';
681
682 *converted_size = destlen;
683 return true;
684
685 use_as_is:
686
687 /*
688 * Conversion not supported. This is actually an error, but there are so
689 * many misconfigured iconv systems and smb.conf's out there we can't just
690 * fail. Do a very bad conversion instead.... JRA.
691 */
692
693 {
694 if (o_len == 0 || i_len == 0)
695 goto out;
696
697 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
698 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
699 /* Can't convert from utf16 any endian to multibyte.
700 Replace with the default fail char.
701 */
702
703 if (i_len < 2)
704 goto out;
705
706 if (i_len >= 2) {
707 *outbuf = lp_failed_convert_char();
708
709 outbuf++;
710 o_len--;
711
712 inbuf += 2;
713 i_len -= 2;
714 }
715
716 if (o_len == 0 || i_len == 0)
717 goto out;
718
719 /* Keep trying with the next char... */
720 goto again;
721
722 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
723 /* Can't convert to UTF16LE - just widen by adding the
724 default fail char then zero.
725 */
726 if (o_len < 2)
727 goto out;
728
729 outbuf[0] = lp_failed_convert_char();
730 outbuf[1] = '\0';
731
732 inbuf++;
733 i_len--;
734
735 outbuf += 2;
736 o_len -= 2;
737
738 if (o_len == 0 || i_len == 0)
739 goto out;
740
741 /* Keep trying with the next char... */
742 goto again;
743
744 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
745 to != CH_UTF16LE && to != CH_UTF16BE) {
746 /* Failed multibyte to multibyte. Just copy the default fail char and
747 try again. */
748 outbuf[0] = lp_failed_convert_char();
749
750 inbuf++;
751 i_len--;
752
753 outbuf++;
754 o_len--;
755
756 if (o_len == 0 || i_len == 0)
757 goto out;
758
759 /* Keep trying with the next char... */
760 goto again;
761
762 } else {
763 /* Keep compiler happy.... */
764 goto out;
765 }
766 }
767}
768
769/**
770 * Convert between character sets, allocating a new buffer using talloc for the result.
771 *
772 * @param srclen length of source buffer.
773 * @param dest always set at least to NULL
774 * @param converted_size set to the number of bytes occupied by the string in
775 * the destination on success.
776 * @note -1 is not accepted for srclen.
777 *
778 * @return true if new buffer was correctly allocated, and string was
779 * converted.
780 */
781bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
782 void const *src, size_t srclen, void *dst,
783 size_t *converted_size, bool allow_bad_conv)
784{
785 void **dest = (void **)dst;
786
787 *dest = NULL;
788 return convert_string_allocate(ctx, from, to, src, srclen, dest,
789 converted_size, allow_bad_conv);
790}
791
792size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
793{
794 size_t size;
795 smb_ucs2_t *buffer;
796
797 if (!push_ucs2_allocate(&buffer, src, &size)) {
798 return (size_t)-1;
799 }
800
801 if (!strupper_w(buffer) && (dest == src)) {
802 free(buffer);
803 return srclen;
804 }
805
806 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
807 free(buffer);
808 return size;
809}
810
811/**
812 strdup() a unix string to upper case.
813**/
814
815char *strdup_upper(const char *s)
816{
817 char *out_buffer = SMB_STRDUP(s);
818 const unsigned char *p = (const unsigned char *)s;
819 unsigned char *q = (unsigned char *)out_buffer;
820
821 if (!q) {
822 return NULL;
823 }
824
825 /* this is quite a common operation, so we want it to be
826 fast. We optimise for the ascii case, knowing that all our
827 supported multi-byte character sets are ascii-compatible
828 (ie. they match for the first 128 chars) */
829
830 while (*p) {
831 if (*p & 0x80)
832 break;
833 *q++ = toupper_ascii_fast(*p);
834 p++;
835 }
836
837 if (*p) {
838 /* MB case. */
839 size_t converted_size, converted_size2;
840 smb_ucs2_t *buffer = NULL;
841
842 SAFE_FREE(out_buffer);
843 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
844 strlen(s) + 1,
845 (void **)(void *)&buffer,
846 &converted_size, True))
847 {
848 return NULL;
849 }
850
851 strupper_w(buffer);
852
853 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
854 converted_size,
855 (void **)(void *)&out_buffer,
856 &converted_size2, True))
857 {
858 TALLOC_FREE(buffer);
859 return NULL;
860 }
861
862 /* Don't need the intermediate buffer
863 * anymore.
864 */
865 TALLOC_FREE(buffer);
866 }
867
868 return out_buffer;
869}
870
871/**
872 talloc_strdup() a unix string to upper case.
873**/
874
875char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
876{
877 char *out_buffer = talloc_strdup(ctx,s);
878 const unsigned char *p = (const unsigned char *)s;
879 unsigned char *q = (unsigned char *)out_buffer;
880
881 if (!q) {
882 return NULL;
883 }
884
885 /* this is quite a common operation, so we want it to be
886 fast. We optimise for the ascii case, knowing that all our
887 supported multi-byte character sets are ascii-compatible
888 (ie. they match for the first 128 chars) */
889
890 while (*p) {
891 if (*p & 0x80)
892 break;
893 *q++ = toupper_ascii_fast(*p);
894 p++;
895 }
896
897 if (*p) {
898 /* MB case. */
899 size_t converted_size, converted_size2;
900 smb_ucs2_t *ubuf = NULL;
901
902 /* We're not using the ascii buffer above. */
903 TALLOC_FREE(out_buffer);
904
905 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
906 strlen(s)+1, (void *)&ubuf,
907 &converted_size, True))
908 {
909 return NULL;
910 }
911
912 strupper_w(ubuf);
913
914 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
915 converted_size, (void *)&out_buffer,
916 &converted_size2, True))
917 {
918 TALLOC_FREE(ubuf);
919 return NULL;
920 }
921
922 /* Don't need the intermediate buffer
923 * anymore.
924 */
925 TALLOC_FREE(ubuf);
926 }
927
928 return out_buffer;
929}
930
931size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
932{
933 size_t size;
934 smb_ucs2_t *buffer = NULL;
935
936 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
937 (void **)(void *)&buffer, &size,
938 True))
939 {
940 smb_panic("failed to create UCS2 buffer");
941 }
942 if (!strlower_w(buffer) && (dest == src)) {
943 SAFE_FREE(buffer);
944 return srclen;
945 }
946 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
947 SAFE_FREE(buffer);
948 return size;
949}
950
951/**
952 strdup() a unix string to lower case.
953**/
954
955char *strdup_lower(const char *s)
956{
957 size_t converted_size;
958 smb_ucs2_t *buffer = NULL;
959 char *out_buffer;
960
961 if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
962 return NULL;
963 }
964
965 strlower_w(buffer);
966
967 if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
968 SAFE_FREE(buffer);
969 return NULL;
970 }
971
972 SAFE_FREE(buffer);
973
974 return out_buffer;
975}
976
977char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
978{
979 size_t converted_size;
980 smb_ucs2_t *buffer = NULL;
981 char *out_buffer;
982
983 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
984 return NULL;
985 }
986
987 strlower_w(buffer);
988
989 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
990 TALLOC_FREE(buffer);
991 return NULL;
992 }
993
994 TALLOC_FREE(buffer);
995
996 return out_buffer;
997}
998
999
1000size_t ucs2_align(const void *base_ptr, const void *p, int flags)
1001{
1002 if (flags & (STR_NOALIGN|STR_ASCII))
1003 return 0;
1004 return PTR_DIFF(p, base_ptr) & 1;
1005}
1006
1007
1008/**
1009 * Copy a string from a char* unix src to a dos codepage string destination.
1010 *
1011 * @return the number of bytes occupied by the string in the destination.
1012 *
1013 * @param flags can include
1014 * <dl>
1015 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1016 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1017 * </dl>
1018 *
1019 * @param dest_len the maximum length in bytes allowed in the
1020 * destination.
1021 **/
1022size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1023{
1024 size_t src_len = strlen(src);
1025 char *tmpbuf = NULL;
1026 size_t ret;
1027
1028 /* No longer allow a length of -1. */
1029 if (dest_len == (size_t)-1) {
1030 smb_panic("push_ascii - dest_len == -1");
1031 }
1032
1033 if (flags & STR_UPPER) {
1034 tmpbuf = SMB_STRDUP(src);
1035 if (!tmpbuf) {
1036 smb_panic("malloc fail");
1037 }
1038 strupper_m(tmpbuf);
1039 src = tmpbuf;
1040 }
1041
1042 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1043 src_len++;
1044 }
1045
1046 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1047 if (ret == (size_t)-1 &&
1048 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1049 && dest_len > 0) {
1050 ((char *)dest)[0] = '\0';
1051 }
1052 SAFE_FREE(tmpbuf);
1053 return ret;
1054}
1055
1056size_t push_ascii_fstring(void *dest, const char *src)
1057{
1058 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1059}
1060
1061/********************************************************************
1062 Push an nstring - ensure null terminated. Written by
1063 moriyama@miraclelinux.com (MORIYAMA Masayuki).
1064********************************************************************/
1065
1066size_t push_ascii_nstring(void *dest, const char *src)
1067{
1068 size_t i, buffer_len, dest_len;
1069 smb_ucs2_t *buffer;
1070
1071 conv_silent = True;
1072 if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1073 smb_panic("failed to create UCS2 buffer");
1074 }
1075
1076 /* We're using buffer_len below to count ucs2 characters, not bytes. */
1077 buffer_len /= sizeof(smb_ucs2_t);
1078
1079 dest_len = 0;
1080 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1081 unsigned char mb[10];
1082 /* Convert one smb_ucs2_t character at a time. */
1083 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1084 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1085 memcpy((char *)dest + dest_len, mb, mb_len);
1086 dest_len += mb_len;
1087 } else {
1088 errno = E2BIG;
1089 break;
1090 }
1091 }
1092 ((char *)dest)[dest_len] = '\0';
1093
1094 SAFE_FREE(buffer);
1095 conv_silent = False;
1096 return dest_len;
1097}
1098
1099/********************************************************************
1100 Push and malloc an ascii string. src and dest null terminated.
1101********************************************************************/
1102
1103bool push_ascii_allocate(char **dest, const char *src, size_t *converted_size)
1104{
1105 size_t src_len = strlen(src)+1;
1106
1107 *dest = NULL;
1108 return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1109 (void **)dest, converted_size, True);
1110}
1111
1112/**
1113 * Copy a string from a dos codepage source to a unix char* destination.
1114 *
1115 * The resulting string in "dest" is always null terminated.
1116 *
1117 * @param flags can have:
1118 * <dl>
1119 * <dt>STR_TERMINATE</dt>
1120 * <dd>STR_TERMINATE means the string in @p src
1121 * is null terminated, and src_len is ignored.</dd>
1122 * </dl>
1123 *
1124 * @param src_len is the length of the source area in bytes.
1125 * @returns the number of bytes occupied by the string in @p src.
1126 **/
1127size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1128{
1129 size_t ret;
1130
1131 if (dest_len == (size_t)-1) {
1132 /* No longer allow dest_len of -1. */
1133 smb_panic("pull_ascii - invalid dest_len of -1");
1134 }
1135
1136 if (flags & STR_TERMINATE) {
1137 if (src_len == (size_t)-1) {
1138 src_len = strlen((const char *)src) + 1;
1139 } else {
1140 size_t len = strnlen((const char *)src, src_len);
1141 if (len < src_len)
1142 len++;
1143 src_len = len;
1144 }
1145 }
1146
1147 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1148 if (ret == (size_t)-1) {
1149 ret = 0;
1150 dest_len = 0;
1151 }
1152
1153 if (dest_len && ret) {
1154 /* Did we already process the terminating zero ? */
1155 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1156 dest[MIN(ret, dest_len-1)] = 0;
1157 }
1158 } else {
1159 dest[0] = 0;
1160 }
1161
1162 return src_len;
1163}
1164
1165/**
1166 * Copy a string from a dos codepage source to a unix char* destination.
1167 Talloc version.
1168 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1169 needs fixing. JRA).
1170 *
1171 * The resulting string in "dest" is always null terminated.
1172 *
1173 * @param flags can have:
1174 * <dl>
1175 * <dt>STR_TERMINATE</dt>
1176 * <dd>STR_TERMINATE means the string in @p src
1177 * is null terminated, and src_len is ignored.</dd>
1178 * </dl>
1179 *
1180 * @param src_len is the length of the source area in bytes.
1181 * @returns the number of bytes occupied by the string in @p src.
1182 **/
1183
1184static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1185 char **ppdest,
1186 const void *src,
1187 size_t src_len,
1188 int flags)
1189{
1190 char *dest = NULL;
1191 size_t dest_len;
1192
1193#ifdef DEVELOPER
1194 /* Ensure we never use the braindead "malloc" varient. */
1195 if (ctx == NULL) {
1196 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1197 }
1198#endif
1199
1200 *ppdest = NULL;
1201
1202 if (!src_len) {
1203 return 0;
1204 }
1205
1206 if (flags & STR_TERMINATE) {
1207 if (src_len == (size_t)-1) {
1208 src_len = strlen((const char *)src) + 1;
1209 } else {
1210 size_t len = strnlen((const char *)src, src_len);
1211 if (len < src_len)
1212 len++;
1213 src_len = len;
1214 }
1215 /* Ensure we don't use an insane length from the client. */
1216 if (src_len >= 1024*1024) {
1217 char *msg = talloc_asprintf(ctx,
1218 "Bad src length (%u) in "
1219 "pull_ascii_base_talloc",
1220 (unsigned int)src_len);
1221 smb_panic(msg);
1222 }
1223 } else {
1224 /* Can't have an unlimited length
1225 * non STR_TERMINATE'd.
1226 */
1227 if (src_len == (size_t)-1) {
1228 errno = EINVAL;
1229 return 0;
1230 }
1231 }
1232
1233 /* src_len != -1 here. */
1234
1235 if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1236 &dest_len, True)) {
1237 dest_len = 0;
1238 }
1239
1240 if (dest_len && dest) {
1241 /* Did we already process the terminating zero ? */
1242 if (dest[dest_len-1] != 0) {
1243 size_t size = talloc_get_size(dest);
1244 /* Have we got space to append the '\0' ? */
1245 if (size <= dest_len) {
1246 /* No, realloc. */
1247 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1248 dest_len+1);
1249 if (!dest) {
1250 /* talloc fail. */
1251 dest_len = (size_t)-1;
1252 return 0;
1253 }
1254 }
1255 /* Yay - space ! */
1256 dest[dest_len] = '\0';
1257 dest_len++;
1258 }
1259 } else if (dest) {
1260 dest[0] = 0;
1261 }
1262
1263 *ppdest = dest;
1264 return src_len;
1265}
1266
1267size_t pull_ascii_fstring(char *dest, const void *src)
1268{
1269 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1270}
1271
1272/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1273
1274size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1275{
1276 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1277}
1278
1279/**
1280 * Copy a string from a char* src to a unicode destination.
1281 *
1282 * @returns the number of bytes occupied by the string in the destination.
1283 *
1284 * @param flags can have:
1285 *
1286 * <dl>
1287 * <dt>STR_TERMINATE <dd>means include the null termination.
1288 * <dt>STR_UPPER <dd>means uppercase in the destination.
1289 * <dt>STR_NOALIGN <dd>means don't do alignment.
1290 * </dl>
1291 *
1292 * @param dest_len is the maximum length allowed in the
1293 * destination.
1294 **/
1295
1296size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1297{
1298 size_t len=0;
1299 size_t src_len;
1300 size_t ret;
1301
1302 if (dest_len == (size_t)-1) {
1303 /* No longer allow dest_len of -1. */
1304 smb_panic("push_ucs2 - invalid dest_len of -1");
1305 }
1306
1307 if (flags & STR_TERMINATE)
1308 src_len = (size_t)-1;
1309 else
1310 src_len = strlen(src);
1311
1312 if (ucs2_align(base_ptr, dest, flags)) {
1313 *(char *)dest = 0;
1314 dest = (void *)((char *)dest + 1);
1315 if (dest_len)
1316 dest_len--;
1317 len++;
1318 }
1319
1320 /* ucs2 is always a multiple of 2 bytes */
1321 dest_len &= ~1;
1322
1323 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1324 if (ret == (size_t)-1) {
1325 if ((flags & STR_TERMINATE) &&
1326 dest &&
1327 dest_len) {
1328 *(char *)dest = 0;
1329 }
1330 return len;
1331 }
1332
1333 len += ret;
1334
1335 if (flags & STR_UPPER) {
1336 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1337 size_t i;
1338
1339 /* We check for i < (ret / 2) below as the dest string isn't null
1340 terminated if STR_TERMINATE isn't set. */
1341
1342 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1343 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1344 if (v != dest_ucs2[i]) {
1345 dest_ucs2[i] = v;
1346 }
1347 }
1348 }
1349
1350 return len;
1351}
1352
1353
1354/**
1355 * Copy a string from a unix char* src to a UCS2 destination,
1356 * allocating a buffer using talloc().
1357 *
1358 * @param dest always set at least to NULL
1359 * @parm converted_size set to the number of bytes occupied by the string in
1360 * the destination on success.
1361 *
1362 * @return true if new buffer was correctly allocated, and string was
1363 * converted.
1364 **/
1365bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1366 size_t *converted_size)
1367{
1368 size_t src_len = strlen(src)+1;
1369
1370 *dest = NULL;
1371 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1372 (void **)dest, converted_size, True);
1373}
1374
1375
1376/**
1377 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1378 *
1379 * @param dest always set at least to NULL
1380 * @parm converted_size set to the number of bytes occupied by the string in
1381 * the destination on success.
1382 *
1383 * @return true if new buffer was correctly allocated, and string was
1384 * converted.
1385 **/
1386
1387bool push_ucs2_allocate(smb_ucs2_t **dest, const char *src,
1388 size_t *converted_size)
1389{
1390 size_t src_len = strlen(src)+1;
1391
1392 *dest = NULL;
1393 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1394 (void **)dest, converted_size, True);
1395}
1396
1397/**
1398 Copy a string from a char* src to a UTF-8 destination.
1399 Return the number of bytes occupied by the string in the destination
1400 Flags can have:
1401 STR_TERMINATE means include the null termination
1402 STR_UPPER means uppercase in the destination
1403 dest_len is the maximum length allowed in the destination. If dest_len
1404 is -1 then no maxiumum is used.
1405**/
1406
1407static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1408{
1409 size_t src_len = 0;
1410 size_t ret;
1411 char *tmpbuf = NULL;
1412
1413 if (dest_len == (size_t)-1) {
1414 /* No longer allow dest_len of -1. */
1415 smb_panic("push_utf8 - invalid dest_len of -1");
1416 }
1417
1418 if (flags & STR_UPPER) {
1419 tmpbuf = strdup_upper(src);
1420 if (!tmpbuf) {
1421 return (size_t)-1;
1422 }
1423 src = tmpbuf;
1424 src_len = strlen(src);
1425 }
1426
1427 src_len = strlen(src);
1428 if (flags & STR_TERMINATE) {
1429 src_len++;
1430 }
1431
1432 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1433 SAFE_FREE(tmpbuf);
1434 return ret;
1435}
1436
1437size_t push_utf8_fstring(void *dest, const char *src)
1438{
1439 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1440}
1441
1442/**
1443 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1444 *
1445 * @param dest always set at least to NULL
1446 * @parm converted_size set to the number of bytes occupied by the string in
1447 * the destination on success.
1448 *
1449 * @return true if new buffer was correctly allocated, and string was
1450 * converted.
1451 **/
1452
1453bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1454 size_t *converted_size)
1455{
1456 size_t src_len = strlen(src)+1;
1457
1458 *dest = NULL;
1459 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1460 (void**)dest, converted_size, True);
1461}
1462
1463/**
1464 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1465 *
1466 * @param dest always set at least to NULL
1467 * @parm converted_size set to the number of bytes occupied by the string in
1468 * the destination on success.
1469 *
1470 * @return true if new buffer was correctly allocated, and string was
1471 * converted.
1472 **/
1473
1474bool push_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1475{
1476 size_t src_len = strlen(src)+1;
1477
1478 *dest = NULL;
1479 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1480 (void **)dest, converted_size, True);
1481}
1482
1483/**
1484 Copy a string from a ucs2 source to a unix char* destination.
1485 Flags can have:
1486 STR_TERMINATE means the string in src is null terminated.
1487 STR_NOALIGN means don't try to align.
1488 if STR_TERMINATE is set then src_len is ignored if it is -1.
1489 src_len is the length of the source area in bytes
1490 Return the number of bytes occupied by the string in src.
1491 The resulting string in "dest" is always null terminated.
1492**/
1493
1494size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1495{
1496 size_t ret;
1497
1498 if (dest_len == (size_t)-1) {
1499 /* No longer allow dest_len of -1. */
1500 smb_panic("pull_ucs2 - invalid dest_len of -1");
1501 }
1502
1503 if (!src_len) {
1504 if (dest && dest_len > 0) {
1505 dest[0] = '\0';
1506 }
1507 return 0;
1508 }
1509
1510 if (ucs2_align(base_ptr, src, flags)) {
1511 src = (const void *)((const char *)src + 1);
1512 if (src_len != (size_t)-1)
1513 src_len--;
1514 }
1515
1516 if (flags & STR_TERMINATE) {
1517 /* src_len -1 is the default for null terminated strings. */
1518 if (src_len != (size_t)-1) {
1519 size_t len = strnlen_w((const smb_ucs2_t *)src,
1520 src_len/2);
1521 if (len < src_len/2)
1522 len++;
1523 src_len = len*2;
1524 }
1525 }
1526
1527 /* ucs2 is always a multiple of 2 bytes */
1528 if (src_len != (size_t)-1)
1529 src_len &= ~1;
1530
1531 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1532 if (ret == (size_t)-1) {
1533 ret = 0;
1534 dest_len = 0;
1535 }
1536
1537 if (src_len == (size_t)-1)
1538 src_len = ret*2;
1539
1540 if (dest_len && ret) {
1541 /* Did we already process the terminating zero ? */
1542 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1543 dest[MIN(ret, dest_len-1)] = 0;
1544 }
1545 } else {
1546 dest[0] = 0;
1547 }
1548
1549 return src_len;
1550}
1551
1552/**
1553 Copy a string from a ucs2 source to a unix char* destination.
1554 Talloc version with a base pointer.
1555 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1556 needs fixing. JRA).
1557 Flags can have:
1558 STR_TERMINATE means the string in src is null terminated.
1559 STR_NOALIGN means don't try to align.
1560 if STR_TERMINATE is set then src_len is ignored if it is -1.
1561 src_len is the length of the source area in bytes
1562 Return the number of bytes occupied by the string in src.
1563 The resulting string in "dest" is always null terminated.
1564**/
1565
1566size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1567 const void *base_ptr,
1568 char **ppdest,
1569 const void *src,
1570 size_t src_len,
1571 int flags)
1572{
1573 char *dest;
1574 size_t dest_len;
1575
1576 *ppdest = NULL;
1577
1578#ifdef DEVELOPER
1579 /* Ensure we never use the braindead "malloc" varient. */
1580 if (ctx == NULL) {
1581 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1582 }
1583#endif
1584
1585 if (!src_len) {
1586 return 0;
1587 }
1588
1589 if (ucs2_align(base_ptr, src, flags)) {
1590 src = (const void *)((const char *)src + 1);
1591 if (src_len != (size_t)-1)
1592 src_len--;
1593 }
1594
1595 if (flags & STR_TERMINATE) {
1596 /* src_len -1 is the default for null terminated strings. */
1597 if (src_len != (size_t)-1) {
1598 size_t len = strnlen_w((const smb_ucs2_t *)src,
1599 src_len/2);
1600 if (len < src_len/2)
1601 len++;
1602 src_len = len*2;
1603 } else {
1604 /*
1605 * src_len == -1 - alloc interface won't take this
1606 * so we must calculate.
1607 */
1608 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1609 }
1610 /* Ensure we don't use an insane length from the client. */
1611 if (src_len >= 1024*1024) {
1612 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1613 }
1614 } else {
1615 /* Can't have an unlimited length
1616 * non STR_TERMINATE'd.
1617 */
1618 if (src_len == (size_t)-1) {
1619 errno = EINVAL;
1620 return 0;
1621 }
1622 }
1623
1624 /* src_len != -1 here. */
1625
1626 /* ucs2 is always a multiple of 2 bytes */
1627 src_len &= ~1;
1628
1629 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1630 (void *)&dest, &dest_len, True)) {
1631 dest_len = 0;
1632 }
1633
1634 if (dest_len) {
1635 /* Did we already process the terminating zero ? */
1636 if (dest[dest_len-1] != 0) {
1637 size_t size = talloc_get_size(dest);
1638 /* Have we got space to append the '\0' ? */
1639 if (size <= dest_len) {
1640 /* No, realloc. */
1641 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1642 dest_len+1);
1643 if (!dest) {
1644 /* talloc fail. */
1645 dest_len = (size_t)-1;
1646 return 0;
1647 }
1648 }
1649 /* Yay - space ! */
1650 dest[dest_len] = '\0';
1651 dest_len++;
1652 }
1653 } else if (dest) {
1654 dest[0] = 0;
1655 }
1656
1657 *ppdest = dest;
1658 return src_len;
1659}
1660
1661size_t pull_ucs2_fstring(char *dest, const void *src)
1662{
1663 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1664}
1665
1666/**
1667 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1668 *
1669 * @param dest always set at least to NULL
1670 * @parm converted_size set to the number of bytes occupied by the string in
1671 * the destination on success.
1672 *
1673 * @return true if new buffer was correctly allocated, and string was
1674 * converted.
1675 **/
1676
1677bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1678 size_t *converted_size)
1679{
1680 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1681
1682 *dest = NULL;
1683 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1684 (void **)dest, converted_size, True);
1685}
1686
1687/**
1688 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1689 *
1690 * @param dest always set at least to NULL
1691 * @parm converted_size set to the number of bytes occupied by the string in
1692 * the destination on success.
1693 * @return true if new buffer was correctly allocated, and string was
1694 * converted.
1695 **/
1696
1697bool pull_ucs2_allocate(char **dest, const smb_ucs2_t *src,
1698 size_t *converted_size)
1699{
1700 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1701
1702 *dest = NULL;
1703 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1704 (void **)dest, converted_size, True);
1705}
1706
1707/**
1708 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1709 *
1710 * @param dest always set at least to NULL
1711 * @parm converted_size set to the number of bytes occupied by the string in
1712 * the destination on success.
1713 *
1714 * @return true if new buffer was correctly allocated, and string was
1715 * converted.
1716 **/
1717
1718bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1719 size_t *converted_size)
1720{
1721 size_t src_len = strlen(src)+1;
1722
1723 *dest = NULL;
1724 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1725 (void **)dest, converted_size, True);
1726}
1727
1728/**
1729 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1730 *
1731 * @param dest always set at least to NULL
1732 * @parm converted_size set to the number of bytes occupied by the string in
1733 * the destination on success.
1734 *
1735 * @return true if new buffer was correctly allocated, and string was
1736 * converted.
1737 **/
1738
1739bool pull_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1740{
1741 size_t src_len = strlen(src)+1;
1742
1743 *dest = NULL;
1744 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1745 (void **)dest, converted_size, True);
1746}
1747
1748/**
1749 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1750 *
1751 * @param dest always set at least to NULL
1752 * @parm converted_size set to the number of bytes occupied by the string in
1753 * the destination on success.
1754 *
1755 * @return true if new buffer was correctly allocated, and string was
1756 * converted.
1757 **/
1758
1759bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1760 size_t *converted_size)
1761{
1762 size_t src_len = strlen(src)+1;
1763
1764 *dest = NULL;
1765 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1766 (void **)dest, converted_size, True);
1767}
1768
1769/**
1770 Copy a string from a char* src to a unicode or ascii
1771 dos codepage destination choosing unicode or ascii based on the
1772 flags in the SMB buffer starting at base_ptr.
1773 Return the number of bytes occupied by the string in the destination.
1774 flags can have:
1775 STR_TERMINATE means include the null termination.
1776 STR_UPPER means uppercase in the destination.
1777 STR_ASCII use ascii even with unicode packet.
1778 STR_NOALIGN means don't do alignment.
1779 dest_len is the maximum length allowed in the destination. If dest_len
1780 is -1 then no maxiumum is used.
1781**/
1782
1783size_t push_string_fn(const char *function, unsigned int line,
1784 const void *base_ptr, uint16 flags2,
1785 void *dest, const char *src,
1786 size_t dest_len, int flags)
1787{
1788#ifdef DEVELOPER
1789 /* We really need to zero fill here, not clobber
1790 * region, as we want to ensure that valgrind thinks
1791 * all of the outgoing buffer has been written to
1792 * so a send() or write() won't trap an error.
1793 * JRA.
1794 */
1795#if 0
1796 clobber_region(function, line, dest, dest_len);
1797#else
1798 memset(dest, '\0', dest_len);
1799#endif
1800#endif
1801
1802 if (!(flags & STR_ASCII) && \
1803 ((flags & STR_UNICODE || \
1804 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1805 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1806 }
1807 return push_ascii(dest, src, dest_len, flags);
1808}
1809
1810
1811/**
1812 Copy a string from a unicode or ascii source (depending on
1813 the packet flags) to a char* destination.
1814 Flags can have:
1815 STR_TERMINATE means the string in src is null terminated.
1816 STR_UNICODE means to force as unicode.
1817 STR_ASCII use ascii even with unicode packet.
1818 STR_NOALIGN means don't do alignment.
1819 if STR_TERMINATE is set then src_len is ignored is it is -1
1820 src_len is the length of the source area in bytes.
1821 Return the number of bytes occupied by the string in src.
1822 The resulting string in "dest" is always null terminated.
1823**/
1824
1825size_t pull_string_fn(const char *function,
1826 unsigned int line,
1827 const void *base_ptr,
1828 uint16 smb_flags2,
1829 char *dest,
1830 const void *src,
1831 size_t dest_len,
1832 size_t src_len,
1833 int flags)
1834{
1835#ifdef DEVELOPER
1836 clobber_region(function, line, dest, dest_len);
1837#endif
1838
1839 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1840 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1841 "UNICODE defined");
1842 }
1843
1844 if (!(flags & STR_ASCII) && \
1845 ((flags & STR_UNICODE || \
1846 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1847 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1848 }
1849 return pull_ascii(dest, src, dest_len, src_len, flags);
1850}
1851
1852/**
1853 Copy a string from a unicode or ascii source (depending on
1854 the packet flags) to a char* destination.
1855 Variant that uses talloc.
1856 Flags can have:
1857 STR_TERMINATE means the string in src is null terminated.
1858 STR_UNICODE means to force as unicode.
1859 STR_ASCII use ascii even with unicode packet.
1860 STR_NOALIGN means don't do alignment.
1861 if STR_TERMINATE is set then src_len is ignored is it is -1
1862 src_len is the length of the source area in bytes.
1863 Return the number of bytes occupied by the string in src.
1864 The resulting string in "dest" is always null terminated.
1865**/
1866
1867size_t pull_string_talloc_fn(const char *function,
1868 unsigned int line,
1869 TALLOC_CTX *ctx,
1870 const void *base_ptr,
1871 uint16 smb_flags2,
1872 char **ppdest,
1873 const void *src,
1874 size_t src_len,
1875 int flags)
1876{
1877 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1878 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1879 "UNICODE defined");
1880 }
1881
1882 if (!(flags & STR_ASCII) && \
1883 ((flags & STR_UNICODE || \
1884 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1885 return pull_ucs2_base_talloc(ctx,
1886 base_ptr,
1887 ppdest,
1888 src,
1889 src_len,
1890 flags);
1891 }
1892 return pull_ascii_base_talloc(ctx,
1893 ppdest,
1894 src,
1895 src_len,
1896 flags);
1897}
1898
1899
1900size_t align_string(const void *base_ptr, const char *p, int flags)
1901{
1902 if (!(flags & STR_ASCII) && \
1903 ((flags & STR_UNICODE || \
1904 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1905 return ucs2_align(base_ptr, p, flags);
1906 }
1907 return 0;
1908}
1909
1910/*
1911 Return the unicode codepoint for the next multi-byte CH_UNIX character
1912 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1913
1914 Also return the number of bytes consumed (which tells the caller
1915 how many bytes to skip to get to the next CH_UNIX character).
1916
1917 Return INVALID_CODEPOINT if the next character cannot be converted.
1918*/
1919
1920codepoint_t next_codepoint(const char *str, size_t *size)
1921{
1922 /* It cannot occupy more than 4 bytes in UTF16 format */
1923 uint8_t buf[4];
1924 smb_iconv_t descriptor;
1925#ifdef __OS2__
1926 size_t ilen_max;
1927 size_t olen_orig;
1928 const char *inbuf;
1929#endif
1930 size_t ilen_orig;
1931 size_t ilen;
1932 size_t olen;
1933
1934 char *outbuf;
1935
1936#ifdef __OS2__
1937 *size = 1;
1938#endif
1939
1940 if ((str[0] & 0x80) == 0) {
1941#ifndef __OS2__
1942 *size = 1;
1943#endif
1944 return (codepoint_t)str[0];
1945 }
1946
1947 lazy_initialize_conv();
1948
1949 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1950 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1951#ifndef __OS2__
1952 *size = 1;
1953#endif
1954 return INVALID_CODEPOINT;
1955 }
1956#ifdef __OS2__
1957 /* We assume that no multi-byte character can take
1958 more than 5 bytes. This is OK as we only
1959 support codepoints up to 1M */
1960
1961 ilen_max = strnlen( str, 5 );
1962#else
1963 *size = 1;
1964#endif
1965 ilen_orig = 1;
1966 olen_orig = 2;
1967 while( 1 )
1968 {
1969 ilen = ilen_orig;
1970 olen = olen_orig;
1971 inbuf = str;
1972 outbuf = ( char * )buf;
1973 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1974 break;
1975
1976 switch( errno )
1977 {
1978 case E2BIG :
1979 if( olen_orig == 2 )
1980 olen_orig = 4;
1981 else
1982 return INVALID_CODEPOINT;
1983 break;
1984
1985
1986 case EINVAL :
1987#ifndef __OS2__
1988 /* We assume that no multi-byte character can take
1989 more than 5 bytes. This is OK as we only
1990 support codepoints up to 1M */
1991 if( ilen_orig < 5 )
1992#else
1993 if( ilen_orig < ilen_max )
1994#endif
1995 ilen_orig++;
1996 else
1997 return INVALID_CODEPOINT;
1998 break;
1999
2000 case EILSEQ :
2001 default :
2002 return INVALID_CODEPOINT;
2003 }
2004 }
2005
2006 olen = olen_orig - olen;
2007
2008 *size = ilen_orig - ilen;
2009
2010 if (olen == 2) {
2011 /* 2 byte, UTF16-LE encoded value. */
2012 return (codepoint_t)SVAL(buf, 0);
2013 }
2014 if (olen == 4) {
2015 /* Decode a 4 byte UTF16-LE character manually.
2016 See RFC2871 for the encoding machanism.
2017 */
2018 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2019 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2020
2021 return (codepoint_t)0x10000 +
2022 (w1 << 10) + w2;
2023 }
2024
2025 /* no other length is valid */
2026 return INVALID_CODEPOINT;
2027}
Note: See TracBrowser for help on using the repository browser.