Context Navigation

source: branches/samba-3.3.x/source/lib/charcnv.c@ 396

Visit:

Last change on this file since 396 was 339, checked in by Herwig Bauernfeind, 16 years ago
divers fix for Ticket #68 in 3.3
File size: 51.1 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/*
 * Character substituted for input that cannot be converted.
 * Hard-coded for now; we can parameterize this if someone complains.... JRA.
 */

char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58	if (ch == CH_UTF16LE) ret = "UTF-16LE";
59	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60	else if (ch == CH_UNIX) ret = lp_unix_charset();
61	else if (ch == CH_DOS) ret = lp_dos_charset();
62	else if (ch == CH_DISPLAY) ret = lp_display_charset();
63	else if (ch == CH_UTF8) ret = "UTF8";
64
65	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66	if (ret && !strcmp(ret, "LOCALE")) {
67	const char *ln = NULL;
68
69	#ifdef HAVE_SETLOCALE
70	setlocale(LC_ALL, "");
71	#endif
72	ln = nl_langinfo(CODESET);
73	if (ln) {
74	/* Check whether the charset name is supported
75	by iconv */
76	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77	if (handle == (smb_iconv_t) -1) {
78	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79	ln = NULL;
80	} else {
81	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82	smb_iconv_close(handle);
83	}
84	}
85	ret = ln;
86	}
87	#endif
88
89	if (!ret \|\| !*ret) ret = "ASCII";
90	return ret;
91	}
92
93	void lazy_initialize_conv(void)
94	{
95	if (!initialized) {
96	load_case_tables();
97	init_iconv();
98	initialized = true;
99	}
100	}
101
102	/**
103	* Destroy global objects allocated by init_iconv()
104	**/
105	void gfree_charcnv(void)
106	{
107	int c1, c2;
108
109	for (c1=0;c1<NUM_CHARSETS;c1++) {
110	for (c2=0;c2<NUM_CHARSETS;c2++) {
111	if ( conv_handles[c1][c2] ) {
112	smb_iconv_close( conv_handles[c1][c2] );
113	conv_handles[c1][c2] = 0;
114	}
115	}
116	}
117	initialized = false;
118	}
119
120	/**
121	* Initialize iconv conversion descriptors.
122	*
123	* This is called the first time it is needed, and also called again
124	* every time the configuration is reloaded, because the charset or
125	* codepage might have changed.
126	**/
127	void init_iconv(void)
128	{
129	int c1, c2;
130	bool did_reload = False;
131
132	/* so that charset_name() works we need to get the UNIX<->UCS2 going
133	first */
134	if (!conv_handles[CH_UNIX][CH_UTF16LE])
135	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
136
137	if (!conv_handles[CH_UTF16LE][CH_UNIX])
138	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
139
140	for (c1=0;c1<NUM_CHARSETS;c1++) {
141	for (c2=0;c2<NUM_CHARSETS;c2++) {
142	const char *n1 = charset_name((charset_t)c1);
143	const char *n2 = charset_name((charset_t)c2);
144	if (conv_handles[c1][c2] &&
145	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
146	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
147	continue;
148
149	did_reload = True;
150
151	if (conv_handles[c1][c2])
152	smb_iconv_close(conv_handles[c1][c2]);
153
154	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
155	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
156	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
157	charset_name((charset_t)c1), charset_name((charset_t)c2)));
158	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
159	n1 = "ASCII";
160	}
161	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
162	n2 = "ASCII";
163	}
164	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
165	n1, n2 ));
166	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
167	if (!conv_handles[c1][c2]) {
168	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
169	smb_panic("init_iconv: conv_handle initialization failed");
170	}
171	}
172	}
173	}
174
175	if (did_reload) {
176	/* XXX: Does this really get called every time the dos
177	* codepage changes? */
178	/* XXX: Is the did_reload test too strict? */
179	conv_silent = True;
180	init_valid_table();
181	conv_silent = False;
182	}
183	}
184
185	/**
186	* Convert string from one encoding to another, making error checking etc
187	* Slow path version - uses (slow) iconv.
188	*
189	* @param src pointer to source string (multibyte or singlebyte)
190	* @param srclen length of the source string in bytes
191	* @param dest pointer to destination string (multibyte or singlebyte)
192	* @param destlen maximal length allowed for string
193	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
194	* @returns the number of bytes occupied in the destination
195	*
196	* Ensure the srclen contains the terminating zero.
197	*
198	**/
199
200	static size_t convert_string_internal(charset_t from, charset_t to,
201	void const *src, size_t srclen,
202	void *dest, size_t destlen, bool allow_bad_conv)
203	{
204	size_t i_len, o_len;
205	size_t retval;
206	const char* inbuf = (const char*)src;
207	char* outbuf = (char*)dest;
208	smb_iconv_t descriptor;
209
210	lazy_initialize_conv();
211
212	descriptor = conv_handles[from][to];
213
214	if (srclen == (size_t)-1) {
215	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
216	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
217	} else {
218	srclen = strlen((const char *)src)+1;
219	}
220	}
221
222
223	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
224	if (!conv_silent)
225	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
226	return (size_t)-1;
227	}
228
229	i_len=srclen;
230	o_len=destlen;
231
232	again:
233
234	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
235	if(retval==(size_t)-1) {
236	const char *reason="unknown error";
237	switch(errno) {
238	case EINVAL:
239	reason="Incomplete multibyte sequence";
240	if (!conv_silent)
241	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
242	if (allow_bad_conv)
243	goto use_as_is;
244	return (size_t)-1;
245	case E2BIG:
246	reason="No more room";
247	if (!conv_silent) {
248	if (from == CH_UNIX) {
249	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
250	charset_name(from), charset_name(to),
251	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
252	} else {
253	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
254	charset_name(from), charset_name(to),
255	(unsigned int)srclen, (unsigned int)destlen));
256	}
257	}
258	break;
259	case EILSEQ:
260	reason="Illegal multibyte sequence";
261	if (!conv_silent)
262	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
263	if (allow_bad_conv)
264	goto use_as_is;
265
266	return (size_t)-1;
267	default:
268	if (!conv_silent)
269	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270	return (size_t)-1;
271	}
272	/* smb_panic(reason); */
273	}
274	return destlen-o_len;
275
276	use_as_is:
277
278	/*
279	* Conversion not supported. This is actually an error, but there are so
280	* many misconfigured iconv systems and smb.conf's out there we can't just
281	* fail. Do a very bad conversion instead.... JRA.
282	*/
283
284	{
285	if (o_len == 0 \|\| i_len == 0)
286	return destlen - o_len;
287
288	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
289	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
290	/* Can't convert from utf16 any endian to multibyte.
291	Replace with the default fail char.
292	*/
293	if (i_len < 2)
294	return destlen - o_len;
295	if (i_len >= 2) {
296	*outbuf = lp_failed_convert_char();
297
298	outbuf++;
299	o_len--;
300
301	inbuf += 2;
302	i_len -= 2;
303	}
304
305	if (o_len == 0 \|\| i_len == 0)
306	return destlen - o_len;
307
308	/* Keep trying with the next char... */
309	goto again;
310
311	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312	/* Can't convert to UTF16LE - just widen by adding the
313	default fail char then zero.
314	*/
315	if (o_len < 2)
316	return destlen - o_len;
317
318	outbuf[0] = lp_failed_convert_char();
319	outbuf[1] = '\0';
320
321	inbuf++;
322	i_len--;
323
324	outbuf += 2;
325	o_len -= 2;
326
327	if (o_len == 0 \|\| i_len == 0)
328	return destlen - o_len;
329
330	/* Keep trying with the next char... */
331	goto again;
332
333	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334	to != CH_UTF16LE && to != CH_UTF16BE) {
335	/* Failed multibyte to multibyte. Just copy the default fail char and
336	try again. */
337	outbuf[0] = lp_failed_convert_char();
338
339	inbuf++;
340	i_len--;
341
342	outbuf++;
343	o_len--;
344
345	if (o_len == 0 \|\| i_len == 0)
346	return destlen - o_len;
347
348	/* Keep trying with the next char... */
349	goto again;
350
351	} else {
352	/* Keep compiler happy.... */
353	return destlen - o_len;
354	}
355	}
356	}
357
358	/**
359	* Convert string from one encoding to another, making error checking etc
360	* Fast path version - handles ASCII first.
361	*
362	* @param src pointer to source string (multibyte or singlebyte)
363	* @param srclen length of the source string in bytes, or -1 for nul terminated.
364	* @param dest pointer to destination string (multibyte or singlebyte)
365	* @param destlen maximal length allowed for string - NEVER -1.
366	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367	* @returns the number of bytes occupied in the destination
368	*
369	* Ensure the srclen contains the terminating zero.
370	*
371	* This function has been hand-tuned to provide a fast path.
372	* Don't change unless you really know what you are doing. JRA.
373	**/
374
375	size_t convert_string(charset_t from, charset_t to,
376	void const *src, size_t srclen,
377	void *dest, size_t destlen, bool allow_bad_conv)
378	{
379	/*
380	* NB. We deliberately don't do a strlen here if srclen == -1.
381	* This is very expensive over millions of calls and is taken
382	* care of in the slow path in convert_string_internal. JRA.
383	*/
384
385	#ifdef DEVELOPER
386	SMB_ASSERT(destlen != (size_t)-1);
387	#endif
388
389	if (srclen == 0)
390	return 0;
391
392	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393	const unsigned char p = (const unsigned char )src;
394	unsigned char q = (unsigned char )dest;
395	size_t slen = srclen;
396	size_t dlen = destlen;
397	unsigned char lastp = '\0';
398	size_t retval = 0;
399
400	/* If all characters are ascii, fast path here. */
401	while (slen && dlen) {
402	if ((lastp = *p) <= 0x7f) {
403	q++ = p++;
404	if (slen != (size_t)-1) {
405	slen--;
406	}
407	dlen--;
408	retval++;
409	if (!lastp)
410	break;
411	} else {
412	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413	goto general_case;
414	#else
415	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416	if (ret == (size_t)-1) {
417	return ret;
418	}
419	return retval + ret;
420	#endif
421	}
422	}
423	if (!dlen) {
424	/* Even if we fast path we should note if we ran out of room. */
425	if (((slen != (size_t)-1) && slen) \|\|
426	((slen == (size_t)-1) && lastp)) {
427	errno = E2BIG;
428	}
429	}
430	return retval;
431
432	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433	const unsigned char p = (const unsigned char )src;
434	unsigned char q = (unsigned char )dest;
435	size_t retval = 0;
436	size_t slen = srclen;
437	size_t dlen = destlen;
438	unsigned char lastp = '\0';
439
440	/* If all characters are ascii, fast path here. */
441	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
442	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
443	q++ = p;
444	if (slen != (size_t)-1) {
445	slen -= 2;
446	}
447	p += 2;
448	dlen--;
449	retval++;
450	if (!lastp)
451	break;
452	} else {
453	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
454	goto general_case;
455	#else
456	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457	if (ret == (size_t)-1) {
458	return ret;
459	}
460	return retval + ret;
461	#endif
462	}
463	}
464	if (!dlen) {
465	/* Even if we fast path we should note if we ran out of room. */
466	if (((slen != (size_t)-1) && slen) \|\|
467	((slen == (size_t)-1) && lastp)) {
468	errno = E2BIG;
469	}
470	}
471	return retval;
472
473	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
474	const unsigned char p = (const unsigned char )src;
475	unsigned char q = (unsigned char )dest;
476	size_t retval = 0;
477	size_t slen = srclen;
478	size_t dlen = destlen;
479	unsigned char lastp = '\0';
480
481	/* If all characters are ascii, fast path here. */
482	while (slen && (dlen >= 2)) {
483	if ((lastp = *p) <= 0x7F) {
484	q++ = p++;
485	*q++ = '\0';
486	if (slen != (size_t)-1) {
487	slen--;
488	}
489	dlen -= 2;
490	retval += 2;
491	if (!lastp)
492	break;
493	} else {
494	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
495	goto general_case;
496	#else
497	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
498	if (ret == (size_t)-1) {
499	return ret;
500	}
501	return retval + ret;
502	#endif
503	}
504	}
505	if (!dlen) {
506	/* Even if we fast path we should note if we ran out of room. */
507	if (((slen != (size_t)-1) && slen) \|\|
508	((slen == (size_t)-1) && lastp)) {
509	errno = E2BIG;
510	}
511	}
512	return retval;
513	}
514
515	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
516	general_case:
517	#endif
518	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
519	}
520
521	/**
522	* Convert between character sets, allocating a new buffer for the result.
523	*
524	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
525	* (this is a bad interface and needs fixing. JRA).
526	* @param srclen length of source buffer.
527	* @param dest always set at least to NULL
528	* @param converted_size set to the size of the allocated buffer on return
529	* true
530	* @note -1 is not accepted for srclen.
531	*
532	* @return true if new buffer was correctly allocated, and string was
533	* converted.
534	*
535	* Ensure the srclen contains the terminating zero.
536	*
537	* I hate the goto's in this function. It's embarressing.....
538	* There has to be a cleaner way to do this. JRA.
539	**/
540
541	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
542	void const src, size_t srclen, void dst,
543	size_t *converted_size, bool allow_bad_conv)
544	{
545	size_t i_len, o_len, destlen = (srclen * 3) / 2;
546	size_t retval;
547	const char inbuf = (const char )src;
548	char outbuf = NULL, ob = NULL;
549	smb_iconv_t descriptor;
550	void dest = (void )dst;
551
552	*dest = NULL;
553
554	if (!converted_size) {
555	errno = EINVAL;
556	return false;
557	}
558
559	if (src == NULL \|\| srclen == (size_t)-1) {
560	errno = EINVAL;
561	return false;
562	}
563	if (srclen == 0) {
564	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
565	if (ob == NULL) {
566	errno = ENOMEM;
567	return false;
568	}
569	*dest = ob;
570	*converted_size = 0;
571	return true;
572	}
573
574	lazy_initialize_conv();
575
576	descriptor = conv_handles[from][to];
577
578	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
579	if (!conv_silent)
580	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
581	errno = EOPNOTSUPP;
582	return false;
583	}
584
585	convert:
586
587	/* +2 is for ucs2 null termination. */
588	if ((destlen*2)+2 < destlen) {
589	/* wrapped ! abort. */
590	if (!conv_silent)
591	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
592	if (!ctx)
593	SAFE_FREE(outbuf);
594	errno = EOPNOTSUPP;
595	return false;
596	} else {
597	destlen = destlen * 2;
598	}
599
600	/* +2 is for ucs2 null termination. */
601	if (ctx) {
602	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
603	} else {
604	ob = (char *)SMB_REALLOC(ob, destlen + 2);
605	}
606
607	if (!ob) {
608	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
609	errno = ENOMEM;
610	return false;
611	}
612	outbuf = ob;
613	i_len = srclen;
614	o_len = destlen;
615
616	again:
617
618
619	retval = smb_iconv(descriptor,
620	&inbuf, &i_len,
621	&outbuf, &o_len);
622	if(retval == (size_t)-1) {
623	const char *reason="unknown error";
624	switch(errno) {
625	case EINVAL:
626	reason="Incomplete multibyte sequence";
627	if (!conv_silent)
628	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
629	if (allow_bad_conv)
630	goto use_as_is;
631	break;
632	case E2BIG:
633	goto convert;
634	case EILSEQ:
635	reason="Illegal multibyte sequence";
636	if (!conv_silent)
637	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
638	if (allow_bad_conv)
639	goto use_as_is;
640	break;
641	}
642	if (!conv_silent)
643	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
644	/* smb_panic(reason); */
645	if (ctx) {
646	TALLOC_FREE(ob);
647	} else {
648	SAFE_FREE(ob);
649	}
650	return false;
651	}
652
653	out:
654
655	destlen = destlen - o_len;
656	/* Don't shrink unless we're reclaiming a lot of
657	* space. This is in the hot codepath and these
658	* reallocs cost. JRA.
659	*/
660	if (o_len > 1024) {
661	/* We're shrinking here so we know the +2 is safe from wrap. */
662	if (ctx) {
663	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
664	} else {
665	ob = (char *)SMB_REALLOC(ob,destlen + 2);
666	}
667	}
668
669	if (destlen && !ob) {
670	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
671	errno = ENOMEM;
672	return false;
673	}
674
675	*dest = ob;
676
677	/* Must ucs2 null terminate in the extra space we allocated. */
678	ob[destlen] = '\0';
679	ob[destlen+1] = '\0';
680
681	*converted_size = destlen;
682	return true;
683
684	use_as_is:
685
686	/*
687	* Conversion not supported. This is actually an error, but there are so
688	* many misconfigured iconv systems and smb.conf's out there we can't just
689	* fail. Do a very bad conversion instead.... JRA.
690	*/
691
692	{
693	if (o_len == 0 \|\| i_len == 0)
694	goto out;
695
696	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
697	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
698	/* Can't convert from utf16 any endian to multibyte.
699	Replace with the default fail char.
700	*/
701
702	if (i_len < 2)
703	goto out;
704
705	if (i_len >= 2) {
706	*outbuf = lp_failed_convert_char();
707
708	outbuf++;
709	o_len--;
710
711	inbuf += 2;
712	i_len -= 2;
713	}
714
715	if (o_len == 0 \|\| i_len == 0)
716	goto out;
717
718	/* Keep trying with the next char... */
719	goto again;
720
721	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
722	/* Can't convert to UTF16LE - just widen by adding the
723	default fail char then zero.
724	*/
725	if (o_len < 2)
726	goto out;
727
728	outbuf[0] = lp_failed_convert_char();
729	outbuf[1] = '\0';
730
731	inbuf++;
732	i_len--;
733
734	outbuf += 2;
735	o_len -= 2;
736
737	if (o_len == 0 \|\| i_len == 0)
738	goto out;
739
740	/* Keep trying with the next char... */
741	goto again;
742
743	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
744	to != CH_UTF16LE && to != CH_UTF16BE) {
745	/* Failed multibyte to multibyte. Just copy the default fail char and
746	try again. */
747	outbuf[0] = lp_failed_convert_char();
748
749	inbuf++;
750	i_len--;
751
752	outbuf++;
753	o_len--;
754
755	if (o_len == 0 \|\| i_len == 0)
756	goto out;
757
758	/* Keep trying with the next char... */
759	goto again;
760
761	} else {
762	/* Keep compiler happy.... */
763	goto out;
764	}
765	}
766	}
767
768	/**
769	* Convert between character sets, allocating a new buffer using talloc for the result.
770	*
771	* @param srclen length of source buffer.
772	* @param dest always set at least to NULL
773	* @parm converted_size set to the number of bytes occupied by the string in
774	* the destination on success.
775	* @note -1 is not accepted for srclen.
776	*
777	* @return true if new buffer was correctly allocated, and string was
778	* converted.
779	*/
780	bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
781	void const src, size_t srclen, void dst,
782	size_t *converted_size, bool allow_bad_conv)
783	{
784	void dest = (void )dst;
785
786	*dest = NULL;
787	return convert_string_allocate(ctx, from, to, src, srclen, dest,
788	converted_size, allow_bad_conv);
789	}
790
791	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
792	{
793	size_t size;
794	smb_ucs2_t *buffer;
795
796	if (!push_ucs2_allocate(&buffer, src, &size)) {
797	return (size_t)-1;
798	}
799
800	if (!strupper_w(buffer) && (dest == src)) {
801	free(buffer);
802	return srclen;
803	}
804
805	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
806	free(buffer);
807	return size;
808	}
809
810	/**
811	strdup() a unix string to upper case.
812	**/
813
814	char strdup_upper(const char s)
815	{
816	char *out_buffer = SMB_STRDUP(s);
817	const unsigned char p = (const unsigned char )s;
818	unsigned char q = (unsigned char )out_buffer;
819
820	if (!q) {
821	return NULL;
822	}
823
824	/* this is quite a common operation, so we want it to be
825	fast. We optimise for the ascii case, knowing that all our
826	supported multi-byte character sets are ascii-compatible
827	(ie. they match for the first 128 chars) */
828
829	while (*p) {
830	if (*p & 0x80)
831	break;
832	q++ = toupper_ascii_fast(p);
833	p++;
834	}
835
836	if (*p) {
837	/* MB case. */
838	size_t converted_size, converted_size2;
839	smb_ucs2_t *buffer = NULL;
840
841	SAFE_FREE(out_buffer);
842	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
843	strlen(s) + 1,
844	(void *)(void )&buffer,
845	&converted_size, True))
846	{
847	return NULL;
848	}
849
850	strupper_w(buffer);
851
852	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
853	converted_size,
854	(void *)(void )&out_buffer,
855	&converted_size2, True))
856	{
857	TALLOC_FREE(buffer);
858	return NULL;
859	}
860
861	/* Don't need the intermediate buffer
862	* anymore.
863	*/
864	TALLOC_FREE(buffer);
865	}
866
867	return out_buffer;
868	}
869
870	/**
871	talloc_strdup() a unix string to upper case.
872	**/
873
874	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
875	{
876	char *out_buffer = talloc_strdup(ctx,s);
877	const unsigned char p = (const unsigned char )s;
878	unsigned char q = (unsigned char )out_buffer;
879
880	if (!q) {
881	return NULL;
882	}
883
884	/* this is quite a common operation, so we want it to be
885	fast. We optimise for the ascii case, knowing that all our
886	supported multi-byte character sets are ascii-compatible
887	(ie. they match for the first 128 chars) */
888
889	while (*p) {
890	if (*p & 0x80)
891	break;
892	q++ = toupper_ascii_fast(p);
893	p++;
894	}
895
896	if (*p) {
897	/* MB case. */
898	size_t converted_size, converted_size2;
899	smb_ucs2_t *ubuf = NULL;
900
901	/* We're not using the ascii buffer above. */
902	TALLOC_FREE(out_buffer);
903
904	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
905	strlen(s)+1, (void *)&ubuf,
906	&converted_size, True))
907	{
908	return NULL;
909	}
910
911	strupper_w(ubuf);
912
913	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
914	converted_size, (void *)&out_buffer,
915	&converted_size2, True))
916	{
917	TALLOC_FREE(ubuf);
918	return NULL;
919	}
920
921	/* Don't need the intermediate buffer
922	* anymore.
923	*/
924	TALLOC_FREE(ubuf);
925	}
926
927	return out_buffer;
928	}
929
930	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
931	{
932	size_t size;
933	smb_ucs2_t *buffer = NULL;
934
935	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
936	(void *)(void )&buffer, &size,
937	True))
938	{
939	smb_panic("failed to create UCS2 buffer");
940	}
941	if (!strlower_w(buffer) && (dest == src)) {
942	SAFE_FREE(buffer);
943	return srclen;
944	}
945	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
946	SAFE_FREE(buffer);
947	return size;
948	}
949
950	/**
951	strdup() a unix string to lower case.
952	**/
953
954	char strdup_lower(const char s)
955	{
956	size_t converted_size;
957	smb_ucs2_t *buffer = NULL;
958	char *out_buffer;
959
960	if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
961	return NULL;
962	}
963
964	strlower_w(buffer);
965
966	if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
967	SAFE_FREE(buffer);
968	return NULL;
969	}
970
971	SAFE_FREE(buffer);
972
973	return out_buffer;
974	}
975
976	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
977	{
978	size_t converted_size;
979	smb_ucs2_t *buffer = NULL;
980	char *out_buffer;
981
982	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
983	return NULL;
984	}
985
986	strlower_w(buffer);
987
988	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
989	TALLOC_FREE(buffer);
990	return NULL;
991	}
992
993	TALLOC_FREE(buffer);
994
995	return out_buffer;
996	}
997
998
999	size_t ucs2_align(const void base_ptr, const void p, int flags)
1000	{
1001	if (flags & (STR_NOALIGN\|STR_ASCII))
1002	return 0;
1003	return PTR_DIFF(p, base_ptr) & 1;
1004	}
1005
1006
1007	/**
1008	* Copy a string from a char* unix src to a dos codepage string destination.
1009	*
1010	* @return the number of bytes occupied by the string in the destination.
1011	*
1012	* @param flags can include
1013	* <dl>
1014	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1015	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1016	* </dl>
1017	*
1018	* @param dest_len the maximum length in bytes allowed in the
1019	* destination.
1020	**/
1021	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1022	{
1023	size_t src_len = strlen(src);
1024	char *tmpbuf = NULL;
1025	size_t ret;
1026
1027	/* No longer allow a length of -1. */
1028	if (dest_len == (size_t)-1) {
1029	smb_panic("push_ascii - dest_len == -1");
1030	}
1031
1032	if (flags & STR_UPPER) {
1033	tmpbuf = SMB_STRDUP(src);
1034	if (!tmpbuf) {
1035	smb_panic("malloc fail");
1036	}
1037	strupper_m(tmpbuf);
1038	src = tmpbuf;
1039	}
1040
1041	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1042	src_len++;
1043	}
1044
1045	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1046	if (ret == (size_t)-1 &&
1047	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1048	&& dest_len > 0) {
1049	((char *)dest)[0] = '\0';
1050	}
1051	SAFE_FREE(tmpbuf);
1052	return ret;
1053	}
1054
1055	size_t push_ascii_fstring(void dest, const char src)
1056	{
1057	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1058	}
1059
1060	/********************************************************************
1061	Push an nstring - ensure null terminated. Written by
1062	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1063	********************************************************************/
1064
1065	size_t push_ascii_nstring(void dest, const char src)
1066	{
1067	size_t i, buffer_len, dest_len;
1068	smb_ucs2_t *buffer;
1069
1070	conv_silent = True;
1071	if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1072	smb_panic("failed to create UCS2 buffer");
1073	}
1074
1075	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1076	buffer_len /= sizeof(smb_ucs2_t);
1077
1078	dest_len = 0;
1079	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1080	unsigned char mb[10];
1081	/* Convert one smb_ucs2_t character at a time. */
1082	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1083	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1084	memcpy((char *)dest + dest_len, mb, mb_len);
1085	dest_len += mb_len;
1086	} else {
1087	errno = E2BIG;
1088	break;
1089	}
1090	}
1091	((char *)dest)[dest_len] = '\0';
1092
1093	SAFE_FREE(buffer);
1094	conv_silent = False;
1095	return dest_len;
1096	}
1097
1098	/********************************************************************
1099	Push and malloc an ascii string. src and dest null terminated.
1100	********************************************************************/
1101
1102	bool push_ascii_allocate(char *dest, const char src, size_t *converted_size)
1103	{
1104	size_t src_len = strlen(src)+1;
1105
1106	*dest = NULL;
1107	return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1108	(void **)dest, converted_size, True);
1109	}
1110
1111	/**
1112	* Copy a string from a dos codepage source to a unix char* destination.
1113	*
1114	* The resulting string in "dest" is always null terminated.
1115	*
1116	* @param flags can have:
1117	* <dl>
1118	* <dt>STR_TERMINATE</dt>
1119	* <dd>STR_TERMINATE means the string in @p src
1120	* is null terminated, and src_len is ignored.</dd>
1121	* </dl>
1122	*
1123	* @param src_len is the length of the source area in bytes.
1124	* @returns the number of bytes occupied by the string in @p src.
1125	**/
1126	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1127	{
1128	size_t ret;
1129
1130	if (dest_len == (size_t)-1) {
1131	/* No longer allow dest_len of -1. */
1132	smb_panic("pull_ascii - invalid dest_len of -1");
1133	}
1134
1135	if (flags & STR_TERMINATE) {
1136	if (src_len == (size_t)-1) {
1137	src_len = strlen((const char *)src) + 1;
1138	} else {
1139	size_t len = strnlen((const char *)src, src_len);
1140	if (len < src_len)
1141	len++;
1142	src_len = len;
1143	}
1144	}
1145
1146	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1147	if (ret == (size_t)-1) {
1148	ret = 0;
1149	dest_len = 0;
1150	}
1151
1152	if (dest_len && ret) {
1153	/* Did we already process the terminating zero ? */
1154	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1155	dest[MIN(ret, dest_len-1)] = 0;
1156	}
1157	} else {
1158	dest[0] = 0;
1159	}
1160
1161	return src_len;
1162	}
1163
1164	/**
1165	* Copy a string from a dos codepage source to a unix char* destination.
1166	Talloc version.
1167	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1168	needs fixing. JRA).
1169	*
1170	* The resulting string in "dest" is always null terminated.
1171	*
1172	* @param flags can have:
1173	* <dl>
1174	* <dt>STR_TERMINATE</dt>
1175	* <dd>STR_TERMINATE means the string in @p src
1176	* is null terminated, and src_len is ignored.</dd>
1177	* </dl>
1178	*
1179	* @param src_len is the length of the source area in bytes.
1180	* @returns the number of bytes occupied by the string in @p src.
1181	**/
1182
1183	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1184	char **ppdest,
1185	const void *src,
1186	size_t src_len,
1187	int flags)
1188	{
1189	char *dest = NULL;
1190	size_t dest_len;
1191
1192	#ifdef DEVELOPER
1193	/* Ensure we never use the braindead "malloc" varient. */
1194	if (ctx == NULL) {
1195	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1196	}
1197	#endif
1198
1199	*ppdest = NULL;
1200
1201	if (!src_len) {
1202	return 0;
1203	}
1204
1205	if (flags & STR_TERMINATE) {
1206	if (src_len == (size_t)-1) {
1207	src_len = strlen((const char *)src) + 1;
1208	} else {
1209	size_t len = strnlen((const char *)src, src_len);
1210	if (len < src_len)
1211	len++;
1212	src_len = len;
1213	}
1214	/* Ensure we don't use an insane length from the client. */
1215	if (src_len >= 1024*1024) {
1216	char *msg = talloc_asprintf(ctx,
1217	"Bad src length (%u) in "
1218	"pull_ascii_base_talloc",
1219	(unsigned int)src_len);
1220	smb_panic(msg);
1221	}
1222	} else {
1223	/* Can't have an unlimited length
1224	* non STR_TERMINATE'd.
1225	*/
1226	if (src_len == (size_t)-1) {
1227	errno = EINVAL;
1228	return 0;
1229	}
1230	}
1231
1232	/* src_len != -1 here. */
1233
1234	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1235	&dest_len, True)) {
1236	dest_len = 0;
1237	}
1238
1239	if (dest_len && dest) {
1240	/* Did we already process the terminating zero ? */
1241	if (dest[dest_len-1] != 0) {
1242	size_t size = talloc_get_size(dest);
1243	/* Have we got space to append the '\0' ? */
1244	if (size <= dest_len) {
1245	/* No, realloc. */
1246	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1247	dest_len+1);
1248	if (!dest) {
1249	/* talloc fail. */
1250	dest_len = (size_t)-1;
1251	return 0;
1252	}
1253	}
1254	/* Yay - space ! */
1255	dest[dest_len] = '\0';
1256	dest_len++;
1257	}
1258	} else if (dest) {
1259	dest[0] = 0;
1260	}
1261
1262	*ppdest = dest;
1263	return src_len;
1264	}
1265
1266	size_t pull_ascii_fstring(char dest, const void src)
1267	{
1268	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1269	}
1270
1271	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1272
1273	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1274	{
1275	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1276	}
1277
1278	/**
1279	* Copy a string from a char* src to a unicode destination.
1280	*
1281	* @returns the number of bytes occupied by the string in the destination.
1282	*
1283	* @param flags can have:
1284	*
1285	* <dl>
1286	* <dt>STR_TERMINATE <dd>means include the null termination.
1287	* <dt>STR_UPPER <dd>means uppercase in the destination.
1288	* <dt>STR_NOALIGN <dd>means don't do alignment.
1289	* </dl>
1290	*
1291	* @param dest_len is the maximum length allowed in the
1292	* destination.
1293	**/
1294
1295	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1296	{
1297	size_t len=0;
1298	size_t src_len;
1299	size_t ret;
1300
1301	if (dest_len == (size_t)-1) {
1302	/* No longer allow dest_len of -1. */
1303	smb_panic("push_ucs2 - invalid dest_len of -1");
1304	}
1305
1306	if (flags & STR_TERMINATE)
1307	src_len = (size_t)-1;
1308	else
1309	src_len = strlen(src);
1310
1311	if (ucs2_align(base_ptr, dest, flags)) {
1312	(char )dest = 0;
1313	dest = (void )((char )dest + 1);
1314	if (dest_len)
1315	dest_len--;
1316	len++;
1317	}
1318
1319	/* ucs2 is always a multiple of 2 bytes */
1320	dest_len &= ~1;
1321
1322	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1323	if (ret == (size_t)-1) {
1324	if ((flags & STR_TERMINATE) &&
1325	dest &&
1326	dest_len) {
1327	(char )dest = 0;
1328	}
1329	return len;
1330	}
1331
1332	len += ret;
1333
1334	if (flags & STR_UPPER) {
1335	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1336	size_t i;
1337
1338	/* We check for i < (ret / 2) below as the dest string isn't null
1339	terminated if STR_TERMINATE isn't set. */
1340
1341	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1342	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1343	if (v != dest_ucs2[i]) {
1344	dest_ucs2[i] = v;
1345	}
1346	}
1347	}
1348
1349	return len;
1350	}
1351
1352
1353	/**
1354	* Copy a string from a unix char* src to a UCS2 destination,
1355	* allocating a buffer using talloc().
1356	*
1357	* @param dest always set at least to NULL
1358	* @parm converted_size set to the number of bytes occupied by the string in
1359	* the destination on success.
1360	*
1361	* @return true if new buffer was correctly allocated, and string was
1362	* converted.
1363	**/
1364	bool push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src,
1365	size_t *converted_size)
1366	{
1367	size_t src_len = strlen(src)+1;
1368
1369	*dest = NULL;
1370	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1371	(void **)dest, converted_size, True);
1372	}
1373
1374
1375	/**
1376	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1377	*
1378	* @param dest always set at least to NULL
1379	* @parm converted_size set to the number of bytes occupied by the string in
1380	* the destination on success.
1381	*
1382	* @return true if new buffer was correctly allocated, and string was
1383	* converted.
1384	**/
1385
1386	bool push_ucs2_allocate(smb_ucs2_t *dest, const char src,
1387	size_t *converted_size)
1388	{
1389	size_t src_len = strlen(src)+1;
1390
1391	*dest = NULL;
1392	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1393	(void **)dest, converted_size, True);
1394	}
1395
1396	/**
1397	Copy a string from a char* src to a UTF-8 destination.
1398	Return the number of bytes occupied by the string in the destination
1399	Flags can have:
1400	STR_TERMINATE means include the null termination
1401	STR_UPPER means uppercase in the destination
1402	dest_len is the maximum length allowed in the destination. If dest_len
1403	is -1 then no maxiumum is used.
1404	**/
1405
1406	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1407	{
1408	size_t src_len = 0;
1409	size_t ret;
1410	char *tmpbuf = NULL;
1411
1412	if (dest_len == (size_t)-1) {
1413	/* No longer allow dest_len of -1. */
1414	smb_panic("push_utf8 - invalid dest_len of -1");
1415	}
1416
1417	if (flags & STR_UPPER) {
1418	tmpbuf = strdup_upper(src);
1419	if (!tmpbuf) {
1420	return (size_t)-1;
1421	}
1422	src = tmpbuf;
1423	src_len = strlen(src);
1424	}
1425
1426	src_len = strlen(src);
1427	if (flags & STR_TERMINATE) {
1428	src_len++;
1429	}
1430
1431	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1432	SAFE_FREE(tmpbuf);
1433	return ret;
1434	}
1435
1436	size_t push_utf8_fstring(void dest, const char src)
1437	{
1438	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1439	}
1440
1441	/**
1442	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1443	*
1444	* @param dest always set at least to NULL
1445	* @parm converted_size set to the number of bytes occupied by the string in
1446	* the destination on success.
1447	*
1448	* @return true if new buffer was correctly allocated, and string was
1449	* converted.
1450	**/
1451
1452	bool push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1453	size_t *converted_size)
1454	{
1455	size_t src_len = strlen(src)+1;
1456
1457	*dest = NULL;
1458	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1459	(void**)dest, converted_size, True);
1460	}
1461
1462	/**
1463	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1464	*
1465	* @param dest always set at least to NULL
1466	* @parm converted_size set to the number of bytes occupied by the string in
1467	* the destination on success.
1468	*
1469	* @return true if new buffer was correctly allocated, and string was
1470	* converted.
1471	**/
1472
1473	bool push_utf8_allocate(char *dest, const char src, size_t *converted_size)
1474	{
1475	size_t src_len = strlen(src)+1;
1476
1477	*dest = NULL;
1478	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1479	(void **)dest, converted_size, True);
1480	}
1481
1482	/**
1483	Copy a string from a ucs2 source to a unix char* destination.
1484	Flags can have:
1485	STR_TERMINATE means the string in src is null terminated.
1486	STR_NOALIGN means don't try to align.
1487	if STR_TERMINATE is set then src_len is ignored if it is -1.
1488	src_len is the length of the source area in bytes
1489	Return the number of bytes occupied by the string in src.
1490	The resulting string in "dest" is always null terminated.
1491	**/
1492
1493	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1494	{
1495	size_t ret;
1496
1497	if (dest_len == (size_t)-1) {
1498	/* No longer allow dest_len of -1. */
1499	smb_panic("pull_ucs2 - invalid dest_len of -1");
1500	}
1501
1502	if (!src_len) {
1503	if (dest && dest_len > 0) {
1504	dest[0] = '\0';
1505	}
1506	return 0;
1507	}
1508
1509	if (ucs2_align(base_ptr, src, flags)) {
1510	src = (const void )((const char )src + 1);
1511	if (src_len != (size_t)-1)
1512	src_len--;
1513	}
1514
1515	if (flags & STR_TERMINATE) {
1516	/* src_len -1 is the default for null terminated strings. */
1517	if (src_len != (size_t)-1) {
1518	size_t len = strnlen_w((const smb_ucs2_t *)src,
1519	src_len/2);
1520	if (len < src_len/2)
1521	len++;
1522	src_len = len*2;
1523	}
1524	}
1525
1526	/* ucs2 is always a multiple of 2 bytes */
1527	if (src_len != (size_t)-1)
1528	src_len &= ~1;
1529
1530	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1531	if (ret == (size_t)-1) {
1532	ret = 0;
1533	dest_len = 0;
1534	}
1535
1536	if (src_len == (size_t)-1)
1537	src_len = ret*2;
1538
1539	if (dest_len && ret) {
1540	/* Did we already process the terminating zero ? */
1541	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1542	dest[MIN(ret, dest_len-1)] = 0;
1543	}
1544	} else {
1545	dest[0] = 0;
1546	}
1547
1548	return src_len;
1549	}
1550
1551	/**
1552	Copy a string from a ucs2 source to a unix char* destination.
1553	Talloc version with a base pointer.
1554	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1555	needs fixing. JRA).
1556	Flags can have:
1557	STR_TERMINATE means the string in src is null terminated.
1558	STR_NOALIGN means don't try to align.
1559	if STR_TERMINATE is set then src_len is ignored if it is -1.
1560	src_len is the length of the source area in bytes
1561	Return the number of bytes occupied by the string in src.
1562	The resulting string in "dest" is always null terminated.
1563	**/
1564
1565	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1566	const void *base_ptr,
1567	char **ppdest,
1568	const void *src,
1569	size_t src_len,
1570	int flags)
1571	{
1572	char *dest;
1573	size_t dest_len;
1574
1575	*ppdest = NULL;
1576
1577	#ifdef DEVELOPER
1578	/* Ensure we never use the braindead "malloc" varient. */
1579	if (ctx == NULL) {
1580	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1581	}
1582	#endif
1583
1584	if (!src_len) {
1585	return 0;
1586	}
1587
1588	if (ucs2_align(base_ptr, src, flags)) {
1589	src = (const void )((const char )src + 1);
1590	if (src_len != (size_t)-1)
1591	src_len--;
1592	}
1593
1594	if (flags & STR_TERMINATE) {
1595	/* src_len -1 is the default for null terminated strings. */
1596	if (src_len != (size_t)-1) {
1597	size_t len = strnlen_w((const smb_ucs2_t *)src,
1598	src_len/2);
1599	if (len < src_len/2)
1600	len++;
1601	src_len = len*2;
1602	} else {
1603	/*
1604	* src_len == -1 - alloc interface won't take this
1605	* so we must calculate.
1606	*/
1607	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1608	}
1609	/* Ensure we don't use an insane length from the client. */
1610	if (src_len >= 1024*1024) {
1611	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1612	}
1613	} else {
1614	/* Can't have an unlimited length
1615	* non STR_TERMINATE'd.
1616	*/
1617	if (src_len == (size_t)-1) {
1618	errno = EINVAL;
1619	return 0;
1620	}
1621	}
1622
1623	/* src_len != -1 here. */
1624
1625	/* ucs2 is always a multiple of 2 bytes */
1626	src_len &= ~1;
1627
1628	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1629	(void *)&dest, &dest_len, True)) {
1630	dest_len = 0;
1631	}
1632
1633	if (dest_len) {
1634	/* Did we already process the terminating zero ? */
1635	if (dest[dest_len-1] != 0) {
1636	size_t size = talloc_get_size(dest);
1637	/* Have we got space to append the '\0' ? */
1638	if (size <= dest_len) {
1639	/* No, realloc. */
1640	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1641	dest_len+1);
1642	if (!dest) {
1643	/* talloc fail. */
1644	dest_len = (size_t)-1;
1645	return 0;
1646	}
1647	}
1648	/* Yay - space ! */
1649	dest[dest_len] = '\0';
1650	dest_len++;
1651	}
1652	} else if (dest) {
1653	dest[0] = 0;
1654	}
1655
1656	*ppdest = dest;
1657	return src_len;
1658	}
1659
1660	size_t pull_ucs2_fstring(char dest, const void src)
1661	{
1662	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1663	}
1664
1665	/**
1666	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1667	*
1668	* @param dest always set at least to NULL
1669	* @parm converted_size set to the number of bytes occupied by the string in
1670	* the destination on success.
1671	*
1672	* @return true if new buffer was correctly allocated, and string was
1673	* converted.
1674	**/
1675
1676	bool pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src,
1677	size_t *converted_size)
1678	{
1679	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1680
1681	*dest = NULL;
1682	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1683	(void **)dest, converted_size, True);
1684	}
1685
1686	/**
1687	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1688	*
1689	* @param dest always set at least to NULL
1690	* @parm converted_size set to the number of bytes occupied by the string in
1691	* the destination on success.
1692	* @return true if new buffer was correctly allocated, and string was
1693	* converted.
1694	**/
1695
1696	bool pull_ucs2_allocate(char *dest, const smb_ucs2_t src,
1697	size_t *converted_size)
1698	{
1699	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1700
1701	*dest = NULL;
1702	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1703	(void **)dest, converted_size, True);
1704	}
1705
1706	/**
1707	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1708	*
1709	* @param dest always set at least to NULL
1710	* @parm converted_size set to the number of bytes occupied by the string in
1711	* the destination on success.
1712	*
1713	* @return true if new buffer was correctly allocated, and string was
1714	* converted.
1715	**/
1716
1717	bool pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1718	size_t *converted_size)
1719	{
1720	size_t src_len = strlen(src)+1;
1721
1722	*dest = NULL;
1723	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1724	(void **)dest, converted_size, True);
1725	}
1726
1727	/**
1728	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1729	*
1730	* @param dest always set at least to NULL
1731	* @parm converted_size set to the number of bytes occupied by the string in
1732	* the destination on success.
1733	*
1734	* @return true if new buffer was correctly allocated, and string was
1735	* converted.
1736	**/
1737
1738	bool pull_utf8_allocate(char *dest, const char src, size_t *converted_size)
1739	{
1740	size_t src_len = strlen(src)+1;
1741
1742	*dest = NULL;
1743	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1744	(void **)dest, converted_size, True);
1745	}
1746
1747	/**
1748	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1749	*
1750	* @param dest always set at least to NULL
1751	* @parm converted_size set to the number of bytes occupied by the string in
1752	* the destination on success.
1753	*
1754	* @return true if new buffer was correctly allocated, and string was
1755	* converted.
1756	**/
1757
1758	bool pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src,
1759	size_t *converted_size)
1760	{
1761	size_t src_len = strlen(src)+1;
1762
1763	*dest = NULL;
1764	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1765	(void **)dest, converted_size, True);
1766	}
1767
1768	/**
1769	Copy a string from a char* src to a unicode or ascii
1770	dos codepage destination choosing unicode or ascii based on the
1771	flags in the SMB buffer starting at base_ptr.
1772	Return the number of bytes occupied by the string in the destination.
1773	flags can have:
1774	STR_TERMINATE means include the null termination.
1775	STR_UPPER means uppercase in the destination.
1776	STR_ASCII use ascii even with unicode packet.
1777	STR_NOALIGN means don't do alignment.
1778	dest_len is the maximum length allowed in the destination. If dest_len
1779	is -1 then no maxiumum is used.
1780	**/
1781
1782	size_t push_string_fn(const char *function, unsigned int line,
1783	const void *base_ptr, uint16 flags2,
1784	void dest, const char src,
1785	size_t dest_len, int flags)
1786	{
1787	#ifdef DEVELOPER
1788	/* We really need to zero fill here, not clobber
1789	* region, as we want to ensure that valgrind thinks
1790	* all of the outgoing buffer has been written to
1791	* so a send() or write() won't trap an error.
1792	* JRA.
1793	*/
1794	#if 0
1795	clobber_region(function, line, dest, dest_len);
1796	#else
1797	memset(dest, '\0', dest_len);
1798	#endif
1799	#endif
1800
1801	if (!(flags & STR_ASCII) && \
1802	((flags & STR_UNICODE \|\| \
1803	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1804	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1805	}
1806	return push_ascii(dest, src, dest_len, flags);
1807	}
1808
1809
1810	/**
1811	Copy a string from a unicode or ascii source (depending on
1812	the packet flags) to a char* destination.
1813	Flags can have:
1814	STR_TERMINATE means the string in src is null terminated.
1815	STR_UNICODE means to force as unicode.
1816	STR_ASCII use ascii even with unicode packet.
1817	STR_NOALIGN means don't do alignment.
1818	if STR_TERMINATE is set then src_len is ignored is it is -1
1819	src_len is the length of the source area in bytes.
1820	Return the number of bytes occupied by the string in src.
1821	The resulting string in "dest" is always null terminated.
1822	**/
1823
1824	size_t pull_string_fn(const char *function,
1825	unsigned int line,
1826	const void *base_ptr,
1827	uint16 smb_flags2,
1828	char *dest,
1829	const void *src,
1830	size_t dest_len,
1831	size_t src_len,
1832	int flags)
1833	{
1834	#ifdef DEVELOPER
1835	clobber_region(function, line, dest, dest_len);
1836	#endif
1837
1838	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1839	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1840	"UNICODE defined");
1841	}
1842
1843	if (!(flags & STR_ASCII) && \
1844	((flags & STR_UNICODE \|\| \
1845	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1846	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1847	}
1848	return pull_ascii(dest, src, dest_len, src_len, flags);
1849	}
1850
1851	/**
1852	Copy a string from a unicode or ascii source (depending on
1853	the packet flags) to a char* destination.
1854	Variant that uses talloc.
1855	Flags can have:
1856	STR_TERMINATE means the string in src is null terminated.
1857	STR_UNICODE means to force as unicode.
1858	STR_ASCII use ascii even with unicode packet.
1859	STR_NOALIGN means don't do alignment.
1860	if STR_TERMINATE is set then src_len is ignored is it is -1
1861	src_len is the length of the source area in bytes.
1862	Return the number of bytes occupied by the string in src.
1863	The resulting string in "dest" is always null terminated.
1864	**/
1865
1866	size_t pull_string_talloc_fn(const char *function,
1867	unsigned int line,
1868	TALLOC_CTX *ctx,
1869	const void *base_ptr,
1870	uint16 smb_flags2,
1871	char **ppdest,
1872	const void *src,
1873	size_t src_len,
1874	int flags)
1875	{
1876	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1877	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1878	"UNICODE defined");
1879	}
1880
1881	if (!(flags & STR_ASCII) && \
1882	((flags & STR_UNICODE \|\| \
1883	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1884	return pull_ucs2_base_talloc(ctx,
1885	base_ptr,
1886	ppdest,
1887	src,
1888	src_len,
1889	flags);
1890	}
1891	return pull_ascii_base_talloc(ctx,
1892	ppdest,
1893	src,
1894	src_len,
1895	flags);
1896	}
1897
1898
1899	size_t align_string(const void base_ptr, const char p, int flags)
1900	{
1901	if (!(flags & STR_ASCII) && \
1902	((flags & STR_UNICODE \|\| \
1903	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1904	return ucs2_align(base_ptr, p, flags);
1905	}
1906	return 0;
1907	}
1908
1909	/*
1910	Return the unicode codepoint for the next multi-byte CH_UNIX character
1911	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1912
1913	Also return the number of bytes consumed (which tells the caller
1914	how many bytes to skip to get to the next CH_UNIX character).
1915
1916	Return INVALID_CODEPOINT if the next character cannot be converted.
1917	*/
1918
1919	codepoint_t next_codepoint(const char str, size_t size)
1920	{
1921	/* It cannot occupy more than 4 bytes in UTF16 format */
1922	uint8_t buf[4];
1923	smb_iconv_t descriptor;
1924	#ifdef __OS2__
1925	size_t ilen_max;
1926	size_t olen_orig;
1927	const char *inbuf;
1928	#endif
1929	size_t ilen_orig;
1930	size_t ilen;
1931	size_t olen;
1932
1933	char *outbuf;
1934
1935	#ifdef __OS2__
1936	*size = 1;
1937	#endif
1938
1939	if ((str[0] & 0x80) == 0) {
1940	#ifndef __OS2__
1941	*size = 1;
1942	#endif
1943	return (codepoint_t)str[0];
1944	}
1945
1946	lazy_initialize_conv();
1947
1948	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1949	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1950	#ifndef __OS2__
1951	*size = 1;
1952	#endif
1953	return INVALID_CODEPOINT;
1954	}
1955	#ifdef __OS2__
1956	/* We assume that no multi-byte character can take
1957	more than 5 bytes. This is OK as we only
1958	support codepoints up to 1M */
1959
1960	ilen_max = strnlen( str, 5 );
1961	#else
1962	*size = 1;
1963	#endif
1964	ilen_orig = 1;
1965	olen_orig = 2;
1966	while( 1 )
1967	{
1968	ilen = ilen_orig;
1969	olen = olen_orig;
1970	inbuf = str;
1971	outbuf = ( char * )buf;
1972	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1973	break;
1974
1975	switch( errno )
1976	{
1977	case E2BIG :
1978	if( olen_orig == 2 )
1979	olen_orig = 4;
1980	else
1981	return INVALID_CODEPOINT;
1982	break;
1983
1984
1985	case EINVAL :
1986	#ifndef __OS2__
1987	/* We assume that no multi-byte character can take
1988	more than 5 bytes. This is OK as we only
1989	support codepoints up to 1M */
1990	if( ilen_orig < 5 )
1991	#else
1992	if( ilen_orig < ilen_max )
1993	#endif
1994	ilen_orig++;
1995	else
1996	return INVALID_CODEPOINT;
1997	break;
1998
1999	case EILSEQ :
2000	default :
2001	return INVALID_CODEPOINT;
2002	}
2003	}
2004
2005	olen = olen_orig - olen;
2006
2007	*size = ilen_orig - ilen;
2008
2009	if (olen == 2) {
2010	/* 2 byte, UTF16-LE encoded value. */
2011	return (codepoint_t)SVAL(buf, 0);
2012	}
2013	if (olen == 4) {
2014	/* Decode a 4 byte UTF16-LE character manually.
2015	See RFC2871 for the encoding machanism.
2016	*/
2017	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2018	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2019
2020	return (codepoint_t)0x10000 +
2021	(w1 << 10) + w2;
2022	}
2023
2024	/* no other length is valid */
2025	return INVALID_CODEPOINT;
2026	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: