Context Navigation

source: branches/samba-3.2.x/source/lib/charcnv.c@ 228

Visit:

Last change on this file since 228 was 228, checked in by Herwig Bauernfeind, 16 years ago
Update 3.2 branch to 3.2.6
File size: 49.1 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/* The substitution character is hard-wired for now. We can
 * parameterize this if someone complains.... JRA. */

/**
 * Return the character written in place of input that could not be
 * converted.
 **/
char lp_failed_convert_char(void)
{
	const char fail_char = '_';

	return fail_char;
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	else if (ch == CH_UNIX) ret = lp_unix_charset();
62	else if (ch == CH_DOS) ret = lp_dos_charset();
63	else if (ch == CH_DISPLAY) ret = lp_display_charset();
64	else if (ch == CH_UTF8) ret = "UTF8";
65
66	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67	if (ret && !strcmp(ret, "LOCALE")) {
68	const char *ln = NULL;
69
70	#ifdef HAVE_SETLOCALE
71	setlocale(LC_ALL, "");
72	#endif
73	ln = nl_langinfo(CODESET);
74	if (ln) {
75	/* Check whether the charset name is supported
76	by iconv */
77	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78	if (handle == (smb_iconv_t) -1) {
79	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80	ln = NULL;
81	} else {
82	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83	smb_iconv_close(handle);
84	}
85	}
86	ret = ln;
87	}
88	#endif
89
90	if (!ret \|\| !*ret) ret = "ASCII";
91	return ret;
92	}
93
94	void lazy_initialize_conv(void)
95	{
96	if (!initialized) {
97	load_case_tables();
98	init_iconv();
99	initialized = true;
100	}
101	}
102
103	/**
104	* Destroy global objects allocated by init_iconv()
105	**/
106	void gfree_charcnv(void)
107	{
108	int c1, c2;
109
110	for (c1=0;c1<NUM_CHARSETS;c1++) {
111	for (c2=0;c2<NUM_CHARSETS;c2++) {
112	if ( conv_handles[c1][c2] ) {
113	smb_iconv_close( conv_handles[c1][c2] );
114	conv_handles[c1][c2] = 0;
115	}
116	}
117	}
118	initialized = false;
119	}
120
121	/**
122	* Initialize iconv conversion descriptors.
123	*
124	* This is called the first time it is needed, and also called again
125	* every time the configuration is reloaded, because the charset or
126	* codepage might have changed.
127	**/
128	void init_iconv(void)
129	{
130	int c1, c2;
131	bool did_reload = False;
132
133	/* so that charset_name() works we need to get the UNIX<->UCS2 going
134	first */
135	if (!conv_handles[CH_UNIX][CH_UTF16LE])
136	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
137
138	if (!conv_handles[CH_UTF16LE][CH_UNIX])
139	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
140
141	for (c1=0;c1<NUM_CHARSETS;c1++) {
142	for (c2=0;c2<NUM_CHARSETS;c2++) {
143	const char *n1 = charset_name((charset_t)c1);
144	const char *n2 = charset_name((charset_t)c2);
145	if (conv_handles[c1][c2] &&
146	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148	continue;
149
150	did_reload = True;
151
152	if (conv_handles[c1][c2])
153	smb_iconv_close(conv_handles[c1][c2]);
154
155	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158	charset_name((charset_t)c1), charset_name((charset_t)c2)));
159	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160	n1 = "ASCII";
161	}
162	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163	n2 = "ASCII";
164	}
165	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166	n1, n2 ));
167	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168	if (!conv_handles[c1][c2]) {
169	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170	smb_panic("init_iconv: conv_handle initialization failed");
171	}
172	}
173	}
174	}
175
176	if (did_reload) {
177	/* XXX: Does this really get called every time the dos
178	* codepage changes? */
179	/* XXX: Is the did_reload test too strict? */
180	conv_silent = True;
181	init_valid_table();
182	conv_silent = False;
183	}
184	}
185
186	/**
187	* Convert string from one encoding to another, making error checking etc
188	* Slow path version - uses (slow) iconv.
189	*
190	* @param src pointer to source string (multibyte or singlebyte)
191	* @param srclen length of the source string in bytes
192	* @param dest pointer to destination string (multibyte or singlebyte)
193	* @param destlen maximal length allowed for string
194	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195	* @returns the number of bytes occupied in the destination
196	*
197	* Ensure the srclen contains the terminating zero.
198	*
199	**/
200
201	static size_t convert_string_internal(charset_t from, charset_t to,
202	void const *src, size_t srclen,
203	void *dest, size_t destlen, bool allow_bad_conv)
204	{
205	size_t i_len, o_len;
206	size_t retval;
207	const char* inbuf = (const char*)src;
208	char* outbuf = (char*)dest;
209	smb_iconv_t descriptor;
210
211	lazy_initialize_conv();
212
213	descriptor = conv_handles[from][to];
214
215	if (srclen == (size_t)-1) {
216	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
217	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
218	} else {
219	srclen = strlen((const char *)src)+1;
220	}
221	}
222
223
224	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
225	if (!conv_silent)
226	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
227	return (size_t)-1;
228	}
229
230	i_len=srclen;
231	o_len=destlen;
232
233	again:
234
235	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236	if(retval==(size_t)-1) {
237	const char *reason="unknown error";
238	switch(errno) {
239	case EINVAL:
240	reason="Incomplete multibyte sequence";
241	if (!conv_silent)
242	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
243	if (allow_bad_conv)
244	goto use_as_is;
245	break;
246	case E2BIG:
247	reason="No more room";
248	if (!conv_silent) {
249	if (from == CH_UNIX) {
250	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251	charset_name(from), charset_name(to),
252	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
253	} else {
254	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255	charset_name(from), charset_name(to),
256	(unsigned int)srclen, (unsigned int)destlen));
257	}
258	}
259	break;
260	case EILSEQ:
261	reason="Illegal multibyte sequence";
262	if (!conv_silent)
263	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
264	if (allow_bad_conv)
265	goto use_as_is;
266	break;
267	default:
268	if (!conv_silent)
269	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270	break;
271	}
272	/* smb_panic(reason); */
273	}
274	return destlen-o_len;
275
276	use_as_is:
277
278	/*
279	* Conversion not supported. This is actually an error, but there are so
280	* many misconfigured iconv systems and smb.conf's out there we can't just
281	* fail. Do a very bad conversion instead.... JRA.
282	*/
283
284	{
285	if (o_len == 0 \|\| i_len == 0)
286	return destlen - o_len;
287
288	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
289	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
290	/* Can't convert from utf16 any endian to multibyte.
291	Replace with the default fail char.
292	*/
293	if (i_len < 2)
294	return destlen - o_len;
295	if (i_len >= 2) {
296	*outbuf = lp_failed_convert_char();
297
298	outbuf++;
299	o_len--;
300
301	inbuf += 2;
302	i_len -= 2;
303	}
304
305	if (o_len == 0 \|\| i_len == 0)
306	return destlen - o_len;
307
308	/* Keep trying with the next char... */
309	goto again;
310
311	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312	/* Can't convert to UTF16LE - just widen by adding the
313	default fail char then zero.
314	*/
315	if (o_len < 2)
316	return destlen - o_len;
317
318	outbuf[0] = lp_failed_convert_char();
319	outbuf[1] = '\0';
320
321	inbuf++;
322	i_len--;
323
324	outbuf += 2;
325	o_len -= 2;
326
327	if (o_len == 0 \|\| i_len == 0)
328	return destlen - o_len;
329
330	/* Keep trying with the next char... */
331	goto again;
332
333	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334	to != CH_UTF16LE && to != CH_UTF16BE) {
335	/* Failed multibyte to multibyte. Just copy the default fail char and
336	try again. */
337	outbuf[0] = lp_failed_convert_char();
338
339	inbuf++;
340	i_len--;
341
342	outbuf++;
343	o_len--;
344
345	if (o_len == 0 \|\| i_len == 0)
346	return destlen - o_len;
347
348	/* Keep trying with the next char... */
349	goto again;
350
351	} else {
352	/* Keep compiler happy.... */
353	return destlen - o_len;
354	}
355	}
356	}
357
358	/**
359	* Convert string from one encoding to another, making error checking etc
360	* Fast path version - handles ASCII first.
361	*
362	* @param src pointer to source string (multibyte or singlebyte)
363	* @param srclen length of the source string in bytes, or -1 for nul terminated.
364	* @param dest pointer to destination string (multibyte or singlebyte)
365	* @param destlen maximal length allowed for string - NEVER -1.
366	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367	* @returns the number of bytes occupied in the destination
368	*
369	* Ensure the srclen contains the terminating zero.
370	*
371	* This function has been hand-tuned to provide a fast path.
372	* Don't change unless you really know what you are doing. JRA.
373	**/
374
375	size_t convert_string(charset_t from, charset_t to,
376	void const *src, size_t srclen,
377	void *dest, size_t destlen, bool allow_bad_conv)
378	{
379	/*
380	* NB. We deliberately don't do a strlen here if srclen == -1.
381	* This is very expensive over millions of calls and is taken
382	* care of in the slow path in convert_string_internal. JRA.
383	*/
384
385	#ifdef DEVELOPER
386	SMB_ASSERT(destlen != (size_t)-1);
387	#endif
388
389	if (srclen == 0)
390	return 0;
391
392	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393	const unsigned char p = (const unsigned char )src;
394	unsigned char q = (unsigned char )dest;
395	size_t slen = srclen;
396	size_t dlen = destlen;
397	unsigned char lastp = '\0';
398	size_t retval = 0;
399
400	/* If all characters are ascii, fast path here. */
401	while (slen && dlen) {
402	if ((lastp = *p) <= 0x7f) {
403	q++ = p++;
404	if (slen != (size_t)-1) {
405	slen--;
406	}
407	dlen--;
408	retval++;
409	if (!lastp)
410	break;
411	} else {
412	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413	goto general_case;
414	#else
415	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416	#endif
417	}
418	}
419	if (!dlen) {
420	/* Even if we fast path we should note if we ran out of room. */
421	if (((slen != (size_t)-1) && slen) \|\|
422	((slen == (size_t)-1) && lastp)) {
423	errno = E2BIG;
424	}
425	}
426	return retval;
427	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
428	const unsigned char p = (const unsigned char )src;
429	unsigned char q = (unsigned char )dest;
430	size_t retval = 0;
431	size_t slen = srclen;
432	size_t dlen = destlen;
433	unsigned char lastp = '\0';
434
435	/* If all characters are ascii, fast path here. */
436	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
437	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
438	q++ = p;
439	if (slen != (size_t)-1) {
440	slen -= 2;
441	}
442	p += 2;
443	dlen--;
444	retval++;
445	if (!lastp)
446	break;
447	} else {
448	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
449	goto general_case;
450	#else
451	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
452	#endif
453	}
454	}
455	if (!dlen) {
456	/* Even if we fast path we should note if we ran out of room. */
457	if (((slen != (size_t)-1) && slen) \|\|
458	((slen == (size_t)-1) && lastp)) {
459	errno = E2BIG;
460	}
461	}
462	return retval;
463	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
464	const unsigned char p = (const unsigned char )src;
465	unsigned char q = (unsigned char )dest;
466	size_t retval = 0;
467	size_t slen = srclen;
468	size_t dlen = destlen;
469	unsigned char lastp = '\0';
470
471	/* If all characters are ascii, fast path here. */
472	while (slen && (dlen >= 2)) {
473	if ((lastp = *p) <= 0x7F) {
474	q++ = p++;
475	*q++ = '\0';
476	if (slen != (size_t)-1) {
477	slen--;
478	}
479	dlen -= 2;
480	retval += 2;
481	if (!lastp)
482	break;
483	} else {
484	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
485	goto general_case;
486	#else
487	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
488	#endif
489	}
490	}
491	if (!dlen) {
492	/* Even if we fast path we should note if we ran out of room. */
493	if (((slen != (size_t)-1) && slen) \|\|
494	((slen == (size_t)-1) && lastp)) {
495	errno = E2BIG;
496	}
497	}
498	return retval;
499	}
500
501	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
502	general_case:
503	#endif
504	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
505	}
506
507	/**
508	* Convert between character sets, allocating a new buffer for the result.
509	*
510	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
511	* (this is a bad interface and needs fixing. JRA).
512	* @param srclen length of source buffer.
513	* @param dest always set at least to NULL
514	* @param converted_size set to the size of the allocated buffer on return
515	* true
516	* @note -1 is not accepted for srclen.
517	*
518	* @return True if new buffer was correctly allocated, and string was
519	* converted.
520	*
521	* Ensure the srclen contains the terminating zero.
522	*
523	* I hate the goto's in this function. It's embarressing.....
524	* There has to be a cleaner way to do this. JRA.
525	**/
526
527	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
528	void const src, size_t srclen, void dst,
529	size_t *converted_size, bool allow_bad_conv)
530	{
531	size_t i_len, o_len, destlen = (srclen * 3) / 2;
532	size_t retval;
533	const char inbuf = (const char )src;
534	char outbuf = NULL, ob = NULL;
535	smb_iconv_t descriptor;
536	void dest = (void )dst;
537
538	*dest = NULL;
539
540	if (!converted_size) {
541	errno = EINVAL;
542	return false;
543	}
544
545	if (src == NULL \|\| srclen == (size_t)-1) {
546	errno = EINVAL;
547	return false;
548	}
549	if (srclen == 0) {
550	*converted_size = 0;
551	return true;
552	}
553
554	lazy_initialize_conv();
555
556	descriptor = conv_handles[from][to];
557
558	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
559	if (!conv_silent)
560	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
561	errno = EOPNOTSUPP;
562	return false;
563	}
564
565	convert:
566
567	/* +2 is for ucs2 null termination. */
568	if ((destlen*2)+2 < destlen) {
569	/* wrapped ! abort. */
570	if (!conv_silent)
571	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
572	if (!ctx)
573	SAFE_FREE(outbuf);
574	errno = EOPNOTSUPP;
575	return false;
576	} else {
577	destlen = destlen * 2;
578	}
579
580	/* +2 is for ucs2 null termination. */
581	if (ctx) {
582	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
583	} else {
584	ob = (char *)SMB_REALLOC(ob, destlen + 2);
585	}
586
587	if (!ob) {
588	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
589	errno = ENOMEM;
590	return false;
591	}
592	outbuf = ob;
593	i_len = srclen;
594	o_len = destlen;
595
596	again:
597
598	retval = smb_iconv(descriptor,
599	&inbuf, &i_len,
600	&outbuf, &o_len);
601	if(retval == (size_t)-1) {
602	const char *reason="unknown error";
603	switch(errno) {
604	case EINVAL:
605	reason="Incomplete multibyte sequence";
606	if (!conv_silent)
607	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
608	if (allow_bad_conv)
609	goto use_as_is;
610	break;
611	case E2BIG:
612	goto convert;
613	case EILSEQ:
614	reason="Illegal multibyte sequence";
615	if (!conv_silent)
616	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
617	if (allow_bad_conv)
618	goto use_as_is;
619	break;
620	}
621	if (!conv_silent)
622	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
623	/* smb_panic(reason); */
624	if (ctx) {
625	TALLOC_FREE(ob);
626	} else {
627	SAFE_FREE(ob);
628	}
629	return false;
630	}
631
632	out:
633
634	destlen = destlen - o_len;
635	/* Don't shrink unless we're reclaiming a lot of
636	* space. This is in the hot codepath and these
637	* reallocs cost. JRA.
638	*/
639	if (o_len > 1024) {
640	/* We're shrinking here so we know the +2 is safe from wrap. */
641	if (ctx) {
642	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
643	} else {
644	ob = (char *)SMB_REALLOC(ob,destlen + 2);
645	}
646	}
647
648	if (destlen && !ob) {
649	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
650	errno = ENOMEM;
651	return false;
652	}
653
654	*dest = ob;
655
656	/* Must ucs2 null terminate in the extra space we allocated. */
657	ob[destlen] = '\0';
658	ob[destlen+1] = '\0';
659
660	*converted_size = destlen;
661	return true;
662
663	use_as_is:
664
665	/*
666	* Conversion not supported. This is actually an error, but there are so
667	* many misconfigured iconv systems and smb.conf's out there we can't just
668	* fail. Do a very bad conversion instead.... JRA.
669	*/
670
671	{
672	if (o_len == 0 \|\| i_len == 0)
673	goto out;
674
675	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
676	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
677	/* Can't convert from utf16 any endian to multibyte.
678	Replace with the default fail char.
679	*/
680
681	if (i_len < 2)
682	goto out;
683
684	if (i_len >= 2) {
685	*outbuf = lp_failed_convert_char();
686
687	outbuf++;
688	o_len--;
689
690	inbuf += 2;
691	i_len -= 2;
692	}
693
694	if (o_len == 0 \|\| i_len == 0)
695	goto out;
696
697	/* Keep trying with the next char... */
698	goto again;
699
700	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
701	/* Can't convert to UTF16LE - just widen by adding the
702	default fail char then zero.
703	*/
704	if (o_len < 2)
705	goto out;
706
707	outbuf[0] = lp_failed_convert_char();
708	outbuf[1] = '\0';
709
710	inbuf++;
711	i_len--;
712
713	outbuf += 2;
714	o_len -= 2;
715
716	if (o_len == 0 \|\| i_len == 0)
717	goto out;
718
719	/* Keep trying with the next char... */
720	goto again;
721
722	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
723	to != CH_UTF16LE && to != CH_UTF16BE) {
724	/* Failed multibyte to multibyte. Just copy the default fail char and
725	try again. */
726	outbuf[0] = lp_failed_convert_char();
727
728	inbuf++;
729	i_len--;
730
731	outbuf++;
732	o_len--;
733
734	if (o_len == 0 \|\| i_len == 0)
735	goto out;
736
737	/* Keep trying with the next char... */
738	goto again;
739
740	} else {
741	/* Keep compiler happy.... */
742	goto out;
743	}
744	}
745	}
746
747	/**
748	* Convert between character sets, allocating a new buffer using talloc for the result.
749	*
750	* @param srclen length of source buffer.
751	* @param dest always set at least to NULL
752	* @note -1 is not accepted for srclen.
753	*
754	* @returns Size in bytes of the converted string; or -1 in case of error.
755	**/
756	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
757	void const src, size_t srclen, void dst,
758	bool allow_bad_conv)
759	{
760	void dest = (void )dst;
761	size_t dest_len;
762
763	*dest = NULL;
764	if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
765	&dest_len, allow_bad_conv))
766	return (size_t)-1;
767	if (*dest == NULL)
768	return (size_t)-1;
769	return dest_len;
770	}
771
772	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
773	{
774	size_t size;
775	smb_ucs2_t *buffer;
776
777	size = push_ucs2_allocate(&buffer, src);
778	if (size == (size_t)-1) {
779	return (size_t)-1;
780	}
781	if (!strupper_w(buffer) && (dest == src)) {
782	free(buffer);
783	return srclen;
784	}
785
786	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
787	free(buffer);
788	return size;
789	}
790
791	/**
792	strdup() a unix string to upper case.
793	**/
794
795	char strdup_upper(const char s)
796	{
797	char *out_buffer = SMB_STRDUP(s);
798	const unsigned char p = (const unsigned char )s;
799	unsigned char q = (unsigned char )out_buffer;
800
801	if (!q) {
802	return NULL;
803	}
804
805	/* this is quite a common operation, so we want it to be
806	fast. We optimise for the ascii case, knowing that all our
807	supported multi-byte character sets are ascii-compatible
808	(ie. they match for the first 128 chars) */
809
810	while (*p) {
811	if (*p & 0x80)
812	break;
813	q++ = toupper_ascii_fast(p);
814	p++;
815	}
816
817	if (*p) {
818	/* MB case. */
819	size_t size, size2;
820	smb_ucs2_t *buffer = NULL;
821
822	SAFE_FREE(out_buffer);
823	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
824	strlen(s) + 1, (void *)(void )&buffer, &size,
825	True)) {
826	return NULL;
827	}
828
829	strupper_w(buffer);
830
831	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
832	size, (void *)(void )&out_buffer, &size2, True)) {
833	TALLOC_FREE(buffer);
834	return NULL;
835	}
836
837	/* Don't need the intermediate buffer
838	* anymore.
839	*/
840	TALLOC_FREE(buffer);
841	}
842
843	return out_buffer;
844	}
845
846	/**
847	talloc_strdup() a unix string to upper case.
848	**/
849
850	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
851	{
852	char *out_buffer = talloc_strdup(ctx,s);
853	const unsigned char p = (const unsigned char )s;
854	unsigned char q = (unsigned char )out_buffer;
855
856	if (!q) {
857	return NULL;
858	}
859
860	/* this is quite a common operation, so we want it to be
861	fast. We optimise for the ascii case, knowing that all our
862	supported multi-byte character sets are ascii-compatible
863	(ie. they match for the first 128 chars) */
864
865	while (*p) {
866	if (*p & 0x80)
867	break;
868	q++ = toupper_ascii_fast(p);
869	p++;
870	}
871
872	if (*p) {
873	/* MB case. */
874	size_t size;
875	smb_ucs2_t *ubuf = NULL;
876
877	/* We're not using the ascii buffer above. */
878	TALLOC_FREE(out_buffer);
879
880	size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
881	s, strlen(s)+1,
882	(void *)&ubuf,
883	True);
884	if (size == (size_t)-1) {
885	return NULL;
886	}
887
888	strupper_w(ubuf);
889
890	size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
891	ubuf, size,
892	(void *)&out_buffer,
893	True);
894
895	/* Don't need the intermediate buffer
896	* anymore.
897	*/
898
899	TALLOC_FREE(ubuf);
900
901	if (size == (size_t)-1) {
902	return NULL;
903	}
904	}
905
906	return out_buffer;
907	}
908
909	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
910	{
911	size_t size;
912	smb_ucs2_t *buffer = NULL;
913
914	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
915	(void *)(void )&buffer, &size, True)) {
916	smb_panic("failed to create UCS2 buffer");
917	}
918	if (!strlower_w(buffer) && (dest == src)) {
919	SAFE_FREE(buffer);
920	return srclen;
921	}
922	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
923	SAFE_FREE(buffer);
924	return size;
925	}
926
927	/**
928	strdup() a unix string to lower case.
929	**/
930
931	char strdup_lower(const char s)
932	{
933	size_t size;
934	smb_ucs2_t *buffer = NULL;
935	char *out_buffer;
936
937	size = push_ucs2_allocate(&buffer, s);
938	if (size == -1 \|\| !buffer) {
939	return NULL;
940	}
941
942	strlower_w(buffer);
943
944	size = pull_ucs2_allocate(&out_buffer, buffer);
945	SAFE_FREE(buffer);
946
947	if (size == (size_t)-1) {
948	return NULL;
949	}
950
951	return out_buffer;
952	}
953
954	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
955	{
956	size_t size;
957	smb_ucs2_t *buffer = NULL;
958	char *out_buffer;
959
960	size = push_ucs2_talloc(ctx, &buffer, s);
961	if (size == -1 \|\| !buffer) {
962	TALLOC_FREE(buffer);
963	return NULL;
964	}
965
966	strlower_w(buffer);
967
968	size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
969	TALLOC_FREE(buffer);
970
971	if (size == (size_t)-1) {
972	TALLOC_FREE(out_buffer);
973	return NULL;
974	}
975
976	return out_buffer;
977	}
978
979
980	size_t ucs2_align(const void base_ptr, const void p, int flags)
981	{
982	if (flags & (STR_NOALIGN\|STR_ASCII))
983	return 0;
984	return PTR_DIFF(p, base_ptr) & 1;
985	}
986
987
988	/**
989	* Copy a string from a char* unix src to a dos codepage string destination.
990	*
991	* @return the number of bytes occupied by the string in the destination.
992	*
993	* @param flags can include
994	* <dl>
995	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
996	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
997	* </dl>
998	*
999	* @param dest_len the maximum length in bytes allowed in the
1000	* destination.
1001	**/
1002	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1003	{
1004	size_t src_len = strlen(src);
1005	char *tmpbuf = NULL;
1006	size_t ret;
1007
1008	/* No longer allow a length of -1. */
1009	if (dest_len == (size_t)-1) {
1010	smb_panic("push_ascii - dest_len == -1");
1011	}
1012
1013	if (flags & STR_UPPER) {
1014	tmpbuf = SMB_STRDUP(src);
1015	if (!tmpbuf) {
1016	smb_panic("malloc fail");
1017	}
1018	strupper_m(tmpbuf);
1019	src = tmpbuf;
1020	}
1021
1022	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1023	src_len++;
1024	}
1025
1026	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1027	if (ret == (size_t)-1 &&
1028	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1029	&& dest_len > 0) {
1030	((char *)dest)[0] = '\0';
1031	}
1032	SAFE_FREE(tmpbuf);
1033	return ret;
1034	}
1035
1036	size_t push_ascii_fstring(void dest, const char src)
1037	{
1038	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1039	}
1040
1041	/********************************************************************
1042	Push an nstring - ensure null terminated. Written by
1043	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1044	********************************************************************/
1045
1046	size_t push_ascii_nstring(void dest, const char src)
1047	{
1048	size_t i, buffer_len, dest_len;
1049	smb_ucs2_t *buffer;
1050
1051	conv_silent = True;
1052	buffer_len = push_ucs2_allocate(&buffer, src);
1053	if (buffer_len == (size_t)-1) {
1054	smb_panic("failed to create UCS2 buffer");
1055	}
1056
1057	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1058	buffer_len /= sizeof(smb_ucs2_t);
1059
1060	dest_len = 0;
1061	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1062	unsigned char mb[10];
1063	/* Convert one smb_ucs2_t character at a time. */
1064	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1065	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1066	memcpy((char *)dest + dest_len, mb, mb_len);
1067	dest_len += mb_len;
1068	} else {
1069	errno = E2BIG;
1070	break;
1071	}
1072	}
1073	((char *)dest)[dest_len] = '\0';
1074
1075	SAFE_FREE(buffer);
1076	conv_silent = False;
1077	return dest_len;
1078	}
1079
1080	/********************************************************************
1081	Push and malloc an ascii string. src and dest null terminated.
1082	********************************************************************/
1083
1084	size_t push_ascii_allocate(char *dest, const char src)
1085	{
1086	size_t dest_len, src_len = strlen(src)+1;
1087
1088	*dest = NULL;
1089	if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1090	(void **)dest, &dest_len, True))
1091	return (size_t)-1;
1092	else
1093	return dest_len;
1094	}
1095
1096	/**
1097	* Copy a string from a dos codepage source to a unix char* destination.
1098	*
1099	* The resulting string in "dest" is always null terminated.
1100	*
1101	* @param flags can have:
1102	* <dl>
1103	* <dt>STR_TERMINATE</dt>
1104	* <dd>STR_TERMINATE means the string in @p src
1105	* is null terminated, and src_len is ignored.</dd>
1106	* </dl>
1107	*
1108	* @param src_len is the length of the source area in bytes.
1109	* @returns the number of bytes occupied by the string in @p src.
1110	**/
1111	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1112	{
1113	size_t ret;
1114
1115	if (dest_len == (size_t)-1) {
1116	/* No longer allow dest_len of -1. */
1117	smb_panic("pull_ascii - invalid dest_len of -1");
1118	}
1119
1120	if (flags & STR_TERMINATE) {
1121	if (src_len == (size_t)-1) {
1122	src_len = strlen((const char *)src) + 1;
1123	} else {
1124	size_t len = strnlen((const char *)src, src_len);
1125	if (len < src_len)
1126	len++;
1127	src_len = len;
1128	}
1129	}
1130
1131	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1132	if (ret == (size_t)-1) {
1133	ret = 0;
1134	dest_len = 0;
1135	}
1136
1137	if (dest_len && ret) {
1138	/* Did we already process the terminating zero ? */
1139	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1140	dest[MIN(ret, dest_len-1)] = 0;
1141	}
1142	} else {
1143	dest[0] = 0;
1144	}
1145
1146	return src_len;
1147	}
1148
1149	/**
1150	* Copy a string from a dos codepage source to a unix char* destination.
1151	Talloc version.
1152	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1153	needs fixing. JRA).
1154	*
1155	* The resulting string in "dest" is always null terminated.
1156	*
1157	* @param flags can have:
1158	* <dl>
1159	* <dt>STR_TERMINATE</dt>
1160	* <dd>STR_TERMINATE means the string in @p src
1161	* is null terminated, and src_len is ignored.</dd>
1162	* </dl>
1163	*
1164	* @param src_len is the length of the source area in bytes.
1165	* @returns the number of bytes occupied by the string in @p src.
1166	**/
1167
1168	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1169	char **ppdest,
1170	const void *src,
1171	size_t src_len,
1172	int flags)
1173	{
1174	char *dest = NULL;
1175	size_t dest_len = 0;
1176
1177	#ifdef DEVELOPER
1178	/* Ensure we never use the braindead "malloc" varient. */
1179	if (ctx == NULL) {
1180	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1181	}
1182	#endif
1183
1184	*ppdest = NULL;
1185
1186	if (!src_len) {
1187	return 0;
1188	}
1189
1190	if (flags & STR_TERMINATE) {
1191	if (src_len == (size_t)-1) {
1192	src_len = strlen((const char *)src) + 1;
1193	} else {
1194	size_t len = strnlen((const char *)src, src_len);
1195	if (len < src_len)
1196	len++;
1197	src_len = len;
1198	}
1199	/* Ensure we don't use an insane length from the client. */
1200	if (src_len >= 1024*1024) {
1201	char *msg = talloc_asprintf(ctx,
1202	"Bad src length (%u) in "
1203	"pull_ascii_base_talloc",
1204	(unsigned int)src_len);
1205	smb_panic(msg);
1206	}
1207	} else {
1208	/* Can't have an unlimited length
1209	* non STR_TERMINATE'd.
1210	*/
1211	if (src_len == (size_t)-1) {
1212	errno = EINVAL;
1213	return 0;
1214	}
1215	}
1216
1217	/* src_len != -1 here. */
1218
1219	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1220	&dest_len, True)) {
1221	dest_len = 0;
1222	}
1223
1224	if (dest_len && dest) {
1225	/* Did we already process the terminating zero ? */
1226	if (dest[dest_len-1] != 0) {
1227	size_t size = talloc_get_size(dest);
1228	/* Have we got space to append the '\0' ? */
1229	if (size <= dest_len) {
1230	/* No, realloc. */
1231	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1232	dest_len+1);
1233	if (!dest) {
1234	/* talloc fail. */
1235	dest_len = (size_t)-1;
1236	return 0;
1237	}
1238	}
1239	/* Yay - space ! */
1240	dest[dest_len] = '\0';
1241	dest_len++;
1242	}
1243	} else if (dest) {
1244	dest[0] = 0;
1245	}
1246
1247	*ppdest = dest;
1248	return src_len;
1249	}
1250
1251	size_t pull_ascii_fstring(char dest, const void src)
1252	{
1253	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1254	}
1255
1256	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1257
1258	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1259	{
1260	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1261	}
1262
1263	/**
1264	* Copy a string from a char* src to a unicode destination.
1265	*
1266	* @returns the number of bytes occupied by the string in the destination.
1267	*
1268	* @param flags can have:
1269	*
1270	* <dl>
1271	* <dt>STR_TERMINATE <dd>means include the null termination.
1272	* <dt>STR_UPPER <dd>means uppercase in the destination.
1273	* <dt>STR_NOALIGN <dd>means don't do alignment.
1274	* </dl>
1275	*
1276	* @param dest_len is the maximum length allowed in the
1277	* destination.
1278	**/
1279
1280	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1281	{
1282	size_t len=0;
1283	size_t src_len;
1284	size_t ret;
1285
1286	if (dest_len == (size_t)-1) {
1287	/* No longer allow dest_len of -1. */
1288	smb_panic("push_ucs2 - invalid dest_len of -1");
1289	}
1290
1291	if (flags & STR_TERMINATE)
1292	src_len = (size_t)-1;
1293	else
1294	src_len = strlen(src);
1295
1296	if (ucs2_align(base_ptr, dest, flags)) {
1297	(char )dest = 0;
1298	dest = (void )((char )dest + 1);
1299	if (dest_len)
1300	dest_len--;
1301	len++;
1302	}
1303
1304	/* ucs2 is always a multiple of 2 bytes */
1305	dest_len &= ~1;
1306
1307	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1308	if (ret == (size_t)-1) {
1309	if ((flags & STR_TERMINATE) &&
1310	dest &&
1311	dest_len) {
1312	(char )dest = 0;
1313	}
1314	return len;
1315	}
1316
1317	len += ret;
1318
1319	if (flags & STR_UPPER) {
1320	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1321	size_t i;
1322
1323	/* We check for i < (ret / 2) below as the dest string isn't null
1324	terminated if STR_TERMINATE isn't set. */
1325
1326	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1327	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1328	if (v != dest_ucs2[i]) {
1329	dest_ucs2[i] = v;
1330	}
1331	}
1332	}
1333
1334	return len;
1335	}
1336
1337
1338	/**
1339	* Copy a string from a unix char* src to a UCS2 destination,
1340	* allocating a buffer using talloc().
1341	*
1342	* @param dest always set at least to NULL
1343	*
1344	* @returns The number of bytes occupied by the string in the destination
1345	* or -1 in case of error.
1346	**/
1347	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1348	{
1349	size_t src_len = strlen(src)+1;
1350
1351	*dest = NULL;
1352	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1353	}
1354
1355
1356	/**
1357	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1358	*
1359	* @param dest always set at least to NULL
1360	*
1361	* @returns The number of bytes occupied by the string in the destination
1362	* or -1 in case of error.
1363	**/
1364
1365	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1366	{
1367	size_t dest_len, src_len = strlen(src)+1;
1368
1369	*dest = NULL;
1370	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1371	(void **)dest, &dest_len, True))
1372	return (size_t)-1;
1373	else
1374	return dest_len;
1375	}
1376
1377	/**
1378	Copy a string from a char* src to a UTF-8 destination.
1379	Return the number of bytes occupied by the string in the destination
1380	Flags can have:
1381	STR_TERMINATE means include the null termination
1382	STR_UPPER means uppercase in the destination
1383	dest_len is the maximum length allowed in the destination. If dest_len
1384	is -1 then no maxiumum is used.
1385	**/
1386
1387	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1388	{
1389	size_t src_len = 0;
1390	size_t ret;
1391	char *tmpbuf = NULL;
1392
1393	if (dest_len == (size_t)-1) {
1394	/* No longer allow dest_len of -1. */
1395	smb_panic("push_utf8 - invalid dest_len of -1");
1396	}
1397
1398	if (flags & STR_UPPER) {
1399	tmpbuf = strdup_upper(src);
1400	if (!tmpbuf) {
1401	return (size_t)-1;
1402	}
1403	src = tmpbuf;
1404	src_len = strlen(src);
1405	}
1406
1407	src_len = strlen(src);
1408	if (flags & STR_TERMINATE) {
1409	src_len++;
1410	}
1411
1412	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1413	SAFE_FREE(tmpbuf);
1414	return ret;
1415	}
1416
1417	size_t push_utf8_fstring(void dest, const char src)
1418	{
1419	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1420	}
1421
1422	/**
1423	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1424	*
1425	* @param dest always set at least to NULL
1426	*
1427	* @returns The number of bytes occupied by the string in the destination
1428	**/
1429
1430	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1431	{
1432	size_t src_len = strlen(src)+1;
1433
1434	*dest = NULL;
1435	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1436	}
1437
1438	/**
1439	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1440	*
1441	* @param dest always set at least to NULL
1442	*
1443	* @returns The number of bytes occupied by the string in the destination
1444	**/
1445
1446	size_t push_utf8_allocate(char *dest, const char src)
1447	{
1448	size_t dest_len, src_len = strlen(src)+1;
1449
1450	*dest = NULL;
1451	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1452	(void **)dest, &dest_len, True))
1453	return (size_t)-1;
1454	else
1455	return dest_len;
1456	}
1457
1458	/**
1459	Copy a string from a ucs2 source to a unix char* destination.
1460	Flags can have:
1461	STR_TERMINATE means the string in src is null terminated.
1462	STR_NOALIGN means don't try to align.
1463	if STR_TERMINATE is set then src_len is ignored if it is -1.
1464	src_len is the length of the source area in bytes
1465	Return the number of bytes occupied by the string in src.
1466	The resulting string in "dest" is always null terminated.
1467	**/
1468
1469	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1470	{
1471	size_t ret;
1472
1473	if (dest_len == (size_t)-1) {
1474	/* No longer allow dest_len of -1. */
1475	smb_panic("pull_ucs2 - invalid dest_len of -1");
1476	}
1477
1478	if (!src_len) {
1479	if (dest && dest_len > 0) {
1480	dest[0] = '\0';
1481	}
1482	return 0;
1483	}
1484
1485	if (ucs2_align(base_ptr, src, flags)) {
1486	src = (const void )((const char )src + 1);
1487	if (src_len != (size_t)-1)
1488	src_len--;
1489	}
1490
1491	if (flags & STR_TERMINATE) {
1492	/* src_len -1 is the default for null terminated strings. */
1493	if (src_len != (size_t)-1) {
1494	size_t len = strnlen_w((const smb_ucs2_t *)src,
1495	src_len/2);
1496	if (len < src_len/2)
1497	len++;
1498	src_len = len*2;
1499	}
1500	}
1501
1502	/* ucs2 is always a multiple of 2 bytes */
1503	if (src_len != (size_t)-1)
1504	src_len &= ~1;
1505
1506	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1507	if (ret == (size_t)-1) {
1508	ret = 0;
1509	dest_len = 0;
1510	}
1511
1512	if (src_len == (size_t)-1)
1513	src_len = ret*2;
1514
1515	if (dest_len && ret) {
1516	/* Did we already process the terminating zero ? */
1517	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1518	dest[MIN(ret, dest_len-1)] = 0;
1519	}
1520	} else {
1521	dest[0] = 0;
1522	}
1523
1524	return src_len;
1525	}
1526
1527	/**
1528	Copy a string from a ucs2 source to a unix char* destination.
1529	Talloc version with a base pointer.
1530	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1531	needs fixing. JRA).
1532	Flags can have:
1533	STR_TERMINATE means the string in src is null terminated.
1534	STR_NOALIGN means don't try to align.
1535	if STR_TERMINATE is set then src_len is ignored if it is -1.
1536	src_len is the length of the source area in bytes
1537	Return the number of bytes occupied by the string in src.
1538	The resulting string in "dest" is always null terminated.
1539	**/
1540
1541	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1542	const void *base_ptr,
1543	char **ppdest,
1544	const void *src,
1545	size_t src_len,
1546	int flags)
1547	{
1548	char *dest;
1549	size_t dest_len;
1550
1551	*ppdest = NULL;
1552
1553	#ifdef DEVELOPER
1554	/* Ensure we never use the braindead "malloc" varient. */
1555	if (ctx == NULL) {
1556	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1557	}
1558	#endif
1559
1560	if (!src_len) {
1561	return 0;
1562	}
1563
1564	if (ucs2_align(base_ptr, src, flags)) {
1565	src = (const void )((const char )src + 1);
1566	if (src_len != (size_t)-1)
1567	src_len--;
1568	}
1569
1570	if (flags & STR_TERMINATE) {
1571	/* src_len -1 is the default for null terminated strings. */
1572	if (src_len != (size_t)-1) {
1573	size_t len = strnlen_w((const smb_ucs2_t *)src,
1574	src_len/2);
1575	if (len < src_len/2)
1576	len++;
1577	src_len = len*2;
1578	} else {
1579	/*
1580	* src_len == -1 - alloc interface won't take this
1581	* so we must calculate.
1582	*/
1583	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1584	}
1585	/* Ensure we don't use an insane length from the client. */
1586	if (src_len >= 1024*1024) {
1587	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1588	}
1589	} else {
1590	/* Can't have an unlimited length
1591	* non STR_TERMINATE'd.
1592	*/
1593	if (src_len == (size_t)-1) {
1594	errno = EINVAL;
1595	return 0;
1596	}
1597	}
1598
1599	/* src_len != -1 here. */
1600
1601	/* ucs2 is always a multiple of 2 bytes */
1602	src_len &= ~1;
1603
1604	dest_len = convert_string_talloc(ctx,
1605	CH_UTF16LE,
1606	CH_UNIX,
1607	src,
1608	src_len,
1609	(void *)&dest,
1610	True);
1611	if (dest_len == (size_t)-1) {
1612	dest_len = 0;
1613	}
1614
1615	if (dest_len) {
1616	/* Did we already process the terminating zero ? */
1617	if (dest[dest_len-1] != 0) {
1618	size_t size = talloc_get_size(dest);
1619	/* Have we got space to append the '\0' ? */
1620	if (size <= dest_len) {
1621	/* No, realloc. */
1622	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1623	dest_len+1);
1624	if (!dest) {
1625	/* talloc fail. */
1626	dest_len = (size_t)-1;
1627	return 0;
1628	}
1629	}
1630	/* Yay - space ! */
1631	dest[dest_len] = '\0';
1632	dest_len++;
1633	}
1634	} else if (dest) {
1635	dest[0] = 0;
1636	}
1637
1638	*ppdest = dest;
1639	return src_len;
1640	}
1641
1642	size_t pull_ucs2_fstring(char dest, const void src)
1643	{
1644	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1645	}
1646
1647	/**
1648	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1649	*
1650	* @param dest always set at least to NULL
1651	*
1652	* @returns The number of bytes occupied by the string in the destination
1653	**/
1654
1655	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1656	{
1657	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1658	*dest = NULL;
1659	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1660	}
1661
1662	/**
1663	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1664	*
1665	* @param dest always set at least to NULL
1666	*
1667	* @returns The number of bytes occupied by the string in the destination
1668	**/
1669
1670	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1671	{
1672	size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1673	*dest = NULL;
1674	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1675	(void **)dest, &dest_len, True))
1676	return (size_t)-1;
1677	else
1678	return dest_len;
1679	}
1680
1681	/**
1682	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1683	*
1684	* @param dest always set at least to NULL
1685	*
1686	* @returns The number of bytes occupied by the string in the destination
1687	**/
1688
1689	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1690	{
1691	size_t src_len = strlen(src)+1;
1692	*dest = NULL;
1693	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1694	}
1695
1696	/**
1697	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1698	*
1699	* @param dest always set at least to NULL
1700	*
1701	* @returns The number of bytes occupied by the string in the destination
1702	**/
1703
1704	size_t pull_utf8_allocate(char *dest, const char src)
1705	{
1706	size_t dest_len, src_len = strlen(src)+1;
1707	*dest = NULL;
1708	if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1709	(void **)dest, &dest_len, True))
1710	return (size_t)-1;
1711	else
1712	return dest_len;
1713	}
1714
1715	/**
1716	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1717	*
1718	* @param dest always set at least to NULL
1719	*
1720	* @returns The number of bytes occupied by the string in the destination
1721	**/
1722
1723	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1724	{
1725	size_t src_len = strlen(src)+1;
1726	*dest = NULL;
1727	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1728	}
1729
1730	/**
1731	Copy a string from a char* src to a unicode or ascii
1732	dos codepage destination choosing unicode or ascii based on the
1733	flags in the SMB buffer starting at base_ptr.
1734	Return the number of bytes occupied by the string in the destination.
1735	flags can have:
1736	STR_TERMINATE means include the null termination.
1737	STR_UPPER means uppercase in the destination.
1738	STR_ASCII use ascii even with unicode packet.
1739	STR_NOALIGN means don't do alignment.
1740	dest_len is the maximum length allowed in the destination. If dest_len
1741	is -1 then no maxiumum is used.
1742	**/
1743
1744	size_t push_string_fn(const char *function, unsigned int line,
1745	const void *base_ptr, uint16 flags2,
1746	void dest, const char src,
1747	size_t dest_len, int flags)
1748	{
1749	#ifdef DEVELOPER
1750	/* We really need to zero fill here, not clobber
1751	* region, as we want to ensure that valgrind thinks
1752	* all of the outgoing buffer has been written to
1753	* so a send() or write() won't trap an error.
1754	* JRA.
1755	*/
1756	#if 0
1757	clobber_region(function, line, dest, dest_len);
1758	#else
1759	memset(dest, '\0', dest_len);
1760	#endif
1761	#endif
1762
1763	if (!(flags & STR_ASCII) && \
1764	((flags & STR_UNICODE \|\| \
1765	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1766	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1767	}
1768	return push_ascii(dest, src, dest_len, flags);
1769	}
1770
1771
1772	/**
1773	Copy a string from a unicode or ascii source (depending on
1774	the packet flags) to a char* destination.
1775	Flags can have:
1776	STR_TERMINATE means the string in src is null terminated.
1777	STR_UNICODE means to force as unicode.
1778	STR_ASCII use ascii even with unicode packet.
1779	STR_NOALIGN means don't do alignment.
1780	if STR_TERMINATE is set then src_len is ignored is it is -1
1781	src_len is the length of the source area in bytes.
1782	Return the number of bytes occupied by the string in src.
1783	The resulting string in "dest" is always null terminated.
1784	**/
1785
1786	size_t pull_string_fn(const char *function,
1787	unsigned int line,
1788	const void *base_ptr,
1789	uint16 smb_flags2,
1790	char *dest,
1791	const void *src,
1792	size_t dest_len,
1793	size_t src_len,
1794	int flags)
1795	{
1796	#ifdef DEVELOPER
1797	clobber_region(function, line, dest, dest_len);
1798	#endif
1799
1800	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1801	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1802	"UNICODE defined");
1803	}
1804
1805	if (!(flags & STR_ASCII) && \
1806	((flags & STR_UNICODE \|\| \
1807	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1808	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1809	}
1810	return pull_ascii(dest, src, dest_len, src_len, flags);
1811	}
1812
1813	/**
1814	Copy a string from a unicode or ascii source (depending on
1815	the packet flags) to a char* destination.
1816	Variant that uses talloc.
1817	Flags can have:
1818	STR_TERMINATE means the string in src is null terminated.
1819	STR_UNICODE means to force as unicode.
1820	STR_ASCII use ascii even with unicode packet.
1821	STR_NOALIGN means don't do alignment.
1822	if STR_TERMINATE is set then src_len is ignored is it is -1
1823	src_len is the length of the source area in bytes.
1824	Return the number of bytes occupied by the string in src.
1825	The resulting string in "dest" is always null terminated.
1826	**/
1827
1828	size_t pull_string_talloc_fn(const char *function,
1829	unsigned int line,
1830	TALLOC_CTX *ctx,
1831	const void *base_ptr,
1832	uint16 smb_flags2,
1833	char **ppdest,
1834	const void *src,
1835	size_t src_len,
1836	int flags)
1837	{
1838	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1839	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1840	"UNICODE defined");
1841	}
1842
1843	if (!(flags & STR_ASCII) && \
1844	((flags & STR_UNICODE \|\| \
1845	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1846	return pull_ucs2_base_talloc(ctx,
1847	base_ptr,
1848	ppdest,
1849	src,
1850	src_len,
1851	flags);
1852	}
1853	return pull_ascii_base_talloc(ctx,
1854	ppdest,
1855	src,
1856	src_len,
1857	flags);
1858	}
1859
1860
1861	size_t align_string(const void base_ptr, const char p, int flags)
1862	{
1863	if (!(flags & STR_ASCII) && \
1864	((flags & STR_UNICODE \|\| \
1865	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1866	return ucs2_align(base_ptr, p, flags);
1867	}
1868	return 0;
1869	}
1870
1871	/*
1872	Return the unicode codepoint for the next multi-byte CH_UNIX character
1873	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1874
1875	Also return the number of bytes consumed (which tells the caller
1876	how many bytes to skip to get to the next CH_UNIX character).
1877
1878	Return INVALID_CODEPOINT if the next character cannot be converted.
1879	*/
1880
1881	codepoint_t next_codepoint(const char str, size_t size)
1882	{
1883	/* It cannot occupy more than 4 bytes in UTF16 format */
1884	uint8_t buf[4];
1885	smb_iconv_t descriptor;
1886	size_t ilen_orig;
1887	size_t ilen;
1888	size_t olen;
1889	char *outbuf;
1890
1891	if ((str[0] & 0x80) == 0) {
1892	*size = 1;
1893	return (codepoint_t)str[0];
1894	}
1895
1896	/* We assume that no multi-byte character can take
1897	more than 5 bytes. This is OK as we only
1898	support codepoints up to 1M */
1899
1900	ilen_orig = strnlen(str, 5);
1901	ilen = ilen_orig;
1902
1903	lazy_initialize_conv();
1904
1905	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1906	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1907	*size = 1;
1908	return INVALID_CODEPOINT;
1909	}
1910
1911	/* This looks a little strange, but it is needed to cope
1912	with codepoints above 64k which are encoded as per RFC2781. */
1913	olen = 2;
1914	outbuf = (char *)buf;
1915	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1916	if (olen == 2) {
1917	/* We failed to convert to a 2 byte character.
1918	See if we can convert to a 4 UTF16-LE byte char encoding.
1919	*/
1920	olen = 4;
1921	outbuf = (char *)buf;
1922	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1923	if (olen == 4) {
1924	/* We didn't convert any bytes */
1925	*size = 1;
1926	return INVALID_CODEPOINT;
1927	}
1928	olen = 4 - olen;
1929	} else {
1930	olen = 2 - olen;
1931	}
1932
1933	*size = ilen_orig - ilen;
1934
1935	if (olen == 2) {
1936	/* 2 byte, UTF16-LE encoded value. */
1937	return (codepoint_t)SVAL(buf, 0);
1938	}
1939	if (olen == 4) {
1940	/* Decode a 4 byte UTF16-LE character manually.
1941	See RFC2871 for the encoding machanism.
1942	*/
1943	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1944	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1945
1946	return (codepoint_t)0x10000 +
1947	(w1 << 10) + w2;
1948	}
1949
1950	/* no other length is valid */
1951	return INVALID_CODEPOINT;
1952	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: