Context Navigation

source: branches/samba-3.2.x/source/lib/charcnv.c@ 1001

Visit:

Last change on this file since 1001 was 341, checked in by Herwig Bauernfeind, 16 years ago
divers fix for Ticket #68 in 3.2
File size: 49.5 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/*
 * Character written in place of anything that cannot be converted.
 * Hard-wired for now; it can become a parameter if someone complains. JRA.
 */

char lp_failed_convert_char(void)
{
	const char fail_char = '_';

	return fail_char;
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58	if (ch == CH_UTF16LE) ret = "UTF-16LE";
59	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60	else if (ch == CH_UNIX) ret = lp_unix_charset();
61	else if (ch == CH_DOS) ret = lp_dos_charset();
62	else if (ch == CH_DISPLAY) ret = lp_display_charset();
63	else if (ch == CH_UTF8) ret = "UTF8";
64
65	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66	if (ret && !strcmp(ret, "LOCALE")) {
67	const char *ln = NULL;
68
69	#ifdef HAVE_SETLOCALE
70	setlocale(LC_ALL, "");
71	#endif
72	ln = nl_langinfo(CODESET);
73	if (ln) {
74	/* Check whether the charset name is supported
75	by iconv */
76	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77	if (handle == (smb_iconv_t) -1) {
78	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79	ln = NULL;
80	} else {
81	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82	smb_iconv_close(handle);
83	}
84	}
85	ret = ln;
86	}
87	#endif
88
89	if (!ret \|\| !*ret) ret = "ASCII";
90	return ret;
91	}
92
93	void lazy_initialize_conv(void)
94	{
95	if (!initialized) {
96	load_case_tables();
97	init_iconv();
98	initialized = true;
99	}
100	}
101
102	/**
103	* Destroy global objects allocated by init_iconv()
104	**/
105	void gfree_charcnv(void)
106	{
107	int c1, c2;
108
109	for (c1=0;c1<NUM_CHARSETS;c1++) {
110	for (c2=0;c2<NUM_CHARSETS;c2++) {
111	if ( conv_handles[c1][c2] ) {
112	smb_iconv_close( conv_handles[c1][c2] );
113	conv_handles[c1][c2] = 0;
114	}
115	}
116	}
117	initialized = false;
118	}
119
120	/**
121	* Initialize iconv conversion descriptors.
122	*
123	* This is called the first time it is needed, and also called again
124	* every time the configuration is reloaded, because the charset or
125	* codepage might have changed.
126	**/
127	void init_iconv(void)
128	{
129	int c1, c2;
130	bool did_reload = False;
131
132	/* so that charset_name() works we need to get the UNIX<->UCS2 going
133	first */
134	if (!conv_handles[CH_UNIX][CH_UTF16LE])
135	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
136
137	if (!conv_handles[CH_UTF16LE][CH_UNIX])
138	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
139
140	for (c1=0;c1<NUM_CHARSETS;c1++) {
141	for (c2=0;c2<NUM_CHARSETS;c2++) {
142	const char *n1 = charset_name((charset_t)c1);
143	const char *n2 = charset_name((charset_t)c2);
144	if (conv_handles[c1][c2] &&
145	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
146	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
147	continue;
148
149	did_reload = True;
150
151	if (conv_handles[c1][c2])
152	smb_iconv_close(conv_handles[c1][c2]);
153
154	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
155	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
156	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
157	charset_name((charset_t)c1), charset_name((charset_t)c2)));
158	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
159	n1 = "ASCII";
160	}
161	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
162	n2 = "ASCII";
163	}
164	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
165	n1, n2 ));
166	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
167	if (!conv_handles[c1][c2]) {
168	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
169	smb_panic("init_iconv: conv_handle initialization failed");
170	}
171	}
172	}
173	}
174
175	if (did_reload) {
176	/* XXX: Does this really get called every time the dos
177	* codepage changes? */
178	/* XXX: Is the did_reload test too strict? */
179	conv_silent = True;
180	init_valid_table();
181	conv_silent = False;
182	}
183	}
184
185	/**
186	* Convert string from one encoding to another, making error checking etc
187	* Slow path version - uses (slow) iconv.
188	*
189	* @param src pointer to source string (multibyte or singlebyte)
190	* @param srclen length of the source string in bytes
191	* @param dest pointer to destination string (multibyte or singlebyte)
192	* @param destlen maximal length allowed for string
193	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
194	* @returns the number of bytes occupied in the destination
195	*
196	* Ensure the srclen contains the terminating zero.
197	*
198	**/
199
200	static size_t convert_string_internal(charset_t from, charset_t to,
201	void const *src, size_t srclen,
202	void *dest, size_t destlen, bool allow_bad_conv)
203	{
204	size_t i_len, o_len;
205	size_t retval;
206	const char* inbuf = (const char*)src;
207	char* outbuf = (char*)dest;
208	smb_iconv_t descriptor;
209
210	lazy_initialize_conv();
211
212	descriptor = conv_handles[from][to];
213
214	if (srclen == (size_t)-1) {
215	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
216	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
217	} else {
218	srclen = strlen((const char *)src)+1;
219	}
220	}
221
222
223	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
224	if (!conv_silent)
225	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
226	return (size_t)-1;
227	}
228
229	i_len=srclen;
230	o_len=destlen;
231
232	again:
233
234	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
235	if(retval==(size_t)-1) {
236	const char *reason="unknown error";
237	switch(errno) {
238	case EINVAL:
239	reason="Incomplete multibyte sequence";
240	if (!conv_silent)
241	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
242	if (allow_bad_conv)
243	goto use_as_is;
244	return (size_t)-1;
245	case E2BIG:
246	reason="No more room";
247	if (!conv_silent) {
248	if (from == CH_UNIX) {
249	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
250	charset_name(from), charset_name(to),
251	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
252	} else {
253	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
254	charset_name(from), charset_name(to),
255	(unsigned int)srclen, (unsigned int)destlen));
256	}
257	}
258	break;
259	case EILSEQ:
260	reason="Illegal multibyte sequence";
261	if (!conv_silent)
262	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
263	if (allow_bad_conv)
264	goto use_as_is;
265
266	return (size_t)-1;
267	default:
268	if (!conv_silent)
269	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270	return (size_t)-1;
271	}
272	/* smb_panic(reason); */
273	}
274	return destlen-o_len;
275
276	use_as_is:
277
278	/*
279	* Conversion not supported. This is actually an error, but there are so
280	* many misconfigured iconv systems and smb.conf's out there we can't just
281	* fail. Do a very bad conversion instead.... JRA.
282	*/
283
284	{
285	if (o_len == 0 \|\| i_len == 0)
286	return destlen - o_len;
287
288	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
289	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
290	/* Can't convert from utf16 any endian to multibyte.
291	Replace with the default fail char.
292	*/
293	if (i_len < 2)
294	return destlen - o_len;
295	if (i_len >= 2) {
296	*outbuf = lp_failed_convert_char();
297
298	outbuf++;
299	o_len--;
300
301	inbuf += 2;
302	i_len -= 2;
303	}
304
305	if (o_len == 0 \|\| i_len == 0)
306	return destlen - o_len;
307
308	/* Keep trying with the next char... */
309	goto again;
310
311	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312	/* Can't convert to UTF16LE - just widen by adding the
313	default fail char then zero.
314	*/
315	if (o_len < 2)
316	return destlen - o_len;
317
318	outbuf[0] = lp_failed_convert_char();
319	outbuf[1] = '\0';
320
321	inbuf++;
322	i_len--;
323
324	outbuf += 2;
325	o_len -= 2;
326
327	if (o_len == 0 \|\| i_len == 0)
328	return destlen - o_len;
329
330	/* Keep trying with the next char... */
331	goto again;
332
333	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334	to != CH_UTF16LE && to != CH_UTF16BE) {
335	/* Failed multibyte to multibyte. Just copy the default fail char and
336	try again. */
337	outbuf[0] = lp_failed_convert_char();
338
339	inbuf++;
340	i_len--;
341
342	outbuf++;
343	o_len--;
344
345	if (o_len == 0 \|\| i_len == 0)
346	return destlen - o_len;
347
348	/* Keep trying with the next char... */
349	goto again;
350
351	} else {
352	/* Keep compiler happy.... */
353	return destlen - o_len;
354	}
355	}
356	}
357
358	/**
359	* Convert string from one encoding to another, making error checking etc
360	* Fast path version - handles ASCII first.
361	*
362	* @param src pointer to source string (multibyte or singlebyte)
363	* @param srclen length of the source string in bytes, or -1 for nul terminated.
364	* @param dest pointer to destination string (multibyte or singlebyte)
365	* @param destlen maximal length allowed for string - NEVER -1.
366	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367	* @returns the number of bytes occupied in the destination
368	*
369	* Ensure the srclen contains the terminating zero.
370	*
371	* This function has been hand-tuned to provide a fast path.
372	* Don't change unless you really know what you are doing. JRA.
373	**/
374
375	size_t convert_string(charset_t from, charset_t to,
376	void const *src, size_t srclen,
377	void *dest, size_t destlen, bool allow_bad_conv)
378	{
379	/*
380	* NB. We deliberately don't do a strlen here if srclen == -1.
381	* This is very expensive over millions of calls and is taken
382	* care of in the slow path in convert_string_internal. JRA.
383	*/
384
385	#ifdef DEVELOPER
386	SMB_ASSERT(destlen != (size_t)-1);
387	#endif
388
389	if (srclen == 0)
390	return 0;
391
392	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393	const unsigned char p = (const unsigned char )src;
394	unsigned char q = (unsigned char )dest;
395	size_t slen = srclen;
396	size_t dlen = destlen;
397	unsigned char lastp = '\0';
398	size_t retval = 0;
399
400	/* If all characters are ascii, fast path here. */
401	while (slen && dlen) {
402	if ((lastp = *p) <= 0x7f) {
403	q++ = p++;
404	if (slen != (size_t)-1) {
405	slen--;
406	}
407	dlen--;
408	retval++;
409	if (!lastp)
410	break;
411	} else {
412	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413	goto general_case;
414	#else
415	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416	if (ret == (size_t)-1) {
417	return ret;
418	}
419	return retval + ret;
420	#endif
421	}
422	}
423	if (!dlen) {
424	/* Even if we fast path we should note if we ran out of room. */
425	if (((slen != (size_t)-1) && slen) \|\|
426	((slen == (size_t)-1) && lastp)) {
427	errno = E2BIG;
428	}
429	}
430	return retval;
431
432	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433	const unsigned char p = (const unsigned char )src;
434	unsigned char q = (unsigned char )dest;
435	size_t retval = 0;
436	size_t slen = srclen;
437	size_t dlen = destlen;
438	unsigned char lastp = '\0';
439
440	/* If all characters are ascii, fast path here. */
441	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
442	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
443	q++ = p;
444	if (slen != (size_t)-1) {
445	slen -= 2;
446	}
447	p += 2;
448	dlen--;
449	retval++;
450	if (!lastp)
451	break;
452	} else {
453	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
454	goto general_case;
455	#else
456	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457	if (ret == (size_t)-1) {
458	return ret;
459	}
460	return retval + ret;
461	#endif
462	}
463	}
464	if (!dlen) {
465	/* Even if we fast path we should note if we ran out of room. */
466	if (((slen != (size_t)-1) && slen) \|\|
467	((slen == (size_t)-1) && lastp)) {
468	errno = E2BIG;
469	}
470	}
471	return retval;
472
473	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
474	const unsigned char p = (const unsigned char )src;
475	unsigned char q = (unsigned char )dest;
476	size_t retval = 0;
477	size_t slen = srclen;
478	size_t dlen = destlen;
479	unsigned char lastp = '\0';
480
481	/* If all characters are ascii, fast path here. */
482	while (slen && (dlen >= 2)) {
483	if ((lastp = *p) <= 0x7F) {
484	q++ = p++;
485	*q++ = '\0';
486	if (slen != (size_t)-1) {
487	slen--;
488	}
489	dlen -= 2;
490	retval += 2;
491	if (!lastp)
492	break;
493	} else {
494	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
495	goto general_case;
496	#else
497	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
498	if (ret == (size_t)-1) {
499	return ret;
500	}
501	return retval + ret;
502	#endif
503	}
504	}
505	if (!dlen) {
506	/* Even if we fast path we should note if we ran out of room. */
507	if (((slen != (size_t)-1) && slen) \|\|
508	((slen == (size_t)-1) && lastp)) {
509	errno = E2BIG;
510	}
511	}
512	return retval;
513	}
514
515	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
516	general_case:
517	#endif
518	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
519	}
520
521	/**
522	* Convert between character sets, allocating a new buffer for the result.
523	*
524	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
525	* (this is a bad interface and needs fixing. JRA).
526	* @param srclen length of source buffer.
527	* @param dest always set at least to NULL
528	* @param converted_size set to the size of the allocated buffer on return
529	* true
530	* @note -1 is not accepted for srclen.
531	*
532	* @return True if new buffer was correctly allocated, and string was
533	* converted.
534	*
535	* Ensure the srclen contains the terminating zero.
536	*
537	* I hate the goto's in this function. It's embarressing.....
538	* There has to be a cleaner way to do this. JRA.
539	**/
540
541	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
542	void const src, size_t srclen, void dst,
543	size_t *converted_size, bool allow_bad_conv)
544	{
545	size_t i_len, o_len, destlen = (srclen * 3) / 2;
546	size_t retval;
547	const char inbuf = (const char )src;
548	char outbuf = NULL, ob = NULL;
549	smb_iconv_t descriptor;
550	void dest = (void )dst;
551
552	*dest = NULL;
553
554	if (!converted_size) {
555	errno = EINVAL;
556	return false;
557	}
558
559	if (src == NULL \|\| srclen == (size_t)-1) {
560	errno = EINVAL;
561	return false;
562	}
563	if (srclen == 0) {
564	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
565	if (ob == NULL) {
566	errno = ENOMEM;
567	return false;
568	}
569	*dest = ob;
570	*converted_size = 0;
571	return true;
572	}
573
574	lazy_initialize_conv();
575
576	descriptor = conv_handles[from][to];
577
578	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
579	if (!conv_silent)
580	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
581	errno = EOPNOTSUPP;
582	return false;
583	}
584
585	convert:
586
587	/* +2 is for ucs2 null termination. */
588	if ((destlen*2)+2 < destlen) {
589	/* wrapped ! abort. */
590	if (!conv_silent)
591	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
592	if (!ctx)
593	SAFE_FREE(outbuf);
594	errno = EOPNOTSUPP;
595	return false;
596	} else {
597	destlen = destlen * 2;
598	}
599
600	/* +2 is for ucs2 null termination. */
601	if (ctx) {
602	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
603	} else {
604	ob = (char *)SMB_REALLOC(ob, destlen + 2);
605	}
606
607	if (!ob) {
608	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
609	errno = ENOMEM;
610	return false;
611	}
612	outbuf = ob;
613	i_len = srclen;
614	o_len = destlen;
615
616	again:
617
618
619	retval = smb_iconv(descriptor,
620	&inbuf, &i_len,
621	&outbuf, &o_len);
622	if(retval == (size_t)-1) {
623	const char *reason="unknown error";
624	switch(errno) {
625	case EINVAL:
626	reason="Incomplete multibyte sequence";
627	if (!conv_silent)
628	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
629	if (allow_bad_conv)
630	goto use_as_is;
631	break;
632	case E2BIG:
633	goto convert;
634	case EILSEQ:
635	reason="Illegal multibyte sequence";
636	if (!conv_silent)
637	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
638	if (allow_bad_conv)
639	goto use_as_is;
640	break;
641	}
642	if (!conv_silent)
643	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
644	/* smb_panic(reason); */
645	if (ctx) {
646	TALLOC_FREE(ob);
647	} else {
648	SAFE_FREE(ob);
649	}
650	return false;
651	}
652
653	out:
654
655	destlen = destlen - o_len;
656	/* Don't shrink unless we're reclaiming a lot of
657	* space. This is in the hot codepath and these
658	* reallocs cost. JRA.
659	*/
660	if (o_len > 1024) {
661	/* We're shrinking here so we know the +2 is safe from wrap. */
662	if (ctx) {
663	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
664	} else {
665	ob = (char *)SMB_REALLOC(ob,destlen + 2);
666	}
667	}
668
669	if (destlen && !ob) {
670	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
671	errno = ENOMEM;
672	return false;
673	}
674
675	*dest = ob;
676
677	/* Must ucs2 null terminate in the extra space we allocated. */
678	ob[destlen] = '\0';
679	ob[destlen+1] = '\0';
680
681	*converted_size = destlen;
682	return true;
683
684	use_as_is:
685
686	/*
687	* Conversion not supported. This is actually an error, but there are so
688	* many misconfigured iconv systems and smb.conf's out there we can't just
689	* fail. Do a very bad conversion instead.... JRA.
690	*/
691
692	{
693	if (o_len == 0 \|\| i_len == 0)
694	goto out;
695
696	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
697	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
698	/* Can't convert from utf16 any endian to multibyte.
699	Replace with the default fail char.
700	*/
701
702	if (i_len < 2)
703	goto out;
704
705	if (i_len >= 2) {
706	*outbuf = lp_failed_convert_char();
707
708	outbuf++;
709	o_len--;
710
711	inbuf += 2;
712	i_len -= 2;
713	}
714
715	if (o_len == 0 \|\| i_len == 0)
716	goto out;
717
718	/* Keep trying with the next char... */
719	goto again;
720
721	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
722	/* Can't convert to UTF16LE - just widen by adding the
723	default fail char then zero.
724	*/
725	if (o_len < 2)
726	goto out;
727
728	outbuf[0] = lp_failed_convert_char();
729	outbuf[1] = '\0';
730
731	inbuf++;
732	i_len--;
733
734	outbuf += 2;
735	o_len -= 2;
736
737	if (o_len == 0 \|\| i_len == 0)
738	goto out;
739
740	/* Keep trying with the next char... */
741	goto again;
742
743	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
744	to != CH_UTF16LE && to != CH_UTF16BE) {
745	/* Failed multibyte to multibyte. Just copy the default fail char and
746	try again. */
747	outbuf[0] = lp_failed_convert_char();
748
749	inbuf++;
750	i_len--;
751
752	outbuf++;
753	o_len--;
754
755	if (o_len == 0 \|\| i_len == 0)
756	goto out;
757
758	/* Keep trying with the next char... */
759	goto again;
760
761	} else {
762	/* Keep compiler happy.... */
763	goto out;
764	}
765	}
766	}
767
768	/**
769	* Convert between character sets, allocating a new buffer using talloc for the result.
770	*
771	* @param srclen length of source buffer.
772	* @param dest always set at least to NULL
773	* @note -1 is not accepted for srclen.
774	*
775	* @returns Size in bytes of the converted string; or -1 in case of error.
776	**/
777	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
778	void const src, size_t srclen, void dst,
779	bool allow_bad_conv)
780	{
781	void dest = (void )dst;
782	size_t dest_len;
783
784	*dest = NULL;
785	if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
786	&dest_len, allow_bad_conv))
787	return (size_t)-1;
788	if (*dest == NULL)
789	return (size_t)-1;
790	return dest_len;
791	}
792
793	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
794	{
795	size_t size;
796	smb_ucs2_t *buffer;
797
798	size = push_ucs2_allocate(&buffer, src);
799	if (size == (size_t)-1) {
800	return (size_t)-1;
801	}
802	if (!strupper_w(buffer) && (dest == src)) {
803	free(buffer);
804	return srclen;
805	}
806
807	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
808	free(buffer);
809	return size;
810	}
811
812	/**
813	strdup() a unix string to upper case.
814	**/
815
816	char strdup_upper(const char s)
817	{
818	char *out_buffer = SMB_STRDUP(s);
819	const unsigned char p = (const unsigned char )s;
820	unsigned char q = (unsigned char )out_buffer;
821
822	if (!q) {
823	return NULL;
824	}
825
826	/* this is quite a common operation, so we want it to be
827	fast. We optimise for the ascii case, knowing that all our
828	supported multi-byte character sets are ascii-compatible
829	(ie. they match for the first 128 chars) */
830
831	while (*p) {
832	if (*p & 0x80)
833	break;
834	q++ = toupper_ascii_fast(p);
835	p++;
836	}
837
838	if (*p) {
839	/* MB case. */
840	size_t size, size2;
841	smb_ucs2_t *buffer = NULL;
842
843	SAFE_FREE(out_buffer);
844	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
845	strlen(s) + 1, (void *)(void )&buffer, &size,
846	True)) {
847	return NULL;
848	}
849
850	strupper_w(buffer);
851
852	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
853	size, (void *)(void )&out_buffer, &size2, True)) {
854	TALLOC_FREE(buffer);
855	return NULL;
856	}
857
858	/* Don't need the intermediate buffer
859	* anymore.
860	*/
861	TALLOC_FREE(buffer);
862	}
863
864	return out_buffer;
865	}
866
867	/**
868	talloc_strdup() a unix string to upper case.
869	**/
870
871	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
872	{
873	char *out_buffer = talloc_strdup(ctx,s);
874	const unsigned char p = (const unsigned char )s;
875	unsigned char q = (unsigned char )out_buffer;
876
877	if (!q) {
878	return NULL;
879	}
880
881	/* this is quite a common operation, so we want it to be
882	fast. We optimise for the ascii case, knowing that all our
883	supported multi-byte character sets are ascii-compatible
884	(ie. they match for the first 128 chars) */
885
886	while (*p) {
887	if (*p & 0x80)
888	break;
889	q++ = toupper_ascii_fast(p);
890	p++;
891	}
892
893	if (*p) {
894	/* MB case. */
895	size_t size;
896	smb_ucs2_t *ubuf = NULL;
897
898	/* We're not using the ascii buffer above. */
899	TALLOC_FREE(out_buffer);
900
901	size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
902	s, strlen(s)+1,
903	(void *)&ubuf,
904	True);
905	if (size == (size_t)-1) {
906	return NULL;
907	}
908
909	strupper_w(ubuf);
910
911	size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
912	ubuf, size,
913	(void *)&out_buffer,
914	True);
915
916	/* Don't need the intermediate buffer
917	* anymore.
918	*/
919
920	TALLOC_FREE(ubuf);
921
922	if (size == (size_t)-1) {
923	return NULL;
924	}
925	}
926
927	return out_buffer;
928	}
929
930	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
931	{
932	size_t size;
933	smb_ucs2_t *buffer = NULL;
934
935	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
936	(void *)(void )&buffer, &size, True)) {
937	smb_panic("failed to create UCS2 buffer");
938	}
939	if (!strlower_w(buffer) && (dest == src)) {
940	SAFE_FREE(buffer);
941	return srclen;
942	}
943	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
944	SAFE_FREE(buffer);
945	return size;
946	}
947
948	/**
949	strdup() a unix string to lower case.
950	**/
951
952	char strdup_lower(const char s)
953	{
954	size_t size;
955	smb_ucs2_t *buffer = NULL;
956	char *out_buffer;
957
958	size = push_ucs2_allocate(&buffer, s);
959	if (size == -1 \|\| !buffer) {
960	return NULL;
961	}
962
963	strlower_w(buffer);
964
965	size = pull_ucs2_allocate(&out_buffer, buffer);
966	SAFE_FREE(buffer);
967
968	if (size == (size_t)-1) {
969	return NULL;
970	}
971
972	return out_buffer;
973	}
974
975	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
976	{
977	size_t size;
978	smb_ucs2_t *buffer = NULL;
979	char *out_buffer;
980
981	size = push_ucs2_talloc(ctx, &buffer, s);
982	if (size == -1 \|\| !buffer) {
983	TALLOC_FREE(buffer);
984	return NULL;
985	}
986
987	strlower_w(buffer);
988
989	size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
990	TALLOC_FREE(buffer);
991
992	if (size == (size_t)-1) {
993	TALLOC_FREE(out_buffer);
994	return NULL;
995	}
996
997	return out_buffer;
998	}
999
1000
1001	size_t ucs2_align(const void base_ptr, const void p, int flags)
1002	{
1003	if (flags & (STR_NOALIGN\|STR_ASCII))
1004	return 0;
1005	return PTR_DIFF(p, base_ptr) & 1;
1006	}
1007
1008
1009	/**
1010	* Copy a string from a char* unix src to a dos codepage string destination.
1011	*
1012	* @return the number of bytes occupied by the string in the destination.
1013	*
1014	* @param flags can include
1015	* <dl>
1016	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1017	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1018	* </dl>
1019	*
1020	* @param dest_len the maximum length in bytes allowed in the
1021	* destination.
1022	**/
1023	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1024	{
1025	size_t src_len = strlen(src);
1026	char *tmpbuf = NULL;
1027	size_t ret;
1028
1029	/* No longer allow a length of -1. */
1030	if (dest_len == (size_t)-1) {
1031	smb_panic("push_ascii - dest_len == -1");
1032	}
1033
1034	if (flags & STR_UPPER) {
1035	tmpbuf = SMB_STRDUP(src);
1036	if (!tmpbuf) {
1037	smb_panic("malloc fail");
1038	}
1039	strupper_m(tmpbuf);
1040	src = tmpbuf;
1041	}
1042
1043	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1044	src_len++;
1045	}
1046
1047	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1048	if (ret == (size_t)-1 &&
1049	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1050	&& dest_len > 0) {
1051	((char *)dest)[0] = '\0';
1052	}
1053	SAFE_FREE(tmpbuf);
1054	return ret;
1055	}
1056
1057	size_t push_ascii_fstring(void dest, const char src)
1058	{
1059	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1060	}
1061
1062	/********************************************************************
1063	Push an nstring - ensure null terminated. Written by
1064	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1065	********************************************************************/
1066
1067	size_t push_ascii_nstring(void dest, const char src)
1068	{
1069	size_t i, buffer_len, dest_len;
1070	smb_ucs2_t *buffer;
1071
1072	conv_silent = True;
1073	buffer_len = push_ucs2_allocate(&buffer, src);
1074	if (buffer_len == (size_t)-1) {
1075	smb_panic("failed to create UCS2 buffer");
1076	}
1077
1078	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1079	buffer_len /= sizeof(smb_ucs2_t);
1080
1081	dest_len = 0;
1082	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1083	unsigned char mb[10];
1084	/* Convert one smb_ucs2_t character at a time. */
1085	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1086	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1087	memcpy((char *)dest + dest_len, mb, mb_len);
1088	dest_len += mb_len;
1089	} else {
1090	errno = E2BIG;
1091	break;
1092	}
1093	}
1094	((char *)dest)[dest_len] = '\0';
1095
1096	SAFE_FREE(buffer);
1097	conv_silent = False;
1098	return dest_len;
1099	}
1100
1101	/********************************************************************
1102	Push and malloc an ascii string. src and dest null terminated.
1103	********************************************************************/
1104
1105	size_t push_ascii_allocate(char *dest, const char src)
1106	{
1107	size_t dest_len, src_len = strlen(src)+1;
1108
1109	*dest = NULL;
1110	if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1111	(void **)dest, &dest_len, True))
1112	return (size_t)-1;
1113	else
1114	return dest_len;
1115	}
1116
1117	/**
1118	* Copy a string from a dos codepage source to a unix char* destination.
1119	*
1120	* The resulting string in "dest" is always null terminated.
1121	*
1122	* @param flags can have:
1123	* <dl>
1124	* <dt>STR_TERMINATE</dt>
1125	* <dd>STR_TERMINATE means the string in @p src
1126	* is null terminated, and src_len is ignored.</dd>
1127	* </dl>
1128	*
1129	* @param src_len is the length of the source area in bytes.
1130	* @returns the number of bytes occupied by the string in @p src.
1131	**/
1132	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1133	{
1134	size_t ret;
1135
1136	if (dest_len == (size_t)-1) {
1137	/* No longer allow dest_len of -1. */
1138	smb_panic("pull_ascii - invalid dest_len of -1");
1139	}
1140
1141	if (flags & STR_TERMINATE) {
1142	if (src_len == (size_t)-1) {
1143	src_len = strlen((const char *)src) + 1;
1144	} else {
1145	size_t len = strnlen((const char *)src, src_len);
1146	if (len < src_len)
1147	len++;
1148	src_len = len;
1149	}
1150	}
1151
1152	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1153	if (ret == (size_t)-1) {
1154	ret = 0;
1155	dest_len = 0;
1156	}
1157
1158	if (dest_len && ret) {
1159	/* Did we already process the terminating zero ? */
1160	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1161	dest[MIN(ret, dest_len-1)] = 0;
1162	}
1163	} else {
1164	dest[0] = 0;
1165	}
1166
1167	return src_len;
1168	}
1169
1170	/**
1171	* Copy a string from a dos codepage source to a unix char* destination.
1172	Talloc version.
1173	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1174	needs fixing. JRA).
1175	*
1176	* The resulting string in "dest" is always null terminated.
1177	*
1178	* @param flags can have:
1179	* <dl>
1180	* <dt>STR_TERMINATE</dt>
1181	* <dd>STR_TERMINATE means the string in @p src
1182	* is null terminated, and src_len is ignored.</dd>
1183	* </dl>
1184	*
1185	* @param src_len is the length of the source area in bytes.
1186	* @returns the number of bytes occupied by the string in @p src.
1187	**/
1188
1189	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1190	char **ppdest,
1191	const void *src,
1192	size_t src_len,
1193	int flags)
1194	{
1195	char *dest = NULL;
1196	size_t dest_len = 0;
1197
1198	#ifdef DEVELOPER
1199	/* Ensure we never use the braindead "malloc" varient. */
1200	if (ctx == NULL) {
1201	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1202	}
1203	#endif
1204
1205	*ppdest = NULL;
1206
1207	if (!src_len) {
1208	return 0;
1209	}
1210
1211	if (flags & STR_TERMINATE) {
1212	if (src_len == (size_t)-1) {
1213	src_len = strlen((const char *)src) + 1;
1214	} else {
1215	size_t len = strnlen((const char *)src, src_len);
1216	if (len < src_len)
1217	len++;
1218	src_len = len;
1219	}
1220	/* Ensure we don't use an insane length from the client. */
1221	if (src_len >= 1024*1024) {
1222	char *msg = talloc_asprintf(ctx,
1223	"Bad src length (%u) in "
1224	"pull_ascii_base_talloc",
1225	(unsigned int)src_len);
1226	smb_panic(msg);
1227	}
1228	} else {
1229	/* Can't have an unlimited length
1230	* non STR_TERMINATE'd.
1231	*/
1232	if (src_len == (size_t)-1) {
1233	errno = EINVAL;
1234	return 0;
1235	}
1236	}
1237
1238	/* src_len != -1 here. */
1239
1240	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1241	&dest_len, True)) {
1242	dest_len = 0;
1243	}
1244
1245	if (dest_len && dest) {
1246	/* Did we already process the terminating zero ? */
1247	if (dest[dest_len-1] != 0) {
1248	size_t size = talloc_get_size(dest);
1249	/* Have we got space to append the '\0' ? */
1250	if (size <= dest_len) {
1251	/* No, realloc. */
1252	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1253	dest_len+1);
1254	if (!dest) {
1255	/* talloc fail. */
1256	dest_len = (size_t)-1;
1257	return 0;
1258	}
1259	}
1260	/* Yay - space ! */
1261	dest[dest_len] = '\0';
1262	dest_len++;
1263	}
1264	} else if (dest) {
1265	dest[0] = 0;
1266	}
1267
1268	*ppdest = dest;
1269	return src_len;
1270	}
1271
1272	size_t pull_ascii_fstring(char dest, const void src)
1273	{
1274	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1275	}
1276
1277	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1278
1279	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1280	{
1281	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1282	}
1283
1284	/**
1285	* Copy a string from a char* src to a unicode destination.
1286	*
1287	* @returns the number of bytes occupied by the string in the destination.
1288	*
1289	* @param flags can have:
1290	*
1291	* <dl>
1292	* <dt>STR_TERMINATE <dd>means include the null termination.
1293	* <dt>STR_UPPER <dd>means uppercase in the destination.
1294	* <dt>STR_NOALIGN <dd>means don't do alignment.
1295	* </dl>
1296	*
1297	* @param dest_len is the maximum length allowed in the
1298	* destination.
1299	**/
1300
1301	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1302	{
1303	size_t len=0;
1304	size_t src_len;
1305	size_t ret;
1306
1307	if (dest_len == (size_t)-1) {
1308	/* No longer allow dest_len of -1. */
1309	smb_panic("push_ucs2 - invalid dest_len of -1");
1310	}
1311
1312	if (flags & STR_TERMINATE)
1313	src_len = (size_t)-1;
1314	else
1315	src_len = strlen(src);
1316
1317	if (ucs2_align(base_ptr, dest, flags)) {
1318	(char )dest = 0;
1319	dest = (void )((char )dest + 1);
1320	if (dest_len)
1321	dest_len--;
1322	len++;
1323	}
1324
1325	/* ucs2 is always a multiple of 2 bytes */
1326	dest_len &= ~1;
1327
1328	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1329	if (ret == (size_t)-1) {
1330	if ((flags & STR_TERMINATE) &&
1331	dest &&
1332	dest_len) {
1333	(char )dest = 0;
1334	}
1335	return len;
1336	}
1337
1338	len += ret;
1339
1340	if (flags & STR_UPPER) {
1341	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1342	size_t i;
1343
1344	/* We check for i < (ret / 2) below as the dest string isn't null
1345	terminated if STR_TERMINATE isn't set. */
1346
1347	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1348	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1349	if (v != dest_ucs2[i]) {
1350	dest_ucs2[i] = v;
1351	}
1352	}
1353	}
1354
1355	return len;
1356	}
1357
1358
1359	/**
1360	* Copy a string from a unix char* src to a UCS2 destination,
1361	* allocating a buffer using talloc().
1362	*
1363	* @param dest always set at least to NULL
1364	*
1365	* @returns The number of bytes occupied by the string in the destination
1366	* or -1 in case of error.
1367	**/
1368	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1369	{
1370	size_t src_len = strlen(src)+1;
1371
1372	*dest = NULL;
1373	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1374	}
1375
1376
1377	/**
1378	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1379	*
1380	* @param dest always set at least to NULL
1381	*
1382	* @returns The number of bytes occupied by the string in the destination
1383	* or -1 in case of error.
1384	**/
1385
1386	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1387	{
1388	size_t dest_len, src_len = strlen(src)+1;
1389
1390	*dest = NULL;
1391	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1392	(void **)dest, &dest_len, True))
1393	return (size_t)-1;
1394	else
1395	return dest_len;
1396	}
1397
1398	/**
1399	Copy a string from a char* src to a UTF-8 destination.
1400	Return the number of bytes occupied by the string in the destination
1401	Flags can have:
1402	STR_TERMINATE means include the null termination
1403	STR_UPPER means uppercase in the destination
1404	dest_len is the maximum length allowed in the destination. If dest_len
1405	is -1 then no maxiumum is used.
1406	**/
1407
1408	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1409	{
1410	size_t src_len = 0;
1411	size_t ret;
1412	char *tmpbuf = NULL;
1413
1414	if (dest_len == (size_t)-1) {
1415	/* No longer allow dest_len of -1. */
1416	smb_panic("push_utf8 - invalid dest_len of -1");
1417	}
1418
1419	if (flags & STR_UPPER) {
1420	tmpbuf = strdup_upper(src);
1421	if (!tmpbuf) {
1422	return (size_t)-1;
1423	}
1424	src = tmpbuf;
1425	src_len = strlen(src);
1426	}
1427
1428	src_len = strlen(src);
1429	if (flags & STR_TERMINATE) {
1430	src_len++;
1431	}
1432
1433	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1434	SAFE_FREE(tmpbuf);
1435	return ret;
1436	}
1437
1438	size_t push_utf8_fstring(void dest, const char src)
1439	{
1440	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1441	}
1442
1443	/**
1444	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1445	*
1446	* @param dest always set at least to NULL
1447	*
1448	* @returns The number of bytes occupied by the string in the destination
1449	**/
1450
1451	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1452	{
1453	size_t src_len = strlen(src)+1;
1454
1455	*dest = NULL;
1456	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1457	}
1458
1459	/**
1460	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1461	*
1462	* @param dest always set at least to NULL
1463	*
1464	* @returns The number of bytes occupied by the string in the destination
1465	**/
1466
1467	size_t push_utf8_allocate(char *dest, const char src)
1468	{
1469	size_t dest_len, src_len = strlen(src)+1;
1470
1471	*dest = NULL;
1472	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1473	(void **)dest, &dest_len, True))
1474	return (size_t)-1;
1475	else
1476	return dest_len;
1477	}
1478
1479	/**
1480	Copy a string from a ucs2 source to a unix char* destination.
1481	Flags can have:
1482	STR_TERMINATE means the string in src is null terminated.
1483	STR_NOALIGN means don't try to align.
1484	if STR_TERMINATE is set then src_len is ignored if it is -1.
1485	src_len is the length of the source area in bytes
1486	Return the number of bytes occupied by the string in src.
1487	The resulting string in "dest" is always null terminated.
1488	**/
1489
1490	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1491	{
1492	size_t ret;
1493
1494	if (dest_len == (size_t)-1) {
1495	/* No longer allow dest_len of -1. */
1496	smb_panic("pull_ucs2 - invalid dest_len of -1");
1497	}
1498
1499	if (!src_len) {
1500	if (dest && dest_len > 0) {
1501	dest[0] = '\0';
1502	}
1503	return 0;
1504	}
1505
1506	if (ucs2_align(base_ptr, src, flags)) {
1507	src = (const void )((const char )src + 1);
1508	if (src_len != (size_t)-1)
1509	src_len--;
1510	}
1511
1512	if (flags & STR_TERMINATE) {
1513	/* src_len -1 is the default for null terminated strings. */
1514	if (src_len != (size_t)-1) {
1515	size_t len = strnlen_w((const smb_ucs2_t *)src,
1516	src_len/2);
1517	if (len < src_len/2)
1518	len++;
1519	src_len = len*2;
1520	}
1521	}
1522
1523	/* ucs2 is always a multiple of 2 bytes */
1524	if (src_len != (size_t)-1)
1525	src_len &= ~1;
1526
1527	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1528	if (ret == (size_t)-1) {
1529	ret = 0;
1530	dest_len = 0;
1531	}
1532
1533	if (src_len == (size_t)-1)
1534	src_len = ret*2;
1535
1536	if (dest_len && ret) {
1537	/* Did we already process the terminating zero ? */
1538	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1539	dest[MIN(ret, dest_len-1)] = 0;
1540	}
1541	} else {
1542	dest[0] = 0;
1543	}
1544
1545	return src_len;
1546	}
1547
1548	/**
1549	Copy a string from a ucs2 source to a unix char* destination.
1550	Talloc version with a base pointer.
1551	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1552	needs fixing. JRA).
1553	Flags can have:
1554	STR_TERMINATE means the string in src is null terminated.
1555	STR_NOALIGN means don't try to align.
1556	if STR_TERMINATE is set then src_len is ignored if it is -1.
1557	src_len is the length of the source area in bytes
1558	Return the number of bytes occupied by the string in src.
1559	The resulting string in "dest" is always null terminated.
1560	**/
1561
1562	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1563	const void *base_ptr,
1564	char **ppdest,
1565	const void *src,
1566	size_t src_len,
1567	int flags)
1568	{
1569	char *dest;
1570	size_t dest_len;
1571
1572	*ppdest = NULL;
1573
1574	#ifdef DEVELOPER
1575	/* Ensure we never use the braindead "malloc" varient. */
1576	if (ctx == NULL) {
1577	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1578	}
1579	#endif
1580
1581	if (!src_len) {
1582	return 0;
1583	}
1584
1585	if (ucs2_align(base_ptr, src, flags)) {
1586	src = (const void )((const char )src + 1);
1587	if (src_len != (size_t)-1)
1588	src_len--;
1589	}
1590
1591	if (flags & STR_TERMINATE) {
1592	/* src_len -1 is the default for null terminated strings. */
1593	if (src_len != (size_t)-1) {
1594	size_t len = strnlen_w((const smb_ucs2_t *)src,
1595	src_len/2);
1596	if (len < src_len/2)
1597	len++;
1598	src_len = len*2;
1599	} else {
1600	/*
1601	* src_len == -1 - alloc interface won't take this
1602	* so we must calculate.
1603	*/
1604	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1605	}
1606	/* Ensure we don't use an insane length from the client. */
1607	if (src_len >= 1024*1024) {
1608	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1609	}
1610	} else {
1611	/* Can't have an unlimited length
1612	* non STR_TERMINATE'd.
1613	*/
1614	if (src_len == (size_t)-1) {
1615	errno = EINVAL;
1616	return 0;
1617	}
1618	}
1619
1620	/* src_len != -1 here. */
1621
1622	/* ucs2 is always a multiple of 2 bytes */
1623	src_len &= ~1;
1624
1625	dest_len = convert_string_talloc(ctx,
1626	CH_UTF16LE,
1627	CH_UNIX,
1628	src,
1629	src_len,
1630	(void *)&dest,
1631	True);
1632	if (dest_len == (size_t)-1) {
1633	dest_len = 0;
1634	}
1635
1636	if (dest_len) {
1637	/* Did we already process the terminating zero ? */
1638	if (dest[dest_len-1] != 0) {
1639	size_t size = talloc_get_size(dest);
1640	/* Have we got space to append the '\0' ? */
1641	if (size <= dest_len) {
1642	/* No, realloc. */
1643	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1644	dest_len+1);
1645	if (!dest) {
1646	/* talloc fail. */
1647	dest_len = (size_t)-1;
1648	return 0;
1649	}
1650	}
1651	/* Yay - space ! */
1652	dest[dest_len] = '\0';
1653	dest_len++;
1654	}
1655	} else if (dest) {
1656	dest[0] = 0;
1657	}
1658
1659	*ppdest = dest;
1660	return src_len;
1661	}
1662
1663	size_t pull_ucs2_fstring(char dest, const void src)
1664	{
1665	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1666	}
1667
1668	/**
1669	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1670	*
1671	* @param dest always set at least to NULL
1672	*
1673	* @returns The number of bytes occupied by the string in the destination
1674	**/
1675
1676	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1677	{
1678	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1679	*dest = NULL;
1680	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1681	}
1682
1683	/**
1684	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1685	*
1686	* @param dest always set at least to NULL
1687	*
1688	* @returns The number of bytes occupied by the string in the destination
1689	**/
1690
1691	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1692	{
1693	size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1694	*dest = NULL;
1695	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1696	(void **)dest, &dest_len, True))
1697	return (size_t)-1;
1698	else
1699	return dest_len;
1700	}
1701
1702	/**
1703	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1704	*
1705	* @param dest always set at least to NULL
1706	*
1707	* @returns The number of bytes occupied by the string in the destination
1708	**/
1709
1710	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1711	{
1712	size_t src_len = strlen(src)+1;
1713	*dest = NULL;
1714	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1715	}
1716
1717	/**
1718	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1719	*
1720	* @param dest always set at least to NULL
1721	*
1722	* @returns The number of bytes occupied by the string in the destination
1723	**/
1724
1725	size_t pull_utf8_allocate(char *dest, const char src)
1726	{
1727	size_t dest_len, src_len = strlen(src)+1;
1728	*dest = NULL;
1729	if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1730	(void **)dest, &dest_len, True))
1731	return (size_t)-1;
1732	else
1733	return dest_len;
1734	}
1735
1736	/**
1737	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1738	*
1739	* @param dest always set at least to NULL
1740	*
1741	* @returns The number of bytes occupied by the string in the destination
1742	**/
1743
1744	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1745	{
1746	size_t src_len = strlen(src)+1;
1747	*dest = NULL;
1748	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1749	}
1750
1751	/**
1752	Copy a string from a char* src to a unicode or ascii
1753	dos codepage destination choosing unicode or ascii based on the
1754	flags in the SMB buffer starting at base_ptr.
1755	Return the number of bytes occupied by the string in the destination.
1756	flags can have:
1757	STR_TERMINATE means include the null termination.
1758	STR_UPPER means uppercase in the destination.
1759	STR_ASCII use ascii even with unicode packet.
1760	STR_NOALIGN means don't do alignment.
1761	dest_len is the maximum length allowed in the destination. If dest_len
1762	is -1 then no maxiumum is used.
1763	**/
1764
1765	size_t push_string_fn(const char *function, unsigned int line,
1766	const void *base_ptr, uint16 flags2,
1767	void dest, const char src,
1768	size_t dest_len, int flags)
1769	{
1770	#ifdef DEVELOPER
1771	/* We really need to zero fill here, not clobber
1772	* region, as we want to ensure that valgrind thinks
1773	* all of the outgoing buffer has been written to
1774	* so a send() or write() won't trap an error.
1775	* JRA.
1776	*/
1777	#if 0
1778	clobber_region(function, line, dest, dest_len);
1779	#else
1780	memset(dest, '\0', dest_len);
1781	#endif
1782	#endif
1783
1784	if (!(flags & STR_ASCII) && \
1785	((flags & STR_UNICODE \|\| \
1786	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1787	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1788	}
1789	return push_ascii(dest, src, dest_len, flags);
1790	}
1791
1792
1793	/**
1794	Copy a string from a unicode or ascii source (depending on
1795	the packet flags) to a char* destination.
1796	Flags can have:
1797	STR_TERMINATE means the string in src is null terminated.
1798	STR_UNICODE means to force as unicode.
1799	STR_ASCII use ascii even with unicode packet.
1800	STR_NOALIGN means don't do alignment.
1801	if STR_TERMINATE is set then src_len is ignored is it is -1
1802	src_len is the length of the source area in bytes.
1803	Return the number of bytes occupied by the string in src.
1804	The resulting string in "dest" is always null terminated.
1805	**/
1806
1807	size_t pull_string_fn(const char *function,
1808	unsigned int line,
1809	const void *base_ptr,
1810	uint16 smb_flags2,
1811	char *dest,
1812	const void *src,
1813	size_t dest_len,
1814	size_t src_len,
1815	int flags)
1816	{
1817	#ifdef DEVELOPER
1818	clobber_region(function, line, dest, dest_len);
1819	#endif
1820
1821	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1822	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1823	"UNICODE defined");
1824	}
1825
1826	if (!(flags & STR_ASCII) && \
1827	((flags & STR_UNICODE \|\| \
1828	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1829	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1830	}
1831	return pull_ascii(dest, src, dest_len, src_len, flags);
1832	}
1833
1834	/**
1835	Copy a string from a unicode or ascii source (depending on
1836	the packet flags) to a char* destination.
1837	Variant that uses talloc.
1838	Flags can have:
1839	STR_TERMINATE means the string in src is null terminated.
1840	STR_UNICODE means to force as unicode.
1841	STR_ASCII use ascii even with unicode packet.
1842	STR_NOALIGN means don't do alignment.
1843	if STR_TERMINATE is set then src_len is ignored is it is -1
1844	src_len is the length of the source area in bytes.
1845	Return the number of bytes occupied by the string in src.
1846	The resulting string in "dest" is always null terminated.
1847	**/
1848
1849	size_t pull_string_talloc_fn(const char *function,
1850	unsigned int line,
1851	TALLOC_CTX *ctx,
1852	const void *base_ptr,
1853	uint16 smb_flags2,
1854	char **ppdest,
1855	const void *src,
1856	size_t src_len,
1857	int flags)
1858	{
1859	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1860	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1861	"UNICODE defined");
1862	}
1863
1864	if (!(flags & STR_ASCII) && \
1865	((flags & STR_UNICODE \|\| \
1866	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1867	return pull_ucs2_base_talloc(ctx,
1868	base_ptr,
1869	ppdest,
1870	src,
1871	src_len,
1872	flags);
1873	}
1874	return pull_ascii_base_talloc(ctx,
1875	ppdest,
1876	src,
1877	src_len,
1878	flags);
1879	}
1880
1881
1882	size_t align_string(const void base_ptr, const char p, int flags)
1883	{
1884	if (!(flags & STR_ASCII) && \
1885	((flags & STR_UNICODE \|\| \
1886	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1887	return ucs2_align(base_ptr, p, flags);
1888	}
1889	return 0;
1890	}
1891
1892	/*
1893	Return the unicode codepoint for the next multi-byte CH_UNIX character
1894	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1895
1896	Also return the number of bytes consumed (which tells the caller
1897	how many bytes to skip to get to the next CH_UNIX character).
1898
1899	Return INVALID_CODEPOINT if the next character cannot be converted.
1900	*/
1901
1902	codepoint_t next_codepoint(const char str, size_t size)
1903	{
1904	/* It cannot occupy more than 4 bytes in UTF16 format */
1905	uint8_t buf[4];
1906	smb_iconv_t descriptor;
1907	size_t ilen_orig;
1908	size_t ilen;
1909	size_t olen;
1910	char *outbuf;
1911
1912	if ((str[0] & 0x80) == 0) {
1913	*size = 1;
1914	return (codepoint_t)str[0];
1915	}
1916
1917	/* We assume that no multi-byte character can take
1918	more than 5 bytes. This is OK as we only
1919	support codepoints up to 1M */
1920
1921	ilen_orig = strnlen(str, 5);
1922	ilen = ilen_orig;
1923
1924	lazy_initialize_conv();
1925
1926	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1927	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1928	*size = 1;
1929	return INVALID_CODEPOINT;
1930	}
1931
1932	/* This looks a little strange, but it is needed to cope
1933	with codepoints above 64k which are encoded as per RFC2781. */
1934	olen = 2;
1935	outbuf = (char *)buf;
1936	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1937	if (olen == 2) {
1938	/* We failed to convert to a 2 byte character.
1939	See if we can convert to a 4 UTF16-LE byte char encoding.
1940	*/
1941	olen = 4;
1942	outbuf = (char *)buf;
1943	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1944	if (olen == 4) {
1945	/* We didn't convert any bytes */
1946	*size = 1;
1947	return INVALID_CODEPOINT;
1948	}
1949	olen = 4 - olen;
1950	} else {
1951	olen = 2 - olen;
1952	}
1953
1954	*size = ilen_orig - ilen;
1955
1956	if (olen == 2) {
1957	/* 2 byte, UTF16-LE encoded value. */
1958	return (codepoint_t)SVAL(buf, 0);
1959	}
1960	if (olen == 4) {
1961	/* Decode a 4 byte UTF16-LE character manually.
1962	See RFC2871 for the encoding machanism.
1963	*/
1964	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1965	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1966
1967	return (codepoint_t)0x10000 +
1968	(w1 << 10) + w2;
1969	}
1970
1971	/* no other length is valid */
1972	return INVALID_CODEPOINT;
1973	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: