Context Navigation

source: branches/samba-3.2.x/source/lib/charcnv.c@ 234

Visit:

Last change on this file since 234 was 234, checked in by Herwig Bauernfeind, 16 years ago
Update 3.2 branch to 3.2.10
File size: 49.5 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/**
 * Return the character substituted for input that cannot be converted.
 *
 * We can parameterize this if someone complains.... JRA.
 *
 * @returns always '_'.
 **/
char lp_failed_convert_char(void)
{
	return '_';
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	else if (ch == CH_UNIX) ret = lp_unix_charset();
62	else if (ch == CH_DOS) ret = lp_dos_charset();
63	else if (ch == CH_DISPLAY) ret = lp_display_charset();
64	else if (ch == CH_UTF8) ret = "UTF8";
65
66	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67	if (ret && !strcmp(ret, "LOCALE")) {
68	const char *ln = NULL;
69
70	#ifdef HAVE_SETLOCALE
71	setlocale(LC_ALL, "");
72	#endif
73	ln = nl_langinfo(CODESET);
74	if (ln) {
75	/* Check whether the charset name is supported
76	by iconv */
77	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78	if (handle == (smb_iconv_t) -1) {
79	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80	ln = NULL;
81	} else {
82	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83	smb_iconv_close(handle);
84	}
85	}
86	ret = ln;
87	}
88	#endif
89
90	if (!ret \|\| !*ret) ret = "ASCII";
91	return ret;
92	}
93
94	void lazy_initialize_conv(void)
95	{
96	if (!initialized) {
97	load_case_tables();
98	init_iconv();
99	initialized = true;
100	}
101	}
102
103	/**
104	* Destroy global objects allocated by init_iconv()
105	**/
106	void gfree_charcnv(void)
107	{
108	int c1, c2;
109
110	for (c1=0;c1<NUM_CHARSETS;c1++) {
111	for (c2=0;c2<NUM_CHARSETS;c2++) {
112	if ( conv_handles[c1][c2] ) {
113	smb_iconv_close( conv_handles[c1][c2] );
114	conv_handles[c1][c2] = 0;
115	}
116	}
117	}
118	initialized = false;
119	}
120
121	/**
122	* Initialize iconv conversion descriptors.
123	*
124	* This is called the first time it is needed, and also called again
125	* every time the configuration is reloaded, because the charset or
126	* codepage might have changed.
127	**/
128	void init_iconv(void)
129	{
130	int c1, c2;
131	bool did_reload = False;
132
133	/* so that charset_name() works we need to get the UNIX<->UCS2 going
134	first */
135	if (!conv_handles[CH_UNIX][CH_UTF16LE])
136	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
137
138	if (!conv_handles[CH_UTF16LE][CH_UNIX])
139	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
140
141	for (c1=0;c1<NUM_CHARSETS;c1++) {
142	for (c2=0;c2<NUM_CHARSETS;c2++) {
143	const char *n1 = charset_name((charset_t)c1);
144	const char *n2 = charset_name((charset_t)c2);
145	if (conv_handles[c1][c2] &&
146	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148	continue;
149
150	did_reload = True;
151
152	if (conv_handles[c1][c2])
153	smb_iconv_close(conv_handles[c1][c2]);
154
155	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158	charset_name((charset_t)c1), charset_name((charset_t)c2)));
159	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160	n1 = "ASCII";
161	}
162	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163	n2 = "ASCII";
164	}
165	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166	n1, n2 ));
167	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168	if (!conv_handles[c1][c2]) {
169	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170	smb_panic("init_iconv: conv_handle initialization failed");
171	}
172	}
173	}
174	}
175
176	if (did_reload) {
177	/* XXX: Does this really get called every time the dos
178	* codepage changes? */
179	/* XXX: Is the did_reload test too strict? */
180	conv_silent = True;
181	init_valid_table();
182	conv_silent = False;
183	}
184	}
185
186	/**
187	* Convert string from one encoding to another, making error checking etc
188	* Slow path version - uses (slow) iconv.
189	*
190	* @param src pointer to source string (multibyte or singlebyte)
191	* @param srclen length of the source string in bytes
192	* @param dest pointer to destination string (multibyte or singlebyte)
193	* @param destlen maximal length allowed for string
194	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195	* @returns the number of bytes occupied in the destination
196	*
197	* Ensure the srclen contains the terminating zero.
198	*
199	**/
200
201	static size_t convert_string_internal(charset_t from, charset_t to,
202	void const *src, size_t srclen,
203	void *dest, size_t destlen, bool allow_bad_conv)
204	{
205	size_t i_len, o_len;
206	size_t retval;
207	const char* inbuf = (const char*)src;
208	char* outbuf = (char*)dest;
209	smb_iconv_t descriptor;
210
211	lazy_initialize_conv();
212
213	descriptor = conv_handles[from][to];
214
215	if (srclen == (size_t)-1) {
216	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
217	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
218	} else {
219	srclen = strlen((const char *)src)+1;
220	}
221	}
222
223
224	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
225	if (!conv_silent)
226	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
227	return (size_t)-1;
228	}
229
230	i_len=srclen;
231	o_len=destlen;
232
233	again:
234
235	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236	if(retval==(size_t)-1) {
237	const char *reason="unknown error";
238	switch(errno) {
239	case EINVAL:
240	reason="Incomplete multibyte sequence";
241	if (!conv_silent)
242	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
243	if (allow_bad_conv)
244	goto use_as_is;
245	return (size_t)-1;
246	case E2BIG:
247	reason="No more room";
248	if (!conv_silent) {
249	if (from == CH_UNIX) {
250	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251	charset_name(from), charset_name(to),
252	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
253	} else {
254	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255	charset_name(from), charset_name(to),
256	(unsigned int)srclen, (unsigned int)destlen));
257	}
258	}
259	break;
260	case EILSEQ:
261	reason="Illegal multibyte sequence";
262	if (!conv_silent)
263	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
264	if (allow_bad_conv)
265	goto use_as_is;
266
267	return (size_t)-1;
268	default:
269	if (!conv_silent)
270	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
271	return (size_t)-1;
272	}
273	/* smb_panic(reason); */
274	}
275	return destlen-o_len;
276
277	use_as_is:
278
279	/*
280	* Conversion not supported. This is actually an error, but there are so
281	* many misconfigured iconv systems and smb.conf's out there we can't just
282	* fail. Do a very bad conversion instead.... JRA.
283	*/
284
285	{
286	if (o_len == 0 \|\| i_len == 0)
287	return destlen - o_len;
288
289	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
290	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
291	/* Can't convert from utf16 any endian to multibyte.
292	Replace with the default fail char.
293	*/
294	if (i_len < 2)
295	return destlen - o_len;
296	if (i_len >= 2) {
297	*outbuf = lp_failed_convert_char();
298
299	outbuf++;
300	o_len--;
301
302	inbuf += 2;
303	i_len -= 2;
304	}
305
306	if (o_len == 0 \|\| i_len == 0)
307	return destlen - o_len;
308
309	/* Keep trying with the next char... */
310	goto again;
311
312	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
313	/* Can't convert to UTF16LE - just widen by adding the
314	default fail char then zero.
315	*/
316	if (o_len < 2)
317	return destlen - o_len;
318
319	outbuf[0] = lp_failed_convert_char();
320	outbuf[1] = '\0';
321
322	inbuf++;
323	i_len--;
324
325	outbuf += 2;
326	o_len -= 2;
327
328	if (o_len == 0 \|\| i_len == 0)
329	return destlen - o_len;
330
331	/* Keep trying with the next char... */
332	goto again;
333
334	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
335	to != CH_UTF16LE && to != CH_UTF16BE) {
336	/* Failed multibyte to multibyte. Just copy the default fail char and
337	try again. */
338	outbuf[0] = lp_failed_convert_char();
339
340	inbuf++;
341	i_len--;
342
343	outbuf++;
344	o_len--;
345
346	if (o_len == 0 \|\| i_len == 0)
347	return destlen - o_len;
348
349	/* Keep trying with the next char... */
350	goto again;
351
352	} else {
353	/* Keep compiler happy.... */
354	return destlen - o_len;
355	}
356	}
357	}
358
359	/**
360	* Convert string from one encoding to another, making error checking etc
361	* Fast path version - handles ASCII first.
362	*
363	* @param src pointer to source string (multibyte or singlebyte)
364	* @param srclen length of the source string in bytes, or -1 for nul terminated.
365	* @param dest pointer to destination string (multibyte or singlebyte)
366	* @param destlen maximal length allowed for string - NEVER -1.
367	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
368	* @returns the number of bytes occupied in the destination
369	*
370	* Ensure the srclen contains the terminating zero.
371	*
372	* This function has been hand-tuned to provide a fast path.
373	* Don't change unless you really know what you are doing. JRA.
374	**/
375
376	size_t convert_string(charset_t from, charset_t to,
377	void const *src, size_t srclen,
378	void *dest, size_t destlen, bool allow_bad_conv)
379	{
380	/*
381	* NB. We deliberately don't do a strlen here if srclen == -1.
382	* This is very expensive over millions of calls and is taken
383	* care of in the slow path in convert_string_internal. JRA.
384	*/
385
386	#ifdef DEVELOPER
387	SMB_ASSERT(destlen != (size_t)-1);
388	#endif
389
390	if (srclen == 0)
391	return 0;
392
393	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
394	const unsigned char p = (const unsigned char )src;
395	unsigned char q = (unsigned char )dest;
396	size_t slen = srclen;
397	size_t dlen = destlen;
398	unsigned char lastp = '\0';
399	size_t retval = 0;
400
401	/* If all characters are ascii, fast path here. */
402	while (slen && dlen) {
403	if ((lastp = *p) <= 0x7f) {
404	q++ = p++;
405	if (slen != (size_t)-1) {
406	slen--;
407	}
408	dlen--;
409	retval++;
410	if (!lastp)
411	break;
412	} else {
413	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
414	goto general_case;
415	#else
416	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
417	if (ret == (size_t)-1) {
418	return ret;
419	}
420	return retval + ret;
421	#endif
422	}
423	}
424	if (!dlen) {
425	/* Even if we fast path we should note if we ran out of room. */
426	if (((slen != (size_t)-1) && slen) \|\|
427	((slen == (size_t)-1) && lastp)) {
428	errno = E2BIG;
429	}
430	}
431	return retval;
432	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433	const unsigned char p = (const unsigned char )src;
434	unsigned char q = (unsigned char )dest;
435	size_t retval = 0;
436	size_t slen = srclen;
437	size_t dlen = destlen;
438	unsigned char lastp = '\0';
439
440	/* If all characters are ascii, fast path here. */
441	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
442	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
443	q++ = p;
444	if (slen != (size_t)-1) {
445	slen -= 2;
446	}
447	p += 2;
448	dlen--;
449	retval++;
450	if (!lastp)
451	break;
452	} else {
453	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
454	goto general_case;
455	#else
456	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457	if (ret == (size_t)-1) {
458	return ret;
459	}
460	return retval + ret;
461	#endif
462	}
463	}
464	if (!dlen) {
465	/* Even if we fast path we should note if we ran out of room. */
466	if (((slen != (size_t)-1) && slen) \|\|
467	((slen == (size_t)-1) && lastp)) {
468	errno = E2BIG;
469	}
470	}
471	return retval;
472	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
473	const unsigned char p = (const unsigned char )src;
474	unsigned char q = (unsigned char )dest;
475	size_t retval = 0;
476	size_t slen = srclen;
477	size_t dlen = destlen;
478	unsigned char lastp = '\0';
479
480	/* If all characters are ascii, fast path here. */
481	while (slen && (dlen >= 2)) {
482	if ((lastp = *p) <= 0x7F) {
483	q++ = p++;
484	*q++ = '\0';
485	if (slen != (size_t)-1) {
486	slen--;
487	}
488	dlen -= 2;
489	retval += 2;
490	if (!lastp)
491	break;
492	} else {
493	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
494	goto general_case;
495	#else
496	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
497	if (ret == (size_t)-1) {
498	return ret;
499	}
500	return retval + ret;
501	#endif
502	}
503	}
504	if (!dlen) {
505	/* Even if we fast path we should note if we ran out of room. */
506	if (((slen != (size_t)-1) && slen) \|\|
507	((slen == (size_t)-1) && lastp)) {
508	errno = E2BIG;
509	}
510	}
511	return retval;
512	}
513
514	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
515	general_case:
516	#endif
517	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
518	}
519
520	/**
521	* Convert between character sets, allocating a new buffer for the result.
522	*
523	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
524	* (this is a bad interface and needs fixing. JRA).
525	* @param srclen length of source buffer.
526	* @param dest always set at least to NULL
527	* @param converted_size set to the size of the allocated buffer on return
528	* true
529	* @note -1 is not accepted for srclen.
530	*
531	* @return True if new buffer was correctly allocated, and string was
532	* converted.
533	*
534	* Ensure the srclen contains the terminating zero.
535	*
536	* I hate the goto's in this function. It's embarressing.....
537	* There has to be a cleaner way to do this. JRA.
538	**/
539
540	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
541	void const src, size_t srclen, void dst,
542	size_t *converted_size, bool allow_bad_conv)
543	{
544	size_t i_len, o_len, destlen = (srclen * 3) / 2;
545	size_t retval;
546	const char inbuf = (const char )src;
547	char outbuf = NULL, ob = NULL;
548	smb_iconv_t descriptor;
549	void dest = (void )dst;
550
551	*dest = NULL;
552
553	if (!converted_size) {
554	errno = EINVAL;
555	return false;
556	}
557
558	if (src == NULL \|\| srclen == (size_t)-1) {
559	errno = EINVAL;
560	return false;
561	}
562	if (srclen == 0) {
563	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
564	if (ob == NULL) {
565	errno = ENOMEM;
566	return false;
567	}
568	*dest = ob;
569	*converted_size = 0;
570	return true;
571	}
572
573	lazy_initialize_conv();
574
575	descriptor = conv_handles[from][to];
576
577	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
578	if (!conv_silent)
579	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
580	errno = EOPNOTSUPP;
581	return false;
582	}
583
584	convert:
585
586	/* +2 is for ucs2 null termination. */
587	if ((destlen*2)+2 < destlen) {
588	/* wrapped ! abort. */
589	if (!conv_silent)
590	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
591	if (!ctx)
592	SAFE_FREE(outbuf);
593	errno = EOPNOTSUPP;
594	return false;
595	} else {
596	destlen = destlen * 2;
597	}
598
599	/* +2 is for ucs2 null termination. */
600	if (ctx) {
601	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
602	} else {
603	ob = (char *)SMB_REALLOC(ob, destlen + 2);
604	}
605
606	if (!ob) {
607	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
608	errno = ENOMEM;
609	return false;
610	}
611	outbuf = ob;
612	i_len = srclen;
613	o_len = destlen;
614
615	again:
616
617	retval = smb_iconv(descriptor,
618	&inbuf, &i_len,
619	&outbuf, &o_len);
620	if(retval == (size_t)-1) {
621	const char *reason="unknown error";
622	switch(errno) {
623	case EINVAL:
624	reason="Incomplete multibyte sequence";
625	if (!conv_silent)
626	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
627	if (allow_bad_conv)
628	goto use_as_is;
629	break;
630	case E2BIG:
631	goto convert;
632	case EILSEQ:
633	reason="Illegal multibyte sequence";
634	if (!conv_silent)
635	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
636	if (allow_bad_conv)
637	goto use_as_is;
638	break;
639	}
640	if (!conv_silent)
641	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
642	/* smb_panic(reason); */
643	if (ctx) {
644	TALLOC_FREE(ob);
645	} else {
646	SAFE_FREE(ob);
647	}
648	return false;
649	}
650
651	out:
652
653	destlen = destlen - o_len;
654	/* Don't shrink unless we're reclaiming a lot of
655	* space. This is in the hot codepath and these
656	* reallocs cost. JRA.
657	*/
658	if (o_len > 1024) {
659	/* We're shrinking here so we know the +2 is safe from wrap. */
660	if (ctx) {
661	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
662	} else {
663	ob = (char *)SMB_REALLOC(ob,destlen + 2);
664	}
665	}
666
667	if (destlen && !ob) {
668	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
669	errno = ENOMEM;
670	return false;
671	}
672
673	*dest = ob;
674
675	/* Must ucs2 null terminate in the extra space we allocated. */
676	ob[destlen] = '\0';
677	ob[destlen+1] = '\0';
678
679	*converted_size = destlen;
680	return true;
681
682	use_as_is:
683
684	/*
685	* Conversion not supported. This is actually an error, but there are so
686	* many misconfigured iconv systems and smb.conf's out there we can't just
687	* fail. Do a very bad conversion instead.... JRA.
688	*/
689
690	{
691	if (o_len == 0 \|\| i_len == 0)
692	goto out;
693
694	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
695	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
696	/* Can't convert from utf16 any endian to multibyte.
697	Replace with the default fail char.
698	*/
699
700	if (i_len < 2)
701	goto out;
702
703	if (i_len >= 2) {
704	*outbuf = lp_failed_convert_char();
705
706	outbuf++;
707	o_len--;
708
709	inbuf += 2;
710	i_len -= 2;
711	}
712
713	if (o_len == 0 \|\| i_len == 0)
714	goto out;
715
716	/* Keep trying with the next char... */
717	goto again;
718
719	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
720	/* Can't convert to UTF16LE - just widen by adding the
721	default fail char then zero.
722	*/
723	if (o_len < 2)
724	goto out;
725
726	outbuf[0] = lp_failed_convert_char();
727	outbuf[1] = '\0';
728
729	inbuf++;
730	i_len--;
731
732	outbuf += 2;
733	o_len -= 2;
734
735	if (o_len == 0 \|\| i_len == 0)
736	goto out;
737
738	/* Keep trying with the next char... */
739	goto again;
740
741	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
742	to != CH_UTF16LE && to != CH_UTF16BE) {
743	/* Failed multibyte to multibyte. Just copy the default fail char and
744	try again. */
745	outbuf[0] = lp_failed_convert_char();
746
747	inbuf++;
748	i_len--;
749
750	outbuf++;
751	o_len--;
752
753	if (o_len == 0 \|\| i_len == 0)
754	goto out;
755
756	/* Keep trying with the next char... */
757	goto again;
758
759	} else {
760	/* Keep compiler happy.... */
761	goto out;
762	}
763	}
764	}
765
766	/**
767	* Convert between character sets, allocating a new buffer using talloc for the result.
768	*
769	* @param srclen length of source buffer.
770	* @param dest always set at least to NULL
771	* @note -1 is not accepted for srclen.
772	*
773	* @returns Size in bytes of the converted string; or -1 in case of error.
774	**/
775	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
776	void const src, size_t srclen, void dst,
777	bool allow_bad_conv)
778	{
779	void dest = (void )dst;
780	size_t dest_len;
781
782	*dest = NULL;
783	if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
784	&dest_len, allow_bad_conv))
785	return (size_t)-1;
786	if (*dest == NULL)
787	return (size_t)-1;
788	return dest_len;
789	}
790
791	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
792	{
793	size_t size;
794	smb_ucs2_t *buffer;
795
796	size = push_ucs2_allocate(&buffer, src);
797	if (size == (size_t)-1) {
798	return (size_t)-1;
799	}
800	if (!strupper_w(buffer) && (dest == src)) {
801	free(buffer);
802	return srclen;
803	}
804
805	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
806	free(buffer);
807	return size;
808	}
809
810	/**
811	strdup() a unix string to upper case.
812	**/
813
814	char strdup_upper(const char s)
815	{
816	char *out_buffer = SMB_STRDUP(s);
817	const unsigned char p = (const unsigned char )s;
818	unsigned char q = (unsigned char )out_buffer;
819
820	if (!q) {
821	return NULL;
822	}
823
824	/* this is quite a common operation, so we want it to be
825	fast. We optimise for the ascii case, knowing that all our
826	supported multi-byte character sets are ascii-compatible
827	(ie. they match for the first 128 chars) */
828
829	while (*p) {
830	if (*p & 0x80)
831	break;
832	q++ = toupper_ascii_fast(p);
833	p++;
834	}
835
836	if (*p) {
837	/* MB case. */
838	size_t size, size2;
839	smb_ucs2_t *buffer = NULL;
840
841	SAFE_FREE(out_buffer);
842	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
843	strlen(s) + 1, (void *)(void )&buffer, &size,
844	True)) {
845	return NULL;
846	}
847
848	strupper_w(buffer);
849
850	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
851	size, (void *)(void )&out_buffer, &size2, True)) {
852	TALLOC_FREE(buffer);
853	return NULL;
854	}
855
856	/* Don't need the intermediate buffer
857	* anymore.
858	*/
859	TALLOC_FREE(buffer);
860	}
861
862	return out_buffer;
863	}
864
865	/**
866	talloc_strdup() a unix string to upper case.
867	**/
868
869	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
870	{
871	char *out_buffer = talloc_strdup(ctx,s);
872	const unsigned char p = (const unsigned char )s;
873	unsigned char q = (unsigned char )out_buffer;
874
875	if (!q) {
876	return NULL;
877	}
878
879	/* this is quite a common operation, so we want it to be
880	fast. We optimise for the ascii case, knowing that all our
881	supported multi-byte character sets are ascii-compatible
882	(ie. they match for the first 128 chars) */
883
884	while (*p) {
885	if (*p & 0x80)
886	break;
887	q++ = toupper_ascii_fast(p);
888	p++;
889	}
890
891	if (*p) {
892	/* MB case. */
893	size_t size;
894	smb_ucs2_t *ubuf = NULL;
895
896	/* We're not using the ascii buffer above. */
897	TALLOC_FREE(out_buffer);
898
899	size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
900	s, strlen(s)+1,
901	(void *)&ubuf,
902	True);
903	if (size == (size_t)-1) {
904	return NULL;
905	}
906
907	strupper_w(ubuf);
908
909	size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
910	ubuf, size,
911	(void *)&out_buffer,
912	True);
913
914	/* Don't need the intermediate buffer
915	* anymore.
916	*/
917
918	TALLOC_FREE(ubuf);
919
920	if (size == (size_t)-1) {
921	return NULL;
922	}
923	}
924
925	return out_buffer;
926	}
927
928	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
929	{
930	size_t size;
931	smb_ucs2_t *buffer = NULL;
932
933	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
934	(void *)(void )&buffer, &size, True)) {
935	smb_panic("failed to create UCS2 buffer");
936	}
937	if (!strlower_w(buffer) && (dest == src)) {
938	SAFE_FREE(buffer);
939	return srclen;
940	}
941	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
942	SAFE_FREE(buffer);
943	return size;
944	}
945
946	/**
947	strdup() a unix string to lower case.
948	**/
949
950	char strdup_lower(const char s)
951	{
952	size_t size;
953	smb_ucs2_t *buffer = NULL;
954	char *out_buffer;
955
956	size = push_ucs2_allocate(&buffer, s);
957	if (size == -1 \|\| !buffer) {
958	return NULL;
959	}
960
961	strlower_w(buffer);
962
963	size = pull_ucs2_allocate(&out_buffer, buffer);
964	SAFE_FREE(buffer);
965
966	if (size == (size_t)-1) {
967	return NULL;
968	}
969
970	return out_buffer;
971	}
972
973	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
974	{
975	size_t size;
976	smb_ucs2_t *buffer = NULL;
977	char *out_buffer;
978
979	size = push_ucs2_talloc(ctx, &buffer, s);
980	if (size == -1 \|\| !buffer) {
981	TALLOC_FREE(buffer);
982	return NULL;
983	}
984
985	strlower_w(buffer);
986
987	size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
988	TALLOC_FREE(buffer);
989
990	if (size == (size_t)-1) {
991	TALLOC_FREE(out_buffer);
992	return NULL;
993	}
994
995	return out_buffer;
996	}
997
998
999	size_t ucs2_align(const void base_ptr, const void p, int flags)
1000	{
1001	if (flags & (STR_NOALIGN\|STR_ASCII))
1002	return 0;
1003	return PTR_DIFF(p, base_ptr) & 1;
1004	}
1005
1006
1007	/**
1008	* Copy a string from a char* unix src to a dos codepage string destination.
1009	*
1010	* @return the number of bytes occupied by the string in the destination.
1011	*
1012	* @param flags can include
1013	* <dl>
1014	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1015	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1016	* </dl>
1017	*
1018	* @param dest_len the maximum length in bytes allowed in the
1019	* destination.
1020	**/
1021	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1022	{
1023	size_t src_len = strlen(src);
1024	char *tmpbuf = NULL;
1025	size_t ret;
1026
1027	/* No longer allow a length of -1. */
1028	if (dest_len == (size_t)-1) {
1029	smb_panic("push_ascii - dest_len == -1");
1030	}
1031
1032	if (flags & STR_UPPER) {
1033	tmpbuf = SMB_STRDUP(src);
1034	if (!tmpbuf) {
1035	smb_panic("malloc fail");
1036	}
1037	strupper_m(tmpbuf);
1038	src = tmpbuf;
1039	}
1040
1041	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1042	src_len++;
1043	}
1044
1045	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1046	if (ret == (size_t)-1 &&
1047	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1048	&& dest_len > 0) {
1049	((char *)dest)[0] = '\0';
1050	}
1051	SAFE_FREE(tmpbuf);
1052	return ret;
1053	}
1054
1055	size_t push_ascii_fstring(void dest, const char src)
1056	{
1057	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1058	}
1059
1060	/********************************************************************
1061	Push an nstring - ensure null terminated. Written by
1062	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1063	********************************************************************/
1064
1065	size_t push_ascii_nstring(void dest, const char src)
1066	{
1067	size_t i, buffer_len, dest_len;
1068	smb_ucs2_t *buffer;
1069
1070	conv_silent = True;
1071	buffer_len = push_ucs2_allocate(&buffer, src);
1072	if (buffer_len == (size_t)-1) {
1073	smb_panic("failed to create UCS2 buffer");
1074	}
1075
1076	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1077	buffer_len /= sizeof(smb_ucs2_t);
1078
1079	dest_len = 0;
1080	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1081	unsigned char mb[10];
1082	/* Convert one smb_ucs2_t character at a time. */
1083	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1084	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1085	memcpy((char *)dest + dest_len, mb, mb_len);
1086	dest_len += mb_len;
1087	} else {
1088	errno = E2BIG;
1089	break;
1090	}
1091	}
1092	((char *)dest)[dest_len] = '\0';
1093
1094	SAFE_FREE(buffer);
1095	conv_silent = False;
1096	return dest_len;
1097	}
1098
1099	/********************************************************************
1100	Push and malloc an ascii string. src and dest null terminated.
1101	********************************************************************/
1102
1103	size_t push_ascii_allocate(char *dest, const char src)
1104	{
1105	size_t dest_len, src_len = strlen(src)+1;
1106
1107	*dest = NULL;
1108	if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1109	(void **)dest, &dest_len, True))
1110	return (size_t)-1;
1111	else
1112	return dest_len;
1113	}
1114
1115	/**
1116	* Copy a string from a dos codepage source to a unix char* destination.
1117	*
1118	* The resulting string in "dest" is always null terminated.
1119	*
1120	* @param flags can have:
1121	* <dl>
1122	* <dt>STR_TERMINATE</dt>
1123	* <dd>STR_TERMINATE means the string in @p src
1124	* is null terminated, and src_len is ignored.</dd>
1125	* </dl>
1126	*
1127	* @param src_len is the length of the source area in bytes.
1128	* @returns the number of bytes occupied by the string in @p src.
1129	**/
1130	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1131	{
1132	size_t ret;
1133
1134	if (dest_len == (size_t)-1) {
1135	/* No longer allow dest_len of -1. */
1136	smb_panic("pull_ascii - invalid dest_len of -1");
1137	}
1138
1139	if (flags & STR_TERMINATE) {
1140	if (src_len == (size_t)-1) {
1141	src_len = strlen((const char *)src) + 1;
1142	} else {
1143	size_t len = strnlen((const char *)src, src_len);
1144	if (len < src_len)
1145	len++;
1146	src_len = len;
1147	}
1148	}
1149
1150	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1151	if (ret == (size_t)-1) {
1152	ret = 0;
1153	dest_len = 0;
1154	}
1155
1156	if (dest_len && ret) {
1157	/* Did we already process the terminating zero ? */
1158	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1159	dest[MIN(ret, dest_len-1)] = 0;
1160	}
1161	} else {
1162	dest[0] = 0;
1163	}
1164
1165	return src_len;
1166	}
1167
1168	/**
1169	* Copy a string from a dos codepage source to a unix char* destination.
1170	Talloc version.
1171	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1172	needs fixing. JRA).
1173	*
1174	* The resulting string in "dest" is always null terminated.
1175	*
1176	* @param flags can have:
1177	* <dl>
1178	* <dt>STR_TERMINATE</dt>
1179	* <dd>STR_TERMINATE means the string in @p src
1180	* is null terminated, and src_len is ignored.</dd>
1181	* </dl>
1182	*
1183	* @param src_len is the length of the source area in bytes.
1184	* @returns the number of bytes occupied by the string in @p src.
1185	**/
1186
1187	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1188	char **ppdest,
1189	const void *src,
1190	size_t src_len,
1191	int flags)
1192	{
1193	char *dest = NULL;
1194	size_t dest_len = 0;
1195
1196	#ifdef DEVELOPER
1197	/* Ensure we never use the braindead "malloc" varient. */
1198	if (ctx == NULL) {
1199	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1200	}
1201	#endif
1202
1203	*ppdest = NULL;
1204
1205	if (!src_len) {
1206	return 0;
1207	}
1208
1209	if (flags & STR_TERMINATE) {
1210	if (src_len == (size_t)-1) {
1211	src_len = strlen((const char *)src) + 1;
1212	} else {
1213	size_t len = strnlen((const char *)src, src_len);
1214	if (len < src_len)
1215	len++;
1216	src_len = len;
1217	}
1218	/* Ensure we don't use an insane length from the client. */
1219	if (src_len >= 1024*1024) {
1220	char *msg = talloc_asprintf(ctx,
1221	"Bad src length (%u) in "
1222	"pull_ascii_base_talloc",
1223	(unsigned int)src_len);
1224	smb_panic(msg);
1225	}
1226	} else {
1227	/* Can't have an unlimited length
1228	* non STR_TERMINATE'd.
1229	*/
1230	if (src_len == (size_t)-1) {
1231	errno = EINVAL;
1232	return 0;
1233	}
1234	}
1235
1236	/* src_len != -1 here. */
1237
1238	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1239	&dest_len, True)) {
1240	dest_len = 0;
1241	}
1242
1243	if (dest_len && dest) {
1244	/* Did we already process the terminating zero ? */
1245	if (dest[dest_len-1] != 0) {
1246	size_t size = talloc_get_size(dest);
1247	/* Have we got space to append the '\0' ? */
1248	if (size <= dest_len) {
1249	/* No, realloc. */
1250	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1251	dest_len+1);
1252	if (!dest) {
1253	/* talloc fail. */
1254	dest_len = (size_t)-1;
1255	return 0;
1256	}
1257	}
1258	/* Yay - space ! */
1259	dest[dest_len] = '\0';
1260	dest_len++;
1261	}
1262	} else if (dest) {
1263	dest[0] = 0;
1264	}
1265
1266	*ppdest = dest;
1267	return src_len;
1268	}
1269
1270	size_t pull_ascii_fstring(char dest, const void src)
1271	{
1272	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1273	}
1274
1275	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1276
1277	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1278	{
1279	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1280	}
1281
1282	/**
1283	* Copy a string from a char* src to a unicode destination.
1284	*
1285	* @returns the number of bytes occupied by the string in the destination.
1286	*
1287	* @param flags can have:
1288	*
1289	* <dl>
1290	* <dt>STR_TERMINATE <dd>means include the null termination.
1291	* <dt>STR_UPPER <dd>means uppercase in the destination.
1292	* <dt>STR_NOALIGN <dd>means don't do alignment.
1293	* </dl>
1294	*
1295	* @param dest_len is the maximum length allowed in the
1296	* destination.
1297	**/
1298
1299	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1300	{
1301	size_t len=0;
1302	size_t src_len;
1303	size_t ret;
1304
1305	if (dest_len == (size_t)-1) {
1306	/* No longer allow dest_len of -1. */
1307	smb_panic("push_ucs2 - invalid dest_len of -1");
1308	}
1309
1310	if (flags & STR_TERMINATE)
1311	src_len = (size_t)-1;
1312	else
1313	src_len = strlen(src);
1314
1315	if (ucs2_align(base_ptr, dest, flags)) {
1316	(char )dest = 0;
1317	dest = (void )((char )dest + 1);
1318	if (dest_len)
1319	dest_len--;
1320	len++;
1321	}
1322
1323	/* ucs2 is always a multiple of 2 bytes */
1324	dest_len &= ~1;
1325
1326	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1327	if (ret == (size_t)-1) {
1328	if ((flags & STR_TERMINATE) &&
1329	dest &&
1330	dest_len) {
1331	(char )dest = 0;
1332	}
1333	return len;
1334	}
1335
1336	len += ret;
1337
1338	if (flags & STR_UPPER) {
1339	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1340	size_t i;
1341
1342	/* We check for i < (ret / 2) below as the dest string isn't null
1343	terminated if STR_TERMINATE isn't set. */
1344
1345	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1346	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1347	if (v != dest_ucs2[i]) {
1348	dest_ucs2[i] = v;
1349	}
1350	}
1351	}
1352
1353	return len;
1354	}
1355
1356
1357	/**
1358	* Copy a string from a unix char* src to a UCS2 destination,
1359	* allocating a buffer using talloc().
1360	*
1361	* @param dest always set at least to NULL
1362	*
1363	* @returns The number of bytes occupied by the string in the destination
1364	* or -1 in case of error.
1365	**/
1366	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1367	{
1368	size_t src_len = strlen(src)+1;
1369
1370	*dest = NULL;
1371	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1372	}
1373
1374
1375	/**
1376	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1377	*
1378	* @param dest always set at least to NULL
1379	*
1380	* @returns The number of bytes occupied by the string in the destination
1381	* or -1 in case of error.
1382	**/
1383
1384	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1385	{
1386	size_t dest_len, src_len = strlen(src)+1;
1387
1388	*dest = NULL;
1389	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1390	(void **)dest, &dest_len, True))
1391	return (size_t)-1;
1392	else
1393	return dest_len;
1394	}
1395
1396	/**
1397	Copy a string from a char* src to a UTF-8 destination.
1398	Return the number of bytes occupied by the string in the destination
1399	Flags can have:
1400	STR_TERMINATE means include the null termination
1401	STR_UPPER means uppercase in the destination
1402	dest_len is the maximum length allowed in the destination. If dest_len
1403	is -1 then no maxiumum is used.
1404	**/
1405
1406	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1407	{
1408	size_t src_len = 0;
1409	size_t ret;
1410	char *tmpbuf = NULL;
1411
1412	if (dest_len == (size_t)-1) {
1413	/* No longer allow dest_len of -1. */
1414	smb_panic("push_utf8 - invalid dest_len of -1");
1415	}
1416
1417	if (flags & STR_UPPER) {
1418	tmpbuf = strdup_upper(src);
1419	if (!tmpbuf) {
1420	return (size_t)-1;
1421	}
1422	src = tmpbuf;
1423	src_len = strlen(src);
1424	}
1425
1426	src_len = strlen(src);
1427	if (flags & STR_TERMINATE) {
1428	src_len++;
1429	}
1430
1431	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1432	SAFE_FREE(tmpbuf);
1433	return ret;
1434	}
1435
1436	size_t push_utf8_fstring(void dest, const char src)
1437	{
1438	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1439	}
1440
1441	/**
1442	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1443	*
1444	* @param dest always set at least to NULL
1445	*
1446	* @returns The number of bytes occupied by the string in the destination
1447	**/
1448
1449	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1450	{
1451	size_t src_len = strlen(src)+1;
1452
1453	*dest = NULL;
1454	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1455	}
1456
1457	/**
1458	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1459	*
1460	* @param dest always set at least to NULL
1461	*
1462	* @returns The number of bytes occupied by the string in the destination
1463	**/
1464
1465	size_t push_utf8_allocate(char *dest, const char src)
1466	{
1467	size_t dest_len, src_len = strlen(src)+1;
1468
1469	*dest = NULL;
1470	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1471	(void **)dest, &dest_len, True))
1472	return (size_t)-1;
1473	else
1474	return dest_len;
1475	}
1476
1477	/**
1478	Copy a string from a ucs2 source to a unix char* destination.
1479	Flags can have:
1480	STR_TERMINATE means the string in src is null terminated.
1481	STR_NOALIGN means don't try to align.
1482	if STR_TERMINATE is set then src_len is ignored if it is -1.
1483	src_len is the length of the source area in bytes
1484	Return the number of bytes occupied by the string in src.
1485	The resulting string in "dest" is always null terminated.
1486	**/
1487
1488	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1489	{
1490	size_t ret;
1491
1492	if (dest_len == (size_t)-1) {
1493	/* No longer allow dest_len of -1. */
1494	smb_panic("pull_ucs2 - invalid dest_len of -1");
1495	}
1496
1497	if (!src_len) {
1498	if (dest && dest_len > 0) {
1499	dest[0] = '\0';
1500	}
1501	return 0;
1502	}
1503
1504	if (ucs2_align(base_ptr, src, flags)) {
1505	src = (const void )((const char )src + 1);
1506	if (src_len != (size_t)-1)
1507	src_len--;
1508	}
1509
1510	if (flags & STR_TERMINATE) {
1511	/* src_len -1 is the default for null terminated strings. */
1512	if (src_len != (size_t)-1) {
1513	size_t len = strnlen_w((const smb_ucs2_t *)src,
1514	src_len/2);
1515	if (len < src_len/2)
1516	len++;
1517	src_len = len*2;
1518	}
1519	}
1520
1521	/* ucs2 is always a multiple of 2 bytes */
1522	if (src_len != (size_t)-1)
1523	src_len &= ~1;
1524
1525	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1526	if (ret == (size_t)-1) {
1527	ret = 0;
1528	dest_len = 0;
1529	}
1530
1531	if (src_len == (size_t)-1)
1532	src_len = ret*2;
1533
1534	if (dest_len && ret) {
1535	/* Did we already process the terminating zero ? */
1536	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1537	dest[MIN(ret, dest_len-1)] = 0;
1538	}
1539	} else {
1540	dest[0] = 0;
1541	}
1542
1543	return src_len;
1544	}
1545
1546	/**
1547	Copy a string from a ucs2 source to a unix char* destination.
1548	Talloc version with a base pointer.
1549	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1550	needs fixing. JRA).
1551	Flags can have:
1552	STR_TERMINATE means the string in src is null terminated.
1553	STR_NOALIGN means don't try to align.
1554	if STR_TERMINATE is set then src_len is ignored if it is -1.
1555	src_len is the length of the source area in bytes
1556	Return the number of bytes occupied by the string in src.
1557	The resulting string in "dest" is always null terminated.
1558	**/
1559
1560	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1561	const void *base_ptr,
1562	char **ppdest,
1563	const void *src,
1564	size_t src_len,
1565	int flags)
1566	{
1567	char *dest;
1568	size_t dest_len;
1569
1570	*ppdest = NULL;
1571
1572	#ifdef DEVELOPER
1573	/* Ensure we never use the braindead "malloc" varient. */
1574	if (ctx == NULL) {
1575	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1576	}
1577	#endif
1578
1579	if (!src_len) {
1580	return 0;
1581	}
1582
1583	if (ucs2_align(base_ptr, src, flags)) {
1584	src = (const void )((const char )src + 1);
1585	if (src_len != (size_t)-1)
1586	src_len--;
1587	}
1588
1589	if (flags & STR_TERMINATE) {
1590	/* src_len -1 is the default for null terminated strings. */
1591	if (src_len != (size_t)-1) {
1592	size_t len = strnlen_w((const smb_ucs2_t *)src,
1593	src_len/2);
1594	if (len < src_len/2)
1595	len++;
1596	src_len = len*2;
1597	} else {
1598	/*
1599	* src_len == -1 - alloc interface won't take this
1600	* so we must calculate.
1601	*/
1602	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1603	}
1604	/* Ensure we don't use an insane length from the client. */
1605	if (src_len >= 1024*1024) {
1606	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1607	}
1608	} else {
1609	/* Can't have an unlimited length
1610	* non STR_TERMINATE'd.
1611	*/
1612	if (src_len == (size_t)-1) {
1613	errno = EINVAL;
1614	return 0;
1615	}
1616	}
1617
1618	/* src_len != -1 here. */
1619
1620	/* ucs2 is always a multiple of 2 bytes */
1621	src_len &= ~1;
1622
1623	dest_len = convert_string_talloc(ctx,
1624	CH_UTF16LE,
1625	CH_UNIX,
1626	src,
1627	src_len,
1628	(void *)&dest,
1629	True);
1630	if (dest_len == (size_t)-1) {
1631	dest_len = 0;
1632	}
1633
1634	if (dest_len) {
1635	/* Did we already process the terminating zero ? */
1636	if (dest[dest_len-1] != 0) {
1637	size_t size = talloc_get_size(dest);
1638	/* Have we got space to append the '\0' ? */
1639	if (size <= dest_len) {
1640	/* No, realloc. */
1641	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1642	dest_len+1);
1643	if (!dest) {
1644	/* talloc fail. */
1645	dest_len = (size_t)-1;
1646	return 0;
1647	}
1648	}
1649	/* Yay - space ! */
1650	dest[dest_len] = '\0';
1651	dest_len++;
1652	}
1653	} else if (dest) {
1654	dest[0] = 0;
1655	}
1656
1657	*ppdest = dest;
1658	return src_len;
1659	}
1660
1661	size_t pull_ucs2_fstring(char dest, const void src)
1662	{
1663	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1664	}
1665
1666	/**
1667	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1668	*
1669	* @param dest always set at least to NULL
1670	*
1671	* @returns The number of bytes occupied by the string in the destination
1672	**/
1673
1674	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1675	{
1676	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1677	*dest = NULL;
1678	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1679	}
1680
1681	/**
1682	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1683	*
1684	* @param dest always set at least to NULL
1685	*
1686	* @returns The number of bytes occupied by the string in the destination
1687	**/
1688
1689	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1690	{
1691	size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1692	*dest = NULL;
1693	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1694	(void **)dest, &dest_len, True))
1695	return (size_t)-1;
1696	else
1697	return dest_len;
1698	}
1699
1700	/**
1701	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1702	*
1703	* @param dest always set at least to NULL
1704	*
1705	* @returns The number of bytes occupied by the string in the destination
1706	**/
1707
1708	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1709	{
1710	size_t src_len = strlen(src)+1;
1711	*dest = NULL;
1712	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1713	}
1714
1715	/**
1716	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1717	*
1718	* @param dest always set at least to NULL
1719	*
1720	* @returns The number of bytes occupied by the string in the destination
1721	**/
1722
1723	size_t pull_utf8_allocate(char *dest, const char src)
1724	{
1725	size_t dest_len, src_len = strlen(src)+1;
1726	*dest = NULL;
1727	if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1728	(void **)dest, &dest_len, True))
1729	return (size_t)-1;
1730	else
1731	return dest_len;
1732	}
1733
1734	/**
1735	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1736	*
1737	* @param dest always set at least to NULL
1738	*
1739	* @returns The number of bytes occupied by the string in the destination
1740	**/
1741
1742	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1743	{
1744	size_t src_len = strlen(src)+1;
1745	*dest = NULL;
1746	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1747	}
1748
1749	/**
1750	Copy a string from a char* src to a unicode or ascii
1751	dos codepage destination choosing unicode or ascii based on the
1752	flags in the SMB buffer starting at base_ptr.
1753	Return the number of bytes occupied by the string in the destination.
1754	flags can have:
1755	STR_TERMINATE means include the null termination.
1756	STR_UPPER means uppercase in the destination.
1757	STR_ASCII use ascii even with unicode packet.
1758	STR_NOALIGN means don't do alignment.
1759	dest_len is the maximum length allowed in the destination. If dest_len
1760	is -1 then no maxiumum is used.
1761	**/
1762
1763	size_t push_string_fn(const char *function, unsigned int line,
1764	const void *base_ptr, uint16 flags2,
1765	void dest, const char src,
1766	size_t dest_len, int flags)
1767	{
1768	#ifdef DEVELOPER
1769	/* We really need to zero fill here, not clobber
1770	* region, as we want to ensure that valgrind thinks
1771	* all of the outgoing buffer has been written to
1772	* so a send() or write() won't trap an error.
1773	* JRA.
1774	*/
1775	#if 0
1776	clobber_region(function, line, dest, dest_len);
1777	#else
1778	memset(dest, '\0', dest_len);
1779	#endif
1780	#endif
1781
1782	if (!(flags & STR_ASCII) && \
1783	((flags & STR_UNICODE \|\| \
1784	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1785	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1786	}
1787	return push_ascii(dest, src, dest_len, flags);
1788	}
1789
1790
1791	/**
1792	Copy a string from a unicode or ascii source (depending on
1793	the packet flags) to a char* destination.
1794	Flags can have:
1795	STR_TERMINATE means the string in src is null terminated.
1796	STR_UNICODE means to force as unicode.
1797	STR_ASCII use ascii even with unicode packet.
1798	STR_NOALIGN means don't do alignment.
1799	if STR_TERMINATE is set then src_len is ignored is it is -1
1800	src_len is the length of the source area in bytes.
1801	Return the number of bytes occupied by the string in src.
1802	The resulting string in "dest" is always null terminated.
1803	**/
1804
1805	size_t pull_string_fn(const char *function,
1806	unsigned int line,
1807	const void *base_ptr,
1808	uint16 smb_flags2,
1809	char *dest,
1810	const void *src,
1811	size_t dest_len,
1812	size_t src_len,
1813	int flags)
1814	{
1815	#ifdef DEVELOPER
1816	clobber_region(function, line, dest, dest_len);
1817	#endif
1818
1819	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1820	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1821	"UNICODE defined");
1822	}
1823
1824	if (!(flags & STR_ASCII) && \
1825	((flags & STR_UNICODE \|\| \
1826	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1827	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1828	}
1829	return pull_ascii(dest, src, dest_len, src_len, flags);
1830	}
1831
1832	/**
1833	Copy a string from a unicode or ascii source (depending on
1834	the packet flags) to a char* destination.
1835	Variant that uses talloc.
1836	Flags can have:
1837	STR_TERMINATE means the string in src is null terminated.
1838	STR_UNICODE means to force as unicode.
1839	STR_ASCII use ascii even with unicode packet.
1840	STR_NOALIGN means don't do alignment.
1841	if STR_TERMINATE is set then src_len is ignored is it is -1
1842	src_len is the length of the source area in bytes.
1843	Return the number of bytes occupied by the string in src.
1844	The resulting string in "dest" is always null terminated.
1845	**/
1846
1847	size_t pull_string_talloc_fn(const char *function,
1848	unsigned int line,
1849	TALLOC_CTX *ctx,
1850	const void *base_ptr,
1851	uint16 smb_flags2,
1852	char **ppdest,
1853	const void *src,
1854	size_t src_len,
1855	int flags)
1856	{
1857	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1858	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1859	"UNICODE defined");
1860	}
1861
1862	if (!(flags & STR_ASCII) && \
1863	((flags & STR_UNICODE \|\| \
1864	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1865	return pull_ucs2_base_talloc(ctx,
1866	base_ptr,
1867	ppdest,
1868	src,
1869	src_len,
1870	flags);
1871	}
1872	return pull_ascii_base_talloc(ctx,
1873	ppdest,
1874	src,
1875	src_len,
1876	flags);
1877	}
1878
1879
1880	size_t align_string(const void base_ptr, const char p, int flags)
1881	{
1882	if (!(flags & STR_ASCII) && \
1883	((flags & STR_UNICODE \|\| \
1884	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1885	return ucs2_align(base_ptr, p, flags);
1886	}
1887	return 0;
1888	}
1889
1890	/*
1891	Return the unicode codepoint for the next multi-byte CH_UNIX character
1892	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1893
1894	Also return the number of bytes consumed (which tells the caller
1895	how many bytes to skip to get to the next CH_UNIX character).
1896
1897	Return INVALID_CODEPOINT if the next character cannot be converted.
1898	*/
1899
1900	codepoint_t next_codepoint(const char str, size_t size)
1901	{
1902	/* It cannot occupy more than 4 bytes in UTF16 format */
1903	uint8_t buf[4];
1904	smb_iconv_t descriptor;
1905	size_t ilen_orig;
1906	size_t ilen;
1907	size_t olen;
1908	char *outbuf;
1909
1910	if ((str[0] & 0x80) == 0) {
1911	*size = 1;
1912	return (codepoint_t)str[0];
1913	}
1914
1915	/* We assume that no multi-byte character can take
1916	more than 5 bytes. This is OK as we only
1917	support codepoints up to 1M */
1918
1919	ilen_orig = strnlen(str, 5);
1920	ilen = ilen_orig;
1921
1922	lazy_initialize_conv();
1923
1924	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1925	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1926	*size = 1;
1927	return INVALID_CODEPOINT;
1928	}
1929
1930	/* This looks a little strange, but it is needed to cope
1931	with codepoints above 64k which are encoded as per RFC2781. */
1932	olen = 2;
1933	outbuf = (char *)buf;
1934	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1935	if (olen == 2) {
1936	/* We failed to convert to a 2 byte character.
1937	See if we can convert to a 4 UTF16-LE byte char encoding.
1938	*/
1939	olen = 4;
1940	outbuf = (char *)buf;
1941	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1942	if (olen == 4) {
1943	/* We didn't convert any bytes */
1944	*size = 1;
1945	return INVALID_CODEPOINT;
1946	}
1947	olen = 4 - olen;
1948	} else {
1949	olen = 2 - olen;
1950	}
1951
1952	*size = ilen_orig - ilen;
1953
1954	if (olen == 2) {
1955	/* 2 byte, UTF16-LE encoded value. */
1956	return (codepoint_t)SVAL(buf, 0);
1957	}
1958	if (olen == 4) {
1959	/* Decode a 4 byte UTF16-LE character manually.
1960	See RFC2871 for the encoding machanism.
1961	*/
1962	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1963	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1964
1965	return (codepoint_t)0x10000 +
1966	(w1 << 10) + w2;
1967	}
1968
1969	/* no other length is valid */
1970	return INVALID_CODEPOINT;
1971	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: