Context Navigation

source: branches/samba-3.3.x/source/lib/charcnv.c@ 233

Visit:

Last change on this file since 233 was 223, checked in by Herwig Bauernfeind, 16 years ago
Update Samba 3.3 branch to 3.3.3
File size: 51.1 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
25	/* We can parameterize this if someone complains.... JRA. */
26
/**
 * Return the substitute character that the conversion routines in this
 * file write in place of a character they could not convert.
 * Currently hard-wired to '_'.
 **/
char lp_failed_convert_char(void)
{
	return '_';
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	else if (ch == CH_UNIX) ret = lp_unix_charset();
62	else if (ch == CH_DOS) ret = lp_dos_charset();
63	else if (ch == CH_DISPLAY) ret = lp_display_charset();
64	else if (ch == CH_UTF8) ret = "UTF8";
65
66	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67	if (ret && !strcmp(ret, "LOCALE")) {
68	const char *ln = NULL;
69
70	#ifdef HAVE_SETLOCALE
71	setlocale(LC_ALL, "");
72	#endif
73	ln = nl_langinfo(CODESET);
74	if (ln) {
75	/* Check whether the charset name is supported
76	by iconv */
77	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78	if (handle == (smb_iconv_t) -1) {
79	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80	ln = NULL;
81	} else {
82	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83	smb_iconv_close(handle);
84	}
85	}
86	ret = ln;
87	}
88	#endif
89
90	if (!ret \|\| !*ret) ret = "ASCII";
91	return ret;
92	}
93
94	void lazy_initialize_conv(void)
95	{
96	if (!initialized) {
97	load_case_tables();
98	init_iconv();
99	initialized = true;
100	}
101	}
102
103	/**
104	* Destroy global objects allocated by init_iconv()
105	**/
106	void gfree_charcnv(void)
107	{
108	int c1, c2;
109
110	for (c1=0;c1<NUM_CHARSETS;c1++) {
111	for (c2=0;c2<NUM_CHARSETS;c2++) {
112	if ( conv_handles[c1][c2] ) {
113	smb_iconv_close( conv_handles[c1][c2] );
114	conv_handles[c1][c2] = 0;
115	}
116	}
117	}
118	initialized = false;
119	}
120
121	/**
122	* Initialize iconv conversion descriptors.
123	*
124	* This is called the first time it is needed, and also called again
125	* every time the configuration is reloaded, because the charset or
126	* codepage might have changed.
127	**/
128	void init_iconv(void)
129	{
130	int c1, c2;
131	bool did_reload = False;
132
133	/* so that charset_name() works we need to get the UNIX<->UCS2 going
134	first */
135	if (!conv_handles[CH_UNIX][CH_UTF16LE])
136	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
137
138	if (!conv_handles[CH_UTF16LE][CH_UNIX])
139	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
140
141	for (c1=0;c1<NUM_CHARSETS;c1++) {
142	for (c2=0;c2<NUM_CHARSETS;c2++) {
143	const char *n1 = charset_name((charset_t)c1);
144	const char *n2 = charset_name((charset_t)c2);
145	if (conv_handles[c1][c2] &&
146	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148	continue;
149
150	did_reload = True;
151
152	if (conv_handles[c1][c2])
153	smb_iconv_close(conv_handles[c1][c2]);
154
155	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158	charset_name((charset_t)c1), charset_name((charset_t)c2)));
159	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160	n1 = "ASCII";
161	}
162	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163	n2 = "ASCII";
164	}
165	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166	n1, n2 ));
167	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168	if (!conv_handles[c1][c2]) {
169	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170	smb_panic("init_iconv: conv_handle initialization failed");
171	}
172	}
173	}
174	}
175
176	if (did_reload) {
177	/* XXX: Does this really get called every time the dos
178	* codepage changes? */
179	/* XXX: Is the did_reload test too strict? */
180	conv_silent = True;
181	init_valid_table();
182	conv_silent = False;
183	}
184	}
185
186	/**
187	* Convert string from one encoding to another, making error checking etc
188	* Slow path version - uses (slow) iconv.
189	*
190	* @param src pointer to source string (multibyte or singlebyte)
191	* @param srclen length of the source string in bytes
192	* @param dest pointer to destination string (multibyte or singlebyte)
193	* @param destlen maximal length allowed for string
194	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195	* @returns the number of bytes occupied in the destination
196	*
197	* Ensure the srclen contains the terminating zero.
198	*
199	**/
200
201	static size_t convert_string_internal(charset_t from, charset_t to,
202	void const *src, size_t srclen,
203	void *dest, size_t destlen, bool allow_bad_conv)
204	{
205	size_t i_len, o_len;
206	size_t retval;
207	const char* inbuf = (const char*)src;
208	char* outbuf = (char*)dest;
209	smb_iconv_t descriptor;
210
211	lazy_initialize_conv();
212
213	descriptor = conv_handles[from][to];
214
215	if (srclen == (size_t)-1) {
216	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
217	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
218	} else {
219	srclen = strlen((const char *)src)+1;
220	}
221	}
222
223
224	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
225	if (!conv_silent)
226	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
227	return (size_t)-1;
228	}
229
230	i_len=srclen;
231	o_len=destlen;
232
233	again:
234
235	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236	if(retval==(size_t)-1) {
237	const char *reason="unknown error";
238	switch(errno) {
239	case EINVAL:
240	reason="Incomplete multibyte sequence";
241	if (!conv_silent)
242	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
243	if (allow_bad_conv)
244	goto use_as_is;
245	return (size_t)-1;
246	case E2BIG:
247	reason="No more room";
248	if (!conv_silent) {
249	if (from == CH_UNIX) {
250	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251	charset_name(from), charset_name(to),
252	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
253	} else {
254	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255	charset_name(from), charset_name(to),
256	(unsigned int)srclen, (unsigned int)destlen));
257	}
258	}
259	break;
260	case EILSEQ:
261	reason="Illegal multibyte sequence";
262	if (!conv_silent)
263	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
264	if (allow_bad_conv)
265	goto use_as_is;
266
267	return (size_t)-1;
268	default:
269	if (!conv_silent)
270	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
271	return (size_t)-1;
272	}
273	/* smb_panic(reason); */
274	}
275	return destlen-o_len;
276
277	use_as_is:
278
279	/*
280	* Conversion not supported. This is actually an error, but there are so
281	* many misconfigured iconv systems and smb.conf's out there we can't just
282	* fail. Do a very bad conversion instead.... JRA.
283	*/
284
285	{
286	if (o_len == 0 \|\| i_len == 0)
287	return destlen - o_len;
288
289	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
290	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
291	/* Can't convert from utf16 any endian to multibyte.
292	Replace with the default fail char.
293	*/
294	if (i_len < 2)
295	return destlen - o_len;
296	if (i_len >= 2) {
297	*outbuf = lp_failed_convert_char();
298
299	outbuf++;
300	o_len--;
301
302	inbuf += 2;
303	i_len -= 2;
304	}
305
306	if (o_len == 0 \|\| i_len == 0)
307	return destlen - o_len;
308
309	/* Keep trying with the next char... */
310	goto again;
311
312	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
313	/* Can't convert to UTF16LE - just widen by adding the
314	default fail char then zero.
315	*/
316	if (o_len < 2)
317	return destlen - o_len;
318
319	outbuf[0] = lp_failed_convert_char();
320	outbuf[1] = '\0';
321
322	inbuf++;
323	i_len--;
324
325	outbuf += 2;
326	o_len -= 2;
327
328	if (o_len == 0 \|\| i_len == 0)
329	return destlen - o_len;
330
331	/* Keep trying with the next char... */
332	goto again;
333
334	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
335	to != CH_UTF16LE && to != CH_UTF16BE) {
336	/* Failed multibyte to multibyte. Just copy the default fail char and
337	try again. */
338	outbuf[0] = lp_failed_convert_char();
339
340	inbuf++;
341	i_len--;
342
343	outbuf++;
344	o_len--;
345
346	if (o_len == 0 \|\| i_len == 0)
347	return destlen - o_len;
348
349	/* Keep trying with the next char... */
350	goto again;
351
352	} else {
353	/* Keep compiler happy.... */
354	return destlen - o_len;
355	}
356	}
357	}
358
359	/**
360	* Convert string from one encoding to another, making error checking etc
361	* Fast path version - handles ASCII first.
362	*
363	* @param src pointer to source string (multibyte or singlebyte)
364	* @param srclen length of the source string in bytes, or -1 for nul terminated.
365	* @param dest pointer to destination string (multibyte or singlebyte)
366	* @param destlen maximal length allowed for string - NEVER -1.
367	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
368	* @returns the number of bytes occupied in the destination
369	*
370	* Ensure the srclen contains the terminating zero.
371	*
372	* This function has been hand-tuned to provide a fast path.
373	* Don't change unless you really know what you are doing. JRA.
374	**/
375
376	size_t convert_string(charset_t from, charset_t to,
377	void const *src, size_t srclen,
378	void *dest, size_t destlen, bool allow_bad_conv)
379	{
380	/*
381	* NB. We deliberately don't do a strlen here if srclen == -1.
382	* This is very expensive over millions of calls and is taken
383	* care of in the slow path in convert_string_internal. JRA.
384	*/
385
386	#ifdef DEVELOPER
387	SMB_ASSERT(destlen != (size_t)-1);
388	#endif
389
390	if (srclen == 0)
391	return 0;
392
393	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
394	const unsigned char p = (const unsigned char )src;
395	unsigned char q = (unsigned char )dest;
396	size_t slen = srclen;
397	size_t dlen = destlen;
398	unsigned char lastp = '\0';
399	size_t retval = 0;
400
401	/* If all characters are ascii, fast path here. */
402	while (slen && dlen) {
403	if ((lastp = *p) <= 0x7f) {
404	q++ = p++;
405	if (slen != (size_t)-1) {
406	slen--;
407	}
408	dlen--;
409	retval++;
410	if (!lastp)
411	break;
412	} else {
413	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
414	goto general_case;
415	#else
416	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
417	if (ret == (size_t)-1) {
418	return ret;
419	}
420	return retval + ret;
421	#endif
422	}
423	}
424	if (!dlen) {
425	/* Even if we fast path we should note if we ran out of room. */
426	if (((slen != (size_t)-1) && slen) \|\|
427	((slen == (size_t)-1) && lastp)) {
428	errno = E2BIG;
429	}
430	}
431	return retval;
432	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433	const unsigned char p = (const unsigned char )src;
434	unsigned char q = (unsigned char )dest;
435	size_t retval = 0;
436	size_t slen = srclen;
437	size_t dlen = destlen;
438	unsigned char lastp = '\0';
439
440	/* If all characters are ascii, fast path here. */
441	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
442	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
443	q++ = p;
444	if (slen != (size_t)-1) {
445	slen -= 2;
446	}
447	p += 2;
448	dlen--;
449	retval++;
450	if (!lastp)
451	break;
452	} else {
453	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
454	goto general_case;
455	#else
456	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457	if (ret == (size_t)-1) {
458	return ret;
459	}
460	return retval + ret;
461	#endif
462	}
463	}
464	if (!dlen) {
465	/* Even if we fast path we should note if we ran out of room. */
466	if (((slen != (size_t)-1) && slen) \|\|
467	((slen == (size_t)-1) && lastp)) {
468	errno = E2BIG;
469	}
470	}
471	return retval;
472	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
473	const unsigned char p = (const unsigned char )src;
474	unsigned char q = (unsigned char )dest;
475	size_t retval = 0;
476	size_t slen = srclen;
477	size_t dlen = destlen;
478	unsigned char lastp = '\0';
479
480	/* If all characters are ascii, fast path here. */
481	while (slen && (dlen >= 2)) {
482	if ((lastp = *p) <= 0x7F) {
483	q++ = p++;
484	*q++ = '\0';
485	if (slen != (size_t)-1) {
486	slen--;
487	}
488	dlen -= 2;
489	retval += 2;
490	if (!lastp)
491	break;
492	} else {
493	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
494	goto general_case;
495	#else
496	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
497	if (ret == (size_t)-1) {
498	return ret;
499	}
500	return retval + ret;
501	#endif
502	}
503	}
504	if (!dlen) {
505	/* Even if we fast path we should note if we ran out of room. */
506	if (((slen != (size_t)-1) && slen) \|\|
507	((slen == (size_t)-1) && lastp)) {
508	errno = E2BIG;
509	}
510	}
511	return retval;
512	}
513
514	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
515	general_case:
516	#endif
517	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
518	}
519
520	/**
521	* Convert between character sets, allocating a new buffer for the result.
522	*
523	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
524	* (this is a bad interface and needs fixing. JRA).
525	* @param srclen length of source buffer.
526	* @param dest always set at least to NULL
527	* @param converted_size set to the number of bytes of converted data in
528	* the new buffer (not counting the added null termination) when true is returned
529	* @note -1 is not accepted for srclen.
530	*
531	* @return true if new buffer was correctly allocated, and string was
532	* converted.
533	*
534	* Ensure the srclen contains the terminating zero.
535	*
536	* I hate the goto's in this function. It's embarrassing.....
537	* There has to be a cleaner way to do this. JRA.
538	**/
539
540	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
541	void const src, size_t srclen, void dst,
542	size_t *converted_size, bool allow_bad_conv)
543	{
544	size_t i_len, o_len, destlen = (srclen * 3) / 2;
545	size_t retval;
546	const char inbuf = (const char )src;
547	char outbuf = NULL, ob = NULL;
548	smb_iconv_t descriptor;
549	void dest = (void )dst;
550
551	*dest = NULL;
552
553	if (!converted_size) {
554	errno = EINVAL;
555	return false;
556	}
557
558	if (src == NULL \|\| srclen == (size_t)-1) {
559	errno = EINVAL;
560	return false;
561	}
562	if (srclen == 0) {
563	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
564	if (ob == NULL) {
565	errno = ENOMEM;
566	return false;
567	}
568	*dest = ob;
569	*converted_size = 0;
570	return true;
571	}
572
573	lazy_initialize_conv();
574
575	descriptor = conv_handles[from][to];
576
577	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
578	if (!conv_silent)
579	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
580	errno = EOPNOTSUPP;
581	return false;
582	}
583
584	convert:
585
586	/* +2 is for ucs2 null termination. */
587	if ((destlen*2)+2 < destlen) {
588	/* wrapped ! abort. */
589	if (!conv_silent)
590	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
591	if (!ctx)
592	SAFE_FREE(outbuf);
593	errno = EOPNOTSUPP;
594	return false;
595	} else {
596	destlen = destlen * 2;
597	}
598
599	/* +2 is for ucs2 null termination. */
600	if (ctx) {
601	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
602	} else {
603	ob = (char *)SMB_REALLOC(ob, destlen + 2);
604	}
605
606	if (!ob) {
607	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
608	errno = ENOMEM;
609	return false;
610	}
611	outbuf = ob;
612	i_len = srclen;
613	o_len = destlen;
614
615	again:
616
617	retval = smb_iconv(descriptor,
618	&inbuf, &i_len,
619	&outbuf, &o_len);
620	if(retval == (size_t)-1) {
621	const char *reason="unknown error";
622	switch(errno) {
623	case EINVAL:
624	reason="Incomplete multibyte sequence";
625	if (!conv_silent)
626	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
627	if (allow_bad_conv)
628	goto use_as_is;
629	break;
630	case E2BIG:
631	goto convert;
632	case EILSEQ:
633	reason="Illegal multibyte sequence";
634	if (!conv_silent)
635	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
636	if (allow_bad_conv)
637	goto use_as_is;
638	break;
639	}
640	if (!conv_silent)
641	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
642	/* smb_panic(reason); */
643	if (ctx) {
644	TALLOC_FREE(ob);
645	} else {
646	SAFE_FREE(ob);
647	}
648	return false;
649	}
650
651	out:
652
653	destlen = destlen - o_len;
654	/* Don't shrink unless we're reclaiming a lot of
655	* space. This is in the hot codepath and these
656	* reallocs cost. JRA.
657	*/
658	if (o_len > 1024) {
659	/* We're shrinking here so we know the +2 is safe from wrap. */
660	if (ctx) {
661	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
662	} else {
663	ob = (char *)SMB_REALLOC(ob,destlen + 2);
664	}
665	}
666
667	if (destlen && !ob) {
668	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
669	errno = ENOMEM;
670	return false;
671	}
672
673	*dest = ob;
674
675	/* Must ucs2 null terminate in the extra space we allocated. */
676	ob[destlen] = '\0';
677	ob[destlen+1] = '\0';
678
679	*converted_size = destlen;
680	return true;
681
682	use_as_is:
683
684	/*
685	* Conversion not supported. This is actually an error, but there are so
686	* many misconfigured iconv systems and smb.conf's out there we can't just
687	* fail. Do a very bad conversion instead.... JRA.
688	*/
689
690	{
691	if (o_len == 0 \|\| i_len == 0)
692	goto out;
693
694	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
695	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
696	/* Can't convert from utf16 any endian to multibyte.
697	Replace with the default fail char.
698	*/
699
700	if (i_len < 2)
701	goto out;
702
703	if (i_len >= 2) {
704	*outbuf = lp_failed_convert_char();
705
706	outbuf++;
707	o_len--;
708
709	inbuf += 2;
710	i_len -= 2;
711	}
712
713	if (o_len == 0 \|\| i_len == 0)
714	goto out;
715
716	/* Keep trying with the next char... */
717	goto again;
718
719	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
720	/* Can't convert to UTF16LE - just widen by adding the
721	default fail char then zero.
722	*/
723	if (o_len < 2)
724	goto out;
725
726	outbuf[0] = lp_failed_convert_char();
727	outbuf[1] = '\0';
728
729	inbuf++;
730	i_len--;
731
732	outbuf += 2;
733	o_len -= 2;
734
735	if (o_len == 0 \|\| i_len == 0)
736	goto out;
737
738	/* Keep trying with the next char... */
739	goto again;
740
741	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
742	to != CH_UTF16LE && to != CH_UTF16BE) {
743	/* Failed multibyte to multibyte. Just copy the default fail char and
744	try again. */
745	outbuf[0] = lp_failed_convert_char();
746
747	inbuf++;
748	i_len--;
749
750	outbuf++;
751	o_len--;
752
753	if (o_len == 0 \|\| i_len == 0)
754	goto out;
755
756	/* Keep trying with the next char... */
757	goto again;
758
759	} else {
760	/* Keep compiler happy.... */
761	goto out;
762	}
763	}
764	}
765
766	/**
767	* Convert between character sets, allocating a new buffer using talloc for the result.
768	*
769	* @param srclen length of source buffer.
770	* @param dest always set at least to NULL
771	* @param converted_size set to the number of bytes occupied by the string in
772	* the destination on success.
773	* @note -1 is not accepted for srclen.
774	*
775	* @return true if new buffer was correctly allocated, and string was
776	* converted.
777	*/
778	bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
779	void const src, size_t srclen, void dst,
780	size_t *converted_size, bool allow_bad_conv)
781	{
782	void dest = (void )dst;
783
784	*dest = NULL;
785	return convert_string_allocate(ctx, from, to, src, srclen, dest,
786	converted_size, allow_bad_conv);
787	}
788
789	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
790	{
791	size_t size;
792	smb_ucs2_t *buffer;
793
794	if (!push_ucs2_allocate(&buffer, src, &size)) {
795	return (size_t)-1;
796	}
797
798	if (!strupper_w(buffer) && (dest == src)) {
799	free(buffer);
800	return srclen;
801	}
802
803	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
804	free(buffer);
805	return size;
806	}
807
808	/**
809	strdup() a unix string to upper case.
810	**/
811
812	char strdup_upper(const char s)
813	{
814	char *out_buffer = SMB_STRDUP(s);
815	const unsigned char p = (const unsigned char )s;
816	unsigned char q = (unsigned char )out_buffer;
817
818	if (!q) {
819	return NULL;
820	}
821
822	/* this is quite a common operation, so we want it to be
823	fast. We optimise for the ascii case, knowing that all our
824	supported multi-byte character sets are ascii-compatible
825	(ie. they match for the first 128 chars) */
826
827	while (*p) {
828	if (*p & 0x80)
829	break;
830	q++ = toupper_ascii_fast(p);
831	p++;
832	}
833
834	if (*p) {
835	/* MB case. */
836	size_t converted_size, converted_size2;
837	smb_ucs2_t *buffer = NULL;
838
839	SAFE_FREE(out_buffer);
840	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
841	strlen(s) + 1,
842	(void *)(void )&buffer,
843	&converted_size, True))
844	{
845	return NULL;
846	}
847
848	strupper_w(buffer);
849
850	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
851	converted_size,
852	(void *)(void )&out_buffer,
853	&converted_size2, True))
854	{
855	TALLOC_FREE(buffer);
856	return NULL;
857	}
858
859	/* Don't need the intermediate buffer
860	* anymore.
861	*/
862	TALLOC_FREE(buffer);
863	}
864
865	return out_buffer;
866	}
867
868	/**
869	talloc_strdup() a unix string to upper case.
870	**/
871
872	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
873	{
874	char *out_buffer = talloc_strdup(ctx,s);
875	const unsigned char p = (const unsigned char )s;
876	unsigned char q = (unsigned char )out_buffer;
877
878	if (!q) {
879	return NULL;
880	}
881
882	/* this is quite a common operation, so we want it to be
883	fast. We optimise for the ascii case, knowing that all our
884	supported multi-byte character sets are ascii-compatible
885	(ie. they match for the first 128 chars) */
886
887	while (*p) {
888	if (*p & 0x80)
889	break;
890	q++ = toupper_ascii_fast(p);
891	p++;
892	}
893
894	if (*p) {
895	/* MB case. */
896	size_t converted_size, converted_size2;
897	smb_ucs2_t *ubuf = NULL;
898
899	/* We're not using the ascii buffer above. */
900	TALLOC_FREE(out_buffer);
901
902	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
903	strlen(s)+1, (void *)&ubuf,
904	&converted_size, True))
905	{
906	return NULL;
907	}
908
909	strupper_w(ubuf);
910
911	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
912	converted_size, (void *)&out_buffer,
913	&converted_size2, True))
914	{
915	TALLOC_FREE(ubuf);
916	return NULL;
917	}
918
919	/* Don't need the intermediate buffer
920	* anymore.
921	*/
922	TALLOC_FREE(ubuf);
923	}
924
925	return out_buffer;
926	}
927
928	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
929	{
930	size_t size;
931	smb_ucs2_t *buffer = NULL;
932
933	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
934	(void *)(void )&buffer, &size,
935	True))
936	{
937	smb_panic("failed to create UCS2 buffer");
938	}
939	if (!strlower_w(buffer) && (dest == src)) {
940	SAFE_FREE(buffer);
941	return srclen;
942	}
943	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
944	SAFE_FREE(buffer);
945	return size;
946	}
947
948	/**
949	strdup() a unix string to lower case.
950	**/
951
952	char strdup_lower(const char s)
953	{
954	size_t converted_size;
955	smb_ucs2_t *buffer = NULL;
956	char *out_buffer;
957
958	if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
959	return NULL;
960	}
961
962	strlower_w(buffer);
963
964	if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
965	SAFE_FREE(buffer);
966	return NULL;
967	}
968
969	SAFE_FREE(buffer);
970
971	return out_buffer;
972	}
973
974	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
975	{
976	size_t converted_size;
977	smb_ucs2_t *buffer = NULL;
978	char *out_buffer;
979
980	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
981	return NULL;
982	}
983
984	strlower_w(buffer);
985
986	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
987	TALLOC_FREE(buffer);
988	return NULL;
989	}
990
991	TALLOC_FREE(buffer);
992
993	return out_buffer;
994	}
995
996
997	size_t ucs2_align(const void base_ptr, const void p, int flags)
998	{
999	if (flags & (STR_NOALIGN\|STR_ASCII))
1000	return 0;
1001	return PTR_DIFF(p, base_ptr) & 1;
1002	}
1003
1004
1005	/**
1006	* Copy a string from a char* unix src to a dos codepage string destination.
1007	*
1008	* @return the number of bytes occupied by the string in the destination.
1009	*
1010	* @param flags can include
1011	* <dl>
1012	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1013	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1014	* </dl>
1015	*
1016	* @param dest_len the maximum length in bytes allowed in the
1017	* destination.
1018	**/
1019	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1020	{
1021	size_t src_len = strlen(src);
1022	char *tmpbuf = NULL;
1023	size_t ret;
1024
1025	/* No longer allow a length of -1. */
1026	if (dest_len == (size_t)-1) {
1027	smb_panic("push_ascii - dest_len == -1");
1028	}
1029
1030	if (flags & STR_UPPER) {
1031	tmpbuf = SMB_STRDUP(src);
1032	if (!tmpbuf) {
1033	smb_panic("malloc fail");
1034	}
1035	strupper_m(tmpbuf);
1036	src = tmpbuf;
1037	}
1038
1039	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1040	src_len++;
1041	}
1042
1043	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1044	if (ret == (size_t)-1 &&
1045	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1046	&& dest_len > 0) {
1047	((char *)dest)[0] = '\0';
1048	}
1049	SAFE_FREE(tmpbuf);
1050	return ret;
1051	}
1052
1053	size_t push_ascii_fstring(void dest, const char src)
1054	{
1055	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1056	}
1057
1058	/********************************************************************
1059	Push an nstring - ensure null terminated. Written by
1060	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1061	********************************************************************/
1062
1063	size_t push_ascii_nstring(void dest, const char src)
1064	{
1065	size_t i, buffer_len, dest_len;
1066	smb_ucs2_t *buffer;
1067
1068	conv_silent = True;
1069	if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1070	smb_panic("failed to create UCS2 buffer");
1071	}
1072
1073	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1074	buffer_len /= sizeof(smb_ucs2_t);
1075
1076	dest_len = 0;
1077	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1078	unsigned char mb[10];
1079	/* Convert one smb_ucs2_t character at a time. */
1080	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1081	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1082	memcpy((char *)dest + dest_len, mb, mb_len);
1083	dest_len += mb_len;
1084	} else {
1085	errno = E2BIG;
1086	break;
1087	}
1088	}
1089	((char *)dest)[dest_len] = '\0';
1090
1091	SAFE_FREE(buffer);
1092	conv_silent = False;
1093	return dest_len;
1094	}
1095
1096	/********************************************************************
1097	Push and malloc an ascii string. src and dest null terminated.
1098	********************************************************************/
1099
1100	bool push_ascii_allocate(char *dest, const char src, size_t *converted_size)
1101	{
1102	size_t src_len = strlen(src)+1;
1103
1104	*dest = NULL;
1105	return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1106	(void **)dest, converted_size, True);
1107	}
1108
1109	/**
1110	* Copy a string from a dos codepage source to a unix char* destination.
1111	*
1112	* The resulting string in "dest" is always null terminated.
1113	*
1114	* @param flags can have:
1115	* <dl>
1116	* <dt>STR_TERMINATE</dt>
1117	* <dd>STR_TERMINATE means the string in @p src
1118	* is null terminated, and src_len is ignored.</dd>
1119	* </dl>
1120	*
1121	* @param src_len is the length of the source area in bytes.
1122	* @returns the number of bytes occupied by the string in @p src.
1123	**/
1124	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1125	{
1126	size_t ret;
1127
1128	if (dest_len == (size_t)-1) {
1129	/* No longer allow dest_len of -1. */
1130	smb_panic("pull_ascii - invalid dest_len of -1");
1131	}
1132
1133	if (flags & STR_TERMINATE) {
1134	if (src_len == (size_t)-1) {
1135	src_len = strlen((const char *)src) + 1;
1136	} else {
1137	size_t len = strnlen((const char *)src, src_len);
1138	if (len < src_len)
1139	len++;
1140	src_len = len;
1141	}
1142	}
1143
1144	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1145	if (ret == (size_t)-1) {
1146	ret = 0;
1147	dest_len = 0;
1148	}
1149
1150	if (dest_len && ret) {
1151	/* Did we already process the terminating zero ? */
1152	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1153	dest[MIN(ret, dest_len-1)] = 0;
1154	}
1155	} else {
1156	dest[0] = 0;
1157	}
1158
1159	return src_len;
1160	}
1161
1162	/**
1163	* Copy a string from a dos codepage source to a unix char* destination.
1164	Talloc version.
1165	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1166	needs fixing. JRA).
1167	*
1168	* The resulting string in "dest" is always null terminated.
1169	*
1170	* @param flags can have:
1171	* <dl>
1172	* <dt>STR_TERMINATE</dt>
1173	* <dd>STR_TERMINATE means the string in @p src
1174	* is null terminated, and src_len is ignored.</dd>
1175	* </dl>
1176	*
1177	* @param src_len is the length of the source area in bytes.
1178	* @returns the number of bytes occupied by the string in @p src.
1179	**/
1180
1181	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1182	char **ppdest,
1183	const void *src,
1184	size_t src_len,
1185	int flags)
1186	{
1187	char *dest = NULL;
1188	size_t dest_len;
1189
1190	#ifdef DEVELOPER
1191	/* Ensure we never use the braindead "malloc" varient. */
1192	if (ctx == NULL) {
1193	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1194	}
1195	#endif
1196
1197	*ppdest = NULL;
1198
1199	if (!src_len) {
1200	return 0;
1201	}
1202
1203	if (flags & STR_TERMINATE) {
1204	if (src_len == (size_t)-1) {
1205	src_len = strlen((const char *)src) + 1;
1206	} else {
1207	size_t len = strnlen((const char *)src, src_len);
1208	if (len < src_len)
1209	len++;
1210	src_len = len;
1211	}
1212	/* Ensure we don't use an insane length from the client. */
1213	if (src_len >= 1024*1024) {
1214	char *msg = talloc_asprintf(ctx,
1215	"Bad src length (%u) in "
1216	"pull_ascii_base_talloc",
1217	(unsigned int)src_len);
1218	smb_panic(msg);
1219	}
1220	} else {
1221	/* Can't have an unlimited length
1222	* non STR_TERMINATE'd.
1223	*/
1224	if (src_len == (size_t)-1) {
1225	errno = EINVAL;
1226	return 0;
1227	}
1228	}
1229
1230	/* src_len != -1 here. */
1231
1232	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1233	&dest_len, True)) {
1234	dest_len = 0;
1235	}
1236
1237	if (dest_len && dest) {
1238	/* Did we already process the terminating zero ? */
1239	if (dest[dest_len-1] != 0) {
1240	size_t size = talloc_get_size(dest);
1241	/* Have we got space to append the '\0' ? */
1242	if (size <= dest_len) {
1243	/* No, realloc. */
1244	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1245	dest_len+1);
1246	if (!dest) {
1247	/* talloc fail. */
1248	dest_len = (size_t)-1;
1249	return 0;
1250	}
1251	}
1252	/* Yay - space ! */
1253	dest[dest_len] = '\0';
1254	dest_len++;
1255	}
1256	} else if (dest) {
1257	dest[0] = 0;
1258	}
1259
1260	*ppdest = dest;
1261	return src_len;
1262	}
1263
1264	size_t pull_ascii_fstring(char dest, const void src)
1265	{
1266	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1267	}
1268
1269	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1270
1271	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1272	{
1273	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1274	}
1275
1276	/**
1277	* Copy a string from a char* src to a unicode destination.
1278	*
1279	* @returns the number of bytes occupied by the string in the destination.
1280	*
1281	* @param flags can have:
1282	*
1283	* <dl>
1284	* <dt>STR_TERMINATE <dd>means include the null termination.
1285	* <dt>STR_UPPER <dd>means uppercase in the destination.
1286	* <dt>STR_NOALIGN <dd>means don't do alignment.
1287	* </dl>
1288	*
1289	* @param dest_len is the maximum length allowed in the
1290	* destination.
1291	**/
1292
1293	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1294	{
1295	size_t len=0;
1296	size_t src_len;
1297	size_t ret;
1298
1299	if (dest_len == (size_t)-1) {
1300	/* No longer allow dest_len of -1. */
1301	smb_panic("push_ucs2 - invalid dest_len of -1");
1302	}
1303
1304	if (flags & STR_TERMINATE)
1305	src_len = (size_t)-1;
1306	else
1307	src_len = strlen(src);
1308
1309	if (ucs2_align(base_ptr, dest, flags)) {
1310	(char )dest = 0;
1311	dest = (void )((char )dest + 1);
1312	if (dest_len)
1313	dest_len--;
1314	len++;
1315	}
1316
1317	/* ucs2 is always a multiple of 2 bytes */
1318	dest_len &= ~1;
1319
1320	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1321	if (ret == (size_t)-1) {
1322	if ((flags & STR_TERMINATE) &&
1323	dest &&
1324	dest_len) {
1325	(char )dest = 0;
1326	}
1327	return len;
1328	}
1329
1330	len += ret;
1331
1332	if (flags & STR_UPPER) {
1333	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1334	size_t i;
1335
1336	/* We check for i < (ret / 2) below as the dest string isn't null
1337	terminated if STR_TERMINATE isn't set. */
1338
1339	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1340	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1341	if (v != dest_ucs2[i]) {
1342	dest_ucs2[i] = v;
1343	}
1344	}
1345	}
1346
1347	return len;
1348	}
1349
1350
1351	/**
1352	* Copy a string from a unix char* src to a UCS2 destination,
1353	* allocating a buffer using talloc().
1354	*
1355	* @param dest always set at least to NULL
1356	* @parm converted_size set to the number of bytes occupied by the string in
1357	* the destination on success.
1358	*
1359	* @return true if new buffer was correctly allocated, and string was
1360	* converted.
1361	**/
1362	bool push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src,
1363	size_t *converted_size)
1364	{
1365	size_t src_len = strlen(src)+1;
1366
1367	*dest = NULL;
1368	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1369	(void **)dest, converted_size, True);
1370	}
1371
1372
1373	/**
1374	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1375	*
1376	* @param dest always set at least to NULL
1377	* @parm converted_size set to the number of bytes occupied by the string in
1378	* the destination on success.
1379	*
1380	* @return true if new buffer was correctly allocated, and string was
1381	* converted.
1382	**/
1383
1384	bool push_ucs2_allocate(smb_ucs2_t *dest, const char src,
1385	size_t *converted_size)
1386	{
1387	size_t src_len = strlen(src)+1;
1388
1389	*dest = NULL;
1390	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1391	(void **)dest, converted_size, True);
1392	}
1393
1394	/**
1395	Copy a string from a char* src to a UTF-8 destination.
1396	Return the number of bytes occupied by the string in the destination
1397	Flags can have:
1398	STR_TERMINATE means include the null termination
1399	STR_UPPER means uppercase in the destination
1400	dest_len is the maximum length allowed in the destination. If dest_len
1401	is -1 then no maxiumum is used.
1402	**/
1403
1404	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1405	{
1406	size_t src_len = 0;
1407	size_t ret;
1408	char *tmpbuf = NULL;
1409
1410	if (dest_len == (size_t)-1) {
1411	/* No longer allow dest_len of -1. */
1412	smb_panic("push_utf8 - invalid dest_len of -1");
1413	}
1414
1415	if (flags & STR_UPPER) {
1416	tmpbuf = strdup_upper(src);
1417	if (!tmpbuf) {
1418	return (size_t)-1;
1419	}
1420	src = tmpbuf;
1421	src_len = strlen(src);
1422	}
1423
1424	src_len = strlen(src);
1425	if (flags & STR_TERMINATE) {
1426	src_len++;
1427	}
1428
1429	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1430	SAFE_FREE(tmpbuf);
1431	return ret;
1432	}
1433
1434	size_t push_utf8_fstring(void dest, const char src)
1435	{
1436	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1437	}
1438
1439	/**
1440	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1441	*
1442	* @param dest always set at least to NULL
1443	* @parm converted_size set to the number of bytes occupied by the string in
1444	* the destination on success.
1445	*
1446	* @return true if new buffer was correctly allocated, and string was
1447	* converted.
1448	**/
1449
1450	bool push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1451	size_t *converted_size)
1452	{
1453	size_t src_len = strlen(src)+1;
1454
1455	*dest = NULL;
1456	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1457	(void**)dest, converted_size, True);
1458	}
1459
1460	/**
1461	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1462	*
1463	* @param dest always set at least to NULL
1464	* @parm converted_size set to the number of bytes occupied by the string in
1465	* the destination on success.
1466	*
1467	* @return true if new buffer was correctly allocated, and string was
1468	* converted.
1469	**/
1470
1471	bool push_utf8_allocate(char *dest, const char src, size_t *converted_size)
1472	{
1473	size_t src_len = strlen(src)+1;
1474
1475	*dest = NULL;
1476	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1477	(void **)dest, converted_size, True);
1478	}
1479
1480	/**
1481	Copy a string from a ucs2 source to a unix char* destination.
1482	Flags can have:
1483	STR_TERMINATE means the string in src is null terminated.
1484	STR_NOALIGN means don't try to align.
1485	if STR_TERMINATE is set then src_len is ignored if it is -1.
1486	src_len is the length of the source area in bytes
1487	Return the number of bytes occupied by the string in src.
1488	The resulting string in "dest" is always null terminated.
1489	**/
1490
1491	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1492	{
1493	size_t ret;
1494
1495	if (dest_len == (size_t)-1) {
1496	/* No longer allow dest_len of -1. */
1497	smb_panic("pull_ucs2 - invalid dest_len of -1");
1498	}
1499
1500	if (!src_len) {
1501	if (dest && dest_len > 0) {
1502	dest[0] = '\0';
1503	}
1504	return 0;
1505	}
1506
1507	if (ucs2_align(base_ptr, src, flags)) {
1508	src = (const void )((const char )src + 1);
1509	if (src_len != (size_t)-1)
1510	src_len--;
1511	}
1512
1513	if (flags & STR_TERMINATE) {
1514	/* src_len -1 is the default for null terminated strings. */
1515	if (src_len != (size_t)-1) {
1516	size_t len = strnlen_w((const smb_ucs2_t *)src,
1517	src_len/2);
1518	if (len < src_len/2)
1519	len++;
1520	src_len = len*2;
1521	}
1522	}
1523
1524	/* ucs2 is always a multiple of 2 bytes */
1525	if (src_len != (size_t)-1)
1526	src_len &= ~1;
1527
1528	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1529	if (ret == (size_t)-1) {
1530	ret = 0;
1531	dest_len = 0;
1532	}
1533
1534	if (src_len == (size_t)-1)
1535	src_len = ret*2;
1536
1537	if (dest_len && ret) {
1538	/* Did we already process the terminating zero ? */
1539	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1540	dest[MIN(ret, dest_len-1)] = 0;
1541	}
1542	} else {
1543	dest[0] = 0;
1544	}
1545
1546	return src_len;
1547	}
1548
1549	/**
1550	Copy a string from a ucs2 source to a unix char* destination.
1551	Talloc version with a base pointer.
1552	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1553	needs fixing. JRA).
1554	Flags can have:
1555	STR_TERMINATE means the string in src is null terminated.
1556	STR_NOALIGN means don't try to align.
1557	if STR_TERMINATE is set then src_len is ignored if it is -1.
1558	src_len is the length of the source area in bytes
1559	Return the number of bytes occupied by the string in src.
1560	The resulting string in "dest" is always null terminated.
1561	**/
1562
1563	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1564	const void *base_ptr,
1565	char **ppdest,
1566	const void *src,
1567	size_t src_len,
1568	int flags)
1569	{
1570	char *dest;
1571	size_t dest_len;
1572
1573	*ppdest = NULL;
1574
1575	#ifdef DEVELOPER
1576	/* Ensure we never use the braindead "malloc" varient. */
1577	if (ctx == NULL) {
1578	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1579	}
1580	#endif
1581
1582	if (!src_len) {
1583	return 0;
1584	}
1585
1586	if (ucs2_align(base_ptr, src, flags)) {
1587	src = (const void )((const char )src + 1);
1588	if (src_len != (size_t)-1)
1589	src_len--;
1590	}
1591
1592	if (flags & STR_TERMINATE) {
1593	/* src_len -1 is the default for null terminated strings. */
1594	if (src_len != (size_t)-1) {
1595	size_t len = strnlen_w((const smb_ucs2_t *)src,
1596	src_len/2);
1597	if (len < src_len/2)
1598	len++;
1599	src_len = len*2;
1600	} else {
1601	/*
1602	* src_len == -1 - alloc interface won't take this
1603	* so we must calculate.
1604	*/
1605	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1606	}
1607	/* Ensure we don't use an insane length from the client. */
1608	if (src_len >= 1024*1024) {
1609	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1610	}
1611	} else {
1612	/* Can't have an unlimited length
1613	* non STR_TERMINATE'd.
1614	*/
1615	if (src_len == (size_t)-1) {
1616	errno = EINVAL;
1617	return 0;
1618	}
1619	}
1620
1621	/* src_len != -1 here. */
1622
1623	/* ucs2 is always a multiple of 2 bytes */
1624	src_len &= ~1;
1625
1626	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1627	(void *)&dest, &dest_len, True)) {
1628	dest_len = 0;
1629	}
1630
1631	if (dest_len) {
1632	/* Did we already process the terminating zero ? */
1633	if (dest[dest_len-1] != 0) {
1634	size_t size = talloc_get_size(dest);
1635	/* Have we got space to append the '\0' ? */
1636	if (size <= dest_len) {
1637	/* No, realloc. */
1638	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1639	dest_len+1);
1640	if (!dest) {
1641	/* talloc fail. */
1642	dest_len = (size_t)-1;
1643	return 0;
1644	}
1645	}
1646	/* Yay - space ! */
1647	dest[dest_len] = '\0';
1648	dest_len++;
1649	}
1650	} else if (dest) {
1651	dest[0] = 0;
1652	}
1653
1654	*ppdest = dest;
1655	return src_len;
1656	}
1657
1658	size_t pull_ucs2_fstring(char dest, const void src)
1659	{
1660	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1661	}
1662
1663	/**
1664	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1665	*
1666	* @param dest always set at least to NULL
1667	* @parm converted_size set to the number of bytes occupied by the string in
1668	* the destination on success.
1669	*
1670	* @return true if new buffer was correctly allocated, and string was
1671	* converted.
1672	**/
1673
1674	bool pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src,
1675	size_t *converted_size)
1676	{
1677	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1678
1679	*dest = NULL;
1680	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1681	(void **)dest, converted_size, True);
1682	}
1683
1684	/**
1685	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1686	*
1687	* @param dest always set at least to NULL
1688	* @parm converted_size set to the number of bytes occupied by the string in
1689	* the destination on success.
1690	* @return true if new buffer was correctly allocated, and string was
1691	* converted.
1692	**/
1693
1694	bool pull_ucs2_allocate(char *dest, const smb_ucs2_t src,
1695	size_t *converted_size)
1696	{
1697	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1698
1699	*dest = NULL;
1700	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1701	(void **)dest, converted_size, True);
1702	}
1703
1704	/**
1705	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1706	*
1707	* @param dest always set at least to NULL
1708	* @parm converted_size set to the number of bytes occupied by the string in
1709	* the destination on success.
1710	*
1711	* @return true if new buffer was correctly allocated, and string was
1712	* converted.
1713	**/
1714
1715	bool pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1716	size_t *converted_size)
1717	{
1718	size_t src_len = strlen(src)+1;
1719
1720	*dest = NULL;
1721	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1722	(void **)dest, converted_size, True);
1723	}
1724
1725	/**
1726	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1727	*
1728	* @param dest always set at least to NULL
1729	* @parm converted_size set to the number of bytes occupied by the string in
1730	* the destination on success.
1731	*
1732	* @return true if new buffer was correctly allocated, and string was
1733	* converted.
1734	**/
1735
1736	bool pull_utf8_allocate(char *dest, const char src, size_t *converted_size)
1737	{
1738	size_t src_len = strlen(src)+1;
1739
1740	*dest = NULL;
1741	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1742	(void **)dest, converted_size, True);
1743	}
1744
1745	/**
1746	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1747	*
1748	* @param dest always set at least to NULL
1749	* @parm converted_size set to the number of bytes occupied by the string in
1750	* the destination on success.
1751	*
1752	* @return true if new buffer was correctly allocated, and string was
1753	* converted.
1754	**/
1755
1756	bool pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src,
1757	size_t *converted_size)
1758	{
1759	size_t src_len = strlen(src)+1;
1760
1761	*dest = NULL;
1762	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1763	(void **)dest, converted_size, True);
1764	}
1765
1766	/**
1767	Copy a string from a char* src to a unicode or ascii
1768	dos codepage destination choosing unicode or ascii based on the
1769	flags in the SMB buffer starting at base_ptr.
1770	Return the number of bytes occupied by the string in the destination.
1771	flags can have:
1772	STR_TERMINATE means include the null termination.
1773	STR_UPPER means uppercase in the destination.
1774	STR_ASCII use ascii even with unicode packet.
1775	STR_NOALIGN means don't do alignment.
1776	dest_len is the maximum length allowed in the destination. If dest_len
1777	is -1 then no maxiumum is used.
1778	**/
1779
1780	size_t push_string_fn(const char *function, unsigned int line,
1781	const void *base_ptr, uint16 flags2,
1782	void dest, const char src,
1783	size_t dest_len, int flags)
1784	{
1785	#ifdef DEVELOPER
1786	/* We really need to zero fill here, not clobber
1787	* region, as we want to ensure that valgrind thinks
1788	* all of the outgoing buffer has been written to
1789	* so a send() or write() won't trap an error.
1790	* JRA.
1791	*/
1792	#if 0
1793	clobber_region(function, line, dest, dest_len);
1794	#else
1795	memset(dest, '\0', dest_len);
1796	#endif
1797	#endif
1798
1799	if (!(flags & STR_ASCII) && \
1800	((flags & STR_UNICODE \|\| \
1801	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1802	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1803	}
1804	return push_ascii(dest, src, dest_len, flags);
1805	}
1806
1807
1808	/**
1809	Copy a string from a unicode or ascii source (depending on
1810	the packet flags) to a char* destination.
1811	Flags can have:
1812	STR_TERMINATE means the string in src is null terminated.
1813	STR_UNICODE means to force as unicode.
1814	STR_ASCII use ascii even with unicode packet.
1815	STR_NOALIGN means don't do alignment.
1816	if STR_TERMINATE is set then src_len is ignored is it is -1
1817	src_len is the length of the source area in bytes.
1818	Return the number of bytes occupied by the string in src.
1819	The resulting string in "dest" is always null terminated.
1820	**/
1821
1822	size_t pull_string_fn(const char *function,
1823	unsigned int line,
1824	const void *base_ptr,
1825	uint16 smb_flags2,
1826	char *dest,
1827	const void *src,
1828	size_t dest_len,
1829	size_t src_len,
1830	int flags)
1831	{
1832	#ifdef DEVELOPER
1833	clobber_region(function, line, dest, dest_len);
1834	#endif
1835
1836	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1837	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1838	"UNICODE defined");
1839	}
1840
1841	if (!(flags & STR_ASCII) && \
1842	((flags & STR_UNICODE \|\| \
1843	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1844	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1845	}
1846	return pull_ascii(dest, src, dest_len, src_len, flags);
1847	}
1848
1849	/**
1850	Copy a string from a unicode or ascii source (depending on
1851	the packet flags) to a char* destination.
1852	Variant that uses talloc.
1853	Flags can have:
1854	STR_TERMINATE means the string in src is null terminated.
1855	STR_UNICODE means to force as unicode.
1856	STR_ASCII use ascii even with unicode packet.
1857	STR_NOALIGN means don't do alignment.
1858	if STR_TERMINATE is set then src_len is ignored is it is -1
1859	src_len is the length of the source area in bytes.
1860	Return the number of bytes occupied by the string in src.
1861	The resulting string in "dest" is always null terminated.
1862	**/
1863
1864	size_t pull_string_talloc_fn(const char *function,
1865	unsigned int line,
1866	TALLOC_CTX *ctx,
1867	const void *base_ptr,
1868	uint16 smb_flags2,
1869	char **ppdest,
1870	const void *src,
1871	size_t src_len,
1872	int flags)
1873	{
1874	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1875	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1876	"UNICODE defined");
1877	}
1878
1879	if (!(flags & STR_ASCII) && \
1880	((flags & STR_UNICODE \|\| \
1881	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1882	return pull_ucs2_base_talloc(ctx,
1883	base_ptr,
1884	ppdest,
1885	src,
1886	src_len,
1887	flags);
1888	}
1889	return pull_ascii_base_talloc(ctx,
1890	ppdest,
1891	src,
1892	src_len,
1893	flags);
1894	}
1895
1896
1897	size_t align_string(const void base_ptr, const char p, int flags)
1898	{
1899	if (!(flags & STR_ASCII) && \
1900	((flags & STR_UNICODE \|\| \
1901	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1902	return ucs2_align(base_ptr, p, flags);
1903	}
1904	return 0;
1905	}
1906
1907	/*
1908	Return the unicode codepoint for the next multi-byte CH_UNIX character
1909	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1910
1911	Also return the number of bytes consumed (which tells the caller
1912	how many bytes to skip to get to the next CH_UNIX character).
1913
1914	Return INVALID_CODEPOINT if the next character cannot be converted.
1915	*/
1916
1917	codepoint_t next_codepoint(const char str, size_t size)
1918	{
1919	/* It cannot occupy more than 4 bytes in UTF16 format */
1920	uint8_t buf[4];
1921	smb_iconv_t descriptor;
1922	#ifdef __OS2__
1923	size_t ilen_max;
1924	size_t olen_orig;
1925	const char *inbuf;
1926	#endif
1927	size_t ilen_orig;
1928	size_t ilen;
1929	size_t olen;
1930
1931	char *outbuf;
1932
1933	#ifdef __OS2__
1934	*size = 1;
1935	#endif
1936
1937	if ((str[0] & 0x80) == 0) {
1938	#ifndef __OS2__
1939	*size = 1;
1940	#endif
1941	return (codepoint_t)str[0];
1942	}
1943
1944	lazy_initialize_conv();
1945
1946	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1947	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1948	#ifndef __OS2__
1949	*size = 1;
1950	#endif
1951	return INVALID_CODEPOINT;
1952	}
1953	#ifdef __OS2__
1954	/* We assume that no multi-byte character can take
1955	more than 5 bytes. This is OK as we only
1956	support codepoints up to 1M */
1957
1958	ilen_max = strnlen( str, 5 );
1959	#else
1960	*size = 1;
1961	#endif
1962	ilen_orig = 1;
1963	olen_orig = 2;
1964	while( 1 )
1965	{
1966	ilen = ilen_orig;
1967	olen = olen_orig;
1968	inbuf = str;
1969	outbuf = ( char * )buf;
1970	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1971	break;
1972
1973	switch( errno )
1974	{
1975	case E2BIG :
1976	if( olen_orig == 2 )
1977	olen_orig = 4;
1978	else
1979	return INVALID_CODEPOINT;
1980	break;
1981
1982
1983	case EINVAL :
1984	#ifndef __OS2__
1985	/* We assume that no multi-byte character can take
1986	more than 5 bytes. This is OK as we only
1987	support codepoints up to 1M */
1988	if( ilen_orig < 5 )
1989	#else
1990	if( ilen_orig < ilen_max )
1991	#endif
1992	ilen_orig++;
1993	else
1994	return INVALID_CODEPOINT;
1995	break;
1996
1997	case EILSEQ :
1998	default :
1999	return INVALID_CODEPOINT;
2000	}
2001	}
2002
2003	olen = olen_orig - olen;
2004
2005	*size = ilen_orig - ilen;
2006
2007	if (olen == 2) {
2008	/* 2 byte, UTF16-LE encoded value. */
2009	return (codepoint_t)SVAL(buf, 0);
2010	}
2011	if (olen == 4) {
2012	/* Decode a 4 byte UTF16-LE character manually.
2013	See RFC2871 for the encoding machanism.
2014	*/
2015	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2016	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2017
2018	return (codepoint_t)0x10000 +
2019	(w1 << 10) + w2;
2020	}
2021
2022	/* no other length is valid */
2023	return INVALID_CODEPOINT;
2024	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: