Context Navigation

source: branches/samba-3.2.x/source/lib/charcnv.c@ 237

Visit:

Last change on this file since 237 was 237, checked in by Herwig Bauernfeind, 16 years ago
Fix for Ticket #85 (by diver) in 3.2 branch
File size: 50.0 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/**
 * Return the substitute character written to the output in place of
 * input that cannot be converted between character sets.
 *
 * We can parameterize this if someone complains.... JRA.
 *
 * @return always '_'.
 **/
char lp_failed_convert_char(void)
{
	return '_';
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58	#ifndef __OS2__
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	#else
62	if (ch == CH_UTF16LE) ret = "IBM-1200";
63	else if (ch == CH_UTF16BE) ret = "IBM-1200";
64	#endif
65	else if (ch == CH_UNIX) ret = lp_unix_charset();
66	else if (ch == CH_DOS) ret = lp_dos_charset();
67	else if (ch == CH_DISPLAY) ret = lp_display_charset();
68	else if (ch == CH_UTF8) ret = "UTF8";
69
70	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
71	if (ret && !strcmp(ret, "LOCALE")) {
72	const char *ln = NULL;
73
74	#ifdef HAVE_SETLOCALE
75	setlocale(LC_ALL, "");
76	#endif
77	ln = nl_langinfo(CODESET);
78	if (ln) {
79	/* Check whether the charset name is supported
80	by iconv */
81	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
82	if (handle == (smb_iconv_t) -1) {
83	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
84	ln = NULL;
85	} else {
86	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
87	smb_iconv_close(handle);
88	}
89	}
90	ret = ln;
91	}
92	#endif
93
94	if (!ret \|\| !*ret) ret = "ASCII";
95	DEBUG(10, ("codepage: %s\n",ret));
96	return ret;
97	}
98
99	void lazy_initialize_conv(void)
100	{
101	if (!initialized) {
102	load_case_tables();
103	init_iconv();
104	initialized = true;
105	}
106	}
107
108	/**
109	* Destroy global objects allocated by init_iconv()
110	**/
111	void gfree_charcnv(void)
112	{
113	int c1, c2;
114
115	for (c1=0;c1<NUM_CHARSETS;c1++) {
116	for (c2=0;c2<NUM_CHARSETS;c2++) {
117	if ( conv_handles[c1][c2] ) {
118	smb_iconv_close( conv_handles[c1][c2] );
119	conv_handles[c1][c2] = 0;
120	}
121	}
122	}
123	initialized = false;
124	}
125
126	/**
127	* Initialize iconv conversion descriptors.
128	*
129	* This is called the first time it is needed, and also called again
130	* every time the configuration is reloaded, because the charset or
131	* codepage might have changed.
132	**/
133	void init_iconv(void)
134	{
135	int c1, c2;
136	bool did_reload = False;
137
138	/* so that charset_name() works we need to get the UNIX<->UCS2 going
139	first */
140	if (!conv_handles[CH_UNIX][CH_UTF16LE])
141	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
142
143	if (!conv_handles[CH_UTF16LE][CH_UNIX])
144	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
145
146	for (c1=0;c1<NUM_CHARSETS;c1++) {
147	for (c2=0;c2<NUM_CHARSETS;c2++) {
148	const char *n1 = charset_name((charset_t)c1);
149	const char *n2 = charset_name((charset_t)c2);
150	if (conv_handles[c1][c2] &&
151	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
152	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
153	continue;
154
155	did_reload = True;
156
157	if (conv_handles[c1][c2])
158	smb_iconv_close(conv_handles[c1][c2]);
159
160	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
161	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
162	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
163	charset_name((charset_t)c1), charset_name((charset_t)c2)));
164	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
165	n1 = "ASCII";
166	}
167	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
168	n2 = "ASCII";
169	}
170	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
171	n1, n2 ));
172	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
173	if (!conv_handles[c1][c2]) {
174	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
175	smb_panic("init_iconv: conv_handle initialization failed");
176	}
177	}
178	}
179	}
180
181	if (did_reload) {
182	/* XXX: Does this really get called every time the dos
183	* codepage changes? */
184	/* XXX: Is the did_reload test too strict? */
185	conv_silent = True;
186	init_valid_table();
187	conv_silent = False;
188	}
189	}
190
191	/**
192	* Convert string from one encoding to another, making error checking etc
193	* Slow path version - uses (slow) iconv.
194	*
195	* @param src pointer to source string (multibyte or singlebyte)
196	* @param srclen length of the source string in bytes
197	* @param dest pointer to destination string (multibyte or singlebyte)
198	* @param destlen maximal length allowed for string
199	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
200	* @returns the number of bytes occupied in the destination
201	*
202	* Ensure the srclen contains the terminating zero.
203	*
204	**/
205
206	static size_t convert_string_internal(charset_t from, charset_t to,
207	void const *src, size_t srclen,
208	void *dest, size_t destlen, bool allow_bad_conv)
209	{
210	size_t i_len, o_len;
211	size_t retval;
212	const char* inbuf = (const char*)src;
213	char* outbuf = (char*)dest;
214	smb_iconv_t descriptor;
215
216	lazy_initialize_conv();
217
218	descriptor = conv_handles[from][to];
219
220	if (srclen == (size_t)-1) {
221	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
222	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
223	} else {
224	srclen = strlen((const char *)src)+1;
225	}
226	}
227
228
229	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
230	if (!conv_silent)
231	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
232	return (size_t)-1;
233	}
234
235	i_len=srclen;
236	o_len=destlen;
237
238	again:
239
240	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
241	if(retval==(size_t)-1) {
242	const char *reason="unknown error";
243	switch(errno) {
244	case EINVAL:
245	reason="Incomplete multibyte sequence";
246	if (!conv_silent)
247	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
248	if (allow_bad_conv)
249	goto use_as_is;
250	return (size_t)-1;
251	case E2BIG:
252	reason="No more room";
253	if (!conv_silent) {
254	if (from == CH_UNIX) {
255	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
256	charset_name(from), charset_name(to),
257	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
258	} else {
259	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
260	charset_name(from), charset_name(to),
261	(unsigned int)srclen, (unsigned int)destlen));
262	}
263	}
264	break;
265	case EILSEQ:
266	reason="Illegal multibyte sequence";
267	if (!conv_silent)
268	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
269	if (allow_bad_conv)
270	goto use_as_is;
271
272	return (size_t)-1;
273	default:
274	if (!conv_silent)
275	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
276	return (size_t)-1;
277	}
278	/* smb_panic(reason); */
279	}
280	return destlen-o_len;
281
282	use_as_is:
283
284	/*
285	* Conversion not supported. This is actually an error, but there are so
286	* many misconfigured iconv systems and smb.conf's out there we can't just
287	* fail. Do a very bad conversion instead.... JRA.
288	*/
289
290	{
291	if (o_len == 0 \|\| i_len == 0)
292	return destlen - o_len;
293
294	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
295	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
296	/* Can't convert from utf16 any endian to multibyte.
297	Replace with the default fail char.
298	*/
299	if (i_len < 2)
300	return destlen - o_len;
301	if (i_len >= 2) {
302	*outbuf = lp_failed_convert_char();
303
304	outbuf++;
305	o_len--;
306
307	inbuf += 2;
308	i_len -= 2;
309	}
310
311	if (o_len == 0 \|\| i_len == 0)
312	return destlen - o_len;
313
314	/* Keep trying with the next char... */
315	goto again;
316
317	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
318	/* Can't convert to UTF16LE - just widen by adding the
319	default fail char then zero.
320	*/
321	if (o_len < 2)
322	return destlen - o_len;
323
324	outbuf[0] = lp_failed_convert_char();
325	outbuf[1] = '\0';
326
327	inbuf++;
328	i_len--;
329
330	outbuf += 2;
331	o_len -= 2;
332
333	if (o_len == 0 \|\| i_len == 0)
334	return destlen - o_len;
335
336	/* Keep trying with the next char... */
337	goto again;
338
339	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
340	to != CH_UTF16LE && to != CH_UTF16BE) {
341	/* Failed multibyte to multibyte. Just copy the default fail char and
342	try again. */
343	outbuf[0] = lp_failed_convert_char();
344
345	inbuf++;
346	i_len--;
347
348	outbuf++;
349	o_len--;
350
351	if (o_len == 0 \|\| i_len == 0)
352	return destlen - o_len;
353
354	/* Keep trying with the next char... */
355	goto again;
356
357	} else {
358	/* Keep compiler happy.... */
359	return destlen - o_len;
360	}
361	}
362	}
363
364	/**
365	* Convert string from one encoding to another, making error checking etc
366	* Fast path version - handles ASCII first.
367	*
368	* @param src pointer to source string (multibyte or singlebyte)
369	* @param srclen length of the source string in bytes, or -1 for nul terminated.
370	* @param dest pointer to destination string (multibyte or singlebyte)
371	* @param destlen maximal length allowed for string - NEVER -1.
372	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
373	* @returns the number of bytes occupied in the destination
374	*
375	* Ensure the srclen contains the terminating zero.
376	*
377	* This function has been hand-tuned to provide a fast path.
378	* Don't change unless you really know what you are doing. JRA.
379	**/
380
381	size_t convert_string(charset_t from, charset_t to,
382	void const *src, size_t srclen,
383	void *dest, size_t destlen, bool allow_bad_conv)
384	{
385	/*
386	* NB. We deliberately don't do a strlen here if srclen == -1.
387	* This is very expensive over millions of calls and is taken
388	* care of in the slow path in convert_string_internal. JRA.
389	*/
390
391	#ifdef DEVELOPER
392	SMB_ASSERT(destlen != (size_t)-1);
393	#endif
394
395	if (srclen == 0)
396	return 0;
397
398	// DEBUG(10, ("convert_string: 1"));
399
400	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
401	const unsigned char p = (const unsigned char )src;
402	unsigned char q = (unsigned char )dest;
403	size_t slen = srclen;
404	size_t dlen = destlen;
405	unsigned char lastp = '\0';
406	size_t retval = 0;
407
408	// DEBUG(10, ("convert_string: 2"));
409
410	/* If all characters are ascii, fast path here. */
411	while (slen && dlen) {
412	if ((lastp = *p) <= 0x7f) {
413	q++ = p++;
414	if (slen != (size_t)-1) {
415	slen--;
416	}
417	dlen--;
418	retval++;
419	if (!lastp)
420	break;
421	} else {
422	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
423	goto general_case;
424	#else
425	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
426	if (ret == (size_t)-1) {
427	return ret;
428	}
429	return retval + ret;
430	#endif
431	}
432	}
433	if (!dlen) {
434	/* Even if we fast path we should note if we ran out of room. */
435	if (((slen != (size_t)-1) && slen) \|\|
436	((slen == (size_t)-1) && lastp)) {
437	errno = E2BIG;
438	}
439	}
440	return retval;
441	// DEBUG(10, ("convert_string: 3"));
442
443	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
444	const unsigned char p = (const unsigned char )src;
445	unsigned char q = (unsigned char )dest;
446	size_t retval = 0;
447	size_t slen = srclen;
448	size_t dlen = destlen;
449	unsigned char lastp = '\0';
450
451	/* If all characters are ascii, fast path here. */
452	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
453	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
454	q++ = p;
455	if (slen != (size_t)-1) {
456	slen -= 2;
457	}
458	p += 2;
459	dlen--;
460	retval++;
461	if (!lastp)
462	break;
463	} else {
464	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
465	goto general_case;
466	#else
467	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
468	if (ret == (size_t)-1) {
469	return ret;
470	}
471	return retval + ret;
472	#endif
473	}
474	}
475	if (!dlen) {
476	/* Even if we fast path we should note if we ran out of room. */
477	if (((slen != (size_t)-1) && slen) \|\|
478	((slen == (size_t)-1) && lastp)) {
479	errno = E2BIG;
480	}
481	}
482	return retval;
483	// DEBUG(10, ("convert_string: 4"));
484
485	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
486	const unsigned char p = (const unsigned char )src;
487	unsigned char q = (unsigned char )dest;
488	size_t retval = 0;
489	size_t slen = srclen;
490	size_t dlen = destlen;
491	unsigned char lastp = '\0';
492
493	/* If all characters are ascii, fast path here. */
494	while (slen && (dlen >= 2)) {
495	if ((lastp = *p) <= 0x7F) {
496	q++ = p++;
497	*q++ = '\0';
498	if (slen != (size_t)-1) {
499	slen--;
500	}
501	dlen -= 2;
502	retval += 2;
503	if (!lastp)
504	break;
505	} else {
506	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
507	goto general_case;
508	#else
509	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
510	if (ret == (size_t)-1) {
511	return ret;
512	}
513	return retval + ret;
514	#endif
515	}
516	}
517	if (!dlen) {
518	/* Even if we fast path we should note if we ran out of room. */
519	if (((slen != (size_t)-1) && slen) \|\|
520	((slen == (size_t)-1) && lastp)) {
521	errno = E2BIG;
522	}
523	}
524	return retval;
525	}
526
527	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
528	general_case:
529	#endif
530	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
531	}
532
533	/**
534	* Convert between character sets, allocating a new buffer for the result.
535	*
536	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
537	* (this is a bad interface and needs fixing. JRA).
538	* @param srclen length of source buffer.
539	* @param dest always set at least to NULL
540	* @param converted_size set to the size of the allocated buffer on return
541	* true
542	* @note -1 is not accepted for srclen.
543	*
544	* @return True if new buffer was correctly allocated, and string was
545	* converted.
546	*
547	* Ensure the srclen contains the terminating zero.
548	*
549	* I hate the goto's in this function. It's embarressing.....
550	* There has to be a cleaner way to do this. JRA.
551	**/
552
553	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
554	void const src, size_t srclen, void dst,
555	size_t *converted_size, bool allow_bad_conv)
556	{
557	size_t i_len, o_len, destlen = (srclen * 3) / 2;
558	size_t retval;
559	const char inbuf = (const char )src;
560	char outbuf = NULL, ob = NULL;
561	smb_iconv_t descriptor;
562	void dest = (void )dst;
563
564	*dest = NULL;
565
566	if (!converted_size) {
567	errno = EINVAL;
568	return false;
569	}
570
571	if (src == NULL \|\| srclen == (size_t)-1) {
572	errno = EINVAL;
573	return false;
574	}
575	if (srclen == 0) {
576	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
577	if (ob == NULL) {
578	errno = ENOMEM;
579	return false;
580	}
581	*dest = ob;
582	*converted_size = 0;
583	return true;
584	}
585
586	lazy_initialize_conv();
587
588	descriptor = conv_handles[from][to];
589
590	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
591	if (!conv_silent)
592	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
593	errno = EOPNOTSUPP;
594	return false;
595	}
596
597	convert:
598
599	/* +2 is for ucs2 null termination. */
600	if ((destlen*2)+2 < destlen) {
601	/* wrapped ! abort. */
602	if (!conv_silent)
603	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
604	if (!ctx)
605	SAFE_FREE(outbuf);
606	errno = EOPNOTSUPP;
607	return false;
608	} else {
609	destlen = destlen * 2;
610	}
611
612	/* +2 is for ucs2 null termination. */
613	if (ctx) {
614	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
615	} else {
616	ob = (char *)SMB_REALLOC(ob, destlen + 2);
617	}
618
619	if (!ob) {
620	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
621	errno = ENOMEM;
622	return false;
623	}
624	outbuf = ob;
625	i_len = srclen;
626	o_len = destlen;
627
628	again:
629	DEBUG(10,("convert_string_internal: convert_string(%s,%s): srclen=%u destlen=%u\n",
630	charset_name(from), charset_name(to),
631	(unsigned int)srclen, (unsigned int)destlen));
632
633	retval = smb_iconv(descriptor,
634	&inbuf, &i_len,
635	&outbuf, &o_len);
636	if(retval == (size_t)-1) {
637	const char *reason="unknown error";
638	switch(errno) {
639	case EINVAL:
640	reason="Incomplete multibyte sequence";
641	if (!conv_silent)
642	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
643	if (allow_bad_conv)
644	goto use_as_is;
645	break;
646	case E2BIG:
647	goto convert;
648	case EILSEQ:
649	reason="Illegal multibyte sequence";
650	if (!conv_silent)
651	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
652	if (allow_bad_conv)
653	goto use_as_is;
654	break;
655	}
656	if (!conv_silent)
657	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
658	/* smb_panic(reason); */
659	if (ctx) {
660	TALLOC_FREE(ob);
661	} else {
662	SAFE_FREE(ob);
663	}
664	return false;
665	}
666
667	out:
668
669	destlen = destlen - o_len;
670	/* Don't shrink unless we're reclaiming a lot of
671	* space. This is in the hot codepath and these
672	* reallocs cost. JRA.
673	*/
674	if (o_len > 1024) {
675	/* We're shrinking here so we know the +2 is safe from wrap. */
676	if (ctx) {
677	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
678	} else {
679	ob = (char *)SMB_REALLOC(ob,destlen + 2);
680	}
681	}
682
683	if (destlen && !ob) {
684	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
685	errno = ENOMEM;
686	return false;
687	}
688
689	*dest = ob;
690
691	/* Must ucs2 null terminate in the extra space we allocated. */
692	ob[destlen] = '\0';
693	ob[destlen+1] = '\0';
694
695	*converted_size = destlen;
696	return true;
697
698	use_as_is:
699
700	/*
701	* Conversion not supported. This is actually an error, but there are so
702	* many misconfigured iconv systems and smb.conf's out there we can't just
703	* fail. Do a very bad conversion instead.... JRA.
704	*/
705
706	{
707	if (o_len == 0 \|\| i_len == 0)
708	goto out;
709
710	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
711	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
712	/* Can't convert from utf16 any endian to multibyte.
713	Replace with the default fail char.
714	*/
715
716	if (i_len < 2)
717	goto out;
718
719	if (i_len >= 2) {
720	*outbuf = lp_failed_convert_char();
721
722	outbuf++;
723	o_len--;
724
725	inbuf += 2;
726	i_len -= 2;
727	}
728
729	if (o_len == 0 \|\| i_len == 0)
730	goto out;
731
732	/* Keep trying with the next char... */
733	goto again;
734
735	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
736	/* Can't convert to UTF16LE - just widen by adding the
737	default fail char then zero.
738	*/
739	if (o_len < 2)
740	goto out;
741
742	outbuf[0] = lp_failed_convert_char();
743	outbuf[1] = '\0';
744
745	inbuf++;
746	i_len--;
747
748	outbuf += 2;
749	o_len -= 2;
750
751	if (o_len == 0 \|\| i_len == 0)
752	goto out;
753
754	/* Keep trying with the next char... */
755	goto again;
756
757	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
758	to != CH_UTF16LE && to != CH_UTF16BE) {
759	/* Failed multibyte to multibyte. Just copy the default fail char and
760	try again. */
761	outbuf[0] = lp_failed_convert_char();
762
763	inbuf++;
764	i_len--;
765
766	outbuf++;
767	o_len--;
768
769	if (o_len == 0 \|\| i_len == 0)
770	goto out;
771
772	/* Keep trying with the next char... */
773	goto again;
774
775	} else {
776	/* Keep compiler happy.... */
777	goto out;
778	}
779	}
780	}
781
782	/**
783	* Convert between character sets, allocating a new buffer using talloc for the result.
784	*
785	* @param srclen length of source buffer.
786	* @param dest always set at least to NULL
787	* @note -1 is not accepted for srclen.
788	*
789	* @returns Size in bytes of the converted string; or -1 in case of error.
790	**/
791	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
792	void const src, size_t srclen, void dst,
793	bool allow_bad_conv)
794	{
795	void dest = (void )dst;
796	size_t dest_len;
797
798	*dest = NULL;
799	if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
800	&dest_len, allow_bad_conv))
801	return (size_t)-1;
802	if (*dest == NULL)
803	return (size_t)-1;
804	return dest_len;
805	}
806
807	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
808	{
809	size_t size;
810	smb_ucs2_t *buffer;
811
812	size = push_ucs2_allocate(&buffer, src);
813	if (size == (size_t)-1) {
814	return (size_t)-1;
815	}
816	if (!strupper_w(buffer) && (dest == src)) {
817	free(buffer);
818	return srclen;
819	}
820
821	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
822	free(buffer);
823	return size;
824	}
825
826	/**
827	strdup() a unix string to upper case.
828	**/
829
830	char strdup_upper(const char s)
831	{
832	char *out_buffer = SMB_STRDUP(s);
833	const unsigned char p = (const unsigned char )s;
834	unsigned char q = (unsigned char )out_buffer;
835
836	if (!q) {
837	return NULL;
838	}
839
840	/* this is quite a common operation, so we want it to be
841	fast. We optimise for the ascii case, knowing that all our
842	supported multi-byte character sets are ascii-compatible
843	(ie. they match for the first 128 chars) */
844
845	while (*p) {
846	if (*p & 0x80)
847	break;
848	q++ = toupper_ascii_fast(p);
849	p++;
850	}
851
852	if (*p) {
853	/* MB case. */
854	size_t size, size2;
855	smb_ucs2_t *buffer = NULL;
856
857	SAFE_FREE(out_buffer);
858	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
859	strlen(s) + 1, (void *)(void )&buffer, &size,
860	True)) {
861	return NULL;
862	}
863
864	strupper_w(buffer);
865
866	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
867	size, (void *)(void )&out_buffer, &size2, True)) {
868	TALLOC_FREE(buffer);
869	return NULL;
870	}
871
872	/* Don't need the intermediate buffer
873	* anymore.
874	*/
875	TALLOC_FREE(buffer);
876	}
877
878	return out_buffer;
879	}
880
881	/**
882	talloc_strdup() a unix string to upper case.
883	**/
884
885	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
886	{
887	char *out_buffer = talloc_strdup(ctx,s);
888	const unsigned char p = (const unsigned char )s;
889	unsigned char q = (unsigned char )out_buffer;
890
891	if (!q) {
892	return NULL;
893	}
894
895	/* this is quite a common operation, so we want it to be
896	fast. We optimise for the ascii case, knowing that all our
897	supported multi-byte character sets are ascii-compatible
898	(ie. they match for the first 128 chars) */
899
900	while (*p) {
901	if (*p & 0x80)
902	break;
903	q++ = toupper_ascii_fast(p);
904	p++;
905	}
906
907	if (*p) {
908	/* MB case. */
909	size_t size;
910	smb_ucs2_t *ubuf = NULL;
911
912	/* We're not using the ascii buffer above. */
913	TALLOC_FREE(out_buffer);
914
915	size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
916	s, strlen(s)+1,
917	(void *)&ubuf,
918	True);
919	if (size == (size_t)-1) {
920	return NULL;
921	}
922
923	strupper_w(ubuf);
924
925	size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
926	ubuf, size,
927	(void *)&out_buffer,
928	True);
929
930	/* Don't need the intermediate buffer
931	* anymore.
932	*/
933
934	TALLOC_FREE(ubuf);
935
936	if (size == (size_t)-1) {
937	return NULL;
938	}
939	}
940
941	return out_buffer;
942	}
943
944	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
945	{
946	size_t size;
947	smb_ucs2_t *buffer = NULL;
948
949	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
950	(void *)(void )&buffer, &size, True)) {
951	smb_panic("failed to create UCS2 buffer");
952	}
953	if (!strlower_w(buffer) && (dest == src)) {
954	SAFE_FREE(buffer);
955	return srclen;
956	}
957	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
958	SAFE_FREE(buffer);
959	return size;
960	}
961
962	/**
963	strdup() a unix string to lower case.
964	**/
965
966	char strdup_lower(const char s)
967	{
968	size_t size;
969	smb_ucs2_t *buffer = NULL;
970	char *out_buffer;
971
972	size = push_ucs2_allocate(&buffer, s);
973	if (size == -1 \|\| !buffer) {
974	return NULL;
975	}
976
977	strlower_w(buffer);
978
979	size = pull_ucs2_allocate(&out_buffer, buffer);
980	SAFE_FREE(buffer);
981
982	if (size == (size_t)-1) {
983	return NULL;
984	}
985
986	return out_buffer;
987	}
988
989	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
990	{
991	size_t size;
992	smb_ucs2_t *buffer = NULL;
993	char *out_buffer;
994
995	size = push_ucs2_talloc(ctx, &buffer, s);
996	if (size == -1 \|\| !buffer) {
997	TALLOC_FREE(buffer);
998	return NULL;
999	}
1000
1001	strlower_w(buffer);
1002
1003	size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
1004	TALLOC_FREE(buffer);
1005
1006	if (size == (size_t)-1) {
1007	TALLOC_FREE(out_buffer);
1008	return NULL;
1009	}
1010
1011	return out_buffer;
1012	}
1013
1014
1015	size_t ucs2_align(const void base_ptr, const void p, int flags)
1016	{
1017	if (flags & (STR_NOALIGN\|STR_ASCII))
1018	return 0;
1019	return PTR_DIFF(p, base_ptr) & 1;
1020	}
1021
1022
1023	/**
1024	* Copy a string from a char* unix src to a dos codepage string destination.
1025	*
1026	* @return the number of bytes occupied by the string in the destination.
1027	*
1028	* @param flags can include
1029	* <dl>
1030	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1031	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1032	* </dl>
1033	*
1034	* @param dest_len the maximum length in bytes allowed in the
1035	* destination.
1036	**/
1037	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1038	{
1039	size_t src_len = strlen(src);
1040	char *tmpbuf = NULL;
1041	size_t ret;
1042
1043	/* No longer allow a length of -1. */
1044	if (dest_len == (size_t)-1) {
1045	smb_panic("push_ascii - dest_len == -1");
1046	}
1047
1048	if (flags & STR_UPPER) {
1049	tmpbuf = SMB_STRDUP(src);
1050	if (!tmpbuf) {
1051	smb_panic("malloc fail");
1052	}
1053	strupper_m(tmpbuf);
1054	src = tmpbuf;
1055	}
1056
1057	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1058	src_len++;
1059	}
1060
1061	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1062	if (ret == (size_t)-1 &&
1063	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1064	&& dest_len > 0) {
1065	((char *)dest)[0] = '\0';
1066	}
1067	SAFE_FREE(tmpbuf);
1068	return ret;
1069	}
1070
1071	size_t push_ascii_fstring(void dest, const char src)
1072	{
1073	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1074	}
1075
1076	/********************************************************************
1077	Push an nstring - ensure null terminated. Written by
1078	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1079	********************************************************************/
1080
1081	size_t push_ascii_nstring(void dest, const char src)
1082	{
1083	size_t i, buffer_len, dest_len;
1084	smb_ucs2_t *buffer;
1085
1086	conv_silent = True;
1087	buffer_len = push_ucs2_allocate(&buffer, src);
1088	if (buffer_len == (size_t)-1) {
1089	smb_panic("failed to create UCS2 buffer");
1090	}
1091
1092	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1093	buffer_len /= sizeof(smb_ucs2_t);
1094
1095	dest_len = 0;
1096	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1097	unsigned char mb[10];
1098	/* Convert one smb_ucs2_t character at a time. */
1099	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1100	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1101	memcpy((char *)dest + dest_len, mb, mb_len);
1102	dest_len += mb_len;
1103	} else {
1104	errno = E2BIG;
1105	break;
1106	}
1107	}
1108	((char *)dest)[dest_len] = '\0';
1109
1110	SAFE_FREE(buffer);
1111	conv_silent = False;
1112	return dest_len;
1113	}
1114
1115	/********************************************************************
1116	Push and malloc an ascii string. src and dest null terminated.
1117	********************************************************************/
1118
1119	size_t push_ascii_allocate(char *dest, const char src)
1120	{
1121	size_t dest_len, src_len = strlen(src)+1;
1122
1123	*dest = NULL;
1124	if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1125	(void **)dest, &dest_len, True))
1126	return (size_t)-1;
1127	else
1128	return dest_len;
1129	}
1130
1131	/**
1132	* Copy a string from a dos codepage source to a unix char* destination.
1133	*
1134	* The resulting string in "dest" is always null terminated.
1135	*
1136	* @param flags can have:
1137	* <dl>
1138	* <dt>STR_TERMINATE</dt>
1139	* <dd>STR_TERMINATE means the string in @p src
1140	* is null terminated, and src_len is ignored.</dd>
1141	* </dl>
1142	*
1143	* @param src_len is the length of the source area in bytes.
1144	* @returns the number of bytes occupied by the string in @p src.
1145	**/
1146	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1147	{
1148	size_t ret;
1149
1150	if (dest_len == (size_t)-1) {
1151	/* No longer allow dest_len of -1. */
1152	smb_panic("pull_ascii - invalid dest_len of -1");
1153	}
1154
1155	if (flags & STR_TERMINATE) {
1156	if (src_len == (size_t)-1) {
1157	src_len = strlen((const char *)src) + 1;
1158	} else {
1159	size_t len = strnlen((const char *)src, src_len);
1160	if (len < src_len)
1161	len++;
1162	src_len = len;
1163	}
1164	}
1165
1166	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1167	if (ret == (size_t)-1) {
1168	ret = 0;
1169	dest_len = 0;
1170	}
1171
1172	if (dest_len && ret) {
1173	/* Did we already process the terminating zero ? */
1174	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1175	dest[MIN(ret, dest_len-1)] = 0;
1176	}
1177	} else {
1178	dest[0] = 0;
1179	}
1180
1181	return src_len;
1182	}
1183
1184	/**
1185	* Copy a string from a dos codepage source to a unix char* destination.
1186	Talloc version.
1187	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1188	needs fixing. JRA).
1189	*
1190	* The resulting string in "dest" is always null terminated.
1191	*
1192	* @param flags can have:
1193	* <dl>
1194	* <dt>STR_TERMINATE</dt>
1195	* <dd>STR_TERMINATE means the string in @p src
1196	* is null terminated, and src_len is ignored.</dd>
1197	* </dl>
1198	*
1199	* @param src_len is the length of the source area in bytes.
1200	* @returns the number of bytes occupied by the string in @p src.
1201	**/
1202
1203	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1204	char **ppdest,
1205	const void *src,
1206	size_t src_len,
1207	int flags)
1208	{
1209	char *dest = NULL;
1210	size_t dest_len = 0;
1211
1212	#ifdef DEVELOPER
1213	/* Ensure we never use the braindead "malloc" varient. */
1214	if (ctx == NULL) {
1215	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1216	}
1217	#endif
1218
1219	*ppdest = NULL;
1220
1221	if (!src_len) {
1222	return 0;
1223	}
1224
1225	if (flags & STR_TERMINATE) {
1226	if (src_len == (size_t)-1) {
1227	src_len = strlen((const char *)src) + 1;
1228	} else {
1229	size_t len = strnlen((const char *)src, src_len);
1230	if (len < src_len)
1231	len++;
1232	src_len = len;
1233	}
1234	/* Ensure we don't use an insane length from the client. */
1235	if (src_len >= 1024*1024) {
1236	char *msg = talloc_asprintf(ctx,
1237	"Bad src length (%u) in "
1238	"pull_ascii_base_talloc",
1239	(unsigned int)src_len);
1240	smb_panic(msg);
1241	}
1242	} else {
1243	/* Can't have an unlimited length
1244	* non STR_TERMINATE'd.
1245	*/
1246	if (src_len == (size_t)-1) {
1247	errno = EINVAL;
1248	return 0;
1249	}
1250	}
1251
1252	/* src_len != -1 here. */
1253
1254	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1255	&dest_len, True)) {
1256	dest_len = 0;
1257	}
1258
1259	if (dest_len && dest) {
1260	/* Did we already process the terminating zero ? */
1261	if (dest[dest_len-1] != 0) {
1262	size_t size = talloc_get_size(dest);
1263	/* Have we got space to append the '\0' ? */
1264	if (size <= dest_len) {
1265	/* No, realloc. */
1266	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1267	dest_len+1);
1268	if (!dest) {
1269	/* talloc fail. */
1270	dest_len = (size_t)-1;
1271	return 0;
1272	}
1273	}
1274	/* Yay - space ! */
1275	dest[dest_len] = '\0';
1276	dest_len++;
1277	}
1278	} else if (dest) {
1279	dest[0] = 0;
1280	}
1281
1282	*ppdest = dest;
1283	return src_len;
1284	}
1285
1286	size_t pull_ascii_fstring(char dest, const void src)
1287	{
1288	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1289	}
1290
1291	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1292
1293	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1294	{
1295	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1296	}
1297
1298	/**
1299	* Copy a string from a char* src to a unicode destination.
1300	*
1301	* @returns the number of bytes occupied by the string in the destination.
1302	*
1303	* @param flags can have:
1304	*
1305	* <dl>
1306	* <dt>STR_TERMINATE <dd>means include the null termination.
1307	* <dt>STR_UPPER <dd>means uppercase in the destination.
1308	* <dt>STR_NOALIGN <dd>means don't do alignment.
1309	* </dl>
1310	*
1311	* @param dest_len is the maximum length allowed in the
1312	* destination.
1313	**/
1314
1315	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1316	{
1317	size_t len=0;
1318	size_t src_len;
1319	size_t ret;
1320
1321	if (dest_len == (size_t)-1) {
1322	/* No longer allow dest_len of -1. */
1323	smb_panic("push_ucs2 - invalid dest_len of -1");
1324	}
1325
1326	if (flags & STR_TERMINATE)
1327	src_len = (size_t)-1;
1328	else
1329	src_len = strlen(src);
1330
1331	if (ucs2_align(base_ptr, dest, flags)) {
1332	(char )dest = 0;
1333	dest = (void )((char )dest + 1);
1334	if (dest_len)
1335	dest_len--;
1336	len++;
1337	}
1338
1339	/* ucs2 is always a multiple of 2 bytes */
1340	dest_len &= ~1;
1341
1342	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1343	if (ret == (size_t)-1) {
1344	if ((flags & STR_TERMINATE) &&
1345	dest &&
1346	dest_len) {
1347	(char )dest = 0;
1348	}
1349	return len;
1350	}
1351
1352	len += ret;
1353
1354	if (flags & STR_UPPER) {
1355	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1356	size_t i;
1357
1358	/* We check for i < (ret / 2) below as the dest string isn't null
1359	terminated if STR_TERMINATE isn't set. */
1360
1361	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1362	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1363	if (v != dest_ucs2[i]) {
1364	dest_ucs2[i] = v;
1365	}
1366	}
1367	}
1368
1369	return len;
1370	}
1371
1372
1373	/**
1374	* Copy a string from a unix char* src to a UCS2 destination,
1375	* allocating a buffer using talloc().
1376	*
1377	* @param dest always set at least to NULL
1378	*
1379	* @returns The number of bytes occupied by the string in the destination
1380	* or -1 in case of error.
1381	**/
1382	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1383	{
1384	size_t src_len = strlen(src)+1;
1385
1386	*dest = NULL;
1387	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1388	}
1389
1390
1391	/**
1392	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1393	*
1394	* @param dest always set at least to NULL
1395	*
1396	* @returns The number of bytes occupied by the string in the destination
1397	* or -1 in case of error.
1398	**/
1399
1400	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1401	{
1402	size_t dest_len, src_len = strlen(src)+1;
1403
1404	*dest = NULL;
1405	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1406	(void **)dest, &dest_len, True))
1407	return (size_t)-1;
1408	else
1409	return dest_len;
1410	}
1411
1412	/**
1413	Copy a string from a char* src to a UTF-8 destination.
1414	Return the number of bytes occupied by the string in the destination
1415	Flags can have:
1416	STR_TERMINATE means include the null termination
1417	STR_UPPER means uppercase in the destination
1418	dest_len is the maximum length allowed in the destination. If dest_len
1419	is -1 then no maxiumum is used.
1420	**/
1421
1422	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1423	{
1424	size_t src_len = 0;
1425	size_t ret;
1426	char *tmpbuf = NULL;
1427
1428	if (dest_len == (size_t)-1) {
1429	/* No longer allow dest_len of -1. */
1430	smb_panic("push_utf8 - invalid dest_len of -1");
1431	}
1432
1433	if (flags & STR_UPPER) {
1434	tmpbuf = strdup_upper(src);
1435	if (!tmpbuf) {
1436	return (size_t)-1;
1437	}
1438	src = tmpbuf;
1439	src_len = strlen(src);
1440	}
1441
1442	src_len = strlen(src);
1443	if (flags & STR_TERMINATE) {
1444	src_len++;
1445	}
1446
1447	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1448	SAFE_FREE(tmpbuf);
1449	return ret;
1450	}
1451
1452	size_t push_utf8_fstring(void dest, const char src)
1453	{
1454	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1455	}
1456
1457	/**
1458	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1459	*
1460	* @param dest always set at least to NULL
1461	*
1462	* @returns The number of bytes occupied by the string in the destination
1463	**/
1464
1465	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1466	{
1467	size_t src_len = strlen(src)+1;
1468
1469	*dest = NULL;
1470	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1471	}
1472
1473	/**
1474	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1475	*
1476	* @param dest always set at least to NULL
1477	*
1478	* @returns The number of bytes occupied by the string in the destination
1479	**/
1480
1481	size_t push_utf8_allocate(char *dest, const char src)
1482	{
1483	size_t dest_len, src_len = strlen(src)+1;
1484
1485	*dest = NULL;
1486	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1487	(void **)dest, &dest_len, True))
1488	return (size_t)-1;
1489	else
1490	return dest_len;
1491	}
1492
1493	/**
1494	Copy a string from a ucs2 source to a unix char* destination.
1495	Flags can have:
1496	STR_TERMINATE means the string in src is null terminated.
1497	STR_NOALIGN means don't try to align.
1498	if STR_TERMINATE is set then src_len is ignored if it is -1.
1499	src_len is the length of the source area in bytes
1500	Return the number of bytes occupied by the string in src.
1501	The resulting string in "dest" is always null terminated.
1502	**/
1503
1504	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1505	{
1506	size_t ret;
1507
1508	if (dest_len == (size_t)-1) {
1509	/* No longer allow dest_len of -1. */
1510	smb_panic("pull_ucs2 - invalid dest_len of -1");
1511	}
1512
1513	if (!src_len) {
1514	if (dest && dest_len > 0) {
1515	dest[0] = '\0';
1516	}
1517	return 0;
1518	}
1519
1520	if (ucs2_align(base_ptr, src, flags)) {
1521	src = (const void )((const char )src + 1);
1522	if (src_len != (size_t)-1)
1523	src_len--;
1524	}
1525
1526	if (flags & STR_TERMINATE) {
1527	/* src_len -1 is the default for null terminated strings. */
1528	if (src_len != (size_t)-1) {
1529	size_t len = strnlen_w((const smb_ucs2_t *)src,
1530	src_len/2);
1531	if (len < src_len/2)
1532	len++;
1533	src_len = len*2;
1534	}
1535	}
1536
1537	/* ucs2 is always a multiple of 2 bytes */
1538	if (src_len != (size_t)-1)
1539	src_len &= ~1;
1540
1541	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1542	if (ret == (size_t)-1) {
1543	ret = 0;
1544	dest_len = 0;
1545	}
1546
1547	if (src_len == (size_t)-1)
1548	src_len = ret*2;
1549
1550	if (dest_len && ret) {
1551	/* Did we already process the terminating zero ? */
1552	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1553	dest[MIN(ret, dest_len-1)] = 0;
1554	}
1555	} else {
1556	dest[0] = 0;
1557	}
1558
1559	return src_len;
1560	}
1561
1562	/**
1563	Copy a string from a ucs2 source to a unix char* destination.
1564	Talloc version with a base pointer.
1565	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1566	needs fixing. JRA).
1567	Flags can have:
1568	STR_TERMINATE means the string in src is null terminated.
1569	STR_NOALIGN means don't try to align.
1570	if STR_TERMINATE is set then src_len is ignored if it is -1.
1571	src_len is the length of the source area in bytes
1572	Return the number of bytes occupied by the string in src.
1573	The resulting string in "dest" is always null terminated.
1574	**/
1575
1576	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1577	const void *base_ptr,
1578	char **ppdest,
1579	const void *src,
1580	size_t src_len,
1581	int flags)
1582	{
1583	char *dest;
1584	size_t dest_len;
1585
1586	*ppdest = NULL;
1587
1588	#ifdef DEVELOPER
1589	/* Ensure we never use the braindead "malloc" varient. */
1590	if (ctx == NULL) {
1591	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1592	}
1593	#endif
1594
1595	if (!src_len) {
1596	return 0;
1597	}
1598
1599	if (ucs2_align(base_ptr, src, flags)) {
1600	src = (const void )((const char )src + 1);
1601	if (src_len != (size_t)-1)
1602	src_len--;
1603	}
1604
1605	if (flags & STR_TERMINATE) {
1606	/* src_len -1 is the default for null terminated strings. */
1607	if (src_len != (size_t)-1) {
1608	size_t len = strnlen_w((const smb_ucs2_t *)src,
1609	src_len/2);
1610	if (len < src_len/2)
1611	len++;
1612	src_len = len*2;
1613	} else {
1614	/*
1615	* src_len == -1 - alloc interface won't take this
1616	* so we must calculate.
1617	*/
1618	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1619	}
1620	/* Ensure we don't use an insane length from the client. */
1621	if (src_len >= 1024*1024) {
1622	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1623	}
1624	} else {
1625	/* Can't have an unlimited length
1626	* non STR_TERMINATE'd.
1627	*/
1628	if (src_len == (size_t)-1) {
1629	errno = EINVAL;
1630	return 0;
1631	}
1632	}
1633
1634	/* src_len != -1 here. */
1635
1636	/* ucs2 is always a multiple of 2 bytes */
1637	src_len &= ~1;
1638
1639	dest_len = convert_string_talloc(ctx,
1640	CH_UTF16LE,
1641	CH_UNIX,
1642	src,
1643	src_len,
1644	(void *)&dest,
1645	True);
1646	if (dest_len == (size_t)-1) {
1647	dest_len = 0;
1648	}
1649
1650	if (dest_len) {
1651	/* Did we already process the terminating zero ? */
1652	if (dest[dest_len-1] != 0) {
1653	size_t size = talloc_get_size(dest);
1654	/* Have we got space to append the '\0' ? */
1655	if (size <= dest_len) {
1656	/* No, realloc. */
1657	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1658	dest_len+1);
1659	if (!dest) {
1660	/* talloc fail. */
1661	dest_len = (size_t)-1;
1662	return 0;
1663	}
1664	}
1665	/* Yay - space ! */
1666	dest[dest_len] = '\0';
1667	dest_len++;
1668	}
1669	} else if (dest) {
1670	dest[0] = 0;
1671	}
1672
1673	*ppdest = dest;
1674	return src_len;
1675	}
1676
1677	size_t pull_ucs2_fstring(char dest, const void src)
1678	{
1679	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1680	}
1681
1682	/**
1683	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1684	*
1685	* @param dest always set at least to NULL
1686	*
1687	* @returns The number of bytes occupied by the string in the destination
1688	**/
1689
1690	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1691	{
1692	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1693	*dest = NULL;
1694	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1695	}
1696
1697	/**
1698	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1699	*
1700	* @param dest always set at least to NULL
1701	*
1702	* @returns The number of bytes occupied by the string in the destination
1703	**/
1704
1705	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1706	{
1707	size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1708	*dest = NULL;
1709	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1710	(void **)dest, &dest_len, True))
1711	return (size_t)-1;
1712	else
1713	return dest_len;
1714	}
1715
1716	/**
1717	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1718	*
1719	* @param dest always set at least to NULL
1720	*
1721	* @returns The number of bytes occupied by the string in the destination
1722	**/
1723
1724	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1725	{
1726	size_t src_len = strlen(src)+1;
1727	*dest = NULL;
1728	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1729	}
1730
1731	/**
1732	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1733	*
1734	* @param dest always set at least to NULL
1735	*
1736	* @returns The number of bytes occupied by the string in the destination
1737	**/
1738
1739	size_t pull_utf8_allocate(char *dest, const char src)
1740	{
1741	size_t dest_len, src_len = strlen(src)+1;
1742	*dest = NULL;
1743	if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1744	(void **)dest, &dest_len, True))
1745	return (size_t)-1;
1746	else
1747	return dest_len;
1748	}
1749
1750	/**
1751	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1752	*
1753	* @param dest always set at least to NULL
1754	*
1755	* @returns The number of bytes occupied by the string in the destination
1756	**/
1757
1758	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1759	{
1760	size_t src_len = strlen(src)+1;
1761	*dest = NULL;
1762	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1763	}
1764
1765	/**
1766	Copy a string from a char* src to a unicode or ascii
1767	dos codepage destination choosing unicode or ascii based on the
1768	flags in the SMB buffer starting at base_ptr.
1769	Return the number of bytes occupied by the string in the destination.
1770	flags can have:
1771	STR_TERMINATE means include the null termination.
1772	STR_UPPER means uppercase in the destination.
1773	STR_ASCII use ascii even with unicode packet.
1774	STR_NOALIGN means don't do alignment.
1775	dest_len is the maximum length allowed in the destination. If dest_len
1776	is -1 then no maxiumum is used.
1777	**/
1778
1779	size_t push_string_fn(const char *function, unsigned int line,
1780	const void *base_ptr, uint16 flags2,
1781	void dest, const char src,
1782	size_t dest_len, int flags)
1783	{
1784	#ifdef DEVELOPER
1785	/* We really need to zero fill here, not clobber
1786	* region, as we want to ensure that valgrind thinks
1787	* all of the outgoing buffer has been written to
1788	* so a send() or write() won't trap an error.
1789	* JRA.
1790	*/
1791	#if 0
1792	clobber_region(function, line, dest, dest_len);
1793	#else
1794	memset(dest, '\0', dest_len);
1795	#endif
1796	#endif
1797
1798	if (!(flags & STR_ASCII) && \
1799	((flags & STR_UNICODE \|\| \
1800	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1801	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1802	}
1803	return push_ascii(dest, src, dest_len, flags);
1804	}
1805
1806
1807	/**
1808	Copy a string from a unicode or ascii source (depending on
1809	the packet flags) to a char* destination.
1810	Flags can have:
1811	STR_TERMINATE means the string in src is null terminated.
1812	STR_UNICODE means to force as unicode.
1813	STR_ASCII use ascii even with unicode packet.
1814	STR_NOALIGN means don't do alignment.
1815	if STR_TERMINATE is set then src_len is ignored is it is -1
1816	src_len is the length of the source area in bytes.
1817	Return the number of bytes occupied by the string in src.
1818	The resulting string in "dest" is always null terminated.
1819	**/
1820
1821	size_t pull_string_fn(const char *function,
1822	unsigned int line,
1823	const void *base_ptr,
1824	uint16 smb_flags2,
1825	char *dest,
1826	const void *src,
1827	size_t dest_len,
1828	size_t src_len,
1829	int flags)
1830	{
1831	#ifdef DEVELOPER
1832	clobber_region(function, line, dest, dest_len);
1833	#endif
1834
1835	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1836	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1837	"UNICODE defined");
1838	}
1839
1840	if (!(flags & STR_ASCII) && \
1841	((flags & STR_UNICODE \|\| \
1842	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1843	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1844	}
1845	return pull_ascii(dest, src, dest_len, src_len, flags);
1846	}
1847
1848	/**
1849	Copy a string from a unicode or ascii source (depending on
1850	the packet flags) to a char* destination.
1851	Variant that uses talloc.
1852	Flags can have:
1853	STR_TERMINATE means the string in src is null terminated.
1854	STR_UNICODE means to force as unicode.
1855	STR_ASCII use ascii even with unicode packet.
1856	STR_NOALIGN means don't do alignment.
1857	if STR_TERMINATE is set then src_len is ignored is it is -1
1858	src_len is the length of the source area in bytes.
1859	Return the number of bytes occupied by the string in src.
1860	The resulting string in "dest" is always null terminated.
1861	**/
1862
1863	size_t pull_string_talloc_fn(const char *function,
1864	unsigned int line,
1865	TALLOC_CTX *ctx,
1866	const void *base_ptr,
1867	uint16 smb_flags2,
1868	char **ppdest,
1869	const void *src,
1870	size_t src_len,
1871	int flags)
1872	{
1873	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1874	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1875	"UNICODE defined");
1876	}
1877
1878	if (!(flags & STR_ASCII) && \
1879	((flags & STR_UNICODE \|\| \
1880	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1881	return pull_ucs2_base_talloc(ctx,
1882	base_ptr,
1883	ppdest,
1884	src,
1885	src_len,
1886	flags);
1887	}
1888	return pull_ascii_base_talloc(ctx,
1889	ppdest,
1890	src,
1891	src_len,
1892	flags);
1893	}
1894
1895
1896	size_t align_string(const void base_ptr, const char p, int flags)
1897	{
1898	if (!(flags & STR_ASCII) && \
1899	((flags & STR_UNICODE \|\| \
1900	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1901	return ucs2_align(base_ptr, p, flags);
1902	}
1903	return 0;
1904	}
1905
1906	/*
1907	Return the unicode codepoint for the next multi-byte CH_UNIX character
1908	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1909
1910	Also return the number of bytes consumed (which tells the caller
1911	how many bytes to skip to get to the next CH_UNIX character).
1912
1913	Return INVALID_CODEPOINT if the next character cannot be converted.
1914	*/
1915
1916	codepoint_t next_codepoint(const char str, size_t size)
1917	{
1918	/* It cannot occupy more than 4 bytes in UTF16 format */
1919	uint8_t buf[4];
1920	smb_iconv_t descriptor;
1921	size_t ilen_orig;
1922	size_t ilen;
1923	size_t olen;
1924	char *outbuf;
1925
1926	if ((str[0] & 0x80) == 0) {
1927	*size = 1;
1928	return (codepoint_t)str[0];
1929	}
1930
1931	/* We assume that no multi-byte character can take
1932	more than 5 bytes. This is OK as we only
1933	support codepoints up to 1M */
1934
1935	ilen_orig = strnlen(str, 5);
1936	ilen = ilen_orig;
1937
1938	lazy_initialize_conv();
1939
1940	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1941	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1942	*size = 1;
1943	return INVALID_CODEPOINT;
1944	}
1945
1946	/* This looks a little strange, but it is needed to cope
1947	with codepoints above 64k which are encoded as per RFC2781. */
1948	olen = 2;
1949	outbuf = (char *)buf;
1950	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1951	if (olen == 2) {
1952	/* We failed to convert to a 2 byte character.
1953	See if we can convert to a 4 UTF16-LE byte char encoding.
1954	*/
1955	olen = 4;
1956	outbuf = (char *)buf;
1957	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1958	if (olen == 4) {
1959	/* We didn't convert any bytes */
1960	*size = 1;
1961	return INVALID_CODEPOINT;
1962	}
1963	olen = 4 - olen;
1964	} else {
1965	olen = 2 - olen;
1966	}
1967
1968	*size = ilen_orig - ilen;
1969
1970	if (olen == 2) {
1971	/* 2 byte, UTF16-LE encoded value. */
1972	return (codepoint_t)SVAL(buf, 0);
1973	}
1974	if (olen == 4) {
1975	/* Decode a 4 byte UTF16-LE character manually.
1976	See RFC2871 for the encoding machanism.
1977	*/
1978	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1979	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1980
1981	return (codepoint_t)0x10000 +
1982	(w1 << 10) + w2;
1983	}
1984
1985	/* no other length is valid */
1986	return INVALID_CODEPOINT;
1987	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: