Context Navigation

source: branches/samba-3.3.x/source/lib/charcnv.c@ 285

Visit:

Last change on this file since 285 was 239, checked in by Herwig Bauernfeind, 16 years ago
Fix for Ticket #85 (by diver) in 3.3 branch
File size: 51.6 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/*
 * Character substituted for input that cannot be converted.
 * Fixed for now; we can parameterize this if someone complains.... JRA.
 */

char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58	#ifndef __OS2__
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	#else
62	if (ch == CH_UTF16LE) ret = "IBM-1200";
63	else if (ch == CH_UTF16BE) ret = "IBM-1200";
64	#endif
65	else if (ch == CH_UNIX) ret = lp_unix_charset();
66	else if (ch == CH_DOS) ret = lp_dos_charset();
67	else if (ch == CH_DISPLAY) ret = lp_display_charset();
68	else if (ch == CH_UTF8) ret = "UTF8";
69
70	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
71	if (ret && !strcmp(ret, "LOCALE")) {
72	const char *ln = NULL;
73
74	#ifdef HAVE_SETLOCALE
75	setlocale(LC_ALL, "");
76	#endif
77	ln = nl_langinfo(CODESET);
78	if (ln) {
79	/* Check whether the charset name is supported
80	by iconv */
81	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
82	if (handle == (smb_iconv_t) -1) {
83	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
84	ln = NULL;
85	} else {
86	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
87	smb_iconv_close(handle);
88	}
89	}
90	ret = ln;
91	}
92	#endif
93
94	if (!ret \|\| !*ret) ret = "ASCII";
95	DEBUG(10, ("codepage: %s\n",ret));
96	return ret;
97	}
98
99	void lazy_initialize_conv(void)
100	{
101	if (!initialized) {
102	load_case_tables();
103	init_iconv();
104	initialized = true;
105	}
106	}
107
108	/**
109	* Destroy global objects allocated by init_iconv()
110	**/
111	void gfree_charcnv(void)
112	{
113	int c1, c2;
114
115	for (c1=0;c1<NUM_CHARSETS;c1++) {
116	for (c2=0;c2<NUM_CHARSETS;c2++) {
117	if ( conv_handles[c1][c2] ) {
118	smb_iconv_close( conv_handles[c1][c2] );
119	conv_handles[c1][c2] = 0;
120	}
121	}
122	}
123	initialized = false;
124	}
125
126	/**
127	* Initialize iconv conversion descriptors.
128	*
129	* This is called the first time it is needed, and also called again
130	* every time the configuration is reloaded, because the charset or
131	* codepage might have changed.
132	**/
133	void init_iconv(void)
134	{
135	int c1, c2;
136	bool did_reload = False;
137
138	/* so that charset_name() works we need to get the UNIX<->UCS2 going
139	first */
140	if (!conv_handles[CH_UNIX][CH_UTF16LE])
141	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
142
143	if (!conv_handles[CH_UTF16LE][CH_UNIX])
144	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
145
146	for (c1=0;c1<NUM_CHARSETS;c1++) {
147	for (c2=0;c2<NUM_CHARSETS;c2++) {
148	const char *n1 = charset_name((charset_t)c1);
149	const char *n2 = charset_name((charset_t)c2);
150	if (conv_handles[c1][c2] &&
151	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
152	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
153	continue;
154
155	did_reload = True;
156
157	if (conv_handles[c1][c2])
158	smb_iconv_close(conv_handles[c1][c2]);
159
160	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
161	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
162	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
163	charset_name((charset_t)c1), charset_name((charset_t)c2)));
164	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
165	n1 = "ASCII";
166	}
167	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
168	n2 = "ASCII";
169	}
170	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
171	n1, n2 ));
172	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
173	if (!conv_handles[c1][c2]) {
174	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
175	smb_panic("init_iconv: conv_handle initialization failed");
176	}
177	}
178	}
179	}
180
181	if (did_reload) {
182	/* XXX: Does this really get called every time the dos
183	* codepage changes? */
184	/* XXX: Is the did_reload test too strict? */
185	conv_silent = True;
186	init_valid_table();
187	conv_silent = False;
188	}
189	}
190
191	/**
192	* Convert string from one encoding to another, making error checking etc
193	* Slow path version - uses (slow) iconv.
194	*
195	* @param src pointer to source string (multibyte or singlebyte)
196	* @param srclen length of the source string in bytes
197	* @param dest pointer to destination string (multibyte or singlebyte)
198	* @param destlen maximal length allowed for string
199	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
200	* @returns the number of bytes occupied in the destination
201	*
202	* Ensure the srclen contains the terminating zero.
203	*
204	**/
205
206	static size_t convert_string_internal(charset_t from, charset_t to,
207	void const *src, size_t srclen,
208	void *dest, size_t destlen, bool allow_bad_conv)
209	{
210	size_t i_len, o_len;
211	size_t retval;
212	const char* inbuf = (const char*)src;
213	char* outbuf = (char*)dest;
214	smb_iconv_t descriptor;
215
216	lazy_initialize_conv();
217
218	descriptor = conv_handles[from][to];
219
220	if (srclen == (size_t)-1) {
221	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
222	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
223	} else {
224	srclen = strlen((const char *)src)+1;
225	}
226	}
227
228
229	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
230	if (!conv_silent)
231	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
232	return (size_t)-1;
233	}
234
235	i_len=srclen;
236	o_len=destlen;
237
238	again:
239
240	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
241	if(retval==(size_t)-1) {
242	const char *reason="unknown error";
243	switch(errno) {
244	case EINVAL:
245	reason="Incomplete multibyte sequence";
246	if (!conv_silent)
247	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
248	if (allow_bad_conv)
249	goto use_as_is;
250	return (size_t)-1;
251	case E2BIG:
252	reason="No more room";
253	if (!conv_silent) {
254	if (from == CH_UNIX) {
255	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
256	charset_name(from), charset_name(to),
257	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
258	} else {
259	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
260	charset_name(from), charset_name(to),
261	(unsigned int)srclen, (unsigned int)destlen));
262	}
263	}
264	break;
265	case EILSEQ:
266	reason="Illegal multibyte sequence";
267	if (!conv_silent)
268	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
269	if (allow_bad_conv)
270	goto use_as_is;
271
272	return (size_t)-1;
273	default:
274	if (!conv_silent)
275	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
276	return (size_t)-1;
277	}
278	/* smb_panic(reason); */
279	}
280	return destlen-o_len;
281
282	use_as_is:
283
284	/*
285	* Conversion not supported. This is actually an error, but there are so
286	* many misconfigured iconv systems and smb.conf's out there we can't just
287	* fail. Do a very bad conversion instead.... JRA.
288	*/
289
290	{
291	if (o_len == 0 \|\| i_len == 0)
292	return destlen - o_len;
293
294	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
295	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
296	/* Can't convert from utf16 any endian to multibyte.
297	Replace with the default fail char.
298	*/
299	if (i_len < 2)
300	return destlen - o_len;
301	if (i_len >= 2) {
302	*outbuf = lp_failed_convert_char();
303
304	outbuf++;
305	o_len--;
306
307	inbuf += 2;
308	i_len -= 2;
309	}
310
311	if (o_len == 0 \|\| i_len == 0)
312	return destlen - o_len;
313
314	/* Keep trying with the next char... */
315	goto again;
316
317	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
318	/* Can't convert to UTF16LE - just widen by adding the
319	default fail char then zero.
320	*/
321	if (o_len < 2)
322	return destlen - o_len;
323
324	outbuf[0] = lp_failed_convert_char();
325	outbuf[1] = '\0';
326
327	inbuf++;
328	i_len--;
329
330	outbuf += 2;
331	o_len -= 2;
332
333	if (o_len == 0 \|\| i_len == 0)
334	return destlen - o_len;
335
336	/* Keep trying with the next char... */
337	goto again;
338
339	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
340	to != CH_UTF16LE && to != CH_UTF16BE) {
341	/* Failed multibyte to multibyte. Just copy the default fail char and
342	try again. */
343	outbuf[0] = lp_failed_convert_char();
344
345	inbuf++;
346	i_len--;
347
348	outbuf++;
349	o_len--;
350
351	if (o_len == 0 \|\| i_len == 0)
352	return destlen - o_len;
353
354	/* Keep trying with the next char... */
355	goto again;
356
357	} else {
358	/* Keep compiler happy.... */
359	return destlen - o_len;
360	}
361	}
362	}
363
364	/**
365	* Convert string from one encoding to another, making error checking etc
366	* Fast path version - handles ASCII first.
367	*
368	* @param src pointer to source string (multibyte or singlebyte)
369	* @param srclen length of the source string in bytes, or -1 for nul terminated.
370	* @param dest pointer to destination string (multibyte or singlebyte)
371	* @param destlen maximal length allowed for string - NEVER -1.
372	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
373	* @returns the number of bytes occupied in the destination
374	*
375	* Ensure the srclen contains the terminating zero.
376	*
377	* This function has been hand-tuned to provide a fast path.
378	* Don't change unless you really know what you are doing. JRA.
379	**/
380
381	size_t convert_string(charset_t from, charset_t to,
382	void const *src, size_t srclen,
383	void *dest, size_t destlen, bool allow_bad_conv)
384	{
385	/*
386	* NB. We deliberately don't do a strlen here if srclen == -1.
387	* This is very expensive over millions of calls and is taken
388	* care of in the slow path in convert_string_internal. JRA.
389	*/
390
391	#ifdef DEVELOPER
392	SMB_ASSERT(destlen != (size_t)-1);
393	#endif
394
395	if (srclen == 0)
396	return 0;
397
398	// DEBUG(10, ("convert_string: 1"));
399
400	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
401	const unsigned char p = (const unsigned char )src;
402	unsigned char q = (unsigned char )dest;
403	size_t slen = srclen;
404	size_t dlen = destlen;
405	unsigned char lastp = '\0';
406	size_t retval = 0;
407
408	// DEBUG(10, ("convert_string: 2"));
409
410	/* If all characters are ascii, fast path here. */
411	while (slen && dlen) {
412	if ((lastp = *p) <= 0x7f) {
413	q++ = p++;
414	if (slen != (size_t)-1) {
415	slen--;
416	}
417	dlen--;
418	retval++;
419	if (!lastp)
420	break;
421	} else {
422	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
423	goto general_case;
424	#else
425	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
426	if (ret == (size_t)-1) {
427	return ret;
428	}
429	return retval + ret;
430	#endif
431	}
432	}
433	if (!dlen) {
434	/* Even if we fast path we should note if we ran out of room. */
435	if (((slen != (size_t)-1) && slen) \|\|
436	((slen == (size_t)-1) && lastp)) {
437	errno = E2BIG;
438	}
439	}
440	return retval;
441	// DEBUG(10, ("convert_string: 3"));
442
443	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
444	const unsigned char p = (const unsigned char )src;
445	unsigned char q = (unsigned char )dest;
446	size_t retval = 0;
447	size_t slen = srclen;
448	size_t dlen = destlen;
449	unsigned char lastp = '\0';
450
451	/* If all characters are ascii, fast path here. */
452	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
453	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
454	q++ = p;
455	if (slen != (size_t)-1) {
456	slen -= 2;
457	}
458	p += 2;
459	dlen--;
460	retval++;
461	if (!lastp)
462	break;
463	} else {
464	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
465	goto general_case;
466	#else
467	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
468	if (ret == (size_t)-1) {
469	return ret;
470	}
471	return retval + ret;
472	#endif
473	}
474	}
475	if (!dlen) {
476	/* Even if we fast path we should note if we ran out of room. */
477	if (((slen != (size_t)-1) && slen) \|\|
478	((slen == (size_t)-1) && lastp)) {
479	errno = E2BIG;
480	}
481	}
482	return retval;
483	// DEBUG(10, ("convert_string: 4"));
484
485	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
486	const unsigned char p = (const unsigned char )src;
487	unsigned char q = (unsigned char )dest;
488	size_t retval = 0;
489	size_t slen = srclen;
490	size_t dlen = destlen;
491	unsigned char lastp = '\0';
492
493	/* If all characters are ascii, fast path here. */
494	while (slen && (dlen >= 2)) {
495	if ((lastp = *p) <= 0x7F) {
496	q++ = p++;
497	*q++ = '\0';
498	if (slen != (size_t)-1) {
499	slen--;
500	}
501	dlen -= 2;
502	retval += 2;
503	if (!lastp)
504	break;
505	} else {
506	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
507	goto general_case;
508	#else
509	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
510	if (ret == (size_t)-1) {
511	return ret;
512	}
513	return retval + ret;
514	#endif
515	}
516	}
517	if (!dlen) {
518	/* Even if we fast path we should note if we ran out of room. */
519	if (((slen != (size_t)-1) && slen) \|\|
520	((slen == (size_t)-1) && lastp)) {
521	errno = E2BIG;
522	}
523	}
524	return retval;
525	}
526
527	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
528	general_case:
529	#endif
530	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
531	}
532
533	/**
534	* Convert between character sets, allocating a new buffer for the result.
535	*
536	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
537	* (this is a bad interface and needs fixing. JRA).
538	* @param srclen length of source buffer.
539	* @param dest always set at least to NULL
540	* @param converted_size set to the size of the allocated buffer on return
541	* true
542	* @note -1 is not accepted for srclen.
543	*
544	* @return true if new buffer was correctly allocated, and string was
545	* converted.
546	*
547	* Ensure the srclen contains the terminating zero.
548	*
549	* I hate the goto's in this function. It's embarressing.....
550	* There has to be a cleaner way to do this. JRA.
551	**/
552
553	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
554	void const src, size_t srclen, void dst,
555	size_t *converted_size, bool allow_bad_conv)
556	{
557	size_t i_len, o_len, destlen = (srclen * 3) / 2;
558	size_t retval;
559	const char inbuf = (const char )src;
560	char outbuf = NULL, ob = NULL;
561	smb_iconv_t descriptor;
562	void dest = (void )dst;
563
564	*dest = NULL;
565
566	if (!converted_size) {
567	errno = EINVAL;
568	return false;
569	}
570
571	if (src == NULL \|\| srclen == (size_t)-1) {
572	errno = EINVAL;
573	return false;
574	}
575	if (srclen == 0) {
576	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
577	if (ob == NULL) {
578	errno = ENOMEM;
579	return false;
580	}
581	*dest = ob;
582	*converted_size = 0;
583	return true;
584	}
585
586	lazy_initialize_conv();
587
588	descriptor = conv_handles[from][to];
589
590	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
591	if (!conv_silent)
592	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
593	errno = EOPNOTSUPP;
594	return false;
595	}
596
597	convert:
598
599	/* +2 is for ucs2 null termination. */
600	if ((destlen*2)+2 < destlen) {
601	/* wrapped ! abort. */
602	if (!conv_silent)
603	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
604	if (!ctx)
605	SAFE_FREE(outbuf);
606	errno = EOPNOTSUPP;
607	return false;
608	} else {
609	destlen = destlen * 2;
610	}
611
612	/* +2 is for ucs2 null termination. */
613	if (ctx) {
614	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
615	} else {
616	ob = (char *)SMB_REALLOC(ob, destlen + 2);
617	}
618
619	if (!ob) {
620	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
621	errno = ENOMEM;
622	return false;
623	}
624	outbuf = ob;
625	i_len = srclen;
626	o_len = destlen;
627
628	again:
629	DEBUG(10,("convert_string_internal: convert_string(%s,%s): srclen=%u destlen=%u\n",
630	charset_name(from), charset_name(to),
631	(unsigned int)srclen, (unsigned int)destlen));
632
633	retval = smb_iconv(descriptor,
634	&inbuf, &i_len,
635	&outbuf, &o_len);
636	if(retval == (size_t)-1) {
637	const char *reason="unknown error";
638	switch(errno) {
639	case EINVAL:
640	reason="Incomplete multibyte sequence";
641	if (!conv_silent)
642	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
643	if (allow_bad_conv)
644	goto use_as_is;
645	break;
646	case E2BIG:
647	goto convert;
648	case EILSEQ:
649	reason="Illegal multibyte sequence";
650	if (!conv_silent)
651	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
652	if (allow_bad_conv)
653	goto use_as_is;
654	break;
655	}
656	if (!conv_silent)
657	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
658	/* smb_panic(reason); */
659	if (ctx) {
660	TALLOC_FREE(ob);
661	} else {
662	SAFE_FREE(ob);
663	}
664	return false;
665	}
666
667	out:
668
669	destlen = destlen - o_len;
670	/* Don't shrink unless we're reclaiming a lot of
671	* space. This is in the hot codepath and these
672	* reallocs cost. JRA.
673	*/
674	if (o_len > 1024) {
675	/* We're shrinking here so we know the +2 is safe from wrap. */
676	if (ctx) {
677	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
678	} else {
679	ob = (char *)SMB_REALLOC(ob,destlen + 2);
680	}
681	}
682
683	if (destlen && !ob) {
684	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
685	errno = ENOMEM;
686	return false;
687	}
688
689	*dest = ob;
690
691	/* Must ucs2 null terminate in the extra space we allocated. */
692	ob[destlen] = '\0';
693	ob[destlen+1] = '\0';
694
695	*converted_size = destlen;
696	return true;
697
698	use_as_is:
699
700	/*
701	* Conversion not supported. This is actually an error, but there are so
702	* many misconfigured iconv systems and smb.conf's out there we can't just
703	* fail. Do a very bad conversion instead.... JRA.
704	*/
705
706	{
707	if (o_len == 0 \|\| i_len == 0)
708	goto out;
709
710	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
711	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
712	/* Can't convert from utf16 any endian to multibyte.
713	Replace with the default fail char.
714	*/
715
716	if (i_len < 2)
717	goto out;
718
719	if (i_len >= 2) {
720	*outbuf = lp_failed_convert_char();
721
722	outbuf++;
723	o_len--;
724
725	inbuf += 2;
726	i_len -= 2;
727	}
728
729	if (o_len == 0 \|\| i_len == 0)
730	goto out;
731
732	/* Keep trying with the next char... */
733	goto again;
734
735	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
736	/* Can't convert to UTF16LE - just widen by adding the
737	default fail char then zero.
738	*/
739	if (o_len < 2)
740	goto out;
741
742	outbuf[0] = lp_failed_convert_char();
743	outbuf[1] = '\0';
744
745	inbuf++;
746	i_len--;
747
748	outbuf += 2;
749	o_len -= 2;
750
751	if (o_len == 0 \|\| i_len == 0)
752	goto out;
753
754	/* Keep trying with the next char... */
755	goto again;
756
757	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
758	to != CH_UTF16LE && to != CH_UTF16BE) {
759	/* Failed multibyte to multibyte. Just copy the default fail char and
760	try again. */
761	outbuf[0] = lp_failed_convert_char();
762
763	inbuf++;
764	i_len--;
765
766	outbuf++;
767	o_len--;
768
769	if (o_len == 0 \|\| i_len == 0)
770	goto out;
771
772	/* Keep trying with the next char... */
773	goto again;
774
775	} else {
776	/* Keep compiler happy.... */
777	goto out;
778	}
779	}
780	}
781
782	/**
783	* Convert between character sets, allocating a new buffer using talloc for the result.
784	*
785	* @param srclen length of source buffer.
786	* @param dest always set at least to NULL
787	* @parm converted_size set to the number of bytes occupied by the string in
788	* the destination on success.
789	* @note -1 is not accepted for srclen.
790	*
791	* @return true if new buffer was correctly allocated, and string was
792	* converted.
793	*/
794	bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
795	void const src, size_t srclen, void dst,
796	size_t *converted_size, bool allow_bad_conv)
797	{
798	void dest = (void )dst;
799
800	*dest = NULL;
801	return convert_string_allocate(ctx, from, to, src, srclen, dest,
802	converted_size, allow_bad_conv);
803	}
804
805	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
806	{
807	size_t size;
808	smb_ucs2_t *buffer;
809
810	if (!push_ucs2_allocate(&buffer, src, &size)) {
811	return (size_t)-1;
812	}
813
814	if (!strupper_w(buffer) && (dest == src)) {
815	free(buffer);
816	return srclen;
817	}
818
819	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
820	free(buffer);
821	return size;
822	}
823
824	/**
825	strdup() a unix string to upper case.
826	**/
827
828	char strdup_upper(const char s)
829	{
830	char *out_buffer = SMB_STRDUP(s);
831	const unsigned char p = (const unsigned char )s;
832	unsigned char q = (unsigned char )out_buffer;
833
834	if (!q) {
835	return NULL;
836	}
837
838	/* this is quite a common operation, so we want it to be
839	fast. We optimise for the ascii case, knowing that all our
840	supported multi-byte character sets are ascii-compatible
841	(ie. they match for the first 128 chars) */
842
843	while (*p) {
844	if (*p & 0x80)
845	break;
846	q++ = toupper_ascii_fast(p);
847	p++;
848	}
849
850	if (*p) {
851	/* MB case. */
852	size_t converted_size, converted_size2;
853	smb_ucs2_t *buffer = NULL;
854
855	SAFE_FREE(out_buffer);
856	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
857	strlen(s) + 1,
858	(void *)(void )&buffer,
859	&converted_size, True))
860	{
861	return NULL;
862	}
863
864	strupper_w(buffer);
865
866	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
867	converted_size,
868	(void *)(void )&out_buffer,
869	&converted_size2, True))
870	{
871	TALLOC_FREE(buffer);
872	return NULL;
873	}
874
875	/* Don't need the intermediate buffer
876	* anymore.
877	*/
878	TALLOC_FREE(buffer);
879	}
880
881	return out_buffer;
882	}
883
884	/**
885	talloc_strdup() a unix string to upper case.
886	**/
887
888	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
889	{
890	char *out_buffer = talloc_strdup(ctx,s);
891	const unsigned char p = (const unsigned char )s;
892	unsigned char q = (unsigned char )out_buffer;
893
894	if (!q) {
895	return NULL;
896	}
897
898	/* this is quite a common operation, so we want it to be
899	fast. We optimise for the ascii case, knowing that all our
900	supported multi-byte character sets are ascii-compatible
901	(ie. they match for the first 128 chars) */
902
903	while (*p) {
904	if (*p & 0x80)
905	break;
906	q++ = toupper_ascii_fast(p);
907	p++;
908	}
909
910	if (*p) {
911	/* MB case. */
912	size_t converted_size, converted_size2;
913	smb_ucs2_t *ubuf = NULL;
914
915	/* We're not using the ascii buffer above. */
916	TALLOC_FREE(out_buffer);
917
918	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
919	strlen(s)+1, (void *)&ubuf,
920	&converted_size, True))
921	{
922	return NULL;
923	}
924
925	strupper_w(ubuf);
926
927	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
928	converted_size, (void *)&out_buffer,
929	&converted_size2, True))
930	{
931	TALLOC_FREE(ubuf);
932	return NULL;
933	}
934
935	/* Don't need the intermediate buffer
936	* anymore.
937	*/
938	TALLOC_FREE(ubuf);
939	}
940
941	return out_buffer;
942	}
943
944	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
945	{
946	size_t size;
947	smb_ucs2_t *buffer = NULL;
948
949	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
950	(void *)(void )&buffer, &size,
951	True))
952	{
953	smb_panic("failed to create UCS2 buffer");
954	}
955	if (!strlower_w(buffer) && (dest == src)) {
956	SAFE_FREE(buffer);
957	return srclen;
958	}
959	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
960	SAFE_FREE(buffer);
961	return size;
962	}
963
964	/**
965	strdup() a unix string to lower case.
966	**/
967
968	char strdup_lower(const char s)
969	{
970	size_t converted_size;
971	smb_ucs2_t *buffer = NULL;
972	char *out_buffer;
973
974	if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
975	return NULL;
976	}
977
978	strlower_w(buffer);
979
980	if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
981	SAFE_FREE(buffer);
982	return NULL;
983	}
984
985	SAFE_FREE(buffer);
986
987	return out_buffer;
988	}
989
990	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
991	{
992	size_t converted_size;
993	smb_ucs2_t *buffer = NULL;
994	char *out_buffer;
995
996	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
997	return NULL;
998	}
999
1000	strlower_w(buffer);
1001
1002	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
1003	TALLOC_FREE(buffer);
1004	return NULL;
1005	}
1006
1007	TALLOC_FREE(buffer);
1008
1009	return out_buffer;
1010	}
1011
1012
1013	size_t ucs2_align(const void base_ptr, const void p, int flags)
1014	{
1015	if (flags & (STR_NOALIGN\|STR_ASCII))
1016	return 0;
1017	return PTR_DIFF(p, base_ptr) & 1;
1018	}
1019
1020
1021	/**
1022	* Copy a string from a char* unix src to a dos codepage string destination.
1023	*
1024	* @return the number of bytes occupied by the string in the destination.
1025	*
1026	* @param flags can include
1027	* <dl>
1028	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1029	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1030	* </dl>
1031	*
1032	* @param dest_len the maximum length in bytes allowed in the
1033	* destination.
1034	**/
1035	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1036	{
1037	size_t src_len = strlen(src);
1038	char *tmpbuf = NULL;
1039	size_t ret;
1040
1041	/* No longer allow a length of -1. */
1042	if (dest_len == (size_t)-1) {
1043	smb_panic("push_ascii - dest_len == -1");
1044	}
1045
1046	if (flags & STR_UPPER) {
1047	tmpbuf = SMB_STRDUP(src);
1048	if (!tmpbuf) {
1049	smb_panic("malloc fail");
1050	}
1051	strupper_m(tmpbuf);
1052	src = tmpbuf;
1053	}
1054
1055	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1056	src_len++;
1057	}
1058
1059	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1060	if (ret == (size_t)-1 &&
1061	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1062	&& dest_len > 0) {
1063	((char *)dest)[0] = '\0';
1064	}
1065	SAFE_FREE(tmpbuf);
1066	return ret;
1067	}
1068
1069	size_t push_ascii_fstring(void dest, const char src)
1070	{
1071	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1072	}
1073
1074	/********************************************************************
1075	Push an nstring - ensure null terminated. Written by
1076	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1077	********************************************************************/
1078
1079	size_t push_ascii_nstring(void dest, const char src)
1080	{
1081	size_t i, buffer_len, dest_len;
1082	smb_ucs2_t *buffer;
1083
1084	conv_silent = True;
1085	if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1086	smb_panic("failed to create UCS2 buffer");
1087	}
1088
1089	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1090	buffer_len /= sizeof(smb_ucs2_t);
1091
1092	dest_len = 0;
1093	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1094	unsigned char mb[10];
1095	/* Convert one smb_ucs2_t character at a time. */
1096	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1097	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1098	memcpy((char *)dest + dest_len, mb, mb_len);
1099	dest_len += mb_len;
1100	} else {
1101	errno = E2BIG;
1102	break;
1103	}
1104	}
1105	((char *)dest)[dest_len] = '\0';
1106
1107	SAFE_FREE(buffer);
1108	conv_silent = False;
1109	return dest_len;
1110	}
1111
1112	/********************************************************************
1113	Push and malloc an ascii string. src and dest null terminated.
1114	********************************************************************/
1115
1116	bool push_ascii_allocate(char *dest, const char src, size_t *converted_size)
1117	{
1118	size_t src_len = strlen(src)+1;
1119
1120	*dest = NULL;
1121	return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1122	(void **)dest, converted_size, True);
1123	}
1124
1125	/**
1126	* Copy a string from a dos codepage source to a unix char* destination.
1127	*
1128	* The resulting string in "dest" is always null terminated.
1129	*
1130	* @param flags can have:
1131	* <dl>
1132	* <dt>STR_TERMINATE</dt>
1133	* <dd>STR_TERMINATE means the string in @p src
1134	* is null terminated, and src_len is ignored.</dd>
1135	* </dl>
1136	*
1137	* @param src_len is the length of the source area in bytes.
1138	* @returns the number of bytes occupied by the string in @p src.
1139	**/
1140	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1141	{
1142	size_t ret;
1143
1144	if (dest_len == (size_t)-1) {
1145	/* No longer allow dest_len of -1. */
1146	smb_panic("pull_ascii - invalid dest_len of -1");
1147	}
1148
1149	if (flags & STR_TERMINATE) {
1150	if (src_len == (size_t)-1) {
1151	src_len = strlen((const char *)src) + 1;
1152	} else {
1153	size_t len = strnlen((const char *)src, src_len);
1154	if (len < src_len)
1155	len++;
1156	src_len = len;
1157	}
1158	}
1159
1160	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1161	if (ret == (size_t)-1) {
1162	ret = 0;
1163	dest_len = 0;
1164	}
1165
1166	if (dest_len && ret) {
1167	/* Did we already process the terminating zero ? */
1168	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1169	dest[MIN(ret, dest_len-1)] = 0;
1170	}
1171	} else {
1172	dest[0] = 0;
1173	}
1174
1175	return src_len;
1176	}
1177
1178	/**
1179	* Copy a string from a dos codepage source to a unix char* destination.
1180	Talloc version.
1181	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1182	needs fixing. JRA).
1183	*
1184	* The resulting string in "dest" is always null terminated.
1185	*
1186	* @param flags can have:
1187	* <dl>
1188	* <dt>STR_TERMINATE</dt>
1189	* <dd>STR_TERMINATE means the string in @p src
1190	* is null terminated, and src_len is ignored.</dd>
1191	* </dl>
1192	*
1193	* @param src_len is the length of the source area in bytes.
1194	* @returns the number of bytes occupied by the string in @p src.
1195	**/
1196
1197	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1198	char **ppdest,
1199	const void *src,
1200	size_t src_len,
1201	int flags)
1202	{
1203	char *dest = NULL;
1204	size_t dest_len;
1205
1206	#ifdef DEVELOPER
1207	/* Ensure we never use the braindead "malloc" varient. */
1208	if (ctx == NULL) {
1209	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1210	}
1211	#endif
1212
1213	*ppdest = NULL;
1214
1215	if (!src_len) {
1216	return 0;
1217	}
1218
1219	if (flags & STR_TERMINATE) {
1220	if (src_len == (size_t)-1) {
1221	src_len = strlen((const char *)src) + 1;
1222	} else {
1223	size_t len = strnlen((const char *)src, src_len);
1224	if (len < src_len)
1225	len++;
1226	src_len = len;
1227	}
1228	/* Ensure we don't use an insane length from the client. */
1229	if (src_len >= 1024*1024) {
1230	char *msg = talloc_asprintf(ctx,
1231	"Bad src length (%u) in "
1232	"pull_ascii_base_talloc",
1233	(unsigned int)src_len);
1234	smb_panic(msg);
1235	}
1236	} else {
1237	/* Can't have an unlimited length
1238	* non STR_TERMINATE'd.
1239	*/
1240	if (src_len == (size_t)-1) {
1241	errno = EINVAL;
1242	return 0;
1243	}
1244	}
1245
1246	/* src_len != -1 here. */
1247
1248	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1249	&dest_len, True)) {
1250	dest_len = 0;
1251	}
1252
1253	if (dest_len && dest) {
1254	/* Did we already process the terminating zero ? */
1255	if (dest[dest_len-1] != 0) {
1256	size_t size = talloc_get_size(dest);
1257	/* Have we got space to append the '\0' ? */
1258	if (size <= dest_len) {
1259	/* No, realloc. */
1260	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1261	dest_len+1);
1262	if (!dest) {
1263	/* talloc fail. */
1264	dest_len = (size_t)-1;
1265	return 0;
1266	}
1267	}
1268	/* Yay - space ! */
1269	dest[dest_len] = '\0';
1270	dest_len++;
1271	}
1272	} else if (dest) {
1273	dest[0] = 0;
1274	}
1275
1276	*ppdest = dest;
1277	return src_len;
1278	}
1279
1280	size_t pull_ascii_fstring(char dest, const void src)
1281	{
1282	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1283	}
1284
1285	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1286
1287	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1288	{
1289	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1290	}
1291
1292	/**
1293	* Copy a string from a char* src to a unicode destination.
1294	*
1295	* @returns the number of bytes occupied by the string in the destination.
1296	*
1297	* @param flags can have:
1298	*
1299	* <dl>
1300	* <dt>STR_TERMINATE <dd>means include the null termination.
1301	* <dt>STR_UPPER <dd>means uppercase in the destination.
1302	* <dt>STR_NOALIGN <dd>means don't do alignment.
1303	* </dl>
1304	*
1305	* @param dest_len is the maximum length allowed in the
1306	* destination.
1307	**/
1308
1309	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1310	{
1311	size_t len=0;
1312	size_t src_len;
1313	size_t ret;
1314
1315	if (dest_len == (size_t)-1) {
1316	/* No longer allow dest_len of -1. */
1317	smb_panic("push_ucs2 - invalid dest_len of -1");
1318	}
1319
1320	if (flags & STR_TERMINATE)
1321	src_len = (size_t)-1;
1322	else
1323	src_len = strlen(src);
1324
1325	if (ucs2_align(base_ptr, dest, flags)) {
1326	(char )dest = 0;
1327	dest = (void )((char )dest + 1);
1328	if (dest_len)
1329	dest_len--;
1330	len++;
1331	}
1332
1333	/* ucs2 is always a multiple of 2 bytes */
1334	dest_len &= ~1;
1335
1336	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1337	if (ret == (size_t)-1) {
1338	if ((flags & STR_TERMINATE) &&
1339	dest &&
1340	dest_len) {
1341	(char )dest = 0;
1342	}
1343	return len;
1344	}
1345
1346	len += ret;
1347
1348	if (flags & STR_UPPER) {
1349	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1350	size_t i;
1351
1352	/* We check for i < (ret / 2) below as the dest string isn't null
1353	terminated if STR_TERMINATE isn't set. */
1354
1355	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1356	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1357	if (v != dest_ucs2[i]) {
1358	dest_ucs2[i] = v;
1359	}
1360	}
1361	}
1362
1363	return len;
1364	}
1365
1366
1367	/**
1368	* Copy a string from a unix char* src to a UCS2 destination,
1369	* allocating a buffer using talloc().
1370	*
1371	* @param dest always set at least to NULL
1372	* @parm converted_size set to the number of bytes occupied by the string in
1373	* the destination on success.
1374	*
1375	* @return true if new buffer was correctly allocated, and string was
1376	* converted.
1377	**/
1378	bool push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src,
1379	size_t *converted_size)
1380	{
1381	size_t src_len = strlen(src)+1;
1382
1383	*dest = NULL;
1384	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1385	(void **)dest, converted_size, True);
1386	}
1387
1388
1389	/**
1390	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1391	*
1392	* @param dest always set at least to NULL
1393	* @parm converted_size set to the number of bytes occupied by the string in
1394	* the destination on success.
1395	*
1396	* @return true if new buffer was correctly allocated, and string was
1397	* converted.
1398	**/
1399
1400	bool push_ucs2_allocate(smb_ucs2_t *dest, const char src,
1401	size_t *converted_size)
1402	{
1403	size_t src_len = strlen(src)+1;
1404
1405	*dest = NULL;
1406	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1407	(void **)dest, converted_size, True);
1408	}
1409
1410	/**
1411	Copy a string from a char* src to a UTF-8 destination.
1412	Return the number of bytes occupied by the string in the destination
1413	Flags can have:
1414	STR_TERMINATE means include the null termination
1415	STR_UPPER means uppercase in the destination
1416	dest_len is the maximum length allowed in the destination. If dest_len
1417	is -1 then no maxiumum is used.
1418	**/
1419
1420	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1421	{
1422	size_t src_len = 0;
1423	size_t ret;
1424	char *tmpbuf = NULL;
1425
1426	if (dest_len == (size_t)-1) {
1427	/* No longer allow dest_len of -1. */
1428	smb_panic("push_utf8 - invalid dest_len of -1");
1429	}
1430
1431	if (flags & STR_UPPER) {
1432	tmpbuf = strdup_upper(src);
1433	if (!tmpbuf) {
1434	return (size_t)-1;
1435	}
1436	src = tmpbuf;
1437	src_len = strlen(src);
1438	}
1439
1440	src_len = strlen(src);
1441	if (flags & STR_TERMINATE) {
1442	src_len++;
1443	}
1444
1445	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1446	SAFE_FREE(tmpbuf);
1447	return ret;
1448	}
1449
1450	size_t push_utf8_fstring(void dest, const char src)
1451	{
1452	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1453	}
1454
1455	/**
1456	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1457	*
1458	* @param dest always set at least to NULL
1459	* @parm converted_size set to the number of bytes occupied by the string in
1460	* the destination on success.
1461	*
1462	* @return true if new buffer was correctly allocated, and string was
1463	* converted.
1464	**/
1465
1466	bool push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1467	size_t *converted_size)
1468	{
1469	size_t src_len = strlen(src)+1;
1470
1471	*dest = NULL;
1472	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1473	(void**)dest, converted_size, True);
1474	}
1475
1476	/**
1477	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1478	*
1479	* @param dest always set at least to NULL
1480	* @parm converted_size set to the number of bytes occupied by the string in
1481	* the destination on success.
1482	*
1483	* @return true if new buffer was correctly allocated, and string was
1484	* converted.
1485	**/
1486
1487	bool push_utf8_allocate(char *dest, const char src, size_t *converted_size)
1488	{
1489	size_t src_len = strlen(src)+1;
1490
1491	*dest = NULL;
1492	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1493	(void **)dest, converted_size, True);
1494	}
1495
1496	/**
1497	Copy a string from a ucs2 source to a unix char* destination.
1498	Flags can have:
1499	STR_TERMINATE means the string in src is null terminated.
1500	STR_NOALIGN means don't try to align.
1501	if STR_TERMINATE is set then src_len is ignored if it is -1.
1502	src_len is the length of the source area in bytes
1503	Return the number of bytes occupied by the string in src.
1504	The resulting string in "dest" is always null terminated.
1505	**/
1506
1507	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1508	{
1509	size_t ret;
1510
1511	if (dest_len == (size_t)-1) {
1512	/* No longer allow dest_len of -1. */
1513	smb_panic("pull_ucs2 - invalid dest_len of -1");
1514	}
1515
1516	if (!src_len) {
1517	if (dest && dest_len > 0) {
1518	dest[0] = '\0';
1519	}
1520	return 0;
1521	}
1522
1523	if (ucs2_align(base_ptr, src, flags)) {
1524	src = (const void )((const char )src + 1);
1525	if (src_len != (size_t)-1)
1526	src_len--;
1527	}
1528
1529	if (flags & STR_TERMINATE) {
1530	/* src_len -1 is the default for null terminated strings. */
1531	if (src_len != (size_t)-1) {
1532	size_t len = strnlen_w((const smb_ucs2_t *)src,
1533	src_len/2);
1534	if (len < src_len/2)
1535	len++;
1536	src_len = len*2;
1537	}
1538	}
1539
1540	/* ucs2 is always a multiple of 2 bytes */
1541	if (src_len != (size_t)-1)
1542	src_len &= ~1;
1543
1544	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1545	if (ret == (size_t)-1) {
1546	ret = 0;
1547	dest_len = 0;
1548	}
1549
1550	if (src_len == (size_t)-1)
1551	src_len = ret*2;
1552
1553	if (dest_len && ret) {
1554	/* Did we already process the terminating zero ? */
1555	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1556	dest[MIN(ret, dest_len-1)] = 0;
1557	}
1558	} else {
1559	dest[0] = 0;
1560	}
1561
1562	return src_len;
1563	}
1564
1565	/**
1566	Copy a string from a ucs2 source to a unix char* destination.
1567	Talloc version with a base pointer.
1568	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1569	needs fixing. JRA).
1570	Flags can have:
1571	STR_TERMINATE means the string in src is null terminated.
1572	STR_NOALIGN means don't try to align.
1573	if STR_TERMINATE is set then src_len is ignored if it is -1.
1574	src_len is the length of the source area in bytes
1575	Return the number of bytes occupied by the string in src.
1576	The resulting string in "dest" is always null terminated.
1577	**/
1578
1579	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1580	const void *base_ptr,
1581	char **ppdest,
1582	const void *src,
1583	size_t src_len,
1584	int flags)
1585	{
1586	char *dest;
1587	size_t dest_len;
1588
1589	*ppdest = NULL;
1590
1591	#ifdef DEVELOPER
1592	/* Ensure we never use the braindead "malloc" varient. */
1593	if (ctx == NULL) {
1594	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1595	}
1596	#endif
1597
1598	if (!src_len) {
1599	return 0;
1600	}
1601
1602	if (ucs2_align(base_ptr, src, flags)) {
1603	src = (const void )((const char )src + 1);
1604	if (src_len != (size_t)-1)
1605	src_len--;
1606	}
1607
1608	if (flags & STR_TERMINATE) {
1609	/* src_len -1 is the default for null terminated strings. */
1610	if (src_len != (size_t)-1) {
1611	size_t len = strnlen_w((const smb_ucs2_t *)src,
1612	src_len/2);
1613	if (len < src_len/2)
1614	len++;
1615	src_len = len*2;
1616	} else {
1617	/*
1618	* src_len == -1 - alloc interface won't take this
1619	* so we must calculate.
1620	*/
1621	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1622	}
1623	/* Ensure we don't use an insane length from the client. */
1624	if (src_len >= 1024*1024) {
1625	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1626	}
1627	} else {
1628	/* Can't have an unlimited length
1629	* non STR_TERMINATE'd.
1630	*/
1631	if (src_len == (size_t)-1) {
1632	errno = EINVAL;
1633	return 0;
1634	}
1635	}
1636
1637	/* src_len != -1 here. */
1638
1639	/* ucs2 is always a multiple of 2 bytes */
1640	src_len &= ~1;
1641
1642	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1643	(void *)&dest, &dest_len, True)) {
1644	dest_len = 0;
1645	}
1646
1647	if (dest_len) {
1648	/* Did we already process the terminating zero ? */
1649	if (dest[dest_len-1] != 0) {
1650	size_t size = talloc_get_size(dest);
1651	/* Have we got space to append the '\0' ? */
1652	if (size <= dest_len) {
1653	/* No, realloc. */
1654	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1655	dest_len+1);
1656	if (!dest) {
1657	/* talloc fail. */
1658	dest_len = (size_t)-1;
1659	return 0;
1660	}
1661	}
1662	/* Yay - space ! */
1663	dest[dest_len] = '\0';
1664	dest_len++;
1665	}
1666	} else if (dest) {
1667	dest[0] = 0;
1668	}
1669
1670	*ppdest = dest;
1671	return src_len;
1672	}
1673
1674	size_t pull_ucs2_fstring(char dest, const void src)
1675	{
1676	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1677	}
1678
1679	/**
1680	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1681	*
1682	* @param dest always set at least to NULL
1683	* @parm converted_size set to the number of bytes occupied by the string in
1684	* the destination on success.
1685	*
1686	* @return true if new buffer was correctly allocated, and string was
1687	* converted.
1688	**/
1689
1690	bool pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src,
1691	size_t *converted_size)
1692	{
1693	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1694
1695	*dest = NULL;
1696	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1697	(void **)dest, converted_size, True);
1698	}
1699
1700	/**
1701	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1702	*
1703	* @param dest always set at least to NULL
1704	* @parm converted_size set to the number of bytes occupied by the string in
1705	* the destination on success.
1706	* @return true if new buffer was correctly allocated, and string was
1707	* converted.
1708	**/
1709
1710	bool pull_ucs2_allocate(char *dest, const smb_ucs2_t src,
1711	size_t *converted_size)
1712	{
1713	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1714
1715	*dest = NULL;
1716	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1717	(void **)dest, converted_size, True);
1718	}
1719
1720	/**
1721	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1722	*
1723	* @param dest always set at least to NULL
1724	* @parm converted_size set to the number of bytes occupied by the string in
1725	* the destination on success.
1726	*
1727	* @return true if new buffer was correctly allocated, and string was
1728	* converted.
1729	**/
1730
1731	bool pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1732	size_t *converted_size)
1733	{
1734	size_t src_len = strlen(src)+1;
1735
1736	*dest = NULL;
1737	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1738	(void **)dest, converted_size, True);
1739	}
1740
1741	/**
1742	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1743	*
1744	* @param dest always set at least to NULL
1745	* @parm converted_size set to the number of bytes occupied by the string in
1746	* the destination on success.
1747	*
1748	* @return true if new buffer was correctly allocated, and string was
1749	* converted.
1750	**/
1751
1752	bool pull_utf8_allocate(char *dest, const char src, size_t *converted_size)
1753	{
1754	size_t src_len = strlen(src)+1;
1755
1756	*dest = NULL;
1757	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1758	(void **)dest, converted_size, True);
1759	}
1760
1761	/**
1762	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1763	*
1764	* @param dest always set at least to NULL
1765	* @parm converted_size set to the number of bytes occupied by the string in
1766	* the destination on success.
1767	*
1768	* @return true if new buffer was correctly allocated, and string was
1769	* converted.
1770	**/
1771
1772	bool pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src,
1773	size_t *converted_size)
1774	{
1775	size_t src_len = strlen(src)+1;
1776
1777	*dest = NULL;
1778	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1779	(void **)dest, converted_size, True);
1780	}
1781
1782	/**
1783	Copy a string from a char* src to a unicode or ascii
1784	dos codepage destination choosing unicode or ascii based on the
1785	flags in the SMB buffer starting at base_ptr.
1786	Return the number of bytes occupied by the string in the destination.
1787	flags can have:
1788	STR_TERMINATE means include the null termination.
1789	STR_UPPER means uppercase in the destination.
1790	STR_ASCII use ascii even with unicode packet.
1791	STR_NOALIGN means don't do alignment.
1792	dest_len is the maximum length allowed in the destination. If dest_len
1793	is -1 then no maxiumum is used.
1794	**/
1795
1796	size_t push_string_fn(const char *function, unsigned int line,
1797	const void *base_ptr, uint16 flags2,
1798	void dest, const char src,
1799	size_t dest_len, int flags)
1800	{
1801	#ifdef DEVELOPER
1802	/* We really need to zero fill here, not clobber
1803	* region, as we want to ensure that valgrind thinks
1804	* all of the outgoing buffer has been written to
1805	* so a send() or write() won't trap an error.
1806	* JRA.
1807	*/
1808	#if 0
1809	clobber_region(function, line, dest, dest_len);
1810	#else
1811	memset(dest, '\0', dest_len);
1812	#endif
1813	#endif
1814
1815	if (!(flags & STR_ASCII) && \
1816	((flags & STR_UNICODE \|\| \
1817	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1818	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1819	}
1820	return push_ascii(dest, src, dest_len, flags);
1821	}
1822
1823
1824	/**
1825	Copy a string from a unicode or ascii source (depending on
1826	the packet flags) to a char* destination.
1827	Flags can have:
1828	STR_TERMINATE means the string in src is null terminated.
1829	STR_UNICODE means to force as unicode.
1830	STR_ASCII use ascii even with unicode packet.
1831	STR_NOALIGN means don't do alignment.
1832	if STR_TERMINATE is set then src_len is ignored is it is -1
1833	src_len is the length of the source area in bytes.
1834	Return the number of bytes occupied by the string in src.
1835	The resulting string in "dest" is always null terminated.
1836	**/
1837
1838	size_t pull_string_fn(const char *function,
1839	unsigned int line,
1840	const void *base_ptr,
1841	uint16 smb_flags2,
1842	char *dest,
1843	const void *src,
1844	size_t dest_len,
1845	size_t src_len,
1846	int flags)
1847	{
1848	#ifdef DEVELOPER
1849	clobber_region(function, line, dest, dest_len);
1850	#endif
1851
1852	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1853	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1854	"UNICODE defined");
1855	}
1856
1857	if (!(flags & STR_ASCII) && \
1858	((flags & STR_UNICODE \|\| \
1859	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1860	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1861	}
1862	return pull_ascii(dest, src, dest_len, src_len, flags);
1863	}
1864
1865	/**
1866	Copy a string from a unicode or ascii source (depending on
1867	the packet flags) to a char* destination.
1868	Variant that uses talloc.
1869	Flags can have:
1870	STR_TERMINATE means the string in src is null terminated.
1871	STR_UNICODE means to force as unicode.
1872	STR_ASCII use ascii even with unicode packet.
1873	STR_NOALIGN means don't do alignment.
1874	if STR_TERMINATE is set then src_len is ignored is it is -1
1875	src_len is the length of the source area in bytes.
1876	Return the number of bytes occupied by the string in src.
1877	The resulting string in "dest" is always null terminated.
1878	**/
1879
1880	size_t pull_string_talloc_fn(const char *function,
1881	unsigned int line,
1882	TALLOC_CTX *ctx,
1883	const void *base_ptr,
1884	uint16 smb_flags2,
1885	char **ppdest,
1886	const void *src,
1887	size_t src_len,
1888	int flags)
1889	{
1890	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1891	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1892	"UNICODE defined");
1893	}
1894
1895	if (!(flags & STR_ASCII) && \
1896	((flags & STR_UNICODE \|\| \
1897	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1898	return pull_ucs2_base_talloc(ctx,
1899	base_ptr,
1900	ppdest,
1901	src,
1902	src_len,
1903	flags);
1904	}
1905	return pull_ascii_base_talloc(ctx,
1906	ppdest,
1907	src,
1908	src_len,
1909	flags);
1910	}
1911
1912
1913	size_t align_string(const void base_ptr, const char p, int flags)
1914	{
1915	if (!(flags & STR_ASCII) && \
1916	((flags & STR_UNICODE \|\| \
1917	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1918	return ucs2_align(base_ptr, p, flags);
1919	}
1920	return 0;
1921	}
1922
1923	/*
1924	Return the unicode codepoint for the next multi-byte CH_UNIX character
1925	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1926
1927	Also return the number of bytes consumed (which tells the caller
1928	how many bytes to skip to get to the next CH_UNIX character).
1929
1930	Return INVALID_CODEPOINT if the next character cannot be converted.
1931	*/
1932
1933	codepoint_t next_codepoint(const char str, size_t size)
1934	{
1935	/* It cannot occupy more than 4 bytes in UTF16 format */
1936	uint8_t buf[4];
1937	smb_iconv_t descriptor;
1938	#ifdef __OS2__
1939	size_t ilen_max;
1940	size_t olen_orig;
1941	const char *inbuf;
1942	#endif
1943	size_t ilen_orig;
1944	size_t ilen;
1945	size_t olen;
1946
1947	char *outbuf;
1948
1949	#ifdef __OS2__
1950	*size = 1;
1951	#endif
1952
1953	if ((str[0] & 0x80) == 0) {
1954	#ifndef __OS2__
1955	*size = 1;
1956	#endif
1957	return (codepoint_t)str[0];
1958	}
1959
1960	lazy_initialize_conv();
1961
1962	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1963	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1964	#ifndef __OS2__
1965	*size = 1;
1966	#endif
1967	return INVALID_CODEPOINT;
1968	}
1969	#ifdef __OS2__
1970	/* We assume that no multi-byte character can take
1971	more than 5 bytes. This is OK as we only
1972	support codepoints up to 1M */
1973
1974	ilen_max = strnlen( str, 5 );
1975	#else
1976	*size = 1;
1977	#endif
1978	ilen_orig = 1;
1979	olen_orig = 2;
1980	while( 1 )
1981	{
1982	ilen = ilen_orig;
1983	olen = olen_orig;
1984	inbuf = str;
1985	outbuf = ( char * )buf;
1986	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1987	break;
1988
1989	switch( errno )
1990	{
1991	case E2BIG :
1992	if( olen_orig == 2 )
1993	olen_orig = 4;
1994	else
1995	return INVALID_CODEPOINT;
1996	break;
1997
1998
1999	case EINVAL :
2000	#ifndef __OS2__
2001	/* We assume that no multi-byte character can take
2002	more than 5 bytes. This is OK as we only
2003	support codepoints up to 1M */
2004	if( ilen_orig < 5 )
2005	#else
2006	if( ilen_orig < ilen_max )
2007	#endif
2008	ilen_orig++;
2009	else
2010	return INVALID_CODEPOINT;
2011	break;
2012
2013	case EILSEQ :
2014	default :
2015	return INVALID_CODEPOINT;
2016	}
2017	}
2018
2019	olen = olen_orig - olen;
2020
2021	*size = ilen_orig - ilen;
2022
2023	if (olen == 2) {
2024	/* 2 byte, UTF16-LE encoded value. */
2025	return (codepoint_t)SVAL(buf, 0);
2026	}
2027	if (olen == 4) {
2028	/* Decode a 4 byte UTF16-LE character manually.
2029	See RFC2871 for the encoding machanism.
2030	*/
2031	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2032	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2033
2034	return (codepoint_t)0x10000 +
2035	(w1 << 10) + w2;
2036	}
2037
2038	/* no other length is valid */
2039	return INVALID_CODEPOINT;
2040	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: