Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: branches/samba-3.3.x/source/lib/charcnv33.c@ 374

Visit:

Last change on this file since 374 was 374, checked in by Herwig Bauernfeind, 16 years ago
Update Samba 3.3 to 3.3.10 (new files)
File size: 51.2 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/*
 * Replacement character written in place of input that cannot be
 * converted.  Fixed for now; we can parameterize this if someone
 * complains.... JRA.
 */

char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58	if (ch == CH_UTF16LE) ret = "UTF-16LE";
59	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60	else if (ch == CH_UNIX) ret = lp_unix_charset();
61	else if (ch == CH_DOS) ret = lp_dos_charset();
62	else if (ch == CH_DISPLAY) ret = lp_display_charset();
63	else if (ch == CH_UTF8) ret = "UTF8";
64
65	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66	if (ret && !strcmp(ret, "LOCALE")) {
67	const char *ln = NULL;
68
69	#ifdef HAVE_SETLOCALE
70	setlocale(LC_ALL, "");
71	#endif
72	ln = nl_langinfo(CODESET);
73	if (ln) {
74	/* Check whether the charset name is supported
75	by iconv */
76	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77	if (handle == (smb_iconv_t) -1) {
78	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79	ln = NULL;
80	} else {
81	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82	smb_iconv_close(handle);
83	}
84	}
85	ret = ln;
86	}
87	#endif
88
89	if (!ret \|\| !*ret) ret = "ASCII";
90	return ret;
91	}
92
93	void lazy_initialize_conv(void)
94	{
95	if (!initialized) {
96	load_case_tables();
97	init_iconv();
98	initialized = true;
99	}
100	}
101
102	/**
103	* Destroy global objects allocated by init_iconv()
104	**/
105	void gfree_charcnv(void)
106	{
107	int c1, c2;
108
109	for (c1=0;c1<NUM_CHARSETS;c1++) {
110	for (c2=0;c2<NUM_CHARSETS;c2++) {
111	if ( conv_handles[c1][c2] ) {
112	smb_iconv_close( conv_handles[c1][c2] );
113	conv_handles[c1][c2] = 0;
114	}
115	}
116	}
117	initialized = false;
118	}
119
120	/**
121	* Initialize iconv conversion descriptors.
122	*
123	* This is called the first time it is needed, and also called again
124	* every time the configuration is reloaded, because the charset or
125	* codepage might have changed.
126	**/
127	void init_iconv(void)
128	{
129	int c1, c2;
130	bool did_reload = False;
131
132	/* so that charset_name() works we need to get the UNIX<->UCS2 going
133	first */
134	if (!conv_handles[CH_UNIX][CH_UTF16LE])
135	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
136
137	if (!conv_handles[CH_UTF16LE][CH_UNIX])
138	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
139
140	for (c1=0;c1<NUM_CHARSETS;c1++) {
141	for (c2=0;c2<NUM_CHARSETS;c2++) {
142	const char *n1 = charset_name((charset_t)c1);
143	const char *n2 = charset_name((charset_t)c2);
144	if (conv_handles[c1][c2] &&
145	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
146	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
147	continue;
148
149	did_reload = True;
150
151	if (conv_handles[c1][c2])
152	smb_iconv_close(conv_handles[c1][c2]);
153
154	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
155	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
156	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
157	charset_name((charset_t)c1), charset_name((charset_t)c2)));
158	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
159	n1 = "ASCII";
160	}
161	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
162	n2 = "ASCII";
163	}
164	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
165	n1, n2 ));
166	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
167	if (!conv_handles[c1][c2]) {
168	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
169	smb_panic("init_iconv: conv_handle initialization failed");
170	}
171	}
172	}
173	}
174
175	if (did_reload) {
176	/* XXX: Does this really get called every time the dos
177	* codepage changes? */
178	/* XXX: Is the did_reload test too strict? */
179	conv_silent = True;
180	init_valid_table();
181	conv_silent = False;
182	}
183	}
184
185	/**
186	* Convert string from one encoding to another, making error checking etc
187	* Slow path version - uses (slow) iconv.
188	*
189	* @param src pointer to source string (multibyte or singlebyte)
190	* @param srclen length of the source string in bytes
191	* @param dest pointer to destination string (multibyte or singlebyte)
192	* @param destlen maximal length allowed for string
193	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
194	* @returns the number of bytes occupied in the destination
195	*
196	* Ensure the srclen contains the terminating zero.
197	*
198	**/
199
200	static size_t convert_string_internal(charset_t from, charset_t to,
201	void const *src, size_t srclen,
202	void *dest, size_t destlen, bool allow_bad_conv)
203	{
204	size_t i_len, o_len;
205	size_t retval;
206	const char* inbuf = (const char*)src;
207	char* outbuf = (char*)dest;
208	smb_iconv_t descriptor;
209
210	lazy_initialize_conv();
211
212	descriptor = conv_handles[from][to];
213
214	if (srclen == (size_t)-1) {
215	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
216	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
217	} else {
218	srclen = strlen((const char *)src)+1;
219	}
220	}
221
222
223	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
224	if (!conv_silent)
225	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
226	return (size_t)-1;
227	}
228
229	i_len=srclen;
230	o_len=destlen;
231
232	again:
233
234	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
235	if(retval==(size_t)-1) {
236	const char *reason="unknown error";
237	switch(errno) {
238	case EINVAL:
239	reason="Incomplete multibyte sequence";
240	if (!conv_silent)
241	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
242	if (allow_bad_conv)
243	goto use_as_is;
244	return (size_t)-1;
245	case E2BIG:
246	reason="No more room";
247	if (!conv_silent) {
248	if (from == CH_UNIX) {
249	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
250	charset_name(from), charset_name(to),
251	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
252	} else {
253	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
254	charset_name(from), charset_name(to),
255	(unsigned int)srclen, (unsigned int)destlen));
256	}
257	}
258	break;
259	case EILSEQ:
260	reason="Illegal multibyte sequence";
261	if (!conv_silent)
262	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
263	if (allow_bad_conv)
264	goto use_as_is;
265
266	return (size_t)-1;
267	default:
268	if (!conv_silent)
269	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270	return (size_t)-1;
271	}
272	/* smb_panic(reason); */
273	}
274	return destlen-o_len;
275
276	use_as_is:
277
278	/*
279	* Conversion not supported. This is actually an error, but there are so
280	* many misconfigured iconv systems and smb.conf's out there we can't just
281	* fail. Do a very bad conversion instead.... JRA.
282	*/
283
284	{
285	if (o_len == 0 \|\| i_len == 0)
286	return destlen - o_len;
287
288	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
289	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
290	/* Can't convert from utf16 any endian to multibyte.
291	Replace with the default fail char.
292	*/
293	if (i_len < 2)
294	return destlen - o_len;
295	if (i_len >= 2) {
296	*outbuf = lp_failed_convert_char();
297
298	outbuf++;
299	o_len--;
300
301	inbuf += 2;
302	i_len -= 2;
303	}
304
305	if (o_len == 0 \|\| i_len == 0)
306	return destlen - o_len;
307
308	/* Keep trying with the next char... */
309	goto again;
310
311	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312	/* Can't convert to UTF16LE - just widen by adding the
313	default fail char then zero.
314	*/
315	if (o_len < 2)
316	return destlen - o_len;
317
318	outbuf[0] = lp_failed_convert_char();
319	outbuf[1] = '\0';
320
321	inbuf++;
322	i_len--;
323
324	outbuf += 2;
325	o_len -= 2;
326
327	if (o_len == 0 \|\| i_len == 0)
328	return destlen - o_len;
329
330	/* Keep trying with the next char... */
331	goto again;
332
333	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334	to != CH_UTF16LE && to != CH_UTF16BE) {
335	/* Failed multibyte to multibyte. Just copy the default fail char and
336	try again. */
337	outbuf[0] = lp_failed_convert_char();
338
339	inbuf++;
340	i_len--;
341
342	outbuf++;
343	o_len--;
344
345	if (o_len == 0 \|\| i_len == 0)
346	return destlen - o_len;
347
348	/* Keep trying with the next char... */
349	goto again;
350
351	} else {
352	/* Keep compiler happy.... */
353	return destlen - o_len;
354	}
355	}
356	}
357
358	/**
359	* Convert string from one encoding to another, making error checking etc
360	* Fast path version - handles ASCII first.
361	*
362	* @param src pointer to source string (multibyte or singlebyte)
363	* @param srclen length of the source string in bytes, or -1 for nul terminated.
364	* @param dest pointer to destination string (multibyte or singlebyte)
365	* @param destlen maximal length allowed for string - NEVER -1.
366	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367	* @returns the number of bytes occupied in the destination
368	*
369	* Ensure the srclen contains the terminating zero.
370	*
371	* This function has been hand-tuned to provide a fast path.
372	* Don't change unless you really know what you are doing. JRA.
373	**/
374
375	size_t convert_string(charset_t from, charset_t to,
376	void const *src, size_t srclen,
377	void *dest, size_t destlen, bool allow_bad_conv)
378	{
379	/*
380	* NB. We deliberately don't do a strlen here if srclen == -1.
381	* This is very expensive over millions of calls and is taken
382	* care of in the slow path in convert_string_internal. JRA.
383	*/
384
385	#ifdef DEVELOPER
386	SMB_ASSERT(destlen != (size_t)-1);
387	#endif
388
389	if (srclen == 0)
390	return 0;
391
392	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393	const unsigned char p = (const unsigned char )src;
394	unsigned char q = (unsigned char )dest;
395	size_t slen = srclen;
396	size_t dlen = destlen;
397	unsigned char lastp = '\0';
398	size_t retval = 0;
399
400	/* If all characters are ascii, fast path here. */
401	while (slen && dlen) {
402	if ((lastp = *p) <= 0x7f) {
403	q++ = p++;
404	if (slen != (size_t)-1) {
405	slen--;
406	}
407	dlen--;
408	retval++;
409	if (!lastp)
410	break;
411	} else {
412	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413	goto general_case;
414	#else
415	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416	if (ret == (size_t)-1) {
417	return ret;
418	}
419	return retval + ret;
420	#endif
421	}
422	}
423	if (!dlen) {
424	/* Even if we fast path we should note if we ran out of room. */
425	if (((slen != (size_t)-1) && slen) \|\|
426	((slen == (size_t)-1) && lastp)) {
427	errno = E2BIG;
428	}
429	}
430	return retval;
431	// DEBUG(10, ("convert_string: 3"));
432
433	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
434	const unsigned char p = (const unsigned char )src;
435	unsigned char q = (unsigned char )dest;
436	size_t retval = 0;
437	size_t slen = srclen;
438	size_t dlen = destlen;
439	unsigned char lastp = '\0';
440
441	/* If all characters are ascii, fast path here. */
442	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
443	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
444	q++ = p;
445	if (slen != (size_t)-1) {
446	slen -= 2;
447	}
448	p += 2;
449	dlen--;
450	retval++;
451	if (!lastp)
452	break;
453	} else {
454	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
455	goto general_case;
456	#else
457	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
458	if (ret == (size_t)-1) {
459	return ret;
460	}
461	return retval + ret;
462	#endif
463	}
464	}
465	if (!dlen) {
466	/* Even if we fast path we should note if we ran out of room. */
467	if (((slen != (size_t)-1) && slen) \|\|
468	((slen == (size_t)-1) && lastp)) {
469	errno = E2BIG;
470	}
471	}
472	return retval;
473
474	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
475	const unsigned char p = (const unsigned char )src;
476	unsigned char q = (unsigned char )dest;
477	size_t retval = 0;
478	size_t slen = srclen;
479	size_t dlen = destlen;
480	unsigned char lastp = '\0';
481
482	/* If all characters are ascii, fast path here. */
483	while (slen && (dlen >= 2)) {
484	if ((lastp = *p) <= 0x7F) {
485	q++ = p++;
486	*q++ = '\0';
487	if (slen != (size_t)-1) {
488	slen--;
489	}
490	dlen -= 2;
491	retval += 2;
492	if (!lastp)
493	break;
494	} else {
495	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
496	goto general_case;
497	#else
498	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
499	if (ret == (size_t)-1) {
500	return ret;
501	}
502	return retval + ret;
503	#endif
504	}
505	}
506	if (!dlen) {
507	/* Even if we fast path we should note if we ran out of room. */
508	if (((slen != (size_t)-1) && slen) \|\|
509	((slen == (size_t)-1) && lastp)) {
510	errno = E2BIG;
511	}
512	}
513	return retval;
514	}
515
516	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
517	general_case:
518	#endif
519	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
520	}
521
522	/**
523	* Convert between character sets, allocating a new buffer for the result.
524	*
525	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
526	* (this is a bad interface and needs fixing. JRA).
527	* @param srclen length of source buffer.
528	* @param dest always set at least to NULL
529	* @param converted_size set to the size of the allocated buffer on return
530	* true
531	* @note -1 is not accepted for srclen.
532	*
533	* @return true if new buffer was correctly allocated, and string was
534	* converted.
535	*
536	* Ensure the srclen contains the terminating zero.
537	*
538	* I hate the goto's in this function. It's embarressing.....
539	* There has to be a cleaner way to do this. JRA.
540	**/
541
542	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
543	void const src, size_t srclen, void dst,
544	size_t *converted_size, bool allow_bad_conv)
545	{
546	size_t i_len, o_len, destlen = (srclen * 3) / 2;
547	size_t retval;
548	const char inbuf = (const char )src;
549	char outbuf = NULL, ob = NULL;
550	smb_iconv_t descriptor;
551	void dest = (void )dst;
552
553	*dest = NULL;
554
555	if (!converted_size) {
556	errno = EINVAL;
557	return false;
558	}
559
560	if (src == NULL \|\| srclen == (size_t)-1) {
561	errno = EINVAL;
562	return false;
563	}
564	if (srclen == 0) {
565	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
566	if (ob == NULL) {
567	errno = ENOMEM;
568	return false;
569	}
570	*dest = ob;
571	*converted_size = 0;
572	return true;
573	}
574
575	lazy_initialize_conv();
576
577	descriptor = conv_handles[from][to];
578
579	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
580	if (!conv_silent)
581	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
582	errno = EOPNOTSUPP;
583	return false;
584	}
585
586	convert:
587
588	/* +2 is for ucs2 null termination. */
589	if ((destlen*2)+2 < destlen) {
590	/* wrapped ! abort. */
591	if (!conv_silent)
592	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
593	if (!ctx)
594	SAFE_FREE(outbuf);
595	errno = EOPNOTSUPP;
596	return false;
597	} else {
598	destlen = destlen * 2;
599	}
600
601	/* +2 is for ucs2 null termination. */
602	if (ctx) {
603	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
604	} else {
605	ob = (char *)SMB_REALLOC(ob, destlen + 2);
606	}
607
608	if (!ob) {
609	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
610	errno = ENOMEM;
611	return false;
612	}
613	outbuf = ob;
614	i_len = srclen;
615	o_len = destlen;
616
617	again:
618
619
620	retval = smb_iconv(descriptor,
621	&inbuf, &i_len,
622	&outbuf, &o_len);
623	if(retval == (size_t)-1) {
624	const char *reason="unknown error";
625	switch(errno) {
626	case EINVAL:
627	reason="Incomplete multibyte sequence";
628	if (!conv_silent)
629	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
630	if (allow_bad_conv)
631	goto use_as_is;
632	break;
633	case E2BIG:
634	goto convert;
635	case EILSEQ:
636	reason="Illegal multibyte sequence";
637	if (!conv_silent)
638	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
639	if (allow_bad_conv)
640	goto use_as_is;
641	break;
642	}
643	if (!conv_silent)
644	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
645	/* smb_panic(reason); */
646	if (ctx) {
647	TALLOC_FREE(ob);
648	} else {
649	SAFE_FREE(ob);
650	}
651	return false;
652	}
653
654	out:
655
656	destlen = destlen - o_len;
657	/* Don't shrink unless we're reclaiming a lot of
658	* space. This is in the hot codepath and these
659	* reallocs cost. JRA.
660	*/
661	if (o_len > 1024) {
662	/* We're shrinking here so we know the +2 is safe from wrap. */
663	if (ctx) {
664	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
665	} else {
666	ob = (char *)SMB_REALLOC(ob,destlen + 2);
667	}
668	}
669
670	if (destlen && !ob) {
671	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
672	errno = ENOMEM;
673	return false;
674	}
675
676	*dest = ob;
677
678	/* Must ucs2 null terminate in the extra space we allocated. */
679	ob[destlen] = '\0';
680	ob[destlen+1] = '\0';
681
682	*converted_size = destlen;
683	return true;
684
685	use_as_is:
686
687	/*
688	* Conversion not supported. This is actually an error, but there are so
689	* many misconfigured iconv systems and smb.conf's out there we can't just
690	* fail. Do a very bad conversion instead.... JRA.
691	*/
692
693	{
694	if (o_len == 0 \|\| i_len == 0)
695	goto out;
696
697	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
698	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
699	/* Can't convert from utf16 any endian to multibyte.
700	Replace with the default fail char.
701	*/
702
703	if (i_len < 2)
704	goto out;
705
706	if (i_len >= 2) {
707	*outbuf = lp_failed_convert_char();
708
709	outbuf++;
710	o_len--;
711
712	inbuf += 2;
713	i_len -= 2;
714	}
715
716	if (o_len == 0 \|\| i_len == 0)
717	goto out;
718
719	/* Keep trying with the next char... */
720	goto again;
721
722	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
723	/* Can't convert to UTF16LE - just widen by adding the
724	default fail char then zero.
725	*/
726	if (o_len < 2)
727	goto out;
728
729	outbuf[0] = lp_failed_convert_char();
730	outbuf[1] = '\0';
731
732	inbuf++;
733	i_len--;
734
735	outbuf += 2;
736	o_len -= 2;
737
738	if (o_len == 0 \|\| i_len == 0)
739	goto out;
740
741	/* Keep trying with the next char... */
742	goto again;
743
744	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
745	to != CH_UTF16LE && to != CH_UTF16BE) {
746	/* Failed multibyte to multibyte. Just copy the default fail char and
747	try again. */
748	outbuf[0] = lp_failed_convert_char();
749
750	inbuf++;
751	i_len--;
752
753	outbuf++;
754	o_len--;
755
756	if (o_len == 0 \|\| i_len == 0)
757	goto out;
758
759	/* Keep trying with the next char... */
760	goto again;
761
762	} else {
763	/* Keep compiler happy.... */
764	goto out;
765	}
766	}
767	}
768
769	/**
770	* Convert between character sets, allocating a new buffer using talloc for the result.
771	*
772	* @param srclen length of source buffer.
773	* @param dest always set at least to NULL
774	* @parm converted_size set to the number of bytes occupied by the string in
775	* the destination on success.
776	* @note -1 is not accepted for srclen.
777	*
778	* @return true if new buffer was correctly allocated, and string was
779	* converted.
780	*/
781	bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
782	void const src, size_t srclen, void dst,
783	size_t *converted_size, bool allow_bad_conv)
784	{
785	void dest = (void )dst;
786
787	*dest = NULL;
788	return convert_string_allocate(ctx, from, to, src, srclen, dest,
789	converted_size, allow_bad_conv);
790	}
791
792	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
793	{
794	size_t size;
795	smb_ucs2_t *buffer;
796
797	if (!push_ucs2_allocate(&buffer, src, &size)) {
798	return (size_t)-1;
799	}
800
801	if (!strupper_w(buffer) && (dest == src)) {
802	free(buffer);
803	return srclen;
804	}
805
806	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
807	free(buffer);
808	return size;
809	}
810
811	/**
812	strdup() a unix string to upper case.
813	**/
814
815	char strdup_upper(const char s)
816	{
817	char *out_buffer = SMB_STRDUP(s);
818	const unsigned char p = (const unsigned char )s;
819	unsigned char q = (unsigned char )out_buffer;
820
821	if (!q) {
822	return NULL;
823	}
824
825	/* this is quite a common operation, so we want it to be
826	fast. We optimise for the ascii case, knowing that all our
827	supported multi-byte character sets are ascii-compatible
828	(ie. they match for the first 128 chars) */
829
830	while (*p) {
831	if (*p & 0x80)
832	break;
833	q++ = toupper_ascii_fast(p);
834	p++;
835	}
836
837	if (*p) {
838	/* MB case. */
839	size_t converted_size, converted_size2;
840	smb_ucs2_t *buffer = NULL;
841
842	SAFE_FREE(out_buffer);
843	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
844	strlen(s) + 1,
845	(void *)(void )&buffer,
846	&converted_size, True))
847	{
848	return NULL;
849	}
850
851	strupper_w(buffer);
852
853	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
854	converted_size,
855	(void *)(void )&out_buffer,
856	&converted_size2, True))
857	{
858	TALLOC_FREE(buffer);
859	return NULL;
860	}
861
862	/* Don't need the intermediate buffer
863	* anymore.
864	*/
865	TALLOC_FREE(buffer);
866	}
867
868	return out_buffer;
869	}
870
871	/**
872	talloc_strdup() a unix string to upper case.
873	**/
874
875	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
876	{
877	char *out_buffer = talloc_strdup(ctx,s);
878	const unsigned char p = (const unsigned char )s;
879	unsigned char q = (unsigned char )out_buffer;
880
881	if (!q) {
882	return NULL;
883	}
884
885	/* this is quite a common operation, so we want it to be
886	fast. We optimise for the ascii case, knowing that all our
887	supported multi-byte character sets are ascii-compatible
888	(ie. they match for the first 128 chars) */
889
890	while (*p) {
891	if (*p & 0x80)
892	break;
893	q++ = toupper_ascii_fast(p);
894	p++;
895	}
896
897	if (*p) {
898	/* MB case. */
899	size_t converted_size, converted_size2;
900	smb_ucs2_t *ubuf = NULL;
901
902	/* We're not using the ascii buffer above. */
903	TALLOC_FREE(out_buffer);
904
905	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
906	strlen(s)+1, (void *)&ubuf,
907	&converted_size, True))
908	{
909	return NULL;
910	}
911
912	strupper_w(ubuf);
913
914	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
915	converted_size, (void *)&out_buffer,
916	&converted_size2, True))
917	{
918	TALLOC_FREE(ubuf);
919	return NULL;
920	}
921
922	/* Don't need the intermediate buffer
923	* anymore.
924	*/
925	TALLOC_FREE(ubuf);
926	}
927
928	return out_buffer;
929	}
930
931	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
932	{
933	size_t size;
934	smb_ucs2_t *buffer = NULL;
935
936	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
937	(void *)(void )&buffer, &size,
938	True))
939	{
940	smb_panic("failed to create UCS2 buffer");
941	}
942	if (!strlower_w(buffer) && (dest == src)) {
943	SAFE_FREE(buffer);
944	return srclen;
945	}
946	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
947	SAFE_FREE(buffer);
948	return size;
949	}
950
951	/**
952	strdup() a unix string to lower case.
953	**/
954
955	char strdup_lower(const char s)
956	{
957	size_t converted_size;
958	smb_ucs2_t *buffer = NULL;
959	char *out_buffer;
960
961	if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
962	return NULL;
963	}
964
965	strlower_w(buffer);
966
967	if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
968	SAFE_FREE(buffer);
969	return NULL;
970	}
971
972	SAFE_FREE(buffer);
973
974	return out_buffer;
975	}
976
977	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
978	{
979	size_t converted_size;
980	smb_ucs2_t *buffer = NULL;
981	char *out_buffer;
982
983	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
984	return NULL;
985	}
986
987	strlower_w(buffer);
988
989	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
990	TALLOC_FREE(buffer);
991	return NULL;
992	}
993
994	TALLOC_FREE(buffer);
995
996	return out_buffer;
997	}
998
999
1000	size_t ucs2_align(const void base_ptr, const void p, int flags)
1001	{
1002	if (flags & (STR_NOALIGN\|STR_ASCII))
1003	return 0;
1004	return PTR_DIFF(p, base_ptr) & 1;
1005	}
1006
1007
1008	/**
1009	* Copy a string from a char* unix src to a dos codepage string destination.
1010	*
1011	* @return the number of bytes occupied by the string in the destination.
1012	*
1013	* @param flags can include
1014	* <dl>
1015	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1016	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1017	* </dl>
1018	*
1019	* @param dest_len the maximum length in bytes allowed in the
1020	* destination.
1021	**/
1022	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1023	{
1024	size_t src_len = strlen(src);
1025	char *tmpbuf = NULL;
1026	size_t ret;
1027
1028	/* No longer allow a length of -1. */
1029	if (dest_len == (size_t)-1) {
1030	smb_panic("push_ascii - dest_len == -1");
1031	}
1032
1033	if (flags & STR_UPPER) {
1034	tmpbuf = SMB_STRDUP(src);
1035	if (!tmpbuf) {
1036	smb_panic("malloc fail");
1037	}
1038	strupper_m(tmpbuf);
1039	src = tmpbuf;
1040	}
1041
1042	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1043	src_len++;
1044	}
1045
1046	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1047	if (ret == (size_t)-1 &&
1048	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1049	&& dest_len > 0) {
1050	((char *)dest)[0] = '\0';
1051	}
1052	SAFE_FREE(tmpbuf);
1053	return ret;
1054	}
1055
1056	size_t push_ascii_fstring(void dest, const char src)
1057	{
1058	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1059	}
1060
1061	/********************************************************************
1062	Push an nstring - ensure null terminated. Written by
1063	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1064	********************************************************************/
1065
1066	size_t push_ascii_nstring(void dest, const char src)
1067	{
1068	size_t i, buffer_len, dest_len;
1069	smb_ucs2_t *buffer;
1070
1071	conv_silent = True;
1072	if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1073	smb_panic("failed to create UCS2 buffer");
1074	}
1075
1076	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1077	buffer_len /= sizeof(smb_ucs2_t);
1078
1079	dest_len = 0;
1080	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1081	unsigned char mb[10];
1082	/* Convert one smb_ucs2_t character at a time. */
1083	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1084	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1085	memcpy((char *)dest + dest_len, mb, mb_len);
1086	dest_len += mb_len;
1087	} else {
1088	errno = E2BIG;
1089	break;
1090	}
1091	}
1092	((char *)dest)[dest_len] = '\0';
1093
1094	SAFE_FREE(buffer);
1095	conv_silent = False;
1096	return dest_len;
1097	}
1098
1099	/********************************************************************
1100	Push and malloc an ascii string. src and dest null terminated.
1101	********************************************************************/
1102
1103	bool push_ascii_allocate(char *dest, const char src, size_t *converted_size)
1104	{
1105	size_t src_len = strlen(src)+1;
1106
1107	*dest = NULL;
1108	return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1109	(void **)dest, converted_size, True);
1110	}
1111
1112	/**
1113	* Copy a string from a dos codepage source to a unix char* destination.
1114	*
1115	* The resulting string in "dest" is always null terminated.
1116	*
1117	* @param flags can have:
1118	* <dl>
1119	* <dt>STR_TERMINATE</dt>
1120	* <dd>STR_TERMINATE means the string in @p src
1121	* is null terminated, and src_len is ignored.</dd>
1122	* </dl>
1123	*
1124	* @param src_len is the length of the source area in bytes.
1125	* @returns the number of bytes occupied by the string in @p src.
1126	**/
1127	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1128	{
1129	size_t ret;
1130
1131	if (dest_len == (size_t)-1) {
1132	/* No longer allow dest_len of -1. */
1133	smb_panic("pull_ascii - invalid dest_len of -1");
1134	}
1135
1136	if (flags & STR_TERMINATE) {
1137	if (src_len == (size_t)-1) {
1138	src_len = strlen((const char *)src) + 1;
1139	} else {
1140	size_t len = strnlen((const char *)src, src_len);
1141	if (len < src_len)
1142	len++;
1143	src_len = len;
1144	}
1145	}
1146
1147	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1148	if (ret == (size_t)-1) {
1149	ret = 0;
1150	dest_len = 0;
1151	}
1152
1153	if (dest_len && ret) {
1154	/* Did we already process the terminating zero ? */
1155	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1156	dest[MIN(ret, dest_len-1)] = 0;
1157	}
1158	} else {
1159	dest[0] = 0;
1160	}
1161
1162	return src_len;
1163	}
1164
1165	/**
1166	* Copy a string from a dos codepage source to a unix char* destination.
1167	Talloc version.
1168	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1169	needs fixing. JRA).
1170	*
1171	* The resulting string in "dest" is always null terminated.
1172	*
1173	* @param flags can have:
1174	* <dl>
1175	* <dt>STR_TERMINATE</dt>
1176	* <dd>STR_TERMINATE means the string in @p src
1177	* is null terminated, and src_len is ignored.</dd>
1178	* </dl>
1179	*
1180	* @param src_len is the length of the source area in bytes.
1181	* @returns the number of bytes occupied by the string in @p src.
1182	**/
1183
1184	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1185	char **ppdest,
1186	const void *src,
1187	size_t src_len,
1188	int flags)
1189	{
1190	char *dest = NULL;
1191	size_t dest_len;
1192
1193	#ifdef DEVELOPER
1194	/* Ensure we never use the braindead "malloc" varient. */
1195	if (ctx == NULL) {
1196	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1197	}
1198	#endif
1199
1200	*ppdest = NULL;
1201
1202	if (!src_len) {
1203	return 0;
1204	}
1205
1206	if (flags & STR_TERMINATE) {
1207	if (src_len == (size_t)-1) {
1208	src_len = strlen((const char *)src) + 1;
1209	} else {
1210	size_t len = strnlen((const char *)src, src_len);
1211	if (len < src_len)
1212	len++;
1213	src_len = len;
1214	}
1215	/* Ensure we don't use an insane length from the client. */
1216	if (src_len >= 1024*1024) {
1217	char *msg = talloc_asprintf(ctx,
1218	"Bad src length (%u) in "
1219	"pull_ascii_base_talloc",
1220	(unsigned int)src_len);
1221	smb_panic(msg);
1222	}
1223	} else {
1224	/* Can't have an unlimited length
1225	* non STR_TERMINATE'd.
1226	*/
1227	if (src_len == (size_t)-1) {
1228	errno = EINVAL;
1229	return 0;
1230	}
1231	}
1232
1233	/* src_len != -1 here. */
1234
1235	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1236	&dest_len, True)) {
1237	dest_len = 0;
1238	}
1239
1240	if (dest_len && dest) {
1241	/* Did we already process the terminating zero ? */
1242	if (dest[dest_len-1] != 0) {
1243	size_t size = talloc_get_size(dest);
1244	/* Have we got space to append the '\0' ? */
1245	if (size <= dest_len) {
1246	/* No, realloc. */
1247	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1248	dest_len+1);
1249	if (!dest) {
1250	/* talloc fail. */
1251	dest_len = (size_t)-1;
1252	return 0;
1253	}
1254	}
1255	/* Yay - space ! */
1256	dest[dest_len] = '\0';
1257	dest_len++;
1258	}
1259	} else if (dest) {
1260	dest[0] = 0;
1261	}
1262
1263	*ppdest = dest;
1264	return src_len;
1265	}
1266
1267	size_t pull_ascii_fstring(char dest, const void src)
1268	{
1269	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1270	}
1271
1272	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1273
1274	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1275	{
1276	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1277	}
1278
1279	/**
1280	* Copy a string from a char* src to a unicode destination.
1281	*
1282	* @returns the number of bytes occupied by the string in the destination.
1283	*
1284	* @param flags can have:
1285	*
1286	* <dl>
1287	* <dt>STR_TERMINATE <dd>means include the null termination.
1288	* <dt>STR_UPPER <dd>means uppercase in the destination.
1289	* <dt>STR_NOALIGN <dd>means don't do alignment.
1290	* </dl>
1291	*
1292	* @param dest_len is the maximum length allowed in the
1293	* destination.
1294	**/
1295
1296	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1297	{
1298	size_t len=0;
1299	size_t src_len;
1300	size_t ret;
1301
1302	if (dest_len == (size_t)-1) {
1303	/* No longer allow dest_len of -1. */
1304	smb_panic("push_ucs2 - invalid dest_len of -1");
1305	}
1306
1307	if (flags & STR_TERMINATE)
1308	src_len = (size_t)-1;
1309	else
1310	src_len = strlen(src);
1311
1312	if (ucs2_align(base_ptr, dest, flags)) {
1313	(char )dest = 0;
1314	dest = (void )((char )dest + 1);
1315	if (dest_len)
1316	dest_len--;
1317	len++;
1318	}
1319
1320	/* ucs2 is always a multiple of 2 bytes */
1321	dest_len &= ~1;
1322
1323	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1324	if (ret == (size_t)-1) {
1325	if ((flags & STR_TERMINATE) &&
1326	dest &&
1327	dest_len) {
1328	(char )dest = 0;
1329	}
1330	return len;
1331	}
1332
1333	len += ret;
1334
1335	if (flags & STR_UPPER) {
1336	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1337	size_t i;
1338
1339	/* We check for i < (ret / 2) below as the dest string isn't null
1340	terminated if STR_TERMINATE isn't set. */
1341
1342	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1343	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1344	if (v != dest_ucs2[i]) {
1345	dest_ucs2[i] = v;
1346	}
1347	}
1348	}
1349
1350	return len;
1351	}
1352
1353
1354	/**
1355	* Copy a string from a unix char* src to a UCS2 destination,
1356	* allocating a buffer using talloc().
1357	*
1358	* @param dest always set at least to NULL
1359	* @parm converted_size set to the number of bytes occupied by the string in
1360	* the destination on success.
1361	*
1362	* @return true if new buffer was correctly allocated, and string was
1363	* converted.
1364	**/
1365	bool push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src,
1366	size_t *converted_size)
1367	{
1368	size_t src_len = strlen(src)+1;
1369
1370	*dest = NULL;
1371	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1372	(void **)dest, converted_size, True);
1373	}
1374
1375
1376	/**
1377	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1378	*
1379	* @param dest always set at least to NULL
1380	* @parm converted_size set to the number of bytes occupied by the string in
1381	* the destination on success.
1382	*
1383	* @return true if new buffer was correctly allocated, and string was
1384	* converted.
1385	**/
1386
1387	bool push_ucs2_allocate(smb_ucs2_t *dest, const char src,
1388	size_t *converted_size)
1389	{
1390	size_t src_len = strlen(src)+1;
1391
1392	*dest = NULL;
1393	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1394	(void **)dest, converted_size, True);
1395	}
1396
1397	/**
1398	Copy a string from a char* src to a UTF-8 destination.
1399	Return the number of bytes occupied by the string in the destination
1400	Flags can have:
1401	STR_TERMINATE means include the null termination
1402	STR_UPPER means uppercase in the destination
1403	dest_len is the maximum length allowed in the destination. If dest_len
1404	is -1 then no maxiumum is used.
1405	**/
1406
1407	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1408	{
1409	size_t src_len = 0;
1410	size_t ret;
1411	char *tmpbuf = NULL;
1412
1413	if (dest_len == (size_t)-1) {
1414	/* No longer allow dest_len of -1. */
1415	smb_panic("push_utf8 - invalid dest_len of -1");
1416	}
1417
1418	if (flags & STR_UPPER) {
1419	tmpbuf = strdup_upper(src);
1420	if (!tmpbuf) {
1421	return (size_t)-1;
1422	}
1423	src = tmpbuf;
1424	src_len = strlen(src);
1425	}
1426
1427	src_len = strlen(src);
1428	if (flags & STR_TERMINATE) {
1429	src_len++;
1430	}
1431
1432	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1433	SAFE_FREE(tmpbuf);
1434	return ret;
1435	}
1436
1437	size_t push_utf8_fstring(void dest, const char src)
1438	{
1439	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1440	}
1441
1442	/**
1443	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1444	*
1445	* @param dest always set at least to NULL
1446	* @parm converted_size set to the number of bytes occupied by the string in
1447	* the destination on success.
1448	*
1449	* @return true if new buffer was correctly allocated, and string was
1450	* converted.
1451	**/
1452
1453	bool push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1454	size_t *converted_size)
1455	{
1456	size_t src_len = strlen(src)+1;
1457
1458	*dest = NULL;
1459	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1460	(void**)dest, converted_size, True);
1461	}
1462
1463	/**
1464	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1465	*
1466	* @param dest always set at least to NULL
1467	* @parm converted_size set to the number of bytes occupied by the string in
1468	* the destination on success.
1469	*
1470	* @return true if new buffer was correctly allocated, and string was
1471	* converted.
1472	**/
1473
1474	bool push_utf8_allocate(char *dest, const char src, size_t *converted_size)
1475	{
1476	size_t src_len = strlen(src)+1;
1477
1478	*dest = NULL;
1479	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1480	(void **)dest, converted_size, True);
1481	}
1482
1483	/**
1484	Copy a string from a ucs2 source to a unix char* destination.
1485	Flags can have:
1486	STR_TERMINATE means the string in src is null terminated.
1487	STR_NOALIGN means don't try to align.
1488	if STR_TERMINATE is set then src_len is ignored if it is -1.
1489	src_len is the length of the source area in bytes
1490	Return the number of bytes occupied by the string in src.
1491	The resulting string in "dest" is always null terminated.
1492	**/
1493
1494	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1495	{
1496	size_t ret;
1497
1498	if (dest_len == (size_t)-1) {
1499	/* No longer allow dest_len of -1. */
1500	smb_panic("pull_ucs2 - invalid dest_len of -1");
1501	}
1502
1503	if (!src_len) {
1504	if (dest && dest_len > 0) {
1505	dest[0] = '\0';
1506	}
1507	return 0;
1508	}
1509
1510	if (ucs2_align(base_ptr, src, flags)) {
1511	src = (const void )((const char )src + 1);
1512	if (src_len != (size_t)-1)
1513	src_len--;
1514	}
1515
1516	if (flags & STR_TERMINATE) {
1517	/* src_len -1 is the default for null terminated strings. */
1518	if (src_len != (size_t)-1) {
1519	size_t len = strnlen_w((const smb_ucs2_t *)src,
1520	src_len/2);
1521	if (len < src_len/2)
1522	len++;
1523	src_len = len*2;
1524	}
1525	}
1526
1527	/* ucs2 is always a multiple of 2 bytes */
1528	if (src_len != (size_t)-1)
1529	src_len &= ~1;
1530
1531	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1532	if (ret == (size_t)-1) {
1533	ret = 0;
1534	dest_len = 0;
1535	}
1536
1537	if (src_len == (size_t)-1)
1538	src_len = ret*2;
1539
1540	if (dest_len && ret) {
1541	/* Did we already process the terminating zero ? */
1542	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1543	dest[MIN(ret, dest_len-1)] = 0;
1544	}
1545	} else {
1546	dest[0] = 0;
1547	}
1548
1549	return src_len;
1550	}
1551
1552	/**
1553	Copy a string from a ucs2 source to a unix char* destination.
1554	Talloc version with a base pointer.
1555	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1556	needs fixing. JRA).
1557	Flags can have:
1558	STR_TERMINATE means the string in src is null terminated.
1559	STR_NOALIGN means don't try to align.
1560	if STR_TERMINATE is set then src_len is ignored if it is -1.
1561	src_len is the length of the source area in bytes
1562	Return the number of bytes occupied by the string in src.
1563	The resulting string in "dest" is always null terminated.
1564	**/
1565
1566	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1567	const void *base_ptr,
1568	char **ppdest,
1569	const void *src,
1570	size_t src_len,
1571	int flags)
1572	{
1573	char *dest;
1574	size_t dest_len;
1575
1576	*ppdest = NULL;
1577
1578	#ifdef DEVELOPER
1579	/* Ensure we never use the braindead "malloc" varient. */
1580	if (ctx == NULL) {
1581	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1582	}
1583	#endif
1584
1585	if (!src_len) {
1586	return 0;
1587	}
1588
1589	if (ucs2_align(base_ptr, src, flags)) {
1590	src = (const void )((const char )src + 1);
1591	if (src_len != (size_t)-1)
1592	src_len--;
1593	}
1594
1595	if (flags & STR_TERMINATE) {
1596	/* src_len -1 is the default for null terminated strings. */
1597	if (src_len != (size_t)-1) {
1598	size_t len = strnlen_w((const smb_ucs2_t *)src,
1599	src_len/2);
1600	if (len < src_len/2)
1601	len++;
1602	src_len = len*2;
1603	} else {
1604	/*
1605	* src_len == -1 - alloc interface won't take this
1606	* so we must calculate.
1607	*/
1608	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1609	}
1610	/* Ensure we don't use an insane length from the client. */
1611	if (src_len >= 1024*1024) {
1612	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1613	}
1614	} else {
1615	/* Can't have an unlimited length
1616	* non STR_TERMINATE'd.
1617	*/
1618	if (src_len == (size_t)-1) {
1619	errno = EINVAL;
1620	return 0;
1621	}
1622	}
1623
1624	/* src_len != -1 here. */
1625
1626	/* ucs2 is always a multiple of 2 bytes */
1627	src_len &= ~1;
1628
1629	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1630	(void *)&dest, &dest_len, True)) {
1631	dest_len = 0;
1632	}
1633
1634	if (dest_len) {
1635	/* Did we already process the terminating zero ? */
1636	if (dest[dest_len-1] != 0) {
1637	size_t size = talloc_get_size(dest);
1638	/* Have we got space to append the '\0' ? */
1639	if (size <= dest_len) {
1640	/* No, realloc. */
1641	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1642	dest_len+1);
1643	if (!dest) {
1644	/* talloc fail. */
1645	dest_len = (size_t)-1;
1646	return 0;
1647	}
1648	}
1649	/* Yay - space ! */
1650	dest[dest_len] = '\0';
1651	dest_len++;
1652	}
1653	} else if (dest) {
1654	dest[0] = 0;
1655	}
1656
1657	*ppdest = dest;
1658	return src_len;
1659	}
1660
1661	size_t pull_ucs2_fstring(char dest, const void src)
1662	{
1663	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1664	}
1665
1666	/**
1667	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1668	*
1669	* @param dest always set at least to NULL
1670	* @parm converted_size set to the number of bytes occupied by the string in
1671	* the destination on success.
1672	*
1673	* @return true if new buffer was correctly allocated, and string was
1674	* converted.
1675	**/
1676
1677	bool pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src,
1678	size_t *converted_size)
1679	{
1680	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1681
1682	*dest = NULL;
1683	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1684	(void **)dest, converted_size, True);
1685	}
1686
1687	/**
1688	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1689	*
1690	* @param dest always set at least to NULL
1691	* @parm converted_size set to the number of bytes occupied by the string in
1692	* the destination on success.
1693	* @return true if new buffer was correctly allocated, and string was
1694	* converted.
1695	**/
1696
1697	bool pull_ucs2_allocate(char *dest, const smb_ucs2_t src,
1698	size_t *converted_size)
1699	{
1700	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1701
1702	*dest = NULL;
1703	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1704	(void **)dest, converted_size, True);
1705	}
1706
1707	/**
1708	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1709	*
1710	* @param dest always set at least to NULL
1711	* @parm converted_size set to the number of bytes occupied by the string in
1712	* the destination on success.
1713	*
1714	* @return true if new buffer was correctly allocated, and string was
1715	* converted.
1716	**/
1717
1718	bool pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1719	size_t *converted_size)
1720	{
1721	size_t src_len = strlen(src)+1;
1722
1723	*dest = NULL;
1724	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1725	(void **)dest, converted_size, True);
1726	}
1727
1728	/**
1729	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1730	*
1731	* @param dest always set at least to NULL
1732	* @parm converted_size set to the number of bytes occupied by the string in
1733	* the destination on success.
1734	*
1735	* @return true if new buffer was correctly allocated, and string was
1736	* converted.
1737	**/
1738
1739	bool pull_utf8_allocate(char *dest, const char src, size_t *converted_size)
1740	{
1741	size_t src_len = strlen(src)+1;
1742
1743	*dest = NULL;
1744	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1745	(void **)dest, converted_size, True);
1746	}
1747
1748	/**
1749	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1750	*
1751	* @param dest always set at least to NULL
1752	* @parm converted_size set to the number of bytes occupied by the string in
1753	* the destination on success.
1754	*
1755	* @return true if new buffer was correctly allocated, and string was
1756	* converted.
1757	**/
1758
1759	bool pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src,
1760	size_t *converted_size)
1761	{
1762	size_t src_len = strlen(src)+1;
1763
1764	*dest = NULL;
1765	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1766	(void **)dest, converted_size, True);
1767	}
1768
1769	/**
1770	Copy a string from a char* src to a unicode or ascii
1771	dos codepage destination choosing unicode or ascii based on the
1772	flags in the SMB buffer starting at base_ptr.
1773	Return the number of bytes occupied by the string in the destination.
1774	flags can have:
1775	STR_TERMINATE means include the null termination.
1776	STR_UPPER means uppercase in the destination.
1777	STR_ASCII use ascii even with unicode packet.
1778	STR_NOALIGN means don't do alignment.
1779	dest_len is the maximum length allowed in the destination. If dest_len
1780	is -1 then no maxiumum is used.
1781	**/
1782
1783	size_t push_string_fn(const char *function, unsigned int line,
1784	const void *base_ptr, uint16 flags2,
1785	void dest, const char src,
1786	size_t dest_len, int flags)
1787	{
1788	#ifdef DEVELOPER
1789	/* We really need to zero fill here, not clobber
1790	* region, as we want to ensure that valgrind thinks
1791	* all of the outgoing buffer has been written to
1792	* so a send() or write() won't trap an error.
1793	* JRA.
1794	*/
1795	#if 0
1796	clobber_region(function, line, dest, dest_len);
1797	#else
1798	memset(dest, '\0', dest_len);
1799	#endif
1800	#endif
1801
1802	if (!(flags & STR_ASCII) && \
1803	((flags & STR_UNICODE \|\| \
1804	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1805	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1806	}
1807	return push_ascii(dest, src, dest_len, flags);
1808	}
1809
1810
1811	/**
1812	Copy a string from a unicode or ascii source (depending on
1813	the packet flags) to a char* destination.
1814	Flags can have:
1815	STR_TERMINATE means the string in src is null terminated.
1816	STR_UNICODE means to force as unicode.
1817	STR_ASCII use ascii even with unicode packet.
1818	STR_NOALIGN means don't do alignment.
1819	if STR_TERMINATE is set then src_len is ignored is it is -1
1820	src_len is the length of the source area in bytes.
1821	Return the number of bytes occupied by the string in src.
1822	The resulting string in "dest" is always null terminated.
1823	**/
1824
1825	size_t pull_string_fn(const char *function,
1826	unsigned int line,
1827	const void *base_ptr,
1828	uint16 smb_flags2,
1829	char *dest,
1830	const void *src,
1831	size_t dest_len,
1832	size_t src_len,
1833	int flags)
1834	{
1835	#ifdef DEVELOPER
1836	clobber_region(function, line, dest, dest_len);
1837	#endif
1838
1839	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1840	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1841	"UNICODE defined");
1842	}
1843
1844	if (!(flags & STR_ASCII) && \
1845	((flags & STR_UNICODE \|\| \
1846	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1847	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1848	}
1849	return pull_ascii(dest, src, dest_len, src_len, flags);
1850	}
1851
1852	/**
1853	Copy a string from a unicode or ascii source (depending on
1854	the packet flags) to a char* destination.
1855	Variant that uses talloc.
1856	Flags can have:
1857	STR_TERMINATE means the string in src is null terminated.
1858	STR_UNICODE means to force as unicode.
1859	STR_ASCII use ascii even with unicode packet.
1860	STR_NOALIGN means don't do alignment.
1861	if STR_TERMINATE is set then src_len is ignored is it is -1
1862	src_len is the length of the source area in bytes.
1863	Return the number of bytes occupied by the string in src.
1864	The resulting string in "dest" is always null terminated.
1865	**/
1866
1867	size_t pull_string_talloc_fn(const char *function,
1868	unsigned int line,
1869	TALLOC_CTX *ctx,
1870	const void *base_ptr,
1871	uint16 smb_flags2,
1872	char **ppdest,
1873	const void *src,
1874	size_t src_len,
1875	int flags)
1876	{
1877	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1878	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1879	"UNICODE defined");
1880	}
1881
1882	if (!(flags & STR_ASCII) && \
1883	((flags & STR_UNICODE \|\| \
1884	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1885	return pull_ucs2_base_talloc(ctx,
1886	base_ptr,
1887	ppdest,
1888	src,
1889	src_len,
1890	flags);
1891	}
1892	return pull_ascii_base_talloc(ctx,
1893	ppdest,
1894	src,
1895	src_len,
1896	flags);
1897	}
1898
1899
1900	size_t align_string(const void base_ptr, const char p, int flags)
1901	{
1902	if (!(flags & STR_ASCII) && \
1903	((flags & STR_UNICODE \|\| \
1904	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1905	return ucs2_align(base_ptr, p, flags);
1906	}
1907	return 0;
1908	}
1909
1910	/*
1911	Return the unicode codepoint for the next multi-byte CH_UNIX character
1912	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1913
1914	Also return the number of bytes consumed (which tells the caller
1915	how many bytes to skip to get to the next CH_UNIX character).
1916
1917	Return INVALID_CODEPOINT if the next character cannot be converted.
1918	*/
1919
1920	codepoint_t next_codepoint(const char str, size_t size)
1921	{
1922	/* It cannot occupy more than 4 bytes in UTF16 format */
1923	uint8_t buf[4];
1924	smb_iconv_t descriptor;
1925	#ifdef __OS2__
1926	size_t ilen_max;
1927	size_t olen_orig;
1928	const char *inbuf;
1929	#endif
1930	size_t ilen_orig;
1931	size_t ilen;
1932	size_t olen;
1933
1934	char *outbuf;
1935
1936	#ifdef __OS2__
1937	*size = 1;
1938	#endif
1939
1940	if ((str[0] & 0x80) == 0) {
1941	#ifndef __OS2__
1942	*size = 1;
1943	#endif
1944	return (codepoint_t)str[0];
1945	}
1946
1947	lazy_initialize_conv();
1948
1949	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1950	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1951	#ifndef __OS2__
1952	*size = 1;
1953	#endif
1954	return INVALID_CODEPOINT;
1955	}
1956	#ifdef __OS2__
1957	/* We assume that no multi-byte character can take
1958	more than 5 bytes. This is OK as we only
1959	support codepoints up to 1M */
1960
1961	ilen_max = strnlen( str, 5 );
1962	#else
1963	*size = 1;
1964	#endif
1965	ilen_orig = 1;
1966	olen_orig = 2;
1967	while( 1 )
1968	{
1969	ilen = ilen_orig;
1970	olen = olen_orig;
1971	inbuf = str;
1972	outbuf = ( char * )buf;
1973	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1974	break;
1975
1976	switch( errno )
1977	{
1978	case E2BIG :
1979	if( olen_orig == 2 )
1980	olen_orig = 4;
1981	else
1982	return INVALID_CODEPOINT;
1983	break;
1984
1985
1986	case EINVAL :
1987	#ifndef __OS2__
1988	/* We assume that no multi-byte character can take
1989	more than 5 bytes. This is OK as we only
1990	support codepoints up to 1M */
1991	if( ilen_orig < 5 )
1992	#else
1993	if( ilen_orig < ilen_max )
1994	#endif
1995	ilen_orig++;
1996	else
1997	return INVALID_CODEPOINT;
1998	break;
1999
2000	case EILSEQ :
2001	default :
2002	return INVALID_CODEPOINT;
2003	}
2004	}
2005
2006	olen = olen_orig - olen;
2007
2008	*size = ilen_orig - ilen;
2009
2010	if (olen == 2) {
2011	/* 2 byte, UTF16-LE encoded value. */
2012	return (codepoint_t)SVAL(buf, 0);
2013	}
2014	if (olen == 4) {
2015	/* Decode a 4 byte UTF16-LE character manually.
2016	See RFC2871 for the encoding machanism.
2017	*/
2018	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2019	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2020
2021	return (codepoint_t)0x10000 +
2022	(w1 << 10) + w2;
2023	}
2024
2025	/* no other length is valid */
2026	return INVALID_CODEPOINT;
2027	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: