Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: branches/samba-3.3.x/source/lib/charcnv.c@ 206

Visit:

Last change on this file since 206 was 206, checked in by Herwig Bauernfeind, 16 years ago
Import Samba 3.3 branch at 3.0.0 level (psmedley's port)
File size: 50.9 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
25	/* We can parameterize this if someone complains.... JRA. */
26
/**
 * Return the character substituted for input that cannot be converted.
 * The value is currently fixed at '_'.
 **/
char lp_failed_convert_char(void)
{
	const char substitute = '_';

	return substitute;
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	else if (ch == CH_UNIX) ret = lp_unix_charset();
62	else if (ch == CH_DOS) ret = lp_dos_charset();
63	else if (ch == CH_DISPLAY) ret = lp_display_charset();
64	else if (ch == CH_UTF8) ret = "UTF8";
65
66	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67	if (ret && !strcmp(ret, "LOCALE")) {
68	const char *ln = NULL;
69
70	#ifdef HAVE_SETLOCALE
71	setlocale(LC_ALL, "");
72	#endif
73	ln = nl_langinfo(CODESET);
74	if (ln) {
75	/* Check whether the charset name is supported
76	by iconv */
77	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78	if (handle == (smb_iconv_t) -1) {
79	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80	ln = NULL;
81	} else {
82	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83	smb_iconv_close(handle);
84	}
85	}
86	ret = ln;
87	}
88	#endif
89
90	if (!ret \|\| !*ret) ret = "ASCII";
91	return ret;
92	}
93
94	void lazy_initialize_conv(void)
95	{
96	if (!initialized) {
97	load_case_tables();
98	init_iconv();
99	initialized = true;
100	}
101	}
102
103	/**
104	* Destroy global objects allocated by init_iconv()
105	**/
106	void gfree_charcnv(void)
107	{
108	int c1, c2;
109
110	for (c1=0;c1<NUM_CHARSETS;c1++) {
111	for (c2=0;c2<NUM_CHARSETS;c2++) {
112	if ( conv_handles[c1][c2] ) {
113	smb_iconv_close( conv_handles[c1][c2] );
114	conv_handles[c1][c2] = 0;
115	}
116	}
117	}
118	initialized = false;
119	}
120
121	/**
122	* Initialize iconv conversion descriptors.
123	*
124	* This is called the first time it is needed, and also called again
125	* every time the configuration is reloaded, because the charset or
126	* codepage might have changed.
127	**/
128	void init_iconv(void)
129	{
130	int c1, c2;
131	bool did_reload = False;
132
133	/* so that charset_name() works we need to get the UNIX<->UCS2 going
134	first */
135	if (!conv_handles[CH_UNIX][CH_UTF16LE])
136	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
137
138	if (!conv_handles[CH_UTF16LE][CH_UNIX])
139	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
140
141	for (c1=0;c1<NUM_CHARSETS;c1++) {
142	for (c2=0;c2<NUM_CHARSETS;c2++) {
143	const char *n1 = charset_name((charset_t)c1);
144	const char *n2 = charset_name((charset_t)c2);
145	if (conv_handles[c1][c2] &&
146	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148	continue;
149
150	did_reload = True;
151
152	if (conv_handles[c1][c2])
153	smb_iconv_close(conv_handles[c1][c2]);
154
155	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158	charset_name((charset_t)c1), charset_name((charset_t)c2)));
159	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160	n1 = "ASCII";
161	}
162	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163	n2 = "ASCII";
164	}
165	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166	n1, n2 ));
167	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168	if (!conv_handles[c1][c2]) {
169	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170	smb_panic("init_iconv: conv_handle initialization failed");
171	}
172	}
173	}
174	}
175
176	if (did_reload) {
177	/* XXX: Does this really get called every time the dos
178	* codepage changes? */
179	/* XXX: Is the did_reload test too strict? */
180	conv_silent = True;
181	init_valid_table();
182	conv_silent = False;
183	}
184	}
185
186	/**
187	* Convert string from one encoding to another, making error checking etc
188	* Slow path version - uses (slow) iconv.
189	*
190	* @param src pointer to source string (multibyte or singlebyte)
191	* @param srclen length of the source string in bytes
192	* @param dest pointer to destination string (multibyte or singlebyte)
193	* @param destlen maximal length allowed for string
194	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195	* @returns the number of bytes occupied in the destination
196	*
197	* Ensure the srclen contains the terminating zero.
198	*
199	**/
200
201	static size_t convert_string_internal(charset_t from, charset_t to,
202	void const *src, size_t srclen,
203	void *dest, size_t destlen, bool allow_bad_conv)
204	{
205	size_t i_len, o_len;
206	size_t retval;
207	const char* inbuf = (const char*)src;
208	char* outbuf = (char*)dest;
209	smb_iconv_t descriptor;
210
211	lazy_initialize_conv();
212
213	descriptor = conv_handles[from][to];
214
215	if (srclen == (size_t)-1) {
216	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
217	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
218	} else {
219	srclen = strlen((const char *)src)+1;
220	}
221	}
222
223
224	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
225	if (!conv_silent)
226	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
227	return (size_t)-1;
228	}
229
230	i_len=srclen;
231	o_len=destlen;
232
233	again:
234
235	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236	if(retval==(size_t)-1) {
237	const char *reason="unknown error";
238	switch(errno) {
239	case EINVAL:
240	reason="Incomplete multibyte sequence";
241	if (!conv_silent)
242	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
243	if (allow_bad_conv)
244	goto use_as_is;
245	break;
246	case E2BIG:
247	reason="No more room";
248	if (!conv_silent) {
249	if (from == CH_UNIX) {
250	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251	charset_name(from), charset_name(to),
252	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
253	} else {
254	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255	charset_name(from), charset_name(to),
256	(unsigned int)srclen, (unsigned int)destlen));
257	}
258	}
259	break;
260	case EILSEQ:
261	reason="Illegal multibyte sequence";
262	if (!conv_silent)
263	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
264	if (allow_bad_conv)
265	goto use_as_is;
266	break;
267	default:
268	if (!conv_silent)
269	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270	break;
271	}
272	/* smb_panic(reason); */
273	}
274	return destlen-o_len;
275
276	use_as_is:
277
278	/*
279	* Conversion not supported. This is actually an error, but there are so
280	* many misconfigured iconv systems and smb.conf's out there we can't just
281	* fail. Do a very bad conversion instead.... JRA.
282	*/
283
284	{
285	if (o_len == 0 \|\| i_len == 0)
286	return destlen - o_len;
287
288	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
289	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
290	/* Can't convert from utf16 any endian to multibyte.
291	Replace with the default fail char.
292	*/
293	if (i_len < 2)
294	return destlen - o_len;
295	if (i_len >= 2) {
296	*outbuf = lp_failed_convert_char();
297
298	outbuf++;
299	o_len--;
300
301	inbuf += 2;
302	i_len -= 2;
303	}
304
305	if (o_len == 0 \|\| i_len == 0)
306	return destlen - o_len;
307
308	/* Keep trying with the next char... */
309	goto again;
310
311	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312	/* Can't convert to UTF16LE - just widen by adding the
313	default fail char then zero.
314	*/
315	if (o_len < 2)
316	return destlen - o_len;
317
318	outbuf[0] = lp_failed_convert_char();
319	outbuf[1] = '\0';
320
321	inbuf++;
322	i_len--;
323
324	outbuf += 2;
325	o_len -= 2;
326
327	if (o_len == 0 \|\| i_len == 0)
328	return destlen - o_len;
329
330	/* Keep trying with the next char... */
331	goto again;
332
333	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334	to != CH_UTF16LE && to != CH_UTF16BE) {
335	/* Failed multibyte to multibyte. Just copy the default fail char and
336	try again. */
337	outbuf[0] = lp_failed_convert_char();
338
339	inbuf++;
340	i_len--;
341
342	outbuf++;
343	o_len--;
344
345	if (o_len == 0 \|\| i_len == 0)
346	return destlen - o_len;
347
348	/* Keep trying with the next char... */
349	goto again;
350
351	} else {
352	/* Keep compiler happy.... */
353	return destlen - o_len;
354	}
355	}
356	}
357
358	/**
359	* Convert string from one encoding to another, making error checking etc
360	* Fast path version - handles ASCII first.
361	*
362	* @param src pointer to source string (multibyte or singlebyte)
363	* @param srclen length of the source string in bytes, or -1 for nul terminated.
364	* @param dest pointer to destination string (multibyte or singlebyte)
365	* @param destlen maximal length allowed for string - NEVER -1.
366	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367	* @returns the number of bytes occupied in the destination
368	*
369	* Ensure the srclen contains the terminating zero.
370	*
371	* This function has been hand-tuned to provide a fast path.
372	* Don't change unless you really know what you are doing. JRA.
373	**/
374
375	size_t convert_string(charset_t from, charset_t to,
376	void const *src, size_t srclen,
377	void *dest, size_t destlen, bool allow_bad_conv)
378	{
379	/*
380	* NB. We deliberately don't do a strlen here if srclen == -1.
381	* This is very expensive over millions of calls and is taken
382	* care of in the slow path in convert_string_internal. JRA.
383	*/
384
385	#ifdef DEVELOPER
386	SMB_ASSERT(destlen != (size_t)-1);
387	#endif
388
389	if (srclen == 0)
390	return 0;
391
392	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393	const unsigned char p = (const unsigned char )src;
394	unsigned char q = (unsigned char )dest;
395	size_t slen = srclen;
396	size_t dlen = destlen;
397	unsigned char lastp = '\0';
398	size_t retval = 0;
399
400	/* If all characters are ascii, fast path here. */
401	while (slen && dlen) {
402	if ((lastp = *p) <= 0x7f) {
403	q++ = p++;
404	if (slen != (size_t)-1) {
405	slen--;
406	}
407	dlen--;
408	retval++;
409	if (!lastp)
410	break;
411	} else {
412	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413	goto general_case;
414	#else
415	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416	#endif
417	}
418	}
419	if (!dlen) {
420	/* Even if we fast path we should note if we ran out of room. */
421	if (((slen != (size_t)-1) && slen) \|\|
422	((slen == (size_t)-1) && lastp)) {
423	errno = E2BIG;
424	}
425	}
426	return retval;
427	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
428	const unsigned char p = (const unsigned char )src;
429	unsigned char q = (unsigned char )dest;
430	size_t retval = 0;
431	size_t slen = srclen;
432	size_t dlen = destlen;
433	unsigned char lastp = '\0';
434
435	/* If all characters are ascii, fast path here. */
436	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
437	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
438	q++ = p;
439	if (slen != (size_t)-1) {
440	slen -= 2;
441	}
442	p += 2;
443	dlen--;
444	retval++;
445	if (!lastp)
446	break;
447	} else {
448	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
449	goto general_case;
450	#else
451	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
452	#endif
453	}
454	}
455	if (!dlen) {
456	/* Even if we fast path we should note if we ran out of room. */
457	if (((slen != (size_t)-1) && slen) \|\|
458	((slen == (size_t)-1) && lastp)) {
459	errno = E2BIG;
460	}
461	}
462	return retval;
463	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
464	const unsigned char p = (const unsigned char )src;
465	unsigned char q = (unsigned char )dest;
466	size_t retval = 0;
467	size_t slen = srclen;
468	size_t dlen = destlen;
469	unsigned char lastp = '\0';
470
471	/* If all characters are ascii, fast path here. */
472	while (slen && (dlen >= 2)) {
473	if ((lastp = *p) <= 0x7F) {
474	q++ = p++;
475	*q++ = '\0';
476	if (slen != (size_t)-1) {
477	slen--;
478	}
479	dlen -= 2;
480	retval += 2;
481	if (!lastp)
482	break;
483	} else {
484	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
485	goto general_case;
486	#else
487	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
488	#endif
489	}
490	}
491	if (!dlen) {
492	/* Even if we fast path we should note if we ran out of room. */
493	if (((slen != (size_t)-1) && slen) \|\|
494	((slen == (size_t)-1) && lastp)) {
495	errno = E2BIG;
496	}
497	}
498	return retval;
499	}
500
501	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
502	general_case:
503	#endif
504	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
505	}
506
507	/**
508	* Convert between character sets, allocating a new buffer for the result.
509	*
510	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
511	* (this is a bad interface and needs fixing. JRA).
512	* @param srclen length of source buffer.
513	* @param dest always set at least to NULL
514	* @param converted_size set to the size of the allocated buffer on return
515	* true
516	* @note -1 is not accepted for srclen.
517	*
518	* @return true if new buffer was correctly allocated, and string was
519	* converted.
520	*
521	* Ensure the srclen contains the terminating zero.
522	*
523	* I hate the goto's in this function. It's embarrassing.....
524	* There has to be a cleaner way to do this. JRA.
525	**/
526
527	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
528	void const src, size_t srclen, void dst,
529	size_t *converted_size, bool allow_bad_conv)
530	{
531	size_t i_len, o_len, destlen = (srclen * 3) / 2;
532	size_t retval;
533	const char inbuf = (const char )src;
534	char outbuf = NULL, ob = NULL;
535	smb_iconv_t descriptor;
536	void dest = (void )dst;
537
538	*dest = NULL;
539
540	if (!converted_size) {
541	errno = EINVAL;
542	return false;
543	}
544
545	if (src == NULL \|\| srclen == (size_t)-1) {
546	errno = EINVAL;
547	return false;
548	}
549	if (srclen == 0) {
550	ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
551	if (ob == NULL) {
552	errno = ENOMEM;
553	return false;
554	}
555	*dest = ob;
556	*converted_size = 0;
557	return true;
558	}
559
560	lazy_initialize_conv();
561
562	descriptor = conv_handles[from][to];
563
564	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
565	if (!conv_silent)
566	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
567	errno = EOPNOTSUPP;
568	return false;
569	}
570
571	convert:
572
573	/* +2 is for ucs2 null termination. */
574	if ((destlen*2)+2 < destlen) {
575	/* wrapped ! abort. */
576	if (!conv_silent)
577	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
578	if (!ctx)
579	SAFE_FREE(outbuf);
580	errno = EOPNOTSUPP;
581	return false;
582	} else {
583	destlen = destlen * 2;
584	}
585
586	/* +2 is for ucs2 null termination. */
587	if (ctx) {
588	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
589	} else {
590	ob = (char *)SMB_REALLOC(ob, destlen + 2);
591	}
592
593	if (!ob) {
594	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
595	errno = ENOMEM;
596	return false;
597	}
598	outbuf = ob;
599	i_len = srclen;
600	o_len = destlen;
601
602	again:
603
604	retval = smb_iconv(descriptor,
605	&inbuf, &i_len,
606	&outbuf, &o_len);
607	if(retval == (size_t)-1) {
608	const char *reason="unknown error";
609	switch(errno) {
610	case EINVAL:
611	reason="Incomplete multibyte sequence";
612	if (!conv_silent)
613	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
614	if (allow_bad_conv)
615	goto use_as_is;
616	break;
617	case E2BIG:
618	goto convert;
619	case EILSEQ:
620	reason="Illegal multibyte sequence";
621	if (!conv_silent)
622	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
623	if (allow_bad_conv)
624	goto use_as_is;
625	break;
626	}
627	if (!conv_silent)
628	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
629	/* smb_panic(reason); */
630	if (ctx) {
631	TALLOC_FREE(ob);
632	} else {
633	SAFE_FREE(ob);
634	}
635	return false;
636	}
637
638	out:
639
640	destlen = destlen - o_len;
641	/* Don't shrink unless we're reclaiming a lot of
642	* space. This is in the hot codepath and these
643	* reallocs cost. JRA.
644	*/
645	if (o_len > 1024) {
646	/* We're shrinking here so we know the +2 is safe from wrap. */
647	if (ctx) {
648	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
649	} else {
650	ob = (char *)SMB_REALLOC(ob,destlen + 2);
651	}
652	}
653
654	if (destlen && !ob) {
655	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
656	errno = ENOMEM;
657	return false;
658	}
659
660	*dest = ob;
661
662	/* Must ucs2 null terminate in the extra space we allocated. */
663	ob[destlen] = '\0';
664	ob[destlen+1] = '\0';
665
666	*converted_size = destlen;
667	return true;
668
669	use_as_is:
670
671	/*
672	* Conversion not supported. This is actually an error, but there are so
673	* many misconfigured iconv systems and smb.conf's out there we can't just
674	* fail. Do a very bad conversion instead.... JRA.
675	*/
676
677	{
678	if (o_len == 0 \|\| i_len == 0)
679	goto out;
680
681	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
682	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
683	/* Can't convert from utf16 any endian to multibyte.
684	Replace with the default fail char.
685	*/
686
687	if (i_len < 2)
688	goto out;
689
690	if (i_len >= 2) {
691	*outbuf = lp_failed_convert_char();
692
693	outbuf++;
694	o_len--;
695
696	inbuf += 2;
697	i_len -= 2;
698	}
699
700	if (o_len == 0 \|\| i_len == 0)
701	goto out;
702
703	/* Keep trying with the next char... */
704	goto again;
705
706	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
707	/* Can't convert to UTF16LE - just widen by adding the
708	default fail char then zero.
709	*/
710	if (o_len < 2)
711	goto out;
712
713	outbuf[0] = lp_failed_convert_char();
714	outbuf[1] = '\0';
715
716	inbuf++;
717	i_len--;
718
719	outbuf += 2;
720	o_len -= 2;
721
722	if (o_len == 0 \|\| i_len == 0)
723	goto out;
724
725	/* Keep trying with the next char... */
726	goto again;
727
728	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
729	to != CH_UTF16LE && to != CH_UTF16BE) {
730	/* Failed multibyte to multibyte. Just copy the default fail char and
731	try again. */
732	outbuf[0] = lp_failed_convert_char();
733
734	inbuf++;
735	i_len--;
736
737	outbuf++;
738	o_len--;
739
740	if (o_len == 0 \|\| i_len == 0)
741	goto out;
742
743	/* Keep trying with the next char... */
744	goto again;
745
746	} else {
747	/* Keep compiler happy.... */
748	goto out;
749	}
750	}
751	}
752
753	/**
754	* Convert between character sets, allocating a new buffer using talloc for the result.
755	*
756	* @param srclen length of source buffer.
757	* @param dest always set at least to NULL
758	* @param converted_size set to the number of bytes occupied by the string in
759	* the destination on success.
760	* @note -1 is not accepted for srclen.
761	*
762	* @return true if new buffer was correctly allocated, and string was
763	* converted.
764	*/
765	bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
766	void const src, size_t srclen, void dst,
767	size_t *converted_size, bool allow_bad_conv)
768	{
769	void dest = (void )dst;
770
771	*dest = NULL;
772	return convert_string_allocate(ctx, from, to, src, srclen, dest,
773	converted_size, allow_bad_conv);
774	}
775
776	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
777	{
778	size_t size;
779	smb_ucs2_t *buffer;
780
781	if (!push_ucs2_allocate(&buffer, src, &size)) {
782	return (size_t)-1;
783	}
784
785	if (!strupper_w(buffer) && (dest == src)) {
786	free(buffer);
787	return srclen;
788	}
789
790	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
791	free(buffer);
792	return size;
793	}
794
795	/**
796	strdup() a unix string to upper case.
797	**/
798
799	char strdup_upper(const char s)
800	{
801	char *out_buffer = SMB_STRDUP(s);
802	const unsigned char p = (const unsigned char )s;
803	unsigned char q = (unsigned char )out_buffer;
804
805	if (!q) {
806	return NULL;
807	}
808
809	/* this is quite a common operation, so we want it to be
810	fast. We optimise for the ascii case, knowing that all our
811	supported multi-byte character sets are ascii-compatible
812	(ie. they match for the first 128 chars) */
813
814	while (*p) {
815	if (*p & 0x80)
816	break;
817	q++ = toupper_ascii_fast(p);
818	p++;
819	}
820
821	if (*p) {
822	/* MB case. */
823	size_t converted_size, converted_size2;
824	smb_ucs2_t *buffer = NULL;
825
826	SAFE_FREE(out_buffer);
827	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
828	strlen(s) + 1,
829	(void *)(void )&buffer,
830	&converted_size, True))
831	{
832	return NULL;
833	}
834
835	strupper_w(buffer);
836
837	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
838	converted_size,
839	(void *)(void )&out_buffer,
840	&converted_size2, True))
841	{
842	TALLOC_FREE(buffer);
843	return NULL;
844	}
845
846	/* Don't need the intermediate buffer
847	* anymore.
848	*/
849	TALLOC_FREE(buffer);
850	}
851
852	return out_buffer;
853	}
854
855	/**
856	talloc_strdup() a unix string to upper case.
857	**/
858
859	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
860	{
861	char *out_buffer = talloc_strdup(ctx,s);
862	const unsigned char p = (const unsigned char )s;
863	unsigned char q = (unsigned char )out_buffer;
864
865	if (!q) {
866	return NULL;
867	}
868
869	/* this is quite a common operation, so we want it to be
870	fast. We optimise for the ascii case, knowing that all our
871	supported multi-byte character sets are ascii-compatible
872	(ie. they match for the first 128 chars) */
873
874	while (*p) {
875	if (*p & 0x80)
876	break;
877	q++ = toupper_ascii_fast(p);
878	p++;
879	}
880
881	if (*p) {
882	/* MB case. */
883	size_t converted_size, converted_size2;
884	smb_ucs2_t *ubuf = NULL;
885
886	/* We're not using the ascii buffer above. */
887	TALLOC_FREE(out_buffer);
888
889	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
890	strlen(s)+1, (void *)&ubuf,
891	&converted_size, True))
892	{
893	return NULL;
894	}
895
896	strupper_w(ubuf);
897
898	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
899	converted_size, (void *)&out_buffer,
900	&converted_size2, True))
901	{
902	TALLOC_FREE(ubuf);
903	return NULL;
904	}
905
906	/* Don't need the intermediate buffer
907	* anymore.
908	*/
909	TALLOC_FREE(ubuf);
910	}
911
912	return out_buffer;
913	}
914
915	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
916	{
917	size_t size;
918	smb_ucs2_t *buffer = NULL;
919
920	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
921	(void *)(void )&buffer, &size,
922	True))
923	{
924	smb_panic("failed to create UCS2 buffer");
925	}
926	if (!strlower_w(buffer) && (dest == src)) {
927	SAFE_FREE(buffer);
928	return srclen;
929	}
930	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
931	SAFE_FREE(buffer);
932	return size;
933	}
934
935	/**
936	strdup() a unix string to lower case.
937	**/
938
939	char strdup_lower(const char s)
940	{
941	size_t converted_size;
942	smb_ucs2_t *buffer = NULL;
943	char *out_buffer;
944
945	if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
946	return NULL;
947	}
948
949	strlower_w(buffer);
950
951	if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
952	SAFE_FREE(buffer);
953	return NULL;
954	}
955
956	SAFE_FREE(buffer);
957
958	return out_buffer;
959	}
960
961	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
962	{
963	size_t converted_size;
964	smb_ucs2_t *buffer = NULL;
965	char *out_buffer;
966
967	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
968	return NULL;
969	}
970
971	strlower_w(buffer);
972
973	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
974	TALLOC_FREE(buffer);
975	return NULL;
976	}
977
978	TALLOC_FREE(buffer);
979
980	return out_buffer;
981	}
982
983
984	size_t ucs2_align(const void base_ptr, const void p, int flags)
985	{
986	if (flags & (STR_NOALIGN\|STR_ASCII))
987	return 0;
988	return PTR_DIFF(p, base_ptr) & 1;
989	}
990
991
992	/**
993	* Copy a string from a char* unix src to a dos codepage string destination.
994	*
995	* @return the number of bytes occupied by the string in the destination.
996	*
997	* @param flags can include
998	* <dl>
999	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1000	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1001	* </dl>
1002	*
1003	* @param dest_len the maximum length in bytes allowed in the
1004	* destination.
1005	**/
1006	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1007	{
1008	size_t src_len = strlen(src);
1009	char *tmpbuf = NULL;
1010	size_t ret;
1011
1012	/* No longer allow a length of -1. */
1013	if (dest_len == (size_t)-1) {
1014	smb_panic("push_ascii - dest_len == -1");
1015	}
1016
1017	if (flags & STR_UPPER) {
1018	tmpbuf = SMB_STRDUP(src);
1019	if (!tmpbuf) {
1020	smb_panic("malloc fail");
1021	}
1022	strupper_m(tmpbuf);
1023	src = tmpbuf;
1024	}
1025
1026	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1027	src_len++;
1028	}
1029
1030	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1031	if (ret == (size_t)-1 &&
1032	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1033	&& dest_len > 0) {
1034	((char *)dest)[0] = '\0';
1035	}
1036	SAFE_FREE(tmpbuf);
1037	return ret;
1038	}
1039
1040	size_t push_ascii_fstring(void dest, const char src)
1041	{
1042	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1043	}
1044
1045	/********************************************************************
1046	Push an nstring - ensure null terminated. Written by
1047	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1048	********************************************************************/
1049
1050	size_t push_ascii_nstring(void dest, const char src)
1051	{
1052	size_t i, buffer_len, dest_len;
1053	smb_ucs2_t *buffer;
1054
1055	conv_silent = True;
1056	if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1057	smb_panic("failed to create UCS2 buffer");
1058	}
1059
1060	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1061	buffer_len /= sizeof(smb_ucs2_t);
1062
1063	dest_len = 0;
1064	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1065	unsigned char mb[10];
1066	/* Convert one smb_ucs2_t character at a time. */
1067	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1068	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1069	memcpy((char *)dest + dest_len, mb, mb_len);
1070	dest_len += mb_len;
1071	} else {
1072	errno = E2BIG;
1073	break;
1074	}
1075	}
1076	((char *)dest)[dest_len] = '\0';
1077
1078	SAFE_FREE(buffer);
1079	conv_silent = False;
1080	return dest_len;
1081	}
1082
1083	/********************************************************************
1084	Push and malloc an ascii string. src and dest null terminated.
1085	********************************************************************/
1086
1087	bool push_ascii_allocate(char *dest, const char src, size_t *converted_size)
1088	{
1089	size_t src_len = strlen(src)+1;
1090
1091	*dest = NULL;
1092	return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1093	(void **)dest, converted_size, True);
1094	}
1095
1096	/**
1097	* Copy a string from a dos codepage source to a unix char* destination.
1098	*
1099	* The resulting string in "dest" is always null terminated.
1100	*
1101	* @param flags can have:
1102	* <dl>
1103	* <dt>STR_TERMINATE</dt>
1104	* <dd>STR_TERMINATE means the string in @p src
1105	* is null terminated, and src_len is ignored.</dd>
1106	* </dl>
1107	*
1108	* @param src_len is the length of the source area in bytes.
1109	* @returns the number of bytes occupied by the string in @p src.
1110	**/
1111	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1112	{
1113	size_t ret;
1114
1115	if (dest_len == (size_t)-1) {
1116	/* No longer allow dest_len of -1. */
1117	smb_panic("pull_ascii - invalid dest_len of -1");
1118	}
1119
1120	if (flags & STR_TERMINATE) {
1121	if (src_len == (size_t)-1) {
1122	src_len = strlen((const char *)src) + 1;
1123	} else {
1124	size_t len = strnlen((const char *)src, src_len);
1125	if (len < src_len)
1126	len++;
1127	src_len = len;
1128	}
1129	}
1130
1131	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1132	if (ret == (size_t)-1) {
1133	ret = 0;
1134	dest_len = 0;
1135	}
1136
1137	if (dest_len && ret) {
1138	/* Did we already process the terminating zero ? */
1139	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1140	dest[MIN(ret, dest_len-1)] = 0;
1141	}
1142	} else {
1143	dest[0] = 0;
1144	}
1145
1146	return src_len;
1147	}
1148
1149	/**
1150	* Copy a string from a dos codepage source to a unix char* destination.
1151	Talloc version.
1152	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1153	needs fixing. JRA).
1154	*
1155	* The resulting string in "dest" is always null terminated.
1156	*
1157	* @param flags can have:
1158	* <dl>
1159	* <dt>STR_TERMINATE</dt>
1160	* <dd>STR_TERMINATE means the string in @p src
1161	* is null terminated, and src_len is ignored.</dd>
1162	* </dl>
1163	*
1164	* @param src_len is the length of the source area in bytes.
1165	* @returns the number of bytes occupied by the string in @p src.
1166	**/
1167
1168	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1169	char **ppdest,
1170	const void *src,
1171	size_t src_len,
1172	int flags)
1173	{
1174	char *dest = NULL;
1175	size_t dest_len;
1176
1177	#ifdef DEVELOPER
1178	/* Ensure we never use the braindead "malloc" varient. */
1179	if (ctx == NULL) {
1180	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1181	}
1182	#endif
1183
1184	*ppdest = NULL;
1185
1186	if (!src_len) {
1187	return 0;
1188	}
1189
1190	if (flags & STR_TERMINATE) {
1191	if (src_len == (size_t)-1) {
1192	src_len = strlen((const char *)src) + 1;
1193	} else {
1194	size_t len = strnlen((const char *)src, src_len);
1195	if (len < src_len)
1196	len++;
1197	src_len = len;
1198	}
1199	/* Ensure we don't use an insane length from the client. */
1200	if (src_len >= 1024*1024) {
1201	char *msg = talloc_asprintf(ctx,
1202	"Bad src length (%u) in "
1203	"pull_ascii_base_talloc",
1204	(unsigned int)src_len);
1205	smb_panic(msg);
1206	}
1207	} else {
1208	/* Can't have an unlimited length
1209	* non STR_TERMINATE'd.
1210	*/
1211	if (src_len == (size_t)-1) {
1212	errno = EINVAL;
1213	return 0;
1214	}
1215	}
1216
1217	/* src_len != -1 here. */
1218
1219	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1220	&dest_len, True)) {
1221	dest_len = 0;
1222	}
1223
1224	if (dest_len && dest) {
1225	/* Did we already process the terminating zero ? */
1226	if (dest[dest_len-1] != 0) {
1227	size_t size = talloc_get_size(dest);
1228	/* Have we got space to append the '\0' ? */
1229	if (size <= dest_len) {
1230	/* No, realloc. */
1231	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1232	dest_len+1);
1233	if (!dest) {
1234	/* talloc fail. */
1235	dest_len = (size_t)-1;
1236	return 0;
1237	}
1238	}
1239	/* Yay - space ! */
1240	dest[dest_len] = '\0';
1241	dest_len++;
1242	}
1243	} else if (dest) {
1244	dest[0] = 0;
1245	}
1246
1247	*ppdest = dest;
1248	return src_len;
1249	}
1250
1251	size_t pull_ascii_fstring(char dest, const void src)
1252	{
1253	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1254	}
1255
1256	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1257
1258	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1259	{
1260	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1261	}
1262
1263	/**
1264	* Copy a string from a char* src to a unicode destination.
1265	*
1266	* @returns the number of bytes occupied by the string in the destination.
1267	*
1268	* @param flags can have:
1269	*
1270	* <dl>
1271	* <dt>STR_TERMINATE <dd>means include the null termination.
1272	* <dt>STR_UPPER <dd>means uppercase in the destination.
1273	* <dt>STR_NOALIGN <dd>means don't do alignment.
1274	* </dl>
1275	*
1276	* @param dest_len is the maximum length allowed in the
1277	* destination.
1278	**/
1279
1280	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1281	{
1282	size_t len=0;
1283	size_t src_len;
1284	size_t ret;
1285
1286	if (dest_len == (size_t)-1) {
1287	/* No longer allow dest_len of -1. */
1288	smb_panic("push_ucs2 - invalid dest_len of -1");
1289	}
1290
1291	if (flags & STR_TERMINATE)
1292	src_len = (size_t)-1;
1293	else
1294	src_len = strlen(src);
1295
1296	if (ucs2_align(base_ptr, dest, flags)) {
1297	(char )dest = 0;
1298	dest = (void )((char )dest + 1);
1299	if (dest_len)
1300	dest_len--;
1301	len++;
1302	}
1303
1304	/* ucs2 is always a multiple of 2 bytes */
1305	dest_len &= ~1;
1306
1307	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1308	if (ret == (size_t)-1) {
1309	if ((flags & STR_TERMINATE) &&
1310	dest &&
1311	dest_len) {
1312	(char )dest = 0;
1313	}
1314	return len;
1315	}
1316
1317	len += ret;
1318
1319	if (flags & STR_UPPER) {
1320	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1321	size_t i;
1322
1323	/* We check for i < (ret / 2) below as the dest string isn't null
1324	terminated if STR_TERMINATE isn't set. */
1325
1326	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1327	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1328	if (v != dest_ucs2[i]) {
1329	dest_ucs2[i] = v;
1330	}
1331	}
1332	}
1333
1334	return len;
1335	}
1336
1337
1338	/**
1339	* Copy a string from a unix char* src to a UCS2 destination,
1340	* allocating a buffer using talloc().
1341	*
1342	* @param dest always set at least to NULL
1343	* @parm converted_size set to the number of bytes occupied by the string in
1344	* the destination on success.
1345	*
1346	* @return true if new buffer was correctly allocated, and string was
1347	* converted.
1348	**/
1349	bool push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src,
1350	size_t *converted_size)
1351	{
1352	size_t src_len = strlen(src)+1;
1353
1354	*dest = NULL;
1355	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1356	(void **)dest, converted_size, True);
1357	}
1358
1359
1360	/**
1361	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1362	*
1363	* @param dest always set at least to NULL
1364	* @parm converted_size set to the number of bytes occupied by the string in
1365	* the destination on success.
1366	*
1367	* @return true if new buffer was correctly allocated, and string was
1368	* converted.
1369	**/
1370
1371	bool push_ucs2_allocate(smb_ucs2_t *dest, const char src,
1372	size_t *converted_size)
1373	{
1374	size_t src_len = strlen(src)+1;
1375
1376	*dest = NULL;
1377	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1378	(void **)dest, converted_size, True);
1379	}
1380
1381	/**
1382	Copy a string from a char* src to a UTF-8 destination.
1383	Return the number of bytes occupied by the string in the destination
1384	Flags can have:
1385	STR_TERMINATE means include the null termination
1386	STR_UPPER means uppercase in the destination
1387	dest_len is the maximum length allowed in the destination. If dest_len
1388	is -1 then no maxiumum is used.
1389	**/
1390
1391	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1392	{
1393	size_t src_len = 0;
1394	size_t ret;
1395	char *tmpbuf = NULL;
1396
1397	if (dest_len == (size_t)-1) {
1398	/* No longer allow dest_len of -1. */
1399	smb_panic("push_utf8 - invalid dest_len of -1");
1400	}
1401
1402	if (flags & STR_UPPER) {
1403	tmpbuf = strdup_upper(src);
1404	if (!tmpbuf) {
1405	return (size_t)-1;
1406	}
1407	src = tmpbuf;
1408	src_len = strlen(src);
1409	}
1410
1411	src_len = strlen(src);
1412	if (flags & STR_TERMINATE) {
1413	src_len++;
1414	}
1415
1416	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1417	SAFE_FREE(tmpbuf);
1418	return ret;
1419	}
1420
1421	size_t push_utf8_fstring(void dest, const char src)
1422	{
1423	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1424	}
1425
1426	/**
1427	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1428	*
1429	* @param dest always set at least to NULL
1430	* @parm converted_size set to the number of bytes occupied by the string in
1431	* the destination on success.
1432	*
1433	* @return true if new buffer was correctly allocated, and string was
1434	* converted.
1435	**/
1436
1437	bool push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1438	size_t *converted_size)
1439	{
1440	size_t src_len = strlen(src)+1;
1441
1442	*dest = NULL;
1443	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1444	(void**)dest, converted_size, True);
1445	}
1446
1447	/**
1448	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1449	*
1450	* @param dest always set at least to NULL
1451	* @parm converted_size set to the number of bytes occupied by the string in
1452	* the destination on success.
1453	*
1454	* @return true if new buffer was correctly allocated, and string was
1455	* converted.
1456	**/
1457
1458	bool push_utf8_allocate(char *dest, const char src, size_t *converted_size)
1459	{
1460	size_t src_len = strlen(src)+1;
1461
1462	*dest = NULL;
1463	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1464	(void **)dest, converted_size, True);
1465	}
1466
1467	/**
1468	Copy a string from a ucs2 source to a unix char* destination.
1469	Flags can have:
1470	STR_TERMINATE means the string in src is null terminated.
1471	STR_NOALIGN means don't try to align.
1472	if STR_TERMINATE is set then src_len is ignored if it is -1.
1473	src_len is the length of the source area in bytes
1474	Return the number of bytes occupied by the string in src.
1475	The resulting string in "dest" is always null terminated.
1476	**/
1477
1478	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1479	{
1480	size_t ret;
1481
1482	if (dest_len == (size_t)-1) {
1483	/* No longer allow dest_len of -1. */
1484	smb_panic("pull_ucs2 - invalid dest_len of -1");
1485	}
1486
1487	if (!src_len) {
1488	if (dest && dest_len > 0) {
1489	dest[0] = '\0';
1490	}
1491	return 0;
1492	}
1493
1494	if (ucs2_align(base_ptr, src, flags)) {
1495	src = (const void )((const char )src + 1);
1496	if (src_len != (size_t)-1)
1497	src_len--;
1498	}
1499
1500	if (flags & STR_TERMINATE) {
1501	/* src_len -1 is the default for null terminated strings. */
1502	if (src_len != (size_t)-1) {
1503	size_t len = strnlen_w((const smb_ucs2_t *)src,
1504	src_len/2);
1505	if (len < src_len/2)
1506	len++;
1507	src_len = len*2;
1508	}
1509	}
1510
1511	/* ucs2 is always a multiple of 2 bytes */
1512	if (src_len != (size_t)-1)
1513	src_len &= ~1;
1514
1515	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1516	if (ret == (size_t)-1) {
1517	ret = 0;
1518	dest_len = 0;
1519	}
1520
1521	if (src_len == (size_t)-1)
1522	src_len = ret*2;
1523
1524	if (dest_len && ret) {
1525	/* Did we already process the terminating zero ? */
1526	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1527	dest[MIN(ret, dest_len-1)] = 0;
1528	}
1529	} else {
1530	dest[0] = 0;
1531	}
1532
1533	return src_len;
1534	}
1535
1536	/**
1537	Copy a string from a ucs2 source to a unix char* destination.
1538	Talloc version with a base pointer.
1539	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1540	needs fixing. JRA).
1541	Flags can have:
1542	STR_TERMINATE means the string in src is null terminated.
1543	STR_NOALIGN means don't try to align.
1544	if STR_TERMINATE is set then src_len is ignored if it is -1.
1545	src_len is the length of the source area in bytes
1546	Return the number of bytes occupied by the string in src.
1547	The resulting string in "dest" is always null terminated.
1548	**/
1549
1550	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1551	const void *base_ptr,
1552	char **ppdest,
1553	const void *src,
1554	size_t src_len,
1555	int flags)
1556	{
1557	char *dest;
1558	size_t dest_len;
1559
1560	*ppdest = NULL;
1561
1562	#ifdef DEVELOPER
1563	/* Ensure we never use the braindead "malloc" varient. */
1564	if (ctx == NULL) {
1565	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1566	}
1567	#endif
1568
1569	if (!src_len) {
1570	return 0;
1571	}
1572
1573	if (ucs2_align(base_ptr, src, flags)) {
1574	src = (const void )((const char )src + 1);
1575	if (src_len != (size_t)-1)
1576	src_len--;
1577	}
1578
1579	if (flags & STR_TERMINATE) {
1580	/* src_len -1 is the default for null terminated strings. */
1581	if (src_len != (size_t)-1) {
1582	size_t len = strnlen_w((const smb_ucs2_t *)src,
1583	src_len/2);
1584	if (len < src_len/2)
1585	len++;
1586	src_len = len*2;
1587	} else {
1588	/*
1589	* src_len == -1 - alloc interface won't take this
1590	* so we must calculate.
1591	*/
1592	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1593	}
1594	/* Ensure we don't use an insane length from the client. */
1595	if (src_len >= 1024*1024) {
1596	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1597	}
1598	} else {
1599	/* Can't have an unlimited length
1600	* non STR_TERMINATE'd.
1601	*/
1602	if (src_len == (size_t)-1) {
1603	errno = EINVAL;
1604	return 0;
1605	}
1606	}
1607
1608	/* src_len != -1 here. */
1609
1610	/* ucs2 is always a multiple of 2 bytes */
1611	src_len &= ~1;
1612
1613	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1614	(void *)&dest, &dest_len, True)) {
1615	dest_len = 0;
1616	}
1617
1618	if (dest_len) {
1619	/* Did we already process the terminating zero ? */
1620	if (dest[dest_len-1] != 0) {
1621	size_t size = talloc_get_size(dest);
1622	/* Have we got space to append the '\0' ? */
1623	if (size <= dest_len) {
1624	/* No, realloc. */
1625	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1626	dest_len+1);
1627	if (!dest) {
1628	/* talloc fail. */
1629	dest_len = (size_t)-1;
1630	return 0;
1631	}
1632	}
1633	/* Yay - space ! */
1634	dest[dest_len] = '\0';
1635	dest_len++;
1636	}
1637	} else if (dest) {
1638	dest[0] = 0;
1639	}
1640
1641	*ppdest = dest;
1642	return src_len;
1643	}
1644
1645	size_t pull_ucs2_fstring(char dest, const void src)
1646	{
1647	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1648	}
1649
1650	/**
1651	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1652	*
1653	* @param dest always set at least to NULL
1654	* @parm converted_size set to the number of bytes occupied by the string in
1655	* the destination on success.
1656	*
1657	* @return true if new buffer was correctly allocated, and string was
1658	* converted.
1659	**/
1660
1661	bool pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src,
1662	size_t *converted_size)
1663	{
1664	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1665
1666	*dest = NULL;
1667	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1668	(void **)dest, converted_size, True);
1669	}
1670
1671	/**
1672	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1673	*
1674	* @param dest always set at least to NULL
1675	* @parm converted_size set to the number of bytes occupied by the string in
1676	* the destination on success.
1677	* @return true if new buffer was correctly allocated, and string was
1678	* converted.
1679	**/
1680
1681	bool pull_ucs2_allocate(char *dest, const smb_ucs2_t src,
1682	size_t *converted_size)
1683	{
1684	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1685
1686	*dest = NULL;
1687	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1688	(void **)dest, converted_size, True);
1689	}
1690
1691	/**
1692	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1693	*
1694	* @param dest always set at least to NULL
1695	* @parm converted_size set to the number of bytes occupied by the string in
1696	* the destination on success.
1697	*
1698	* @return true if new buffer was correctly allocated, and string was
1699	* converted.
1700	**/
1701
1702	bool pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1703	size_t *converted_size)
1704	{
1705	size_t src_len = strlen(src)+1;
1706
1707	*dest = NULL;
1708	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1709	(void **)dest, converted_size, True);
1710	}
1711
1712	/**
1713	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1714	*
1715	* @param dest always set at least to NULL
1716	* @parm converted_size set to the number of bytes occupied by the string in
1717	* the destination on success.
1718	*
1719	* @return true if new buffer was correctly allocated, and string was
1720	* converted.
1721	**/
1722
1723	bool pull_utf8_allocate(char *dest, const char src, size_t *converted_size)
1724	{
1725	size_t src_len = strlen(src)+1;
1726
1727	*dest = NULL;
1728	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1729	(void **)dest, converted_size, True);
1730	}
1731
1732	/**
1733	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1734	*
1735	* @param dest always set at least to NULL
1736	* @parm converted_size set to the number of bytes occupied by the string in
1737	* the destination on success.
1738	*
1739	* @return true if new buffer was correctly allocated, and string was
1740	* converted.
1741	**/
1742
1743	bool pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src,
1744	size_t *converted_size)
1745	{
1746	size_t src_len = strlen(src)+1;
1747
1748	*dest = NULL;
1749	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1750	(void **)dest, converted_size, True);
1751	}
1752
1753	/**
1754	Copy a string from a char* src to a unicode or ascii
1755	dos codepage destination choosing unicode or ascii based on the
1756	flags in the SMB buffer starting at base_ptr.
1757	Return the number of bytes occupied by the string in the destination.
1758	flags can have:
1759	STR_TERMINATE means include the null termination.
1760	STR_UPPER means uppercase in the destination.
1761	STR_ASCII use ascii even with unicode packet.
1762	STR_NOALIGN means don't do alignment.
1763	dest_len is the maximum length allowed in the destination. If dest_len
1764	is -1 then no maxiumum is used.
1765	**/
1766
1767	size_t push_string_fn(const char *function, unsigned int line,
1768	const void *base_ptr, uint16 flags2,
1769	void dest, const char src,
1770	size_t dest_len, int flags)
1771	{
1772	#ifdef DEVELOPER
1773	/* We really need to zero fill here, not clobber
1774	* region, as we want to ensure that valgrind thinks
1775	* all of the outgoing buffer has been written to
1776	* so a send() or write() won't trap an error.
1777	* JRA.
1778	*/
1779	#if 0
1780	clobber_region(function, line, dest, dest_len);
1781	#else
1782	memset(dest, '\0', dest_len);
1783	#endif
1784	#endif
1785
1786	if (!(flags & STR_ASCII) && \
1787	((flags & STR_UNICODE \|\| \
1788	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1789	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1790	}
1791	return push_ascii(dest, src, dest_len, flags);
1792	}
1793
1794
1795	/**
1796	Copy a string from a unicode or ascii source (depending on
1797	the packet flags) to a char* destination.
1798	Flags can have:
1799	STR_TERMINATE means the string in src is null terminated.
1800	STR_UNICODE means to force as unicode.
1801	STR_ASCII use ascii even with unicode packet.
1802	STR_NOALIGN means don't do alignment.
1803	if STR_TERMINATE is set then src_len is ignored is it is -1
1804	src_len is the length of the source area in bytes.
1805	Return the number of bytes occupied by the string in src.
1806	The resulting string in "dest" is always null terminated.
1807	**/
1808
1809	size_t pull_string_fn(const char *function,
1810	unsigned int line,
1811	const void *base_ptr,
1812	uint16 smb_flags2,
1813	char *dest,
1814	const void *src,
1815	size_t dest_len,
1816	size_t src_len,
1817	int flags)
1818	{
1819	#ifdef DEVELOPER
1820	clobber_region(function, line, dest, dest_len);
1821	#endif
1822
1823	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1824	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1825	"UNICODE defined");
1826	}
1827
1828	if (!(flags & STR_ASCII) && \
1829	((flags & STR_UNICODE \|\| \
1830	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1831	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1832	}
1833	return pull_ascii(dest, src, dest_len, src_len, flags);
1834	}
1835
1836	/**
1837	Copy a string from a unicode or ascii source (depending on
1838	the packet flags) to a char* destination.
1839	Variant that uses talloc.
1840	Flags can have:
1841	STR_TERMINATE means the string in src is null terminated.
1842	STR_UNICODE means to force as unicode.
1843	STR_ASCII use ascii even with unicode packet.
1844	STR_NOALIGN means don't do alignment.
1845	if STR_TERMINATE is set then src_len is ignored is it is -1
1846	src_len is the length of the source area in bytes.
1847	Return the number of bytes occupied by the string in src.
1848	The resulting string in "dest" is always null terminated.
1849	**/
1850
1851	size_t pull_string_talloc_fn(const char *function,
1852	unsigned int line,
1853	TALLOC_CTX *ctx,
1854	const void *base_ptr,
1855	uint16 smb_flags2,
1856	char **ppdest,
1857	const void *src,
1858	size_t src_len,
1859	int flags)
1860	{
1861	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1862	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1863	"UNICODE defined");
1864	}
1865
1866	if (!(flags & STR_ASCII) && \
1867	((flags & STR_UNICODE \|\| \
1868	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1869	return pull_ucs2_base_talloc(ctx,
1870	base_ptr,
1871	ppdest,
1872	src,
1873	src_len,
1874	flags);
1875	}
1876	return pull_ascii_base_talloc(ctx,
1877	ppdest,
1878	src,
1879	src_len,
1880	flags);
1881	}
1882
1883
1884	size_t align_string(const void base_ptr, const char p, int flags)
1885	{
1886	if (!(flags & STR_ASCII) && \
1887	((flags & STR_UNICODE \|\| \
1888	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1889	return ucs2_align(base_ptr, p, flags);
1890	}
1891	return 0;
1892	}
1893
1894	/*
1895	Return the unicode codepoint for the next multi-byte CH_UNIX character
1896	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1897
1898	Also return the number of bytes consumed (which tells the caller
1899	how many bytes to skip to get to the next CH_UNIX character).
1900
1901	Return INVALID_CODEPOINT if the next character cannot be converted.
1902	*/
1903
1904	codepoint_t next_codepoint(const char str, size_t size)
1905	{
1906	/* It cannot occupy more than 4 bytes in UTF16 format */
1907	uint8_t buf[4];
1908	smb_iconv_t descriptor;
1909	#ifdef __OS2__
1910	size_t ilen_max;
1911	size_t olen_orig;
1912	const char *inbuf;
1913	#endif
1914	size_t ilen_orig;
1915	size_t ilen;
1916	size_t olen;
1917
1918	char *outbuf;
1919
1920	#ifdef __OS2__
1921	*size = 1;
1922	#endif
1923
1924	if ((str[0] & 0x80) == 0) {
1925	#ifndef __OS2__
1926	*size = 1;
1927	#endif
1928	return (codepoint_t)str[0];
1929	}
1930
1931	lazy_initialize_conv();
1932
1933	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1934	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1935	#ifndef __OS2__
1936	*size = 1;
1937	#endif
1938	return INVALID_CODEPOINT;
1939	}
1940	#ifdef __OS2__
1941	/* We assume that no multi-byte character can take
1942	more than 5 bytes. This is OK as we only
1943	support codepoints up to 1M */
1944
1945	ilen_max = strnlen( str, 5 );
1946	#else
1947	*size = 1;
1948	#endif
1949	ilen_orig = 1;
1950	olen_orig = 2;
1951	while( 1 )
1952	{
1953	ilen = ilen_orig;
1954	olen = olen_orig;
1955	inbuf = str;
1956	outbuf = ( char * )buf;
1957	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1958	break;
1959
1960	switch( errno )
1961	{
1962	case E2BIG :
1963	if( olen_orig == 2 )
1964	olen_orig = 4;
1965	else
1966	return INVALID_CODEPOINT;
1967	break;
1968
1969
1970	case EINVAL :
1971	#ifndef __OS2__
1972	/* We assume that no multi-byte character can take
1973	more than 5 bytes. This is OK as we only
1974	support codepoints up to 1M */
1975	if( ilen_orig < 5 )
1976	#else
1977	if( ilen_orig < ilen_max )
1978	#endif
1979	ilen_orig++;
1980	else
1981	return INVALID_CODEPOINT;
1982	break;
1983
1984	case EILSEQ :
1985	default :
1986	return INVALID_CODEPOINT;
1987	}
1988	}
1989
1990	olen = olen_orig - olen;
1991
1992	*size = ilen_orig - ilen;
1993
1994	if (olen == 2) {
1995	/* 2 byte, UTF16-LE encoded value. */
1996	return (codepoint_t)SVAL(buf, 0);
1997	}
1998	if (olen == 4) {
1999	/* Decode a 4 byte UTF16-LE character manually.
2000	See RFC2871 for the encoding machanism.
2001	*/
2002	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
2003	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
2004
2005	return (codepoint_t)0x10000 +
2006	(w1 << 10) + w2;
2007	}
2008
2009	/* no other length is valid */
2010	return INVALID_CODEPOINT;
2011	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: