Context Navigation

source: branches/samba-3.5.x/source3/lib/charcnv.c@ 774

Visit:

Last change on this file since 774 was 599, checked in by Herwig Bauernfeind, 14 years ago
Samba 3.5: Update trunk to 3.5.9
File size: 50.0 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/*
 * Replacement character written in place of input that cannot be
 * converted. Hard-wired for now; we can parameterize this if someone
 * complains.... JRA.
 */

char lp_failed_convert_char(void)
{
	const char substitute = '_';

	return substitute;
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
/* Cache of conversion descriptors; the converters in this file look entries up as conv_handles[from][to]. */
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
/* Set by lazy_initialize_conv() once set-up has run; cleared again by gfree_charcnv(). */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret;
58
59	switch (ch) {
60	case CH_UTF16LE:
61	ret = "UTF-16LE";
62	break;
63	case CH_UTF16BE:
64	ret = "UTF-16BE";
65	break;
66	case CH_UNIX:
67	ret = lp_unix_charset();
68	break;
69	case CH_DOS:
70	ret = lp_dos_charset();
71	break;
72	case CH_DISPLAY:
73	ret = lp_display_charset();
74	break;
75	case CH_UTF8:
76	ret = "UTF8";
77	break;
78	default:
79	ret = NULL;
80	}
81
82	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83	if (ret && !strcmp(ret, "LOCALE")) {
84	const char *ln = NULL;
85
86	#ifdef HAVE_SETLOCALE
87	setlocale(LC_ALL, "");
88	#endif
89	ln = nl_langinfo(CODESET);
90	if (ln) {
91	/* Check whether the charset name is supported
92	by iconv */
93	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
94	if (handle == (smb_iconv_t) -1) {
95	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
96	ln = NULL;
97	} else {
98	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
99	smb_iconv_close(handle);
100	}
101	}
102	ret = ln;
103	}
104	#endif
105
106	if (!ret \|\| !*ret) ret = "ASCII";
107	return ret;
108	}
109
110	void lazy_initialize_conv(void)
111	{
112	if (!initialized) {
113	load_case_tables();
114	init_iconv();
115	initialized = true;
116	}
117	}
118
119	/**
120	* Destroy global objects allocated by init_iconv()
121	**/
122	void gfree_charcnv(void)
123	{
124	int c1, c2;
125
126	for (c1=0;c1<NUM_CHARSETS;c1++) {
127	for (c2=0;c2<NUM_CHARSETS;c2++) {
128	if ( conv_handles[c1][c2] ) {
129	smb_iconv_close( conv_handles[c1][c2] );
130	conv_handles[c1][c2] = 0;
131	}
132	}
133	}
134	initialized = false;
135	}
136
137	/**
138	* Initialize iconv conversion descriptors.
139	*
140	* This is called the first time it is needed, and also called again
141	* every time the configuration is reloaded, because the charset or
142	* codepage might have changed.
143	**/
144	void init_iconv(void)
145	{
146	int c1, c2;
147	bool did_reload = False;
148
149	/* so that charset_name() works we need to get the UNIX<->UCS2 going
150	first */
151	if (!conv_handles[CH_UNIX][CH_UTF16LE])
152	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
153
154	if (!conv_handles[CH_UTF16LE][CH_UNIX])
155	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
156
157	for (c1=0;c1<NUM_CHARSETS;c1++) {
158	for (c2=0;c2<NUM_CHARSETS;c2++) {
159	const char *n1 = charset_name((charset_t)c1);
160	const char *n2 = charset_name((charset_t)c2);
161	if (conv_handles[c1][c2] &&
162	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
163	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
164	continue;
165
166	did_reload = True;
167
168	if (conv_handles[c1][c2])
169	smb_iconv_close(conv_handles[c1][c2]);
170
171	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
172	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
173	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174	charset_name((charset_t)c1), charset_name((charset_t)c2)));
175	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
176	n1 = "ASCII";
177	}
178	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
179	n2 = "ASCII";
180	}
181	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
182	n1, n2 ));
183	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
184	if (!conv_handles[c1][c2]) {
185	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
186	smb_panic("init_iconv: conv_handle initialization failed");
187	}
188	}
189	}
190	}
191
192	if (did_reload) {
193	/* XXX: Does this really get called every time the dos
194	* codepage changes? */
195	/* XXX: Is the did_reload test too strict? */
196	conv_silent = True;
197	init_valid_table();
198	conv_silent = False;
199	}
200	}
201
202	/**
203	* Convert string from one encoding to another, making error checking etc
204	* Slow path version - uses (slow) iconv.
205	*
206	* @param src pointer to source string (multibyte or singlebyte)
207	* @param srclen length of the source string in bytes
208	* @param dest pointer to destination string (multibyte or singlebyte)
209	* @param destlen maximal length allowed for string
210	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
211	* @returns the number of bytes occupied in the destination
212	*
213	* Ensure the srclen contains the terminating zero.
214	*
215	**/
216
217	static size_t convert_string_internal(charset_t from, charset_t to,
218	void const *src, size_t srclen,
219	void *dest, size_t destlen, bool allow_bad_conv)
220	{
221	size_t i_len, o_len;
222	size_t retval;
223	const char* inbuf = (const char*)src;
224	char* outbuf = (char*)dest;
225	smb_iconv_t descriptor;
226
227	lazy_initialize_conv();
228
229	descriptor = conv_handles[from][to];
230
231	if (srclen == (size_t)-1) {
232	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
233	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
234	} else {
235	srclen = strlen((const char *)src)+1;
236	}
237	}
238
239
240	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
241	if (!conv_silent)
242	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
243	return (size_t)-1;
244	}
245
246	i_len=srclen;
247	o_len=destlen;
248
249	again:
250
251	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
252	if(retval==(size_t)-1) {
253	const char *reason="unknown error";
254	switch(errno) {
255	case EINVAL:
256	reason="Incomplete multibyte sequence";
257	if (!conv_silent)
258	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
259	if (allow_bad_conv)
260	goto use_as_is;
261	return (size_t)-1;
262	case E2BIG:
263	reason="No more room";
264	if (!conv_silent) {
265	if (from == CH_UNIX) {
266	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
267	charset_name(from), charset_name(to),
268	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
269	} else {
270	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
271	charset_name(from), charset_name(to),
272	(unsigned int)srclen, (unsigned int)destlen));
273	}
274	}
275	break;
276	case EILSEQ:
277	reason="Illegal multibyte sequence";
278	if (!conv_silent)
279	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
280	if (allow_bad_conv)
281	goto use_as_is;
282
283	return (size_t)-1;
284	default:
285	if (!conv_silent)
286	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
287	return (size_t)-1;
288	}
289	/* smb_panic(reason); */
290	}
291	return destlen-o_len;
292
293	use_as_is:
294
295	/*
296	* Conversion not supported. This is actually an error, but there are so
297	* many misconfigured iconv systems and smb.conf's out there we can't just
298	* fail. Do a very bad conversion instead.... JRA.
299	*/
300
301	{
302	if (o_len == 0 \|\| i_len == 0)
303	return destlen - o_len;
304
305	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
306	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
307	/* Can't convert from utf16 any endian to multibyte.
308	Replace with the default fail char.
309	*/
310	if (i_len < 2)
311	return destlen - o_len;
312	if (i_len >= 2) {
313	*outbuf = lp_failed_convert_char();
314
315	outbuf++;
316	o_len--;
317
318	inbuf += 2;
319	i_len -= 2;
320	}
321
322	if (o_len == 0 \|\| i_len == 0)
323	return destlen - o_len;
324
325	/* Keep trying with the next char... */
326	goto again;
327
328	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
329	/* Can't convert to UTF16LE - just widen by adding the
330	default fail char then zero.
331	*/
332	if (o_len < 2)
333	return destlen - o_len;
334
335	outbuf[0] = lp_failed_convert_char();
336	outbuf[1] = '\0';
337
338	inbuf++;
339	i_len--;
340
341	outbuf += 2;
342	o_len -= 2;
343
344	if (o_len == 0 \|\| i_len == 0)
345	return destlen - o_len;
346
347	/* Keep trying with the next char... */
348	goto again;
349
350	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
351	to != CH_UTF16LE && to != CH_UTF16BE) {
352	/* Failed multibyte to multibyte. Just copy the default fail char and
353	try again. */
354	outbuf[0] = lp_failed_convert_char();
355
356	inbuf++;
357	i_len--;
358
359	outbuf++;
360	o_len--;
361
362	if (o_len == 0 \|\| i_len == 0)
363	return destlen - o_len;
364
365	/* Keep trying with the next char... */
366	goto again;
367
368	} else {
369	/* Keep compiler happy.... */
370	return destlen - o_len;
371	}
372	}
373	}
374
375	/**
376	* Convert string from one encoding to another, making error checking etc
377	* Fast path version - handles ASCII first.
378	*
379	* @param src pointer to source string (multibyte or singlebyte)
380	* @param srclen length of the source string in bytes, or -1 for nul terminated.
381	* @param dest pointer to destination string (multibyte or singlebyte)
382	* @param destlen maximal length allowed for string - NEVER -1.
383	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
384	* @returns the number of bytes occupied in the destination
385	*
386	* Ensure the srclen contains the terminating zero.
387	*
388	* This function has been hand-tuned to provide a fast path.
389	* Don't change unless you really know what you are doing. JRA.
390	**/
391
392	size_t convert_string(charset_t from, charset_t to,
393	void const *src, size_t srclen,
394	void *dest, size_t destlen, bool allow_bad_conv)
395	{
396	/*
397	* NB. We deliberately don't do a strlen here if srclen == -1.
398	* This is very expensive over millions of calls and is taken
399	* care of in the slow path in convert_string_internal. JRA.
400	*/
401
402	#ifdef DEVELOPER
403	SMB_ASSERT(destlen != (size_t)-1);
404	#endif
405
406	if (srclen == 0)
407	return 0;
408
409	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
410	const unsigned char p = (const unsigned char )src;
411	unsigned char q = (unsigned char )dest;
412	size_t slen = srclen;
413	size_t dlen = destlen;
414	unsigned char lastp = '\0';
415	size_t retval = 0;
416
417	/* If all characters are ascii, fast path here. */
418	while (slen && dlen) {
419	if ((lastp = *p) <= 0x7f) {
420	q++ = p++;
421	if (slen != (size_t)-1) {
422	slen--;
423	}
424	dlen--;
425	retval++;
426	if (!lastp)
427	break;
428	} else {
429	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
430	goto general_case;
431	#else
432	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
433	if (ret == (size_t)-1) {
434	return ret;
435	}
436	return retval + ret;
437	#endif
438	}
439	}
440	if (!dlen) {
441	/* Even if we fast path we should note if we ran out of room. */
442	if (((slen != (size_t)-1) && slen) \|\|
443	((slen == (size_t)-1) && lastp)) {
444	errno = E2BIG;
445	}
446	}
447	return retval;
448	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
449	const unsigned char p = (const unsigned char )src;
450	unsigned char q = (unsigned char )dest;
451	size_t retval = 0;
452	size_t slen = srclen;
453	size_t dlen = destlen;
454	unsigned char lastp = '\0';
455
456	/* If all characters are ascii, fast path here. */
457	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
458	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
459	q++ = p;
460	if (slen != (size_t)-1) {
461	slen -= 2;
462	}
463	p += 2;
464	dlen--;
465	retval++;
466	if (!lastp)
467	break;
468	} else {
469	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
470	goto general_case;
471	#else
472	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
473	if (ret == (size_t)-1) {
474	return ret;
475	}
476	return retval + ret;
477	#endif
478	}
479	}
480	if (!dlen) {
481	/* Even if we fast path we should note if we ran out of room. */
482	if (((slen != (size_t)-1) && slen) \|\|
483	((slen == (size_t)-1) && lastp)) {
484	errno = E2BIG;
485	}
486	}
487	return retval;
488	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
489	const unsigned char p = (const unsigned char )src;
490	unsigned char q = (unsigned char )dest;
491	size_t retval = 0;
492	size_t slen = srclen;
493	size_t dlen = destlen;
494	unsigned char lastp = '\0';
495
496	/* If all characters are ascii, fast path here. */
497	while (slen && (dlen >= 2)) {
498	if ((lastp = *p) <= 0x7F) {
499	q++ = p++;
500	*q++ = '\0';
501	if (slen != (size_t)-1) {
502	slen--;
503	}
504	dlen -= 2;
505	retval += 2;
506	if (!lastp)
507	break;
508	} else {
509	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
510	goto general_case;
511	#else
512	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
513	if (ret == (size_t)-1) {
514	return ret;
515	}
516	return retval + ret;
517	#endif
518	}
519	}
520	if (!dlen) {
521	/* Even if we fast path we should note if we ran out of room. */
522	if (((slen != (size_t)-1) && slen) \|\|
523	((slen == (size_t)-1) && lastp)) {
524	errno = E2BIG;
525	}
526	}
527	return retval;
528	}
529
530	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
531	general_case:
532	#endif
533	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
534	}
535
536	/**
537	* Convert between character sets, allocating a new buffer using talloc for the result.
538	*
539	* @param srclen length of source buffer.
540	* @param dest always set at least to NULL
541	* @parm converted_size set to the number of bytes occupied by the string in
542	* the destination on success.
543	* @note -1 is not accepted for srclen.
544	*
545	* @return true if new buffer was correctly allocated, and string was
546	* converted.
547	*
548	* Ensure the srclen contains the terminating zero.
549	*
550	* I hate the goto's in this function. It's embarressing.....
551	* There has to be a cleaner way to do this. JRA.
552	*/
553	bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
554	void const src, size_t srclen, void dst,
555	size_t *converted_size, bool allow_bad_conv)
556
557	{
558	size_t i_len, o_len, destlen = (srclen * 3) / 2;
559	size_t retval;
560	const char inbuf = (const char )src;
561	char outbuf = NULL, ob = NULL;
562	smb_iconv_t descriptor;
563	void dest = (void )dst;
564
565	*dest = NULL;
566
567	if (!converted_size) {
568	errno = EINVAL;
569	return false;
570	}
571
572	if (src == NULL \|\| srclen == (size_t)-1) {
573	errno = EINVAL;
574	return false;
575	}
576
577	if (srclen == 0) {
578	/* We really should treat this as an error, but
579	there are too many callers that need this to
580	return a NULL terminated string in the correct
581	character set. */
582	if (to == CH_UTF16LE\|\| to == CH_UTF16BE \|\| to == CH_UTF16MUNGED) {
583	destlen = 2;
584	} else {
585	destlen = 1;
586	}
587	ob = talloc_zero_array(ctx, char, destlen);
588	if (ob == NULL) {
589	errno = ENOMEM;
590	return false;
591	}
592	*converted_size = destlen;
593	*dest = ob;
594	return true;
595	}
596
597	lazy_initialize_conv();
598
599	descriptor = conv_handles[from][to];
600
601	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
602	if (!conv_silent)
603	DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
604	errno = EOPNOTSUPP;
605	return false;
606	}
607
608	convert:
609
610	/* +2 is for ucs2 null termination. */
611	if ((destlen*2)+2 < destlen) {
612	/* wrapped ! abort. */
613	if (!conv_silent)
614	DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
615	TALLOC_FREE(outbuf);
616	errno = EOPNOTSUPP;
617	return false;
618	} else {
619	destlen = destlen * 2;
620	}
621
622	/* +2 is for ucs2 null termination. */
623	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
624
625	if (!ob) {
626	DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
627	errno = ENOMEM;
628	return false;
629	}
630	outbuf = ob;
631	i_len = srclen;
632	o_len = destlen;
633
634	again:
635
636	retval = smb_iconv(descriptor,
637	&inbuf, &i_len,
638	&outbuf, &o_len);
639	if(retval == (size_t)-1) {
640	const char *reason="unknown error";
641	switch(errno) {
642	case EINVAL:
643	reason="Incomplete multibyte sequence";
644	if (!conv_silent)
645	DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
646	if (allow_bad_conv)
647	goto use_as_is;
648	break;
649	case E2BIG:
650	goto convert;
651	case EILSEQ:
652	reason="Illegal multibyte sequence";
653	if (!conv_silent)
654	DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
655	if (allow_bad_conv)
656	goto use_as_is;
657	break;
658	}
659	if (!conv_silent)
660	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
661	/* smb_panic(reason); */
662	TALLOC_FREE(ob);
663	return false;
664	}
665
666	out:
667
668	destlen = destlen - o_len;
669	/* Don't shrink unless we're reclaiming a lot of
670	* space. This is in the hot codepath and these
671	* reallocs cost. JRA.
672	*/
673	if (o_len > 1024) {
674	/* We're shrinking here so we know the +2 is safe from wrap. */
675	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
676	}
677
678	if (destlen && !ob) {
679	DEBUG(0, ("convert_string_talloc: out of memory!\n"));
680	errno = ENOMEM;
681	return false;
682	}
683
684	*dest = ob;
685
686	/* Must ucs2 null terminate in the extra space we allocated. */
687	ob[destlen] = '\0';
688	ob[destlen+1] = '\0';
689
690	/* Ensure we can never return a converted_size of zero. /
691	if (destlen == 0) {
692	/* This can happen from a bad iconv "use_as_is:" call. */
693	if (to == CH_UTF16LE\|\| to == CH_UTF16BE \|\| to == CH_UTF16MUNGED) {
694	destlen = 2;
695	} else {
696	destlen = 1;
697	}
698	}
699
700	*converted_size = destlen;
701	return true;
702
703	use_as_is:
704
705	/*
706	* Conversion not supported. This is actually an error, but there are so
707	* many misconfigured iconv systems and smb.conf's out there we can't just
708	* fail. Do a very bad conversion instead.... JRA.
709	*/
710
711	{
712	if (o_len == 0 \|\| i_len == 0)
713	goto out;
714
715	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
716	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
717	/* Can't convert from utf16 any endian to multibyte.
718	Replace with the default fail char.
719	*/
720
721	if (i_len < 2)
722	goto out;
723
724	if (i_len >= 2) {
725	*outbuf = lp_failed_convert_char();
726
727	outbuf++;
728	o_len--;
729
730	inbuf += 2;
731	i_len -= 2;
732	}
733
734	if (o_len == 0 \|\| i_len == 0)
735	goto out;
736
737	/* Keep trying with the next char... */
738	goto again;
739
740	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
741	/* Can't convert to UTF16LE - just widen by adding the
742	default fail char then zero.
743	*/
744	if (o_len < 2)
745	goto out;
746
747	outbuf[0] = lp_failed_convert_char();
748	outbuf[1] = '\0';
749
750	inbuf++;
751	i_len--;
752
753	outbuf += 2;
754	o_len -= 2;
755
756	if (o_len == 0 \|\| i_len == 0)
757	goto out;
758
759	/* Keep trying with the next char... */
760	goto again;
761
762	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
763	to != CH_UTF16LE && to != CH_UTF16BE) {
764	/* Failed multibyte to multibyte. Just copy the default fail char and
765	try again. */
766	outbuf[0] = lp_failed_convert_char();
767
768	inbuf++;
769	i_len--;
770
771	outbuf++;
772	o_len--;
773
774	if (o_len == 0 \|\| i_len == 0)
775	goto out;
776
777	/* Keep trying with the next char... */
778	goto again;
779
780	} else {
781	/* Keep compiler happy.... */
782	goto out;
783	}
784	}
785	}
786
787	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
788	{
789	size_t size;
790	smb_ucs2_t *buffer;
791
792	if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
793	return (size_t)-1;
794	}
795
796	if (!strupper_w(buffer) && (dest == src)) {
797	TALLOC_FREE(buffer);
798	return srclen;
799	}
800
801	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
802	TALLOC_FREE(buffer);
803	return size;
804	}
805
806	/**
807	talloc_strdup() a unix string to upper case.
808	**/
809
810	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
811	{
812	char *out_buffer = talloc_strdup(ctx,s);
813	const unsigned char p = (const unsigned char )s;
814	unsigned char q = (unsigned char )out_buffer;
815
816	if (!q) {
817	return NULL;
818	}
819
820	/* this is quite a common operation, so we want it to be
821	fast. We optimise for the ascii case, knowing that all our
822	supported multi-byte character sets are ascii-compatible
823	(ie. they match for the first 128 chars) */
824
825	while (*p) {
826	if (*p & 0x80)
827	break;
828	q++ = toupper_ascii_fast(p);
829	p++;
830	}
831
832	if (*p) {
833	/* MB case. */
834	size_t converted_size, converted_size2;
835	smb_ucs2_t *ubuf = NULL;
836
837	/* We're not using the ascii buffer above. */
838	TALLOC_FREE(out_buffer);
839
840	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
841	strlen(s)+1, (void *)&ubuf,
842	&converted_size, True))
843	{
844	return NULL;
845	}
846
847	strupper_w(ubuf);
848
849	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
850	converted_size, (void *)&out_buffer,
851	&converted_size2, True))
852	{
853	TALLOC_FREE(ubuf);
854	return NULL;
855	}
856
857	/* Don't need the intermediate buffer
858	* anymore.
859	*/
860	TALLOC_FREE(ubuf);
861	}
862
863	return out_buffer;
864	}
865
866	char strupper_talloc(TALLOC_CTX ctx, const char *s) {
867	return talloc_strdup_upper(ctx, s);
868	}
869
870
871	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
872	{
873	size_t size;
874	smb_ucs2_t *buffer = NULL;
875
876	if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
877	(void *)(void )&buffer, &size,
878	True))
879	{
880	smb_panic("failed to create UCS2 buffer");
881	}
882	if (!strlower_w(buffer) && (dest == src)) {
883	TALLOC_FREE(buffer);
884	return srclen;
885	}
886	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
887	TALLOC_FREE(buffer);
888	return size;
889	}
890
891
892	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
893	{
894	size_t converted_size;
895	smb_ucs2_t *buffer = NULL;
896	char *out_buffer;
897
898	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
899	return NULL;
900	}
901
902	strlower_w(buffer);
903
904	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
905	TALLOC_FREE(buffer);
906	return NULL;
907	}
908
909	TALLOC_FREE(buffer);
910
911	return out_buffer;
912	}
913
914	char strlower_talloc(TALLOC_CTX ctx, const char *s) {
915	return talloc_strdup_lower(ctx, s);
916	}
917
918	size_t ucs2_align(const void base_ptr, const void p, int flags)
919	{
920	if (flags & (STR_NOALIGN\|STR_ASCII))
921	return 0;
922	return PTR_DIFF(p, base_ptr) & 1;
923	}
924
925
926	/**
927	* Copy a string from a char* unix src to a dos codepage string destination.
928	*
929	* @return the number of bytes occupied by the string in the destination.
930	*
931	* @param flags can include
932	* <dl>
933	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
934	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
935	* </dl>
936	*
937	* @param dest_len the maximum length in bytes allowed in the
938	* destination.
939	**/
940	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
941	{
942	size_t src_len = strlen(src);
943	char *tmpbuf = NULL;
944	size_t ret;
945
946	/* No longer allow a length of -1. */
947	if (dest_len == (size_t)-1) {
948	smb_panic("push_ascii - dest_len == -1");
949	}
950
951	if (flags & STR_UPPER) {
952	tmpbuf = SMB_STRDUP(src);
953	if (!tmpbuf) {
954	smb_panic("malloc fail");
955	}
956	strupper_m(tmpbuf);
957	src = tmpbuf;
958	}
959
960	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
961	src_len++;
962	}
963
964	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
965	if (ret == (size_t)-1 &&
966	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
967	&& dest_len > 0) {
968	((char *)dest)[0] = '\0';
969	}
970	SAFE_FREE(tmpbuf);
971	return ret;
972	}
973
974	size_t push_ascii_fstring(void dest, const char src)
975	{
976	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
977	}
978
979	/********************************************************************
980	Push an nstring - ensure null terminated. Written by
981	moriyama@miraclelinux.com (MORIYAMA Masayuki).
982	********************************************************************/
983
984	size_t push_ascii_nstring(void dest, const char src)
985	{
986	size_t i, buffer_len, dest_len;
987	smb_ucs2_t *buffer;
988
989	conv_silent = True;
990	if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &buffer_len)) {
991	smb_panic("failed to create UCS2 buffer");
992	}
993
994	/* We're using buffer_len below to count ucs2 characters, not bytes. */
995	buffer_len /= sizeof(smb_ucs2_t);
996
997	dest_len = 0;
998	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
999	unsigned char mb[10];
1000	/* Convert one smb_ucs2_t character at a time. */
1001	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1002	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1003	memcpy((char *)dest + dest_len, mb, mb_len);
1004	dest_len += mb_len;
1005	} else {
1006	errno = E2BIG;
1007	break;
1008	}
1009	}
1010	((char *)dest)[dest_len] = '\0';
1011
1012	conv_silent = False;
1013	TALLOC_FREE(buffer);
1014	return dest_len;
1015	}
1016
1017	/********************************************************************
1018	Push and malloc an ascii string. src and dest null terminated.
1019	********************************************************************/
1020
1021	bool push_ascii_talloc(TALLOC_CTX mem_ctx, char dest, const char src, size_t *converted_size)
1022	{
1023	size_t src_len = strlen(src)+1;
1024
1025	*dest = NULL;
1026	return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
1027	(void **)dest, converted_size, True);
1028	}
1029
1030	/**
1031	* Copy a string from a dos codepage source to a unix char* destination.
1032	*
1033	* The resulting string in "dest" is always null terminated.
1034	*
1035	* @param flags can have:
1036	* <dl>
1037	* <dt>STR_TERMINATE</dt>
1038	* <dd>STR_TERMINATE means the string in @p src
1039	* is null terminated, and src_len is ignored.</dd>
1040	* </dl>
1041	*
1042	* @param src_len is the length of the source area in bytes.
1043	* @returns the number of bytes occupied by the string in @p src.
1044	**/
1045	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1046	{
1047	size_t ret;
1048
1049	if (dest_len == (size_t)-1) {
1050	/* No longer allow dest_len of -1. */
1051	smb_panic("pull_ascii - invalid dest_len of -1");
1052	}
1053
1054	if (flags & STR_TERMINATE) {
1055	if (src_len == (size_t)-1) {
1056	src_len = strlen((const char *)src) + 1;
1057	} else {
1058	size_t len = strnlen((const char *)src, src_len);
1059	if (len < src_len)
1060	len++;
1061	src_len = len;
1062	}
1063	}
1064
1065	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1066	if (ret == (size_t)-1) {
1067	ret = 0;
1068	dest_len = 0;
1069	}
1070
1071	if (dest_len && ret) {
1072	/* Did we already process the terminating zero ? */
1073	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1074	dest[MIN(ret, dest_len-1)] = 0;
1075	}
1076	} else {
1077	dest[0] = 0;
1078	}
1079
1080	return src_len;
1081	}
1082
1083	/**
1084	* Copy a string from a dos codepage source to a unix char* destination.
1085	* Talloc version.
1086	*
1087	* The resulting string in "dest" is always null terminated.
1088	*
1089	* @param flags can have:
1090	* <dl>
1091	* <dt>STR_TERMINATE</dt>
1092	* <dd>STR_TERMINATE means the string in @p src
1093	* is null terminated, and src_len is ignored.</dd>
1094	* </dl>
1095	*
1096	* @param src_len is the length of the source area in bytes.
1097	* @returns the number of bytes occupied by the string in @p src.
1098	**/
1099
1100	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1101	char **ppdest,
1102	const void *src,
1103	size_t src_len,
1104	int flags)
1105	{
1106	char *dest = NULL;
1107	size_t dest_len;
1108
1109	*ppdest = NULL;
1110
1111	if (!src_len) {
1112	return 0;
1113	}
1114
1115	if (flags & STR_TERMINATE) {
1116	if (src_len == (size_t)-1) {
1117	src_len = strlen((const char *)src) + 1;
1118	} else {
1119	size_t len = strnlen((const char *)src, src_len);
1120	if (len < src_len)
1121	len++;
1122	src_len = len;
1123	}
1124	/* Ensure we don't use an insane length from the client. */
1125	if (src_len >= 1024*1024) {
1126	char *msg = talloc_asprintf(ctx,
1127	"Bad src length (%u) in "
1128	"pull_ascii_base_talloc",
1129	(unsigned int)src_len);
1130	smb_panic(msg);
1131	}
1132	} else {
1133	/* Can't have an unlimited length
1134	* non STR_TERMINATE'd.
1135	*/
1136	if (src_len == (size_t)-1) {
1137	errno = EINVAL;
1138	return 0;
1139	}
1140	}
1141
1142	/* src_len != -1 here. */
1143
1144	if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1145	&dest_len, True)) {
1146	dest_len = 0;
1147	}
1148
1149	if (dest_len && dest) {
1150	/* Did we already process the terminating zero ? */
1151	if (dest[dest_len-1] != 0) {
1152	size_t size = talloc_get_size(dest);
1153	/* Have we got space to append the '\0' ? */
1154	if (size <= dest_len) {
1155	/* No, realloc. */
1156	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1157	dest_len+1);
1158	if (!dest) {
1159	/* talloc fail. */
1160	dest_len = (size_t)-1;
1161	return 0;
1162	}
1163	}
1164	/* Yay - space ! */
1165	dest[dest_len] = '\0';
1166	dest_len++;
1167	}
1168	} else if (dest) {
1169	dest[0] = 0;
1170	}
1171
1172	*ppdest = dest;
1173	return src_len;
1174	}
1175
1176	size_t pull_ascii_fstring(char dest, const void src)
1177	{
1178	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1179	}
1180
1181	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1182
1183	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1184	{
1185	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1186	}
1187
1188	/**
1189	* Copy a string from a char* src to a unicode destination.
1190	*
1191	* @returns the number of bytes occupied by the string in the destination.
1192	*
1193	* @param flags can have:
1194	*
1195	* <dl>
1196	* <dt>STR_TERMINATE <dd>means include the null termination.
1197	* <dt>STR_UPPER <dd>means uppercase in the destination.
1198	* <dt>STR_NOALIGN <dd>means don't do alignment.
1199	* </dl>
1200	*
1201	* @param dest_len is the maximum length allowed in the
1202	* destination.
1203	**/
1204
1205	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1206	{
1207	size_t len=0;
1208	size_t src_len;
1209	size_t ret;
1210
1211	if (dest_len == (size_t)-1) {
1212	/* No longer allow dest_len of -1. */
1213	smb_panic("push_ucs2 - invalid dest_len of -1");
1214	}
1215
1216	if (flags & STR_TERMINATE)
1217	src_len = (size_t)-1;
1218	else
1219	src_len = strlen(src);
1220
1221	if (ucs2_align(base_ptr, dest, flags)) {
1222	(char )dest = 0;
1223	dest = (void )((char )dest + 1);
1224	if (dest_len)
1225	dest_len--;
1226	len++;
1227	}
1228
1229	/* ucs2 is always a multiple of 2 bytes */
1230	dest_len &= ~1;
1231
1232	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1233	if (ret == (size_t)-1) {
1234	if ((flags & STR_TERMINATE) &&
1235	dest &&
1236	dest_len) {
1237	(char )dest = 0;
1238	}
1239	return len;
1240	}
1241
1242	len += ret;
1243
1244	if (flags & STR_UPPER) {
1245	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1246	size_t i;
1247
1248	/* We check for i < (ret / 2) below as the dest string isn't null
1249	terminated if STR_TERMINATE isn't set. */
1250
1251	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1252	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1253	if (v != dest_ucs2[i]) {
1254	dest_ucs2[i] = v;
1255	}
1256	}
1257	}
1258
1259	return len;
1260	}
1261
1262
1263	/**
1264	* Copy a string from a unix char* src to a UCS2 destination,
1265	* allocating a buffer using talloc().
1266	*
1267	* @param dest always set at least to NULL
1268	* @parm converted_size set to the number of bytes occupied by the string in
1269	* the destination on success.
1270	*
1271	* @return true if new buffer was correctly allocated, and string was
1272	* converted.
1273	**/
1274	bool push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src,
1275	size_t *converted_size)
1276	{
1277	size_t src_len = strlen(src)+1;
1278
1279	*dest = NULL;
1280	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1281	(void **)dest, converted_size, True);
1282	}
1283
1284
1285	/**
1286	Copy a string from a char* src to a UTF-8 destination.
1287	Return the number of bytes occupied by the string in the destination
1288	Flags can have:
1289	STR_TERMINATE means include the null termination
1290	STR_UPPER means uppercase in the destination
1291	dest_len is the maximum length allowed in the destination. If dest_len
1292	is -1 then no maxiumum is used.
1293	**/
1294
1295	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1296	{
1297	size_t src_len = 0;
1298	size_t ret;
1299	char *tmpbuf = NULL;
1300
1301	if (dest_len == (size_t)-1) {
1302	/* No longer allow dest_len of -1. */
1303	smb_panic("push_utf8 - invalid dest_len of -1");
1304	}
1305
1306	if (flags & STR_UPPER) {
1307	tmpbuf = strupper_talloc(talloc_tos(), src);
1308	if (!tmpbuf) {
1309	return (size_t)-1;
1310	}
1311	src = tmpbuf;
1312	src_len = strlen(src);
1313	}
1314
1315	src_len = strlen(src);
1316	if (flags & STR_TERMINATE) {
1317	src_len++;
1318	}
1319
1320	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1321	TALLOC_FREE(tmpbuf);
1322	return ret;
1323	}
1324
1325	size_t push_utf8_fstring(void dest, const char src)
1326	{
1327	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1328	}
1329
1330	/**
1331	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1332	*
1333	* @param dest always set at least to NULL
1334	* @parm converted_size set to the number of bytes occupied by the string in
1335	* the destination on success.
1336	*
1337	* @return true if new buffer was correctly allocated, and string was
1338	* converted.
1339	**/
1340
1341	bool push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1342	size_t *converted_size)
1343	{
1344	size_t src_len = strlen(src)+1;
1345
1346	*dest = NULL;
1347	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1348	(void**)dest, converted_size, True);
1349	}
1350
1351	/**
1352	Copy a string from a ucs2 source to a unix char* destination.
1353	Flags can have:
1354	STR_TERMINATE means the string in src is null terminated.
1355	STR_NOALIGN means don't try to align.
1356	if STR_TERMINATE is set then src_len is ignored if it is -1.
1357	src_len is the length of the source area in bytes
1358	Return the number of bytes occupied by the string in src.
1359	The resulting string in "dest" is always null terminated.
1360	**/
1361
1362	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1363	{
1364	size_t ret;
1365	size_t ucs2_align_len = 0;
1366
1367	if (dest_len == (size_t)-1) {
1368	/* No longer allow dest_len of -1. */
1369	smb_panic("pull_ucs2 - invalid dest_len of -1");
1370	}
1371
1372	if (!src_len) {
1373	if (dest && dest_len > 0) {
1374	dest[0] = '\0';
1375	}
1376	return 0;
1377	}
1378
1379	if (ucs2_align(base_ptr, src, flags)) {
1380	src = (const void )((const char )src + 1);
1381	if (src_len != (size_t)-1)
1382	src_len--;
1383	ucs2_align_len = 1;
1384	}
1385
1386	if (flags & STR_TERMINATE) {
1387	/* src_len -1 is the default for null terminated strings. */
1388	if (src_len != (size_t)-1) {
1389	size_t len = strnlen_w((const smb_ucs2_t *)src,
1390	src_len/2);
1391	if (len < src_len/2)
1392	len++;
1393	src_len = len*2;
1394	}
1395	}
1396
1397	/* ucs2 is always a multiple of 2 bytes */
1398	if (src_len != (size_t)-1)
1399	src_len &= ~1;
1400
1401	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1402	if (ret == (size_t)-1) {
1403	ret = 0;
1404	dest_len = 0;
1405	}
1406
1407	if (src_len == (size_t)-1)
1408	src_len = ret*2;
1409
1410	if (dest_len && ret) {
1411	/* Did we already process the terminating zero ? */
1412	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1413	dest[MIN(ret, dest_len-1)] = 0;
1414	}
1415	} else {
1416	dest[0] = 0;
1417	}
1418
1419	return src_len + ucs2_align_len;
1420	}
1421
1422	/**
1423	Copy a string from a ucs2 source to a unix char* destination.
1424	Talloc version with a base pointer.
1425	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1426	needs fixing. JRA).
1427	Flags can have:
1428	STR_TERMINATE means the string in src is null terminated.
1429	STR_NOALIGN means don't try to align.
1430	if STR_TERMINATE is set then src_len is ignored if it is -1.
1431	src_len is the length of the source area in bytes
1432	Return the number of bytes occupied by the string in src.
1433	The resulting string in "dest" is always null terminated.
1434	**/
1435
1436	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1437	const void *base_ptr,
1438	char **ppdest,
1439	const void *src,
1440	size_t src_len,
1441	int flags)
1442	{
1443	char *dest;
1444	size_t dest_len;
1445	size_t ucs2_align_len = 0;
1446
1447	*ppdest = NULL;
1448
1449	#ifdef DEVELOPER
1450	/* Ensure we never use the braindead "malloc" varient. */
1451	if (ctx == NULL) {
1452	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1453	}
1454	#endif
1455
1456	if (!src_len) {
1457	return 0;
1458	}
1459
1460	if (ucs2_align(base_ptr, src, flags)) {
1461	src = (const void )((const char )src + 1);
1462	if (src_len != (size_t)-1)
1463	src_len--;
1464	ucs2_align_len = 1;
1465	}
1466
1467	if (flags & STR_TERMINATE) {
1468	/* src_len -1 is the default for null terminated strings. */
1469	if (src_len != (size_t)-1) {
1470	size_t len = strnlen_w((const smb_ucs2_t *)src,
1471	src_len/2);
1472	if (len < src_len/2)
1473	len++;
1474	src_len = len*2;
1475	} else {
1476	/*
1477	* src_len == -1 - alloc interface won't take this
1478	* so we must calculate.
1479	*/
1480	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1481	}
1482	/* Ensure we don't use an insane length from the client. */
1483	if (src_len >= 1024*1024) {
1484	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1485	}
1486	} else {
1487	/* Can't have an unlimited length
1488	* non STR_TERMINATE'd.
1489	*/
1490	if (src_len == (size_t)-1) {
1491	errno = EINVAL;
1492	return 0;
1493	}
1494	}
1495
1496	/* src_len != -1 here. */
1497
1498	/* ucs2 is always a multiple of 2 bytes */
1499	src_len &= ~1;
1500
1501	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1502	(void *)&dest, &dest_len, True)) {
1503	dest_len = 0;
1504	}
1505
1506	if (dest_len) {
1507	/* Did we already process the terminating zero ? */
1508	if (dest[dest_len-1] != 0) {
1509	size_t size = talloc_get_size(dest);
1510	/* Have we got space to append the '\0' ? */
1511	if (size <= dest_len) {
1512	/* No, realloc. */
1513	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1514	dest_len+1);
1515	if (!dest) {
1516	/* talloc fail. */
1517	dest_len = (size_t)-1;
1518	return 0;
1519	}
1520	}
1521	/* Yay - space ! */
1522	dest[dest_len] = '\0';
1523	dest_len++;
1524	}
1525	} else if (dest) {
1526	dest[0] = 0;
1527	}
1528
1529	*ppdest = dest;
1530	return src_len + ucs2_align_len;
1531	}
1532
1533	size_t pull_ucs2_fstring(char dest, const void src)
1534	{
1535	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1536	}
1537
1538	/**
1539	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1540	*
1541	* @param dest always set at least to NULL
1542	* @parm converted_size set to the number of bytes occupied by the string in
1543	* the destination on success.
1544	*
1545	* @return true if new buffer was correctly allocated, and string was
1546	* converted.
1547	**/
1548
1549	bool pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src,
1550	size_t *converted_size)
1551	{
1552	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1553
1554	*dest = NULL;
1555	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1556	(void **)dest, converted_size, True);
1557	}
1558
1559	/**
1560	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1561	*
1562	* @param dest always set at least to NULL
1563	* @parm converted_size set to the number of bytes occupied by the string in
1564	* the destination on success.
1565	*
1566	* @return true if new buffer was correctly allocated, and string was
1567	* converted.
1568	**/
1569
1570	bool pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1571	size_t *converted_size)
1572	{
1573	size_t src_len = strlen(src)+1;
1574
1575	*dest = NULL;
1576	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1577	(void **)dest, converted_size, True);
1578	}
1579
1580
1581	/**
1582	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1583	*
1584	* @param dest always set at least to NULL
1585	* @parm converted_size set to the number of bytes occupied by the string in
1586	* the destination on success.
1587	*
1588	* @return true if new buffer was correctly allocated, and string was
1589	* converted.
1590	**/
1591
1592	bool pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src,
1593	size_t *converted_size)
1594	{
1595	size_t src_len = strlen(src)+1;
1596
1597	*dest = NULL;
1598	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1599	(void **)dest, converted_size, True);
1600	}
1601
1602	/**
1603	Copy a string from a char* src to a unicode or ascii
1604	dos codepage destination choosing unicode or ascii based on the
1605	flags supplied
1606	Return the number of bytes occupied by the string in the destination.
1607	flags can have:
1608	STR_TERMINATE means include the null termination.
1609	STR_UPPER means uppercase in the destination.
1610	STR_ASCII use ascii even with unicode packet.
1611	STR_NOALIGN means don't do alignment.
1612	dest_len is the maximum length allowed in the destination. If dest_len
1613	is -1 then no maxiumum is used.
1614	**/
1615
1616	size_t push_string_check_fn(const char *function, unsigned int line,
1617	void dest, const char src,
1618	size_t dest_len, int flags)
1619	{
1620	#ifdef DEVELOPER
1621	/* We really need to zero fill here, not clobber
1622	* region, as we want to ensure that valgrind thinks
1623	* all of the outgoing buffer has been written to
1624	* so a send() or write() won't trap an error.
1625	* JRA.
1626	*/
1627	#if 0
1628	clobber_region(function, line, dest, dest_len);
1629	#else
1630	memset(dest, '\0', dest_len);
1631	#endif
1632	#endif
1633
1634	if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1635	return push_ucs2(NULL, dest, src, dest_len, flags);
1636	}
1637	return push_ascii(dest, src, dest_len, flags);
1638	}
1639
1640
1641	/**
1642	Copy a string from a char* src to a unicode or ascii
1643	dos codepage destination choosing unicode or ascii based on the
1644	flags in the SMB buffer starting at base_ptr.
1645	Return the number of bytes occupied by the string in the destination.
1646	flags can have:
1647	STR_TERMINATE means include the null termination.
1648	STR_UPPER means uppercase in the destination.
1649	STR_ASCII use ascii even with unicode packet.
1650	STR_NOALIGN means don't do alignment.
1651	dest_len is the maximum length allowed in the destination. If dest_len
1652	is -1 then no maxiumum is used.
1653	**/
1654
1655	size_t push_string_base(const char *function, unsigned int line,
1656	const char *base, uint16 flags2,
1657	void dest, const char src,
1658	size_t dest_len, int flags)
1659	{
1660	#ifdef DEVELOPER
1661	/* We really need to zero fill here, not clobber
1662	* region, as we want to ensure that valgrind thinks
1663	* all of the outgoing buffer has been written to
1664	* so a send() or write() won't trap an error.
1665	* JRA.
1666	*/
1667	#if 0
1668	clobber_region(function, line, dest, dest_len);
1669	#else
1670	memset(dest, '\0', dest_len);
1671	#endif
1672	#endif
1673
1674	if (!(flags & STR_ASCII) && \
1675	((flags & STR_UNICODE \|\| \
1676	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1677	return push_ucs2(base, dest, src, dest_len, flags);
1678	}
1679	return push_ascii(dest, src, dest_len, flags);
1680	}
1681
1682	/**
1683	Copy a string from a char* src to a unicode or ascii
1684	dos codepage destination choosing unicode or ascii based on the
1685	flags supplied
1686	Return the number of bytes occupied by the string in the destination.
1687	flags can have:
1688	STR_TERMINATE means include the null termination.
1689	STR_UPPER means uppercase in the destination.
1690	STR_ASCII use ascii even with unicode packet.
1691	STR_NOALIGN means don't do alignment.
1692	dest_len is the maximum length allowed in the destination. If dest_len
1693	is -1 then no maxiumum is used.
1694	**/
1695
1696	ssize_t push_string(void dest, const char src, size_t dest_len, int flags)
1697	{
1698	size_t ret;
1699	#ifdef DEVELOPER
1700	/* We really need to zero fill here, not clobber
1701	* region, as we want to ensure that valgrind thinks
1702	* all of the outgoing buffer has been written to
1703	* so a send() or write() won't trap an error.
1704	* JRA.
1705	*/
1706	memset(dest, '\0', dest_len);
1707	#endif
1708
1709	if (!(flags & STR_ASCII) && \
1710	(flags & STR_UNICODE)) {
1711	ret = push_ucs2(NULL, dest, src, dest_len, flags);
1712	} else {
1713	ret = push_ascii(dest, src, dest_len, flags);
1714	}
1715	if (ret == (size_t)-1) {
1716	return -1;
1717	}
1718	return ret;
1719	}
1720
1721	/**
1722	Copy a string from a unicode or ascii source (depending on
1723	the packet flags) to a char* destination.
1724	Flags can have:
1725	STR_TERMINATE means the string in src is null terminated.
1726	STR_UNICODE means to force as unicode.
1727	STR_ASCII use ascii even with unicode packet.
1728	STR_NOALIGN means don't do alignment.
1729	if STR_TERMINATE is set then src_len is ignored is it is -1
1730	src_len is the length of the source area in bytes.
1731	Return the number of bytes occupied by the string in src.
1732	The resulting string in "dest" is always null terminated.
1733	**/
1734
1735	size_t pull_string_fn(const char *function,
1736	unsigned int line,
1737	const void *base_ptr,
1738	uint16 smb_flags2,
1739	char *dest,
1740	const void *src,
1741	size_t dest_len,
1742	size_t src_len,
1743	int flags)
1744	{
1745	#ifdef DEVELOPER
1746	clobber_region(function, line, dest, dest_len);
1747	#endif
1748
1749	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1750	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1751	"UNICODE defined");
1752	}
1753
1754	if (!(flags & STR_ASCII) && \
1755	((flags & STR_UNICODE \|\| \
1756	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1757	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1758	}
1759	return pull_ascii(dest, src, dest_len, src_len, flags);
1760	}
1761
1762	/**
1763	Copy a string from a unicode or ascii source (depending on
1764	the packet flags) to a char* destination.
1765	Variant that uses talloc.
1766	Flags can have:
1767	STR_TERMINATE means the string in src is null terminated.
1768	STR_UNICODE means to force as unicode.
1769	STR_ASCII use ascii even with unicode packet.
1770	STR_NOALIGN means don't do alignment.
1771	if STR_TERMINATE is set then src_len is ignored is it is -1
1772	src_len is the length of the source area in bytes.
1773	Return the number of bytes occupied by the string in src.
1774	The resulting string in "dest" is always null terminated.
1775	**/
1776
1777	size_t pull_string_talloc_fn(const char *function,
1778	unsigned int line,
1779	TALLOC_CTX *ctx,
1780	const void *base_ptr,
1781	uint16 smb_flags2,
1782	char **ppdest,
1783	const void *src,
1784	size_t src_len,
1785	int flags)
1786	{
1787	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1788	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1789	"UNICODE defined");
1790	}
1791
1792	if (!(flags & STR_ASCII) && \
1793	((flags & STR_UNICODE \|\| \
1794	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1795	return pull_ucs2_base_talloc(ctx,
1796	base_ptr,
1797	ppdest,
1798	src,
1799	src_len,
1800	flags);
1801	}
1802	return pull_ascii_base_talloc(ctx,
1803	ppdest,
1804	src,
1805	src_len,
1806	flags);
1807	}
1808
1809
1810	size_t align_string(const void base_ptr, const char p, int flags)
1811	{
1812	if (!(flags & STR_ASCII) && \
1813	((flags & STR_UNICODE \|\| \
1814	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1815	return ucs2_align(base_ptr, p, flags);
1816	}
1817	return 0;
1818	}
1819
1820	/*
1821	Return the unicode codepoint for the next multi-byte CH_UNIX character
1822	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1823
1824	Also return the number of bytes consumed (which tells the caller
1825	how many bytes to skip to get to the next CH_UNIX character).
1826
1827	Return INVALID_CODEPOINT if the next character cannot be converted.
1828	*/
1829	codepoint_t next_codepoint(const char str, size_t size)
1830	{
1831	/* It cannot occupy more than 4 bytes in UTF16 format */
1832	uint8_t buf[4];
1833	smb_iconv_t descriptor;
1834	size_t ilen_orig;
1835	size_t ilen;
1836	size_t olen;
1837	char *outbuf;
1838
1839	if ((str[0] & 0x80) == 0) {
1840	*size = 1;
1841	return (codepoint_t)str[0];
1842	}
1843
1844	/* We assume that no multi-byte character can take
1845	more than 5 bytes. This is OK as we only
1846	support codepoints up to 1M */
1847
1848	ilen_orig = strnlen(str, 5);
1849	ilen = ilen_orig;
1850
1851	lazy_initialize_conv();
1852
1853	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1854	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1855	*size = 1;
1856	return INVALID_CODEPOINT;
1857	}
1858
1859	/* This looks a little strange, but it is needed to cope
1860	with codepoints above 64k which are encoded as per RFC2781. */
1861	olen = 2;
1862	outbuf = (char *)buf;
1863	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1864	if (olen == 2) {
1865	/* We failed to convert to a 2 byte character.
1866	See if we can convert to a 4 UTF16-LE byte char encoding.
1867	*/
1868	olen = 4;
1869	outbuf = (char *)buf;
1870	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1871	if (olen == 4) {
1872	/* We didn't convert any bytes */
1873	*size = 1;
1874	return INVALID_CODEPOINT;
1875	}
1876	olen = 4 - olen;
1877	} else {
1878	olen = 2 - olen;
1879	}
1880
1881	*size = ilen_orig - ilen;
1882
1883	if (olen == 2) {
1884	/* 2 byte, UTF16-LE encoded value. */
1885	return (codepoint_t)SVAL(buf, 0);
1886	}
1887	if (olen == 4) {
1888	/* Decode a 4 byte UTF16-LE character manually.
1889	See RFC2871 for the encoding machanism.
1890	*/
1891	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1892	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1893
1894	return (codepoint_t)0x10000 +
1895	(w1 << 10) + w2;
1896	}
1897
1898	/* no other length is valid */
1899	return INVALID_CODEPOINT;
1900	}
1901
1902	/*
1903	push a single codepoint into a CH_UNIX string the target string must
1904	be able to hold the full character, which is guaranteed if it is at
1905	least 5 bytes in size. The caller may pass less than 5 bytes if they
1906	are sure the character will fit (for example, you can assume that
1907	uppercase/lowercase of a character will not add more than 1 byte)
1908
1909	return the number of bytes occupied by the CH_UNIX character, or
1910	-1 on failure
1911	*/
1912	_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1913	{
1914	smb_iconv_t descriptor;
1915	uint8_t buf[4];
1916	size_t ilen, olen;
1917	const char *inbuf;
1918
1919	if (c < 128) {
1920	*str = c;
1921	return 1;
1922	}
1923
1924	lazy_initialize_conv();
1925
1926	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1927	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1928	return -1;
1929	}
1930
1931	if (c < 0x10000) {
1932	ilen = 2;
1933	olen = 5;
1934	inbuf = (char *)buf;
1935	SSVAL(buf, 0, c);
1936	smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1937	if (ilen != 0) {
1938	return -1;
1939	}
1940	return 5 - olen;
1941	}
1942
1943	c -= 0x10000;
1944
1945	buf[0] = (c>>10) & 0xFF;
1946	buf[1] = (c>>18) \| 0xd8;
1947	buf[2] = c & 0xFF;
1948	buf[3] = ((c>>8) & 0x3) \| 0xdc;
1949
1950	ilen = 4;
1951	olen = 5;
1952	inbuf = (char *)buf;
1953
1954	smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1955	if (ilen != 0) {
1956	return -1;
1957	}
1958	return 5 - olen;
1959	}
1960
1961

Note: See TracBrowser for help on using the repository browser.

Download in other formats: