Context Navigation

source: branches/samba-3.2.x/source/lib/charcnv.c@ 201

Visit:

Last change on this file since 201 was 149, checked in by Paul Smedley, 17 years ago
Update trunk to v3.2.2
File size: 48.4 KB

Line
/*
   Unix SMB/CIFS implementation.
   Character set conversion Extensions
   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   Copyright (C) Andrew Tridgell 2001
   Copyright (C) Simo Sorce 2001
   Copyright (C) Martin Pool 2003

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

*/
23	#include "includes.h"
24
/* We can parameterize this if someone complains.... JRA. */

/**
 * Return the character written in place of input that could not be
 * converted.
 *
 * @returns the fixed replacement character '_'.
 **/
char lp_failed_convert_char(void)
{
	return '_';
}
31
/**
 * @file
 *
 * @brief Character-set conversion routines built on our iconv.
 *
 * @note Samba's internal character set (at least in the 3.0 series)
 * is always the same as the one for the Unix filesystem.  It is
 * <b>not</b> necessarily UTF-8 and may be different on machines that
 * need i18n filenames to be compatible with Unix software.  It does
 * have to be a superset of ASCII.  All multibyte sequences must start
 * with a byte with the high bit set.
 *
 * @sa lib/iconv.c
 */
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	else if (ch == CH_UNIX) ret = lp_unix_charset();
62	else if (ch == CH_DOS) ret = lp_dos_charset();
63	else if (ch == CH_DISPLAY) ret = lp_display_charset();
64	else if (ch == CH_UTF8) ret = "UTF8";
65
66	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67	if (ret && !strcmp(ret, "LOCALE")) {
68	const char *ln = NULL;
69
70	#ifdef HAVE_SETLOCALE
71	setlocale(LC_ALL, "");
72	#endif
73	ln = nl_langinfo(CODESET);
74	if (ln) {
75	/* Check whether the charset name is supported
76	by iconv */
77	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78	if (handle == (smb_iconv_t) -1) {
79	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80	ln = NULL;
81	} else {
82	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83	smb_iconv_close(handle);
84	}
85	}
86	ret = ln;
87	}
88	#endif
89
90	if (!ret \|\| !*ret) ret = "ASCII";
91	return ret;
92	}
93
94	void lazy_initialize_conv(void)
95	{
96	if (!initialized) {
97	load_case_tables();
98	init_iconv();
99	initialized = true;
100	}
101	}
102
103	/**
104	* Destroy global objects allocated by init_iconv()
105	**/
106	void gfree_charcnv(void)
107	{
108	int c1, c2;
109
110	for (c1=0;c1<NUM_CHARSETS;c1++) {
111	for (c2=0;c2<NUM_CHARSETS;c2++) {
112	if ( conv_handles[c1][c2] ) {
113	smb_iconv_close( conv_handles[c1][c2] );
114	conv_handles[c1][c2] = 0;
115	}
116	}
117	}
118	initialized = false;
119	}
120
121	/**
122	* Initialize iconv conversion descriptors.
123	*
124	* This is called the first time it is needed, and also called again
125	* every time the configuration is reloaded, because the charset or
126	* codepage might have changed.
127	**/
128	void init_iconv(void)
129	{
130	int c1, c2;
131	bool did_reload = False;
132
133	/* so that charset_name() works we need to get the UNIX<->UCS2 going
134	first */
135	if (!conv_handles[CH_UNIX][CH_UTF16LE])
136	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
137
138	if (!conv_handles[CH_UTF16LE][CH_UNIX])
139	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
140
141	for (c1=0;c1<NUM_CHARSETS;c1++) {
142	for (c2=0;c2<NUM_CHARSETS;c2++) {
143	const char *n1 = charset_name((charset_t)c1);
144	const char *n2 = charset_name((charset_t)c2);
145	if (conv_handles[c1][c2] &&
146	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148	continue;
149
150	did_reload = True;
151
152	if (conv_handles[c1][c2])
153	smb_iconv_close(conv_handles[c1][c2]);
154
155	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158	charset_name((charset_t)c1), charset_name((charset_t)c2)));
159	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160	n1 = "ASCII";
161	}
162	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163	n2 = "ASCII";
164	}
165	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166	n1, n2 ));
167	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168	if (!conv_handles[c1][c2]) {
169	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170	smb_panic("init_iconv: conv_handle initialization failed");
171	}
172	}
173	}
174	}
175
176	if (did_reload) {
177	/* XXX: Does this really get called every time the dos
178	* codepage changes? */
179	/* XXX: Is the did_reload test too strict? */
180	conv_silent = True;
181	init_valid_table();
182	conv_silent = False;
183	}
184	}
185
186	/**
187	* Convert string from one encoding to another, making error checking etc
188	* Slow path version - uses (slow) iconv.
189	*
190	* @param src pointer to source string (multibyte or singlebyte)
191	* @param srclen length of the source string in bytes
192	* @param dest pointer to destination string (multibyte or singlebyte)
193	* @param destlen maximal length allowed for string
194	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195	* @returns the number of bytes occupied in the destination
196	*
197	* Ensure the srclen contains the terminating zero.
198	*
199	**/
200
201	static size_t convert_string_internal(charset_t from, charset_t to,
202	void const *src, size_t srclen,
203	void *dest, size_t destlen, bool allow_bad_conv)
204	{
205	size_t i_len, o_len;
206	size_t retval;
207	const char* inbuf = (const char*)src;
208	char* outbuf = (char*)dest;
209	smb_iconv_t descriptor;
210
211	lazy_initialize_conv();
212
213	descriptor = conv_handles[from][to];
214
215	if (srclen == (size_t)-1) {
216	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
217	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
218	} else {
219	srclen = strlen((const char *)src)+1;
220	}
221	}
222
223
224	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
225	if (!conv_silent)
226	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
227	return (size_t)-1;
228	}
229
230	i_len=srclen;
231	o_len=destlen;
232
233	again:
234
235	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236	if(retval==(size_t)-1) {
237	const char *reason="unknown error";
238	switch(errno) {
239	case EINVAL:
240	reason="Incomplete multibyte sequence";
241	if (!conv_silent)
242	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
243	if (allow_bad_conv)
244	goto use_as_is;
245	break;
246	case E2BIG:
247	reason="No more room";
248	if (!conv_silent) {
249	if (from == CH_UNIX) {
250	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251	charset_name(from), charset_name(to),
252	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
253	} else {
254	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255	charset_name(from), charset_name(to),
256	(unsigned int)srclen, (unsigned int)destlen));
257	}
258	}
259	break;
260	case EILSEQ:
261	reason="Illegal multibyte sequence";
262	if (!conv_silent)
263	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
264	if (allow_bad_conv)
265	goto use_as_is;
266	break;
267	default:
268	if (!conv_silent)
269	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270	break;
271	}
272	/* smb_panic(reason); */
273	}
274	return destlen-o_len;
275
276	use_as_is:
277
278	/*
279	* Conversion not supported. This is actually an error, but there are so
280	* many misconfigured iconv systems and smb.conf's out there we can't just
281	* fail. Do a very bad conversion instead.... JRA.
282	*/
283
284	{
285	if (o_len == 0 \|\| i_len == 0)
286	return destlen - o_len;
287
288	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
289	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
290	/* Can't convert from utf16 any endian to multibyte.
291	Replace with the default fail char.
292	*/
293	if (i_len < 2)
294	return destlen - o_len;
295	if (i_len >= 2) {
296	*outbuf = lp_failed_convert_char();
297
298	outbuf++;
299	o_len--;
300
301	inbuf += 2;
302	i_len -= 2;
303	}
304
305	if (o_len == 0 \|\| i_len == 0)
306	return destlen - o_len;
307
308	/* Keep trying with the next char... */
309	goto again;
310
311	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312	/* Can't convert to UTF16LE - just widen by adding the
313	default fail char then zero.
314	*/
315	if (o_len < 2)
316	return destlen - o_len;
317
318	outbuf[0] = lp_failed_convert_char();
319	outbuf[1] = '\0';
320
321	inbuf++;
322	i_len--;
323
324	outbuf += 2;
325	o_len -= 2;
326
327	if (o_len == 0 \|\| i_len == 0)
328	return destlen - o_len;
329
330	/* Keep trying with the next char... */
331	goto again;
332
333	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334	to != CH_UTF16LE && to != CH_UTF16BE) {
335	/* Failed multibyte to multibyte. Just copy the default fail char and
336	try again. */
337	outbuf[0] = lp_failed_convert_char();
338
339	inbuf++;
340	i_len--;
341
342	outbuf++;
343	o_len--;
344
345	if (o_len == 0 \|\| i_len == 0)
346	return destlen - o_len;
347
348	/* Keep trying with the next char... */
349	goto again;
350
351	} else {
352	/* Keep compiler happy.... */
353	return destlen - o_len;
354	}
355	}
356	}
357
358	/**
359	* Convert string from one encoding to another, making error checking etc
360	* Fast path version - handles ASCII first.
361	*
362	* @param src pointer to source string (multibyte or singlebyte)
363	* @param srclen length of the source string in bytes, or -1 for nul terminated.
364	* @param dest pointer to destination string (multibyte or singlebyte)
365	* @param destlen maximal length allowed for string - NEVER -1.
366	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367	* @returns the number of bytes occupied in the destination
368	*
369	* Ensure the srclen contains the terminating zero.
370	*
371	* This function has been hand-tuned to provide a fast path.
372	* Don't change unless you really know what you are doing. JRA.
373	**/
374
375	size_t convert_string(charset_t from, charset_t to,
376	void const *src, size_t srclen,
377	void *dest, size_t destlen, bool allow_bad_conv)
378	{
379	/*
380	* NB. We deliberately don't do a strlen here if srclen == -1.
381	* This is very expensive over millions of calls and is taken
382	* care of in the slow path in convert_string_internal. JRA.
383	*/
384
385	#ifdef DEVELOPER
386	SMB_ASSERT(destlen != (size_t)-1);
387	#endif
388
389	if (srclen == 0)
390	return 0;
391
392	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393	const unsigned char p = (const unsigned char )src;
394	unsigned char q = (unsigned char )dest;
395	size_t slen = srclen;
396	size_t dlen = destlen;
397	unsigned char lastp = '\0';
398	size_t retval = 0;
399
400	/* If all characters are ascii, fast path here. */
401	while (slen && dlen) {
402	if ((lastp = *p) <= 0x7f) {
403	q++ = p++;
404	if (slen != (size_t)-1) {
405	slen--;
406	}
407	dlen--;
408	retval++;
409	if (!lastp)
410	break;
411	} else {
412	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413	goto general_case;
414	#else
415	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416	#endif
417	}
418	}
419	if (!dlen) {
420	/* Even if we fast path we should note if we ran out of room. */
421	if (((slen != (size_t)-1) && slen) \|\|
422	((slen == (size_t)-1) && lastp)) {
423	errno = E2BIG;
424	}
425	}
426	return retval;
427	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
428	const unsigned char p = (const unsigned char )src;
429	unsigned char q = (unsigned char )dest;
430	size_t retval = 0;
431	size_t slen = srclen;
432	size_t dlen = destlen;
433	unsigned char lastp = '\0';
434
435	/* If all characters are ascii, fast path here. */
436	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
437	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
438	q++ = p;
439	if (slen != (size_t)-1) {
440	slen -= 2;
441	}
442	p += 2;
443	dlen--;
444	retval++;
445	if (!lastp)
446	break;
447	} else {
448	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
449	goto general_case;
450	#else
451	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
452	#endif
453	}
454	}
455	if (!dlen) {
456	/* Even if we fast path we should note if we ran out of room. */
457	if (((slen != (size_t)-1) && slen) \|\|
458	((slen == (size_t)-1) && lastp)) {
459	errno = E2BIG;
460	}
461	}
462	return retval;
463	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
464	const unsigned char p = (const unsigned char )src;
465	unsigned char q = (unsigned char )dest;
466	size_t retval = 0;
467	size_t slen = srclen;
468	size_t dlen = destlen;
469	unsigned char lastp = '\0';
470
471	/* If all characters are ascii, fast path here. */
472	while (slen && (dlen >= 2)) {
473	if ((lastp = *p) <= 0x7F) {
474	q++ = p++;
475	*q++ = '\0';
476	if (slen != (size_t)-1) {
477	slen--;
478	}
479	dlen -= 2;
480	retval += 2;
481	if (!lastp)
482	break;
483	} else {
484	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
485	goto general_case;
486	#else
487	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
488	#endif
489	}
490	}
491	if (!dlen) {
492	/* Even if we fast path we should note if we ran out of room. */
493	if (((slen != (size_t)-1) && slen) \|\|
494	((slen == (size_t)-1) && lastp)) {
495	errno = E2BIG;
496	}
497	}
498	return retval;
499	}
500
501	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
502	general_case:
503	#endif
504	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
505	}
506
507	/**
508	* Convert between character sets, allocating a new buffer for the result.
509	*
510	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
511	* (this is a bad interface and needs fixing. JRA).
512	* @param srclen length of source buffer.
513	* @param dest always set at least to NULL
514	* @param converted_size set to the size of the allocated buffer on return
515	* true
516	* @note -1 is not accepted for srclen.
517	*
518	* @return True if new buffer was correctly allocated, and string was
519	* converted.
520	*
521	* Ensure the srclen contains the terminating zero.
522	*
523	* I hate the goto's in this function. It's embarressing.....
524	* There has to be a cleaner way to do this. JRA.
525	**/
526
527	bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
528	void const src, size_t srclen, void dst,
529	size_t *converted_size, bool allow_bad_conv)
530	{
531	size_t i_len, o_len, destlen = (srclen * 3) / 2;
532	size_t retval;
533	const char inbuf = (const char )src;
534	char outbuf = NULL, ob = NULL;
535	smb_iconv_t descriptor;
536	void dest = (void )dst;
537
538	*dest = NULL;
539
540	if (!converted_size) {
541	errno = EINVAL;
542	return false;
543	}
544
545	if (src == NULL \|\| srclen == (size_t)-1) {
546	errno = EINVAL;
547	return false;
548	}
549	if (srclen == 0) {
550	*converted_size = 0;
551	return true;
552	}
553
554	lazy_initialize_conv();
555
556	descriptor = conv_handles[from][to];
557
558	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
559	if (!conv_silent)
560	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
561	errno = EOPNOTSUPP;
562	return false;
563	}
564
565	convert:
566
567	/* +2 is for ucs2 null termination. */
568	if ((destlen*2)+2 < destlen) {
569	/* wrapped ! abort. */
570	if (!conv_silent)
571	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
572	if (!ctx)
573	SAFE_FREE(outbuf);
574	errno = EOPNOTSUPP;
575	return false;
576	} else {
577	destlen = destlen * 2;
578	}
579
580	/* +2 is for ucs2 null termination. */
581	if (ctx) {
582	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
583	} else {
584	ob = (char *)SMB_REALLOC(ob, destlen + 2);
585	}
586
587	if (!ob) {
588	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
589	errno = ENOMEM;
590	return false;
591	}
592	outbuf = ob;
593	i_len = srclen;
594	o_len = destlen;
595
596	again:
597
598	retval = smb_iconv(descriptor,
599	&inbuf, &i_len,
600	&outbuf, &o_len);
601	if(retval == (size_t)-1) {
602	const char *reason="unknown error";
603	switch(errno) {
604	case EINVAL:
605	reason="Incomplete multibyte sequence";
606	if (!conv_silent)
607	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
608	if (allow_bad_conv)
609	goto use_as_is;
610	break;
611	case E2BIG:
612	goto convert;
613	case EILSEQ:
614	reason="Illegal multibyte sequence";
615	if (!conv_silent)
616	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
617	if (allow_bad_conv)
618	goto use_as_is;
619	break;
620	}
621	if (!conv_silent)
622	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
623	/* smb_panic(reason); */
624	if (ctx) {
625	TALLOC_FREE(ob);
626	} else {
627	SAFE_FREE(ob);
628	}
629	return false;
630	}
631
632	out:
633
634	destlen = destlen - o_len;
635	/* Don't shrink unless we're reclaiming a lot of
636	* space. This is in the hot codepath and these
637	* reallocs cost. JRA.
638	*/
639	if (o_len > 1024) {
640	/* We're shrinking here so we know the +2 is safe from wrap. */
641	if (ctx) {
642	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
643	} else {
644	ob = (char *)SMB_REALLOC(ob,destlen + 2);
645	}
646	}
647
648	if (destlen && !ob) {
649	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
650	errno = ENOMEM;
651	return false;
652	}
653
654	*dest = ob;
655
656	/* Must ucs2 null terminate in the extra space we allocated. */
657	ob[destlen] = '\0';
658	ob[destlen+1] = '\0';
659
660	*converted_size = destlen;
661	return true;
662
663	use_as_is:
664
665	/*
666	* Conversion not supported. This is actually an error, but there are so
667	* many misconfigured iconv systems and smb.conf's out there we can't just
668	* fail. Do a very bad conversion instead.... JRA.
669	*/
670
671	{
672	if (o_len == 0 \|\| i_len == 0)
673	goto out;
674
675	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
676	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
677	/* Can't convert from utf16 any endian to multibyte.
678	Replace with the default fail char.
679	*/
680
681	if (i_len < 2)
682	goto out;
683
684	if (i_len >= 2) {
685	*outbuf = lp_failed_convert_char();
686
687	outbuf++;
688	o_len--;
689
690	inbuf += 2;
691	i_len -= 2;
692	}
693
694	if (o_len == 0 \|\| i_len == 0)
695	goto out;
696
697	/* Keep trying with the next char... */
698	goto again;
699
700	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
701	/* Can't convert to UTF16LE - just widen by adding the
702	default fail char then zero.
703	*/
704	if (o_len < 2)
705	goto out;
706
707	outbuf[0] = lp_failed_convert_char();
708	outbuf[1] = '\0';
709
710	inbuf++;
711	i_len--;
712
713	outbuf += 2;
714	o_len -= 2;
715
716	if (o_len == 0 \|\| i_len == 0)
717	goto out;
718
719	/* Keep trying with the next char... */
720	goto again;
721
722	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
723	to != CH_UTF16LE && to != CH_UTF16BE) {
724	/* Failed multibyte to multibyte. Just copy the default fail char and
725	try again. */
726	outbuf[0] = lp_failed_convert_char();
727
728	inbuf++;
729	i_len--;
730
731	outbuf++;
732	o_len--;
733
734	if (o_len == 0 \|\| i_len == 0)
735	goto out;
736
737	/* Keep trying with the next char... */
738	goto again;
739
740	} else {
741	/* Keep compiler happy.... */
742	goto out;
743	}
744	}
745	}
746
747	/**
748	* Convert between character sets, allocating a new buffer using talloc for the result.
749	*
750	* @param srclen length of source buffer.
751	* @param dest always set at least to NULL
752	* @note -1 is not accepted for srclen.
753	*
754	* @returns Size in bytes of the converted string; or -1 in case of error.
755	**/
756	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
757	void const src, size_t srclen, void dst,
758	bool allow_bad_conv)
759	{
760	void dest = (void )dst;
761	size_t dest_len;
762
763	*dest = NULL;
764	if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
765	&dest_len, allow_bad_conv))
766	return (size_t)-1;
767	if (*dest == NULL)
768	return (size_t)-1;
769	return dest_len;
770	}
771
772	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
773	{
774	size_t size;
775	smb_ucs2_t *buffer;
776
777	size = push_ucs2_allocate(&buffer, src);
778	if (size == (size_t)-1) {
779	return (size_t)-1;
780	}
781	if (!strupper_w(buffer) && (dest == src)) {
782	free(buffer);
783	return srclen;
784	}
785
786	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
787	free(buffer);
788	return size;
789	}
790
791	/**
792	strdup() a unix string to upper case.
793	**/
794
795	char strdup_upper(const char s)
796	{
797	char *out_buffer = SMB_STRDUP(s);
798	const unsigned char p = (const unsigned char )s;
799	unsigned char q = (unsigned char )out_buffer;
800
801	if (!q) {
802	return NULL;
803	}
804
805	/* this is quite a common operation, so we want it to be
806	fast. We optimise for the ascii case, knowing that all our
807	supported multi-byte character sets are ascii-compatible
808	(ie. they match for the first 128 chars) */
809
810	while (*p) {
811	if (*p & 0x80)
812	break;
813	q++ = toupper_ascii_fast(p);
814	p++;
815	}
816
817	if (*p) {
818	/* MB case. */
819	size_t size, size2;
820	smb_ucs2_t *buffer = NULL;
821
822	SAFE_FREE(out_buffer);
823	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
824	strlen(s) + 1, (void *)(void )&buffer, &size,
825	True)) {
826	return NULL;
827	}
828
829	strupper_w(buffer);
830
831	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
832	size, (void *)(void )&out_buffer, &size2, True)) {
833	TALLOC_FREE(buffer);
834	return NULL;
835	}
836
837	/* Don't need the intermediate buffer
838	* anymore.
839	*/
840	TALLOC_FREE(buffer);
841	}
842
843	return out_buffer;
844	}
845
846	/**
847	talloc_strdup() a unix string to upper case.
848	**/
849
850	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
851	{
852	char *out_buffer = talloc_strdup(ctx,s);
853	const unsigned char p = (const unsigned char )s;
854	unsigned char q = (unsigned char )out_buffer;
855
856	if (!q) {
857	return NULL;
858	}
859
860	/* this is quite a common operation, so we want it to be
861	fast. We optimise for the ascii case, knowing that all our
862	supported multi-byte character sets are ascii-compatible
863	(ie. they match for the first 128 chars) */
864
865	while (*p) {
866	if (*p & 0x80)
867	break;
868	q++ = toupper_ascii_fast(p);
869	p++;
870	}
871
872	if (*p) {
873	/* MB case. */
874	size_t size;
875	smb_ucs2_t *ubuf = NULL;
876
877	/* We're not using the ascii buffer above. */
878	TALLOC_FREE(out_buffer);
879
880	size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
881	s, strlen(s)+1,
882	(void *)&ubuf,
883	True);
884	if (size == (size_t)-1) {
885	return NULL;
886	}
887
888	strupper_w(ubuf);
889
890	size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
891	ubuf, size,
892	(void *)&out_buffer,
893	True);
894
895	/* Don't need the intermediate buffer
896	* anymore.
897	*/
898
899	TALLOC_FREE(ubuf);
900
901	if (size == (size_t)-1) {
902	return NULL;
903	}
904	}
905
906	return out_buffer;
907	}
908
909	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
910	{
911	size_t size;
912	smb_ucs2_t *buffer = NULL;
913
914	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
915	(void *)(void )&buffer, &size, True)) {
916	smb_panic("failed to create UCS2 buffer");
917	}
918	if (!strlower_w(buffer) && (dest == src)) {
919	SAFE_FREE(buffer);
920	return srclen;
921	}
922	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
923	SAFE_FREE(buffer);
924	return size;
925	}
926
927	/**
928	strdup() a unix string to lower case.
929	**/
930
931	char strdup_lower(const char s)
932	{
933	size_t size;
934	smb_ucs2_t *buffer = NULL;
935	char *out_buffer;
936
937	size = push_ucs2_allocate(&buffer, s);
938	if (size == -1 \|\| !buffer) {
939	return NULL;
940	}
941
942	strlower_w(buffer);
943
944	size = pull_ucs2_allocate(&out_buffer, buffer);
945	SAFE_FREE(buffer);
946
947	if (size == (size_t)-1) {
948	return NULL;
949	}
950
951	return out_buffer;
952	}
953
954	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
955	{
956	size_t size;
957	smb_ucs2_t *buffer = NULL;
958	char *out_buffer;
959
960	size = push_ucs2_talloc(ctx, &buffer, s);
961	if (size == -1 \|\| !buffer) {
962	TALLOC_FREE(buffer);
963	return NULL;
964	}
965
966	strlower_w(buffer);
967
968	size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
969	TALLOC_FREE(buffer);
970
971	if (size == (size_t)-1) {
972	TALLOC_FREE(out_buffer);
973	return NULL;
974	}
975
976	return out_buffer;
977	}
978
979
980	size_t ucs2_align(const void base_ptr, const void p, int flags)
981	{
982	if (flags & (STR_NOALIGN\|STR_ASCII))
983	return 0;
984	return PTR_DIFF(p, base_ptr) & 1;
985	}
986
987
988	/**
989	* Copy a string from a char* unix src to a dos codepage string destination.
990	*
991	* @return the number of bytes occupied by the string in the destination.
992	*
993	* @param flags can include
994	* <dl>
995	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
996	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
997	* </dl>
998	*
999	* @param dest_len the maximum length in bytes allowed in the
1000	* destination.
1001	**/
1002	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
1003	{
1004	size_t src_len = strlen(src);
1005	char *tmpbuf = NULL;
1006	size_t ret;
1007
1008	/* No longer allow a length of -1. */
1009	if (dest_len == (size_t)-1) {
1010	smb_panic("push_ascii - dest_len == -1");
1011	}
1012
1013	if (flags & STR_UPPER) {
1014	tmpbuf = SMB_STRDUP(src);
1015	if (!tmpbuf) {
1016	smb_panic("malloc fail");
1017	}
1018	strupper_m(tmpbuf);
1019	src = tmpbuf;
1020	}
1021
1022	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
1023	src_len++;
1024	}
1025
1026	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1027	if (ret == (size_t)-1 &&
1028	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
1029	&& dest_len > 0) {
1030	((char *)dest)[0] = '\0';
1031	}
1032	SAFE_FREE(tmpbuf);
1033	return ret;
1034	}
1035
1036	size_t push_ascii_fstring(void dest, const char src)
1037	{
1038	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1039	}
1040
1041	/********************************************************************
1042	Push an nstring - ensure null terminated. Written by
1043	moriyama@miraclelinux.com (MORIYAMA Masayuki).
1044	********************************************************************/
1045
1046	size_t push_ascii_nstring(void dest, const char src)
1047	{
1048	size_t i, buffer_len, dest_len;
1049	smb_ucs2_t *buffer;
1050
1051	conv_silent = True;
1052	buffer_len = push_ucs2_allocate(&buffer, src);
1053	if (buffer_len == (size_t)-1) {
1054	smb_panic("failed to create UCS2 buffer");
1055	}
1056
1057	/* We're using buffer_len below to count ucs2 characters, not bytes. */
1058	buffer_len /= sizeof(smb_ucs2_t);
1059
1060	dest_len = 0;
1061	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1062	unsigned char mb[10];
1063	/* Convert one smb_ucs2_t character at a time. */
1064	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1065	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1066	memcpy((char *)dest + dest_len, mb, mb_len);
1067	dest_len += mb_len;
1068	} else {
1069	errno = E2BIG;
1070	break;
1071	}
1072	}
1073	((char *)dest)[dest_len] = '\0';
1074
1075	SAFE_FREE(buffer);
1076	conv_silent = False;
1077	return dest_len;
1078	}
1079
1080	/********************************************************************
1081	Push and malloc an ascii string. src and dest null terminated.
1082	********************************************************************/
1083
1084	size_t push_ascii_allocate(char *dest, const char src)
1085	{
1086	size_t dest_len, src_len = strlen(src)+1;
1087
1088	*dest = NULL;
1089	if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1090	(void **)dest, &dest_len, True))
1091	return (size_t)-1;
1092	else
1093	return dest_len;
1094	}
1095
1096	/**
1097	* Copy a string from a dos codepage source to a unix char* destination.
1098	*
1099	* The resulting string in "dest" is always null terminated.
1100	*
1101	* @param flags can have:
1102	* <dl>
1103	* <dt>STR_TERMINATE</dt>
1104	* <dd>STR_TERMINATE means the string in @p src
1105	* is null terminated, and src_len is ignored.</dd>
1106	* </dl>
1107	*
1108	* @param src_len is the length of the source area in bytes.
1109	* @returns the number of bytes occupied by the string in @p src.
1110	**/
1111	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1112	{
1113	size_t ret;
1114
1115	if (dest_len == (size_t)-1) {
1116	/* No longer allow dest_len of -1. */
1117	smb_panic("pull_ascii - invalid dest_len of -1");
1118	}
1119
1120	if (flags & STR_TERMINATE) {
1121	if (src_len == (size_t)-1) {
1122	src_len = strlen((const char *)src) + 1;
1123	} else {
1124	size_t len = strnlen((const char *)src, src_len);
1125	if (len < src_len)
1126	len++;
1127	src_len = len;
1128	}
1129	}
1130
1131	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1132	if (ret == (size_t)-1) {
1133	ret = 0;
1134	dest_len = 0;
1135	}
1136
1137	if (dest_len && ret) {
1138	/* Did we already process the terminating zero ? */
1139	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1140	dest[MIN(ret, dest_len-1)] = 0;
1141	}
1142	} else {
1143	dest[0] = 0;
1144	}
1145
1146	return src_len;
1147	}
1148
1149	/**
1150	* Copy a string from a dos codepage source to a unix char* destination.
1151	Talloc version.
1152	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1153	needs fixing. JRA).
1154	*
1155	* The resulting string in "dest" is always null terminated.
1156	*
1157	* @param flags can have:
1158	* <dl>
1159	* <dt>STR_TERMINATE</dt>
1160	* <dd>STR_TERMINATE means the string in @p src
1161	* is null terminated, and src_len is ignored.</dd>
1162	* </dl>
1163	*
1164	* @param src_len is the length of the source area in bytes.
1165	* @returns the number of bytes occupied by the string in @p src.
1166	**/
1167
1168	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1169	char **ppdest,
1170	const void *src,
1171	size_t src_len,
1172	int flags)
1173	{
1174	char *dest = NULL;
1175	size_t dest_len = 0;
1176
1177	#ifdef DEVELOPER
1178	/* Ensure we never use the braindead "malloc" varient. */
1179	if (ctx == NULL) {
1180	smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1181	}
1182	#endif
1183
1184	*ppdest = NULL;
1185
1186	if (flags & STR_TERMINATE) {
1187	if (src_len == (size_t)-1) {
1188	src_len = strlen((const char *)src) + 1;
1189	} else {
1190	size_t len = strnlen((const char *)src, src_len);
1191	if (len < src_len)
1192	len++;
1193	src_len = len;
1194	}
1195	/* Ensure we don't use an insane length from the client. */
1196	if (src_len >= 1024*1024) {
1197	char *msg = talloc_asprintf(ctx,
1198	"Bad src length (%u) in "
1199	"pull_ascii_base_talloc",
1200	(unsigned int)src_len);
1201	smb_panic(msg);
1202	}
1203	}
1204
1205	if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1206	&dest_len, True))
1207	dest_len = 0;
1208
1209	if (dest_len && dest) {
1210	/* Did we already process the terminating zero ? */
1211	if (dest[dest_len-1] != 0) {
1212	dest[dest_len-1] = 0;
1213	}
1214	} else if (dest) {
1215	dest[0] = 0;
1216	}
1217
1218	*ppdest = dest;
1219	return src_len;
1220	}
1221
1222	size_t pull_ascii_fstring(char dest, const void src)
1223	{
1224	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1225	}
1226
1227	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1228
1229	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1230	{
1231	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1232	}
1233
1234	/**
1235	* Copy a string from a char* src to a unicode destination.
1236	*
1237	* @returns the number of bytes occupied by the string in the destination.
1238	*
1239	* @param flags can have:
1240	*
1241	* <dl>
1242	* <dt>STR_TERMINATE <dd>means include the null termination.
1243	* <dt>STR_UPPER <dd>means uppercase in the destination.
1244	* <dt>STR_NOALIGN <dd>means don't do alignment.
1245	* </dl>
1246	*
1247	* @param dest_len is the maximum length allowed in the
1248	* destination.
1249	**/
1250
1251	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1252	{
1253	size_t len=0;
1254	size_t src_len;
1255	size_t ret;
1256
1257	if (dest_len == (size_t)-1) {
1258	/* No longer allow dest_len of -1. */
1259	smb_panic("push_ucs2 - invalid dest_len of -1");
1260	}
1261
1262	if (flags & STR_TERMINATE)
1263	src_len = (size_t)-1;
1264	else
1265	src_len = strlen(src);
1266
1267	if (ucs2_align(base_ptr, dest, flags)) {
1268	(char )dest = 0;
1269	dest = (void )((char )dest + 1);
1270	if (dest_len)
1271	dest_len--;
1272	len++;
1273	}
1274
1275	/* ucs2 is always a multiple of 2 bytes */
1276	dest_len &= ~1;
1277
1278	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1279	if (ret == (size_t)-1) {
1280	if ((flags & STR_TERMINATE) &&
1281	dest &&
1282	dest_len) {
1283	(char )dest = 0;
1284	}
1285	return len;
1286	}
1287
1288	len += ret;
1289
1290	if (flags & STR_UPPER) {
1291	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1292	size_t i;
1293
1294	/* We check for i < (ret / 2) below as the dest string isn't null
1295	terminated if STR_TERMINATE isn't set. */
1296
1297	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1298	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1299	if (v != dest_ucs2[i]) {
1300	dest_ucs2[i] = v;
1301	}
1302	}
1303	}
1304
1305	return len;
1306	}
1307
1308
1309	/**
1310	* Copy a string from a unix char* src to a UCS2 destination,
1311	* allocating a buffer using talloc().
1312	*
1313	* @param dest always set at least to NULL
1314	*
1315	* @returns The number of bytes occupied by the string in the destination
1316	* or -1 in case of error.
1317	**/
1318	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1319	{
1320	size_t src_len = strlen(src)+1;
1321
1322	*dest = NULL;
1323	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1324	}
1325
1326
1327	/**
1328	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1329	*
1330	* @param dest always set at least to NULL
1331	*
1332	* @returns The number of bytes occupied by the string in the destination
1333	* or -1 in case of error.
1334	**/
1335
1336	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1337	{
1338	size_t dest_len, src_len = strlen(src)+1;
1339
1340	*dest = NULL;
1341	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1342	(void **)dest, &dest_len, True))
1343	return (size_t)-1;
1344	else
1345	return dest_len;
1346	}
1347
1348	/**
1349	Copy a string from a char* src to a UTF-8 destination.
1350	Return the number of bytes occupied by the string in the destination
1351	Flags can have:
1352	STR_TERMINATE means include the null termination
1353	STR_UPPER means uppercase in the destination
1354	dest_len is the maximum length allowed in the destination. If dest_len
1355	is -1 then no maxiumum is used.
1356	**/
1357
1358	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1359	{
1360	size_t src_len = 0;
1361	size_t ret;
1362	char *tmpbuf = NULL;
1363
1364	if (dest_len == (size_t)-1) {
1365	/* No longer allow dest_len of -1. */
1366	smb_panic("push_utf8 - invalid dest_len of -1");
1367	}
1368
1369	if (flags & STR_UPPER) {
1370	tmpbuf = strdup_upper(src);
1371	if (!tmpbuf) {
1372	return (size_t)-1;
1373	}
1374	src = tmpbuf;
1375	src_len = strlen(src);
1376	}
1377
1378	src_len = strlen(src);
1379	if (flags & STR_TERMINATE) {
1380	src_len++;
1381	}
1382
1383	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1384	SAFE_FREE(tmpbuf);
1385	return ret;
1386	}
1387
1388	size_t push_utf8_fstring(void dest, const char src)
1389	{
1390	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1391	}
1392
1393	/**
1394	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1395	*
1396	* @param dest always set at least to NULL
1397	*
1398	* @returns The number of bytes occupied by the string in the destination
1399	**/
1400
1401	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1402	{
1403	size_t src_len = strlen(src)+1;
1404
1405	*dest = NULL;
1406	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1407	}
1408
1409	/**
1410	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1411	*
1412	* @param dest always set at least to NULL
1413	*
1414	* @returns The number of bytes occupied by the string in the destination
1415	**/
1416
1417	size_t push_utf8_allocate(char *dest, const char src)
1418	{
1419	size_t dest_len, src_len = strlen(src)+1;
1420
1421	*dest = NULL;
1422	if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1423	(void **)dest, &dest_len, True))
1424	return (size_t)-1;
1425	else
1426	return dest_len;
1427	}
1428
1429	/**
1430	Copy a string from a ucs2 source to a unix char* destination.
1431	Flags can have:
1432	STR_TERMINATE means the string in src is null terminated.
1433	STR_NOALIGN means don't try to align.
1434	if STR_TERMINATE is set then src_len is ignored if it is -1.
1435	src_len is the length of the source area in bytes
1436	Return the number of bytes occupied by the string in src.
1437	The resulting string in "dest" is always null terminated.
1438	**/
1439
1440	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1441	{
1442	size_t ret;
1443
1444	if (dest_len == (size_t)-1) {
1445	/* No longer allow dest_len of -1. */
1446	smb_panic("pull_ucs2 - invalid dest_len of -1");
1447	}
1448
1449	if (!src_len) {
1450	if (dest && dest_len > 0) {
1451	dest[0] = '\0';
1452	}
1453	return 0;
1454	}
1455
1456	if (ucs2_align(base_ptr, src, flags)) {
1457	src = (const void )((const char )src + 1);
1458	if (src_len != (size_t)-1)
1459	src_len--;
1460	}
1461
1462	if (flags & STR_TERMINATE) {
1463	/* src_len -1 is the default for null terminated strings. */
1464	if (src_len != (size_t)-1) {
1465	size_t len = strnlen_w((const smb_ucs2_t *)src,
1466	src_len/2);
1467	if (len < src_len/2)
1468	len++;
1469	src_len = len*2;
1470	}
1471	}
1472
1473	/* ucs2 is always a multiple of 2 bytes */
1474	if (src_len != (size_t)-1)
1475	src_len &= ~1;
1476
1477	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1478	if (ret == (size_t)-1) {
1479	ret = 0;
1480	dest_len = 0;
1481	}
1482
1483	if (src_len == (size_t)-1)
1484	src_len = ret*2;
1485
1486	if (dest_len && ret) {
1487	/* Did we already process the terminating zero ? */
1488	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1489	dest[MIN(ret, dest_len-1)] = 0;
1490	}
1491	} else {
1492	dest[0] = 0;
1493	}
1494
1495	return src_len;
1496	}
1497
1498	/**
1499	Copy a string from a ucs2 source to a unix char* destination.
1500	Talloc version with a base pointer.
1501	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1502	needs fixing. JRA).
1503	Flags can have:
1504	STR_TERMINATE means the string in src is null terminated.
1505	STR_NOALIGN means don't try to align.
1506	if STR_TERMINATE is set then src_len is ignored if it is -1.
1507	src_len is the length of the source area in bytes
1508	Return the number of bytes occupied by the string in src.
1509	The resulting string in "dest" is always null terminated.
1510	**/
1511
1512	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1513	const void *base_ptr,
1514	char **ppdest,
1515	const void *src,
1516	size_t src_len,
1517	int flags)
1518	{
1519	char *dest;
1520	size_t dest_len;
1521
1522	*ppdest = NULL;
1523
1524	#ifdef DEVELOPER
1525	/* Ensure we never use the braindead "malloc" varient. */
1526	if (ctx == NULL) {
1527	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1528	}
1529	#endif
1530
1531	if (!src_len) {
1532	return 0;
1533	}
1534
1535	if (ucs2_align(base_ptr, src, flags)) {
1536	src = (const void )((const char )src + 1);
1537	if (src_len != (size_t)-1)
1538	src_len--;
1539	}
1540
1541	if (flags & STR_TERMINATE) {
1542	/* src_len -1 is the default for null terminated strings. */
1543	if (src_len != (size_t)-1) {
1544	size_t len = strnlen_w((const smb_ucs2_t *)src,
1545	src_len/2);
1546	if (len < src_len/2)
1547	len++;
1548	src_len = len*2;
1549	} else {
1550	/*
1551	* src_len == -1 - alloc interface won't take this
1552	* so we must calculate.
1553	*/
1554	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1555	}
1556	/* Ensure we don't use an insane length from the client. */
1557	if (src_len >= 1024*1024) {
1558	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1559	}
1560	}
1561
1562	/* ucs2 is always a multiple of 2 bytes */
1563	if (src_len != (size_t)-1) {
1564	src_len &= ~1;
1565	}
1566
1567	dest_len = convert_string_talloc(ctx,
1568	CH_UTF16LE,
1569	CH_UNIX,
1570	src,
1571	src_len,
1572	(void *)&dest,
1573	True);
1574	if (dest_len == (size_t)-1) {
1575	dest_len = 0;
1576	}
1577
1578	if (src_len == (size_t)-1)
1579	src_len = dest_len*2;
1580
1581	if (dest_len) {
1582	/* Did we already process the terminating zero ? */
1583	if (dest[dest_len-1] != 0) {
1584	size_t size = talloc_get_size(dest);
1585	/* Have we got space to append the '\0' ? */
1586	if (size <= dest_len) {
1587	/* No, realloc. */
1588	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1589	dest_len+1);
1590	if (!dest) {
1591	/* talloc fail. */
1592	dest_len = (size_t)-1;
1593	return 0;
1594	}
1595	}
1596	/* Yay - space ! */
1597	dest[dest_len] = '\0';
1598	dest_len++;
1599	}
1600	} else if (dest) {
1601	dest[0] = 0;
1602	}
1603
1604	*ppdest = dest;
1605	return src_len;
1606	}
1607
1608	size_t pull_ucs2_fstring(char dest, const void src)
1609	{
1610	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1611	}
1612
1613	/**
1614	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1615	*
1616	* @param dest always set at least to NULL
1617	*
1618	* @returns The number of bytes occupied by the string in the destination
1619	**/
1620
1621	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1622	{
1623	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1624	*dest = NULL;
1625	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1626	}
1627
1628	/**
1629	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1630	*
1631	* @param dest always set at least to NULL
1632	*
1633	* @returns The number of bytes occupied by the string in the destination
1634	**/
1635
1636	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1637	{
1638	size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1639	*dest = NULL;
1640	if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1641	(void **)dest, &dest_len, True))
1642	return (size_t)-1;
1643	else
1644	return dest_len;
1645	}
1646
1647	/**
1648	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1649	*
1650	* @param dest always set at least to NULL
1651	*
1652	* @returns The number of bytes occupied by the string in the destination
1653	**/
1654
1655	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1656	{
1657	size_t src_len = strlen(src)+1;
1658	*dest = NULL;
1659	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1660	}
1661
1662	/**
1663	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1664	*
1665	* @param dest always set at least to NULL
1666	*
1667	* @returns The number of bytes occupied by the string in the destination
1668	**/
1669
1670	size_t pull_utf8_allocate(char *dest, const char src)
1671	{
1672	size_t dest_len, src_len = strlen(src)+1;
1673	*dest = NULL;
1674	if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1675	(void **)dest, &dest_len, True))
1676	return (size_t)-1;
1677	else
1678	return dest_len;
1679	}
1680
1681	/**
1682	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1683	*
1684	* @param dest always set at least to NULL
1685	*
1686	* @returns The number of bytes occupied by the string in the destination
1687	**/
1688
1689	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1690	{
1691	size_t src_len = strlen(src)+1;
1692	*dest = NULL;
1693	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1694	}
1695
1696	/**
1697	Copy a string from a char* src to a unicode or ascii
1698	dos codepage destination choosing unicode or ascii based on the
1699	flags in the SMB buffer starting at base_ptr.
1700	Return the number of bytes occupied by the string in the destination.
1701	flags can have:
1702	STR_TERMINATE means include the null termination.
1703	STR_UPPER means uppercase in the destination.
1704	STR_ASCII use ascii even with unicode packet.
1705	STR_NOALIGN means don't do alignment.
1706	dest_len is the maximum length allowed in the destination. If dest_len
1707	is -1 then no maxiumum is used.
1708	**/
1709
1710	size_t push_string_fn(const char *function, unsigned int line,
1711	const void *base_ptr, uint16 flags2,
1712	void dest, const char src,
1713	size_t dest_len, int flags)
1714	{
1715	#ifdef DEVELOPER
1716	/* We really need to zero fill here, not clobber
1717	* region, as we want to ensure that valgrind thinks
1718	* all of the outgoing buffer has been written to
1719	* so a send() or write() won't trap an error.
1720	* JRA.
1721	*/
1722	#if 0
1723	clobber_region(function, line, dest, dest_len);
1724	#else
1725	memset(dest, '\0', dest_len);
1726	#endif
1727	#endif
1728
1729	if (!(flags & STR_ASCII) && \
1730	((flags & STR_UNICODE \|\| \
1731	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1732	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1733	}
1734	return push_ascii(dest, src, dest_len, flags);
1735	}
1736
1737
1738	/**
1739	Copy a string from a unicode or ascii source (depending on
1740	the packet flags) to a char* destination.
1741	Flags can have:
1742	STR_TERMINATE means the string in src is null terminated.
1743	STR_UNICODE means to force as unicode.
1744	STR_ASCII use ascii even with unicode packet.
1745	STR_NOALIGN means don't do alignment.
1746	if STR_TERMINATE is set then src_len is ignored is it is -1
1747	src_len is the length of the source area in bytes.
1748	Return the number of bytes occupied by the string in src.
1749	The resulting string in "dest" is always null terminated.
1750	**/
1751
1752	size_t pull_string_fn(const char *function,
1753	unsigned int line,
1754	const void *base_ptr,
1755	uint16 smb_flags2,
1756	char *dest,
1757	const void *src,
1758	size_t dest_len,
1759	size_t src_len,
1760	int flags)
1761	{
1762	#ifdef DEVELOPER
1763	clobber_region(function, line, dest, dest_len);
1764	#endif
1765
1766	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1767	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1768	"UNICODE defined");
1769	}
1770
1771	if (!(flags & STR_ASCII) && \
1772	((flags & STR_UNICODE \|\| \
1773	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1774	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1775	}
1776	return pull_ascii(dest, src, dest_len, src_len, flags);
1777	}
1778
1779	/**
1780	Copy a string from a unicode or ascii source (depending on
1781	the packet flags) to a char* destination.
1782	Variant that uses talloc.
1783	Flags can have:
1784	STR_TERMINATE means the string in src is null terminated.
1785	STR_UNICODE means to force as unicode.
1786	STR_ASCII use ascii even with unicode packet.
1787	STR_NOALIGN means don't do alignment.
1788	if STR_TERMINATE is set then src_len is ignored is it is -1
1789	src_len is the length of the source area in bytes.
1790	Return the number of bytes occupied by the string in src.
1791	The resulting string in "dest" is always null terminated.
1792	**/
1793
1794	size_t pull_string_talloc_fn(const char *function,
1795	unsigned int line,
1796	TALLOC_CTX *ctx,
1797	const void *base_ptr,
1798	uint16 smb_flags2,
1799	char **ppdest,
1800	const void *src,
1801	size_t src_len,
1802	int flags)
1803	{
1804	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1805	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1806	"UNICODE defined");
1807	}
1808
1809	if (!(flags & STR_ASCII) && \
1810	((flags & STR_UNICODE \|\| \
1811	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1812	return pull_ucs2_base_talloc(ctx,
1813	base_ptr,
1814	ppdest,
1815	src,
1816	src_len,
1817	flags);
1818	}
1819	return pull_ascii_base_talloc(ctx,
1820	ppdest,
1821	src,
1822	src_len,
1823	flags);
1824	}
1825
1826
1827	size_t align_string(const void base_ptr, const char p, int flags)
1828	{
1829	if (!(flags & STR_ASCII) && \
1830	((flags & STR_UNICODE \|\| \
1831	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1832	return ucs2_align(base_ptr, p, flags);
1833	}
1834	return 0;
1835	}
1836
1837	/*
1838	Return the unicode codepoint for the next multi-byte CH_UNIX character
1839	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1840
1841	Also return the number of bytes consumed (which tells the caller
1842	how many bytes to skip to get to the next CH_UNIX character).
1843
1844	Return INVALID_CODEPOINT if the next character cannot be converted.
1845	*/
1846
1847	codepoint_t next_codepoint(const char str, size_t size)
1848	{
1849	/* It cannot occupy more than 4 bytes in UTF16 format */
1850	uint8_t buf[4];
1851	smb_iconv_t descriptor;
1852	size_t ilen_orig;
1853	size_t ilen;
1854	size_t olen;
1855	char *outbuf;
1856
1857	if ((str[0] & 0x80) == 0) {
1858	*size = 1;
1859	return (codepoint_t)str[0];
1860	}
1861
1862	/* We assume that no multi-byte character can take
1863	more than 5 bytes. This is OK as we only
1864	support codepoints up to 1M */
1865
1866	ilen_orig = strnlen(str, 5);
1867	ilen = ilen_orig;
1868
1869	lazy_initialize_conv();
1870
1871	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1872	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1873	*size = 1;
1874	return INVALID_CODEPOINT;
1875	}
1876
1877	/* This looks a little strange, but it is needed to cope
1878	with codepoints above 64k which are encoded as per RFC2781. */
1879	olen = 2;
1880	outbuf = (char *)buf;
1881	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1882	if (olen == 2) {
1883	/* We failed to convert to a 2 byte character.
1884	See if we can convert to a 4 UTF16-LE byte char encoding.
1885	*/
1886	olen = 4;
1887	outbuf = (char *)buf;
1888	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1889	if (olen == 4) {
1890	/* We didn't convert any bytes */
1891	*size = 1;
1892	return INVALID_CODEPOINT;
1893	}
1894	olen = 4 - olen;
1895	} else {
1896	olen = 2 - olen;
1897	}
1898
1899	*size = ilen_orig - ilen;
1900
1901	if (olen == 2) {
1902	/* 2 byte, UTF16-LE encoded value. */
1903	return (codepoint_t)SVAL(buf, 0);
1904	}
1905	if (olen == 4) {
1906	/* Decode a 4 byte UTF16-LE character manually.
1907	See RFC2871 for the encoding machanism.
1908	*/
1909	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1910	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1911
1912	return (codepoint_t)0x10000 +
1913	(w1 << 10) + w2;
1914	}
1915
1916	/* no other length is valid */
1917	return INVALID_CODEPOINT;
1918	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: