Context Navigation

source: trunk/server/source3/lib/charcnv.c@ 533

Visit:

Last change on this file since 533 was 481, checked in by Silvan Scherrer, 15 years ago
Samba Server 3.5: fixes ticket 141
File size: 47.4 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 3 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22	*/
23	#include "includes.h"
24
/**
 * Return the substitute character that the "best effort" conversion
 * paths in this file write in place of a character they cannot convert.
 *
 * We can parameterize this if someone complains.... JRA.
 *
 * @returns always '_'.
 **/
char lp_failed_convert_char(void)
{
	return '_';
}
31
32	/**
33	* @file
34	*
35	* @brief Character-set conversion routines built on our iconv.
36	*
37	* @note Samba's internal character set (at least in the 3.0 series)
38	* is always the same as the one for the Unix filesystem. It is
39	* <b>not</b> necessarily UTF-8 and may be different on machines that
40	* need i18n filenames to be compatible with Unix software. It does
41	* have to be a superset of ASCII. All multibyte sequences must start
42	* with a byte with the high bit set.
43	*
44	* @sa lib/iconv.c
45	*/
46
47
48	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49	static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50	static bool initialized;
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret;
58
59	switch (ch) {
60	case CH_UTF16LE:
61	ret = "UTF-16LE";
62	break;
63	case CH_UTF16BE:
64	ret = "UTF-16BE";
65	break;
66	case CH_UNIX:
67	ret = lp_unix_charset();
68	break;
69	case CH_DOS:
70	ret = lp_dos_charset();
71	break;
72	case CH_DISPLAY:
73	ret = lp_display_charset();
74	break;
75	case CH_UTF8:
76	ret = "UTF8";
77	break;
78	default:
79	ret = NULL;
80	}
81
82	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83	if (ret && !strcmp(ret, "LOCALE")) {
84	const char *ln = NULL;
85
86	#ifdef HAVE_SETLOCALE
87	setlocale(LC_ALL, "");
88	#endif
89	ln = nl_langinfo(CODESET);
90	if (ln) {
91	/* Check whether the charset name is supported
92	by iconv */
93	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
94	if (handle == (smb_iconv_t) -1) {
95	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
96	ln = NULL;
97	} else {
98	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
99	smb_iconv_close(handle);
100	}
101	}
102	ret = ln;
103	}
104	#endif
105
106	if (!ret \|\| !*ret) ret = "ASCII";
107	return ret;
108	}
109
110	void lazy_initialize_conv(void)
111	{
112	if (!initialized) {
113	load_case_tables();
114	init_iconv();
115	initialized = true;
116	}
117	}
118
119	/**
120	* Destroy global objects allocated by init_iconv()
121	**/
122	void gfree_charcnv(void)
123	{
124	int c1, c2;
125
126	for (c1=0;c1<NUM_CHARSETS;c1++) {
127	for (c2=0;c2<NUM_CHARSETS;c2++) {
128	if ( conv_handles[c1][c2] ) {
129	smb_iconv_close( conv_handles[c1][c2] );
130	conv_handles[c1][c2] = 0;
131	}
132	}
133	}
134	initialized = false;
135	}
136
137	/**
138	* Initialize iconv conversion descriptors.
139	*
140	* This is called the first time it is needed, and also called again
141	* every time the configuration is reloaded, because the charset or
142	* codepage might have changed.
143	**/
144	void init_iconv(void)
145	{
146	int c1, c2;
147	bool did_reload = False;
148
149	/* so that charset_name() works we need to get the UNIX<->UCS2 going
150	first */
151	if (!conv_handles[CH_UNIX][CH_UTF16LE])
152	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
153
154	if (!conv_handles[CH_UTF16LE][CH_UNIX])
155	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
156
157	for (c1=0;c1<NUM_CHARSETS;c1++) {
158	for (c2=0;c2<NUM_CHARSETS;c2++) {
159	const char *n1 = charset_name((charset_t)c1);
160	const char *n2 = charset_name((charset_t)c2);
161	if (conv_handles[c1][c2] &&
162	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
163	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
164	continue;
165
166	did_reload = True;
167
168	if (conv_handles[c1][c2])
169	smb_iconv_close(conv_handles[c1][c2]);
170
171	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
172	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
173	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174	charset_name((charset_t)c1), charset_name((charset_t)c2)));
175	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
176	n1 = "ASCII";
177	}
178	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
179	n2 = "ASCII";
180	}
181	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
182	n1, n2 ));
183	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
184	if (!conv_handles[c1][c2]) {
185	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
186	smb_panic("init_iconv: conv_handle initialization failed");
187	}
188	}
189	}
190	}
191
192	if (did_reload) {
193	/* XXX: Does this really get called every time the dos
194	* codepage changes? */
195	/* XXX: Is the did_reload test too strict? */
196	conv_silent = True;
197	init_valid_table();
198	conv_silent = False;
199	}
200	}
201
202	/**
203	* Convert string from one encoding to another, making error checking etc
204	* Slow path version - uses (slow) iconv.
205	*
206	* @param src pointer to source string (multibyte or singlebyte)
207	* @param srclen length of the source string in bytes
208	* @param dest pointer to destination string (multibyte or singlebyte)
209	* @param destlen maximal length allowed for string
210	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
211	* @returns the number of bytes occupied in the destination
212	*
213	* Ensure the srclen contains the terminating zero.
214	*
215	**/
216
217	static size_t convert_string_internal(charset_t from, charset_t to,
218	void const *src, size_t srclen,
219	void *dest, size_t destlen, bool allow_bad_conv)
220	{
221	size_t i_len, o_len;
222	size_t retval;
223	const char* inbuf = (const char*)src;
224	char* outbuf = (char*)dest;
225	smb_iconv_t descriptor;
226
227	lazy_initialize_conv();
228
229	descriptor = conv_handles[from][to];
230
231	if (srclen == (size_t)-1) {
232	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
233	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
234	} else {
235	srclen = strlen((const char *)src)+1;
236	}
237	}
238
239
240	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
241	if (!conv_silent)
242	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
243	return (size_t)-1;
244	}
245
246	i_len=srclen;
247	o_len=destlen;
248
249	again:
250
251	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
252	if(retval==(size_t)-1) {
253	const char *reason="unknown error";
254	switch(errno) {
255	case EINVAL:
256	reason="Incomplete multibyte sequence";
257	if (!conv_silent)
258	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
259	if (allow_bad_conv)
260	goto use_as_is;
261	return (size_t)-1;
262	case E2BIG:
263	reason="No more room";
264	if (!conv_silent) {
265	if (from == CH_UNIX) {
266	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
267	charset_name(from), charset_name(to),
268	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
269	} else {
270	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
271	charset_name(from), charset_name(to),
272	(unsigned int)srclen, (unsigned int)destlen));
273	}
274	}
275	break;
276	case EILSEQ:
277	reason="Illegal multibyte sequence";
278	if (!conv_silent)
279	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
280	if (allow_bad_conv)
281	goto use_as_is;
282
283	return (size_t)-1;
284	default:
285	if (!conv_silent)
286	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
287	return (size_t)-1;
288	}
289	/* smb_panic(reason); */
290	}
291	return destlen-o_len;
292
293	use_as_is:
294
295	/*
296	* Conversion not supported. This is actually an error, but there are so
297	* many misconfigured iconv systems and smb.conf's out there we can't just
298	* fail. Do a very bad conversion instead.... JRA.
299	*/
300
301	{
302	if (o_len == 0 \|\| i_len == 0)
303	return destlen - o_len;
304
305	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
306	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
307	/* Can't convert from utf16 any endian to multibyte.
308	Replace with the default fail char.
309	*/
310	if (i_len < 2)
311	return destlen - o_len;
312	if (i_len >= 2) {
313	*outbuf = lp_failed_convert_char();
314
315	outbuf++;
316	o_len--;
317
318	inbuf += 2;
319	i_len -= 2;
320	}
321
322	if (o_len == 0 \|\| i_len == 0)
323	return destlen - o_len;
324
325	/* Keep trying with the next char... */
326	goto again;
327
328	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
329	/* Can't convert to UTF16LE - just widen by adding the
330	default fail char then zero.
331	*/
332	if (o_len < 2)
333	return destlen - o_len;
334
335	outbuf[0] = lp_failed_convert_char();
336	outbuf[1] = '\0';
337
338	inbuf++;
339	i_len--;
340
341	outbuf += 2;
342	o_len -= 2;
343
344	if (o_len == 0 \|\| i_len == 0)
345	return destlen - o_len;
346
347	/* Keep trying with the next char... */
348	goto again;
349
350	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
351	to != CH_UTF16LE && to != CH_UTF16BE) {
352	/* Failed multibyte to multibyte. Just copy the default fail char and
353	try again. */
354	outbuf[0] = lp_failed_convert_char();
355
356	inbuf++;
357	i_len--;
358
359	outbuf++;
360	o_len--;
361
362	if (o_len == 0 \|\| i_len == 0)
363	return destlen - o_len;
364
365	/* Keep trying with the next char... */
366	goto again;
367
368	} else {
369	/* Keep compiler happy.... */
370	return destlen - o_len;
371	}
372	}
373	}
374
375	/**
376	* Convert string from one encoding to another, making error checking etc
377	* Fast path version - handles ASCII first.
378	*
379	* @param src pointer to source string (multibyte or singlebyte)
380	* @param srclen length of the source string in bytes, or -1 for nul terminated.
381	* @param dest pointer to destination string (multibyte or singlebyte)
382	* @param destlen maximal length allowed for string - NEVER -1.
383	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
384	* @returns the number of bytes occupied in the destination
385	*
386	* Ensure the srclen contains the terminating zero.
387	*
388	* This function has been hand-tuned to provide a fast path.
389	* Don't change unless you really know what you are doing. JRA.
390	**/
391
392	size_t convert_string(charset_t from, charset_t to,
393	void const *src, size_t srclen,
394	void *dest, size_t destlen, bool allow_bad_conv)
395	{
396	/*
397	* NB. We deliberately don't do a strlen here if srclen == -1.
398	* This is very expensive over millions of calls and is taken
399	* care of in the slow path in convert_string_internal. JRA.
400	*/
401
402	#ifdef DEVELOPER
403	SMB_ASSERT(destlen != (size_t)-1);
404	#endif
405
406	if (srclen == 0)
407	return 0;
408
409	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
410	const unsigned char p = (const unsigned char )src;
411	unsigned char q = (unsigned char )dest;
412	size_t slen = srclen;
413	size_t dlen = destlen;
414	unsigned char lastp = '\0';
415	size_t retval = 0;
416
417	/* If all characters are ascii, fast path here. */
418	while (slen && dlen) {
419	if ((lastp = *p) <= 0x7f) {
420	q++ = p++;
421	if (slen != (size_t)-1) {
422	slen--;
423	}
424	dlen--;
425	retval++;
426	if (!lastp)
427	break;
428	} else {
429	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
430	goto general_case;
431	#else
432	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
433	if (ret == (size_t)-1) {
434	return ret;
435	}
436	return retval + ret;
437	#endif
438	}
439	}
440	if (!dlen) {
441	/* Even if we fast path we should note if we ran out of room. */
442	if (((slen != (size_t)-1) && slen) \|\|
443	((slen == (size_t)-1) && lastp)) {
444	errno = E2BIG;
445	}
446	}
447	return retval;
448	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
449	const unsigned char p = (const unsigned char )src;
450	unsigned char q = (unsigned char )dest;
451	size_t retval = 0;
452	size_t slen = srclen;
453	size_t dlen = destlen;
454	unsigned char lastp = '\0';
455
456	/* If all characters are ascii, fast path here. */
457	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
458	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
459	q++ = p;
460	if (slen != (size_t)-1) {
461	slen -= 2;
462	}
463	p += 2;
464	dlen--;
465	retval++;
466	if (!lastp)
467	break;
468	} else {
469	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
470	goto general_case;
471	#else
472	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
473	if (ret == (size_t)-1) {
474	return ret;
475	}
476	return retval + ret;
477	#endif
478	}
479	}
480	if (!dlen) {
481	/* Even if we fast path we should note if we ran out of room. */
482	if (((slen != (size_t)-1) && slen) \|\|
483	((slen == (size_t)-1) && lastp)) {
484	errno = E2BIG;
485	}
486	}
487	return retval;
488	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
489	const unsigned char p = (const unsigned char )src;
490	unsigned char q = (unsigned char )dest;
491	size_t retval = 0;
492	size_t slen = srclen;
493	size_t dlen = destlen;
494	unsigned char lastp = '\0';
495
496	/* If all characters are ascii, fast path here. */
497	while (slen && (dlen >= 2)) {
498	if ((lastp = *p) <= 0x7F) {
499	q++ = p++;
500	*q++ = '\0';
501	if (slen != (size_t)-1) {
502	slen--;
503	}
504	dlen -= 2;
505	retval += 2;
506	if (!lastp)
507	break;
508	} else {
509	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
510	goto general_case;
511	#else
512	size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
513	if (ret == (size_t)-1) {
514	return ret;
515	}
516	return retval + ret;
517	#endif
518	}
519	}
520	if (!dlen) {
521	/* Even if we fast path we should note if we ran out of room. */
522	if (((slen != (size_t)-1) && slen) \|\|
523	((slen == (size_t)-1) && lastp)) {
524	errno = E2BIG;
525	}
526	}
527	return retval;
528	}
529
530	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
531	general_case:
532	#endif
533	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
534	}
535
536	/**
537	* Convert between character sets, allocating a new buffer using talloc for the result.
538	*
539	* @param srclen length of source buffer.
540	* @param dest always set at least to NULL
541	* @parm converted_size set to the number of bytes occupied by the string in
542	* the destination on success.
543	* @note -1 is not accepted for srclen.
544	*
545	* @return true if new buffer was correctly allocated, and string was
546	* converted.
547	*
548	* Ensure the srclen contains the terminating zero.
549	*
550	* I hate the goto's in this function. It's embarressing.....
551	* There has to be a cleaner way to do this. JRA.
552	*/
553	bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
554	void const src, size_t srclen, void dst,
555	size_t *converted_size, bool allow_bad_conv)
556
557	{
558	size_t i_len, o_len, destlen = (srclen * 3) / 2;
559	size_t retval;
560	const char inbuf = (const char )src;
561	char outbuf = NULL, ob = NULL;
562	smb_iconv_t descriptor;
563	void dest = (void )dst;
564
565	*dest = NULL;
566
567	if (!converted_size) {
568	errno = EINVAL;
569	return false;
570	}
571
572	if (src == NULL \|\| srclen == (size_t)-1) {
573	errno = EINVAL;
574	return false;
575	}
576	if (srclen == 0) {
577	ob = talloc_strdup(ctx, "");
578	if (ob == NULL) {
579	errno = ENOMEM;
580	return false;
581	}
582	*dest = ob;
583	*converted_size = 0;
584	return true;
585	}
586
587	lazy_initialize_conv();
588
589	descriptor = conv_handles[from][to];
590
591	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
592	if (!conv_silent)
593	DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
594	errno = EOPNOTSUPP;
595	return false;
596	}
597
598	convert:
599
600	/* +2 is for ucs2 null termination. */
601	if ((destlen*2)+2 < destlen) {
602	/* wrapped ! abort. */
603	if (!conv_silent)
604	DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
605	TALLOC_FREE(outbuf);
606	errno = EOPNOTSUPP;
607	return false;
608	} else {
609	destlen = destlen * 2;
610	}
611
612	/* +2 is for ucs2 null termination. */
613	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
614
615	if (!ob) {
616	DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
617	errno = ENOMEM;
618	return false;
619	}
620	outbuf = ob;
621	i_len = srclen;
622	o_len = destlen;
623
624	again:
625
626	retval = smb_iconv(descriptor,
627	&inbuf, &i_len,
628	&outbuf, &o_len);
629	if(retval == (size_t)-1) {
630	const char *reason="unknown error";
631	switch(errno) {
632	case EINVAL:
633	reason="Incomplete multibyte sequence";
634	if (!conv_silent)
635	DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
636	if (allow_bad_conv)
637	goto use_as_is;
638	break;
639	case E2BIG:
640	goto convert;
641	case EILSEQ:
642	reason="Illegal multibyte sequence";
643	if (!conv_silent)
644	DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
645	if (allow_bad_conv)
646	goto use_as_is;
647	break;
648	}
649	if (!conv_silent)
650	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
651	/* smb_panic(reason); */
652	TALLOC_FREE(ob);
653	return false;
654	}
655
656	out:
657
658	destlen = destlen - o_len;
659	/* Don't shrink unless we're reclaiming a lot of
660	* space. This is in the hot codepath and these
661	* reallocs cost. JRA.
662	*/
663	if (o_len > 1024) {
664	/* We're shrinking here so we know the +2 is safe from wrap. */
665	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
666	}
667
668	if (destlen && !ob) {
669	DEBUG(0, ("convert_string_talloc: out of memory!\n"));
670	errno = ENOMEM;
671	return false;
672	}
673
674	*dest = ob;
675
676	/* Must ucs2 null terminate in the extra space we allocated. */
677	ob[destlen] = '\0';
678	ob[destlen+1] = '\0';
679
680	*converted_size = destlen;
681	return true;
682
683	use_as_is:
684
685	/*
686	* Conversion not supported. This is actually an error, but there are so
687	* many misconfigured iconv systems and smb.conf's out there we can't just
688	* fail. Do a very bad conversion instead.... JRA.
689	*/
690
691	{
692	if (o_len == 0 \|\| i_len == 0)
693	goto out;
694
695	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
696	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
697	/* Can't convert from utf16 any endian to multibyte.
698	Replace with the default fail char.
699	*/
700
701	if (i_len < 2)
702	goto out;
703
704	if (i_len >= 2) {
705	*outbuf = lp_failed_convert_char();
706
707	outbuf++;
708	o_len--;
709
710	inbuf += 2;
711	i_len -= 2;
712	}
713
714	if (o_len == 0 \|\| i_len == 0)
715	goto out;
716
717	/* Keep trying with the next char... */
718	goto again;
719
720	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
721	/* Can't convert to UTF16LE - just widen by adding the
722	default fail char then zero.
723	*/
724	if (o_len < 2)
725	goto out;
726
727	outbuf[0] = lp_failed_convert_char();
728	outbuf[1] = '\0';
729
730	inbuf++;
731	i_len--;
732
733	outbuf += 2;
734	o_len -= 2;
735
736	if (o_len == 0 \|\| i_len == 0)
737	goto out;
738
739	/* Keep trying with the next char... */
740	goto again;
741
742	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
743	to != CH_UTF16LE && to != CH_UTF16BE) {
744	/* Failed multibyte to multibyte. Just copy the default fail char and
745	try again. */
746	outbuf[0] = lp_failed_convert_char();
747
748	inbuf++;
749	i_len--;
750
751	outbuf++;
752	o_len--;
753
754	if (o_len == 0 \|\| i_len == 0)
755	goto out;
756
757	/* Keep trying with the next char... */
758	goto again;
759
760	} else {
761	/* Keep compiler happy.... */
762	goto out;
763	}
764	}
765	}
766
767	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
768	{
769	size_t size;
770	smb_ucs2_t *buffer;
771
772	if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
773	return (size_t)-1;
774	}
775
776	if (!strupper_w(buffer) && (dest == src)) {
777	TALLOC_FREE(buffer);
778	return srclen;
779	}
780
781	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
782	TALLOC_FREE(buffer);
783	return size;
784	}
785
786	/**
787	talloc_strdup() a unix string to upper case.
788	**/
789
790	char talloc_strdup_upper(TALLOC_CTX ctx, const char *s)
791	{
792	char *out_buffer = talloc_strdup(ctx,s);
793	const unsigned char p = (const unsigned char )s;
794	unsigned char q = (unsigned char )out_buffer;
795
796	if (!q) {
797	return NULL;
798	}
799
800	/* this is quite a common operation, so we want it to be
801	fast. We optimise for the ascii case, knowing that all our
802	supported multi-byte character sets are ascii-compatible
803	(ie. they match for the first 128 chars) */
804
805	while (*p) {
806	if (*p & 0x80)
807	break;
808	q++ = toupper_ascii_fast(p);
809	p++;
810	}
811
812	if (*p) {
813	/* MB case. */
814	size_t converted_size, converted_size2;
815	smb_ucs2_t *ubuf = NULL;
816
817	/* We're not using the ascii buffer above. */
818	TALLOC_FREE(out_buffer);
819
820	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
821	strlen(s)+1, (void *)&ubuf,
822	&converted_size, True))
823	{
824	return NULL;
825	}
826
827	strupper_w(ubuf);
828
829	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
830	converted_size, (void *)&out_buffer,
831	&converted_size2, True))
832	{
833	TALLOC_FREE(ubuf);
834	return NULL;
835	}
836
837	/* Don't need the intermediate buffer
838	* anymore.
839	*/
840	TALLOC_FREE(ubuf);
841	}
842
843	return out_buffer;
844	}
845
846	char strupper_talloc(TALLOC_CTX ctx, const char *s) {
847	return talloc_strdup_upper(ctx, s);
848	}
849
850
851	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
852	{
853	size_t size;
854	smb_ucs2_t *buffer = NULL;
855
856	if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
857	(void *)(void )&buffer, &size,
858	True))
859	{
860	smb_panic("failed to create UCS2 buffer");
861	}
862	if (!strlower_w(buffer) && (dest == src)) {
863	TALLOC_FREE(buffer);
864	return srclen;
865	}
866	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
867	TALLOC_FREE(buffer);
868	return size;
869	}
870
871
872	char talloc_strdup_lower(TALLOC_CTX ctx, const char *s)
873	{
874	size_t converted_size;
875	smb_ucs2_t *buffer = NULL;
876	char *out_buffer;
877
878	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
879	return NULL;
880	}
881
882	strlower_w(buffer);
883
884	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
885	TALLOC_FREE(buffer);
886	return NULL;
887	}
888
889	TALLOC_FREE(buffer);
890
891	return out_buffer;
892	}
893
894	char strlower_talloc(TALLOC_CTX ctx, const char *s) {
895	return talloc_strdup_lower(ctx, s);
896	}
897
898	size_t ucs2_align(const void base_ptr, const void p, int flags)
899	{
900	if (flags & (STR_NOALIGN\|STR_ASCII))
901	return 0;
902	return PTR_DIFF(p, base_ptr) & 1;
903	}
904
905
906	/**
907	* Copy a string from a char* unix src to a dos codepage string destination.
908	*
909	* @return the number of bytes occupied by the string in the destination.
910	*
911	* @param flags can include
912	* <dl>
913	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
914	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
915	* </dl>
916	*
917	* @param dest_len the maximum length in bytes allowed in the
918	* destination.
919	**/
920	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
921	{
922	size_t src_len = strlen(src);
923	char *tmpbuf = NULL;
924	size_t ret;
925
926	/* No longer allow a length of -1. */
927	if (dest_len == (size_t)-1) {
928	smb_panic("push_ascii - dest_len == -1");
929	}
930
931	if (flags & STR_UPPER) {
932	tmpbuf = SMB_STRDUP(src);
933	if (!tmpbuf) {
934	smb_panic("malloc fail");
935	}
936	strupper_m(tmpbuf);
937	src = tmpbuf;
938	}
939
940	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII)) {
941	src_len++;
942	}
943
944	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
945	if (ret == (size_t)-1 &&
946	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
947	&& dest_len > 0) {
948	((char *)dest)[0] = '\0';
949	}
950	SAFE_FREE(tmpbuf);
951	return ret;
952	}
953
954	size_t push_ascii_fstring(void dest, const char src)
955	{
956	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
957	}
958
959	/********************************************************************
960	Push an nstring - ensure null terminated. Written by
961	moriyama@miraclelinux.com (MORIYAMA Masayuki).
962	********************************************************************/
963
964	size_t push_ascii_nstring(void dest, const char src)
965	{
966	size_t i, buffer_len, dest_len;
967	smb_ucs2_t *buffer;
968
969	conv_silent = True;
970	if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &buffer_len)) {
971	smb_panic("failed to create UCS2 buffer");
972	}
973
974	/* We're using buffer_len below to count ucs2 characters, not bytes. */
975	buffer_len /= sizeof(smb_ucs2_t);
976
977	dest_len = 0;
978	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
979	unsigned char mb[10];
980	/* Convert one smb_ucs2_t character at a time. */
981	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
982	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
983	memcpy((char *)dest + dest_len, mb, mb_len);
984	dest_len += mb_len;
985	} else {
986	errno = E2BIG;
987	break;
988	}
989	}
990	((char *)dest)[dest_len] = '\0';
991
992	conv_silent = False;
993	TALLOC_FREE(buffer);
994	return dest_len;
995	}
996
997	/********************************************************************
998	Push and malloc an ascii string. src and dest null terminated.
999	********************************************************************/
1000
1001	bool push_ascii_talloc(TALLOC_CTX mem_ctx, char dest, const char src, size_t *converted_size)
1002	{
1003	size_t src_len = strlen(src)+1;
1004
1005	*dest = NULL;
1006	return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
1007	(void **)dest, converted_size, True);
1008	}
1009
1010	/**
1011	* Copy a string from a dos codepage source to a unix char* destination.
1012	*
1013	* The resulting string in "dest" is always null terminated.
1014	*
1015	* @param flags can have:
1016	* <dl>
1017	* <dt>STR_TERMINATE</dt>
1018	* <dd>STR_TERMINATE means the string in @p src
1019	* is null terminated, and src_len is ignored.</dd>
1020	* </dl>
1021	*
1022	* @param src_len is the length of the source area in bytes.
1023	* @returns the number of bytes occupied by the string in @p src.
1024	**/
1025	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
1026	{
1027	size_t ret;
1028
1029	if (dest_len == (size_t)-1) {
1030	/* No longer allow dest_len of -1. */
1031	smb_panic("pull_ascii - invalid dest_len of -1");
1032	}
1033
1034	if (flags & STR_TERMINATE) {
1035	if (src_len == (size_t)-1) {
1036	src_len = strlen((const char *)src) + 1;
1037	} else {
1038	size_t len = strnlen((const char *)src, src_len);
1039	if (len < src_len)
1040	len++;
1041	src_len = len;
1042	}
1043	}
1044
1045	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1046	if (ret == (size_t)-1) {
1047	ret = 0;
1048	dest_len = 0;
1049	}
1050
1051	if (dest_len && ret) {
1052	/* Did we already process the terminating zero ? */
1053	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1054	dest[MIN(ret, dest_len-1)] = 0;
1055	}
1056	} else {
1057	dest[0] = 0;
1058	}
1059
1060	return src_len;
1061	}
1062
1063	/**
1064	* Copy a string from a dos codepage source to a unix char* destination.
1065	* Talloc version.
1066	*
1067	* The resulting string in "dest" is always null terminated.
1068	*
1069	* @param flags can have:
1070	* <dl>
1071	* <dt>STR_TERMINATE</dt>
1072	* <dd>STR_TERMINATE means the string in @p src
1073	* is null terminated, and src_len is ignored.</dd>
1074	* </dl>
1075	*
1076	* @param src_len is the length of the source area in bytes.
1077	* @returns the number of bytes occupied by the string in @p src.
1078	**/
1079
1080	static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1081	char **ppdest,
1082	const void *src,
1083	size_t src_len,
1084	int flags)
1085	{
1086	char *dest = NULL;
1087	size_t dest_len;
1088
1089	*ppdest = NULL;
1090
1091	if (!src_len) {
1092	return 0;
1093	}
1094
1095	if (flags & STR_TERMINATE) {
1096	if (src_len == (size_t)-1) {
1097	src_len = strlen((const char *)src) + 1;
1098	} else {
1099	size_t len = strnlen((const char *)src, src_len);
1100	if (len < src_len)
1101	len++;
1102	src_len = len;
1103	}
1104	/* Ensure we don't use an insane length from the client. */
1105	if (src_len >= 1024*1024) {
1106	char *msg = talloc_asprintf(ctx,
1107	"Bad src length (%u) in "
1108	"pull_ascii_base_talloc",
1109	(unsigned int)src_len);
1110	smb_panic(msg);
1111	}
1112	} else {
1113	/* Can't have an unlimited length
1114	* non STR_TERMINATE'd.
1115	*/
1116	if (src_len == (size_t)-1) {
1117	errno = EINVAL;
1118	return 0;
1119	}
1120	}
1121
1122	/* src_len != -1 here. */
1123
1124	if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1125	&dest_len, True)) {
1126	dest_len = 0;
1127	}
1128
1129	if (dest_len && dest) {
1130	/* Did we already process the terminating zero ? */
1131	if (dest[dest_len-1] != 0) {
1132	size_t size = talloc_get_size(dest);
1133	/* Have we got space to append the '\0' ? */
1134	if (size <= dest_len) {
1135	/* No, realloc. */
1136	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1137	dest_len+1);
1138	if (!dest) {
1139	/* talloc fail. */
1140	dest_len = (size_t)-1;
1141	return 0;
1142	}
1143	}
1144	/* Yay - space ! */
1145	dest[dest_len] = '\0';
1146	dest_len++;
1147	}
1148	} else if (dest) {
1149	dest[0] = 0;
1150	}
1151
1152	*ppdest = dest;
1153	return src_len;
1154	}
1155
1156	size_t pull_ascii_fstring(char dest, const void src)
1157	{
1158	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1159	}
1160
1161	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1162
1163	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1164	{
1165	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1166	}
1167
1168	/**
1169	* Copy a string from a char* src to a unicode destination.
1170	*
1171	* @returns the number of bytes occupied by the string in the destination.
1172	*
1173	* @param flags can have:
1174	*
1175	* <dl>
1176	* <dt>STR_TERMINATE <dd>means include the null termination.
1177	* <dt>STR_UPPER <dd>means uppercase in the destination.
1178	* <dt>STR_NOALIGN <dd>means don't do alignment.
1179	* </dl>
1180	*
1181	* @param dest_len is the maximum length allowed in the
1182	* destination.
1183	**/
1184
1185	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1186	{
1187	size_t len=0;
1188	size_t src_len;
1189	size_t ret;
1190
1191	if (dest_len == (size_t)-1) {
1192	/* No longer allow dest_len of -1. */
1193	smb_panic("push_ucs2 - invalid dest_len of -1");
1194	}
1195
1196	if (flags & STR_TERMINATE)
1197	src_len = (size_t)-1;
1198	else
1199	src_len = strlen(src);
1200
1201	if (ucs2_align(base_ptr, dest, flags)) {
1202	(char )dest = 0;
1203	dest = (void )((char )dest + 1);
1204	if (dest_len)
1205	dest_len--;
1206	len++;
1207	}
1208
1209	/* ucs2 is always a multiple of 2 bytes */
1210	dest_len &= ~1;
1211
1212	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1213	if (ret == (size_t)-1) {
1214	if ((flags & STR_TERMINATE) &&
1215	dest &&
1216	dest_len) {
1217	(char )dest = 0;
1218	}
1219	return len;
1220	}
1221
1222	len += ret;
1223
1224	if (flags & STR_UPPER) {
1225	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1226	size_t i;
1227
1228	/* We check for i < (ret / 2) below as the dest string isn't null
1229	terminated if STR_TERMINATE isn't set. */
1230
1231	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1232	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1233	if (v != dest_ucs2[i]) {
1234	dest_ucs2[i] = v;
1235	}
1236	}
1237	}
1238
1239	return len;
1240	}
1241
1242
1243	/**
1244	* Copy a string from a unix char* src to a UCS2 destination,
1245	* allocating a buffer using talloc().
1246	*
1247	* @param dest always set at least to NULL
1248	* @parm converted_size set to the number of bytes occupied by the string in
1249	* the destination on success.
1250	*
1251	* @return true if new buffer was correctly allocated, and string was
1252	* converted.
1253	**/
1254	bool push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src,
1255	size_t *converted_size)
1256	{
1257	size_t src_len = strlen(src)+1;
1258
1259	*dest = NULL;
1260	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1261	(void **)dest, converted_size, True);
1262	}
1263
1264
1265	/**
1266	Copy a string from a char* src to a UTF-8 destination.
1267	Return the number of bytes occupied by the string in the destination
1268	Flags can have:
1269	STR_TERMINATE means include the null termination
1270	STR_UPPER means uppercase in the destination
1271	dest_len is the maximum length allowed in the destination. If dest_len
1272	is -1 then no maxiumum is used.
1273	**/
1274
1275	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1276	{
1277	size_t src_len = 0;
1278	size_t ret;
1279	char *tmpbuf = NULL;
1280
1281	if (dest_len == (size_t)-1) {
1282	/* No longer allow dest_len of -1. */
1283	smb_panic("push_utf8 - invalid dest_len of -1");
1284	}
1285
1286	if (flags & STR_UPPER) {
1287	tmpbuf = strupper_talloc(talloc_tos(), src);
1288	if (!tmpbuf) {
1289	return (size_t)-1;
1290	}
1291	src = tmpbuf;
1292	src_len = strlen(src);
1293	}
1294
1295	src_len = strlen(src);
1296	if (flags & STR_TERMINATE) {
1297	src_len++;
1298	}
1299
1300	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1301	TALLOC_FREE(tmpbuf);
1302	return ret;
1303	}
1304
1305	size_t push_utf8_fstring(void dest, const char src)
1306	{
1307	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1308	}
1309
1310	/**
1311	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1312	*
1313	* @param dest always set at least to NULL
1314	* @parm converted_size set to the number of bytes occupied by the string in
1315	* the destination on success.
1316	*
1317	* @return true if new buffer was correctly allocated, and string was
1318	* converted.
1319	**/
1320
1321	bool push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1322	size_t *converted_size)
1323	{
1324	size_t src_len = strlen(src)+1;
1325
1326	*dest = NULL;
1327	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1328	(void**)dest, converted_size, True);
1329	}
1330
1331	/**
1332	Copy a string from a ucs2 source to a unix char* destination.
1333	Flags can have:
1334	STR_TERMINATE means the string in src is null terminated.
1335	STR_NOALIGN means don't try to align.
1336	if STR_TERMINATE is set then src_len is ignored if it is -1.
1337	src_len is the length of the source area in bytes
1338	Return the number of bytes occupied by the string in src.
1339	The resulting string in "dest" is always null terminated.
1340	**/
1341
1342	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1343	{
1344	size_t ret;
1345
1346	if (dest_len == (size_t)-1) {
1347	/* No longer allow dest_len of -1. */
1348	smb_panic("pull_ucs2 - invalid dest_len of -1");
1349	}
1350
1351	if (!src_len) {
1352	if (dest && dest_len > 0) {
1353	dest[0] = '\0';
1354	}
1355	return 0;
1356	}
1357
1358	if (ucs2_align(base_ptr, src, flags)) {
1359	src = (const void )((const char )src + 1);
1360	if (src_len != (size_t)-1)
1361	src_len--;
1362	}
1363
1364	if (flags & STR_TERMINATE) {
1365	/* src_len -1 is the default for null terminated strings. */
1366	if (src_len != (size_t)-1) {
1367	size_t len = strnlen_w((const smb_ucs2_t *)src,
1368	src_len/2);
1369	if (len < src_len/2)
1370	len++;
1371	src_len = len*2;
1372	}
1373	}
1374
1375	/* ucs2 is always a multiple of 2 bytes */
1376	if (src_len != (size_t)-1)
1377	src_len &= ~1;
1378
1379	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1380	if (ret == (size_t)-1) {
1381	ret = 0;
1382	dest_len = 0;
1383	}
1384
1385	if (src_len == (size_t)-1)
1386	src_len = ret*2;
1387
1388	if (dest_len && ret) {
1389	/* Did we already process the terminating zero ? */
1390	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1391	dest[MIN(ret, dest_len-1)] = 0;
1392	}
1393	} else {
1394	dest[0] = 0;
1395	}
1396
1397	return src_len;
1398	}
1399
1400	/**
1401	Copy a string from a ucs2 source to a unix char* destination.
1402	Talloc version with a base pointer.
1403	Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1404	needs fixing. JRA).
1405	Flags can have:
1406	STR_TERMINATE means the string in src is null terminated.
1407	STR_NOALIGN means don't try to align.
1408	if STR_TERMINATE is set then src_len is ignored if it is -1.
1409	src_len is the length of the source area in bytes
1410	Return the number of bytes occupied by the string in src.
1411	The resulting string in "dest" is always null terminated.
1412	**/
1413
1414	size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1415	const void *base_ptr,
1416	char **ppdest,
1417	const void *src,
1418	size_t src_len,
1419	int flags)
1420	{
1421	char *dest;
1422	size_t dest_len;
1423
1424	*ppdest = NULL;
1425
1426	#ifdef DEVELOPER
1427	/* Ensure we never use the braindead "malloc" varient. */
1428	if (ctx == NULL) {
1429	smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1430	}
1431	#endif
1432
1433	if (!src_len) {
1434	return 0;
1435	}
1436
1437	if (ucs2_align(base_ptr, src, flags)) {
1438	src = (const void )((const char )src + 1);
1439	if (src_len != (size_t)-1)
1440	src_len--;
1441	}
1442
1443	if (flags & STR_TERMINATE) {
1444	/* src_len -1 is the default for null terminated strings. */
1445	if (src_len != (size_t)-1) {
1446	size_t len = strnlen_w((const smb_ucs2_t *)src,
1447	src_len/2);
1448	if (len < src_len/2)
1449	len++;
1450	src_len = len*2;
1451	} else {
1452	/*
1453	* src_len == -1 - alloc interface won't take this
1454	* so we must calculate.
1455	*/
1456	src_len = (strlen_w((const smb_ucs2_t )src)+1)sizeof(smb_ucs2_t);
1457	}
1458	/* Ensure we don't use an insane length from the client. */
1459	if (src_len >= 1024*1024) {
1460	smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1461	}
1462	} else {
1463	/* Can't have an unlimited length
1464	* non STR_TERMINATE'd.
1465	*/
1466	if (src_len == (size_t)-1) {
1467	errno = EINVAL;
1468	return 0;
1469	}
1470	}
1471
1472	/* src_len != -1 here. */
1473
1474	/* ucs2 is always a multiple of 2 bytes */
1475	src_len &= ~1;
1476
1477	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1478	(void *)&dest, &dest_len, True)) {
1479	dest_len = 0;
1480	}
1481
1482	if (dest_len) {
1483	/* Did we already process the terminating zero ? */
1484	if (dest[dest_len-1] != 0) {
1485	size_t size = talloc_get_size(dest);
1486	/* Have we got space to append the '\0' ? */
1487	if (size <= dest_len) {
1488	/* No, realloc. */
1489	dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1490	dest_len+1);
1491	if (!dest) {
1492	/* talloc fail. */
1493	dest_len = (size_t)-1;
1494	return 0;
1495	}
1496	}
1497	/* Yay - space ! */
1498	dest[dest_len] = '\0';
1499	dest_len++;
1500	}
1501	} else if (dest) {
1502	dest[0] = 0;
1503	}
1504
1505	*ppdest = dest;
1506	return src_len;
1507	}
1508
1509	size_t pull_ucs2_fstring(char dest, const void src)
1510	{
1511	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1512	}
1513
1514	/**
1515	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1516	*
1517	* @param dest always set at least to NULL
1518	* @parm converted_size set to the number of bytes occupied by the string in
1519	* the destination on success.
1520	*
1521	* @return true if new buffer was correctly allocated, and string was
1522	* converted.
1523	**/
1524
1525	bool pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src,
1526	size_t *converted_size)
1527	{
1528	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1529
1530	*dest = NULL;
1531	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1532	(void **)dest, converted_size, True);
1533	}
1534
1535	/**
1536	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1537	*
1538	* @param dest always set at least to NULL
1539	* @parm converted_size set to the number of bytes occupied by the string in
1540	* the destination on success.
1541	*
1542	* @return true if new buffer was correctly allocated, and string was
1543	* converted.
1544	**/
1545
1546	bool pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src,
1547	size_t *converted_size)
1548	{
1549	size_t src_len = strlen(src)+1;
1550
1551	*dest = NULL;
1552	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1553	(void **)dest, converted_size, True);
1554	}
1555
1556
1557	/**
1558	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1559	*
1560	* @param dest always set at least to NULL
1561	* @parm converted_size set to the number of bytes occupied by the string in
1562	* the destination on success.
1563	*
1564	* @return true if new buffer was correctly allocated, and string was
1565	* converted.
1566	**/
1567
1568	bool pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src,
1569	size_t *converted_size)
1570	{
1571	size_t src_len = strlen(src)+1;
1572
1573	*dest = NULL;
1574	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1575	(void **)dest, converted_size, True);
1576	}
1577
1578	/**
1579	Copy a string from a char* src to a unicode or ascii
1580	dos codepage destination choosing unicode or ascii based on the
1581	flags supplied
1582	Return the number of bytes occupied by the string in the destination.
1583	flags can have:
1584	STR_TERMINATE means include the null termination.
1585	STR_UPPER means uppercase in the destination.
1586	STR_ASCII use ascii even with unicode packet.
1587	STR_NOALIGN means don't do alignment.
1588	dest_len is the maximum length allowed in the destination. If dest_len
1589	is -1 then no maxiumum is used.
1590	**/
1591
1592	size_t push_string_check_fn(const char *function, unsigned int line,
1593	void dest, const char src,
1594	size_t dest_len, int flags)
1595	{
1596	#ifdef DEVELOPER
1597	/* We really need to zero fill here, not clobber
1598	* region, as we want to ensure that valgrind thinks
1599	* all of the outgoing buffer has been written to
1600	* so a send() or write() won't trap an error.
1601	* JRA.
1602	*/
1603	#if 0
1604	clobber_region(function, line, dest, dest_len);
1605	#else
1606	memset(dest, '\0', dest_len);
1607	#endif
1608	#endif
1609
1610	if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1611	return push_ucs2(NULL, dest, src, dest_len, flags);
1612	}
1613	return push_ascii(dest, src, dest_len, flags);
1614	}
1615
1616
1617	/**
1618	Copy a string from a char* src to a unicode or ascii
1619	dos codepage destination choosing unicode or ascii based on the
1620	flags in the SMB buffer starting at base_ptr.
1621	Return the number of bytes occupied by the string in the destination.
1622	flags can have:
1623	STR_TERMINATE means include the null termination.
1624	STR_UPPER means uppercase in the destination.
1625	STR_ASCII use ascii even with unicode packet.
1626	STR_NOALIGN means don't do alignment.
1627	dest_len is the maximum length allowed in the destination. If dest_len
1628	is -1 then no maxiumum is used.
1629	**/
1630
1631	size_t push_string_base(const char *function, unsigned int line,
1632	const char *base, uint16 flags2,
1633	void dest, const char src,
1634	size_t dest_len, int flags)
1635	{
1636	#ifdef DEVELOPER
1637	/* We really need to zero fill here, not clobber
1638	* region, as we want to ensure that valgrind thinks
1639	* all of the outgoing buffer has been written to
1640	* so a send() or write() won't trap an error.
1641	* JRA.
1642	*/
1643	#if 0
1644	clobber_region(function, line, dest, dest_len);
1645	#else
1646	memset(dest, '\0', dest_len);
1647	#endif
1648	#endif
1649
1650	if (!(flags & STR_ASCII) && \
1651	((flags & STR_UNICODE \|\| \
1652	(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1653	return push_ucs2(base, dest, src, dest_len, flags);
1654	}
1655	return push_ascii(dest, src, dest_len, flags);
1656	}
1657
1658	/**
1659	Copy a string from a char* src to a unicode or ascii
1660	dos codepage destination choosing unicode or ascii based on the
1661	flags supplied
1662	Return the number of bytes occupied by the string in the destination.
1663	flags can have:
1664	STR_TERMINATE means include the null termination.
1665	STR_UPPER means uppercase in the destination.
1666	STR_ASCII use ascii even with unicode packet.
1667	STR_NOALIGN means don't do alignment.
1668	dest_len is the maximum length allowed in the destination. If dest_len
1669	is -1 then no maxiumum is used.
1670	**/
1671
1672	ssize_t push_string(void dest, const char src, size_t dest_len, int flags)
1673	{
1674	size_t ret;
1675	#ifdef DEVELOPER
1676	/* We really need to zero fill here, not clobber
1677	* region, as we want to ensure that valgrind thinks
1678	* all of the outgoing buffer has been written to
1679	* so a send() or write() won't trap an error.
1680	* JRA.
1681	*/
1682	memset(dest, '\0', dest_len);
1683	#endif
1684
1685	if (!(flags & STR_ASCII) && \
1686	(flags & STR_UNICODE)) {
1687	ret = push_ucs2(NULL, dest, src, dest_len, flags);
1688	} else {
1689	ret = push_ascii(dest, src, dest_len, flags);
1690	}
1691	if (ret == (size_t)-1) {
1692	return -1;
1693	}
1694	return ret;
1695	}
1696
1697	/**
1698	Copy a string from a unicode or ascii source (depending on
1699	the packet flags) to a char* destination.
1700	Flags can have:
1701	STR_TERMINATE means the string in src is null terminated.
1702	STR_UNICODE means to force as unicode.
1703	STR_ASCII use ascii even with unicode packet.
1704	STR_NOALIGN means don't do alignment.
1705	if STR_TERMINATE is set then src_len is ignored is it is -1
1706	src_len is the length of the source area in bytes.
1707	Return the number of bytes occupied by the string in src.
1708	The resulting string in "dest" is always null terminated.
1709	**/
1710
1711	size_t pull_string_fn(const char *function,
1712	unsigned int line,
1713	const void *base_ptr,
1714	uint16 smb_flags2,
1715	char *dest,
1716	const void *src,
1717	size_t dest_len,
1718	size_t src_len,
1719	int flags)
1720	{
1721	#ifdef DEVELOPER
1722	clobber_region(function, line, dest, dest_len);
1723	#endif
1724
1725	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1726	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1727	"UNICODE defined");
1728	}
1729
1730	if (!(flags & STR_ASCII) && \
1731	((flags & STR_UNICODE \|\| \
1732	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1733	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1734	}
1735	return pull_ascii(dest, src, dest_len, src_len, flags);
1736	}
1737
1738	/**
1739	Copy a string from a unicode or ascii source (depending on
1740	the packet flags) to a char* destination.
1741	Variant that uses talloc.
1742	Flags can have:
1743	STR_TERMINATE means the string in src is null terminated.
1744	STR_UNICODE means to force as unicode.
1745	STR_ASCII use ascii even with unicode packet.
1746	STR_NOALIGN means don't do alignment.
1747	if STR_TERMINATE is set then src_len is ignored is it is -1
1748	src_len is the length of the source area in bytes.
1749	Return the number of bytes occupied by the string in src.
1750	The resulting string in "dest" is always null terminated.
1751	**/
1752
1753	size_t pull_string_talloc_fn(const char *function,
1754	unsigned int line,
1755	TALLOC_CTX *ctx,
1756	const void *base_ptr,
1757	uint16 smb_flags2,
1758	char **ppdest,
1759	const void *src,
1760	size_t src_len,
1761	int flags)
1762	{
1763	if ((base_ptr == NULL) && ((flags & (STR_ASCII\|STR_UNICODE)) == 0)) {
1764	smb_panic("No base ptr to get flg2 and neither ASCII nor "
1765	"UNICODE defined");
1766	}
1767
1768	if (!(flags & STR_ASCII) && \
1769	((flags & STR_UNICODE \|\| \
1770	(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1771	return pull_ucs2_base_talloc(ctx,
1772	base_ptr,
1773	ppdest,
1774	src,
1775	src_len,
1776	flags);
1777	}
1778	return pull_ascii_base_talloc(ctx,
1779	ppdest,
1780	src,
1781	src_len,
1782	flags);
1783	}
1784
1785
1786	size_t align_string(const void base_ptr, const char p, int flags)
1787	{
1788	if (!(flags & STR_ASCII) && \
1789	((flags & STR_UNICODE \|\| \
1790	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1791	return ucs2_align(base_ptr, p, flags);
1792	}
1793	return 0;
1794	}
1795
1796	/*
1797	Return the unicode codepoint for the next multi-byte CH_UNIX character
1798	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1799
1800	Also return the number of bytes consumed (which tells the caller
1801	how many bytes to skip to get to the next CH_UNIX character).
1802
1803	Return INVALID_CODEPOINT if the next character cannot be converted.
1804	*/
1805	codepoint_t next_codepoint(const char str, size_t size)
1806	{
1807	/* It cannot occupy more than 4 bytes in UTF16 format */
1808	uint8_t buf[4];
1809	smb_iconv_t descriptor;
1810	size_t ilen_orig;
1811	size_t ilen;
1812	size_t olen;
1813	char *outbuf;
1814
1815	if ((str[0] & 0x80) == 0) {
1816	*size = 1;
1817	return (codepoint_t)str[0];
1818	}
1819
1820	/* We assume that no multi-byte character can take
1821	more than 5 bytes. This is OK as we only
1822	support codepoints up to 1M */
1823
1824	ilen_orig = strnlen(str, 5);
1825	ilen = ilen_orig;
1826
1827	lazy_initialize_conv();
1828
1829	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1830	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1831	*size = 1;
1832	return INVALID_CODEPOINT;
1833	}
1834
1835	/* This looks a little strange, but it is needed to cope
1836	with codepoints above 64k which are encoded as per RFC2781. */
1837	olen = 2;
1838	outbuf = (char *)buf;
1839	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1840	if (olen == 2) {
1841	/* We failed to convert to a 2 byte character.
1842	See if we can convert to a 4 UTF16-LE byte char encoding.
1843	*/
1844	olen = 4;
1845	outbuf = (char *)buf;
1846	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1847	if (olen == 4) {
1848	/* We didn't convert any bytes */
1849	*size = 1;
1850	return INVALID_CODEPOINT;
1851	}
1852	olen = 4 - olen;
1853	} else {
1854	olen = 2 - olen;
1855	}
1856
1857	*size = ilen_orig - ilen;
1858
1859	if (olen == 2) {
1860	/* 2 byte, UTF16-LE encoded value. */
1861	return (codepoint_t)SVAL(buf, 0);
1862	}
1863	if (olen == 4) {
1864	/* Decode a 4 byte UTF16-LE character manually.
1865	See RFC2871 for the encoding machanism.
1866	*/
1867	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1868	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1869
1870	return (codepoint_t)0x10000 +
1871	(w1 << 10) + w2;
1872	}
1873
1874	/* no other length is valid */
1875	return INVALID_CODEPOINT;
1876	}
1877
1878	/*
1879	push a single codepoint into a CH_UNIX string the target string must
1880	be able to hold the full character, which is guaranteed if it is at
1881	least 5 bytes in size. The caller may pass less than 5 bytes if they
1882	are sure the character will fit (for example, you can assume that
1883	uppercase/lowercase of a character will not add more than 1 byte)
1884
1885	return the number of bytes occupied by the CH_UNIX character, or
1886	-1 on failure
1887	*/
1888	_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1889	{
1890	smb_iconv_t descriptor;
1891	uint8_t buf[4];
1892	size_t ilen, olen;
1893	const char *inbuf;
1894
1895	if (c < 128) {
1896	*str = c;
1897	return 1;
1898	}
1899
1900	lazy_initialize_conv();
1901
1902	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1903	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1904	return -1;
1905	}
1906
1907	if (c < 0x10000) {
1908	ilen = 2;
1909	olen = 5;
1910	inbuf = (char *)buf;
1911	SSVAL(buf, 0, c);
1912	smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1913	if (ilen != 0) {
1914	return -1;
1915	}
1916	return 5 - olen;
1917	}
1918
1919	c -= 0x10000;
1920
1921	buf[0] = (c>>10) & 0xFF;
1922	buf[1] = (c>>18) \| 0xd8;
1923	buf[2] = c & 0xFF;
1924	buf[3] = ((c>>8) & 0x3) \| 0xdc;
1925
1926	ilen = 4;
1927	olen = 5;
1928	inbuf = (char *)buf;
1929
1930	smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1931	if (ilen != 0) {
1932	return -1;
1933	}
1934	return 5 - olen;
1935	}
1936
1937

Note: See TracBrowser for help on using the repository browser.

Download in other formats: