Context Navigation

source: branches/samba-3.0/source/lib/charcnv.c@ 392

Visit:

Last change on this file since 392 was 337, checked in by Herwig Bauernfeind, 16 years ago
divers fix for Ticket #68 in 3.0
File size: 40.0 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 2 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program; if not, write to the Free Software
21	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
23	*/
24	#include "includes.h"
25
/*
 * Character substituted for input that cannot be converted.
 * We can parameterize this if someone complains.... JRA.
 */

char lp_failed_convert_char(void)
{
	const char substitute = '_';

	return substitute;
}
32
33	/**
34	* @file
35	*
36	* @brief Character-set conversion routines built on our iconv.
37	*
38	* @note Samba's internal character set (at least in the 3.0 series)
39	* is always the same as the one for the Unix filesystem. It is
40	* <b>not</b> necessarily UTF-8 and may be different on machines that
41	* need i18n filenames to be compatible with Unix software. It does
42	* have to be a superset of ASCII. All multibyte sequences must start
43	* with a byte with the high bit set.
44	*
45	* @sa lib/iconv.c
46	*/
47
48
/* Conversion descriptors, indexed as conv_handles[from][to]. Filled in by
   init_iconv() and closed again by gfree_charcnv(). */
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* When True, the conversion routines skip their DEBUG output on failure. */
static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58	if (ch == CH_UTF16LE) ret = "UTF-16LE";
59	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
60	else if (ch == CH_UNIX) ret = lp_unix_charset();
61	else if (ch == CH_DOS) ret = lp_dos_charset();
62	else if (ch == CH_DISPLAY) ret = lp_display_charset();
63	else if (ch == CH_UTF8) ret = "UTF8";
64
65	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66	if (ret && !strcmp(ret, "LOCALE")) {
67	const char *ln = NULL;
68
69	#ifdef HAVE_SETLOCALE
70	setlocale(LC_ALL, "");
71	#endif
72	ln = nl_langinfo(CODESET);
73	if (ln) {
74	/* Check whether the charset name is supported
75	by iconv */
76	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77	if (handle == (smb_iconv_t) -1) {
78	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79	ln = NULL;
80	} else {
81	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82	smb_iconv_close(handle);
83	}
84	}
85	ret = ln;
86	}
87	#endif
88
89	if (!ret \|\| !*ret) ret = "ASCII";
90	return ret;
91	}
92
93	void lazy_initialize_conv(void)
94	{
95	static int initialized = False;
96
97	if (!initialized) {
98	initialized = True;
99	load_case_tables();
100	init_iconv();
101	}
102	}
103
104	/**
105	* Destroy global objects allocated by init_iconv()
106	**/
107	void gfree_charcnv(void)
108	{
109	int c1, c2;
110
111	for (c1=0;c1<NUM_CHARSETS;c1++) {
112	for (c2=0;c2<NUM_CHARSETS;c2++) {
113	if ( conv_handles[c1][c2] ) {
114	smb_iconv_close( conv_handles[c1][c2] );
115	conv_handles[c1][c2] = 0;
116	}
117	}
118	}
119	}
120
121	/**
122	* Initialize iconv conversion descriptors.
123	*
124	* This is called the first time it is needed, and also called again
125	* every time the configuration is reloaded, because the charset or
126	* codepage might have changed.
127	**/
128	void init_iconv(void)
129	{
130	int c1, c2;
131	BOOL did_reload = False;
132
133	/* so that charset_name() works we need to get the UNIX<->UCS2 going
134	first */
135	if (!conv_handles[CH_UNIX][CH_UTF16LE])
136	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
137
138	if (!conv_handles[CH_UTF16LE][CH_UNIX])
139	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
140
141	for (c1=0;c1<NUM_CHARSETS;c1++) {
142	for (c2=0;c2<NUM_CHARSETS;c2++) {
143	const char *n1 = charset_name((charset_t)c1);
144	const char *n2 = charset_name((charset_t)c2);
145	if (conv_handles[c1][c2] &&
146	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148	continue;
149
150	did_reload = True;
151
152	if (conv_handles[c1][c2])
153	smb_iconv_close(conv_handles[c1][c2]);
154
155	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158	charset_name((charset_t)c1), charset_name((charset_t)c2)));
159	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160	n1 = "ASCII";
161	}
162	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163	n2 = "ASCII";
164	}
165	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166	n1, n2 ));
167	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168	if (!conv_handles[c1][c2]) {
169	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170	smb_panic("init_iconv: conv_handle initialization failed.");
171	}
172	}
173	}
174	}
175
176	if (did_reload) {
177	/* XXX: Does this really get called every time the dos
178	* codepage changes? */
179	/* XXX: Is the did_reload test too strict? */
180	conv_silent = True;
181	init_doschar_table();
182	init_valid_table();
183	conv_silent = False;
184	}
185	}
186
187	/**
188	* Convert string from one encoding to another, making error checking etc
189	* Slow path version - uses (slow) iconv.
190	*
191	* @param src pointer to source string (multibyte or singlebyte)
192	* @param srclen length of the source string in bytes
193	* @param dest pointer to destination string (multibyte or singlebyte)
194	* @param destlen maximal length allowed for string
195	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
196	* @returns the number of bytes occupied in the destination
197	*
198	* Ensure the srclen contains the terminating zero.
199	*
200	**/
201
202	static size_t convert_string_internal(charset_t from, charset_t to,
203	void const *src, size_t srclen,
204	void *dest, size_t destlen, BOOL allow_bad_conv)
205	{
206	size_t i_len, o_len;
207	size_t retval;
208	const char* inbuf = (const char*)src;
209	char* outbuf = (char*)dest;
210	smb_iconv_t descriptor;
211
212	lazy_initialize_conv();
213
214	descriptor = conv_handles[from][to];
215
216	if (srclen == (size_t)-1) {
217	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
218	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
219	} else {
220	srclen = strlen((const char *)src)+1;
221	}
222	}
223
224
225	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
226	if (!conv_silent)
227	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
228	return (size_t)-1;
229	}
230
231	i_len=srclen;
232	o_len=destlen;
233
234	again:
235
236	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
237	if(retval==(size_t)-1) {
238	const char *reason="unknown error";
239	switch(errno) {
240	case EINVAL:
241	reason="Incomplete multibyte sequence";
242	if (!conv_silent)
243	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
244	if (allow_bad_conv)
245	goto use_as_is;
246	break;
247	case E2BIG:
248	reason="No more room";
249	if (!conv_silent) {
250	if (from == CH_UNIX) {
251	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
252	charset_name(from), charset_name(to),
253	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
254	} else {
255	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
256	charset_name(from), charset_name(to),
257	(unsigned int)srclen, (unsigned int)destlen));
258	}
259	}
260	break;
261	case EILSEQ:
262	reason="Illegal multibyte sequence";
263	if (!conv_silent)
264	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
265	if (allow_bad_conv)
266	goto use_as_is;
267	break;
268	default:
269	if (!conv_silent)
270	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
271	break;
272	}
273	/* smb_panic(reason); */
274	}
275	return destlen-o_len;
276
277	use_as_is:
278
279	/*
280	* Conversion not supported. This is actually an error, but there are so
281	* many misconfigured iconv systems and smb.conf's out there we can't just
282	* fail. Do a very bad conversion instead.... JRA.
283	*/
284
285	{
286	if (o_len == 0 \|\| i_len == 0)
287	return destlen - o_len;
288
289	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
290	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
291	/* Can't convert from utf16 any endian to multibyte.
292	Replace with the default fail char.
293	*/
294	if (i_len < 2)
295	return destlen - o_len;
296	if (i_len >= 2) {
297	*outbuf = lp_failed_convert_char();
298
299	outbuf++;
300	o_len--;
301
302	inbuf += 2;
303	i_len -= 2;
304	}
305
306	if (o_len == 0 \|\| i_len == 0)
307	return destlen - o_len;
308
309	/* Keep trying with the next char... */
310	goto again;
311
312	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
313	/* Can't convert to UTF16LE - just widen by adding the
314	default fail char then zero.
315	*/
316	if (o_len < 2)
317	return destlen - o_len;
318
319	outbuf[0] = lp_failed_convert_char();
320	outbuf[1] = '\0';
321
322	inbuf++;
323	i_len--;
324
325	outbuf += 2;
326	o_len -= 2;
327
328	if (o_len == 0 \|\| i_len == 0)
329	return destlen - o_len;
330
331	/* Keep trying with the next char... */
332	goto again;
333
334	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
335	to != CH_UTF16LE && to != CH_UTF16BE) {
336	/* Failed multibyte to multibyte. Just copy the default fail char and
337	try again. */
338	outbuf[0] = lp_failed_convert_char();
339
340	inbuf++;
341	i_len--;
342
343	outbuf++;
344	o_len--;
345
346	if (o_len == 0 \|\| i_len == 0)
347	return destlen - o_len;
348
349	/* Keep trying with the next char... */
350	goto again;
351
352	} else {
353	/* Keep compiler happy.... */
354	return destlen - o_len;
355	}
356	}
357	}
358
359	/**
360	* Convert string from one encoding to another, making error checking etc
361	* Fast path version - handles ASCII first.
362	*
363	* @param src pointer to source string (multibyte or singlebyte)
364	* @param srclen length of the source string in bytes, or -1 for nul terminated.
365	* @param dest pointer to destination string (multibyte or singlebyte)
366	* @param destlen maximal length allowed for string - NEVER -1.
367	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
368	* @returns the number of bytes occupied in the destination
369	*
370	* Ensure the srclen contains the terminating zero.
371	*
372	* This function has been hand-tuned to provide a fast path.
373	* Don't change unless you really know what you are doing. JRA.
374	**/
375
376	size_t convert_string(charset_t from, charset_t to,
377	void const *src, size_t srclen,
378	void *dest, size_t destlen, BOOL allow_bad_conv)
379	{
380	/*
381	* NB. We deliberately don't do a strlen here if srclen == -1.
382	* This is very expensive over millions of calls and is taken
383	* care of in the slow path in convert_string_internal. JRA.
384	*/
385
386	#ifdef DEVELOPER
387	SMB_ASSERT(destlen != (size_t)-1);
388	#endif
389
390	if (srclen == 0)
391	return 0;
392
393	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
394	const unsigned char p = (const unsigned char )src;
395	unsigned char q = (unsigned char )dest;
396	size_t slen = srclen;
397	size_t dlen = destlen;
398	unsigned char lastp = '\0';
399	size_t retval = 0;
400
401	/* If all characters are ascii, fast path here. */
402	while (slen && dlen) {
403	if ((lastp = *p) <= 0x7f) {
404	q++ = p++;
405	if (slen != (size_t)-1) {
406	slen--;
407	}
408	dlen--;
409	retval++;
410	if (!lastp)
411	break;
412	} else {
413	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
414	goto general_case;
415	#else
416	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
417	#endif
418	}
419	}
420	if (!dlen) {
421	/* Even if we fast path we should note if we ran out of room. */
422	if (((slen != (size_t)-1) && slen) \|\|
423	((slen == (size_t)-1) && lastp)) {
424	errno = E2BIG;
425	}
426	}
427	return retval;
428
429	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
430	const unsigned char p = (const unsigned char )src;
431	unsigned char q = (unsigned char )dest;
432	size_t retval = 0;
433	size_t slen = srclen;
434	size_t dlen = destlen;
435	unsigned char lastp = '\0';
436
437	/* If all characters are ascii, fast path here. */
438	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
439	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
440	q++ = p;
441	if (slen != (size_t)-1) {
442	slen -= 2;
443	}
444	p += 2;
445	dlen--;
446	retval++;
447	if (!lastp)
448	break;
449	} else {
450	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
451	goto general_case;
452	#else
453	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
454	#endif
455	}
456	}
457	if (!dlen) {
458	/* Even if we fast path we should note if we ran out of room. */
459	if (((slen != (size_t)-1) && slen) \|\|
460	((slen == (size_t)-1) && lastp)) {
461	errno = E2BIG;
462	}
463	}
464	return retval;
465
466	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
467	const unsigned char p = (const unsigned char )src;
468	unsigned char q = (unsigned char )dest;
469	size_t retval = 0;
470	size_t slen = srclen;
471	size_t dlen = destlen;
472	unsigned char lastp = '\0';
473
474	/* If all characters are ascii, fast path here. */
475	while (slen && (dlen >= 2)) {
476	if ((lastp = *p) <= 0x7F) {
477	q++ = p++;
478	*q++ = '\0';
479	if (slen != (size_t)-1) {
480	slen--;
481	}
482	dlen -= 2;
483	retval += 2;
484	if (!lastp)
485	break;
486	} else {
487	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
488	goto general_case;
489	#else
490	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
491	#endif
492	}
493	}
494	if (!dlen) {
495	/* Even if we fast path we should note if we ran out of room. */
496	if (((slen != (size_t)-1) && slen) \|\|
497	((slen == (size_t)-1) && lastp)) {
498	errno = E2BIG;
499	}
500	}
501	return retval;
502	}
503
504	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
505	general_case:
506	#endif
507
508	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
509	}
510
511	/**
512	* Convert between character sets, allocating a new buffer for the result.
513	*
514	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
515	* @param srclen length of source buffer.
516	* @param dest always set at least to NULL
517	* @note -1 is not accepted for srclen.
518	*
519	* @returns Size in bytes of the converted string; or -1 in case of error.
520	*
521	* Ensure the srclen contains the terminating zero.
522	*
523	* I hate the goto's in this function. It's embarressing.....
524	* There has to be a cleaner way to do this. JRA.
525	**/
526
527	size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
528	void const src, size_t srclen, void dst, BOOL allow_bad_conv)
529	{
530	size_t i_len, o_len, destlen = (srclen * 3) / 2;
531	size_t retval;
532	const char inbuf = (const char )src;
533	char outbuf = NULL, ob = NULL;
534	smb_iconv_t descriptor;
535	void dest = (void )dst;
536
537	*dest = NULL;
538
539	if (src == NULL \|\| srclen == (size_t)-1)
540	return (size_t)-1;
541	if (srclen == 0)
542	return 0;
543
544	lazy_initialize_conv();
545
546	descriptor = conv_handles[from][to];
547
548	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
549	if (!conv_silent)
550	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
551	return (size_t)-1;
552	}
553
554	convert:
555
556	/* +2 is for ucs2 null termination. */
557	if ((destlen*2)+2 < destlen) {
558	/* wrapped ! abort. */
559	if (!conv_silent)
560	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
561	if (!ctx)
562	SAFE_FREE(outbuf);
563	return (size_t)-1;
564	} else {
565	destlen = destlen * 2;
566	}
567
568	/* +2 is for ucs2 null termination. */
569	if (ctx) {
570	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
571	} else {
572	ob = (char *)SMB_REALLOC(ob, destlen + 2);
573	}
574
575	if (!ob) {
576	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
577	return (size_t)-1;
578	}
579	outbuf = ob;
580	i_len = srclen;
581	o_len = destlen;
582
583	again:
584
585	retval = smb_iconv(descriptor,
586	&inbuf, &i_len,
587	&outbuf, &o_len);
588	if(retval == (size_t)-1) {
589	const char *reason="unknown error";
590	switch(errno) {
591	case EINVAL:
592	reason="Incomplete multibyte sequence";
593	if (!conv_silent)
594	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
595	if (allow_bad_conv)
596	goto use_as_is;
597	break;
598	case E2BIG:
599	goto convert;
600	case EILSEQ:
601	reason="Illegal multibyte sequence";
602	if (!conv_silent)
603	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
604	if (allow_bad_conv)
605	goto use_as_is;
606	break;
607	}
608	if (!conv_silent)
609	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
610	/* smb_panic(reason); */
611	return (size_t)-1;
612	}
613
614	out:
615
616	destlen = destlen - o_len;
617	if (ctx) {
618	/* We're shrinking here so we know the +2 is safe from wrap. */
619	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
620	} else {
621	ob = (char *)SMB_REALLOC(ob,destlen + 2);
622	}
623
624	if (destlen && !ob) {
625	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
626	return (size_t)-1;
627	}
628
629	*dest = ob;
630
631	/* Must ucs2 null terminate in the extra space we allocated. */
632	ob[destlen] = '\0';
633	ob[destlen+1] = '\0';
634
635	return destlen;
636
637	use_as_is:
638
639	/*
640	* Conversion not supported. This is actually an error, but there are so
641	* many misconfigured iconv systems and smb.conf's out there we can't just
642	* fail. Do a very bad conversion instead.... JRA.
643	*/
644
645	{
646	if (o_len == 0 \|\| i_len == 0)
647	goto out;
648
649	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
650	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
651	/* Can't convert from utf16 any endian to multibyte.
652	Replace with the default fail char.
653	*/
654
655	if (i_len < 2)
656	goto out;
657
658	if (i_len >= 2) {
659	*outbuf = lp_failed_convert_char();
660
661	outbuf++;
662	o_len--;
663
664	inbuf += 2;
665	i_len -= 2;
666	}
667
668	if (o_len == 0 \|\| i_len == 0)
669	goto out;
670
671	/* Keep trying with the next char... */
672	goto again;
673
674	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
675	/* Can't convert to UTF16LE - just widen by adding the
676	default fail char then zero.
677	*/
678	if (o_len < 2)
679	goto out;
680
681	outbuf[0] = lp_failed_convert_char();
682	outbuf[1] = '\0';
683
684	inbuf++;
685	i_len--;
686
687	outbuf += 2;
688	o_len -= 2;
689
690	if (o_len == 0 \|\| i_len == 0)
691	goto out;
692
693	/* Keep trying with the next char... */
694	goto again;
695
696	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
697	to != CH_UTF16LE && to != CH_UTF16BE) {
698	/* Failed multibyte to multibyte. Just copy the default fail char and
699	try again. */
700	outbuf[0] = lp_failed_convert_char();
701
702	inbuf++;
703	i_len--;
704
705	outbuf++;
706	o_len--;
707
708	if (o_len == 0 \|\| i_len == 0)
709	goto out;
710
711	/* Keep trying with the next char... */
712	goto again;
713
714	} else {
715	/* Keep compiler happy.... */
716	goto out;
717	}
718	}
719	}
720
721	/**
722	* Convert between character sets, allocating a new buffer using talloc for the result.
723	*
724	* @param srclen length of source buffer.
725	* @param dest always set at least to NULL
726	* @note -1 is not accepted for srclen.
727	*
728	* @returns Size in bytes of the converted string; or -1 in case of error.
729	**/
730	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
731	void const src, size_t srclen, void dst,
732	BOOL allow_bad_conv)
733	{
734	void dest = (void )dst;
735	size_t dest_len;
736
737	*dest = NULL;
738	dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
739	if (dest_len == (size_t)-1)
740	return (size_t)-1;
741	if (*dest == NULL)
742	return (size_t)-1;
743	return dest_len;
744	}
745
746	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
747	{
748	size_t size;
749	smb_ucs2_t *buffer;
750
751	size = push_ucs2_allocate(&buffer, src);
752	if (size == (size_t)-1) {
753	smb_panic("failed to create UCS2 buffer");
754	}
755	if (!strupper_w(buffer) && (dest == src)) {
756	free(buffer);
757	return srclen;
758	}
759
760	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
761	free(buffer);
762	return size;
763	}
764
765	/**
766	strdup() a unix string to upper case.
767	Max size is pstring.
768	**/
769
770	char strdup_upper(const char s)
771	{
772	pstring out_buffer;
773	const unsigned char p = (const unsigned char )s;
774	unsigned char q = (unsigned char )out_buffer;
775
776	/* this is quite a common operation, so we want it to be
777	fast. We optimise for the ascii case, knowing that all our
778	supported multi-byte character sets are ascii-compatible
779	(ie. they match for the first 128 chars) */
780
781	while (1) {
782	if (*p & 0x80)
783	break;
784	q++ = toupper_ascii(p);
785	if (!*p)
786	break;
787	p++;
788	if (p - ( const unsigned char *)s >= sizeof(pstring))
789	break;
790	}
791
792	if (*p) {
793	/* MB case. */
794	size_t size;
795	wpstring buffer;
796	size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
797	if (size == (size_t)-1) {
798	return NULL;
799	}
800
801	strupper_w(buffer);
802
803	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
804	if (size == (size_t)-1) {
805	return NULL;
806	}
807	}
808
809	return SMB_STRDUP(out_buffer);
810	}
811
812	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
813	{
814	size_t size;
815	smb_ucs2_t *buffer = NULL;
816
817	size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
818	(void *)(void )&buffer, True);
819	if (size == (size_t)-1 \|\| !buffer) {
820	smb_panic("failed to create UCS2 buffer");
821	}
822	if (!strlower_w(buffer) && (dest == src)) {
823	SAFE_FREE(buffer);
824	return srclen;
825	}
826	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
827	SAFE_FREE(buffer);
828	return size;
829	}
830
831	/**
832	strdup() a unix string to lower case.
833	**/
834
835	char strdup_lower(const char s)
836	{
837	size_t size;
838	smb_ucs2_t *buffer = NULL;
839	char *out_buffer;
840
841	size = push_ucs2_allocate(&buffer, s);
842	if (size == -1 \|\| !buffer) {
843	return NULL;
844	}
845
846	strlower_w(buffer);
847
848	size = pull_ucs2_allocate(&out_buffer, buffer);
849	SAFE_FREE(buffer);
850
851	if (size == (size_t)-1) {
852	return NULL;
853	}
854
855	return out_buffer;
856	}
857
858	static size_t ucs2_align(const void base_ptr, const void p, int flags)
859	{
860	if (flags & (STR_NOALIGN\|STR_ASCII))
861	return 0;
862	return PTR_DIFF(p, base_ptr) & 1;
863	}
864
865
866	/**
867	* Copy a string from a char* unix src to a dos codepage string destination.
868	*
869	* @return the number of bytes occupied by the string in the destination.
870	*
871	* @param flags can include
872	* <dl>
873	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
874	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
875	* </dl>
876	*
877	* @param dest_len the maximum length in bytes allowed in the
878	* destination. If @p dest_len is -1 then no maximum is used.
879	**/
880	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
881	{
882	size_t src_len = strlen(src);
883	pstring tmpbuf;
884	size_t ret;
885
886	/* No longer allow a length of -1 */
887	if (dest_len == (size_t)-1)
888	smb_panic("push_ascii - dest_len == -1");
889
890	if (flags & STR_UPPER) {
891	pstrcpy(tmpbuf, src);
892	strupper_m(tmpbuf);
893	src = tmpbuf;
894	}
895
896	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
897	src_len++;
898
899	ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
900	if (ret == (size_t)-1 &&
901	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
902	&& dest_len > 0) {
903	((char *)dest)[0] = '\0';
904	}
905	return ret;
906	}
907
908	size_t push_ascii_fstring(void dest, const char src)
909	{
910	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
911	}
912
913	size_t push_ascii_pstring(void dest, const char src)
914	{
915	return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
916	}
917
918	/********************************************************************
919	Push an nstring - ensure null terminated. Written by
920	moriyama@miraclelinux.com (MORIYAMA Masayuki).
921	********************************************************************/
922
923	size_t push_ascii_nstring(void dest, const char src)
924	{
925	size_t i, buffer_len, dest_len;
926	smb_ucs2_t *buffer;
927
928	conv_silent = True;
929	buffer_len = push_ucs2_allocate(&buffer, src);
930	if (buffer_len == (size_t)-1) {
931	smb_panic("failed to create UCS2 buffer");
932	}
933
934	/* We're using buffer_len below to count ucs2 characters, not bytes. */
935	buffer_len /= sizeof(smb_ucs2_t);
936
937	dest_len = 0;
938	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
939	unsigned char mb[10];
940	/* Convert one smb_ucs2_t character at a time. */
941	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
942	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
943	memcpy((char *)dest + dest_len, mb, mb_len);
944	dest_len += mb_len;
945	} else {
946	errno = E2BIG;
947	break;
948	}
949	}
950	((char *)dest)[dest_len] = '\0';
951
952	SAFE_FREE(buffer);
953	conv_silent = False;
954	return dest_len;
955	}
956
957	/**
958	* Copy a string from a dos codepage source to a unix char* destination.
959	*
960	* The resulting string in "dest" is always null terminated.
961	*
962	* @param flags can have:
963	* <dl>
964	* <dt>STR_TERMINATE</dt>
965	* <dd>STR_TERMINATE means the string in @p src
966	* is null terminated, and src_len is ignored.</dd>
967	* </dl>
968	*
969	* @param src_len is the length of the source area in bytes.
970	* @returns the number of bytes occupied by the string in @p src.
971	**/
972	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
973	{
974	size_t ret;
975
976	if (dest_len == (size_t)-1)
977	dest_len = sizeof(pstring);
978
979	if (flags & STR_TERMINATE) {
980	if (src_len == (size_t)-1) {
981	src_len = strlen((const char *)src) + 1;
982	} else {
983	size_t len = strnlen((const char *)src, src_len);
984	if (len < src_len)
985	len++;
986	src_len = len;
987	}
988	}
989
990	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
991	if (ret == (size_t)-1) {
992	ret = 0;
993	dest_len = 0;
994	}
995
996	if (dest_len && ret) {
997	/* Did we already process the terminating zero ? */
998	if (dest[MIN(ret-1, dest_len-1)] != 0) {
999	dest[MIN(ret, dest_len-1)] = 0;
1000	}
1001	} else {
1002	dest[0] = 0;
1003	}
1004
1005	return src_len;
1006	}
1007
1008	size_t pull_ascii_pstring(char dest, const void src)
1009	{
1010	return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1011	}
1012
1013	size_t pull_ascii_fstring(char dest, const void src)
1014	{
1015	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1016	}
1017
1018	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1019
1020	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1021	{
1022	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1023	}
1024
1025	/**
1026	* Copy a string from a char* src to a unicode destination.
1027	*
1028	* @returns the number of bytes occupied by the string in the destination.
1029	*
1030	* @param flags can have:
1031	*
1032	* <dl>
1033	* <dt>STR_TERMINATE <dd>means include the null termination.
1034	* <dt>STR_UPPER <dd>means uppercase in the destination.
1035	* <dt>STR_NOALIGN <dd>means don't do alignment.
1036	* </dl>
1037	*
1038	* @param dest_len is the maximum length allowed in the
1039	* destination. If dest_len is -1 then no maxiumum is used.
1040	**/
1041
1042	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1043	{
1044	size_t len=0;
1045	size_t src_len;
1046	size_t ret;
1047
1048	/* treat a pstring as "unlimited" length */
1049	if (dest_len == (size_t)-1)
1050	dest_len = sizeof(pstring);
1051
1052	if (flags & STR_TERMINATE)
1053	src_len = (size_t)-1;
1054	else
1055	src_len = strlen(src);
1056
1057	if (ucs2_align(base_ptr, dest, flags)) {
1058	(char )dest = 0;
1059	dest = (void )((char )dest + 1);
1060	if (dest_len)
1061	dest_len--;
1062	len++;
1063	}
1064
1065	/* ucs2 is always a multiple of 2 bytes */
1066	dest_len &= ~1;
1067
1068	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1069	if (ret == (size_t)-1) {
1070	return 0;
1071	}
1072
1073	len += ret;
1074
1075	if (flags & STR_UPPER) {
1076	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1077	size_t i;
1078
1079	/* We check for i < (ret / 2) below as the dest string isn't null
1080	terminated if STR_TERMINATE isn't set. */
1081
1082	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1083	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1084	if (v != dest_ucs2[i]) {
1085	dest_ucs2[i] = v;
1086	}
1087	}
1088	}
1089
1090	return len;
1091	}
1092
1093
1094	/**
1095	* Copy a string from a unix char* src to a UCS2 destination,
1096	* allocating a buffer using talloc().
1097	*
1098	* @param dest always set at least to NULL
1099	*
1100	* @returns The number of bytes occupied by the string in the destination
1101	* or -1 in case of error.
1102	**/
1103	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1104	{
1105	size_t src_len = strlen(src)+1;
1106
1107	*dest = NULL;
1108	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1109	}
1110
1111
1112	/**
1113	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1114	*
1115	* @param dest always set at least to NULL
1116	*
1117	* @returns The number of bytes occupied by the string in the destination
1118	* or -1 in case of error.
1119	**/
1120
1121	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1122	{
1123	size_t src_len = strlen(src)+1;
1124
1125	*dest = NULL;
1126	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1127	}
1128
1129	/**
1130	Copy a string from a char* src to a UTF-8 destination.
1131	Return the number of bytes occupied by the string in the destination
1132	Flags can have:
1133	STR_TERMINATE means include the null termination
1134	STR_UPPER means uppercase in the destination
1135	dest_len is the maximum length allowed in the destination. If dest_len
1136	is -1 then no maxiumum is used.
1137	**/
1138
1139	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1140	{
1141	size_t src_len = strlen(src);
1142	pstring tmpbuf;
1143
1144	/* treat a pstring as "unlimited" length */
1145	if (dest_len == (size_t)-1)
1146	dest_len = sizeof(pstring);
1147
1148	if (flags & STR_UPPER) {
1149	pstrcpy(tmpbuf, src);
1150	strupper_m(tmpbuf);
1151	src = tmpbuf;
1152	}
1153
1154	if (flags & STR_TERMINATE)
1155	src_len++;
1156
1157	return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1158	}
1159
1160	size_t push_utf8_fstring(void dest, const char src)
1161	{
1162	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1163	}
1164
1165	/**
1166	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1167	*
1168	* @param dest always set at least to NULL
1169	*
1170	* @returns The number of bytes occupied by the string in the destination
1171	**/
1172
1173	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1174	{
1175	size_t src_len = strlen(src)+1;
1176
1177	*dest = NULL;
1178	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1179	}
1180
1181	/**
1182	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1183	*
1184	* @param dest always set at least to NULL
1185	*
1186	* @returns The number of bytes occupied by the string in the destination
1187	**/
1188
1189	size_t push_utf8_allocate(char *dest, const char src)
1190	{
1191	size_t src_len = strlen(src)+1;
1192
1193	*dest = NULL;
1194	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1195	}
1196
1197	/**
1198	Copy a string from a ucs2 source to a unix char* destination.
1199	Flags can have:
1200	STR_TERMINATE means the string in src is null terminated.
1201	STR_NOALIGN means don't try to align.
1202	if STR_TERMINATE is set then src_len is ignored if it is -1.
1203	src_len is the length of the source area in bytes
1204	Return the number of bytes occupied by the string in src.
1205	The resulting string in "dest" is always null terminated.
1206	**/
1207
1208	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1209	{
1210	size_t ret;
1211
1212	if (dest_len == (size_t)-1)
1213	dest_len = sizeof(pstring);
1214
1215	if (ucs2_align(base_ptr, src, flags)) {
1216	src = (const void )((const char )src + 1);
1217	if (src_len != (size_t)-1)
1218	src_len--;
1219	}
1220
1221	if (flags & STR_TERMINATE) {
1222	/* src_len -1 is the default for null terminated strings. */
1223	if (src_len != (size_t)-1) {
1224	size_t len = strnlen_w((const smb_ucs2_t *)src,
1225	src_len/2);
1226	if (len < src_len/2)
1227	len++;
1228	src_len = len*2;
1229	}
1230	}
1231
1232	/* ucs2 is always a multiple of 2 bytes */
1233	if (src_len != (size_t)-1)
1234	src_len &= ~1;
1235
1236	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1237	if (ret == (size_t)-1) {
1238	return 0;
1239	}
1240
1241	if (src_len == (size_t)-1)
1242	src_len = ret*2;
1243
1244	if (dest_len && ret) {
1245	/* Did we already process the terminating zero ? */
1246	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1247	dest[MIN(ret, dest_len-1)] = 0;
1248	}
1249	} else {
1250	dest[0] = 0;
1251	}
1252
1253	return src_len;
1254	}
1255
1256	size_t pull_ucs2_pstring(char dest, const void src)
1257	{
1258	return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1259	}
1260
1261	size_t pull_ucs2_fstring(char dest, const void src)
1262	{
1263	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1264	}
1265
1266	/**
1267	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1268	*
1269	* @param dest always set at least to NULL
1270	*
1271	* @returns The number of bytes occupied by the string in the destination
1272	**/
1273
1274	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1275	{
1276	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1277	*dest = NULL;
1278	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1279	}
1280
1281	/**
1282	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1283	*
1284	* @param dest always set at least to NULL
1285	*
1286	* @returns The number of bytes occupied by the string in the destination
1287	**/
1288
1289	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1290	{
1291	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1292	*dest = NULL;
1293	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1294	}
1295
1296	/**
1297	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1298	*
1299	* @param dest always set at least to NULL
1300	*
1301	* @returns The number of bytes occupied by the string in the destination
1302	**/
1303
1304	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1305	{
1306	size_t src_len = strlen(src)+1;
1307	*dest = NULL;
1308	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1309	}
1310
1311	/**
1312	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1313	*
1314	* @param dest always set at least to NULL
1315	*
1316	* @returns The number of bytes occupied by the string in the destination
1317	**/
1318
1319	size_t pull_utf8_allocate(char *dest, const char src)
1320	{
1321	size_t src_len = strlen(src)+1;
1322	*dest = NULL;
1323	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1324	}
1325
1326	/**
1327	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1328	*
1329	* @param dest always set at least to NULL
1330	*
1331	* @returns The number of bytes occupied by the string in the destination
1332	**/
1333
1334	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1335	{
1336	size_t src_len = strlen(src)+1;
1337	*dest = NULL;
1338	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1339	}
1340
1341	/**
1342	Copy a string from a char* src to a unicode or ascii
1343	dos codepage destination choosing unicode or ascii based on the
1344	flags in the SMB buffer starting at base_ptr.
1345	Return the number of bytes occupied by the string in the destination.
1346	flags can have:
1347	STR_TERMINATE means include the null termination.
1348	STR_UPPER means uppercase in the destination.
1349	STR_ASCII use ascii even with unicode packet.
1350	STR_NOALIGN means don't do alignment.
1351	dest_len is the maximum length allowed in the destination. If dest_len
1352	is -1 then no maxiumum is used.
1353	**/
1354
1355	size_t push_string_fn(const char function, unsigned int line, const void base_ptr, void dest, const char src, size_t dest_len, int flags)
1356	{
1357	#ifdef DEVELOPER
1358	/* We really need to zero fill here, not clobber
1359	* region, as we want to ensure that valgrind thinks
1360	* all of the outgoing buffer has been written to
1361	* so a send() or write() won't trap an error.
1362	* JRA.
1363	*/
1364	#if 0
1365	if (dest_len != (size_t)-1)
1366	clobber_region(function, line, dest, dest_len);
1367	#else
1368	if (dest_len != (size_t)-1)
1369	memset(dest, '\0', dest_len);
1370	#endif
1371	#endif
1372
1373	if (!(flags & STR_ASCII) && \
1374	((flags & STR_UNICODE \|\| \
1375	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1376	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1377	}
1378	return push_ascii(dest, src, dest_len, flags);
1379	}
1380
1381
1382	/**
1383	Copy a string from a unicode or ascii source (depending on
1384	the packet flags) to a char* destination.
1385	Flags can have:
1386	STR_TERMINATE means the string in src is null terminated.
1387	STR_UNICODE means to force as unicode.
1388	STR_ASCII use ascii even with unicode packet.
1389	STR_NOALIGN means don't do alignment.
1390	if STR_TERMINATE is set then src_len is ignored is it is -1
1391	src_len is the length of the source area in bytes.
1392	Return the number of bytes occupied by the string in src.
1393	The resulting string in "dest" is always null terminated.
1394	**/
1395
1396	size_t pull_string_fn(const char function, unsigned int line, const void base_ptr, char dest, const void src, size_t dest_len, size_t src_len, int flags)
1397	{
1398	#ifdef DEVELOPER
1399	if (dest_len != (size_t)-1)
1400	clobber_region(function, line, dest, dest_len);
1401	#endif
1402
1403	if (!(flags & STR_ASCII) && \
1404	((flags & STR_UNICODE \|\| \
1405	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1406	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1407	}
1408	return pull_ascii(dest, src, dest_len, src_len, flags);
1409	}
1410
1411	size_t align_string(const void base_ptr, const char p, int flags)
1412	{
1413	if (!(flags & STR_ASCII) && \
1414	((flags & STR_UNICODE \|\| \
1415	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1416	return ucs2_align(base_ptr, p, flags);
1417	}
1418	return 0;
1419	}
1420
1421	/*
1422	Return the unicode codepoint for the next multi-byte CH_UNIX character
1423	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1424
1425	Also return the number of bytes consumed (which tells the caller
1426	how many bytes to skip to get to the next CH_UNIX character).
1427
1428	Return INVALID_CODEPOINT if the next character cannot be converted.
1429	*/
1430
1431	codepoint_t next_codepoint(const char str, size_t size)
1432	{
1433	/* It cannot occupy more than 4 bytes in UTF16 format */
1434	uint8_t buf[4];
1435	smb_iconv_t descriptor;
1436	#ifdef __OS2__
1437	size_t ilen_max;
1438	size_t olen_orig;
1439	const char *inbuf;
1440	#endif
1441	size_t ilen_orig;
1442	size_t ilen;
1443	size_t olen;
1444
1445	char *outbuf;
1446
1447	#ifdef __OS2__
1448	*size = 1;
1449	#endif
1450
1451	if ((str[0] & 0x80) == 0) {
1452	#ifndef __OS2__
1453	*size = 1;
1454	#endif
1455	return (codepoint_t)str[0];
1456	}
1457
1458	lazy_initialize_conv();
1459
1460	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1461	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1462	#ifndef __OS2__
1463	*size = 1;
1464	#endif
1465	return INVALID_CODEPOINT;
1466	}
1467	#ifdef __OS2__
1468	/* We assume that no multi-byte character can take
1469	more than 5 bytes. This is OK as we only
1470	support codepoints up to 1M */
1471
1472	ilen_max = strnlen( str, 5 );
1473	#else
1474	*size = 1;
1475	#endif
1476	ilen_orig = 1;
1477	olen_orig = 2;
1478	while( 1 )
1479	{
1480	ilen = ilen_orig;
1481	olen = olen_orig;
1482	inbuf = str;
1483	outbuf = ( char * )buf;
1484	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1485	break;
1486
1487	switch( errno )
1488	{
1489	case E2BIG :
1490	if( olen_orig == 2 )
1491	olen_orig = 4;
1492	else
1493	return INVALID_CODEPOINT;
1494	break;
1495
1496
1497	case EINVAL :
1498	#ifndef __OS2__
1499	/* We assume that no multi-byte character can take
1500	more than 5 bytes. This is OK as we only
1501	support codepoints up to 1M */
1502	if( ilen_orig < 5 )
1503	#else
1504	if( ilen_orig < ilen_max )
1505	#endif
1506	ilen_orig++;
1507	else
1508	return INVALID_CODEPOINT;
1509	break;
1510
1511	case EILSEQ :
1512	default :
1513	return INVALID_CODEPOINT;
1514	}
1515	}
1516
1517	olen = olen_orig - olen;
1518
1519	*size = ilen_orig - ilen;
1520
1521	if (olen == 2) {
1522	/* 2 byte, UTF16-LE encoded value. */
1523	return (codepoint_t)SVAL(buf, 0);
1524	}
1525	if (olen == 4) {
1526	/* Decode a 4 byte UTF16-LE character manually.
1527	See RFC2871 for the encoding machanism.
1528	*/
1529	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1530	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1531
1532	return (codepoint_t)0x10000 +
1533	(w1 << 10) + w2;
1534	}
1535
1536	/* no other length is valid */
1537	return INVALID_CODEPOINT;
1538	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: