Context Navigation

source: branches/samba-3.0/source/lib/charcnv.c@ 170

Visit:

Last change on this file since 170 was 165, checked in by Paul Smedley, 16 years ago
Add 'missing' 3.0.34 diffs
File size: 38.5 KB

Line
/*
   Unix SMB/CIFS implementation.
   Character set conversion Extensions
   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   Copyright (C) Andrew Tridgell 2001
   Copyright (C) Simo Sorce 2001
   Copyright (C) Martin Pool 2003

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
24	#include "includes.h"
25
/* We can parameterize this if someone complains.... JRA. */

/**
 * Return the substitute character written in place of a character that
 * could not be converted. Currently hard-wired to '_'.
 **/
char lp_failed_convert_char(void)
{
	return '_';
}
32
/**
 * @file
 *
 * @brief Character-set conversion routines built on our iconv.
 *
 * @note Samba's internal character set (at least in the 3.0 series)
 * is always the same as the one for the Unix filesystem.  It is
 * <b>not</b> necessarily UTF-8 and may be different on machines that
 * need i18n filenames to be compatible with Unix software.  It does
 * have to be a superset of ASCII.  All multibyte sequences must start
 * with a byte with the high bit set.
 *
 * @sa lib/iconv.c
 */
47
48
49	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
50	static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	else if (ch == CH_UNIX) ret = lp_unix_charset();
62	else if (ch == CH_DOS) ret = lp_dos_charset();
63	else if (ch == CH_DISPLAY) ret = lp_display_charset();
64	else if (ch == CH_UTF8) ret = "UTF8";
65
66	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67	if (ret && !strcmp(ret, "LOCALE")) {
68	const char *ln = NULL;
69
70	#ifdef HAVE_SETLOCALE
71	setlocale(LC_ALL, "");
72	#endif
73	ln = nl_langinfo(CODESET);
74	if (ln) {
75	/* Check whether the charset name is supported
76	by iconv */
77	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78	if (handle == (smb_iconv_t) -1) {
79	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80	ln = NULL;
81	} else {
82	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83	smb_iconv_close(handle);
84	}
85	}
86	ret = ln;
87	}
88	#endif
89
90	if (!ret \|\| !*ret) ret = "ASCII";
91	return ret;
92	}
93
94	void lazy_initialize_conv(void)
95	{
96	static int initialized = False;
97
98	if (!initialized) {
99	initialized = True;
100	load_case_tables();
101	init_iconv();
102	}
103	}
104
105	/**
106	* Destroy global objects allocated by init_iconv()
107	**/
108	void gfree_charcnv(void)
109	{
110	int c1, c2;
111
112	for (c1=0;c1<NUM_CHARSETS;c1++) {
113	for (c2=0;c2<NUM_CHARSETS;c2++) {
114	if ( conv_handles[c1][c2] ) {
115	smb_iconv_close( conv_handles[c1][c2] );
116	conv_handles[c1][c2] = 0;
117	}
118	}
119	}
120	}
121
122	/**
123	* Initialize iconv conversion descriptors.
124	*
125	* This is called the first time it is needed, and also called again
126	* every time the configuration is reloaded, because the charset or
127	* codepage might have changed.
128	**/
129	void init_iconv(void)
130	{
131	int c1, c2;
132	BOOL did_reload = False;
133
134	/* so that charset_name() works we need to get the UNIX<->UCS2 going
135	first */
136	if (!conv_handles[CH_UNIX][CH_UTF16LE])
137	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
138
139	if (!conv_handles[CH_UTF16LE][CH_UNIX])
140	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
141
142	for (c1=0;c1<NUM_CHARSETS;c1++) {
143	for (c2=0;c2<NUM_CHARSETS;c2++) {
144	const char *n1 = charset_name((charset_t)c1);
145	const char *n2 = charset_name((charset_t)c2);
146	if (conv_handles[c1][c2] &&
147	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
148	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
149	continue;
150
151	did_reload = True;
152
153	if (conv_handles[c1][c2])
154	smb_iconv_close(conv_handles[c1][c2]);
155
156	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
157	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
158	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
159	charset_name((charset_t)c1), charset_name((charset_t)c2)));
160	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
161	n1 = "ASCII";
162	}
163	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
164	n2 = "ASCII";
165	}
166	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
167	n1, n2 ));
168	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
169	if (!conv_handles[c1][c2]) {
170	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
171	smb_panic("init_iconv: conv_handle initialization failed.");
172	}
173	}
174	}
175	}
176
177	if (did_reload) {
178	/* XXX: Does this really get called every time the dos
179	* codepage changes? */
180	/* XXX: Is the did_reload test too strict? */
181	conv_silent = True;
182	init_doschar_table();
183	init_valid_table();
184	conv_silent = False;
185	}
186	}
187
188	/**
189	* Convert string from one encoding to another, making error checking etc
190	* Slow path version - uses (slow) iconv.
191	*
192	* @param src pointer to source string (multibyte or singlebyte)
193	* @param srclen length of the source string in bytes
194	* @param dest pointer to destination string (multibyte or singlebyte)
195	* @param destlen maximal length allowed for string
196	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
197	* @returns the number of bytes occupied in the destination
198	*
199	* Ensure the srclen contains the terminating zero.
200	*
201	**/
202
203	static size_t convert_string_internal(charset_t from, charset_t to,
204	void const *src, size_t srclen,
205	void *dest, size_t destlen, BOOL allow_bad_conv)
206	{
207	size_t i_len, o_len;
208	size_t retval;
209	const char* inbuf = (const char*)src;
210	char* outbuf = (char*)dest;
211	smb_iconv_t descriptor;
212
213	lazy_initialize_conv();
214
215	descriptor = conv_handles[from][to];
216
217	if (srclen == (size_t)-1) {
218	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
219	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
220	} else {
221	srclen = strlen((const char *)src)+1;
222	}
223	}
224
225
226	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
227	if (!conv_silent)
228	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
229	return (size_t)-1;
230	}
231
232	i_len=srclen;
233	o_len=destlen;
234
235	again:
236
237	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
238	if(retval==(size_t)-1) {
239	const char *reason="unknown error";
240	switch(errno) {
241	case EINVAL:
242	reason="Incomplete multibyte sequence";
243	if (!conv_silent)
244	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
245	if (allow_bad_conv)
246	goto use_as_is;
247	break;
248	case E2BIG:
249	reason="No more room";
250	if (!conv_silent) {
251	if (from == CH_UNIX) {
252	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
253	charset_name(from), charset_name(to),
254	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
255	} else {
256	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
257	charset_name(from), charset_name(to),
258	(unsigned int)srclen, (unsigned int)destlen));
259	}
260	}
261	break;
262	case EILSEQ:
263	reason="Illegal multibyte sequence";
264	if (!conv_silent)
265	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
266	if (allow_bad_conv)
267	goto use_as_is;
268	break;
269	default:
270	if (!conv_silent)
271	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
272	break;
273	}
274	/* smb_panic(reason); */
275	}
276	return destlen-o_len;
277
278	use_as_is:
279
280	/*
281	* Conversion not supported. This is actually an error, but there are so
282	* many misconfigured iconv systems and smb.conf's out there we can't just
283	* fail. Do a very bad conversion instead.... JRA.
284	*/
285
286	{
287	if (o_len == 0 \|\| i_len == 0)
288	return destlen - o_len;
289
290	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
291	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
292	/* Can't convert from utf16 any endian to multibyte.
293	Replace with the default fail char.
294	*/
295	if (i_len < 2)
296	return destlen - o_len;
297	if (i_len >= 2) {
298	*outbuf = lp_failed_convert_char();
299
300	outbuf++;
301	o_len--;
302
303	inbuf += 2;
304	i_len -= 2;
305	}
306
307	if (o_len == 0 \|\| i_len == 0)
308	return destlen - o_len;
309
310	/* Keep trying with the next char... */
311	goto again;
312
313	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
314	/* Can't convert to UTF16LE - just widen by adding the
315	default fail char then zero.
316	*/
317	if (o_len < 2)
318	return destlen - o_len;
319
320	outbuf[0] = lp_failed_convert_char();
321	outbuf[1] = '\0';
322
323	inbuf++;
324	i_len--;
325
326	outbuf += 2;
327	o_len -= 2;
328
329	if (o_len == 0 \|\| i_len == 0)
330	return destlen - o_len;
331
332	/* Keep trying with the next char... */
333	goto again;
334
335	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
336	to != CH_UTF16LE && to != CH_UTF16BE) {
337	/* Failed multibyte to multibyte. Just copy the default fail char and
338	try again. */
339	outbuf[0] = lp_failed_convert_char();
340
341	inbuf++;
342	i_len--;
343
344	outbuf++;
345	o_len--;
346
347	if (o_len == 0 \|\| i_len == 0)
348	return destlen - o_len;
349
350	/* Keep trying with the next char... */
351	goto again;
352
353	} else {
354	/* Keep compiler happy.... */
355	return destlen - o_len;
356	}
357	}
358	}
359
360	/**
361	* Convert string from one encoding to another, making error checking etc
362	* Fast path version - handles ASCII first.
363	*
364	* @param src pointer to source string (multibyte or singlebyte)
365	* @param srclen length of the source string in bytes, or -1 for nul terminated.
366	* @param dest pointer to destination string (multibyte or singlebyte)
367	* @param destlen maximal length allowed for string - NEVER -1.
368	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
369	* @returns the number of bytes occupied in the destination
370	*
371	* Ensure the srclen contains the terminating zero.
372	*
373	* This function has been hand-tuned to provide a fast path.
374	* Don't change unless you really know what you are doing. JRA.
375	**/
376
377	size_t convert_string(charset_t from, charset_t to,
378	void const *src, size_t srclen,
379	void *dest, size_t destlen, BOOL allow_bad_conv)
380	{
381	/*
382	* NB. We deliberately don't do a strlen here if srclen == -1.
383	* This is very expensive over millions of calls and is taken
384	* care of in the slow path in convert_string_internal. JRA.
385	*/
386
387	#ifdef DEVELOPER
388	SMB_ASSERT(destlen != (size_t)-1);
389	#endif
390
391	if (srclen == 0)
392	return 0;
393
394	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
395	const unsigned char p = (const unsigned char )src;
396	unsigned char q = (unsigned char )dest;
397	size_t slen = srclen;
398	size_t dlen = destlen;
399	unsigned char lastp = '\0';
400	size_t retval = 0;
401
402	/* If all characters are ascii, fast path here. */
403	while (slen && dlen) {
404	if ((lastp = *p) <= 0x7f) {
405	q++ = p++;
406	if (slen != (size_t)-1) {
407	slen--;
408	}
409	dlen--;
410	retval++;
411	if (!lastp)
412	break;
413	} else {
414	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
415	goto general_case;
416	#else
417	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
418	#endif
419	}
420	}
421	if (!dlen) {
422	/* Even if we fast path we should note if we ran out of room. */
423	if (((slen != (size_t)-1) && slen) \|\|
424	((slen == (size_t)-1) && lastp)) {
425	errno = E2BIG;
426	}
427	}
428	return retval;
429	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
430	const unsigned char p = (const unsigned char )src;
431	unsigned char q = (unsigned char )dest;
432	size_t retval = 0;
433	size_t slen = srclen;
434	size_t dlen = destlen;
435	unsigned char lastp = '\0';
436
437	/* If all characters are ascii, fast path here. */
438	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
439	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
440	q++ = p;
441	if (slen != (size_t)-1) {
442	slen -= 2;
443	}
444	p += 2;
445	dlen--;
446	retval++;
447	if (!lastp)
448	break;
449	} else {
450	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
451	goto general_case;
452	#else
453	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
454	#endif
455	}
456	}
457	if (!dlen) {
458	/* Even if we fast path we should note if we ran out of room. */
459	if (((slen != (size_t)-1) && slen) \|\|
460	((slen == (size_t)-1) && lastp)) {
461	errno = E2BIG;
462	}
463	}
464	return retval;
465	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
466	const unsigned char p = (const unsigned char )src;
467	unsigned char q = (unsigned char )dest;
468	size_t retval = 0;
469	size_t slen = srclen;
470	size_t dlen = destlen;
471	unsigned char lastp = '\0';
472
473	/* If all characters are ascii, fast path here. */
474	while (slen && (dlen >= 2)) {
475	if ((lastp = *p) <= 0x7F) {
476	q++ = p++;
477	*q++ = '\0';
478	if (slen != (size_t)-1) {
479	slen--;
480	}
481	dlen -= 2;
482	retval += 2;
483	if (!lastp)
484	break;
485	} else {
486	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
487	goto general_case;
488	#else
489	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
490	#endif
491	}
492	}
493	if (!dlen) {
494	/* Even if we fast path we should note if we ran out of room. */
495	if (((slen != (size_t)-1) && slen) \|\|
496	((slen == (size_t)-1) && lastp)) {
497	errno = E2BIG;
498	}
499	}
500	return retval;
501	}
502
503	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
504	general_case:
505	#endif
506	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
507	}
508
509	/**
510	* Convert between character sets, allocating a new buffer for the result.
511	*
512	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
513	* @param srclen length of source buffer.
514	* @param dest always set at least to NULL
515	* @note -1 is not accepted for srclen.
516	*
517	* @returns Size in bytes of the converted string; or -1 in case of error.
518	*
519	* Ensure the srclen contains the terminating zero.
520	*
521	* I hate the goto's in this function. It's embarressing.....
522	* There has to be a cleaner way to do this. JRA.
523	**/
524
525	size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
526	void const src, size_t srclen, void dst, BOOL allow_bad_conv)
527	{
528	size_t i_len, o_len, destlen = (srclen * 3) / 2;
529	size_t retval;
530	const char inbuf = (const char )src;
531	char outbuf = NULL, ob = NULL;
532	smb_iconv_t descriptor;
533	void dest = (void )dst;
534
535	*dest = NULL;
536
537	if (src == NULL \|\| srclen == (size_t)-1)
538	return (size_t)-1;
539	if (srclen == 0)
540	return 0;
541
542	lazy_initialize_conv();
543
544	descriptor = conv_handles[from][to];
545
546	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
547	if (!conv_silent)
548	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
549	return (size_t)-1;
550	}
551
552	convert:
553
554	/* +2 is for ucs2 null termination. */
555	if ((destlen*2)+2 < destlen) {
556	/* wrapped ! abort. */
557	if (!conv_silent)
558	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
559	if (!ctx)
560	SAFE_FREE(outbuf);
561	return (size_t)-1;
562	} else {
563	destlen = destlen * 2;
564	}
565
566	/* +2 is for ucs2 null termination. */
567	if (ctx) {
568	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
569	} else {
570	ob = (char *)SMB_REALLOC(ob, destlen + 2);
571	}
572
573	if (!ob) {
574	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
575	return (size_t)-1;
576	}
577	outbuf = ob;
578	i_len = srclen;
579	o_len = destlen;
580
581	again:
582
583	retval = smb_iconv(descriptor,
584	&inbuf, &i_len,
585	&outbuf, &o_len);
586	if(retval == (size_t)-1) {
587	const char *reason="unknown error";
588	switch(errno) {
589	case EINVAL:
590	reason="Incomplete multibyte sequence";
591	if (!conv_silent)
592	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
593	if (allow_bad_conv)
594	goto use_as_is;
595	break;
596	case E2BIG:
597	goto convert;
598	case EILSEQ:
599	reason="Illegal multibyte sequence";
600	if (!conv_silent)
601	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
602	if (allow_bad_conv)
603	goto use_as_is;
604	break;
605	}
606	if (!conv_silent)
607	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
608	/* smb_panic(reason); */
609	return (size_t)-1;
610	}
611
612	out:
613
614	destlen = destlen - o_len;
615	if (ctx) {
616	/* We're shrinking here so we know the +2 is safe from wrap. */
617	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
618	} else {
619	ob = (char *)SMB_REALLOC(ob,destlen + 2);
620	}
621
622	if (destlen && !ob) {
623	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
624	return (size_t)-1;
625	}
626
627	*dest = ob;
628
629	/* Must ucs2 null terminate in the extra space we allocated. */
630	ob[destlen] = '\0';
631	ob[destlen+1] = '\0';
632
633	return destlen;
634
635	use_as_is:
636
637	/*
638	* Conversion not supported. This is actually an error, but there are so
639	* many misconfigured iconv systems and smb.conf's out there we can't just
640	* fail. Do a very bad conversion instead.... JRA.
641	*/
642
643	{
644	if (o_len == 0 \|\| i_len == 0)
645	goto out;
646
647	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
648	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
649	/* Can't convert from utf16 any endian to multibyte.
650	Replace with the default fail char.
651	*/
652
653	if (i_len < 2)
654	goto out;
655
656	if (i_len >= 2) {
657	*outbuf = lp_failed_convert_char();
658
659	outbuf++;
660	o_len--;
661
662	inbuf += 2;
663	i_len -= 2;
664	}
665
666	if (o_len == 0 \|\| i_len == 0)
667	goto out;
668
669	/* Keep trying with the next char... */
670	goto again;
671
672	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
673	/* Can't convert to UTF16LE - just widen by adding the
674	default fail char then zero.
675	*/
676	if (o_len < 2)
677	goto out;
678
679	outbuf[0] = lp_failed_convert_char();
680	outbuf[1] = '\0';
681
682	inbuf++;
683	i_len--;
684
685	outbuf += 2;
686	o_len -= 2;
687
688	if (o_len == 0 \|\| i_len == 0)
689	goto out;
690
691	/* Keep trying with the next char... */
692	goto again;
693
694	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
695	to != CH_UTF16LE && to != CH_UTF16BE) {
696	/* Failed multibyte to multibyte. Just copy the default fail char and
697	try again. */
698	outbuf[0] = lp_failed_convert_char();
699
700	inbuf++;
701	i_len--;
702
703	outbuf++;
704	o_len--;
705
706	if (o_len == 0 \|\| i_len == 0)
707	goto out;
708
709	/* Keep trying with the next char... */
710	goto again;
711
712	} else {
713	/* Keep compiler happy.... */
714	goto out;
715	}
716	}
717	}
718
719	/**
720	* Convert between character sets, allocating a new buffer using talloc for the result.
721	*
722	* @param srclen length of source buffer.
723	* @param dest always set at least to NULL
724	* @note -1 is not accepted for srclen.
725	*
726	* @returns Size in bytes of the converted string; or -1 in case of error.
727	**/
728	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
729	void const src, size_t srclen, void dst,
730	BOOL allow_bad_conv)
731	{
732	void dest = (void )dst;
733	size_t dest_len;
734
735	*dest = NULL;
736	dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
737	if (dest_len == (size_t)-1)
738	return (size_t)-1;
739	if (*dest == NULL)
740	return (size_t)-1;
741	return dest_len;
742	}
743
744	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
745	{
746	size_t size;
747	smb_ucs2_t *buffer;
748
749	size = push_ucs2_allocate(&buffer, src);
750	if (size == (size_t)-1) {
751	smb_panic("failed to create UCS2 buffer");
752	}
753	if (!strupper_w(buffer) && (dest == src)) {
754	free(buffer);
755	return srclen;
756	}
757
758	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
759	free(buffer);
760	return size;
761	}
762
763	/**
764	strdup() a unix string to upper case.
765	Max size is pstring.
766	**/
767
768	char strdup_upper(const char s)
769	{
770	pstring out_buffer;
771	const unsigned char p = (const unsigned char )s;
772	unsigned char q = (unsigned char )out_buffer;
773
774	/* this is quite a common operation, so we want it to be
775	fast. We optimise for the ascii case, knowing that all our
776	supported multi-byte character sets are ascii-compatible
777	(ie. they match for the first 128 chars) */
778
779	while (1) {
780	if (*p & 0x80)
781	break;
782	q++ = toupper_ascii(p);
783	if (!*p)
784	break;
785	p++;
786	if (p - ( const unsigned char *)s >= sizeof(pstring))
787	break;
788	}
789
790	if (*p) {
791	/* MB case. */
792	size_t size;
793	wpstring buffer;
794	size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
795	if (size == (size_t)-1) {
796	return NULL;
797	}
798
799	strupper_w(buffer);
800
801	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
802	if (size == (size_t)-1) {
803	return NULL;
804	}
805	}
806
807	return SMB_STRDUP(out_buffer);
808	}
809
810	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
811	{
812	size_t size;
813	smb_ucs2_t *buffer = NULL;
814
815	size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
816	(void *)(void )&buffer, True);
817	if (size == (size_t)-1 \|\| !buffer) {
818	smb_panic("failed to create UCS2 buffer");
819	}
820	if (!strlower_w(buffer) && (dest == src)) {
821	SAFE_FREE(buffer);
822	return srclen;
823	}
824	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
825	SAFE_FREE(buffer);
826	return size;
827	}
828
829	/**
830	strdup() a unix string to lower case.
831	**/
832
833	char strdup_lower(const char s)
834	{
835	size_t size;
836	smb_ucs2_t *buffer = NULL;
837	char *out_buffer;
838
839	size = push_ucs2_allocate(&buffer, s);
840	if (size == -1 \|\| !buffer) {
841	return NULL;
842	}
843
844	strlower_w(buffer);
845
846	size = pull_ucs2_allocate(&out_buffer, buffer);
847	SAFE_FREE(buffer);
848
849	if (size == (size_t)-1) {
850	return NULL;
851	}
852
853	return out_buffer;
854	}
855
856	static size_t ucs2_align(const void base_ptr, const void p, int flags)
857	{
858	if (flags & (STR_NOALIGN\|STR_ASCII))
859	return 0;
860	return PTR_DIFF(p, base_ptr) & 1;
861	}
862
863
864	/**
865	* Copy a string from a char* unix src to a dos codepage string destination.
866	*
867	* @return the number of bytes occupied by the string in the destination.
868	*
869	* @param flags can include
870	* <dl>
871	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
872	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
873	* </dl>
874	*
875	* @param dest_len the maximum length in bytes allowed in the
876	* destination. If @p dest_len is -1 then no maximum is used.
877	**/
878	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
879	{
880	size_t src_len = strlen(src);
881	pstring tmpbuf;
882	size_t ret;
883
884	/* No longer allow a length of -1 */
885	if (dest_len == (size_t)-1)
886	smb_panic("push_ascii - dest_len == -1");
887
888	if (flags & STR_UPPER) {
889	pstrcpy(tmpbuf, src);
890	strupper_m(tmpbuf);
891	src = tmpbuf;
892	}
893
894	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
895	src_len++;
896
897	ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
898	if (ret == (size_t)-1 &&
899	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
900	&& dest_len > 0) {
901	((char *)dest)[0] = '\0';
902	}
903	return ret;
904	}
905
906	size_t push_ascii_fstring(void dest, const char src)
907	{
908	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
909	}
910
911	size_t push_ascii_pstring(void dest, const char src)
912	{
913	return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
914	}
915
916	/********************************************************************
917	Push an nstring - ensure null terminated. Written by
918	moriyama@miraclelinux.com (MORIYAMA Masayuki).
919	********************************************************************/
920
921	size_t push_ascii_nstring(void dest, const char src)
922	{
923	size_t i, buffer_len, dest_len;
924	smb_ucs2_t *buffer;
925
926	conv_silent = True;
927	buffer_len = push_ucs2_allocate(&buffer, src);
928	if (buffer_len == (size_t)-1) {
929	smb_panic("failed to create UCS2 buffer");
930	}
931
932	/* We're using buffer_len below to count ucs2 characters, not bytes. */
933	buffer_len /= sizeof(smb_ucs2_t);
934
935	dest_len = 0;
936	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
937	unsigned char mb[10];
938	/* Convert one smb_ucs2_t character at a time. */
939	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
940	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
941	memcpy((char *)dest + dest_len, mb, mb_len);
942	dest_len += mb_len;
943	} else {
944	errno = E2BIG;
945	break;
946	}
947	}
948	((char *)dest)[dest_len] = '\0';
949
950	SAFE_FREE(buffer);
951	conv_silent = False;
952	return dest_len;
953	}
954
955	/**
956	* Copy a string from a dos codepage source to a unix char* destination.
957	*
958	* The resulting string in "dest" is always null terminated.
959	*
960	* @param flags can have:
961	* <dl>
962	* <dt>STR_TERMINATE</dt>
963	* <dd>STR_TERMINATE means the string in @p src
964	* is null terminated, and src_len is ignored.</dd>
965	* </dl>
966	*
967	* @param src_len is the length of the source area in bytes.
968	* @returns the number of bytes occupied by the string in @p src.
969	**/
970	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
971	{
972	size_t ret;
973
974	if (dest_len == (size_t)-1)
975	dest_len = sizeof(pstring);
976
977	if (flags & STR_TERMINATE) {
978	if (src_len == (size_t)-1) {
979	src_len = strlen((const char *)src) + 1;
980	} else {
981	size_t len = strnlen((const char *)src, src_len);
982	if (len < src_len)
983	len++;
984	src_len = len;
985	}
986	}
987
988	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
989	if (ret == (size_t)-1) {
990	ret = 0;
991	dest_len = 0;
992	}
993
994	if (dest_len && ret) {
995	/* Did we already process the terminating zero ? */
996	if (dest[MIN(ret-1, dest_len-1)] != 0) {
997	dest[MIN(ret, dest_len-1)] = 0;
998	}
999	} else {
1000	dest[0] = 0;
1001	}
1002
1003	return src_len;
1004	}
1005
1006	size_t pull_ascii_pstring(char dest, const void src)
1007	{
1008	return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1009	}
1010
1011	size_t pull_ascii_fstring(char dest, const void src)
1012	{
1013	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1014	}
1015
1016	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1017
1018	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1019	{
1020	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1021	}
1022
1023	/**
1024	* Copy a string from a char* src to a unicode destination.
1025	*
1026	* @returns the number of bytes occupied by the string in the destination.
1027	*
1028	* @param flags can have:
1029	*
1030	* <dl>
1031	* <dt>STR_TERMINATE <dd>means include the null termination.
1032	* <dt>STR_UPPER <dd>means uppercase in the destination.
1033	* <dt>STR_NOALIGN <dd>means don't do alignment.
1034	* </dl>
1035	*
1036	* @param dest_len is the maximum length allowed in the
1037	* destination. If dest_len is -1 then no maxiumum is used.
1038	**/
1039
1040	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1041	{
1042	size_t len=0;
1043	size_t src_len;
1044	size_t ret;
1045
1046	/* treat a pstring as "unlimited" length */
1047	if (dest_len == (size_t)-1)
1048	dest_len = sizeof(pstring);
1049
1050	if (flags & STR_TERMINATE)
1051	src_len = (size_t)-1;
1052	else
1053	src_len = strlen(src);
1054
1055	if (ucs2_align(base_ptr, dest, flags)) {
1056	(char )dest = 0;
1057	dest = (void )((char )dest + 1);
1058	if (dest_len)
1059	dest_len--;
1060	len++;
1061	}
1062
1063	/* ucs2 is always a multiple of 2 bytes */
1064	dest_len &= ~1;
1065
1066	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1067	if (ret == (size_t)-1) {
1068	return 0;
1069	}
1070
1071	len += ret;
1072
1073	if (flags & STR_UPPER) {
1074	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1075	size_t i;
1076
1077	/* We check for i < (ret / 2) below as the dest string isn't null
1078	terminated if STR_TERMINATE isn't set. */
1079
1080	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1081	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1082	if (v != dest_ucs2[i]) {
1083	dest_ucs2[i] = v;
1084	}
1085	}
1086	}
1087
1088	return len;
1089	}
1090
1091
1092	/**
1093	* Copy a string from a unix char* src to a UCS2 destination,
1094	* allocating a buffer using talloc().
1095	*
1096	* @param dest always set at least to NULL
1097	*
1098	* @returns The number of bytes occupied by the string in the destination
1099	* or -1 in case of error.
1100	**/
1101	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1102	{
1103	size_t src_len = strlen(src)+1;
1104
1105	*dest = NULL;
1106	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1107	}
1108
1109
1110	/**
1111	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1112	*
1113	* @param dest always set at least to NULL
1114	*
1115	* @returns The number of bytes occupied by the string in the destination
1116	* or -1 in case of error.
1117	**/
1118
1119	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1120	{
1121	size_t src_len = strlen(src)+1;
1122
1123	*dest = NULL;
1124	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1125	}
1126
1127	/**
1128	Copy a string from a char* src to a UTF-8 destination.
1129	Return the number of bytes occupied by the string in the destination
1130	Flags can have:
1131	STR_TERMINATE means include the null termination
1132	STR_UPPER means uppercase in the destination
1133	dest_len is the maximum length allowed in the destination. If dest_len
1134	is -1 then no maxiumum is used.
1135	**/
1136
1137	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1138	{
1139	size_t src_len = strlen(src);
1140	pstring tmpbuf;
1141
1142	/* treat a pstring as "unlimited" length */
1143	if (dest_len == (size_t)-1)
1144	dest_len = sizeof(pstring);
1145
1146	if (flags & STR_UPPER) {
1147	pstrcpy(tmpbuf, src);
1148	strupper_m(tmpbuf);
1149	src = tmpbuf;
1150	}
1151
1152	if (flags & STR_TERMINATE)
1153	src_len++;
1154
1155	return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1156	}
1157
1158	size_t push_utf8_fstring(void dest, const char src)
1159	{
1160	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1161	}
1162
1163	/**
1164	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1165	*
1166	* @param dest always set at least to NULL
1167	*
1168	* @returns The number of bytes occupied by the string in the destination
1169	**/
1170
1171	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1172	{
1173	size_t src_len = strlen(src)+1;
1174
1175	*dest = NULL;
1176	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1177	}
1178
1179	/**
1180	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1181	*
1182	* @param dest always set at least to NULL
1183	*
1184	* @returns The number of bytes occupied by the string in the destination
1185	**/
1186
1187	size_t push_utf8_allocate(char *dest, const char src)
1188	{
1189	size_t src_len = strlen(src)+1;
1190
1191	*dest = NULL;
1192	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1193	}
1194
1195	/**
1196	Copy a string from a ucs2 source to a unix char* destination.
1197	Flags can have:
1198	STR_TERMINATE means the string in src is null terminated.
1199	STR_NOALIGN means don't try to align.
1200	if STR_TERMINATE is set then src_len is ignored if it is -1.
1201	src_len is the length of the source area in bytes
1202	Return the number of bytes occupied by the string in src.
1203	The resulting string in "dest" is always null terminated.
1204	**/
1205
1206	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1207	{
1208	size_t ret;
1209
1210	if (dest_len == (size_t)-1)
1211	dest_len = sizeof(pstring);
1212
1213	if (ucs2_align(base_ptr, src, flags)) {
1214	src = (const void )((const char )src + 1);
1215	if (src_len != (size_t)-1)
1216	src_len--;
1217	}
1218
1219	if (flags & STR_TERMINATE) {
1220	/* src_len -1 is the default for null terminated strings. */
1221	if (src_len != (size_t)-1) {
1222	size_t len = strnlen_w((const smb_ucs2_t *)src,
1223	src_len/2);
1224	if (len < src_len/2)
1225	len++;
1226	src_len = len*2;
1227	}
1228	}
1229
1230	/* ucs2 is always a multiple of 2 bytes */
1231	if (src_len != (size_t)-1)
1232	src_len &= ~1;
1233
1234	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1235	if (ret == (size_t)-1) {
1236	return 0;
1237	}
1238
1239	if (src_len == (size_t)-1)
1240	src_len = ret*2;
1241
1242	if (dest_len && ret) {
1243	/* Did we already process the terminating zero ? */
1244	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1245	dest[MIN(ret, dest_len-1)] = 0;
1246	}
1247	} else {
1248	dest[0] = 0;
1249	}
1250
1251	return src_len;
1252	}
1253
1254	size_t pull_ucs2_pstring(char dest, const void src)
1255	{
1256	return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1257	}
1258
1259	size_t pull_ucs2_fstring(char dest, const void src)
1260	{
1261	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1262	}
1263
1264	/**
1265	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1266	*
1267	* @param dest always set at least to NULL
1268	*
1269	* @returns The number of bytes occupied by the string in the destination
1270	**/
1271
1272	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1273	{
1274	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1275	*dest = NULL;
1276	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1277	}
1278
1279	/**
1280	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1281	*
1282	* @param dest always set at least to NULL
1283	*
1284	* @returns The number of bytes occupied by the string in the destination
1285	**/
1286
1287	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1288	{
1289	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1290	*dest = NULL;
1291	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1292	}
1293
1294	/**
1295	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1296	*
1297	* @param dest always set at least to NULL
1298	*
1299	* @returns The number of bytes occupied by the string in the destination
1300	**/
1301
1302	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1303	{
1304	size_t src_len = strlen(src)+1;
1305	*dest = NULL;
1306	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1307	}
1308
1309	/**
1310	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1311	*
1312	* @param dest always set at least to NULL
1313	*
1314	* @returns The number of bytes occupied by the string in the destination
1315	**/
1316
1317	size_t pull_utf8_allocate(char *dest, const char src)
1318	{
1319	size_t src_len = strlen(src)+1;
1320	*dest = NULL;
1321	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1322	}
1323
1324	/**
1325	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1326	*
1327	* @param dest always set at least to NULL
1328	*
1329	* @returns The number of bytes occupied by the string in the destination
1330	**/
1331
1332	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1333	{
1334	size_t src_len = strlen(src)+1;
1335	*dest = NULL;
1336	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1337	}
1338
1339	/**
1340	Copy a string from a char* src to a unicode or ascii
1341	dos codepage destination choosing unicode or ascii based on the
1342	flags in the SMB buffer starting at base_ptr.
1343	Return the number of bytes occupied by the string in the destination.
1344	flags can have:
1345	STR_TERMINATE means include the null termination.
1346	STR_UPPER means uppercase in the destination.
1347	STR_ASCII use ascii even with unicode packet.
1348	STR_NOALIGN means don't do alignment.
1349	dest_len is the maximum length allowed in the destination. If dest_len
1350	is -1 then no maxiumum is used.
1351	**/
1352
1353	size_t push_string_fn(const char function, unsigned int line, const void base_ptr, void dest, const char src, size_t dest_len, int flags)
1354	{
1355	#ifdef DEVELOPER
1356	/* We really need to zero fill here, not clobber
1357	* region, as we want to ensure that valgrind thinks
1358	* all of the outgoing buffer has been written to
1359	* so a send() or write() won't trap an error.
1360	* JRA.
1361	*/
1362	#if 0
1363	if (dest_len != (size_t)-1)
1364	clobber_region(function, line, dest, dest_len);
1365	#else
1366	if (dest_len != (size_t)-1)
1367	memset(dest, '\0', dest_len);
1368	#endif
1369	#endif
1370
1371	if (!(flags & STR_ASCII) && \
1372	((flags & STR_UNICODE \|\| \
1373	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1374	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1375	}
1376	return push_ascii(dest, src, dest_len, flags);
1377	}
1378
1379
1380	/**
1381	Copy a string from a unicode or ascii source (depending on
1382	the packet flags) to a char* destination.
1383	Flags can have:
1384	STR_TERMINATE means the string in src is null terminated.
1385	STR_UNICODE means to force as unicode.
1386	STR_ASCII use ascii even with unicode packet.
1387	STR_NOALIGN means don't do alignment.
1388	if STR_TERMINATE is set then src_len is ignored is it is -1
1389	src_len is the length of the source area in bytes.
1390	Return the number of bytes occupied by the string in src.
1391	The resulting string in "dest" is always null terminated.
1392	**/
1393
1394	size_t pull_string_fn(const char function, unsigned int line, const void base_ptr, char dest, const void src, size_t dest_len, size_t src_len, int flags)
1395	{
1396	#ifdef DEVELOPER
1397	if (dest_len != (size_t)-1)
1398	clobber_region(function, line, dest, dest_len);
1399	#endif
1400
1401	if (!(flags & STR_ASCII) && \
1402	((flags & STR_UNICODE \|\| \
1403	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1404	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1405	}
1406	return pull_ascii(dest, src, dest_len, src_len, flags);
1407	}
1408
1409	size_t align_string(const void base_ptr, const char p, int flags)
1410	{
1411	if (!(flags & STR_ASCII) && \
1412	((flags & STR_UNICODE \|\| \
1413	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1414	return ucs2_align(base_ptr, p, flags);
1415	}
1416	return 0;
1417	}
1418
1419	/*
1420	Return the unicode codepoint for the next multi-byte CH_UNIX character
1421	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1422
1423	Also return the number of bytes consumed (which tells the caller
1424	how many bytes to skip to get to the next CH_UNIX character).
1425
1426	Return INVALID_CODEPOINT if the next character cannot be converted.
1427	*/
1428
1429	codepoint_t next_codepoint(const char str, size_t size)
1430	{
1431	/* It cannot occupy more than 4 bytes in UTF16 format */
1432	uint8_t buf[4];
1433	smb_iconv_t descriptor;
1434	#ifdef __OS2__
1435	size_t ilen_max;
1436	size_t olen_orig;
1437	const char *inbuf;
1438	#endif
1439	size_t ilen_orig;
1440	size_t ilen;
1441	size_t olen;
1442
1443	char *outbuf;
1444
1445	#ifdef __OS2__
1446	*size = 1;
1447	#endif
1448
1449	if ((str[0] & 0x80) == 0) {
1450	#ifndef __OS2__
1451	*size = 1;
1452	#endif
1453	return (codepoint_t)str[0];
1454	}
1455
1456	lazy_initialize_conv();
1457
1458	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1459	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1460	#ifndef __OS2__
1461	*size = 1;
1462	#endif
1463	return INVALID_CODEPOINT;
1464	}
1465	#ifdef __OS2__
1466	/* We assume that no multi-byte character can take
1467	more than 5 bytes. This is OK as we only
1468	support codepoints up to 1M */
1469
1470	ilen_max = strnlen( str, 5 );
1471	#else
1472	*size = 1;
1473	#endif
1474	ilen_orig = 1;
1475	olen_orig = 2;
1476	while( 1 )
1477	{
1478	ilen = ilen_orig;
1479	olen = olen_orig;
1480	inbuf = str;
1481	outbuf = ( char * )buf;
1482	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1483	break;
1484
1485	switch( errno )
1486	{
1487	case E2BIG :
1488	if( olen_orig == 2 )
1489	olen_orig = 4;
1490	else
1491	return INVALID_CODEPOINT;
1492	break;
1493
1494
1495	case EINVAL :
1496	#ifndef __OS2__
1497	/* We assume that no multi-byte character can take
1498	more than 5 bytes. This is OK as we only
1499	support codepoints up to 1M */
1500	if( ilen_orig < 5 )
1501	#else
1502	if( ilen_orig < ilen_max )
1503	#endif
1504	ilen_orig++;
1505	else
1506	return INVALID_CODEPOINT;
1507	break;
1508
1509	case EILSEQ :
1510	default :
1511	return INVALID_CODEPOINT;
1512	}
1513	}
1514
1515	olen = olen_orig - olen;
1516
1517	*size = ilen_orig - ilen;
1518
1519	if (olen == 2) {
1520	/* 2 byte, UTF16-LE encoded value. */
1521	return (codepoint_t)SVAL(buf, 0);
1522	}
1523	if (olen == 4) {
1524	/* Decode a 4 byte UTF16-LE character manually.
1525	See RFC2871 for the encoding machanism.
1526	*/
1527	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1528	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1529
1530	return (codepoint_t)0x10000 +
1531	(w1 << 10) + w2;
1532	}
1533
1534	/* no other length is valid */
1535	return INVALID_CODEPOINT;
1536	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: