Context Navigation

source: branches/samba-3.0/source/lib/charcnv.c@ 120

Visit:

Last change on this file since 120 was 120, checked in by Paul Smedley, 17 years ago
Changes from KO to address Ticket #20
File size: 45.1 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 2 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program; if not, write to the Free Software
21	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
23	*/
24	#include "includes.h"
25
/*
 * Replacement character written in place of any character that cannot be
 * converted. Hard-wired for now; it can be made configurable if someone
 * complains.... JRA.
 */
char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
32
33	/**
34	* @file
35	*
36	* @brief Character-set conversion routines built on our iconv.
37	*
38	* @note Samba's internal character set (at least in the 3.0 series)
39	* is always the same as the one for the Unix filesystem. It is
40	* <b>not</b> necessarily UTF-8 and may be different on machines that
41	* need i18n filenames to be compatible with Unix software. It does
42	* have to be a superset of ASCII. All multibyte sequences must start
43	* with a byte with the high bit set.
44	*
45	* @sa lib/iconv.c
46	*/
47
48
49	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
50	static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	else if (ch == CH_UNIX) ret = lp_unix_charset();
62	else if (ch == CH_DOS) ret = lp_dos_charset();
63	else if (ch == CH_DISPLAY) ret = lp_display_charset();
64	else if (ch == CH_UTF8) ret = "UTF8";
65
66	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67	if (ret && !strcmp(ret, "LOCALE")) {
68	const char *ln = NULL;
69
70	#ifdef HAVE_SETLOCALE
71	setlocale(LC_ALL, "");
72	#endif
73	ln = nl_langinfo(CODESET);
74	if (ln) {
75	/* Check whether the charset name is supported
76	by iconv */
77	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78	if (handle == (smb_iconv_t) -1) {
79	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80	ln = NULL;
81	} else {
82	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83	smb_iconv_close(handle);
84	}
85	}
86	ret = ln;
87	}
88	#endif
89
90	if (!ret \|\| !*ret) ret = "ASCII";
91	return ret;
92	}
93
94	void lazy_initialize_conv(void)
95	{
96	static int initialized = False;
97
98	if (!initialized) {
99	initialized = True;
100	load_case_tables();
101	init_iconv();
102	}
103	}
104
105	/**
106	* Destroy global objects allocated by init_iconv()
107	**/
108	void gfree_charcnv(void)
109	{
110	int c1, c2;
111
112	for (c1=0;c1<NUM_CHARSETS;c1++) {
113	for (c2=0;c2<NUM_CHARSETS;c2++) {
114	if ( conv_handles[c1][c2] ) {
115	smb_iconv_close( conv_handles[c1][c2] );
116	conv_handles[c1][c2] = 0;
117	}
118	}
119	}
120	}
121
122	/**
123	* Initialize iconv conversion descriptors.
124	*
125	* This is called the first time it is needed, and also called again
126	* every time the configuration is reloaded, because the charset or
127	* codepage might have changed.
128	**/
129	void init_iconv(void)
130	{
131	int c1, c2;
132	BOOL did_reload = False;
133
134	/* so that charset_name() works we need to get the UNIX<->UCS2 going
135	first */
136	if (!conv_handles[CH_UNIX][CH_UTF16LE])
137	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
138
139	if (!conv_handles[CH_UTF16LE][CH_UNIX])
140	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
141
142	for (c1=0;c1<NUM_CHARSETS;c1++) {
143	for (c2=0;c2<NUM_CHARSETS;c2++) {
144	const char *n1 = charset_name((charset_t)c1);
145	const char *n2 = charset_name((charset_t)c2);
146	if (conv_handles[c1][c2] &&
147	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
148	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
149	continue;
150
151	did_reload = True;
152
153	if (conv_handles[c1][c2])
154	smb_iconv_close(conv_handles[c1][c2]);
155
156	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
157	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
158	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
159	charset_name((charset_t)c1), charset_name((charset_t)c2)));
160	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
161	n1 = "ASCII";
162	}
163	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
164	n2 = "ASCII";
165	}
166	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
167	n1, n2 ));
168	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
169	if (!conv_handles[c1][c2]) {
170	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
171	smb_panic("init_iconv: conv_handle initialization failed.");
172	}
173	}
174	}
175	}
176
177	if (did_reload) {
178	/* XXX: Does this really get called every time the dos
179	* codepage changes? */
180	/* XXX: Is the did_reload test too strict? */
181	conv_silent = True;
182	init_doschar_table();
183	init_valid_table();
184	conv_silent = False;
185	}
186	}
187
188	/**
189	* Convert string from one encoding to another, making error checking etc
190	* Slow path version - uses (slow) iconv.
191	*
192	* @param src pointer to source string (multibyte or singlebyte)
193	* @param srclen length of the source string in bytes
194	* @param dest pointer to destination string (multibyte or singlebyte)
195	* @param destlen maximal length allowed for string
196	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
197	* @returns the number of bytes occupied in the destination
198	*
199	* Ensure the srclen contains the terminating zero.
200	*
201	**/
202
203	static size_t convert_string_internal(charset_t from, charset_t to,
204	void const *src, size_t srclen,
205	void *dest, size_t destlen, BOOL allow_bad_conv)
206	{
207	size_t i_len, o_len;
208	size_t retval;
209	const char* inbuf = (const char*)src;
210	char* outbuf = (char*)dest;
211	smb_iconv_t descriptor;
212
213	lazy_initialize_conv();
214
215	descriptor = conv_handles[from][to];
216
217	if (srclen == (size_t)-1) {
218	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
219	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
220	} else {
221	srclen = strlen((const char *)src)+1;
222	}
223	}
224
225
226	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
227	if (!conv_silent)
228	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
229	return (size_t)-1;
230	}
231
232	i_len=srclen;
233	o_len=destlen;
234
235	again:
236
237	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
238	if(retval==(size_t)-1) {
239	const char *reason="unknown error";
240	switch(errno) {
241	case EINVAL:
242	reason="Incomplete multibyte sequence";
243	if (!conv_silent)
244	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
245	if (allow_bad_conv)
246	goto use_as_is;
247	break;
248	case E2BIG:
249	reason="No more room";
250	if (!conv_silent) {
251	if (from == CH_UNIX) {
252	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
253	charset_name(from), charset_name(to),
254	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
255	} else {
256	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
257	charset_name(from), charset_name(to),
258	(unsigned int)srclen, (unsigned int)destlen));
259	}
260	}
261	break;
262	case EILSEQ:
263	reason="Illegal multibyte sequence";
264	if (!conv_silent)
265	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
266	if (allow_bad_conv)
267	goto use_as_is;
268	break;
269	default:
270	if (!conv_silent)
271	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
272	break;
273	}
274	/* smb_panic(reason); */
275	}
276	return destlen-o_len;
277
278	use_as_is:
279
280	/*
281	* Conversion not supported. This is actually an error, but there are so
282	* many misconfigured iconv systems and smb.conf's out there we can't just
283	* fail. Do a very bad conversion instead.... JRA.
284	*/
285
286	{
287	if (o_len == 0 \|\| i_len == 0)
288	return destlen - o_len;
289
290	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
291	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
292	/* Can't convert from utf16 any endian to multibyte.
293	Replace with the default fail char.
294	*/
295	if (i_len < 2)
296	return destlen - o_len;
297	if (i_len >= 2) {
298	*outbuf = lp_failed_convert_char();
299
300	outbuf++;
301	o_len--;
302
303	inbuf += 2;
304	i_len -= 2;
305	}
306
307	if (o_len == 0 \|\| i_len == 0)
308	return destlen - o_len;
309
310	/* Keep trying with the next char... */
311	goto again;
312
313	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
314	/* Can't convert to UTF16LE - just widen by adding the
315	default fail char then zero.
316	*/
317	if (o_len < 2)
318	return destlen - o_len;
319
320	outbuf[0] = lp_failed_convert_char();
321	outbuf[1] = '\0';
322
323	inbuf++;
324	i_len--;
325
326	outbuf += 2;
327	o_len -= 2;
328
329	if (o_len == 0 \|\| i_len == 0)
330	return destlen - o_len;
331
332	/* Keep trying with the next char... */
333	goto again;
334
335	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
336	to != CH_UTF16LE && to != CH_UTF16BE) {
337	/* Failed multibyte to multibyte. Just copy the default fail char and
338	try again. */
339	outbuf[0] = lp_failed_convert_char();
340
341	inbuf++;
342	i_len--;
343
344	outbuf++;
345	o_len--;
346
347	if (o_len == 0 \|\| i_len == 0)
348	return destlen - o_len;
349
350	/* Keep trying with the next char... */
351	goto again;
352
353	} else {
354	/* Keep compiler happy.... */
355	return destlen - o_len;
356	}
357	}
358	}
359
360	/**
361	* Convert string from one encoding to another, making error checking etc
362	* Fast path version - handles ASCII first.
363	*
364	* @param src pointer to source string (multibyte or singlebyte)
365	* @param srclen length of the source string in bytes, or -1 for nul terminated.
366	* @param dest pointer to destination string (multibyte or singlebyte)
367	* @param destlen maximal length allowed for string - NEVER -1.
368	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
369	* @returns the number of bytes occupied in the destination
370	*
371	* Ensure the srclen contains the terminating zero.
372	*
373	* This function has been hand-tuned to provide a fast path.
374	* Don't change unless you really know what you are doing. JRA.
375	**/
376
377	size_t convert_string(charset_t from, charset_t to,
378	void const *src, size_t srclen,
379	void *dest, size_t destlen, BOOL allow_bad_conv)
380	{
381	/*
382	* NB. We deliberately don't do a strlen here if srclen == -1.
383	* This is very expensive over millions of calls and is taken
384	* care of in the slow path in convert_string_internal. JRA.
385	*/
386
387	#ifdef DEVELOPER
388	SMB_ASSERT(destlen != (size_t)-1);
389	#endif
390
391	if (srclen == 0)
392	return 0;
393
394	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
395	const unsigned char p = (const unsigned char )src;
396	unsigned char q = (unsigned char )dest;
397	size_t slen = srclen;
398	size_t dlen = destlen;
399	unsigned char lastp = '\0';
400	size_t retval = 0;
401
402	/* If all characters are ascii, fast path here. */
403	while (slen && dlen) {
404	if ((lastp = *p) <= 0x7f) {
405	q++ = p++;
406	if (slen != (size_t)-1) {
407	slen--;
408	}
409	dlen--;
410	retval++;
411	if (!lastp)
412	break;
413	} else {
414	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
415	goto general_case;
416	#else
417	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
418	#endif
419	}
420	}
421	if (!dlen) {
422	/* Even if we fast path we should note if we ran out of room. */
423	if (((slen != (size_t)-1) && slen) \|\|
424	((slen == (size_t)-1) && lastp)) {
425	errno = E2BIG;
426	}
427	}
428	return retval;
429	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
430	const unsigned char p = (const unsigned char )src;
431	unsigned char q = (unsigned char )dest;
432	size_t retval = 0;
433	size_t slen = srclen;
434	size_t dlen = destlen;
435	unsigned char lastp = '\0';
436
437	/* If all characters are ascii, fast path here. */
438	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
439	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
440	q++ = p;
441	if (slen != (size_t)-1) {
442	slen -= 2;
443	}
444	p += 2;
445	dlen--;
446	retval++;
447	if (!lastp)
448	break;
449	} else {
450	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
451	goto general_case;
452	#else
453	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
454	#endif
455	}
456	}
457	if (!dlen) {
458	/* Even if we fast path we should note if we ran out of room. */
459	if (((slen != (size_t)-1) && slen) \|\|
460	((slen == (size_t)-1) && lastp)) {
461	errno = E2BIG;
462	}
463	}
464	return retval;
465	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
466	const unsigned char p = (const unsigned char )src;
467	unsigned char q = (unsigned char )dest;
468	size_t retval = 0;
469	size_t slen = srclen;
470	size_t dlen = destlen;
471	unsigned char lastp = '\0';
472
473	/* If all characters are ascii, fast path here. */
474	while (slen && (dlen >= 2)) {
475	if ((lastp = *p) <= 0x7F) {
476	q++ = p++;
477	*q++ = '\0';
478	if (slen != (size_t)-1) {
479	slen--;
480	}
481	dlen -= 2;
482	retval += 2;
483	if (!lastp)
484	break;
485	} else {
486	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
487	goto general_case;
488	#else
489	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
490	#endif
491	}
492	}
493	if (!dlen) {
494	/* Even if we fast path we should note if we ran out of room. */
495	if (((slen != (size_t)-1) && slen) \|\|
496	((slen == (size_t)-1) && lastp)) {
497	errno = E2BIG;
498	}
499	}
500	return retval;
501	}
502
503	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
504	general_case:
505	#endif
506	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
507	}
508
509	/**
510	* Convert between character sets, allocating a new buffer for the result.
511	*
512	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
513	* @param srclen length of source buffer.
514	* @param dest always set at least to NULL
515	* @note -1 is not accepted for srclen.
516	*
517	* @returns Size in bytes of the converted string; or -1 in case of error.
518	*
519	* Ensure the srclen contains the terminating zero.
520	*
521	* I hate the goto's in this function. It's embarressing.....
522	* There has to be a cleaner way to do this. JRA.
523	**/
524
525	size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
526	void const src, size_t srclen, void dst, BOOL allow_bad_conv)
527	{
528	size_t i_len, o_len, destlen = MAX(srclen, 512);
529	size_t retval;
530	const char inbuf = (const char )src;
531	char outbuf = NULL, ob = NULL;
532	smb_iconv_t descriptor;
533	void dest = (void )dst;
534
535	*dest = NULL;
536
537	if (src == NULL \|\| srclen == (size_t)-1)
538	return (size_t)-1;
539	if (srclen == 0)
540	return 0;
541
542	lazy_initialize_conv();
543
544	descriptor = conv_handles[from][to];
545
546	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
547	if (!conv_silent)
548	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
549	return (size_t)-1;
550	}
551
552	convert:
553
554	if ((destlen*2) < destlen) {
555	/* wrapped ! abort. */
556	if (!conv_silent)
557	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
558	if (!ctx)
559	SAFE_FREE(outbuf);
560	return (size_t)-1;
561	} else {
562	destlen = destlen * 2;
563	}
564
565	if (ctx) {
566	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen);
567	} else {
568	ob = (char *)SMB_REALLOC(ob, destlen);
569	}
570
571	if (!ob) {
572	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
573	return (size_t)-1;
574	}
575	outbuf = ob;
576	i_len = srclen;
577	o_len = destlen;
578
579	again:
580
581	retval = smb_iconv(descriptor,
582	&inbuf, &i_len,
583	&outbuf, &o_len);
584	if(retval == (size_t)-1) {
585	const char *reason="unknown error";
586	switch(errno) {
587	case EINVAL:
588	reason="Incomplete multibyte sequence";
589	if (!conv_silent)
590	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
591	if (allow_bad_conv)
592	goto use_as_is;
593	break;
594	case E2BIG:
595	goto convert;
596	case EILSEQ:
597	reason="Illegal multibyte sequence";
598	if (!conv_silent)
599	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
600	if (allow_bad_conv)
601	goto use_as_is;
602	break;
603	}
604	if (!conv_silent)
605	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
606	/* smb_panic(reason); */
607	return (size_t)-1;
608	}
609
610	out:
611
612	destlen = destlen - o_len;
613	if (ctx) {
614	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen);
615	} else {
616	ob = (char *)SMB_REALLOC(ob,destlen);
617	}
618
619	if (destlen && !ob) {
620	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
621	return (size_t)-1;
622	}
623
624	*dest = ob;
625	return destlen;
626
627	use_as_is:
628
629	/*
630	* Conversion not supported. This is actually an error, but there are so
631	* many misconfigured iconv systems and smb.conf's out there we can't just
632	* fail. Do a very bad conversion instead.... JRA.
633	*/
634
635	{
636	if (o_len == 0 \|\| i_len == 0)
637	goto out;
638
639	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
640	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
641	/* Can't convert from utf16 any endian to multibyte.
642	Replace with the default fail char.
643	*/
644
645	if (i_len < 2)
646	goto out;
647
648	if (i_len >= 2) {
649	*outbuf = lp_failed_convert_char();
650
651	outbuf++;
652	o_len--;
653
654	inbuf += 2;
655	i_len -= 2;
656	}
657
658	if (o_len == 0 \|\| i_len == 0)
659	goto out;
660
661	/* Keep trying with the next char... */
662	goto again;
663
664	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
665	/* Can't convert to UTF16LE - just widen by adding the
666	default fail char then zero.
667	*/
668	if (o_len < 2)
669	goto out;
670
671	outbuf[0] = lp_failed_convert_char();
672	outbuf[1] = '\0';
673
674	inbuf++;
675	i_len--;
676
677	outbuf += 2;
678	o_len -= 2;
679
680	if (o_len == 0 \|\| i_len == 0)
681	goto out;
682
683	/* Keep trying with the next char... */
684	goto again;
685
686	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
687	to != CH_UTF16LE && to != CH_UTF16BE) {
688	/* Failed multibyte to multibyte. Just copy the default fail char and
689	try again. */
690	outbuf[0] = lp_failed_convert_char();
691
692	inbuf++;
693	i_len--;
694
695	outbuf++;
696	o_len--;
697
698	if (o_len == 0 \|\| i_len == 0)
699	goto out;
700
701	/* Keep trying with the next char... */
702	goto again;
703
704	} else {
705	/* Keep compiler happy.... */
706	goto out;
707	}
708	}
709	}
710
711	/**
712	* Convert between character sets, allocating a new buffer using talloc for the result.
713	*
714	* @param srclen length of source buffer.
715	* @param dest always set at least to NULL
716	* @note -1 is not accepted for srclen.
717	*
718	* @returns Size in bytes of the converted string; or -1 in case of error.
719	**/
720	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
721	void const src, size_t srclen, void dst,
722	BOOL allow_bad_conv)
723	{
724	void dest = (void )dst;
725	size_t dest_len;
726
727	*dest = NULL;
728	dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
729	if (dest_len == (size_t)-1)
730	return (size_t)-1;
731	if (*dest == NULL)
732	return (size_t)-1;
733	return dest_len;
734	}
735
736	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
737	{
738	size_t size;
739	smb_ucs2_t *buffer;
740
741	size = push_ucs2_allocate(&buffer, src);
742	if (size == (size_t)-1) {
743	smb_panic("failed to create UCS2 buffer");
744	}
745	if (!strupper_w(buffer) && (dest == src)) {
746	free(buffer);
747	return srclen;
748	}
749
750	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
751	free(buffer);
752	return size;
753	}
754
755	/**
756	strdup() a unix string to upper case.
757	Max size is pstring.
758	**/
759
760	char strdup_upper(const char s)
761	{
762	pstring out_buffer;
763	const unsigned char p = (const unsigned char )s;
764	unsigned char q = (unsigned char )out_buffer;
765
766	/* this is quite a common operation, so we want it to be
767	fast. We optimise for the ascii case, knowing that all our
768	supported multi-byte character sets are ascii-compatible
769	(ie. they match for the first 128 chars) */
770
771	while (1) {
772	if (*p & 0x80)
773	break;
774	q++ = toupper_ascii(p);
775	if (!*p)
776	break;
777	p++;
778	if (p - ( const unsigned char *)s >= sizeof(pstring))
779	break;
780	}
781
782	if (*p) {
783	/* MB case. */
784	size_t size;
785	wpstring buffer;
786	size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
787	if (size == (size_t)-1) {
788	return NULL;
789	}
790
791	strupper_w(buffer);
792
793	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
794	if (size == (size_t)-1) {
795	return NULL;
796	}
797	}
798
799	return SMB_STRDUP(out_buffer);
800	}
801
802	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
803	{
804	size_t size;
805	smb_ucs2_t *buffer = NULL;
806
807	size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
808	(void *)(void )&buffer, True);
809	if (size == (size_t)-1 \|\| !buffer) {
810	smb_panic("failed to create UCS2 buffer");
811	}
812	if (!strlower_w(buffer) && (dest == src)) {
813	SAFE_FREE(buffer);
814	return srclen;
815	}
816	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
817	SAFE_FREE(buffer);
818	return size;
819	}
820
821	/**
822	strdup() a unix string to lower case.
823	**/
824
825	char strdup_lower(const char s)
826	{
827	size_t size;
828	smb_ucs2_t *buffer = NULL;
829	char *out_buffer;
830
831	size = push_ucs2_allocate(&buffer, s);
832	if (size == -1 \|\| !buffer) {
833	return NULL;
834	}
835
836	strlower_w(buffer);
837
838	size = pull_ucs2_allocate(&out_buffer, buffer);
839	SAFE_FREE(buffer);
840
841	if (size == (size_t)-1) {
842	return NULL;
843	}
844
845	return out_buffer;
846	}
847
848	static size_t ucs2_align(const void base_ptr, const void p, int flags)
849	{
850	if (flags & (STR_NOALIGN\|STR_ASCII))
851	return 0;
852	return PTR_DIFF(p, base_ptr) & 1;
853	}
854
855
856	/**
857	* Copy a string from a char* unix src to a dos codepage string destination.
858	*
859	* @return the number of bytes occupied by the string in the destination.
860	*
861	* @param flags can include
862	* <dl>
863	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
864	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
865	* </dl>
866	*
867	* @param dest_len the maximum length in bytes allowed in the
868	* destination. If @p dest_len is -1 then no maximum is used.
869	**/
870	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
871	{
872	size_t src_len = strlen(src);
873	pstring tmpbuf;
874	size_t ret;
875
876	/* No longer allow a length of -1 */
877	if (dest_len == (size_t)-1)
878	smb_panic("push_ascii - dest_len == -1");
879
880	if (flags & STR_UPPER) {
881	pstrcpy(tmpbuf, src);
882	strupper_m(tmpbuf);
883	src = tmpbuf;
884	}
885
886	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
887	src_len++;
888
889	ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
890	if (ret == (size_t)-1 &&
891	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
892	&& dest_len > 0) {
893	((char *)dest)[0] = '\0';
894	}
895	return ret;
896
897	}
898
899	size_t push_ascii_fstring(void dest, const char src)
900	{
901	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
902	}
903
904	size_t push_ascii_pstring(void dest, const char src)
905	{
906	return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
907	}
908
909	/********************************************************************
910	Push an nstring - ensure null terminated. Written by
911	moriyama@miraclelinux.com (MORIYAMA Masayuki).
912	********************************************************************/
913
914	size_t push_ascii_nstring(void dest, const char src)
915	{
916	size_t i, buffer_len, dest_len;
917	smb_ucs2_t *buffer;
918
919	conv_silent = True;
920	buffer_len = push_ucs2_allocate(&buffer, src);
921	if (buffer_len == (size_t)-1) {
922	smb_panic("failed to create UCS2 buffer");
923	}
924
925	/* We're using buffer_len below to count ucs2 characters, not bytes. */
926	buffer_len /= sizeof(smb_ucs2_t);
927
928	dest_len = 0;
929	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
930	unsigned char mb[10];
931	/* Convert one smb_ucs2_t character at a time. */
932	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
933	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
934	memcpy((char *)dest + dest_len, mb, mb_len);
935	dest_len += mb_len;
936	} else {
937	errno = E2BIG;
938	break;
939	}
940	}
941	((char *)dest)[dest_len] = '\0';
942
943	SAFE_FREE(buffer);
944	conv_silent = False;
945	return dest_len;
946	}
947
948	/**
949	* Copy a string from a dos codepage source to a unix char* destination.
950	*
951	* The resulting string in "dest" is always null terminated.
952	*
953	* @param flags can have:
954	* <dl>
955	* <dt>STR_TERMINATE</dt>
956	* <dd>STR_TERMINATE means the string in @p src
957	* is null terminated, and src_len is ignored.</dd>
958	* </dl>
959	*
960	* @param src_len is the length of the source area in bytes.
961	* @returns the number of bytes occupied by the string in @p src.
962	**/
963	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
964	{
965	size_t ret;
966
967	if (dest_len == (size_t)-1)
968	dest_len = sizeof(pstring);
969
970	if (flags & STR_TERMINATE) {
971	if (src_len == (size_t)-1) {
972	src_len = strlen((const char *)src) + 1;
973	} else {
974	size_t len = strnlen((const char *)src, src_len);
975	if (len < src_len)
976	len++;
977	src_len = len;
978	}
979	}
980
981	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
982	if (ret == (size_t)-1) {
983	ret = 0;
984	dest_len = 0;
985	}
986
987	if (dest_len && ret) {
988	/* Did we already process the terminating zero ? */
989	if (dest[MIN(ret-1, dest_len-1)] != 0) {
990	dest[MIN(ret, dest_len-1)] = 0;
991	}
992	} else {
993	dest[0] = 0;
994	}
995
996	return src_len;
997	}
998
999	size_t pull_ascii_pstring(char dest, const void src)
1000	{
1001	return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1002	}
1003
1004	size_t pull_ascii_fstring(char dest, const void src)
1005	{
1006	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1007	}
1008
1009	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1010
1011	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1012	{
1013	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1014	}
1015
1016	/**
1017	* Copy a string from a char* src to a unicode destination.
1018	*
1019	* @returns the number of bytes occupied by the string in the destination.
1020	*
1021	* @param flags can have:
1022	*
1023	* <dl>
1024	* <dt>STR_TERMINATE <dd>means include the null termination.
1025	* <dt>STR_UPPER <dd>means uppercase in the destination.
1026	* <dt>STR_NOALIGN <dd>means don't do alignment.
1027	* </dl>
1028	*
1029	* @param dest_len is the maximum length allowed in the
1030	* destination. If dest_len is -1 then no maxiumum is used.
1031	**/
1032
1033	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1034	{
1035	size_t len=0;
1036	size_t src_len;
1037	size_t ret;
1038
1039	/* treat a pstring as "unlimited" length */
1040	if (dest_len == (size_t)-1)
1041	dest_len = sizeof(pstring);
1042
1043	if (flags & STR_TERMINATE)
1044	src_len = (size_t)-1;
1045	else
1046	src_len = strlen(src);
1047
1048	if (ucs2_align(base_ptr, dest, flags)) {
1049	(char )dest = 0;
1050	dest = (void )((char )dest + 1);
1051	if (dest_len)
1052	dest_len--;
1053	len++;
1054	}
1055
1056	/* ucs2 is always a multiple of 2 bytes */
1057	dest_len &= ~1;
1058
1059	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1060	if (ret == (size_t)-1) {
1061	return 0;
1062	}
1063
1064	len += ret;
1065
1066	if (flags & STR_UPPER) {
1067	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1068	size_t i;
1069
1070	/* We check for i < (ret / 2) below as the dest string isn't null
1071	terminated if STR_TERMINATE isn't set. */
1072
1073	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1074	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1075	if (v != dest_ucs2[i]) {
1076	dest_ucs2[i] = v;
1077	}
1078	}
1079	}
1080
1081	return len;
1082	}
1083
1084
1085	/**
1086	* Copy a string from a unix char* src to a UCS2 destination,
1087	* allocating a buffer using talloc().
1088	*
1089	* @param dest always set at least to NULL
1090	*
1091	* @returns The number of bytes occupied by the string in the destination
1092	* or -1 in case of error.
1093	**/
1094	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1095	{
1096	size_t src_len = strlen(src)+1;
1097
1098	*dest = NULL;
1099	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1100	}
1101
1102
1103	/**
1104	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1105	*
1106	* @param dest always set at least to NULL
1107	*
1108	* @returns The number of bytes occupied by the string in the destination
1109	* or -1 in case of error.
1110	**/
1111
1112	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1113	{
1114	size_t src_len = strlen(src)+1;
1115
1116	*dest = NULL;
1117	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1118	}
1119
1120	/**
1121	Copy a string from a char* src to a UTF-8 destination.
1122	Return the number of bytes occupied by the string in the destination
1123	Flags can have:
1124	STR_TERMINATE means include the null termination
1125	STR_UPPER means uppercase in the destination
1126	dest_len is the maximum length allowed in the destination. If dest_len
1127	is -1 then no maxiumum is used.
1128	**/
1129
1130	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1131	{
1132	size_t src_len = strlen(src);
1133	pstring tmpbuf;
1134
1135	/* treat a pstring as "unlimited" length */
1136	if (dest_len == (size_t)-1)
1137	dest_len = sizeof(pstring);
1138
1139	if (flags & STR_UPPER) {
1140	pstrcpy(tmpbuf, src);
1141	strupper_m(tmpbuf);
1142	src = tmpbuf;
1143	}
1144
1145	if (flags & STR_TERMINATE)
1146	src_len++;
1147
1148	return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1149	}
1150
1151	size_t push_utf8_fstring(void dest, const char src)
1152	{
1153	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1154	}
1155
1156	/**
1157	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1158	*
1159	* @param dest always set at least to NULL
1160	*
1161	* @returns The number of bytes occupied by the string in the destination
1162	**/
1163
1164	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1165	{
1166	size_t src_len = strlen(src)+1;
1167
1168	*dest = NULL;
1169	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1170	}
1171
1172	/**
1173	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1174	*
1175	* @param dest always set at least to NULL
1176	*
1177	* @returns The number of bytes occupied by the string in the destination
1178	**/
1179
1180	size_t push_utf8_allocate(char *dest, const char src)
1181	{
1182	size_t src_len = strlen(src)+1;
1183
1184	*dest = NULL;
1185	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1186	}
1187
1188	/**
1189	Copy a string from a ucs2 source to a unix char* destination.
1190	Flags can have:
1191	STR_TERMINATE means the string in src is null terminated.
1192	STR_NOALIGN means don't try to align.
1193	if STR_TERMINATE is set then src_len is ignored if it is -1.
1194	src_len is the length of the source area in bytes
1195	Return the number of bytes occupied by the string in src.
1196	The resulting string in "dest" is always null terminated.
1197	**/
1198
1199	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1200	{
1201	size_t ret;
1202
1203	if (dest_len == (size_t)-1)
1204	dest_len = sizeof(pstring);
1205
1206	if (ucs2_align(base_ptr, src, flags)) {
1207	src = (const void )((const char )src + 1);
1208	if (src_len != (size_t)-1)
1209	src_len--;
1210	}
1211
1212	if (flags & STR_TERMINATE) {
1213	/* src_len -1 is the default for null terminated strings. */
1214	if (src_len != (size_t)-1) {
1215	size_t len = strnlen_w((const smb_ucs2_t *)src,
1216	src_len/2);
1217	if (len < src_len/2)
1218	len++;
1219	src_len = len*2;
1220	}
1221	}
1222
1223	/* ucs2 is always a multiple of 2 bytes */
1224	if (src_len != (size_t)-1)
1225	src_len &= ~1;
1226
1227	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1228	if (ret == (size_t)-1) {
1229	return 0;
1230	}
1231
1232	if (src_len == (size_t)-1)
1233	src_len = ret*2;
1234
1235	if (dest_len && ret) {
1236	/* Did we already process the terminating zero ? */
1237	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1238	dest[MIN(ret, dest_len-1)] = 0;
1239	}
1240	} else {
1241	dest[0] = 0;
1242	}
1243
1244	return src_len;
1245	}
1246
1247	size_t pull_ucs2_pstring(char dest, const void src)
1248	{
1249	return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1250	}
1251
1252	size_t pull_ucs2_fstring(char dest, const void src)
1253	{
1254	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1255	}
1256
1257	/**
1258	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1259	*
1260	* @param dest always set at least to NULL
1261	*
1262	* @returns The number of bytes occupied by the string in the destination
1263	**/
1264
1265	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1266	{
1267	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1268	*dest = NULL;
1269	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1270	}
1271
1272	/**
1273	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1274	*
1275	* @param dest always set at least to NULL
1276	*
1277	* @returns The number of bytes occupied by the string in the destination
1278	**/
1279
1280	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1281	{
1282	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1283	*dest = NULL;
1284	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1285	}
1286
1287	/**
1288	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1289	*
1290	* @param dest always set at least to NULL
1291	*
1292	* @returns The number of bytes occupied by the string in the destination
1293	**/
1294
1295	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1296	{
1297	size_t src_len = strlen(src)+1;
1298	*dest = NULL;
1299	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1300	}
1301
1302	/**
1303	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1304	*
1305	* @param dest always set at least to NULL
1306	*
1307	* @returns The number of bytes occupied by the string in the destination
1308	**/
1309
1310	size_t pull_utf8_allocate(char *dest, const char src)
1311	{
1312	size_t src_len = strlen(src)+1;
1313	*dest = NULL;
1314	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1315	}
1316
1317	/**
1318	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1319	*
1320	* @param dest always set at least to NULL
1321	*
1322	* @returns The number of bytes occupied by the string in the destination
1323	**/
1324
1325	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1326	{
1327	size_t src_len = strlen(src)+1;
1328	*dest = NULL;
1329	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1330	}
1331
1332	/**
1333	Copy a string from a char* src to a unicode or ascii
1334	dos codepage destination choosing unicode or ascii based on the
1335	flags in the SMB buffer starting at base_ptr.
1336	Return the number of bytes occupied by the string in the destination.
1337	flags can have:
1338	STR_TERMINATE means include the null termination.
1339	STR_UPPER means uppercase in the destination.
1340	STR_ASCII use ascii even with unicode packet.
1341	STR_NOALIGN means don't do alignment.
1342	dest_len is the maximum length allowed in the destination. If dest_len
1343	is -1 then no maxiumum is used.
1344	**/
1345
1346	size_t push_string_fn(const char function, unsigned int line, const void base_ptr, void dest, const char src, size_t dest_len, int flags)
1347	{
1348	#ifdef DEVELOPER
1349	/* We really need to zero fill here, not clobber
1350	* region, as we want to ensure that valgrind thinks
1351	* all of the outgoing buffer has been written to
1352	* so a send() or write() won't trap an error.
1353	* JRA.
1354	*/
1355	#if 0
1356	if (dest_len != (size_t)-1)
1357	clobber_region(function, line, dest, dest_len);
1358	#else
1359	if (dest_len != (size_t)-1)
1360	memset(dest, '\0', dest_len);
1361	#endif
1362	#endif
1363
1364	if (!(flags & STR_ASCII) && \
1365	((flags & STR_UNICODE \|\| \
1366	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1367	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1368	}
1369	return push_ascii(dest, src, dest_len, flags);
1370	}
1371
1372
1373	/**
1374	Copy a string from a unicode or ascii source (depending on
1375	the packet flags) to a char* destination.
1376	Flags can have:
1377	STR_TERMINATE means the string in src is null terminated.
1378	STR_UNICODE means to force as unicode.
1379	STR_ASCII use ascii even with unicode packet.
1380	STR_NOALIGN means don't do alignment.
1381	if STR_TERMINATE is set then src_len is ignored is it is -1
1382	src_len is the length of the source area in bytes.
1383	Return the number of bytes occupied by the string in src.
1384	The resulting string in "dest" is always null terminated.
1385	**/
1386
1387	size_t pull_string_fn(const char function, unsigned int line, const void base_ptr, char dest, const void src, size_t dest_len, size_t src_len, int flags)
1388	{
1389	#ifdef DEVELOPER
1390	if (dest_len != (size_t)-1)
1391	clobber_region(function, line, dest, dest_len);
1392	#endif
1393
1394	if (!(flags & STR_ASCII) && \
1395	((flags & STR_UNICODE \|\| \
1396	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1397	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1398	}
1399	return pull_ascii(dest, src, dest_len, src_len, flags);
1400	}
1401
1402	size_t align_string(const void base_ptr, const char p, int flags)
1403	{
1404	if (!(flags & STR_ASCII) && \
1405	((flags & STR_UNICODE \|\| \
1406	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1407	return ucs2_align(base_ptr, p, flags);
1408	}
1409	return 0;
1410	}
1411
1412	/*
1413	Return the unicode codepoint for the next multi-byte CH_UNIX character
1414	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1415
1416	Also return the number of bytes consumed (which tells the caller
1417	how many bytes to skip to get to the next CH_UNIX character).
1418
1419	Return INVALID_CODEPOINT if the next character cannot be converted.
1420	*/
1421
1422	codepoint_t next_codepoint(const char str, size_t size)
1423	{
1424	/* It cannot occupy more than 4 bytes in UTF16 format */
1425	uint8_t buf[4];
1426	smb_iconv_t descriptor;
1427	#ifdef __OS2__
1428	size_t ilen_max;
1429	#endif
1430	size_t ilen_orig;
1431	size_t ilen;
1432	size_t olen_orig;
1433	size_t olen;
1434	const char *inbuf;
1435	char *outbuf;
1436
1437	#ifdef __OS2__
1438	*size = 1;
1439	#endif
1440
1441	if ((str[0] & 0x80) == 0) {
1442	#ifndef __OS2__
1443	*size = 1;
1444	#endif
1445	return (codepoint_t)str[0];
1446	}
1447
1448	lazy_initialize_conv();
1449
1450	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1451	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1452	#ifndef __OS2__
1453	*size = 1;
1454	#endif
1455	return INVALID_CODEPOINT;
1456	}
1457	#ifdef __OS2__
1458	/* We assume that no multi-byte character can take
1459	more than 5 bytes. This is OK as we only
1460	support codepoints up to 1M */
1461
1462	ilen_max = strnlen( str, 5 );
1463	#else
1464	*size = 1;
1465	#endif
1466	ilen_orig = 1;
1467	olen_orig = 2;
1468	while( 1 )
1469	{
1470	ilen = ilen_orig;
1471	olen = olen_orig;
1472	inbuf = str;
1473	outbuf = ( char * )buf;
1474	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1475	break;
1476
1477	switch( errno )
1478	{
1479	case E2BIG :
1480	if( olen_orig == 2 )
1481	olen_orig = 4;
1482	else
1483	return INVALID_CODEPOINT;
1484	break;
1485
1486	case EINVAL :
1487	#ifndef __OS2__
1488	/* We assume that no multi-byte character can take
1489	more than 5 bytes. This is OK as we only
1490	support codepoints up to 1M */
1491	if( ilen_orig < 5 )
1492	#else
1493	if( ilen_orig < ilen_max )
1494	#endif
1495	ilen_orig++;
1496	else
1497	return INVALID_CODEPOINT;
1498	break;
1499
1500	case EILSEQ :
1501	default :
1502	return INVALID_CODEPOINT;
1503	}
1504	}
1505
1506	olen = olen_orig - olen;
1507
1508	*size = ilen_orig - ilen;
1509
1510	if (olen == 2) {
1511	/* 2 byte, UTF16-LE encoded value. */
1512	return (codepoint_t)SVAL(buf, 0);
1513	}
1514	if (olen == 4) {
1515	/* Decode a 4 byte UTF16-LE character manually.
1516	See RFC2871 for the encoding machanism.
1517	*/
1518	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1519	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1520
1521	return (codepoint_t)0x10000 +
1522	(w1 << 10) + w2;
1523	}
1524
1525	/* no other length is valid */
1526	return INVALID_CODEPOINT;
1527	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: