Context Navigation

source: branches/samba-3.0/source/lib/charcnv.c@ 312

Visit:

Last change on this file since 312 was 236, checked in by Herwig Bauernfeind, 16 years ago
Fix for Ticket #85 (by diver)
File size: 40.5 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Character set conversion Extensions
4	Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5	Copyright (C) Andrew Tridgell 2001
6	Copyright (C) Simo Sorce 2001
7	Copyright (C) Martin Pool 2003
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation; either version 2 of the License, or
12	(at your option) any later version.
13
14	This program is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with this program; if not, write to the Free Software
21	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
23	*/
24	#include "includes.h"
25
/* Hard-coded for now; this can become a parameter if someone complains. JRA. */

/**
 * Return the placeholder character that the conversion routines write
 * in place of input they cannot convert.
 **/
char lp_failed_convert_char(void)
{
	const char placeholder = '_';

	return placeholder;
}
32
33	/**
34	* @file
35	*
36	* @brief Character-set conversion routines built on our iconv.
37	*
38	* @note Samba's internal character set (at least in the 3.0 series)
39	* is always the same as the one for the Unix filesystem. It is
40	* <b>not</b> necessarily UTF-8 and may be different on machines that
41	* need i18n filenames to be compatible with Unix software. It does
42	* have to be a superset of ASCII. All multibyte sequences must start
43	* with a byte with the high bit set.
44	*
45	* @sa lib/iconv.c
46	*/
47
48
/* iconv descriptors looked up as conv_handles[from][to]; filled in by
 * init_iconv() and released by gfree_charcnv(). */
49	static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* While True, the conversion routines in this file skip their DEBUG
 * output when a conversion fails. */
50	static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
51
52	/**
53	* Return the name of a charset to give to iconv().
54	**/
55	static const char *charset_name(charset_t ch)
56	{
57	const char *ret = NULL;
58	#ifndef __OS2__
59	if (ch == CH_UTF16LE) ret = "UTF-16LE";
60	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61	#else
62	if (ch == CH_UTF16LE) ret = "IBM-1200";
63	else if (ch == CH_UTF16BE) ret = "IBM-1200";
64	#endif
65	else if (ch == CH_UNIX) ret = lp_unix_charset();
66	else if (ch == CH_DOS) ret = lp_dos_charset();
67	else if (ch == CH_DISPLAY) ret = lp_display_charset();
68	else if (ch == CH_UTF8) ret = "UTF8";
69
70	#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
71	if (ret && !strcmp(ret, "LOCALE")) {
72	const char *ln = NULL;
73
74	#ifdef HAVE_SETLOCALE
75	setlocale(LC_ALL, "");
76	#endif
77	ln = nl_langinfo(CODESET);
78	if (ln) {
79	/* Check whether the charset name is supported
80	by iconv */
81	smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
82	if (handle == (smb_iconv_t) -1) {
83	DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
84	ln = NULL;
85	} else {
86	DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
87	smb_iconv_close(handle);
88	}
89	}
90	ret = ln;
91	}
92	#endif
93
94	if (!ret \|\| !*ret) ret = "ASCII";
95	DEBUG(10, ("codepage: %s\n",ret));
96	return ret;
97	}
98
99	void lazy_initialize_conv(void)
100	{
101	static int initialized = False;
102
103	if (!initialized) {
104	initialized = True;
105	load_case_tables();
106	init_iconv();
107	}
108	}
109
110	/**
111	* Destroy global objects allocated by init_iconv()
112	**/
113	void gfree_charcnv(void)
114	{
115	int c1, c2;
116
117	for (c1=0;c1<NUM_CHARSETS;c1++) {
118	for (c2=0;c2<NUM_CHARSETS;c2++) {
119	if ( conv_handles[c1][c2] ) {
120	smb_iconv_close( conv_handles[c1][c2] );
121	conv_handles[c1][c2] = 0;
122	}
123	}
124	}
125	}
126
127	/**
128	* Initialize iconv conversion descriptors.
129	*
130	* This is called the first time it is needed, and also called again
131	* every time the configuration is reloaded, because the charset or
132	* codepage might have changed.
133	**/
134	void init_iconv(void)
135	{
136	int c1, c2;
137	BOOL did_reload = False;
138
139	/* so that charset_name() works we need to get the UNIX<->UCS2 going
140	first */
141	if (!conv_handles[CH_UNIX][CH_UTF16LE])
142	conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
143
144	if (!conv_handles[CH_UTF16LE][CH_UNIX])
145	conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
146
147	for (c1=0;c1<NUM_CHARSETS;c1++) {
148	for (c2=0;c2<NUM_CHARSETS;c2++) {
149	const char *n1 = charset_name((charset_t)c1);
150	const char *n2 = charset_name((charset_t)c2);
151	if (conv_handles[c1][c2] &&
152	strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
153	strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
154	continue;
155
156	did_reload = True;
157
158	if (conv_handles[c1][c2])
159	smb_iconv_close(conv_handles[c1][c2]);
160
161	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
162	if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
163	DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
164	charset_name((charset_t)c1), charset_name((charset_t)c2)));
165	if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
166	n1 = "ASCII";
167	}
168	if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
169	n2 = "ASCII";
170	}
171	DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
172	n1, n2 ));
173	conv_handles[c1][c2] = smb_iconv_open(n2,n1);
174	if (!conv_handles[c1][c2]) {
175	DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
176	smb_panic("init_iconv: conv_handle initialization failed.");
177	}
178	}
179	}
180	}
181
182	if (did_reload) {
183	/* XXX: Does this really get called every time the dos
184	* codepage changes? */
185	/* XXX: Is the did_reload test too strict? */
186	conv_silent = True;
187	init_doschar_table();
188	init_valid_table();
189	conv_silent = False;
190	}
191	}
192
193	/**
194	* Convert string from one encoding to another, making error checking etc
195	* Slow path version - uses (slow) iconv.
196	*
197	* @param src pointer to source string (multibyte or singlebyte)
198	* @param srclen length of the source string in bytes
199	* @param dest pointer to destination string (multibyte or singlebyte)
200	* @param destlen maximal length allowed for string
201	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
202	* @returns the number of bytes occupied in the destination
203	*
204	* Ensure the srclen contains the terminating zero.
205	*
206	**/
207
208	static size_t convert_string_internal(charset_t from, charset_t to,
209	void const *src, size_t srclen,
210	void *dest, size_t destlen, BOOL allow_bad_conv)
211	{
212	size_t i_len, o_len;
213	size_t retval;
214	const char* inbuf = (const char*)src;
215	char* outbuf = (char*)dest;
216	smb_iconv_t descriptor;
217
218	lazy_initialize_conv();
219
220	descriptor = conv_handles[from][to];
221
222	if (srclen == (size_t)-1) {
223	if (from == CH_UTF16LE \|\| from == CH_UTF16BE) {
224	srclen = (strlen_w((const smb_ucs2_t )src)+1) 2;
225	} else {
226	srclen = strlen((const char *)src)+1;
227	}
228	}
229
230
231	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
232	if (!conv_silent)
233	DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
234	return (size_t)-1;
235	}
236
237	i_len=srclen;
238	o_len=destlen;
239
240	again:
241
242	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
243	if(retval==(size_t)-1) {
244	const char *reason="unknown error";
245	switch(errno) {
246	case EINVAL:
247	reason="Incomplete multibyte sequence";
248	if (!conv_silent)
249	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
250	if (allow_bad_conv)
251	goto use_as_is;
252	break;
253	case E2BIG:
254	reason="No more room";
255	if (!conv_silent) {
256	if (from == CH_UNIX) {
257	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
258	charset_name(from), charset_name(to),
259	(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
260	} else {
261	DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
262	charset_name(from), charset_name(to),
263	(unsigned int)srclen, (unsigned int)destlen));
264	}
265	}
266	break;
267	case EILSEQ:
268	reason="Illegal multibyte sequence";
269	if (!conv_silent)
270	DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
271	if (allow_bad_conv)
272	goto use_as_is;
273	break;
274	default:
275	if (!conv_silent)
276	DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
277	break;
278	}
279	/* smb_panic(reason); */
280	}
281	return destlen-o_len;
282
283	use_as_is:
284
285	/*
286	* Conversion not supported. This is actually an error, but there are so
287	* many misconfigured iconv systems and smb.conf's out there we can't just
288	* fail. Do a very bad conversion instead.... JRA.
289	*/
290
291	{
292	if (o_len == 0 \|\| i_len == 0)
293	return destlen - o_len;
294
295	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
296	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
297	/* Can't convert from utf16 any endian to multibyte.
298	Replace with the default fail char.
299	*/
300	if (i_len < 2)
301	return destlen - o_len;
302	if (i_len >= 2) {
303	*outbuf = lp_failed_convert_char();
304
305	outbuf++;
306	o_len--;
307
308	inbuf += 2;
309	i_len -= 2;
310	}
311
312	if (o_len == 0 \|\| i_len == 0)
313	return destlen - o_len;
314
315	/* Keep trying with the next char... */
316	goto again;
317
318	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
319	/* Can't convert to UTF16LE - just widen by adding the
320	default fail char then zero.
321	*/
322	if (o_len < 2)
323	return destlen - o_len;
324
325	outbuf[0] = lp_failed_convert_char();
326	outbuf[1] = '\0';
327
328	inbuf++;
329	i_len--;
330
331	outbuf += 2;
332	o_len -= 2;
333
334	if (o_len == 0 \|\| i_len == 0)
335	return destlen - o_len;
336
337	/* Keep trying with the next char... */
338	goto again;
339
340	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
341	to != CH_UTF16LE && to != CH_UTF16BE) {
342	/* Failed multibyte to multibyte. Just copy the default fail char and
343	try again. */
344	outbuf[0] = lp_failed_convert_char();
345
346	inbuf++;
347	i_len--;
348
349	outbuf++;
350	o_len--;
351
352	if (o_len == 0 \|\| i_len == 0)
353	return destlen - o_len;
354
355	/* Keep trying with the next char... */
356	goto again;
357
358	} else {
359	/* Keep compiler happy.... */
360	return destlen - o_len;
361	}
362	}
363	}
364
365	/**
366	* Convert string from one encoding to another, making error checking etc
367	* Fast path version - handles ASCII first.
368	*
369	* @param src pointer to source string (multibyte or singlebyte)
370	* @param srclen length of the source string in bytes, or -1 for nul terminated.
371	* @param dest pointer to destination string (multibyte or singlebyte)
372	* @param destlen maximal length allowed for string - NEVER -1.
373	* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
374	* @returns the number of bytes occupied in the destination
375	*
376	* Ensure the srclen contains the terminating zero.
377	*
378	* This function has been hand-tuned to provide a fast path.
379	* Don't change unless you really know what you are doing. JRA.
380	**/
381
382	size_t convert_string(charset_t from, charset_t to,
383	void const *src, size_t srclen,
384	void *dest, size_t destlen, BOOL allow_bad_conv)
385	{
386	/*
387	* NB. We deliberately don't do a strlen here if srclen == -1.
388	* This is very expensive over millions of calls and is taken
389	* care of in the slow path in convert_string_internal. JRA.
390	*/
391
392	#ifdef DEVELOPER
393	SMB_ASSERT(destlen != (size_t)-1);
394	#endif
395
396	if (srclen == 0)
397	return 0;
398
399	// DEBUG(10, ("convert_string: 1"));
400
401	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
402	const unsigned char p = (const unsigned char )src;
403	unsigned char q = (unsigned char )dest;
404	size_t slen = srclen;
405	size_t dlen = destlen;
406	unsigned char lastp = '\0';
407	size_t retval = 0;
408
409	// DEBUG(10, ("convert_string: 2"));
410
411	/* If all characters are ascii, fast path here. */
412	while (slen && dlen) {
413	if ((lastp = *p) <= 0x7f) {
414	q++ = p++;
415	if (slen != (size_t)-1) {
416	slen--;
417	}
418	dlen--;
419	retval++;
420	if (!lastp)
421	break;
422	} else {
423	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
424	goto general_case;
425	#else
426	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
427	#endif
428	}
429	}
430	if (!dlen) {
431	/* Even if we fast path we should note if we ran out of room. */
432	if (((slen != (size_t)-1) && slen) \|\|
433	((slen == (size_t)-1) && lastp)) {
434	errno = E2BIG;
435	}
436	}
437	return retval;
438	// DEBUG(10, ("convert_string: 3"));
439
440	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
441	const unsigned char p = (const unsigned char )src;
442	unsigned char q = (unsigned char )dest;
443	size_t retval = 0;
444	size_t slen = srclen;
445	size_t dlen = destlen;
446	unsigned char lastp = '\0';
447
448	/* If all characters are ascii, fast path here. */
449	while (((slen == (size_t)-1) \|\| (slen >= 2)) && dlen) {
450	if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
451	q++ = p;
452	if (slen != (size_t)-1) {
453	slen -= 2;
454	}
455	p += 2;
456	dlen--;
457	retval++;
458	if (!lastp)
459	break;
460	} else {
461	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
462	goto general_case;
463	#else
464	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
465	#endif
466	}
467	}
468	if (!dlen) {
469	/* Even if we fast path we should note if we ran out of room. */
470	if (((slen != (size_t)-1) && slen) \|\|
471	((slen == (size_t)-1) && lastp)) {
472	errno = E2BIG;
473	}
474	}
475	return retval;
476	// DEBUG(10, ("convert_string: 4"));
477
478	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
479	const unsigned char p = (const unsigned char )src;
480	unsigned char q = (unsigned char )dest;
481	size_t retval = 0;
482	size_t slen = srclen;
483	size_t dlen = destlen;
484	unsigned char lastp = '\0';
485
486	/* If all characters are ascii, fast path here. */
487	while (slen && (dlen >= 2)) {
488	if ((lastp = *p) <= 0x7F) {
489	q++ = p++;
490	*q++ = '\0';
491	if (slen != (size_t)-1) {
492	slen--;
493	}
494	dlen -= 2;
495	retval += 2;
496	if (!lastp)
497	break;
498	} else {
499	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
500	goto general_case;
501	#else
502	return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
503	#endif
504	}
505	}
506	if (!dlen) {
507	/* Even if we fast path we should note if we ran out of room. */
508	if (((slen != (size_t)-1) && slen) \|\|
509	((slen == (size_t)-1) && lastp)) {
510	errno = E2BIG;
511	}
512	}
513	return retval;
514	}
515
516	#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
517	general_case:
518	#endif
519	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
520	}
521
522	/**
523	* Convert between character sets, allocating a new buffer for the result.
524	*
525	* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
526	* @param srclen length of source buffer.
527	* @param dest always set at least to NULL
528	* @note -1 is not accepted for srclen.
529	*
530	* @returns Size in bytes of the converted string; or -1 in case of error.
531	*
532	* Ensure the srclen contains the terminating zero.
533	*
534	* I hate the goto's in this function. It's embarressing.....
535	* There has to be a cleaner way to do this. JRA.
536	**/
537
538	size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
539	void const src, size_t srclen, void dst, BOOL allow_bad_conv)
540	{
541	size_t i_len, o_len, destlen = (srclen * 3) / 2;
542	size_t retval;
543	const char inbuf = (const char )src;
544	char outbuf = NULL, ob = NULL;
545	smb_iconv_t descriptor;
546	void dest = (void )dst;
547
548	*dest = NULL;
549
550	if (src == NULL \|\| srclen == (size_t)-1)
551	return (size_t)-1;
552	if (srclen == 0)
553	return 0;
554
555	lazy_initialize_conv();
556
557	descriptor = conv_handles[from][to];
558
559	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
560	if (!conv_silent)
561	DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
562	return (size_t)-1;
563	}
564
565	convert:
566
567	/* +2 is for ucs2 null termination. */
568	if ((destlen*2)+2 < destlen) {
569	/* wrapped ! abort. */
570	if (!conv_silent)
571	DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
572	if (!ctx)
573	SAFE_FREE(outbuf);
574	return (size_t)-1;
575	} else {
576	destlen = destlen * 2;
577	}
578
579	/* +2 is for ucs2 null termination. */
580	if (ctx) {
581	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
582	} else {
583	ob = (char *)SMB_REALLOC(ob, destlen + 2);
584	}
585
586	if (!ob) {
587	DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
588	return (size_t)-1;
589	}
590	outbuf = ob;
591	i_len = srclen;
592	o_len = destlen;
593
594	again:
595	DEBUG(10,("convert_string_internal: convert_string(%s,%s): srclen=%u destlen=%u\n",
596	charset_name(from), charset_name(to),
597	(unsigned int)srclen, (unsigned int)destlen));
598
599	retval = smb_iconv(descriptor,
600	&inbuf, &i_len,
601	&outbuf, &o_len);
602	if(retval == (size_t)-1) {
603	const char *reason="unknown error";
604	switch(errno) {
605	case EINVAL:
606	reason="Incomplete multibyte sequence";
607	if (!conv_silent)
608	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
609	if (allow_bad_conv)
610	goto use_as_is;
611	break;
612	case E2BIG:
613	goto convert;
614	case EILSEQ:
615	reason="Illegal multibyte sequence";
616	if (!conv_silent)
617	DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
618	if (allow_bad_conv)
619	goto use_as_is;
620	break;
621	}
622	if (!conv_silent)
623	DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
624	/* smb_panic(reason); */
625	return (size_t)-1;
626	}
627
628	out:
629
630	destlen = destlen - o_len;
631	if (ctx) {
632	/* We're shrinking here so we know the +2 is safe from wrap. */
633	ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
634	} else {
635	ob = (char *)SMB_REALLOC(ob,destlen + 2);
636	}
637
638	if (destlen && !ob) {
639	DEBUG(0, ("convert_string_allocate: out of memory!\n"));
640	return (size_t)-1;
641	}
642
643	*dest = ob;
644
645	/* Must ucs2 null terminate in the extra space we allocated. */
646	ob[destlen] = '\0';
647	ob[destlen+1] = '\0';
648
649	return destlen;
650
651	use_as_is:
652
653	/*
654	* Conversion not supported. This is actually an error, but there are so
655	* many misconfigured iconv systems and smb.conf's out there we can't just
656	* fail. Do a very bad conversion instead.... JRA.
657	*/
658
659	{
660	if (o_len == 0 \|\| i_len == 0)
661	goto out;
662
663	if (((from == CH_UTF16LE)\|\|(from == CH_UTF16BE)) &&
664	((to != CH_UTF16LE)\|\|(to != CH_UTF16BE))) {
665	/* Can't convert from utf16 any endian to multibyte.
666	Replace with the default fail char.
667	*/
668
669	if (i_len < 2)
670	goto out;
671
672	if (i_len >= 2) {
673	*outbuf = lp_failed_convert_char();
674
675	outbuf++;
676	o_len--;
677
678	inbuf += 2;
679	i_len -= 2;
680	}
681
682	if (o_len == 0 \|\| i_len == 0)
683	goto out;
684
685	/* Keep trying with the next char... */
686	goto again;
687
688	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
689	/* Can't convert to UTF16LE - just widen by adding the
690	default fail char then zero.
691	*/
692	if (o_len < 2)
693	goto out;
694
695	outbuf[0] = lp_failed_convert_char();
696	outbuf[1] = '\0';
697
698	inbuf++;
699	i_len--;
700
701	outbuf += 2;
702	o_len -= 2;
703
704	if (o_len == 0 \|\| i_len == 0)
705	goto out;
706
707	/* Keep trying with the next char... */
708	goto again;
709
710	} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
711	to != CH_UTF16LE && to != CH_UTF16BE) {
712	/* Failed multibyte to multibyte. Just copy the default fail char and
713	try again. */
714	outbuf[0] = lp_failed_convert_char();
715
716	inbuf++;
717	i_len--;
718
719	outbuf++;
720	o_len--;
721
722	if (o_len == 0 \|\| i_len == 0)
723	goto out;
724
725	/* Keep trying with the next char... */
726	goto again;
727
728	} else {
729	/* Keep compiler happy.... */
730	goto out;
731	}
732	}
733	}
734
735	/**
736	* Convert between character sets, allocating a new buffer using talloc for the result.
737	*
738	* @param srclen length of source buffer.
739	* @param dest always set at least to NULL
740	* @note -1 is not accepted for srclen.
741	*
742	* @returns Size in bytes of the converted string; or -1 in case of error.
743	**/
744	size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
745	void const src, size_t srclen, void dst,
746	BOOL allow_bad_conv)
747	{
748	void dest = (void )dst;
749	size_t dest_len;
750
751	*dest = NULL;
752	dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
753	if (dest_len == (size_t)-1)
754	return (size_t)-1;
755	if (*dest == NULL)
756	return (size_t)-1;
757	return dest_len;
758	}
759
760	size_t unix_strupper(const char src, size_t srclen, char dest, size_t destlen)
761	{
762	size_t size;
763	smb_ucs2_t *buffer;
764
765	size = push_ucs2_allocate(&buffer, src);
766	if (size == (size_t)-1) {
767	smb_panic("failed to create UCS2 buffer");
768	}
769	if (!strupper_w(buffer) && (dest == src)) {
770	free(buffer);
771	return srclen;
772	}
773
774	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
775	free(buffer);
776	return size;
777	}
778
779	/**
780	strdup() a unix string to upper case.
781	Max size is pstring.
782	**/
783
784	char strdup_upper(const char s)
785	{
786	pstring out_buffer;
787	const unsigned char p = (const unsigned char )s;
788	unsigned char q = (unsigned char )out_buffer;
789
790	/* this is quite a common operation, so we want it to be
791	fast. We optimise for the ascii case, knowing that all our
792	supported multi-byte character sets are ascii-compatible
793	(ie. they match for the first 128 chars) */
794
795	while (1) {
796	if (*p & 0x80)
797	break;
798	q++ = toupper_ascii(p);
799	if (!*p)
800	break;
801	p++;
802	if (p - ( const unsigned char *)s >= sizeof(pstring))
803	break;
804	}
805
806	if (*p) {
807	/* MB case. */
808	size_t size;
809	wpstring buffer;
810	size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
811	if (size == (size_t)-1) {
812	return NULL;
813	}
814
815	strupper_w(buffer);
816
817	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
818	if (size == (size_t)-1) {
819	return NULL;
820	}
821	}
822
823	return SMB_STRDUP(out_buffer);
824	}
825
826	size_t unix_strlower(const char src, size_t srclen, char dest, size_t destlen)
827	{
828	size_t size;
829	smb_ucs2_t *buffer = NULL;
830
831	size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
832	(void *)(void )&buffer, True);
833	if (size == (size_t)-1 \|\| !buffer) {
834	smb_panic("failed to create UCS2 buffer");
835	}
836	if (!strlower_w(buffer) && (dest == src)) {
837	SAFE_FREE(buffer);
838	return srclen;
839	}
840	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
841	SAFE_FREE(buffer);
842	return size;
843	}
844
845	/**
846	strdup() a unix string to lower case.
847	**/
848
849	char strdup_lower(const char s)
850	{
851	size_t size;
852	smb_ucs2_t *buffer = NULL;
853	char *out_buffer;
854
855	size = push_ucs2_allocate(&buffer, s);
856	if (size == -1 \|\| !buffer) {
857	return NULL;
858	}
859
860	strlower_w(buffer);
861
862	size = pull_ucs2_allocate(&out_buffer, buffer);
863	SAFE_FREE(buffer);
864
865	if (size == (size_t)-1) {
866	return NULL;
867	}
868
869	return out_buffer;
870	}
871
872	static size_t ucs2_align(const void base_ptr, const void p, int flags)
873	{
874	if (flags & (STR_NOALIGN\|STR_ASCII))
875	return 0;
876	return PTR_DIFF(p, base_ptr) & 1;
877	}
878
879
880	/**
881	* Copy a string from a char* unix src to a dos codepage string destination.
882	*
883	* @return the number of bytes occupied by the string in the destination.
884	*
885	* @param flags can include
886	* <dl>
887	* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
888	* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
889	* </dl>
890	*
891	* @param dest_len the maximum length in bytes allowed in the
892	* destination. If @p dest_len is -1 then no maximum is used.
893	**/
894	size_t push_ascii(void dest, const char src, size_t dest_len, int flags)
895	{
896	size_t src_len = strlen(src);
897	pstring tmpbuf;
898	size_t ret;
899
900	/* No longer allow a length of -1 */
901	if (dest_len == (size_t)-1)
902	smb_panic("push_ascii - dest_len == -1");
903
904	if (flags & STR_UPPER) {
905	pstrcpy(tmpbuf, src);
906	strupper_m(tmpbuf);
907	src = tmpbuf;
908	}
909
910	if (flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
911	src_len++;
912
913	ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
914	if (ret == (size_t)-1 &&
915	(flags & (STR_TERMINATE \| STR_TERMINATE_ASCII))
916	&& dest_len > 0) {
917	((char *)dest)[0] = '\0';
918	}
919	return ret;
920	}
921
922	size_t push_ascii_fstring(void dest, const char src)
923	{
924	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
925	}
926
927	size_t push_ascii_pstring(void dest, const char src)
928	{
929	return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
930	}
931
932	/********************************************************************
933	Push an nstring - ensure null terminated. Written by
934	moriyama@miraclelinux.com (MORIYAMA Masayuki).
935	********************************************************************/
936
937	size_t push_ascii_nstring(void dest, const char src)
938	{
939	size_t i, buffer_len, dest_len;
940	smb_ucs2_t *buffer;
941
942	conv_silent = True;
943	buffer_len = push_ucs2_allocate(&buffer, src);
944	if (buffer_len == (size_t)-1) {
945	smb_panic("failed to create UCS2 buffer");
946	}
947
948	/* We're using buffer_len below to count ucs2 characters, not bytes. */
949	buffer_len /= sizeof(smb_ucs2_t);
950
951	dest_len = 0;
952	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
953	unsigned char mb[10];
954	/* Convert one smb_ucs2_t character at a time. */
955	size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
956	if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
957	memcpy((char *)dest + dest_len, mb, mb_len);
958	dest_len += mb_len;
959	} else {
960	errno = E2BIG;
961	break;
962	}
963	}
964	((char *)dest)[dest_len] = '\0';
965
966	SAFE_FREE(buffer);
967	conv_silent = False;
968	return dest_len;
969	}
970
971	/**
972	* Copy a string from a dos codepage source to a unix char* destination.
973	*
974	* The resulting string in "dest" is always null terminated.
975	*
976	* @param flags can have:
977	* <dl>
978	* <dt>STR_TERMINATE</dt>
979	* <dd>STR_TERMINATE means the string in @p src
980	* is null terminated, and src_len is ignored.</dd>
981	* </dl>
982	*
983	* @param src_len is the length of the source area in bytes.
984	* @returns the number of bytes occupied by the string in @p src.
985	**/
986	size_t pull_ascii(char dest, const void src, size_t dest_len, size_t src_len, int flags)
987	{
988	size_t ret;
989
990	if (dest_len == (size_t)-1)
991	dest_len = sizeof(pstring);
992
993	if (flags & STR_TERMINATE) {
994	if (src_len == (size_t)-1) {
995	src_len = strlen((const char *)src) + 1;
996	} else {
997	size_t len = strnlen((const char *)src, src_len);
998	if (len < src_len)
999	len++;
1000	src_len = len;
1001	}
1002	}
1003
1004	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1005	if (ret == (size_t)-1) {
1006	ret = 0;
1007	dest_len = 0;
1008	}
1009
1010	if (dest_len && ret) {
1011	/* Did we already process the terminating zero ? */
1012	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1013	dest[MIN(ret, dest_len-1)] = 0;
1014	}
1015	} else {
1016	dest[0] = 0;
1017	}
1018
1019	return src_len;
1020	}
1021
1022	size_t pull_ascii_pstring(char dest, const void src)
1023	{
1024	return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1025	}
1026
1027	size_t pull_ascii_fstring(char dest, const void src)
1028	{
1029	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1030	}
1031
1032	/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1033
1034	size_t pull_ascii_nstring(char dest, size_t dest_len, const void src)
1035	{
1036	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1037	}
1038
1039	/**
1040	* Copy a string from a char* src to a unicode destination.
1041	*
1042	* @returns the number of bytes occupied by the string in the destination.
1043	*
1044	* @param flags can have:
1045	*
1046	* <dl>
1047	* <dt>STR_TERMINATE <dd>means include the null termination.
1048	* <dt>STR_UPPER <dd>means uppercase in the destination.
1049	* <dt>STR_NOALIGN <dd>means don't do alignment.
1050	* </dl>
1051	*
1052	* @param dest_len is the maximum length allowed in the
1053	* destination. If dest_len is -1 then no maxiumum is used.
1054	**/
1055
1056	size_t push_ucs2(const void base_ptr, void dest, const char *src, size_t dest_len, int flags)
1057	{
1058	size_t len=0;
1059	size_t src_len;
1060	size_t ret;
1061
1062	/* treat a pstring as "unlimited" length */
1063	if (dest_len == (size_t)-1)
1064	dest_len = sizeof(pstring);
1065
1066	if (flags & STR_TERMINATE)
1067	src_len = (size_t)-1;
1068	else
1069	src_len = strlen(src);
1070
1071	if (ucs2_align(base_ptr, dest, flags)) {
1072	(char )dest = 0;
1073	dest = (void )((char )dest + 1);
1074	if (dest_len)
1075	dest_len--;
1076	len++;
1077	}
1078
1079	/* ucs2 is always a multiple of 2 bytes */
1080	dest_len &= ~1;
1081
1082	ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1083	if (ret == (size_t)-1) {
1084	return 0;
1085	}
1086
1087	len += ret;
1088
1089	if (flags & STR_UPPER) {
1090	smb_ucs2_t dest_ucs2 = (smb_ucs2_t )dest;
1091	size_t i;
1092
1093	/* We check for i < (ret / 2) below as the dest string isn't null
1094	terminated if STR_TERMINATE isn't set. */
1095
1096	for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1097	smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1098	if (v != dest_ucs2[i]) {
1099	dest_ucs2[i] = v;
1100	}
1101	}
1102	}
1103
1104	return len;
1105	}
1106
1107
1108	/**
1109	* Copy a string from a unix char* src to a UCS2 destination,
1110	* allocating a buffer using talloc().
1111	*
1112	* @param dest always set at least to NULL
1113	*
1114	* @returns The number of bytes occupied by the string in the destination
1115	* or -1 in case of error.
1116	**/
1117	size_t push_ucs2_talloc(TALLOC_CTX ctx, smb_ucs2_t dest, const char src)
1118	{
1119	size_t src_len = strlen(src)+1;
1120
1121	*dest = NULL;
1122	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1123	}
1124
1125
1126	/**
1127	* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1128	*
1129	* @param dest always set at least to NULL
1130	*
1131	* @returns The number of bytes occupied by the string in the destination
1132	* or -1 in case of error.
1133	**/
1134
1135	size_t push_ucs2_allocate(smb_ucs2_t *dest, const char src)
1136	{
1137	size_t src_len = strlen(src)+1;
1138
1139	*dest = NULL;
1140	return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1141	}
1142
1143	/**
1144	Copy a string from a char* src to a UTF-8 destination.
1145	Return the number of bytes occupied by the string in the destination
1146	Flags can have:
1147	STR_TERMINATE means include the null termination
1148	STR_UPPER means uppercase in the destination
1149	dest_len is the maximum length allowed in the destination. If dest_len
1150	is -1 then no maxiumum is used.
1151	**/
1152
1153	static size_t push_utf8(void dest, const char src, size_t dest_len, int flags)
1154	{
1155	size_t src_len = strlen(src);
1156	pstring tmpbuf;
1157
1158	/* treat a pstring as "unlimited" length */
1159	if (dest_len == (size_t)-1)
1160	dest_len = sizeof(pstring);
1161
1162	if (flags & STR_UPPER) {
1163	pstrcpy(tmpbuf, src);
1164	strupper_m(tmpbuf);
1165	src = tmpbuf;
1166	}
1167
1168	if (flags & STR_TERMINATE)
1169	src_len++;
1170
1171	return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1172	}
1173
1174	size_t push_utf8_fstring(void dest, const char src)
1175	{
1176	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1177	}
1178
1179	/**
1180	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1181	*
1182	* @param dest always set at least to NULL
1183	*
1184	* @returns The number of bytes occupied by the string in the destination
1185	**/
1186
1187	size_t push_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1188	{
1189	size_t src_len = strlen(src)+1;
1190
1191	*dest = NULL;
1192	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1193	}
1194
1195	/**
1196	* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1197	*
1198	* @param dest always set at least to NULL
1199	*
1200	* @returns The number of bytes occupied by the string in the destination
1201	**/
1202
1203	size_t push_utf8_allocate(char *dest, const char src)
1204	{
1205	size_t src_len = strlen(src)+1;
1206
1207	*dest = NULL;
1208	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1209	}
1210
1211	/**
1212	Copy a string from a ucs2 source to a unix char* destination.
1213	Flags can have:
1214	STR_TERMINATE means the string in src is null terminated.
1215	STR_NOALIGN means don't try to align.
1216	if STR_TERMINATE is set then src_len is ignored if it is -1.
1217	src_len is the length of the source area in bytes
1218	Return the number of bytes occupied by the string in src.
1219	The resulting string in "dest" is always null terminated.
1220	**/
1221
1222	size_t pull_ucs2(const void base_ptr, char dest, const void *src, size_t dest_len, size_t src_len, int flags)
1223	{
1224	size_t ret;
1225
1226	if (dest_len == (size_t)-1)
1227	dest_len = sizeof(pstring);
1228
1229	if (ucs2_align(base_ptr, src, flags)) {
1230	src = (const void )((const char )src + 1);
1231	if (src_len != (size_t)-1)
1232	src_len--;
1233	}
1234
1235	if (flags & STR_TERMINATE) {
1236	/* src_len -1 is the default for null terminated strings. */
1237	if (src_len != (size_t)-1) {
1238	size_t len = strnlen_w((const smb_ucs2_t *)src,
1239	src_len/2);
1240	if (len < src_len/2)
1241	len++;
1242	src_len = len*2;
1243	}
1244	}
1245
1246	/* ucs2 is always a multiple of 2 bytes */
1247	if (src_len != (size_t)-1)
1248	src_len &= ~1;
1249
1250	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1251	if (ret == (size_t)-1) {
1252	return 0;
1253	}
1254
1255	if (src_len == (size_t)-1)
1256	src_len = ret*2;
1257
1258	if (dest_len && ret) {
1259	/* Did we already process the terminating zero ? */
1260	if (dest[MIN(ret-1, dest_len-1)] != 0) {
1261	dest[MIN(ret, dest_len-1)] = 0;
1262	}
1263	} else {
1264	dest[0] = 0;
1265	}
1266
1267	return src_len;
1268	}
1269
1270	size_t pull_ucs2_pstring(char dest, const void src)
1271	{
1272	return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1273	}
1274
1275	size_t pull_ucs2_fstring(char dest, const void src)
1276	{
1277	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1278	}
1279
1280	/**
1281	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1282	*
1283	* @param dest always set at least to NULL
1284	*
1285	* @returns The number of bytes occupied by the string in the destination
1286	**/
1287
1288	size_t pull_ucs2_talloc(TALLOC_CTX ctx, char dest, const smb_ucs2_t src)
1289	{
1290	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1291	*dest = NULL;
1292	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1293	}
1294
1295	/**
1296	* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1297	*
1298	* @param dest always set at least to NULL
1299	*
1300	* @returns The number of bytes occupied by the string in the destination
1301	**/
1302
1303	size_t pull_ucs2_allocate(char *dest, const smb_ucs2_t src)
1304	{
1305	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1306	*dest = NULL;
1307	return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1308	}
1309
1310	/**
1311	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1312	*
1313	* @param dest always set at least to NULL
1314	*
1315	* @returns The number of bytes occupied by the string in the destination
1316	**/
1317
1318	size_t pull_utf8_talloc(TALLOC_CTX ctx, char dest, const char src)
1319	{
1320	size_t src_len = strlen(src)+1;
1321	*dest = NULL;
1322	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1323	}
1324
1325	/**
1326	* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1327	*
1328	* @param dest always set at least to NULL
1329	*
1330	* @returns The number of bytes occupied by the string in the destination
1331	**/
1332
1333	size_t pull_utf8_allocate(char *dest, const char src)
1334	{
1335	size_t src_len = strlen(src)+1;
1336	*dest = NULL;
1337	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1338	}
1339
1340	/**
1341	* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1342	*
1343	* @param dest always set at least to NULL
1344	*
1345	* @returns The number of bytes occupied by the string in the destination
1346	**/
1347
1348	size_t pull_ascii_talloc(TALLOC_CTX ctx, char dest, const char src)
1349	{
1350	size_t src_len = strlen(src)+1;
1351	*dest = NULL;
1352	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1353	}
1354
1355	/**
1356	Copy a string from a char* src to a unicode or ascii
1357	dos codepage destination choosing unicode or ascii based on the
1358	flags in the SMB buffer starting at base_ptr.
1359	Return the number of bytes occupied by the string in the destination.
1360	flags can have:
1361	STR_TERMINATE means include the null termination.
1362	STR_UPPER means uppercase in the destination.
1363	STR_ASCII use ascii even with unicode packet.
1364	STR_NOALIGN means don't do alignment.
1365	dest_len is the maximum length allowed in the destination. If dest_len
1366	is -1 then no maxiumum is used.
1367	**/
1368
1369	size_t push_string_fn(const char function, unsigned int line, const void base_ptr, void dest, const char src, size_t dest_len, int flags)
1370	{
1371	#ifdef DEVELOPER
1372	/* We really need to zero fill here, not clobber
1373	* region, as we want to ensure that valgrind thinks
1374	* all of the outgoing buffer has been written to
1375	* so a send() or write() won't trap an error.
1376	* JRA.
1377	*/
1378	#if 0
1379	if (dest_len != (size_t)-1)
1380	clobber_region(function, line, dest, dest_len);
1381	#else
1382	if (dest_len != (size_t)-1)
1383	memset(dest, '\0', dest_len);
1384	#endif
1385	#endif
1386
1387	if (!(flags & STR_ASCII) && \
1388	((flags & STR_UNICODE \|\| \
1389	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1390	return push_ucs2(base_ptr, dest, src, dest_len, flags);
1391	}
1392	return push_ascii(dest, src, dest_len, flags);
1393	}
1394
1395
1396	/**
1397	Copy a string from a unicode or ascii source (depending on
1398	the packet flags) to a char* destination.
1399	Flags can have:
1400	STR_TERMINATE means the string in src is null terminated.
1401	STR_UNICODE means to force as unicode.
1402	STR_ASCII use ascii even with unicode packet.
1403	STR_NOALIGN means don't do alignment.
1404	if STR_TERMINATE is set then src_len is ignored is it is -1
1405	src_len is the length of the source area in bytes.
1406	Return the number of bytes occupied by the string in src.
1407	The resulting string in "dest" is always null terminated.
1408	**/
1409
1410	size_t pull_string_fn(const char function, unsigned int line, const void base_ptr, char dest, const void src, size_t dest_len, size_t src_len, int flags)
1411	{
1412	#ifdef DEVELOPER
1413	if (dest_len != (size_t)-1)
1414	clobber_region(function, line, dest, dest_len);
1415	#endif
1416
1417	if (!(flags & STR_ASCII) && \
1418	((flags & STR_UNICODE \|\| \
1419	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1420	return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1421	}
1422	return pull_ascii(dest, src, dest_len, src_len, flags);
1423	}
1424
1425	size_t align_string(const void base_ptr, const char p, int flags)
1426	{
1427	if (!(flags & STR_ASCII) && \
1428	((flags & STR_UNICODE \|\| \
1429	(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1430	return ucs2_align(base_ptr, p, flags);
1431	}
1432	return 0;
1433	}
1434
1435	/*
1436	Return the unicode codepoint for the next multi-byte CH_UNIX character
1437	in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1438
1439	Also return the number of bytes consumed (which tells the caller
1440	how many bytes to skip to get to the next CH_UNIX character).
1441
1442	Return INVALID_CODEPOINT if the next character cannot be converted.
1443	*/
1444
1445	codepoint_t next_codepoint(const char str, size_t size)
1446	{
1447	/* It cannot occupy more than 4 bytes in UTF16 format */
1448	uint8_t buf[4];
1449	smb_iconv_t descriptor;
1450	#ifdef __OS2__
1451	size_t ilen_max;
1452	size_t olen_orig;
1453	const char *inbuf;
1454	#endif
1455	size_t ilen_orig;
1456	size_t ilen;
1457	size_t olen;
1458
1459	char *outbuf;
1460
1461	#ifdef __OS2__
1462	*size = 1;
1463	#endif
1464
1465	if ((str[0] & 0x80) == 0) {
1466	#ifndef __OS2__
1467	*size = 1;
1468	#endif
1469	return (codepoint_t)str[0];
1470	}
1471
1472	lazy_initialize_conv();
1473
1474	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1475	if (descriptor == (smb_iconv_t)-1 \|\| descriptor == (smb_iconv_t)0) {
1476	#ifndef __OS2__
1477	*size = 1;
1478	#endif
1479	return INVALID_CODEPOINT;
1480	}
1481	#ifdef __OS2__
1482	/* We assume that no multi-byte character can take
1483	more than 5 bytes. This is OK as we only
1484	support codepoints up to 1M */
1485
1486	ilen_max = strnlen( str, 5 );
1487	#else
1488	*size = 1;
1489	#endif
1490	ilen_orig = 1;
1491	olen_orig = 2;
1492	while( 1 )
1493	{
1494	ilen = ilen_orig;
1495	olen = olen_orig;
1496	inbuf = str;
1497	outbuf = ( char * )buf;
1498	if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 )
1499	break;
1500
1501	switch( errno )
1502	{
1503	case E2BIG :
1504	if( olen_orig == 2 )
1505	olen_orig = 4;
1506	else
1507	return INVALID_CODEPOINT;
1508	break;
1509
1510
1511	case EINVAL :
1512	#ifndef __OS2__
1513	/* We assume that no multi-byte character can take
1514	more than 5 bytes. This is OK as we only
1515	support codepoints up to 1M */
1516	if( ilen_orig < 5 )
1517	#else
1518	if( ilen_orig < ilen_max )
1519	#endif
1520	ilen_orig++;
1521	else
1522	return INVALID_CODEPOINT;
1523	break;
1524
1525	case EILSEQ :
1526	default :
1527	return INVALID_CODEPOINT;
1528	}
1529	}
1530
1531	olen = olen_orig - olen;
1532
1533	*size = ilen_orig - ilen;
1534
1535	if (olen == 2) {
1536	/* 2 byte, UTF16-LE encoded value. */
1537	return (codepoint_t)SVAL(buf, 0);
1538	}
1539	if (olen == 4) {
1540	/* Decode a 4 byte UTF16-LE character manually.
1541	See RFC2871 for the encoding machanism.
1542	*/
1543	codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1544	codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1545
1546	return (codepoint_t)0x10000 +
1547	(w1 << 10) + w2;
1548	}
1549
1550	/* no other length is valid */
1551	return INVALID_CODEPOINT;
1552	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: