source: branches/samba-3.0/source/lib/iconv.c@ 367

Last change on this file since 367 was 337, checked in by Herwig Bauernfeind, 16 years ago

divers fix for Ticket #68 in 3.0

File size: 19.1 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 minimal iconv implementation
4 Copyright (C) Andrew Tridgell 2001
5 Copyright (C) Jelmer Vernooij 2002,2003
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20*/
21
22#include "includes.h"
23
24/*
25 * We have to use strcasecmp here as the character conversions
26 * haven't been initialised yet. JRA.
27 */
28
29#undef strcasecmp
30
31/**
32 * @file
33 *
34 * @brief Samba wrapper/stub for iconv character set conversion.
35 *
36 * iconv is the XPG2 interface for converting between character
37 * encodings. This file provides a Samba wrapper around it, and also
38 * a simple reimplementation that is used if the system does not
39 * implement iconv.
40 *
41 * Samba only works with encodings that are supersets of ASCII: ascii
42 * characters like whitespace can be tested for directly, multibyte
43 * sequences start with a byte with the high bit set, and strings are
44 * terminated by a nul byte.
45 *
46 * Note that the only function provided by iconv is conversion between
47 * characters. It doesn't directly support operations like
48 * uppercasing or comparison. We have to convert to UCS-2 and compare
49 * there.
50 *
51 * @sa Samba Developers Guide
52 **/
53
54static_decl_charset;
55
56static size_t ascii_pull(void *,const char **, size_t *, char **, size_t *);
57static size_t ascii_push(void *,const char **, size_t *, char **, size_t *);
58static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
59static size_t utf8_pull(void *,const char **, size_t *, char **, size_t *);
60static size_t utf8_push(void *,const char **, size_t *, char **, size_t *);
61static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
62static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
63static size_t iconv_copy(void *,const char **, size_t *, char **, size_t *);
64static size_t iconv_swab (void *,const char **, size_t *, char **, size_t *);
65
66static struct charset_functions builtin_functions[] = {
67 /* windows is really neither UCS-2 not UTF-16 */
68 {"UCS-2LE", iconv_copy, iconv_copy},
69 {"UTF-16LE", iconv_copy, iconv_copy},
70 {"UCS-2BE", iconv_swab, iconv_swab},
71 {"UTF-16BE", iconv_swab, iconv_swab},
72
73 /* we include the UTF-8 alias to cope with differing locale settings */
74 {"UTF8", utf8_pull, utf8_push},
75 {"UTF-8", utf8_pull, utf8_push},
76 {"ASCII", ascii_pull, ascii_push},
77 {"646", ascii_pull, ascii_push},
78 {"ISO-8859-1", ascii_pull, latin1_push},
79 {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
80 {NULL, NULL, NULL}
81};
82
83static struct charset_functions *charsets = NULL;
84
85static struct charset_functions *find_charset_functions(const char *name)
86{
87 struct charset_functions *c = charsets;
88
89 while(c) {
90 if (strcasecmp(name, c->name) == 0) {
91 return c;
92 }
93 c = c->next;
94 }
95
96 return NULL;
97}
98
99NTSTATUS smb_register_charset(struct charset_functions *funcs)
100{
101 if (!funcs) {
102 return NT_STATUS_INVALID_PARAMETER;
103 }
104
105 DEBUG(5, ("Attempting to register new charset %s\n", funcs->name));
106 /* Check whether we already have this charset... */
107 if (find_charset_functions(funcs->name)) {
108 DEBUG(0, ("Duplicate charset %s, not registering\n", funcs->name));
109 return NT_STATUS_OBJECT_NAME_COLLISION;
110 }
111
112 funcs->next = funcs->prev = NULL;
113 DEBUG(5, ("Registered charset %s\n", funcs->name));
114 DLIST_ADD(charsets, funcs);
115 return NT_STATUS_OK;
116}
117
118static void lazy_initialize_iconv(void)
119{
120 static BOOL initialized;
121 int i;
122
123 if (!initialized) {
124 initialized = True;
125 for(i = 0; builtin_functions[i].name; i++)
126 smb_register_charset(&builtin_functions[i]);
127 static_init_charset;
128 }
129}
130
131#ifdef __OS2__
/*
 * Remember the name of the target charset of the most recent conversion.
 * sys_iconv() needs it for the Korean and Japanese code page workaround.
 * (A plain static variable would have worked too, but a small accessor
 * function is tidier. SCS)
 *
 * toname: name to remember; only looked at when what == 0
 * what:   0 to store a private copy of toname, anything else to query
 *
 * Returns the stored copy (NULL if nothing has been stored, or if the
 * copy could not be allocated).  The string is owned by this function
 * and is released on the next store, so callers must not free it or
 * keep the pointer across another store.
 */
char *save_toname(char *toname, bool what)
{
	static char *to_name = NULL;

	if (what == 0) {
		/* Duplicate first, in case toname is the stored copy itself. */
		char *copy = (toname != NULL) ? SMB_STRDUP(toname) : NULL;

		/* Release the previous copy: this runs on every smb_iconv()
		   call, so keeping the old string would leak each time. */
		free(to_name);
		to_name = copy;
	}

	return to_name;
}
143#endif
144
145#ifdef HAVE_NATIVE_ICONV
/*
 * Thin wrapper around the system iconv().
 *
 * cd is the iconv_t descriptor; the remaining arguments and the return
 * value are exactly those of iconv().
 *
 * If there was an error then reset the internal state; this ensures
 * that we don't have a shift state remaining for character sets like
 * SJIS.  errno from the failed conversion is preserved across the reset.
 *
 * On OS/2, after a successful conversion to one of the Korean/Japanese
 * code pages (target name containing 949, 932, 942 or 943), the bytes
 * just written are scanned as 16-bit units and WON (0x20a9) / YEN
 * (0x00a5) are replaced by a backslash so that path separators survive.
 */
static size_t sys_iconv(void *cd,
			const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft)
{
#ifdef __OS2__
	/* Start of the output region, remembered before iconv() advances it. */
	uint16 *outbuf_uc = ( uint16 * )*outbuf;
	char *to_name = save_toname(NULL, 1);
#endif

	size_t ret = iconv((iconv_t)cd,
			   (char **)inbuf, inbytesleft,
			   outbuf, outbytesleft);
	if (ret == (size_t)-1) {
		int saved_errno = errno;
		iconv((iconv_t)cd, NULL, NULL, NULL, NULL);
		errno = saved_errno;
	}
#ifdef __OS2__
	/* Workaround for path separator on OS/2.  The saved name may be
	   NULL (nothing stored yet, or allocation failed), in which case
	   there is nothing to match against. */
	else if (to_name != NULL &&
		 ((strstr(to_name, "949") != NULL) ||	/* Korean CP */
		  (strstr(to_name, "932") != NULL) ||	/* Japanese CP */
		  (strstr(to_name, "942") != NULL) ||	/* Japanese CP */
		  (strstr(to_name, "943") != NULL)))	/* Japanese CP */
	{
		while (( char * )outbuf_uc < *outbuf) {
			if (*outbuf_uc == 0x20a9 ||	/* Korean WON */
			    *outbuf_uc == 0x00a5) {	/* Japanese YEN */
				*outbuf_uc = '\\';
			}
			outbuf_uc++;
		}
	}
#endif
	return ret;
}
188#endif
189
190/**
191 * This is a simple portable iconv() implementaion.
192 *
193 * It only knows about a very small number of character sets - just
194 * enough that Samba works on systems that don't have iconv.
195 **/
196size_t smb_iconv(smb_iconv_t cd,
197 const char **inbuf, size_t *inbytesleft,
198 char **outbuf, size_t *outbytesleft)
199{
200 char cvtbuf[2048];
201 char *bufp = cvtbuf;
202 size_t bufsize;
203
204#ifdef __OS2__
205 save_toname(cd->to_name, 0);
206#endif
207
208 /* in many cases we can go direct */
209 if (cd->direct) {
210 return cd->direct(cd->cd_direct,
211 inbuf, inbytesleft, outbuf, outbytesleft);
212 }
213
214
215 /* otherwise we have to do it chunks at a time */
216 while (*inbytesleft > 0) {
217 bufp = cvtbuf;
218 bufsize = sizeof(cvtbuf);
219
220 if (cd->pull(cd->cd_pull,
221 inbuf, inbytesleft, &bufp, &bufsize) == -1
222 && errno != E2BIG)
223 return -1;
224
225 bufp = cvtbuf;
226 bufsize = sizeof(cvtbuf) - bufsize;
227
228 if (cd->push(cd->cd_push,
229 (const char **)&bufp, &bufsize,
230 outbuf, outbytesleft) == -1)
231 return -1;
232 }
233
234 return 0;
235}
236
237
238static BOOL is_utf16(const char *name)
239{
240 return strcasecmp(name, "UCS-2LE") == 0 ||
241 strcasecmp(name, "UTF-16LE") == 0;
242}
243
244/*
245 simple iconv_open() wrapper
246 */
247smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
248{
249 smb_iconv_t ret;
250 struct charset_functions *from, *to;
251
252 lazy_initialize_iconv();
253 from = charsets;
254 to = charsets;
255
256 ret = SMB_MALLOC_P(struct _smb_iconv_t);
257 if (!ret) {
258 errno = ENOMEM;
259 return (smb_iconv_t)-1;
260 }
261 memset(ret, 0, sizeof(struct _smb_iconv_t));
262
263 ret->from_name = SMB_STRDUP(fromcode);
264 ret->to_name = SMB_STRDUP(tocode);
265
266 /* check for the simplest null conversion */
267 if (strcasecmp(fromcode, tocode) == 0) {
268 ret->direct = iconv_copy;
269 return ret;
270 }
271
272 /* check if we have a builtin function for this conversion */
273 from = find_charset_functions(fromcode);
274 if(from)ret->pull = from->pull;
275
276 to = find_charset_functions(tocode);
277 if(to)ret->push = to->push;
278
279 /* check if we can use iconv for this conversion */
280#ifdef HAVE_NATIVE_ICONV
281 if (!ret->pull) {
282 ret->cd_pull = iconv_open("UTF-16LE", fromcode);
283 if (ret->cd_pull == (iconv_t)-1)
284 ret->cd_pull = iconv_open("UCS-2LE", fromcode);
285 if (ret->cd_pull != (iconv_t)-1)
286 ret->pull = sys_iconv;
287 }
288
289 if (!ret->push) {
290 ret->cd_push = iconv_open(tocode, "UTF-16LE");
291 if (ret->cd_push == (iconv_t)-1)
292 ret->cd_push = iconv_open(tocode, "UCS-2LE");
293 if (ret->cd_push != (iconv_t)-1)
294 ret->push = sys_iconv;
295 }
296#endif
297
298 /* check if there is a module available that can do this conversion */
299 if (!ret->pull && NT_STATUS_IS_OK(smb_probe_module("charset", fromcode))) {
300 if(!(from = find_charset_functions(fromcode)))
301 DEBUG(0, ("Module %s doesn't provide charset %s!\n", fromcode, fromcode));
302 else
303 ret->pull = from->pull;
304 }
305
306 if (!ret->push && NT_STATUS_IS_OK(smb_probe_module("charset", tocode))) {
307 if(!(to = find_charset_functions(tocode)))
308 DEBUG(0, ("Module %s doesn't provide charset %s!\n", tocode, tocode));
309 else
310 ret->push = to->push;
311 }
312
313 if (!ret->push || !ret->pull) {
314 SAFE_FREE(ret->from_name);
315 SAFE_FREE(ret->to_name);
316 SAFE_FREE(ret);
317 errno = EINVAL;
318 return (smb_iconv_t)-1;
319 }
320
321 /* check for conversion to/from ucs2 */
322 if (is_utf16(fromcode) && to) {
323 ret->direct = to->push;
324 ret->push = ret->pull = NULL;
325 return ret;
326 }
327
328 if (is_utf16(tocode) && from) {
329 ret->direct = from->pull;
330 ret->push = ret->pull = NULL;
331 return ret;
332 }
333
334 /* Check if we can do the conversion direct */
335#ifdef HAVE_NATIVE_ICONV
336 if (is_utf16(fromcode)) {
337 ret->direct = sys_iconv;
338 ret->cd_direct = ret->cd_push;
339 ret->cd_push = NULL;
340 return ret;
341 }
342 if (is_utf16(tocode)) {
343 ret->direct = sys_iconv;
344 ret->cd_direct = ret->cd_pull;
345 ret->cd_pull = NULL;
346 return ret;
347 }
348#endif
349
350 return ret;
351}
352
353/*
354 simple iconv_close() wrapper
355*/
356int smb_iconv_close (smb_iconv_t cd)
357{
358#ifdef HAVE_NATIVE_ICONV
359 if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
360 if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
361 if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
362#endif
363
364 SAFE_FREE(cd->from_name);
365 SAFE_FREE(cd->to_name);
366
367 memset(cd, 0, sizeof(*cd));
368 SAFE_FREE(cd);
369 return 0;
370}
371
372
373/**********************************************************************
374 the following functions implement the builtin character sets in Samba
375 and also the "test" character sets that are designed to test
376 multi-byte character set support for english users
377***********************************************************************/
378
/*
 * Widen single-byte input to 16-bit little-endian units: every input
 * byte becomes the low byte of an output unit whose high byte is zero.
 *
 * cd is unused.  The buffer pointers and remaining counts are advanced
 * past what was converted.  Returns 0 when all input was consumed,
 * otherwise -1 with errno = E2BIG (output space ran out).
 */
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;

	while (src_left >= 1 && dst_left >= 2) {
		dst[0] = src[0];
		dst[1] = 0;
		src += 1;
		dst += 2;
		src_left -= 1;
		dst_left -= 2;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = src_left;
	*outbytesleft = dst_left;

	if (src_left > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
398
/*
 * Narrow 16-bit little-endian units to 7-bit ASCII: the low byte of
 * each unit is masked with 0x7F and written out; the high byte is
 * dropped.
 *
 * cd is unused.  The buffer pointers and remaining counts are advanced
 * past what was converted.  Returns the number of units whose high byte
 * was non-zero, or -1 with errno = EINVAL (a single trailing input byte
 * is left) or E2BIG (output space ran out).
 */
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	int lossy = 0;

	for (; *inbytesleft >= 2 && *outbytesleft >= 1;
	     *inbytesleft -= 2, *outbytesleft -= 1, *inbuf += 2, *outbuf += 1) {
		const char *unit = *inbuf;

		(*outbuf)[0] = unit[0] & 0x7F;
		if (unit[1] != 0) {
			lossy++;
		}
	}

	switch (*inbytesleft) {
	case 0:
		return lossy;
	case 1:
		/* half a unit left over */
		errno = EINVAL;
		return -1;
	default:
		errno = E2BIG;
		return -1;
	}
}
425
/*
 * Narrow 16-bit little-endian units to single bytes: the low byte of
 * each unit is written out unchanged; the high byte is dropped.
 *
 * cd is unused.  The buffer pointers and remaining counts are advanced
 * past what was converted.  Returns the number of units whose high byte
 * was non-zero, or -1 with errno = EINVAL (a single trailing input byte
 * is left) or E2BIG (output space ran out).
 */
static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
			  char **outbuf, size_t *outbytesleft)
{
	int lossy = 0;

	for (; *inbytesleft >= 2 && *outbytesleft >= 1;
	     *inbytesleft -= 2, *outbytesleft -= 1, *inbuf += 2, *outbuf += 1) {
		const char *unit = *inbuf;

		(*outbuf)[0] = unit[0];
		if (unit[1] != 0) {
			lossy++;
		}
	}

	switch (*inbytesleft) {
	case 0:
		return lossy;
	case 1:
		/* half a unit left over */
		errno = EINVAL;
		return -1;
	default:
		errno = E2BIG;
		return -1;
	}
}
452
/* Value of one hexadecimal digit (either case), or -1 if c is not one. */
static int ucs2hex_digit_value(char c)
{
	if (c >= '0' && c <= '9') {
		return c - '0';
	}
	if (c >= 'a' && c <= 'f') {
		return c - 'a' + 10;
	}
	if (c >= 'A' && c <= 'F') {
		return c - 'A' + 10;
	}
	return -1;
}

/*
 * Decode the "UCS2-HEX" test charset into 16-bit little-endian units.
 *
 * Any byte other than '@' is copied as the low byte of a unit with a
 * zero high byte.  '@' introduces an escape of exactly four hex digits
 * giving the value of one unit, e.g. "@00e9".
 *
 * cd is unused.  The buffer pointers and remaining counts are advanced
 * past what was converted.  Returns 0 when all input was consumed,
 * otherwise -1 with errno set to
 *   EINVAL  - the input ends in the middle of an escape,
 *   EILSEQ  - '@' is not followed by four hex digits,
 *   E2BIG   - output space ran out.
 *
 * The digits are parsed by hand rather than with sscanf(): the input is
 * a counted buffer that need not be NUL-terminated, and "%04x" would
 * also accept escapes with fewer than four digits.
 */
static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 1 && *outbytesleft >= 2) {
		unsigned v = 0;
		int i;

		if ((*inbuf)[0] != '@') {
			/* seven bit ascii case */
			(*outbuf)[0] = (*inbuf)[0];
			(*outbuf)[1] = 0;
			(*inbytesleft)  -= 1;
			(*outbytesleft) -= 2;
			(*inbuf)  += 1;
			(*outbuf) += 2;
			continue;
		}

		/* it's a hex character: '@' plus four digits */
		if (*inbytesleft < 5) {
			errno = EINVAL;
			return -1;
		}

		for (i = 1; i <= 4; i++) {
			int digit = ucs2hex_digit_value((*inbuf)[i]);

			if (digit < 0) {
				errno = EILSEQ;
				return -1;
			}
			v = (v << 4) | (unsigned)digit;
		}

		/* store little-endian */
		(*outbuf)[0] = v & 0xff;
		(*outbuf)[1] = v >> 8;
		(*inbytesleft)  -= 5;
		(*outbytesleft) -= 2;
		(*inbuf)  += 5;
		(*outbuf) += 2;
	}

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
495
/*
 * Encode 16-bit little-endian units into the "UCS2-HEX" test charset.
 *
 * A unit with a zero high byte whose low byte has the top bit clear and
 * is not '@' is written as that single byte.  Every other unit is
 * written as '@' followed by four hex digits of SVAL(unit).
 *
 * cd is unused.  The buffer pointers and remaining counts are advanced
 * past what was converted.  Returns 0 when all input was consumed,
 * otherwise -1 with errno = E2BIG (output space ran out) or EINVAL (a
 * single trailing input byte is left).
 */
static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 2 && *outbytesleft >= 1) {
		const char *unit = *inbuf;
		char escape[6];
		size_t written;

		if (unit[1] == 0 && (unit[0] & 0x80) == 0 && unit[0] != '@') {
			/* plain character, goes out as itself */
			(*outbuf)[0] = unit[0];
			written = 1;
		} else {
			/* needs the five byte "@xxxx" form */
			if (*outbytesleft < 5) {
				errno = E2BIG;
				return -1;
			}
			snprintf(escape, 6, "@%04x", SVAL(*inbuf, 0));
			memcpy(*outbuf, escape, 5);
			written = 5;
		}

		(*inbytesleft)  -= 2;
		(*outbytesleft) -= written;
		(*inbuf)  += 2;
		(*outbuf) += written;
	}

	if (*inbytesleft == 1) {
		errno = EINVAL;
		return -1;
	}

	if (*inbytesleft > 1) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
536
/*
 * Convert between little- and big-endian 16-bit units by swapping the
 * bytes of each pair.
 *
 * As many bytes as fit (the smaller of the two remaining counts) are
 * processed.  If that count is odd, the final output byte is set to
 * zero instead of being swapped, and the odd input byte still counts as
 * consumed.
 *
 * cd is unused.  The buffer pointers and remaining counts are advanced
 * by the number of bytes processed.  Returns 0 when all input was
 * consumed, otherwise -1 with errno = E2BIG.
 */
static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	/* size_t, not int: the counts are size_t and must not be truncated */
	size_t n = MIN(*inbytesleft, *outbytesleft);

	swab(*inbuf, *outbuf, (n & ~(size_t)1));
	if (n & 1) {
		(*outbuf)[n-1] = 0;
	}

	(*inbytesleft) -= n;
	(*outbytesleft) -= n;
	(*inbuf) += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
561
/*
 * Null conversion: copy bytes from input to output unchanged.
 *
 * As many bytes as fit (the smaller of the two remaining counts) are
 * copied; memmove() is used, so the buffers may overlap.
 *
 * cd is unused.  The buffer pointers and remaining counts are advanced
 * by the number of bytes copied.  Returns 0 when all input was
 * consumed, otherwise -1 with errno = E2BIG.
 */
static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	/* size_t, not int: the counts are size_t and must not be truncated */
	size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

	memmove(*outbuf, *inbuf, n);

	(*inbytesleft) -= n;
	(*outbytesleft) -= n;
	(*inbuf) += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
583
/*
 * Decode UTF-8 into 16-bit little-endian units.
 *
 * One-, two- and three-byte sequences produce one unit each; four-byte
 * sequences produce a surrogate pair.  Sequences that are not minimally
 * packed, have bad continuation bytes, are cut short by the end of the
 * input, or encode a value above 0x10ffff are rejected.
 *
 * cd is unused.  On every exit path the buffer pointers and remaining
 * counts are updated to just past the last complete character
 * converted.  Returns 0 when all input was consumed, otherwise -1 with
 * errno set to
 *   EILSEQ - malformed sequence as described above,
 *   EINVAL - lead byte that starts none of the handled forms,
 *   E2BIG  - output space ran out.
 */
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft)
{
	size_t in_left = *inbytesleft;
	size_t out_left = *outbytesleft;
	const unsigned char *src = (const unsigned char *)*inbuf;
	unsigned char *dst = (unsigned char *)*outbuf;
	size_t result = 0;

	while (in_left >= 1 && out_left >= 2) {
		unsigned int codepoint;

		if ((src[0] & 0x80) == 0) {
			/* 0xxxxxxx: plain ASCII */
			dst[0] = src[0];
			dst[1] = 0;
			src += 1;
			in_left -= 1;
			dst += 2;
			out_left -= 2;
			continue;
		}

		if ((src[0] & 0xe0) == 0xc0) {
			/* 110xxxxx 10xxxxxx */
			if (in_left < 2 || (src[1] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto fail;
			}
			codepoint = ((src[0] & 0x1f) << 6) | (src[1] & 0x3f);
			if (codepoint < 0x80) {
				/* don't accept UTF-8 characters that are not minimally packed */
				errno = EILSEQ;
				goto fail;
			}
			dst[0] = codepoint & 0xff;
			dst[1] = codepoint >> 8;
			src += 2;
			in_left -= 2;
			dst += 2;
			out_left -= 2;
			continue;
		}

		if ((src[0] & 0xf0) == 0xe0) {
			/* 1110xxxx 10xxxxxx 10xxxxxx */
			if (in_left < 3 ||
			    (src[1] & 0xc0) != 0x80 ||
			    (src[2] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto fail;
			}
			codepoint = ((src[0] & 0xf) << 12) |
				    ((src[1] & 0x3f) << 6) |
				    (src[2] & 0x3f);
			if (codepoint < 0x800) {
				/* don't accept UTF-8 characters that are not minimally packed */
				errno = EILSEQ;
				goto fail;
			}
			dst[0] = codepoint & 0xff;
			dst[1] = codepoint >> 8;
			src += 3;
			in_left -= 3;
			dst += 2;
			out_left -= 2;
			continue;
		}

		if ((src[0] & 0xf8) == 0xf0) {
			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			if (in_left < 4 ||
			    (src[1] & 0xc0) != 0x80 ||
			    (src[2] & 0xc0) != 0x80 ||
			    (src[3] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto fail;
			}
			codepoint = ((src[0] & 0x7) << 18) |
				    ((src[1] & 0x3f) << 12) |
				    ((src[2] & 0x3f) << 6) |
				    (src[3] & 0x3f);
			if (codepoint < 0x10000 || codepoint > 0x10ffff) {
				/* not minimally packed, or beyond the last code point */
				errno = EILSEQ;
				goto fail;
			}

			/* 20 bits remain: the top 10 go into the first
			   surrogate, the low 10 into the second */
			codepoint -= 0x10000;

			if (out_left < 4) {
				errno = E2BIG;
				goto fail;
			}

			dst[0] = (codepoint >> 10) & 0xFF;
			dst[1] = (codepoint >> 18) | 0xd8;
			dst[2] = codepoint & 0xFF;
			dst[3] = ((codepoint >> 8) & 0x3) | 0xdc;
			src += 4;
			in_left -= 4;
			dst += 4;
			out_left -= 4;
			continue;
		}

		/* we don't handle 5 byte sequences */
		errno = EINVAL;
		goto fail;
	}

	if (in_left > 0) {
		errno = E2BIG;
		goto fail;
	}
	goto done;

fail:
	result = (size_t)-1;
done:
	/* progress is reported to the caller on success and failure alike */
	*inbytesleft = in_left;
	*outbytesleft = out_left;
	*inbuf = (const char *)src;
	*outbuf = (char *)dst;
	return result;
}
707
/*
 * Encode 16-bit little-endian units as UTF-8.
 *
 * Units below 0x80 become one byte, below 0x800 two bytes, other
 * non-surrogate units three bytes, and a surrogate pair becomes one
 * four-byte sequence.
 *
 * cd is unused.  On every exit path the buffer pointers and remaining
 * counts are updated to just past the last complete character
 * converted.  Returns 0 when all input was consumed, otherwise -1 with
 * errno set to
 *   E2BIG  - output space ran out,
 *   EINVAL - input ends with a single byte or with the first half of a
 *            surrogate pair (also: a stray second half with fewer than
 *            four input bytes left),
 *   EILSEQ - a stray second half of a pair, or a first half not
 *            followed by a second half.
 */
static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft)
{
	size_t in_left = *inbytesleft;
	size_t out_left = *outbytesleft;
	unsigned char *dst = (unsigned char *)*outbuf;
	const unsigned char *src = (const unsigned char *)*inbuf;
	size_t result = 0;

	while (in_left >= 2 && out_left >= 1) {
		unsigned int codepoint;

		if (src[1] == 0 && !(src[0] & 0x80)) {
			/* simplest case: ASCII, one byte */
			dst[0] = src[0];
			src += 2;
			in_left -= 2;
			dst += 1;
			out_left -= 1;
			continue;
		}

		if ((src[1] & 0xf8) == 0) {
			/* next simplest case: below 0x800, two bytes */
			if (out_left < 2) {
				errno = E2BIG;
				goto fail;
			}
			dst[0] = 0xc0 | (src[0] >> 6) | (src[1] << 2);
			dst[1] = 0x80 | (src[0] & 0x3f);
			src += 2;
			in_left -= 2;
			dst += 2;
			out_left -= 2;
			continue;
		}

		if ((src[1] & 0xfc) == 0xdc) {
			/* its the second part of a 4 byte sequence. Illegal */
			errno = (in_left < 4) ? EINVAL : EILSEQ;
			goto fail;
		}

		if ((src[1] & 0xfc) != 0xd8) {
			/* ordinary unit, three bytes */
			codepoint = src[0] | (src[1] << 8);
			if (out_left < 3) {
				errno = E2BIG;
				goto fail;
			}
			dst[0] = 0xe0 | (codepoint >> 12);
			dst[1] = 0x80 | ((codepoint >> 6) & 0x3f);
			dst[2] = 0x80 | (codepoint & 0x3f);
			src += 2;
			in_left -= 2;
			dst += 3;
			out_left -= 3;
			continue;
		}

		/* its the first part of a 4 byte sequence */
		if (in_left < 4) {
			errno = EINVAL;
			goto fail;
		}
		if ((src[3] & 0xfc) != 0xdc) {
			errno = EILSEQ;
			goto fail;
		}

		/* ten payload bits from each half of the pair */
		codepoint = 0x10000 + (src[2] |
				       ((src[3] & 0x3) << 8) |
				       (src[0] << 10) |
				       ((src[1] & 0x3) << 18));

		if (out_left < 4) {
			errno = E2BIG;
			goto fail;
		}
		dst[0] = 0xf0 | (codepoint >> 18);
		dst[1] = 0x80 | ((codepoint >> 12) & 0x3f);
		dst[2] = 0x80 | ((codepoint >> 6) & 0x3f);
		dst[3] = 0x80 | (codepoint & 0x3f);
		src += 4;
		in_left -= 4;
		dst += 4;
		out_left -= 4;
	}

	if (in_left == 1) {
		errno = EINVAL;
		goto fail;
	}

	if (in_left > 1) {
		errno = E2BIG;
		goto fail;
	}
	goto done;

fail:
	result = (size_t)-1;
done:
	/* progress is reported to the caller on success and failure alike */
	*inbytesleft = in_left;
	*outbytesleft = out_left;
	*inbuf = (const char *)src;
	*outbuf = (char *)dst;
	return result;
}
821
Note: See TracBrowser for help on using the repository browser.