Changeset 124 for branches/samba-3.0/source/lib/charcnv.c
- Timestamp:
- Mar 12, 2008, 9:08:18 AM (17 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/samba-3.0/source/lib/charcnv.c
r120 r124 28 28 char lp_failed_convert_char(void) 29 29 { 30 30 return '_'; 31 31 } 32 32 … … 55 55 static const char *charset_name(charset_t ch) 56 56 { 57 58 59 60 61 62 63 64 57 const char *ret = NULL; 58 59 if (ch == CH_UTF16LE) ret = "UTF-16LE"; 60 else if (ch == CH_UTF16BE) ret = "UTF-16BE"; 61 else if (ch == CH_UNIX) ret = lp_unix_charset(); 62 else if (ch == CH_DOS) ret = lp_dos_charset(); 63 else if (ch == CH_DISPLAY) ret = lp_display_charset(); 64 else if (ch == CH_UTF8) ret = "UTF8"; 65 65 66 66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET) 67 68 67 if (ret && !strcmp(ret, "LOCALE")) { 68 const char *ln = NULL; 69 69 70 70 #ifdef HAVE_SETLOCALE 71 71 setlocale(LC_ALL, ""); 72 72 #endif 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 73 ln = nl_langinfo(CODESET); 74 if (ln) { 75 /* Check whether the charset name is supported 76 by iconv */ 77 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE"); 78 if (handle == (smb_iconv_t) -1) { 79 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln)); 80 ln = NULL; 81 } else { 82 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln)); 83 smb_iconv_close(handle); 84 } 85 } 86 ret = ln; 87 } 88 88 #endif 89 89 90 91 90 if (!ret || !*ret) ret = "ASCII"; 91 return ret; 92 92 } 93 93 94 94 void lazy_initialize_conv(void) 95 95 { 96 97 98 99 100 101 102 96 static int initialized = False; 97 98 if (!initialized) { 99 initialized = True; 100 load_case_tables(); 101 init_iconv(); 102 } 103 103 } 104 104 … … 108 108 void gfree_charcnv(void) 109 109 { 110 111 112 113 114 115 116 117 118 119 110 int c1, c2; 111 112 for (c1=0;c1<NUM_CHARSETS;c1++) { 113 for (c2=0;c2<NUM_CHARSETS;c2++) { 114 if ( conv_handles[c1][c2] ) { 115 smb_iconv_close( conv_handles[c1][c2] ); 116 conv_handles[c1][c2] = 0; 117 } 118 } 119 } 120 120 } 121 121 … … 129 129 void init_iconv(void) 130 130 { 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 131 int c1, c2; 132 BOOL did_reload = False; 133 134 /* so that charset_name() works we need to get the UNIX<->UCS2 going 135 first */ 136 if (!conv_handles[CH_UNIX][CH_UTF16LE]) 137 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII"); 138 139 if (!conv_handles[CH_UTF16LE][CH_UNIX]) 140 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE)); 141 142 for (c1=0;c1<NUM_CHARSETS;c1++) { 143 for (c2=0;c2<NUM_CHARSETS;c2++) { 144 const char *n1 = charset_name((charset_t)c1); 145 const char *n2 = charset_name((charset_t)c2); 146 if (conv_handles[c1][c2] && 147 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 && 148 strcmp(n2, conv_handles[c1][c2]->to_name) == 0) 149 continue; 150 151 did_reload = True; 152 153 if (conv_handles[c1][c2]) 154 smb_iconv_close(conv_handles[c1][c2]); 155 156 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 157 if (conv_handles[c1][c2] == (smb_iconv_t)-1) { 158 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n", 159 charset_name((charset_t)c1), charset_name((charset_t)c2))); 160 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) { 161 n1 = "ASCII"; 162 } 163 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) { 164 n2 = "ASCII"; 165 } 166 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n", 167 n1, n2 )); 168 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 169 if (!conv_handles[c1][c2]) { 170 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2)); 171 smb_panic("init_iconv: conv_handle initialization failed."); 172 } 173 } 174 } 175 } 176 177 if (did_reload) { 178 /* XXX: Does this really get called every time the dos 179 * codepage changes? */ 180 /* XXX: Is the did_reload test too strict? */ 181 conv_silent = True; 182 init_doschar_table(); 183 init_valid_table(); 184 conv_silent = False; 185 } 186 186 } 187 187 … … 202 202 203 203 static size_t convert_string_internal(charset_t from, charset_t to, 204 205 206 { 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 204 void const *src, size_t srclen, 205 void *dest, size_t destlen, BOOL allow_bad_conv) 206 { 207 size_t i_len, o_len; 208 size_t retval; 209 const char* inbuf = (const char*)src; 210 char* outbuf = (char*)dest; 211 smb_iconv_t descriptor; 212 213 lazy_initialize_conv(); 214 215 descriptor = conv_handles[from][to]; 216 217 if (srclen == (size_t)-1) { 218 if (from == CH_UTF16LE || from == CH_UTF16BE) { 219 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2; 220 } else { 221 srclen = strlen((const char *)src)+1; 222 } 223 } 224 225 226 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 227 if (!conv_silent) 228 DEBUG(0,("convert_string_internal: Conversion not supported.\n")); 229 return (size_t)-1; 230 } 231 232 i_len=srclen; 233 o_len=destlen; 234 234 235 235 again: 236 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 237 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len); 238 if(retval==(size_t)-1) { 239 const char *reason="unknown error"; 240 switch(errno) { 241 case EINVAL: 242 reason="Incomplete multibyte sequence"; 243 if (!conv_silent) 244 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 245 if (allow_bad_conv) 246 goto use_as_is; 247 break; 248 case E2BIG: 249 reason="No more room"; 250 if (!conv_silent) { 251 if (from == CH_UNIX) { 252 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n", 253 charset_name(from), charset_name(to), 254 (unsigned int)srclen, (unsigned int)destlen, (const char *)src)); 255 } else { 256 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n", 257 charset_name(from), charset_name(to), 258 (unsigned int)srclen, (unsigned int)destlen)); 259 } 260 } 261 break; 262 case EILSEQ: 263 reason="Illegal multibyte sequence"; 264 if (!conv_silent) 265 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 266 if (allow_bad_conv) 267 goto use_as_is; 268 break; 269 default: 270 if (!conv_silent) 271 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 272 break; 273 } 274 /* smb_panic(reason); */ 275 } 276 return destlen-o_len; 277 277 278 278 use_as_is: 279 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 280 /* 281 * Conversion not supported. This is actually an error, but there are so 282 * many misconfigured iconv systems and smb.conf's out there we can't just 283 * fail. Do a very bad conversion instead.... JRA. 284 */ 285 286 { 287 if (o_len == 0 || i_len == 0) 288 return destlen - o_len; 289 290 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) && 291 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) { 292 /* Can't convert from utf16 any endian to multibyte. 293 Replace with the default fail char. 294 */ 295 if (i_len < 2) 296 return destlen - o_len; 297 if (i_len >= 2) { 298 *outbuf = lp_failed_convert_char(); 299 300 outbuf++; 301 o_len--; 302 303 inbuf += 2; 304 i_len -= 2; 305 } 306 307 if (o_len == 0 || i_len == 0) 308 return destlen - o_len; 309 310 /* Keep trying with the next char... */ 311 goto again; 312 313 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 314 /* Can't convert to UTF16LE - just widen by adding the 315 default fail char then zero. 316 */ 317 if (o_len < 2) 318 return destlen - o_len; 319 320 outbuf[0] = lp_failed_convert_char(); 321 outbuf[1] = '\0'; 322 323 inbuf++; 324 i_len--; 325 326 outbuf += 2; 327 o_len -= 2; 328 329 if (o_len == 0 || i_len == 0) 330 return destlen - o_len; 331 332 /* Keep trying with the next char... */ 333 goto again; 334 335 } else if (from != CH_UTF16LE && from != CH_UTF16BE && 336 to != CH_UTF16LE && to != CH_UTF16BE) { 337 /* Failed multibyte to multibyte. Just copy the default fail char and 338 try again. */ 339 outbuf[0] = lp_failed_convert_char(); 340 341 inbuf++; 342 i_len--; 343 344 outbuf++; 345 o_len--; 346 347 if (o_len == 0 || i_len == 0) 348 return destlen - o_len; 349 350 /* Keep trying with the next char... */ 351 goto again; 352 353 } else { 354 /* Keep compiler happy.... */ 355 return destlen - o_len; 356 } 357 } 358 358 } 359 359 … … 376 376 377 377 size_t convert_string(charset_t from, charset_t to, 378 379 380 { 381 382 383 384 385 378 void const *src, size_t srclen, 379 void *dest, size_t destlen, BOOL allow_bad_conv) 380 { 381 /* 382 * NB. We deliberately don't do a strlen here if srclen == -1. 383 * This is very expensive over millions of calls and is taken 384 * care of in the slow path in convert_string_internal. JRA. 385 */ 386 386 387 387 #ifdef DEVELOPER 388 388 SMB_ASSERT(destlen != (size_t)-1); 389 389 #endif 390 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 391 if (srclen == 0) 392 return 0; 393 394 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) { 395 const unsigned char *p = (const unsigned char *)src; 396 unsigned char *q = (unsigned char *)dest; 397 size_t slen = srclen; 398 size_t dlen = destlen; 399 unsigned char lastp = '\0'; 400 size_t retval = 0; 401 402 /* If all characters are ascii, fast path here. */ 403 while (slen && dlen) { 404 if ((lastp = *p) <= 0x7f) { 405 *q++ = *p++; 406 if (slen != (size_t)-1) { 407 slen--; 408 } 409 dlen--; 410 retval++; 411 if (!lastp) 412 break; 413 } else { 414 414 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 415 415 goto general_case; 416 416 #else 417 417 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 418 418 #endif 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 419 } 420 } 421 if (!dlen) { 422 /* Even if we fast path we should note if we ran out of room. */ 423 if (((slen != (size_t)-1) && slen) || 424 ((slen == (size_t)-1) && lastp)) { 425 errno = E2BIG; 426 } 427 } 428 return retval; 429 } else if (from == CH_UTF16LE && to != CH_UTF16LE) { 430 const unsigned char *p = (const unsigned char *)src; 431 unsigned char *q = (unsigned char *)dest; 432 size_t retval = 0; 433 size_t slen = srclen; 434 size_t dlen = destlen; 435 unsigned char lastp = '\0'; 436 437 /* If all characters are ascii, fast path here. */ 438 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) { 439 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { 440 *q++ = *p; 441 if (slen != (size_t)-1) { 442 slen -= 2; 443 } 444 p += 2; 445 dlen--; 446 retval++; 447 if (!lastp) 448 break; 449 } else { 450 450 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 451 451 goto general_case; 452 452 #else 453 453 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 454 454 #endif 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 455 } 456 } 457 if (!dlen) { 458 /* Even if we fast path we should note if we ran out of room. */ 459 if (((slen != (size_t)-1) && slen) || 460 ((slen == (size_t)-1) && lastp)) { 461 errno = E2BIG; 462 } 463 } 464 return retval; 465 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 466 const unsigned char *p = (const unsigned char *)src; 467 unsigned char *q = (unsigned char *)dest; 468 size_t retval = 0; 469 size_t slen = srclen; 470 size_t dlen = destlen; 471 unsigned char lastp = '\0'; 472 473 /* If all characters are ascii, fast path here. */ 474 while (slen && (dlen >= 2)) { 475 if ((lastp = *p) <= 0x7F) { 476 *q++ = *p++; 477 *q++ = '\0'; 478 if (slen != (size_t)-1) { 479 slen--; 480 } 481 dlen -= 2; 482 retval += 2; 483 if (!lastp) 484 break; 485 } else { 486 486 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 487 487 goto general_case; 488 488 #else 489 489 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 490 490 #endif 491 492 493 494 495 496 497 498 499 500 501 491 } 492 } 493 if (!dlen) { 494 /* Even if we fast path we should note if we ran out of room. */ 495 if (((slen != (size_t)-1) && slen) || 496 ((slen == (size_t)-1) && lastp)) { 497 errno = E2BIG; 498 } 499 } 500 return retval; 501 } 502 502 503 503 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 504 504 general_case: 505 505 #endif 506 506 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv); 507 507 } 508 508 … … 524 524 525 525 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to, 526 527 { 528 size_t i_len, o_len, destlen = MAX(srclen, 512);529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 526 void const *src, size_t srclen, void *dst, BOOL allow_bad_conv) 527 { 528 size_t i_len, o_len, destlen = (srclen * 3) / 2; 529 size_t retval; 530 const char *inbuf = (const char *)src; 531 char *outbuf = NULL, *ob = NULL; 532 smb_iconv_t descriptor; 533 void **dest = (void **)dst; 534 535 *dest = NULL; 536 537 if (src == NULL || srclen == (size_t)-1) 538 return (size_t)-1; 539 if (srclen == 0) 540 return 0; 541 542 lazy_initialize_conv(); 543 544 descriptor = conv_handles[from][to]; 545 546 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 547 if (!conv_silent) 548 DEBUG(0,("convert_string_allocate: Conversion not supported.\n")); 549 return (size_t)-1; 550 } 551 551 552 552 convert: 553 553 554 if ((destlen*2) < destlen) { 555 /* wrapped ! abort. */ 556 if (!conv_silent) 557 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n")); 558 if (!ctx) 559 SAFE_FREE(outbuf); 560 return (size_t)-1; 561 } else { 562 destlen = destlen * 2; 563 } 564 565 if (ctx) { 566 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen); 567 } else { 568 ob = (char *)SMB_REALLOC(ob, destlen); 569 } 570 571 if (!ob) { 572 DEBUG(0, ("convert_string_allocate: realloc failed!\n")); 573 return (size_t)-1; 574 } 575 outbuf = ob; 576 i_len = srclen; 577 o_len = destlen; 554 /* +2 is for ucs2 null termination. */ 555 if ((destlen*2)+2 < destlen) { 556 /* wrapped ! abort. */ 557 if (!conv_silent) 558 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n")); 559 if (!ctx) 560 SAFE_FREE(outbuf); 561 return (size_t)-1; 562 } else { 563 destlen = destlen * 2; 564 } 565 566 /* +2 is for ucs2 null termination. */ 567 if (ctx) { 568 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2); 569 } else { 570 ob = (char *)SMB_REALLOC(ob, destlen + 2); 571 } 572 573 if (!ob) { 574 DEBUG(0, ("convert_string_allocate: realloc failed!\n")); 575 return (size_t)-1; 576 } 577 outbuf = ob; 578 i_len = srclen; 579 o_len = destlen; 578 580 579 581 again: 580 582 581 582 583 584 if(retval == (size_t)-1){585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 583 retval = smb_iconv(descriptor, 584 &inbuf, &i_len, 585 &outbuf, &o_len); 586 if(retval == (size_t)-1) { 587 const char *reason="unknown error"; 588 switch(errno) { 589 case EINVAL: 590 reason="Incomplete multibyte sequence"; 591 if (!conv_silent) 592 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf)); 593 if (allow_bad_conv) 594 goto use_as_is; 595 break; 596 case E2BIG: 597 goto convert; 598 case EILSEQ: 599 reason="Illegal multibyte sequence"; 600 if (!conv_silent) 601 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf)); 602 if (allow_bad_conv) 603 goto use_as_is; 604 break; 605 } 606 if (!conv_silent) 607 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf)); 608 /* smb_panic(reason); */ 609 return (size_t)-1; 610 } 609 611 610 612 out: 611 613 612 destlen = destlen - o_len; 613 if (ctx) { 614 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen); 615 } else { 616 ob = (char *)SMB_REALLOC(ob,destlen); 617 } 618 619 if (destlen && !ob) { 620 DEBUG(0, ("convert_string_allocate: out of memory!\n")); 621 return (size_t)-1; 622 } 623 624 *dest = ob; 625 return destlen; 614 destlen = destlen - o_len; 615 if (ctx) { 616 /* We're shrinking here so we know the +2 is safe from wrap. */ 617 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2); 618 } else { 619 ob = (char *)SMB_REALLOC(ob,destlen + 2); 620 } 621 622 if (destlen && !ob) { 623 DEBUG(0, ("convert_string_allocate: out of memory!\n")); 624 return (size_t)-1; 625 } 626 627 *dest = ob; 628 629 /* Must ucs2 null terminate in the extra space we allocated. */ 630 ob[destlen] = '\0'; 631 ob[destlen+1] = '\0'; 632 return destlen; 626 633 627 634 use_as_is: 628 635 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 636 /* 637 * Conversion not supported. This is actually an error, but there are so 638 * many misconfigured iconv systems and smb.conf's out there we can't just 639 * fail. Do a very bad conversion instead.... JRA. 640 */ 641 642 { 643 if (o_len == 0 || i_len == 0) 644 goto out; 645 646 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) && 647 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) { 648 /* Can't convert from utf16 any endian to multibyte. 649 Replace with the default fail char. 650 */ 651 652 if (i_len < 2) 653 goto out; 654 655 if (i_len >= 2) { 656 *outbuf = lp_failed_convert_char(); 657 658 outbuf++; 659 o_len--; 660 661 inbuf += 2; 662 i_len -= 2; 663 } 664 665 if (o_len == 0 || i_len == 0) 666 goto out; 667 668 /* Keep trying with the next char... */ 669 goto again; 670 671 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 672 /* Can't convert to UTF16LE - just widen by adding the 673 default fail char then zero. 674 */ 675 if (o_len < 2) 676 goto out; 677 678 outbuf[0] = lp_failed_convert_char(); 679 outbuf[1] = '\0'; 680 681 inbuf++; 682 i_len--; 683 684 outbuf += 2; 685 o_len -= 2; 686 687 if (o_len == 0 || i_len == 0) 688 goto out; 689 690 /* Keep trying with the next char... */ 691 goto again; 692 693 } else if (from != CH_UTF16LE && from != CH_UTF16BE && 694 to != CH_UTF16LE && to != CH_UTF16BE) { 695 /* Failed multibyte to multibyte. Just copy the default fail char and 696 try again. */ 697 outbuf[0] = lp_failed_convert_char(); 698 699 inbuf++; 700 i_len--; 701 702 outbuf++; 703 o_len--; 704 705 if (o_len == 0 || i_len == 0) 706 goto out; 707 708 /* Keep trying with the next char... */ 709 goto again; 710 711 } else { 712 /* Keep compiler happy.... */ 713 goto out; 714 } 715 } 709 716 } 710 717 … … 719 726 **/ 720 727 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to, 721 722 723 { 724 725 726 727 728 729 730 731 732 733 728 void const *src, size_t srclen, void *dst, 729 BOOL allow_bad_conv) 730 { 731 void **dest = (void **)dst; 732 size_t dest_len; 733 734 *dest = NULL; 735 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv); 736 if (dest_len == (size_t)-1) 737 return (size_t)-1; 738 if (*dest == NULL) 739 return (size_t)-1; 740 return dest_len; 734 741 } 735 742 736 743 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen) 737 744 { 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 745 size_t size; 746 smb_ucs2_t *buffer; 747 748 size = push_ucs2_allocate(&buffer, src); 749 if (size == (size_t)-1) { 750 smb_panic("failed to create UCS2 buffer"); 751 } 752 if (!strupper_w(buffer) && (dest == src)) { 753 free(buffer); 754 return srclen; 755 } 756 757 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True); 758 free(buffer); 759 return size; 753 760 } 754 761 … … 760 767 char *strdup_upper(const char *s) 761 768 { 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 769 pstring out_buffer; 770 const unsigned char *p = (const unsigned char *)s; 771 unsigned char *q = (unsigned char *)out_buffer; 772 773 /* this is quite a common operation, so we want it to be 774 fast. We optimise for the ascii case, knowing that all our 775 supported multi-byte character sets are ascii-compatible 776 (ie. they match for the first 128 chars) */ 777 778 while (1) { 779 if (*p & 0x80) 780 break; 781 *q++ = toupper_ascii(*p); 782 if (!*p) 783 break; 784 p++; 785 if (p - ( const unsigned char *)s >= sizeof(pstring)) 786 break; 787 } 788 789 if (*p) { 790 /* MB case. */ 791 size_t size; 792 wpstring buffer; 793 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True); 794 if (size == (size_t)-1) { 795 return NULL; 796 } 797 798 strupper_w(buffer); 799 800 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True); 801 if (size == (size_t)-1) { 802 return NULL; 803 } 804 } 805 806 return SMB_STRDUP(out_buffer); 800 807 } 801 808 802 809 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen) 803 810 { 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 811 size_t size; 812 smb_ucs2_t *buffer = NULL; 813 814 size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen, 815 (void **)(void *)&buffer, True); 816 if (size == (size_t)-1 || !buffer) { 817 smb_panic("failed to create UCS2 buffer"); 818 } 819 if (!strlower_w(buffer) && (dest == src)) { 820 SAFE_FREE(buffer); 821 return srclen; 822 } 823 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True); 824 SAFE_FREE(buffer); 825 return size; 819 826 } 820 827 … … 825 832 char *strdup_lower(const char *s) 826 833 { 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 834 size_t size; 835 smb_ucs2_t *buffer = NULL; 836 char *out_buffer; 837 838 size = push_ucs2_allocate(&buffer, s); 839 if (size == -1 || !buffer) { 840 return NULL; 841 } 842 843 strlower_w(buffer); 844 845 size = pull_ucs2_allocate(&out_buffer, buffer); 846 SAFE_FREE(buffer); 847 848 if (size == (size_t)-1) { 849 return NULL; 850 } 851 852 return out_buffer; 846 853 } 847 854 848 855 static size_t ucs2_align(const void *base_ptr, const void *p, int flags) 849 856 { 850 851 852 857 if (flags & (STR_NOALIGN|STR_ASCII)) 858 return 0; 859 return PTR_DIFF(p, base_ptr) & 1; 853 860 } 854 861 … … 875 882 876 883 /* No longer allow a length of -1 */ 877 884 if (dest_len == (size_t)-1) 878 885 smb_panic("push_ascii - dest_len == -1"); 879 886 880 881 882 883 884 885 886 887 887 if (flags & STR_UPPER) { 888 pstrcpy(tmpbuf, src); 889 strupper_m(tmpbuf); 890 src = tmpbuf; 891 } 892 893 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) 894 src_len++; 888 895 889 896 ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True); … … 899 906 size_t push_ascii_fstring(void *dest, const char *src) 900 907 { 901 908 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE); 902 909 } 903 910 904 911 size_t push_ascii_pstring(void *dest, const char *src) 905 912 { 906 913 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE); 907 914 } 908 915 … … 914 921 size_t push_ascii_nstring(void *dest, const char *src) 915 922 { 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 923 size_t i, buffer_len, dest_len; 924 smb_ucs2_t *buffer; 925 926 conv_silent = True; 927 buffer_len = push_ucs2_allocate(&buffer, src); 928 if (buffer_len == (size_t)-1) { 929 smb_panic("failed to create UCS2 buffer"); 930 } 931 932 /* We're using buffer_len below to count ucs2 characters, not bytes. */ 933 buffer_len /= sizeof(smb_ucs2_t); 934 935 dest_len = 0; 936 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) { 937 unsigned char mb[10]; 938 /* Convert one smb_ucs2_t character at a time. */ 939 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False); 940 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) { 941 memcpy((char *)dest + dest_len, mb, mb_len); 942 dest_len += mb_len; 943 } else { 944 errno = E2BIG; 945 break; 946 } 947 } 948 ((char *)dest)[dest_len] = '\0'; 949 950 SAFE_FREE(buffer); 951 conv_silent = False; 952 return dest_len; 946 953 } 947 954 … … 963 970 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) 964 971 { 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 972 size_t ret; 973 974 if (dest_len == (size_t)-1) 975 dest_len = sizeof(pstring); 976 977 if (flags & STR_TERMINATE) { 978 if (src_len == (size_t)-1) { 979 src_len = strlen((const char *)src) + 1; 980 } else { 981 size_t len = strnlen((const char *)src, src_len); 982 if (len < src_len) 983 len++; 984 src_len = len; 985 } 986 } 987 988 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True); 989 if (ret == (size_t)-1) { 990 ret = 0; 991 dest_len = 0; 992 } 993 994 if (dest_len && ret) { 995 /* Did we already process the terminating zero ? */ 996 if (dest[MIN(ret-1, dest_len-1)] != 0) { 997 dest[MIN(ret, dest_len-1)] = 0; 998 } 999 } else { 1000 dest[0] = 0; 1001 } 1002 1003 return src_len; 997 1004 } 998 1005 999 1006 size_t pull_ascii_pstring(char *dest, const void *src) 1000 1007 { 1001 1008 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE); 1002 1009 } 1003 1010 1004 1011 size_t pull_ascii_fstring(char *dest, const void *src) 1005 1012 { 1006 1013 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE); 1007 1014 } 1008 1015 … … 1011 1018 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src) 1012 1019 { 1013 1020 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE); 1014 1021 } 1015 1022 … … 1023 1030 * <dl> 1024 1031 * <dt>STR_TERMINATE <dd>means include the null termination. 1025 * <dt>STR_UPPER 1032 * <dt>STR_UPPER <dd>means uppercase in the destination. 1026 1033 * <dt>STR_NOALIGN <dd>means don't do alignment. 1027 1034 * </dl> … … 1033 1040 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags) 1034 1041 { 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1042 size_t len=0; 1043 size_t src_len; 1044 size_t ret; 1045 1046 /* treat a pstring as "unlimited" length */ 1047 if (dest_len == (size_t)-1) 1048 dest_len = sizeof(pstring); 1049 1050 if (flags & STR_TERMINATE) 1051 src_len = (size_t)-1; 1052 else 1053 src_len = strlen(src); 1054 1055 if (ucs2_align(base_ptr, dest, flags)) { 1056 *(char *)dest = 0; 1057 dest = (void *)((char *)dest + 1); 1058 if (dest_len) 1059 dest_len--; 1060 len++; 1061 } 1062 1063 /* ucs2 is always a multiple of 2 bytes */ 1064 dest_len &= ~1; 1065 1066 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True); 1067 if (ret == (size_t)-1) { 1068 return 0; 1069 } 1070 1071 len += ret; 1072 1073 if (flags & STR_UPPER) { 1074 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest; 1075 size_t i; 1076 1077 /* We check for i < (ret / 2) below as the dest string isn't null 1078 terminated if STR_TERMINATE isn't set. */ 1079 1080 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) { 1081 smb_ucs2_t v = toupper_w(dest_ucs2[i]); 1082 if (v != dest_ucs2[i]) { 1083 dest_ucs2[i] = v; 1084 } 1085 } 1086 } 1087 1088 return len; 1082 1089 } 1083 1090 … … 1090 1097 * 1091 1098 * @returns The number of bytes occupied by the string in the destination 1092 * 1099 * or -1 in case of error. 1093 1100 **/ 1094 1101 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) 1095 1102 { 1096 1097 1098 1099 1103 size_t src_len = strlen(src)+1; 1104 1105 *dest = NULL; 1106 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True); 1100 1107 } 1101 1108 … … 1107 1114 * 1108 1115 * @returns The number of bytes occupied by the string in the destination 1109 * 1116 * or -1 in case of error. 1110 1117 **/ 1111 1118 1112 1119 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src) 1113 1120 { 1114 1115 1116 1117 1121 size_t src_len = strlen(src)+1; 1122 1123 *dest = NULL; 1124 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True); 1118 1125 } 1119 1126 … … 1123 1130 Flags can have: 1124 1131 STR_TERMINATE means include the null termination 1125 STR_UPPER 1132 STR_UPPER means uppercase in the destination 1126 1133 dest_len is the maximum length allowed in the destination. If dest_len 1127 1134 is -1 then no maxiumum is used. … … 1130 1137 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags) 1131 1138 { 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1139 size_t src_len = strlen(src); 1140 pstring tmpbuf; 1141 1142 /* treat a pstring as "unlimited" length */ 1143 if (dest_len == (size_t)-1) 1144 dest_len = sizeof(pstring); 1145 1146 if (flags & STR_UPPER) { 1147 pstrcpy(tmpbuf, src); 1148 strupper_m(tmpbuf); 1149 src = tmpbuf; 1150 } 1151 1152 if (flags & STR_TERMINATE) 1153 src_len++; 1154 1155 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True); 1149 1156 } 1150 1157 1151 1158 size_t push_utf8_fstring(void *dest, const char *src) 1152 1159 { 1153 1160 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE); 1154 1161 } 1155 1162 … … 1164 1171 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1165 1172 { 1166 1167 1168 1169 1173 size_t src_len = strlen(src)+1; 1174 1175 *dest = NULL; 1176 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True); 1170 1177 } 1171 1178 … … 1180 1187 size_t push_utf8_allocate(char **dest, const char *src) 1181 1188 { 1182 1183 1184 1185 1189 size_t src_len = strlen(src)+1; 1190 1191 *dest = NULL; 1192 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True); 1186 1193 } 1187 1194 … … 1199 1206 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags) 1200 1207 { 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1208 size_t ret; 1209 1210 if (dest_len == (size_t)-1) 1211 dest_len = sizeof(pstring); 1212 1213 if (ucs2_align(base_ptr, src, flags)) { 1214 src = (const void *)((const char *)src + 1); 1215 if (src_len != (size_t)-1) 1216 src_len--; 1217 } 1218 1219 if (flags & STR_TERMINATE) { 1220 /* src_len -1 is the default for null terminated strings. */ 1221 if (src_len != (size_t)-1) { 1222 size_t len = strnlen_w((const smb_ucs2_t *)src, 1223 src_len/2); 1224 if (len < src_len/2) 1225 len++; 1226 src_len = len*2; 1227 } 1228 } 1229 1230 /* ucs2 is always a multiple of 2 bytes */ 1231 if (src_len != (size_t)-1) 1232 src_len &= ~1; 1233 1234 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True); 1235 if (ret == (size_t)-1) { 1236 return 0; 1237 } 1238 1239 if (src_len == (size_t)-1) 1240 src_len = ret*2; 1241 1242 if (dest_len && ret) { 1243 /* Did we already process the terminating zero ? */ 1244 if (dest[MIN(ret-1, dest_len-1)] != 0) { 1245 dest[MIN(ret, dest_len-1)] = 0; 1246 } 1247 } else { 1248 dest[0] = 0; 1249 } 1250 1251 return src_len; 1245 1252 } 1246 1253 1247 1254 size_t pull_ucs2_pstring(char *dest, const void *src) 1248 1255 { 1249 1256 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE); 1250 1257 } 1251 1258 1252 1259 size_t pull_ucs2_fstring(char *dest, const void *src) 1253 1260 { 1254 1261 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE); 1255 1262 } 1256 1263 … … 1265 1272 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src) 1266 1273 { 1267 1268 1269 1274 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); 1275 *dest = NULL; 1276 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True); 1270 1277 } 1271 1278 … … 1280 1287 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src) 1281 1288 { 1282 1283 1284 1289 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); 1290 *dest = NULL; 1291 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True); 1285 1292 } 1286 1293 … … 1295 1302 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1296 1303 { 1297 1298 1299 1304 size_t src_len = strlen(src)+1; 1305 *dest = NULL; 1306 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True); 1300 1307 } 1301 1308 … … 1310 1317 size_t pull_utf8_allocate(char **dest, const char *src) 1311 1318 { 1312 1313 1314 1319 size_t src_len = strlen(src)+1; 1320 *dest = NULL; 1321 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True); 1315 1322 } 1316 1323 … … 1325 1332 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1326 1333 { 1327 1328 1329 1334 size_t src_len = strlen(src)+1; 1335 *dest = NULL; 1336 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True); 1330 1337 } 1331 1338 … … 1337 1344 flags can have: 1338 1345 STR_TERMINATE means include the null termination. 1339 STR_UPPER 1340 STR_ASCII 1346 STR_UPPER means uppercase in the destination. 1347 STR_ASCII use ascii even with unicode packet. 1341 1348 STR_NOALIGN means don't do alignment. 1342 1349 dest_len is the maximum length allowed in the destination. If dest_len … … 1347 1354 { 1348 1355 #ifdef DEVELOPER 1349 1350 1351 1352 1353 1354 1356 /* We really need to zero fill here, not clobber 1357 * region, as we want to ensure that valgrind thinks 1358 * all of the outgoing buffer has been written to 1359 * so a send() or write() won't trap an error. 1360 * JRA. 1361 */ 1355 1362 #if 0 1356 1357 1363 if (dest_len != (size_t)-1) 1364 clobber_region(function, line, dest, dest_len); 1358 1365 #else 1359 1360 1366 if (dest_len != (size_t)-1) 1367 memset(dest, '\0', dest_len); 1361 1368 #endif 1362 1369 #endif 1363 1370 1364 1365 1366 1367 1368 1369 1371 if (!(flags & STR_ASCII) && \ 1372 ((flags & STR_UNICODE || \ 1373 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1374 return push_ucs2(base_ptr, dest, src, dest_len, flags); 1375 } 1376 return push_ascii(dest, src, dest_len, flags); 1370 1377 } 1371 1378 … … 1377 1384 STR_TERMINATE means the string in src is null terminated. 1378 1385 STR_UNICODE means to force as unicode. 1379 STR_ASCII 1386 STR_ASCII use ascii even with unicode packet. 1380 1387 STR_NOALIGN means don't do alignment. 1381 1388 if STR_TERMINATE is set then src_len is ignored is it is -1 … … 1388 1395 { 1389 1396 #ifdef DEVELOPER 1390 1391 1397 if (dest_len != (size_t)-1) 1398 clobber_region(function, line, dest, dest_len); 1392 1399 #endif 1393 1400 1394 1395 1396 1397 1398 1399 1401 if (!(flags & STR_ASCII) && \ 1402 ((flags & STR_UNICODE || \ 1403 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1404 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags); 1405 } 1406 return pull_ascii(dest, src, dest_len, src_len, flags); 1400 1407 } 1401 1408 1402 1409 size_t align_string(const void *base_ptr, const char *p, int flags) 1403 1410 { 1404 1405 1406 1407 1408 1409 1411 if (!(flags & STR_ASCII) && \ 1412 ((flags & STR_UNICODE || \ 1413 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1414 return ucs2_align(base_ptr, p, flags); 1415 } 1416 return 0; 1410 1417 } 1411 1418 … … 1422 1429 codepoint_t next_codepoint(const char *str, size_t *size) 1423 1430 { 1424 1425 1426 1431 /* It cannot occupy more than 4 bytes in UTF16 format */ 1432 uint8_t buf[4]; 1433 smb_iconv_t descriptor; 1427 1434 #ifdef __OS2__ 1428 1435 size_t ilen_max; 1429 1436 #endif 1430 1431 1432 1433 1434 1435 1437 size_t ilen_orig; 1438 size_t ilen; 1439 size_t olen_orig; 1440 size_t olen; 1441 const char *inbuf; 1442 char *outbuf; 1436 1443 1437 1444 #ifdef __OS2__ 1438 1445 *size = 1; 1439 1446 #endif 1440 1447 1441 1448 if ((str[0] & 0x80) == 0) { 1442 1449 #ifndef __OS2__ 1443 1450 *size = 1; 1444 1451 #endif 1445 1446 1447 1448 1449 1450 1451 1452 return (codepoint_t)str[0]; 1453 } 1454 1455 lazy_initialize_conv(); 1456 1457 descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; 1458 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 1452 1459 #ifndef __OS2__ 1453 1460 *size = 1; 1454 1461 #endif 1455 1456 1462 return INVALID_CODEPOINT; 1463 } 1457 1464 #ifdef __OS2__ 1458 1459 1460 1461 1462 1465 /* We assume that no multi-byte character can take 1466 more than 5 bytes. This is OK as we only 1467 support codepoints up to 1M */ 1468 1469 ilen_max = strnlen( str, 5 ); 1463 1470 #else 1464 1471 *size = 1; 1465 1472 #endif 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1473 ilen_orig = 1; 1474 olen_orig = 2; 1475 while( 1 ) 1476 { 1477 ilen = ilen_orig; 1478 olen = olen_orig; 1479 inbuf = str; 1480 outbuf = ( char * )buf; 1481 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 ) 1482 break; 1483 1484 switch( errno ) 1485 { 1486 case E2BIG : 1487 if( olen_orig == 2 ) 1488 olen_orig = 4; 1489 else 1490 return INVALID_CODEPOINT; 1491 break; 1492 1493 case EINVAL : 1487 1494 #ifndef __OS2__ 1488 1489 1490 1491 1495 /* We assume that no multi-byte character can take 1496 more than 5 bytes. This is OK as we only 1497 support codepoints up to 1M */ 1498 if( ilen_orig < 5 ) 1492 1499 #else 1493 1500 if( ilen_orig < ilen_max ) 1494 1501 #endif 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 } 1502 ilen_orig++; 1503 else 1504 return INVALID_CODEPOINT; 1505 break; 1506 1507 case EILSEQ : 1508 default : 1509 return INVALID_CODEPOINT; 1510 } 1511 } 1512 1513 olen = olen_orig - olen; 1514 1515 *size = ilen_orig - ilen; 1516 1517 if (olen == 2) { 1518 /* 2 byte, UTF16-LE encoded value. */ 1519 return (codepoint_t)SVAL(buf, 0); 1520 } 1521 if (olen == 4) { 1522 /* Decode a 4 byte UTF16-LE character manually. 1523 See RFC2871 for the encoding machanism. 1524 */ 1525 codepoint_t w1 = SVAL(buf,0) & ~0xD800; 1526 codepoint_t w2 = SVAL(buf,2) & ~0xDC00; 1527 1528 return (codepoint_t)0x10000 + 1529 (w1 << 10) + w2; 1530 } 1531 1532 /* no other length is valid */ 1533 return INVALID_CODEPOINT; 1534 }
Note:
See TracChangeset
for help on using the changeset viewer.