Changeset 63
- Timestamp:
- Aug 21, 2007, 11:51:59 AM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/samba/source/lib/charcnv.c
r39 r63 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 Character set conversion Extensions … … 6 6 Copyright (C) Simo Sorce 2001 7 7 Copyright (C) Martin Pool 2003 8 8 9 9 This program is free software; you can redistribute it and/or modify 10 10 it under the terms of the GNU General Public License as published by 11 11 the Free Software Foundation; either version 2 of the License, or 12 12 (at your option) any later version. 13 13 14 14 This program is distributed in the hope that it will be useful, 15 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 17 GNU General Public License for more details. 18 18 19 19 You should have received a copy of the GNU General Public License 20 20 along with this program; if not, write to the Free Software … … 28 28 char lp_failed_convert_char(void) 29 29 { 30 30 return '_'; 31 31 } 32 32 … … 35 35 * 36 36 * @brief Character-set conversion routines built on our iconv. 37 * 37 * 38 38 * @note Samba's internal character set (at least in the 3.0 series) 39 39 * is always the same as the one for the Unix filesystem. It is … … 55 55 static const char *charset_name(charset_t ch) 56 56 { 57 58 59 60 61 62 63 64 57 const char *ret = NULL; 58 59 if (ch == CH_UTF16LE) ret = "UTF-16LE"; 60 else if (ch == CH_UTF16BE) ret = "UTF-16BE"; 61 else if (ch == CH_UNIX) ret = lp_unix_charset(); 62 else if (ch == CH_DOS) ret = lp_dos_charset(); 63 else if (ch == CH_DISPLAY) ret = lp_display_charset(); 64 else if (ch == CH_UTF8) ret = "UTF8"; 65 65 66 66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET) 67 68 67 if (ret && !strcmp(ret, "LOCALE")) { 68 const char *ln = NULL; 69 69 70 70 #ifdef HAVE_SETLOCALE 71 71 setlocale(LC_ALL, ""); 72 72 #endif 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 73 ln = nl_langinfo(CODESET); 74 if (ln) { 75 /* Check whether the charset name is supported 76 by iconv */ 77 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE"); 78 if (handle == (smb_iconv_t) -1) { 79 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln)); 80 ln = NULL; 81 } else { 82 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln)); 83 smb_iconv_close(handle); 84 } 85 } 86 ret = ln; 87 } 88 88 #endif 89 89 90 91 90 if (!ret || !*ret) ret = "ASCII"; 91 return ret; 92 92 } 93 93 94 94 void lazy_initialize_conv(void) 95 95 { 96 97 98 99 100 101 102 96 static int initialized = False; 97 98 if (!initialized) { 99 initialized = True; 100 load_case_tables(); 101 init_iconv(); 102 } 103 103 } 104 104 … … 108 108 void gfree_charcnv(void) 109 109 { 110 111 112 113 114 115 116 117 118 119 110 int c1, c2; 111 112 for (c1=0;c1<NUM_CHARSETS;c1++) { 113 for (c2=0;c2<NUM_CHARSETS;c2++) { 114 if ( conv_handles[c1][c2] ) { 115 smb_iconv_close( conv_handles[c1][c2] ); 116 conv_handles[c1][c2] = 0; 117 } 118 } 119 } 120 120 } 121 121 … … 129 129 void init_iconv(void) 130 130 { 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 131 int c1, c2; 132 BOOL did_reload = False; 133 134 /* so that charset_name() works we need to get the UNIX<->UCS2 going 135 first */ 136 if (!conv_handles[CH_UNIX][CH_UTF16LE]) 137 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII"); 138 139 if (!conv_handles[CH_UTF16LE][CH_UNIX]) 140 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE)); 141 142 for (c1=0;c1<NUM_CHARSETS;c1++) { 143 for (c2=0;c2<NUM_CHARSETS;c2++) { 144 const char *n1 = charset_name((charset_t)c1); 145 const char *n2 = charset_name((charset_t)c2); 146 if (conv_handles[c1][c2] && 147 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 && 148 strcmp(n2, conv_handles[c1][c2]->to_name) == 0) 149 continue; 150 151 did_reload = True; 152 153 if (conv_handles[c1][c2]) 154 smb_iconv_close(conv_handles[c1][c2]); 155 156 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 157 if (conv_handles[c1][c2] == (smb_iconv_t)-1) { 158 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n", 159 charset_name((charset_t)c1), charset_name((charset_t)c2))); 160 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) { 161 n1 = "ASCII"; 162 } 163 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) { 164 n2 = "ASCII"; 165 } 166 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n", 167 n1, n2 )); 168 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 169 if (!conv_handles[c1][c2]) { 170 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2)); 171 smb_panic("init_iconv: conv_handle initialization failed."); 172 } 173 } 174 } 175 } 176 177 if (did_reload) { 178 /* XXX: Does this really get called every time the dos 179 * codepage changes? */ 180 /* XXX: Is the did_reload test too strict? */ 181 conv_silent = True; 182 init_doschar_table(); 183 init_valid_table(); 184 conv_silent = False; 185 } 186 186 } 187 187 … … 202 202 203 203 static size_t convert_string_internal(charset_t from, charset_t to, 204 void const *src, size_t srclen, 205 206 { 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 204 void const *src, size_t srclen, 205 void *dest, size_t destlen, BOOL allow_bad_conv) 206 { 207 size_t i_len, o_len; 208 size_t retval; 209 const char* inbuf = (const char*)src; 210 char* outbuf = (char*)dest; 211 smb_iconv_t descriptor; 212 213 lazy_initialize_conv(); 214 215 descriptor = conv_handles[from][to]; 216 217 if (srclen == (size_t)-1) { 218 if (from == CH_UTF16LE || from == CH_UTF16BE) { 219 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2; 220 } else { 221 srclen = strlen((const char *)src)+1; 222 } 223 } 224 225 226 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 227 if (!conv_silent) 228 DEBUG(0,("convert_string_internal: Conversion not supported.\n")); 229 return (size_t)-1; 230 } 231 232 i_len=srclen; 233 o_len=destlen; 234 234 235 235 again: 236 236 237 238 239 240 241 242 243 244 245 246 247 248 249 reason="No more room"; 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 237 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len); 238 if(retval==(size_t)-1) { 239 const char *reason="unknown error"; 240 switch(errno) { 241 case EINVAL: 242 reason="Incomplete multibyte sequence"; 243 if (!conv_silent) 244 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 245 if (allow_bad_conv) 246 goto use_as_is; 247 break; 248 case E2BIG: 249 reason="No more room"; 250 if (!conv_silent) { 251 if (from == CH_UNIX) { 252 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n", 253 charset_name(from), charset_name(to), 254 (unsigned int)srclen, (unsigned int)destlen, (const char *)src)); 255 } else { 256 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n", 257 charset_name(from), charset_name(to), 258 (unsigned int)srclen, (unsigned int)destlen)); 259 } 260 } 261 break; 262 case EILSEQ: 263 reason="Illegal multibyte sequence"; 264 if (!conv_silent) 265 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 266 if (allow_bad_conv) 267 goto use_as_is; 268 break; 269 default: 270 if (!conv_silent) 271 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 272 break; 273 } 274 /* smb_panic(reason); */ 275 } 276 return destlen-o_len; 277 277 278 278 use_as_is: 279 279 280 /* 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 280 /* 281 * Conversion not supported. This is actually an error, but there are so 282 * many misconfigured iconv systems and smb.conf's out there we can't just 283 * fail. Do a very bad conversion instead.... JRA. 284 */ 285 286 { 287 if (o_len == 0 || i_len == 0) 288 return destlen - o_len; 289 290 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) && 291 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) { 292 /* Can't convert from utf16 any endian to multibyte. 293 Replace with the default fail char. 294 */ 295 if (i_len < 2) 296 return destlen - o_len; 297 if (i_len >= 2) { 298 *outbuf = lp_failed_convert_char(); 299 300 outbuf++; 301 o_len--; 302 303 inbuf += 2; 304 i_len -= 2; 305 } 306 307 if (o_len == 0 || i_len == 0) 308 return destlen - o_len; 309 310 /* Keep trying with the next char... */ 311 goto again; 312 313 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 314 /* Can't convert to UTF16LE - just widen by adding the 315 default fail char then zero. 316 */ 317 if (o_len < 2) 318 return destlen - o_len; 319 320 outbuf[0] = lp_failed_convert_char(); 321 outbuf[1] = '\0'; 322 323 inbuf++; 324 i_len--; 325 326 outbuf += 2; 327 o_len -= 2; 328 329 if (o_len == 0 || i_len == 0) 330 return destlen - o_len; 331 332 /* Keep trying with the next char... */ 333 goto again; 334 335 } else if (from != CH_UTF16LE && from != CH_UTF16BE && 336 to != CH_UTF16LE && to != CH_UTF16BE) { 337 /* Failed multibyte to multibyte. Just copy the default fail char and 338 try again. */ 339 outbuf[0] = lp_failed_convert_char(); 340 341 inbuf++; 342 i_len--; 343 344 outbuf++; 345 o_len--; 346 347 if (o_len == 0 || i_len == 0) 348 return destlen - o_len; 349 350 /* Keep trying with the next char... */ 351 goto again; 352 353 } else { 354 /* Keep compiler happy.... */ 355 return destlen - o_len; 356 } 357 } 358 358 } 359 359 … … 376 376 377 377 size_t convert_string(charset_t from, charset_t to, 378 void const *src, size_t srclen, 379 380 { 381 382 383 384 385 378 void const *src, size_t srclen, 379 void *dest, size_t destlen, BOOL allow_bad_conv) 380 { 381 /* 382 * NB. We deliberately don't do a strlen here if srclen == -1. 383 * This is very expensive over millions of calls and is taken 384 * care of in the slow path in convert_string_internal. JRA. 385 */ 386 386 387 387 #ifdef DEVELOPER 388 388 SMB_ASSERT(destlen != (size_t)-1); 389 389 #endif 390 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 391 if (srclen == 0) 392 return 0; 393 394 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) { 395 const unsigned char *p = (const unsigned char *)src; 396 unsigned char *q = (unsigned char *)dest; 397 size_t slen = srclen; 398 size_t dlen = destlen; 399 unsigned char lastp = '\0'; 400 size_t retval = 0; 401 402 /* If all characters are ascii, fast path here. */ 403 while (slen && dlen) { 404 if ((lastp = *p) <= 0x7f) { 405 *q++ = *p++; 406 if (slen != (size_t)-1) { 407 slen--; 408 } 409 dlen--; 410 retval++; 411 if (!lastp) 412 break; 413 } else { 414 414 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 415 415 goto general_case; 416 416 #else 417 417 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 418 418 #endif 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 419 } 420 } 421 if (!dlen) { 422 /* Even if we fast path we should note if we ran out of room. */ 423 if (((slen != (size_t)-1) && slen) || 424 ((slen == (size_t)-1) && lastp)) { 425 errno = E2BIG; 426 } 427 } 428 return retval; 429 } else if (from == CH_UTF16LE && to != CH_UTF16LE) { 430 const unsigned char *p = (const unsigned char *)src; 431 unsigned char *q = (unsigned char *)dest; 432 size_t retval = 0; 433 size_t slen = srclen; 434 size_t dlen = destlen; 435 unsigned char lastp = '\0'; 436 437 /* If all characters are ascii, fast path here. */ 438 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) { 439 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { 440 *q++ = *p; 441 if (slen != (size_t)-1) { 442 slen -= 2; 443 } 444 p += 2; 445 dlen--; 446 retval++; 447 if (!lastp) 448 break; 449 } else { 450 450 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 451 451 goto general_case; 452 452 #else 453 453 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 454 454 #endif 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 455 } 456 } 457 if (!dlen) { 458 /* Even if we fast path we should note if we ran out of room. */ 459 if (((slen != (size_t)-1) && slen) || 460 ((slen == (size_t)-1) && lastp)) { 461 errno = E2BIG; 462 } 463 } 464 return retval; 465 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 466 const unsigned char *p = (const unsigned char *)src; 467 unsigned char *q = (unsigned char *)dest; 468 size_t retval = 0; 469 size_t slen = srclen; 470 size_t dlen = destlen; 471 unsigned char lastp = '\0'; 472 473 /* If all characters are ascii, fast path here. */ 474 while (slen && (dlen >= 2)) { 475 if ((lastp = *p) <= 0x7F) { 476 *q++ = *p++; 477 *q++ = '\0'; 478 if (slen != (size_t)-1) { 479 slen--; 480 } 481 dlen -= 2; 482 retval += 2; 483 if (!lastp) 484 break; 485 } else { 486 486 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 487 487 goto general_case; 488 488 #else 489 489 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 490 490 #endif 491 492 493 494 495 496 497 498 499 500 501 491 } 492 } 493 if (!dlen) { 494 /* Even if we fast path we should note if we ran out of room. */ 495 if (((slen != (size_t)-1) && slen) || 496 ((slen == (size_t)-1) && lastp)) { 497 errno = E2BIG; 498 } 499 } 500 return retval; 501 } 502 502 503 503 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 504 504 general_case: 505 505 #endif 506 506 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv); 507 507 } 508 508 … … 518 518 * 519 519 * Ensure the srclen contains the terminating zero. 520 * 520 * 521 521 * I hate the goto's in this function. It's embarressing..... 522 522 * There has to be a cleaner way to do this. JRA. … … 524 524 525 525 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to, 526 527 { 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 526 void const *src, size_t srclen, void *dst, BOOL allow_bad_conv) 527 { 528 size_t i_len, o_len, destlen = MAX(srclen, 512); 529 size_t retval; 530 const char *inbuf = (const char *)src; 531 char *outbuf = NULL, *ob = NULL; 532 smb_iconv_t descriptor; 533 void **dest = (void **)dst; 534 535 *dest = NULL; 536 537 if (src == NULL || srclen == (size_t)-1) 538 return (size_t)-1; 539 if (srclen == 0) 540 return 0; 541 542 lazy_initialize_conv(); 543 544 descriptor = conv_handles[from][to]; 545 546 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 547 if (!conv_silent) 548 DEBUG(0,("convert_string_allocate: Conversion not supported.\n")); 549 return (size_t)-1; 550 } 551 551 552 552 convert: 553 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 554 if ((destlen*2) < destlen) { 555 /* wrapped ! abort. */ 556 if (!conv_silent) 557 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n")); 558 if (!ctx) 559 SAFE_FREE(outbuf); 560 return (size_t)-1; 561 } else { 562 destlen = destlen * 2; 563 } 564 565 if (ctx) { 566 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen); 567 } else { 568 ob = (char *)SMB_REALLOC(ob, destlen); 569 } 570 571 if (!ob) { 572 DEBUG(0, ("convert_string_allocate: realloc failed!\n")); 573 return (size_t)-1; 574 } 575 outbuf = ob; 576 i_len = srclen; 577 o_len = destlen; 578 578 579 579 again: 580 580 581 582 583 584 if(retval == (size_t)-1){585 586 587 588 589 590 591 592 593 594 595 goto convert; 596 597 598 599 600 601 602 603 604 605 606 607 608 581 retval = smb_iconv(descriptor, 582 &inbuf, &i_len, 583 &outbuf, &o_len); 584 if(retval == (size_t)-1) { 585 const char *reason="unknown error"; 586 switch(errno) { 587 case EINVAL: 588 reason="Incomplete multibyte sequence"; 589 if (!conv_silent) 590 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf)); 591 if (allow_bad_conv) 592 goto use_as_is; 593 break; 594 case E2BIG: 595 goto convert; 596 case EILSEQ: 597 reason="Illegal multibyte sequence"; 598 if (!conv_silent) 599 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf)); 600 if (allow_bad_conv) 601 goto use_as_is; 602 break; 603 } 604 if (!conv_silent) 605 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf)); 606 /* smb_panic(reason); */ 607 return (size_t)-1; 608 } 609 609 610 610 out: 611 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 612 destlen = destlen - o_len; 613 if (ctx) { 614 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen); 615 } else { 616 ob = (char *)SMB_REALLOC(ob,destlen); 617 } 618 619 if (destlen && !ob) { 620 DEBUG(0, ("convert_string_allocate: out of memory!\n")); 621 return (size_t)-1; 622 } 623 624 *dest = ob; 625 return destlen; 626 626 627 627 use_as_is: 628 628 629 /* 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 629 /* 630 * Conversion not supported. This is actually an error, but there are so 631 * many misconfigured iconv systems and smb.conf's out there we can't just 632 * fail. Do a very bad conversion instead.... JRA. 633 */ 634 635 { 636 if (o_len == 0 || i_len == 0) 637 goto out; 638 639 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) && 640 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) { 641 /* Can't convert from utf16 any endian to multibyte. 642 Replace with the default fail char. 643 */ 644 645 if (i_len < 2) 646 goto out; 647 648 if (i_len >= 2) { 649 *outbuf = lp_failed_convert_char(); 650 651 outbuf++; 652 o_len--; 653 654 inbuf += 2; 655 i_len -= 2; 656 } 657 658 if (o_len == 0 || i_len == 0) 659 goto out; 660 661 /* Keep trying with the next char... */ 662 goto again; 663 664 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 665 /* Can't convert to UTF16LE - just widen by adding the 666 default fail char then zero. 667 */ 668 if (o_len < 2) 669 goto out; 670 671 outbuf[0] = lp_failed_convert_char(); 672 outbuf[1] = '\0'; 673 674 inbuf++; 675 i_len--; 676 677 outbuf += 2; 678 o_len -= 2; 679 680 if (o_len == 0 || i_len == 0) 681 goto out; 682 683 /* Keep trying with the next char... */ 684 goto again; 685 686 } else if (from != CH_UTF16LE && from != CH_UTF16BE && 687 to != CH_UTF16LE && to != CH_UTF16BE) { 688 /* Failed multibyte to multibyte. Just copy the default fail char and 689 try again. */ 690 outbuf[0] = lp_failed_convert_char(); 691 692 inbuf++; 693 i_len--; 694 695 outbuf++; 696 o_len--; 697 698 if (o_len == 0 || i_len == 0) 699 goto out; 700 701 /* Keep trying with the next char... */ 702 goto again; 703 704 } else { 705 /* Keep compiler happy.... */ 706 goto out; 707 } 708 } 709 709 } 710 710 … … 713 713 * 714 714 * @param srclen length of source buffer. 715 * @param dest always set at least to NULL 715 * @param dest always set at least to NULL 716 716 * @note -1 is not accepted for srclen. 717 717 * … … 719 719 **/ 720 720 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to, 721 722 723 { 724 725 726 727 728 729 730 731 732 733 721 void const *src, size_t srclen, void *dst, 722 BOOL allow_bad_conv) 723 { 724 void **dest = (void **)dst; 725 size_t dest_len; 726 727 *dest = NULL; 728 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv); 729 if (dest_len == (size_t)-1) 730 return (size_t)-1; 731 if (*dest == NULL) 732 return (size_t)-1; 733 return dest_len; 734 734 } 735 735 736 736 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen) 737 737 { 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 738 size_t size; 739 smb_ucs2_t *buffer; 740 741 size = push_ucs2_allocate(&buffer, src); 742 if (size == (size_t)-1) { 743 smb_panic("failed to create UCS2 buffer"); 744 } 745 if (!strupper_w(buffer) && (dest == src)) { 746 free(buffer); 747 return srclen; 748 } 749 750 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True); 751 free(buffer); 752 return size; 753 753 } 754 754 … … 760 760 char *strdup_upper(const char *s) 761 761 { 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 762 pstring out_buffer; 763 const unsigned char *p = (const unsigned char *)s; 764 unsigned char *q = (unsigned char *)out_buffer; 765 766 /* this is quite a common operation, so we want it to be 767 fast. We optimise for the ascii case, knowing that all our 768 supported multi-byte character sets are ascii-compatible 769 (ie. they match for the first 128 chars) */ 770 771 while (1) { 772 if (*p & 0x80) 773 break; 774 *q++ = toupper_ascii(*p); 775 if (!*p) 776 break; 777 p++; 778 if (p - ( const unsigned char *)s >= sizeof(pstring)) 779 break; 780 } 781 782 if (*p) { 783 /* MB case. */ 784 size_t size; 785 wpstring buffer; 786 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True); 787 if (size == (size_t)-1) { 788 return NULL; 789 } 790 791 strupper_w(buffer); 792 793 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True); 794 if (size == (size_t)-1) { 795 return NULL; 796 } 797 } 798 799 return SMB_STRDUP(out_buffer); 800 800 } 801 801 802 802 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen) 803 803 { 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 804 size_t size; 805 smb_ucs2_t *buffer = NULL; 806 807 size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen, 808 (void **)(void *)&buffer, True); 809 if (size == (size_t)-1 || !buffer) { 810 smb_panic("failed to create UCS2 buffer"); 811 } 812 if (!strlower_w(buffer) && (dest == src)) { 813 SAFE_FREE(buffer); 814 return srclen; 815 } 816 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True); 817 SAFE_FREE(buffer); 818 return size; 819 819 } 820 820 … … 825 825 char *strdup_lower(const char *s) 826 826 { 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 827 size_t size; 828 smb_ucs2_t *buffer = NULL; 829 char *out_buffer; 830 831 size = push_ucs2_allocate(&buffer, s); 832 if (size == -1 || !buffer) { 833 return NULL; 834 } 835 836 strlower_w(buffer); 837 838 size = pull_ucs2_allocate(&out_buffer, buffer); 839 SAFE_FREE(buffer); 840 841 if (size == (size_t)-1) { 842 return NULL; 843 } 844 845 return out_buffer; 846 846 } 847 847 848 848 static size_t ucs2_align(const void *base_ptr, const void *p, int flags) 849 849 { 850 851 852 850 if (flags & (STR_NOALIGN|STR_ASCII)) 851 return 0; 852 return PTR_DIFF(p, base_ptr) & 1; 853 853 } 854 854 … … 870 870 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags) 871 871 { 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 872 size_t src_len = strlen(src); 873 pstring tmpbuf; 874 875 /* treat a pstring as "unlimited" length */ 876 if (dest_len == (size_t)-1) 877 dest_len = sizeof(pstring); 878 879 if (flags & STR_UPPER) { 880 pstrcpy(tmpbuf, src); 881 strupper_m(tmpbuf); 882 src = tmpbuf; 883 } 884 885 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) 886 src_len++; 887 888 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True); 889 889 } 890 890 891 891 size_t push_ascii_fstring(void *dest, const char *src) 892 892 { 893 893 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE); 894 894 } 895 895 896 896 size_t push_ascii_pstring(void *dest, const char *src) 897 897 { 898 898 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE); 899 899 } 900 900 … … 906 906 size_t push_ascii_nstring(void *dest, const char *src) 907 907 { 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 908 size_t i, buffer_len, dest_len; 909 smb_ucs2_t *buffer; 910 911 conv_silent = True; 912 buffer_len = push_ucs2_allocate(&buffer, src); 913 if (buffer_len == (size_t)-1) { 914 smb_panic("failed to create UCS2 buffer"); 915 } 916 917 /* We're using buffer_len below to count ucs2 characters, not bytes. */ 918 buffer_len /= sizeof(smb_ucs2_t); 919 920 dest_len = 0; 921 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) { 922 unsigned char mb[10]; 923 /* Convert one smb_ucs2_t character at a time. */ 924 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False); 925 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) { 926 memcpy((char *)dest + dest_len, mb, mb_len); 927 dest_len += mb_len; 928 } else { 929 errno = E2BIG; 930 break; 931 } 932 } 933 ((char *)dest)[dest_len] = '\0'; 934 935 SAFE_FREE(buffer); 936 conv_silent = False; 937 return dest_len; 938 938 } 939 939 … … 955 955 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) 956 956 { 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 957 size_t ret; 958 959 if (dest_len == (size_t)-1) 960 dest_len = sizeof(pstring); 961 962 if (flags & STR_TERMINATE) { 963 if (src_len == (size_t)-1) { 964 src_len = strlen((const char *)src) + 1; 965 } else { 966 size_t len = strnlen((const char *)src, src_len); 967 if (len < src_len) 968 len++; 969 src_len = len; 970 } 971 } 972 973 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True); 974 if (ret == (size_t)-1) { 975 ret = 0; 976 dest_len = 0; 977 } 978 979 if (dest_len && ret) { 980 /* Did we already process the terminating zero ? */ 981 if (dest[MIN(ret-1, dest_len-1)] != 0) { 982 dest[MIN(ret, dest_len-1)] = 0; 983 } 984 } else { 985 dest[0] = 0; 986 } 987 988 return src_len; 989 989 } 990 990 991 991 size_t pull_ascii_pstring(char *dest, const void *src) 992 992 { 993 993 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE); 994 994 } 995 995 996 996 size_t pull_ascii_fstring(char *dest, const void *src) 997 997 { 998 998 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE); 999 999 } 1000 1000 … … 1003 1003 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src) 1004 1004 { 1005 1005 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE); 1006 1006 } 1007 1007 … … 1025 1025 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags) 1026 1026 { 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1027 size_t len=0; 1028 size_t src_len; 1029 size_t ret; 1030 1031 /* treat a pstring as "unlimited" length */ 1032 if (dest_len == (size_t)-1) 1033 dest_len = sizeof(pstring); 1034 1035 if (flags & STR_TERMINATE) 1036 src_len = (size_t)-1; 1037 else 1038 src_len = strlen(src); 1039 1040 if (ucs2_align(base_ptr, dest, flags)) { 1041 *(char *)dest = 0; 1042 dest = (void *)((char *)dest + 1); 1043 if (dest_len) 1044 dest_len--; 1045 len++; 1046 } 1047 1048 /* ucs2 is always a multiple of 2 bytes */ 1049 dest_len &= ~1; 1050 1051 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True); 1052 if (ret == (size_t)-1) { 1053 return 0; 1054 } 1055 1056 len += ret; 1057 1058 if (flags & STR_UPPER) { 1059 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest; 1060 size_t i; 1061 1062 /* We check for i < (ret / 2) below as the dest string isn't null 1063 terminated if STR_TERMINATE isn't set. */ 1064 1065 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) { 1066 smb_ucs2_t v = toupper_w(dest_ucs2[i]); 1067 if (v != dest_ucs2[i]) { 1068 dest_ucs2[i] = v; 1069 } 1070 } 1071 } 1072 1073 return len; 1074 1074 } 1075 1075 … … 1079 1079 * allocating a buffer using talloc(). 1080 1080 * 1081 * @param dest always set at least to NULL 1081 * @param dest always set at least to NULL 1082 1082 * 1083 1083 * @returns The number of bytes occupied by the string in the destination … … 1086 1086 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) 1087 1087 { 1088 1089 1090 1091 1088 size_t src_len = strlen(src)+1; 1089 1090 *dest = NULL; 1091 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True); 1092 1092 } 1093 1093 … … 1096 1096 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer 1097 1097 * 1098 * @param dest always set at least to NULL 1098 * @param dest always set at least to NULL 1099 1099 * 1100 1100 * @returns The number of bytes occupied by the string in the destination … … 1104 1104 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src) 1105 1105 { 1106 1107 1108 1109 1106 size_t src_len = strlen(src)+1; 1107 1108 *dest = NULL; 1109 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True); 1110 1110 } 1111 1111 … … 1122 1122 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags) 1123 1123 { 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1124 size_t src_len = strlen(src); 1125 pstring tmpbuf; 1126 1127 /* treat a pstring as "unlimited" length */ 1128 if (dest_len == (size_t)-1) 1129 dest_len = sizeof(pstring); 1130 1131 if (flags & STR_UPPER) { 1132 pstrcpy(tmpbuf, src); 1133 strupper_m(tmpbuf); 1134 src = tmpbuf; 1135 } 1136 1137 if (flags & STR_TERMINATE) 1138 src_len++; 1139 1140 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True); 1141 1141 } 1142 1142 1143 1143 size_t push_utf8_fstring(void *dest, const char *src) 1144 1144 { 1145 1145 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE); 1146 1146 } 1147 1147 … … 1149 1149 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc 1150 1150 * 1151 * @param dest always set at least to NULL 1151 * @param dest always set at least to NULL 1152 1152 * 1153 1153 * @returns The number of bytes occupied by the string in the destination … … 1156 1156 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1157 1157 { 1158 1159 1160 1161 1158 size_t src_len = strlen(src)+1; 1159 1160 *dest = NULL; 1161 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True); 1162 1162 } 1163 1163 … … 1165 1165 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer 1166 1166 * 1167 * @param dest always set at least to NULL 1167 * @param dest always set at least to NULL 1168 1168 * 1169 1169 * @returns The number of bytes occupied by the string in the destination … … 1172 1172 size_t push_utf8_allocate(char **dest, const char *src) 1173 1173 { 1174 1175 1176 1177 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True); 1174 size_t src_len = strlen(src)+1; 1175 1176 *dest = NULL; 1177 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True); 1178 1178 } 1179 1179 … … 1191 1191 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags) 1192 1192 { 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1193 size_t ret; 1194 1195 if (dest_len == (size_t)-1) 1196 dest_len = sizeof(pstring); 1197 1198 if (ucs2_align(base_ptr, src, flags)) { 1199 src = (const void *)((const char *)src + 1); 1200 if (src_len != (size_t)-1) 1201 src_len--; 1202 } 1203 1204 if (flags & STR_TERMINATE) { 1205 /* src_len -1 is the default for null terminated strings. */ 1206 if (src_len != (size_t)-1) { 1207 size_t len = strnlen_w((const smb_ucs2_t *)src, 1208 src_len/2); 1209 if (len < src_len/2) 1210 len++; 1211 src_len = len*2; 1212 } 1213 } 1214 1215 /* ucs2 is always a multiple of 2 bytes */ 1216 if (src_len != (size_t)-1) 1217 src_len &= ~1; 1218 1219 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True); 1220 if (ret == (size_t)-1) { 1221 return 0; 1222 } 1223 1224 if (src_len == (size_t)-1) 1225 src_len = ret*2; 1226 1227 if (dest_len && ret) { 1228 /* Did we already process the terminating zero ? */ 1229 if (dest[MIN(ret-1, dest_len-1)] != 0) { 1230 dest[MIN(ret, dest_len-1)] = 0; 1231 } 1232 } else { 1233 dest[0] = 0; 1234 } 1235 1236 return src_len; 1237 1237 } 1238 1238 1239 1239 size_t pull_ucs2_pstring(char *dest, const void *src) 1240 1240 { 1241 1241 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE); 1242 1242 } 1243 1243 1244 1244 size_t pull_ucs2_fstring(char *dest, const void *src) 1245 1245 { 1246 1246 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE); 1247 1247 } 1248 1248 … … 1250 1250 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc 1251 1251 * 1252 * @param dest always set at least to NULL 1252 * @param dest always set at least to NULL 1253 1253 * 1254 1254 * @returns The number of bytes occupied by the string in the destination … … 1257 1257 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src) 1258 1258 { 1259 1260 1261 1259 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); 1260 *dest = NULL; 1261 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True); 1262 1262 } 1263 1263 … … 1265 1265 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer 1266 1266 * 1267 * @param dest always set at least to NULL 1267 * @param dest always set at least to NULL 1268 1268 * 1269 1269 * @returns The number of bytes occupied by the string in the destination … … 1272 1272 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src) 1273 1273 { 1274 1275 1276 1274 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); 1275 *dest = NULL; 1276 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True); 1277 1277 } 1278 1278 … … 1280 1280 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc 1281 1281 * 1282 * @param dest always set at least to NULL 1282 * @param dest always set at least to NULL 1283 1283 * 1284 1284 * @returns The number of bytes occupied by the string in the destination … … 1287 1287 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1288 1288 { 1289 1290 1291 1289 size_t src_len = strlen(src)+1; 1290 *dest = NULL; 1291 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True); 1292 1292 } 1293 1293 … … 1295 1295 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer 1296 1296 * 1297 * @param dest always set at least to NULL 1297 * @param dest always set at least to NULL 1298 1298 * 1299 1299 * @returns The number of bytes occupied by the string in the destination … … 1302 1302 size_t pull_utf8_allocate(char **dest, const char *src) 1303 1303 { 1304 1305 1306 1307 } 1308 1304 size_t src_len = strlen(src)+1; 1305 *dest = NULL; 1306 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True); 1307 } 1308 1309 1309 /** 1310 1310 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc 1311 1311 * 1312 * @param dest always set at least to NULL 1312 * @param dest always set at least to NULL 1313 1313 * 1314 1314 * @returns The number of bytes occupied by the string in the destination … … 1317 1317 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1318 1318 { 1319 1320 1321 1319 size_t src_len = strlen(src)+1; 1320 *dest = NULL; 1321 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True); 1322 1322 } 1323 1323 1324 1324 /** 1325 1325 Copy a string from a char* src to a unicode or ascii 1326 dos codepage destination choosing unicode or ascii based on the 1326 dos codepage destination choosing unicode or ascii based on the 1327 1327 flags in the SMB buffer starting at base_ptr. 1328 1328 Return the number of bytes occupied by the string in the destination. … … 1339 1339 { 1340 1340 #ifdef DEVELOPER 1341 1342 1343 1344 1345 1346 1341 /* We really need to zero fill here, not clobber 1342 * region, as we want to ensure that valgrind thinks 1343 * all of the outgoing buffer has been written to 1344 * so a send() or write() won't trap an error. 1345 * JRA. 1346 */ 1347 1347 #if 0 1348 1349 1348 if (dest_len != (size_t)-1) 1349 clobber_region(function, line, dest, dest_len); 1350 1350 #else 1351 1352 1351 if (dest_len != (size_t)-1) 1352 memset(dest, '\0', dest_len); 1353 1353 #endif 1354 1354 #endif 1355 1355 1356 1357 1358 1359 1360 1361 1356 if (!(flags & STR_ASCII) && \ 1357 ((flags & STR_UNICODE || \ 1358 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1359 return push_ucs2(base_ptr, dest, src, dest_len, flags); 1360 } 1361 return push_ascii(dest, src, dest_len, flags); 1362 1362 } 1363 1363 … … 1380 1380 { 1381 1381 #ifdef DEVELOPER 1382 1383 1382 if (dest_len != (size_t)-1) 1383 clobber_region(function, line, dest, dest_len); 1384 1384 #endif 1385 1385 1386 1387 1388 1389 1390 1391 1386 if (!(flags & STR_ASCII) && \ 1387 ((flags & STR_UNICODE || \ 1388 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1389 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags); 1390 } 1391 return pull_ascii(dest, src, dest_len, src_len, flags); 1392 1392 } 1393 1393 1394 1394 size_t align_string(const void *base_ptr, const char *p, int flags) 1395 1395 { 1396 1397 1398 1399 1400 1401 1396 if (!(flags & STR_ASCII) && \ 1397 ((flags & STR_UNICODE || \ 1398 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1399 return ucs2_align(base_ptr, p, flags); 1400 } 1401 return 0; 1402 1402 } 1403 1403 … … 1414 1414 codepoint_t next_codepoint(const char *str, size_t *size) 1415 1415 { 1416 /* It cannot occupy more than 4 bytes in UTF16 format */ 1417 uint8_t buf[4]; 1418 smb_iconv_t descriptor; 1419 size_t ilen_orig; 1420 size_t ilen; 1421 size_t olen; 1422 char *outbuf; 1423 1424 if ((str[0] & 0x80) == 0) { 1425 *size = 1; 1426 return (codepoint_t)str[0]; 1427 } 1428 1429 /* We assume that no multi-byte character can take 1430 more than 5 bytes. This is OK as we only 1431 support codepoints up to 1M */ 1432 1433 ilen_orig = strnlen(str, 5); 1434 ilen = ilen_orig; 1435 1436 lazy_initialize_conv(); 1437 1438 descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; 1439 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 1440 *size = 1; 1441 return INVALID_CODEPOINT; 1442 } 1443 1444 /* This looks a little strange, but it is needed to cope 1445 with codepoints above 64k which are encoded as per RFC2781. */ 1446 olen = 2; 1447 outbuf = (char *)buf; 1448 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); 1449 if (olen == 2) { 1450 /* We failed to convert to a 2 byte character. 1451 See if we can convert to a 4 UTF16-LE byte char encoding. 1452 */ 1453 olen = 4; 1454 outbuf = (char *)buf; 1455 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); 1456 if (olen == 4) { 1457 /* We didn't convert any bytes */ 1458 *size = 1; 1459 return INVALID_CODEPOINT; 1460 } 1461 olen = 4 - olen; 1462 } else { 1463 olen = 2 - olen; 1464 } 1465 1466 *size = ilen_orig - ilen; 1467 1468 if (olen == 2) { 1469 /* 2 byte, UTF16-LE encoded value. */ 1470 return (codepoint_t)SVAL(buf, 0); 1471 } 1472 if (olen == 4) { 1473 /* Decode a 4 byte UTF16-LE character manually. 1474 See RFC2871 for the encoding machanism. 1475 */ 1476 codepoint_t w1 = SVAL(buf,0) & ~0xD800; 1477 codepoint_t w2 = SVAL(buf,2) & ~0xDC00; 1478 1479 return (codepoint_t)0x10000 + 1480 (w1 << 10) + w2; 1481 } 1482 1483 /* no other length is valid */ 1484 return INVALID_CODEPOINT; 1485 } 1416 /* It cannot occupy more than 4 bytes in UTF16 format */ 1417 uint8_t buf[4]; 1418 smb_iconv_t descriptor; 1419 size_t ilen_orig; 1420 size_t ilen; 1421 size_t olen_orig; 1422 size_t olen; 1423 const char *inbuf; 1424 char *outbuf; 1425 1426 if ((str[0] & 0x80) == 0) { 1427 *size = 1; 1428 return (codepoint_t)str[0]; 1429 } 1430 1431 lazy_initialize_conv(); 1432 1433 descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; 1434 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 1435 *size = 1; 1436 return INVALID_CODEPOINT; 1437 } 1438 1439 *size = 1; 1440 ilen_orig = 1; 1441 olen_orig = 2; 1442 while( 1 ) 1443 { 1444 ilen = ilen_orig; 1445 olen = olen_orig; 1446 inbuf = str; 1447 outbuf = ( char * )buf; 1448 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 ) 1449 break; 1450 1451 switch( errno ) 1452 { 1453 case E2BIG : 1454 if( olen_orig == 2 ) 1455 olen_orig = 4; 1456 else 1457 return INVALID_CODEPOINT; 1458 break; 1459 1460 case EINVAL : 1461 /* We assume that no multi-byte character can take 1462 more than 5 bytes. This is OK as we only 1463 support codepoints up to 1M */ 1464 if( ilen_orig < 5 ) 1465 ilen_orig++; 1466 else 1467 return INVALID_CODEPOINT; 1468 break; 1469 1470 case EILSEQ : 1471 default : 1472 return INVALID_CODEPOINT; 1473 } 1474 } 1475 1476 olen = olen_orig - olen; 1477 1478 *size = ilen_orig - ilen; 1479 1480 if (olen == 2) { 1481 /* 2 byte, UTF16-LE encoded value. */ 1482 return (codepoint_t)SVAL(buf, 0); 1483 } 1484 if (olen == 4) { 1485 /* Decode a 4 byte UTF16-LE character manually. 1486 See RFC2871 for the encoding machanism. 1487 */ 1488 codepoint_t w1 = SVAL(buf,0) & ~0xD800; 1489 codepoint_t w2 = SVAL(buf,2) & ~0xDC00; 1490 1491 return (codepoint_t)0x10000 + 1492 (w1 << 10) + w2; 1493 } 1494 1495 /* no other length is valid */ 1496 return INVALID_CODEPOINT; 1497 }
Note:
See TracChangeset
for help on using the changeset viewer.