Changeset 745 for trunk/server/lib/util/charset/charcnv.c
- Timestamp:
- Nov 27, 2012, 4:43:17 PM (13 years ago)
- Location:
- trunk/server
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/server
- Property svn:mergeinfo changed
/vendor/current merged: 581,587,591,594,597,600,615,618,740
- Property svn:mergeinfo changed
-
trunk/server/lib/util/charset/charcnv.c
r414 r745 39 39 */ 40 40 41 struct smb_iconv_convenience {42 const char *unix_charset;43 const char *dos_charset;44 bool native_iconv;45 smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];46 };47 48 49 /**50 * Return the name of a charset to give to iconv().51 **/52 static const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch)53 {54 switch (ch) {55 case CH_UTF16: return "UTF-16LE";56 case CH_UNIX: return ic->unix_charset;57 case CH_DOS: return ic->dos_charset;58 case CH_UTF8: return "UTF8";59 case CH_UTF16BE: return "UTF-16BE";60 case CH_UTF16MUNGED: return "UTF16_MUNGED";61 default:62 return "ASCII";63 }64 }65 66 /**67 re-initialize iconv conversion descriptors68 **/69 static int close_iconv_convenience(struct smb_iconv_convenience *data)70 {71 unsigned c1, c2;72 for (c1=0;c1<NUM_CHARSETS;c1++) {73 for (c2=0;c2<NUM_CHARSETS;c2++) {74 if (data->conv_handles[c1][c2] != NULL) {75 if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) {76 smb_iconv_close(data->conv_handles[c1][c2]);77 }78 data->conv_handles[c1][c2] = NULL;79 }80 }81 }82 83 return 0;84 }85 86 _PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_init(TALLOC_CTX *mem_ctx,87 const char *dos_charset,88 const char *unix_charset,89 bool native_iconv)90 {91 struct smb_iconv_convenience *ret = talloc_zero(mem_ctx,92 struct smb_iconv_convenience);93 94 if (ret == NULL) {95 return NULL;96 }97 98 talloc_set_destructor(ret, close_iconv_convenience);99 100 ret->dos_charset = talloc_strdup(ret, dos_charset);101 ret->unix_charset = talloc_strdup(ret, unix_charset);102 ret->native_iconv = native_iconv;103 104 return ret;105 }106 107 /*108 on-demand initialisation of conversion handles109 */110 static smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,111 charset_t from, charset_t to)112 {113 const char *n1, *n2;114 static bool initialised;115 116 if (initialised == false) {117 initialised = true;118 119 #ifdef LC_ALL120 /* we set back the locale to C to get ASCII-compatible121 toupper/lower functions. For now we do not need122 any other POSIX localisations anyway. When we123 should really need localized string functions one124 day we need to write our own ascii_tolower etc.125 */126 setlocale(LC_ALL, "C");127 #endif128 }129 130 if (ic->conv_handles[from][to]) {131 return ic->conv_handles[from][to];132 }133 134 n1 = charset_name(ic, from);135 n2 = charset_name(ic, to);136 137 ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1,138 ic->native_iconv);139 140 if (ic->conv_handles[from][to] == (smb_iconv_t)-1) {141 if ((from == CH_DOS || to == CH_DOS) &&142 strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) {143 DEBUG(0,("dos charset '%s' unavailable - using ASCII\n",144 charset_name(ic, CH_DOS)));145 ic->dos_charset = "ASCII";146 147 n1 = charset_name(ic, from);148 n2 = charset_name(ic, to);149 150 ic->conv_handles[from][to] =151 smb_iconv_open_ex(ic, n2, n1, ic->native_iconv);152 }153 }154 155 return ic->conv_handles[from][to];156 }157 158 41 /** 159 42 * Convert string from one encoding to another, making error checking etc … … 214 97 break; 215 98 } 216 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf)); 99 DEBUG(0,("Conversion error: %s - ",reason)); 100 dump_data(0, (const uint8_t *) inbuf, i_len); 217 101 talloc_free(ob); 218 102 return (size_t)-1; … … 349 233 } 350 234 351 /*352 return the unicode codepoint for the next multi-byte CH_UNIX character353 in the string354 355 also return the number of bytes consumed (which tells the caller356 how many bytes to skip to get to the next CH_UNIX character)357 358 return INVALID_CODEPOINT if the next character cannot be converted359 */360 _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,361 const char *str, size_t *size)362 {363 /* it cannot occupy more than 4 bytes in UTF16 format */364 uint8_t buf[4];365 smb_iconv_t descriptor;366 size_t ilen_orig;367 size_t ilen;368 size_t olen;369 char *outbuf;370 371 if ((str[0] & 0x80) == 0) {372 *size = 1;373 return (codepoint_t)str[0];374 }375 376 /* we assume that no multi-byte character can take377 more than 5 bytes. This is OK as we only378 support codepoints up to 1M */379 ilen_orig = strnlen(str, 5);380 ilen = ilen_orig;381 382 descriptor = get_conv_handle(ic, CH_UNIX, CH_UTF16);383 if (descriptor == (smb_iconv_t)-1) {384 *size = 1;385 return INVALID_CODEPOINT;386 }387 388 /* this looks a little strange, but it is needed to cope389 with codepoints above 64k */390 olen = 2;391 outbuf = (char *)buf;392 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);393 if (olen == 2) {394 olen = 4;395 outbuf = (char *)buf;396 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);397 if (olen == 4) {398 /* we didn't convert any bytes */399 *size = 1;400 return INVALID_CODEPOINT;401 }402 olen = 4 - olen;403 } else {404 olen = 2 - olen;405 }406 407 *size = ilen_orig - ilen;408 409 if (olen == 2) {410 return (codepoint_t)SVAL(buf, 0);411 }412 if (olen == 4) {413 /* decode a 4 byte UTF16 character manually */414 return (codepoint_t)0x10000 +415 (buf[2] | ((buf[3] & 0x3)<<8) |416 (buf[0]<<10) | ((buf[1] & 0x3)<<18));417 }418 419 /* no other length is valid */420 return INVALID_CODEPOINT;421 }422 423 /*424 push a single codepoint into a CH_UNIX string the target string must425 be able to hold the full character, which is guaranteed if it is at426 least 5 bytes in size. The caller may pass less than 5 bytes if they427 are sure the character will fit (for example, you can assume that428 uppercase/lowercase of a character will not add more than 1 byte)429 430 return the number of bytes occupied by the CH_UNIX character, or431 -1 on failure432 */433 _PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,434 char *str, codepoint_t c)435 {436 smb_iconv_t descriptor;437 uint8_t buf[4];438 size_t ilen, olen;439 const char *inbuf;440 441 if (c < 128) {442 *str = c;443 return 1;444 }445 446 descriptor = get_conv_handle(ic,447 CH_UTF16, CH_UNIX);448 if (descriptor == (smb_iconv_t)-1) {449 return -1;450 }451 452 if (c < 0x10000) {453 ilen = 2;454 olen = 5;455 inbuf = (char *)buf;456 SSVAL(buf, 0, c);457 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);458 if (ilen != 0) {459 return -1;460 }461 return 5 - olen;462 }463 464 c -= 0x10000;465 466 buf[0] = (c>>10) & 0xFF;467 buf[1] = (c>>18) | 0xd8;468 buf[2] = c & 0xFF;469 buf[3] = ((c>>8) & 0x3) | 0xdc;470 471 ilen = 4;472 olen = 5;473 inbuf = (char *)buf;474 475 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);476 if (ilen != 0) {477 return -1;478 }479 return 5 - olen;480 }481 482
Note:
See TracChangeset
for help on using the changeset viewer.