- Timestamp:
- Aug 21, 2007, 11:51:59 AM (18 years ago)
- File:
-
- 1 edited
-
trunk/samba/source/lib/charcnv.c (modified) (42 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/samba/source/lib/charcnv.c
r39 r63 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 Character set conversion Extensions … … 6 6 Copyright (C) Simo Sorce 2001 7 7 Copyright (C) Martin Pool 2003 8 8 9 9 This program is free software; you can redistribute it and/or modify 10 10 it under the terms of the GNU General Public License as published by 11 11 the Free Software Foundation; either version 2 of the License, or 12 12 (at your option) any later version. 13 13 14 14 This program is distributed in the hope that it will be useful, 15 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 17 GNU General Public License for more details. 18 18 19 19 You should have received a copy of the GNU General Public License 20 20 along with this program; if not, write to the Free Software … … 28 28 char lp_failed_convert_char(void) 29 29 { 30 return '_';30 return '_'; 31 31 } 32 32 … … 35 35 * 36 36 * @brief Character-set conversion routines built on our iconv. 37 * 37 * 38 38 * @note Samba's internal character set (at least in the 3.0 series) 39 39 * is always the same as the one for the Unix filesystem. It is … … 55 55 static const char *charset_name(charset_t ch) 56 56 { 57 const char *ret = NULL;58 59 if (ch == CH_UTF16LE) ret = "UTF-16LE";60 else if (ch == CH_UTF16BE) ret = "UTF-16BE";61 else if (ch == CH_UNIX) ret = lp_unix_charset();62 else if (ch == CH_DOS) ret = lp_dos_charset();63 else if (ch == CH_DISPLAY) ret = lp_display_charset();64 else if (ch == CH_UTF8) ret = "UTF8";57 const char *ret = NULL; 58 59 if (ch == CH_UTF16LE) ret = "UTF-16LE"; 60 else if (ch == CH_UTF16BE) ret = "UTF-16BE"; 61 else if (ch == CH_UNIX) ret = lp_unix_charset(); 62 else if (ch == CH_DOS) ret = lp_dos_charset(); 63 else if (ch == CH_DISPLAY) ret = lp_display_charset(); 64 else if (ch == CH_UTF8) ret = "UTF8"; 65 65 66 66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET) 67 if (ret && !strcmp(ret, "LOCALE")) {68 const char *ln = NULL;67 if (ret && !strcmp(ret, "LOCALE")) { 68 const char *ln = NULL; 69 69 70 70 #ifdef HAVE_SETLOCALE 71 setlocale(LC_ALL, "");71 setlocale(LC_ALL, ""); 72 72 #endif 73 ln = nl_langinfo(CODESET);74 if (ln) {75 /* Check whether the charset name is supported76 by iconv */77 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");78 if (handle == (smb_iconv_t) -1) {79 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));80 ln = NULL;81 } else {82 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));83 smb_iconv_close(handle);84 }85 }86 ret = ln;87 }73 ln = nl_langinfo(CODESET); 74 if (ln) { 75 /* Check whether the charset name is supported 76 by iconv */ 77 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE"); 78 if (handle == (smb_iconv_t) -1) { 79 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln)); 80 ln = NULL; 81 } else { 82 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln)); 83 smb_iconv_close(handle); 84 } 85 } 86 ret = ln; 87 } 88 88 #endif 89 89 90 if (!ret || !*ret) ret = "ASCII";91 return ret;90 if (!ret || !*ret) ret = "ASCII"; 91 return ret; 92 92 } 93 93 94 94 void lazy_initialize_conv(void) 95 95 { 96 static int initialized = False;97 98 if (!initialized) {99 initialized = True;100 load_case_tables();101 init_iconv();102 }96 static int initialized = False; 97 98 if (!initialized) { 99 initialized = True; 100 load_case_tables(); 101 init_iconv(); 102 } 103 103 } 104 104 … … 108 108 void gfree_charcnv(void) 109 109 { 110 int c1, c2;111 112 for (c1=0;c1<NUM_CHARSETS;c1++) {113 for (c2=0;c2<NUM_CHARSETS;c2++) {114 if ( conv_handles[c1][c2] ) {115 smb_iconv_close( conv_handles[c1][c2] );116 conv_handles[c1][c2] = 0;117 }118 }119 }110 int c1, c2; 111 112 for (c1=0;c1<NUM_CHARSETS;c1++) { 113 for (c2=0;c2<NUM_CHARSETS;c2++) { 114 if ( conv_handles[c1][c2] ) { 115 smb_iconv_close( conv_handles[c1][c2] ); 116 conv_handles[c1][c2] = 0; 117 } 118 } 119 } 120 120 } 121 121 … … 129 129 void init_iconv(void) 130 130 { 131 int c1, c2;132 BOOL did_reload = False;133 134 /* so that charset_name() works we need to get the UNIX<->UCS2 going135 first */136 if (!conv_handles[CH_UNIX][CH_UTF16LE])137 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");138 139 if (!conv_handles[CH_UTF16LE][CH_UNIX])140 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));141 142 for (c1=0;c1<NUM_CHARSETS;c1++) {143 for (c2=0;c2<NUM_CHARSETS;c2++) {144 const char *n1 = charset_name((charset_t)c1);145 const char *n2 = charset_name((charset_t)c2);146 if (conv_handles[c1][c2] &&147 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&148 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)149 continue;150 151 did_reload = True;152 153 if (conv_handles[c1][c2])154 smb_iconv_close(conv_handles[c1][c2]);155 156 conv_handles[c1][c2] = smb_iconv_open(n2,n1);157 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {158 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",159 charset_name((charset_t)c1), charset_name((charset_t)c2)));160 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {161 n1 = "ASCII";162 }163 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {164 n2 = "ASCII";165 }166 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",167 n1, n2 ));168 conv_handles[c1][c2] = smb_iconv_open(n2,n1);169 if (!conv_handles[c1][c2]) {170 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));171 smb_panic("init_iconv: conv_handle initialization failed.");172 }173 }174 }175 }176 177 if (did_reload) {178 /* XXX: Does this really get called every time the dos179 * codepage changes? */180 /* XXX: Is the did_reload test too strict? */181 conv_silent = True;182 init_doschar_table();183 init_valid_table();184 conv_silent = False;185 }131 int c1, c2; 132 BOOL did_reload = False; 133 134 /* so that charset_name() works we need to get the UNIX<->UCS2 going 135 first */ 136 if (!conv_handles[CH_UNIX][CH_UTF16LE]) 137 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII"); 138 139 if (!conv_handles[CH_UTF16LE][CH_UNIX]) 140 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE)); 141 142 for (c1=0;c1<NUM_CHARSETS;c1++) { 143 for (c2=0;c2<NUM_CHARSETS;c2++) { 144 const char *n1 = charset_name((charset_t)c1); 145 const char *n2 = charset_name((charset_t)c2); 146 if (conv_handles[c1][c2] && 147 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 && 148 strcmp(n2, conv_handles[c1][c2]->to_name) == 0) 149 continue; 150 151 did_reload = True; 152 153 if (conv_handles[c1][c2]) 154 smb_iconv_close(conv_handles[c1][c2]); 155 156 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 157 if (conv_handles[c1][c2] == (smb_iconv_t)-1) { 158 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n", 159 charset_name((charset_t)c1), charset_name((charset_t)c2))); 160 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) { 161 n1 = "ASCII"; 162 } 163 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) { 164 n2 = "ASCII"; 165 } 166 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n", 167 n1, n2 )); 168 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 169 if (!conv_handles[c1][c2]) { 170 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2)); 171 smb_panic("init_iconv: conv_handle initialization failed."); 172 } 173 } 174 } 175 } 176 177 if (did_reload) { 178 /* XXX: Does this really get called every time the dos 179 * codepage changes? */ 180 /* XXX: Is the did_reload test too strict? */ 181 conv_silent = True; 182 init_doschar_table(); 183 init_valid_table(); 184 conv_silent = False; 185 } 186 186 } 187 187 … … 202 202 203 203 static size_t convert_string_internal(charset_t from, charset_t to, 204 void const *src, size_t srclen, 205 void *dest, size_t destlen, BOOL allow_bad_conv)206 { 207 size_t i_len, o_len;208 size_t retval;209 const char* inbuf = (const char*)src;210 char* outbuf = (char*)dest;211 smb_iconv_t descriptor;212 213 lazy_initialize_conv();214 215 descriptor = conv_handles[from][to];216 217 if (srclen == (size_t)-1) {218 if (from == CH_UTF16LE || from == CH_UTF16BE) {219 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;220 } else {221 srclen = strlen((const char *)src)+1;222 }223 }224 225 226 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {227 if (!conv_silent)228 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));229 return (size_t)-1;230 }231 232 i_len=srclen;233 o_len=destlen;204 void const *src, size_t srclen, 205 void *dest, size_t destlen, BOOL allow_bad_conv) 206 { 207 size_t i_len, o_len; 208 size_t retval; 209 const char* inbuf = (const char*)src; 210 char* outbuf = (char*)dest; 211 smb_iconv_t descriptor; 212 213 lazy_initialize_conv(); 214 215 descriptor = conv_handles[from][to]; 216 217 if (srclen == (size_t)-1) { 218 if (from == CH_UTF16LE || from == CH_UTF16BE) { 219 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2; 220 } else { 221 srclen = strlen((const char *)src)+1; 222 } 223 } 224 225 226 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 227 if (!conv_silent) 228 DEBUG(0,("convert_string_internal: Conversion not supported.\n")); 229 return (size_t)-1; 230 } 231 232 i_len=srclen; 233 o_len=destlen; 234 234 235 235 again: 236 236 237 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);238 if(retval==(size_t)-1) {239 const char *reason="unknown error";240 switch(errno) {241 case EINVAL:242 reason="Incomplete multibyte sequence";243 if (!conv_silent)244 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));245 if (allow_bad_conv)246 goto use_as_is;247 break;248 case E2BIG:249 reason="No more room"; 250 if (!conv_silent) {251 if (from == CH_UNIX) {252 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",253 charset_name(from), charset_name(to),254 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));255 } else {256 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",257 charset_name(from), charset_name(to),258 (unsigned int)srclen, (unsigned int)destlen));259 }260 }261 break;262 case EILSEQ:263 reason="Illegal multibyte sequence";264 if (!conv_silent)265 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));266 if (allow_bad_conv)267 goto use_as_is;268 break;269 default:270 if (!conv_silent)271 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));272 break;273 }274 /* smb_panic(reason); */275 }276 return destlen-o_len;237 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len); 238 if(retval==(size_t)-1) { 239 const char *reason="unknown error"; 240 switch(errno) { 241 case EINVAL: 242 reason="Incomplete multibyte sequence"; 243 if (!conv_silent) 244 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 245 if (allow_bad_conv) 246 goto use_as_is; 247 break; 248 case E2BIG: 249 reason="No more room"; 250 if (!conv_silent) { 251 if (from == CH_UNIX) { 252 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n", 253 charset_name(from), charset_name(to), 254 (unsigned int)srclen, (unsigned int)destlen, (const char *)src)); 255 } else { 256 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n", 257 charset_name(from), charset_name(to), 258 (unsigned int)srclen, (unsigned int)destlen)); 259 } 260 } 261 break; 262 case EILSEQ: 263 reason="Illegal multibyte sequence"; 264 if (!conv_silent) 265 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 266 if (allow_bad_conv) 267 goto use_as_is; 268 break; 269 default: 270 if (!conv_silent) 271 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf)); 272 break; 273 } 274 /* smb_panic(reason); */ 275 } 276 return destlen-o_len; 277 277 278 278 use_as_is: 279 279 280 /* 281 * Conversion not supported. This is actually an error, but there are so282 * many misconfigured iconv systems and smb.conf's out there we can't just283 * fail. Do a very bad conversion instead.... JRA.284 */285 286 {287 if (o_len == 0 || i_len == 0)288 return destlen - o_len;289 290 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&291 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {292 /* Can't convert from utf16 any endian to multibyte.293 Replace with the default fail char.294 */295 if (i_len < 2)296 return destlen - o_len;297 if (i_len >= 2) {298 *outbuf = lp_failed_convert_char();299 300 outbuf++;301 o_len--;302 303 inbuf += 2;304 i_len -= 2;305 }306 307 if (o_len == 0 || i_len == 0)308 return destlen - o_len;309 310 /* Keep trying with the next char... */311 goto again;312 313 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {314 /* Can't convert to UTF16LE - just widen by adding the315 default fail char then zero.316 */317 if (o_len < 2)318 return destlen - o_len;319 320 outbuf[0] = lp_failed_convert_char();321 outbuf[1] = '\0';322 323 inbuf++;324 i_len--;325 326 outbuf += 2;327 o_len -= 2;328 329 if (o_len == 0 || i_len == 0)330 return destlen - o_len;331 332 /* Keep trying with the next char... */333 goto again;334 335 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&336 to != CH_UTF16LE && to != CH_UTF16BE) {337 /* Failed multibyte to multibyte. Just copy the default fail char and338 try again. */339 outbuf[0] = lp_failed_convert_char();340 341 inbuf++;342 i_len--;343 344 outbuf++;345 o_len--;346 347 if (o_len == 0 || i_len == 0)348 return destlen - o_len;349 350 /* Keep trying with the next char... */351 goto again;352 353 } else {354 /* Keep compiler happy.... */355 return destlen - o_len;356 }357 }280 /* 281 * Conversion not supported. This is actually an error, but there are so 282 * many misconfigured iconv systems and smb.conf's out there we can't just 283 * fail. Do a very bad conversion instead.... JRA. 284 */ 285 286 { 287 if (o_len == 0 || i_len == 0) 288 return destlen - o_len; 289 290 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) && 291 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) { 292 /* Can't convert from utf16 any endian to multibyte. 293 Replace with the default fail char. 294 */ 295 if (i_len < 2) 296 return destlen - o_len; 297 if (i_len >= 2) { 298 *outbuf = lp_failed_convert_char(); 299 300 outbuf++; 301 o_len--; 302 303 inbuf += 2; 304 i_len -= 2; 305 } 306 307 if (o_len == 0 || i_len == 0) 308 return destlen - o_len; 309 310 /* Keep trying with the next char... */ 311 goto again; 312 313 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 314 /* Can't convert to UTF16LE - just widen by adding the 315 default fail char then zero. 316 */ 317 if (o_len < 2) 318 return destlen - o_len; 319 320 outbuf[0] = lp_failed_convert_char(); 321 outbuf[1] = '\0'; 322 323 inbuf++; 324 i_len--; 325 326 outbuf += 2; 327 o_len -= 2; 328 329 if (o_len == 0 || i_len == 0) 330 return destlen - o_len; 331 332 /* Keep trying with the next char... */ 333 goto again; 334 335 } else if (from != CH_UTF16LE && from != CH_UTF16BE && 336 to != CH_UTF16LE && to != CH_UTF16BE) { 337 /* Failed multibyte to multibyte. Just copy the default fail char and 338 try again. */ 339 outbuf[0] = lp_failed_convert_char(); 340 341 inbuf++; 342 i_len--; 343 344 outbuf++; 345 o_len--; 346 347 if (o_len == 0 || i_len == 0) 348 return destlen - o_len; 349 350 /* Keep trying with the next char... */ 351 goto again; 352 353 } else { 354 /* Keep compiler happy.... */ 355 return destlen - o_len; 356 } 357 } 358 358 } 359 359 … … 376 376 377 377 size_t convert_string(charset_t from, charset_t to, 378 void const *src, size_t srclen, 379 void *dest, size_t destlen, BOOL allow_bad_conv)380 { 381 /*382 * NB. We deliberately don't do a strlen here if srclen == -1.383 * This is very expensive over millions of calls and is taken384 * care of in the slow path in convert_string_internal. JRA.385 */378 void const *src, size_t srclen, 379 void *dest, size_t destlen, BOOL allow_bad_conv) 380 { 381 /* 382 * NB. We deliberately don't do a strlen here if srclen == -1. 383 * This is very expensive over millions of calls and is taken 384 * care of in the slow path in convert_string_internal. JRA. 385 */ 386 386 387 387 #ifdef DEVELOPER 388 SMB_ASSERT(destlen != (size_t)-1);388 SMB_ASSERT(destlen != (size_t)-1); 389 389 #endif 390 390 391 if (srclen == 0)392 return 0;393 394 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {395 const unsigned char *p = (const unsigned char *)src;396 unsigned char *q = (unsigned char *)dest;397 size_t slen = srclen;398 size_t dlen = destlen;399 unsigned char lastp = '\0';400 size_t retval = 0;401 402 /* If all characters are ascii, fast path here. */403 while (slen && dlen) {404 if ((lastp = *p) <= 0x7f) {405 *q++ = *p++;406 if (slen != (size_t)-1) {407 slen--;408 }409 dlen--;410 retval++;411 if (!lastp)412 break;413 } else {391 if (srclen == 0) 392 return 0; 393 394 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) { 395 const unsigned char *p = (const unsigned char *)src; 396 unsigned char *q = (unsigned char *)dest; 397 size_t slen = srclen; 398 size_t dlen = destlen; 399 unsigned char lastp = '\0'; 400 size_t retval = 0; 401 402 /* If all characters are ascii, fast path here. */ 403 while (slen && dlen) { 404 if ((lastp = *p) <= 0x7f) { 405 *q++ = *p++; 406 if (slen != (size_t)-1) { 407 slen--; 408 } 409 dlen--; 410 retval++; 411 if (!lastp) 412 break; 413 } else { 414 414 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 415 goto general_case;415 goto general_case; 416 416 #else 417 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);417 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 418 418 #endif 419 }420 }421 if (!dlen) {422 /* Even if we fast path we should note if we ran out of room. */423 if (((slen != (size_t)-1) && slen) ||424 ((slen == (size_t)-1) && lastp)) {425 errno = E2BIG;426 }427 }428 return retval;429 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {430 const unsigned char *p = (const unsigned char *)src;431 unsigned char *q = (unsigned char *)dest;432 size_t retval = 0;433 size_t slen = srclen;434 size_t dlen = destlen;435 unsigned char lastp = '\0';436 437 /* If all characters are ascii, fast path here. */438 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {439 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {440 *q++ = *p;441 if (slen != (size_t)-1) {442 slen -= 2;443 }444 p += 2;445 dlen--;446 retval++;447 if (!lastp)448 break;449 } else {419 } 420 } 421 if (!dlen) { 422 /* Even if we fast path we should note if we ran out of room. */ 423 if (((slen != (size_t)-1) && slen) || 424 ((slen == (size_t)-1) && lastp)) { 425 errno = E2BIG; 426 } 427 } 428 return retval; 429 } else if (from == CH_UTF16LE && to != CH_UTF16LE) { 430 const unsigned char *p = (const unsigned char *)src; 431 unsigned char *q = (unsigned char *)dest; 432 size_t retval = 0; 433 size_t slen = srclen; 434 size_t dlen = destlen; 435 unsigned char lastp = '\0'; 436 437 /* If all characters are ascii, fast path here. */ 438 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) { 439 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { 440 *q++ = *p; 441 if (slen != (size_t)-1) { 442 slen -= 2; 443 } 444 p += 2; 445 dlen--; 446 retval++; 447 if (!lastp) 448 break; 449 } else { 450 450 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 451 goto general_case;451 goto general_case; 452 452 #else 453 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);453 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 454 454 #endif 455 }456 }457 if (!dlen) {458 /* Even if we fast path we should note if we ran out of room. */459 if (((slen != (size_t)-1) && slen) ||460 ((slen == (size_t)-1) && lastp)) {461 errno = E2BIG;462 }463 }464 return retval;465 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {466 const unsigned char *p = (const unsigned char *)src;467 unsigned char *q = (unsigned char *)dest;468 size_t retval = 0;469 size_t slen = srclen;470 size_t dlen = destlen;471 unsigned char lastp = '\0';472 473 /* If all characters are ascii, fast path here. */474 while (slen && (dlen >= 2)) {475 if ((lastp = *p) <= 0x7F) {476 *q++ = *p++;477 *q++ = '\0';478 if (slen != (size_t)-1) {479 slen--;480 }481 dlen -= 2;482 retval += 2;483 if (!lastp)484 break;485 } else {455 } 456 } 457 if (!dlen) { 458 /* Even if we fast path we should note if we ran out of room. */ 459 if (((slen != (size_t)-1) && slen) || 460 ((slen == (size_t)-1) && lastp)) { 461 errno = E2BIG; 462 } 463 } 464 return retval; 465 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 466 const unsigned char *p = (const unsigned char *)src; 467 unsigned char *q = (unsigned char *)dest; 468 size_t retval = 0; 469 size_t slen = srclen; 470 size_t dlen = destlen; 471 unsigned char lastp = '\0'; 472 473 /* If all characters are ascii, fast path here. */ 474 while (slen && (dlen >= 2)) { 475 if ((lastp = *p) <= 0x7F) { 476 *q++ = *p++; 477 *q++ = '\0'; 478 if (slen != (size_t)-1) { 479 slen--; 480 } 481 dlen -= 2; 482 retval += 2; 483 if (!lastp) 484 break; 485 } else { 486 486 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 487 goto general_case;487 goto general_case; 488 488 #else 489 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);489 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv); 490 490 #endif 491 }492 }493 if (!dlen) {494 /* Even if we fast path we should note if we ran out of room. */495 if (((slen != (size_t)-1) && slen) ||496 ((slen == (size_t)-1) && lastp)) {497 errno = E2BIG;498 }499 }500 return retval;501 }491 } 492 } 493 if (!dlen) { 494 /* Even if we fast path we should note if we ran out of room. */ 495 if (((slen != (size_t)-1) && slen) || 496 ((slen == (size_t)-1) && lastp)) { 497 errno = E2BIG; 498 } 499 } 500 return retval; 501 } 502 502 503 503 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS 504 504 general_case: 505 505 #endif 506 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);506 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv); 507 507 } 508 508 … … 518 518 * 519 519 * Ensure the srclen contains the terminating zero. 520 * 520 * 521 521 * I hate the goto's in this function. It's embarressing..... 522 522 * There has to be a cleaner way to do this. JRA. … … 524 524 525 525 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to, 526 void const *src, size_t srclen, void *dst, BOOL allow_bad_conv)527 { 528 size_t i_len, o_len, destlen = MAX(srclen, 512);529 size_t retval;530 const char *inbuf = (const char *)src;531 char *outbuf = NULL, *ob = NULL;532 smb_iconv_t descriptor;533 void **dest = (void **)dst;534 535 *dest = NULL;536 537 if (src == NULL || srclen == (size_t)-1)538 return (size_t)-1;539 if (srclen == 0)540 return 0;541 542 lazy_initialize_conv();543 544 descriptor = conv_handles[from][to];545 546 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {547 if (!conv_silent)548 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));549 return (size_t)-1;550 }526 void const *src, size_t srclen, void *dst, BOOL allow_bad_conv) 527 { 528 size_t i_len, o_len, destlen = MAX(srclen, 512); 529 size_t retval; 530 const char *inbuf = (const char *)src; 531 char *outbuf = NULL, *ob = NULL; 532 smb_iconv_t descriptor; 533 void **dest = (void **)dst; 534 535 *dest = NULL; 536 537 if (src == NULL || srclen == (size_t)-1) 538 return (size_t)-1; 539 if (srclen == 0) 540 return 0; 541 542 lazy_initialize_conv(); 543 544 descriptor = conv_handles[from][to]; 545 546 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 547 if (!conv_silent) 548 DEBUG(0,("convert_string_allocate: Conversion not supported.\n")); 549 return (size_t)-1; 550 } 551 551 552 552 convert: 553 553 554 if ((destlen*2) < destlen) {555 /* wrapped ! abort. */556 if (!conv_silent)557 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));558 if (!ctx)559 SAFE_FREE(outbuf);560 return (size_t)-1;561 } else {562 destlen = destlen * 2;563 }564 565 if (ctx) {566 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen);567 } else {568 ob = (char *)SMB_REALLOC(ob, destlen);569 }570 571 if (!ob) {572 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));573 return (size_t)-1;574 }575 outbuf = ob;576 i_len = srclen;577 o_len = destlen;554 if ((destlen*2) < destlen) { 555 /* wrapped ! abort. */ 556 if (!conv_silent) 557 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n")); 558 if (!ctx) 559 SAFE_FREE(outbuf); 560 return (size_t)-1; 561 } else { 562 destlen = destlen * 2; 563 } 564 565 if (ctx) { 566 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen); 567 } else { 568 ob = (char *)SMB_REALLOC(ob, destlen); 569 } 570 571 if (!ob) { 572 DEBUG(0, ("convert_string_allocate: realloc failed!\n")); 573 return (size_t)-1; 574 } 575 outbuf = ob; 576 i_len = srclen; 577 o_len = destlen; 578 578 579 579 again: 580 580 581 retval = smb_iconv(descriptor,582 &inbuf, &i_len,583 &outbuf, &o_len);584 if(retval == (size_t)-1){585 const char *reason="unknown error";586 switch(errno) {587 case EINVAL:588 reason="Incomplete multibyte sequence";589 if (!conv_silent)590 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));591 if (allow_bad_conv)592 goto use_as_is;593 break;594 case E2BIG:595 goto convert; 596 case EILSEQ:597 reason="Illegal multibyte sequence";598 if (!conv_silent)599 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));600 if (allow_bad_conv)601 goto use_as_is;602 break;603 }604 if (!conv_silent)605 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));606 /* smb_panic(reason); */607 return (size_t)-1;608 }581 retval = smb_iconv(descriptor, 582 &inbuf, &i_len, 583 &outbuf, &o_len); 584 if(retval == (size_t)-1) { 585 const char *reason="unknown error"; 586 switch(errno) { 587 case EINVAL: 588 reason="Incomplete multibyte sequence"; 589 if (!conv_silent) 590 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf)); 591 if (allow_bad_conv) 592 goto use_as_is; 593 break; 594 case E2BIG: 595 goto convert; 596 case EILSEQ: 597 reason="Illegal multibyte sequence"; 598 if (!conv_silent) 599 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf)); 600 if (allow_bad_conv) 601 goto use_as_is; 602 break; 603 } 604 if (!conv_silent) 605 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf)); 606 /* smb_panic(reason); */ 607 return (size_t)-1; 608 } 609 609 610 610 out: 611 611 612 destlen = destlen - o_len;613 if (ctx) {614 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen);615 } else {616 ob = (char *)SMB_REALLOC(ob,destlen);617 }618 619 if (destlen && !ob) {620 DEBUG(0, ("convert_string_allocate: out of memory!\n"));621 return (size_t)-1;622 }623 624 *dest = ob;625 return destlen;612 destlen = destlen - o_len; 613 if (ctx) { 614 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen); 615 } else { 616 ob = (char *)SMB_REALLOC(ob,destlen); 617 } 618 619 if (destlen && !ob) { 620 DEBUG(0, ("convert_string_allocate: out of memory!\n")); 621 return (size_t)-1; 622 } 623 624 *dest = ob; 625 return destlen; 626 626 627 627 use_as_is: 628 628 629 /* 630 * Conversion not supported. This is actually an error, but there are so631 * many misconfigured iconv systems and smb.conf's out there we can't just632 * fail. Do a very bad conversion instead.... JRA.633 */634 635 {636 if (o_len == 0 || i_len == 0)637 goto out;638 639 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&640 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {641 /* Can't convert from utf16 any endian to multibyte.642 Replace with the default fail char.643 */644 645 if (i_len < 2)646 goto out;647 648 if (i_len >= 2) {649 *outbuf = lp_failed_convert_char();650 651 outbuf++;652 o_len--;653 654 inbuf += 2;655 i_len -= 2;656 }657 658 if (o_len == 0 || i_len == 0)659 goto out;660 661 /* Keep trying with the next char... */662 goto again;663 664 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {665 /* Can't convert to UTF16LE - just widen by adding the666 default fail char then zero.667 */668 if (o_len < 2)669 goto out;670 671 outbuf[0] = lp_failed_convert_char();672 outbuf[1] = '\0';673 674 inbuf++;675 i_len--;676 677 outbuf += 2;678 o_len -= 2;679 680 if (o_len == 0 || i_len == 0)681 goto out;682 683 /* Keep trying with the next char... */684 goto again;685 686 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&687 to != CH_UTF16LE && to != CH_UTF16BE) {688 /* Failed multibyte to multibyte. Just copy the default fail char and689 try again. */690 outbuf[0] = lp_failed_convert_char();691 692 inbuf++;693 i_len--;694 695 outbuf++;696 o_len--;697 698 if (o_len == 0 || i_len == 0)699 goto out;700 701 /* Keep trying with the next char... */702 goto again;703 704 } else {705 /* Keep compiler happy.... */706 goto out;707 }708 }629 /* 630 * Conversion not supported. This is actually an error, but there are so 631 * many misconfigured iconv systems and smb.conf's out there we can't just 632 * fail. Do a very bad conversion instead.... JRA. 633 */ 634 635 { 636 if (o_len == 0 || i_len == 0) 637 goto out; 638 639 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) && 640 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) { 641 /* Can't convert from utf16 any endian to multibyte. 642 Replace with the default fail char. 643 */ 644 645 if (i_len < 2) 646 goto out; 647 648 if (i_len >= 2) { 649 *outbuf = lp_failed_convert_char(); 650 651 outbuf++; 652 o_len--; 653 654 inbuf += 2; 655 i_len -= 2; 656 } 657 658 if (o_len == 0 || i_len == 0) 659 goto out; 660 661 /* Keep trying with the next char... */ 662 goto again; 663 664 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { 665 /* Can't convert to UTF16LE - just widen by adding the 666 default fail char then zero. 667 */ 668 if (o_len < 2) 669 goto out; 670 671 outbuf[0] = lp_failed_convert_char(); 672 outbuf[1] = '\0'; 673 674 inbuf++; 675 i_len--; 676 677 outbuf += 2; 678 o_len -= 2; 679 680 if (o_len == 0 || i_len == 0) 681 goto out; 682 683 /* Keep trying with the next char... */ 684 goto again; 685 686 } else if (from != CH_UTF16LE && from != CH_UTF16BE && 687 to != CH_UTF16LE && to != CH_UTF16BE) { 688 /* Failed multibyte to multibyte. Just copy the default fail char and 689 try again. */ 690 outbuf[0] = lp_failed_convert_char(); 691 692 inbuf++; 693 i_len--; 694 695 outbuf++; 696 o_len--; 697 698 if (o_len == 0 || i_len == 0) 699 goto out; 700 701 /* Keep trying with the next char... */ 702 goto again; 703 704 } else { 705 /* Keep compiler happy.... */ 706 goto out; 707 } 708 } 709 709 } 710 710 … … 713 713 * 714 714 * @param srclen length of source buffer. 715 * @param dest always set at least to NULL 715 * @param dest always set at least to NULL 716 716 * @note -1 is not accepted for srclen. 717 717 * … … 719 719 **/ 720 720 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to, 721 void const *src, size_t srclen, void *dst,722 BOOL allow_bad_conv)723 { 724 void **dest = (void **)dst;725 size_t dest_len;726 727 *dest = NULL;728 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);729 if (dest_len == (size_t)-1)730 return (size_t)-1;731 if (*dest == NULL)732 return (size_t)-1;733 return dest_len;721 void const *src, size_t srclen, void *dst, 722 BOOL allow_bad_conv) 723 { 724 void **dest = (void **)dst; 725 size_t dest_len; 726 727 *dest = NULL; 728 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv); 729 if (dest_len == (size_t)-1) 730 return (size_t)-1; 731 if (*dest == NULL) 732 return (size_t)-1; 733 return dest_len; 734 734 } 735 735 736 736 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen) 737 737 { 738 size_t size;739 smb_ucs2_t *buffer;740 741 size = push_ucs2_allocate(&buffer, src);742 if (size == (size_t)-1) {743 smb_panic("failed to create UCS2 buffer");744 }745 if (!strupper_w(buffer) && (dest == src)) {746 free(buffer);747 return srclen;748 }749 750 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);751 free(buffer);752 return size;738 size_t size; 739 smb_ucs2_t *buffer; 740 741 size = push_ucs2_allocate(&buffer, src); 742 if (size == (size_t)-1) { 743 smb_panic("failed to create UCS2 buffer"); 744 } 745 if (!strupper_w(buffer) && (dest == src)) { 746 free(buffer); 747 return srclen; 748 } 749 750 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True); 751 free(buffer); 752 return size; 753 753 } 754 754 … … 760 760 char *strdup_upper(const char *s) 761 761 { 762 pstring out_buffer;763 const unsigned char *p = (const unsigned char *)s;764 unsigned char *q = (unsigned char *)out_buffer;765 766 /* this is quite a common operation, so we want it to be767 fast. We optimise for the ascii case, knowing that all our768 supported multi-byte character sets are ascii-compatible769 (ie. they match for the first 128 chars) */770 771 while (1) {772 if (*p & 0x80)773 break;774 *q++ = toupper_ascii(*p);775 if (!*p)776 break;777 p++;778 if (p - ( const unsigned char *)s >= sizeof(pstring))779 break;780 }781 782 if (*p) {783 /* MB case. */784 size_t size;785 wpstring buffer;786 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);787 if (size == (size_t)-1) {788 return NULL;789 }790 791 strupper_w(buffer);792 793 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);794 if (size == (size_t)-1) {795 return NULL;796 }797 }798 799 return SMB_STRDUP(out_buffer);762 pstring out_buffer; 763 const unsigned char *p = (const unsigned char *)s; 764 unsigned char *q = (unsigned char *)out_buffer; 765 766 /* this is quite a common operation, so we want it to be 767 fast. We optimise for the ascii case, knowing that all our 768 supported multi-byte character sets are ascii-compatible 769 (ie. they match for the first 128 chars) */ 770 771 while (1) { 772 if (*p & 0x80) 773 break; 774 *q++ = toupper_ascii(*p); 775 if (!*p) 776 break; 777 p++; 778 if (p - ( const unsigned char *)s >= sizeof(pstring)) 779 break; 780 } 781 782 if (*p) { 783 /* MB case. */ 784 size_t size; 785 wpstring buffer; 786 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True); 787 if (size == (size_t)-1) { 788 return NULL; 789 } 790 791 strupper_w(buffer); 792 793 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True); 794 if (size == (size_t)-1) { 795 return NULL; 796 } 797 } 798 799 return SMB_STRDUP(out_buffer); 800 800 } 801 801 802 802 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen) 803 803 { 804 size_t size;805 smb_ucs2_t *buffer = NULL;806 807 size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,808 (void **)(void *)&buffer, True);809 if (size == (size_t)-1 || !buffer) {810 smb_panic("failed to create UCS2 buffer");811 }812 if (!strlower_w(buffer) && (dest == src)) {813 SAFE_FREE(buffer);814 return srclen;815 }816 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);817 SAFE_FREE(buffer);818 return size;804 size_t size; 805 smb_ucs2_t *buffer = NULL; 806 807 size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen, 808 (void **)(void *)&buffer, True); 809 if (size == (size_t)-1 || !buffer) { 810 smb_panic("failed to create UCS2 buffer"); 811 } 812 if (!strlower_w(buffer) && (dest == src)) { 813 SAFE_FREE(buffer); 814 return srclen; 815 } 816 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True); 817 SAFE_FREE(buffer); 818 return size; 819 819 } 820 820 … … 825 825 char *strdup_lower(const char *s) 826 826 { 827 size_t size;828 smb_ucs2_t *buffer = NULL;829 char *out_buffer;830 831 size = push_ucs2_allocate(&buffer, s);832 if (size == -1 || !buffer) {833 return NULL;834 }835 836 strlower_w(buffer);837 838 size = pull_ucs2_allocate(&out_buffer, buffer);839 SAFE_FREE(buffer);840 841 if (size == (size_t)-1) {842 return NULL;843 }844 845 return out_buffer;827 size_t size; 828 smb_ucs2_t *buffer = NULL; 829 char *out_buffer; 830 831 size = push_ucs2_allocate(&buffer, s); 832 if (size == -1 || !buffer) { 833 return NULL; 834 } 835 836 strlower_w(buffer); 837 838 size = pull_ucs2_allocate(&out_buffer, buffer); 839 SAFE_FREE(buffer); 840 841 if (size == (size_t)-1) { 842 return NULL; 843 } 844 845 return out_buffer; 846 846 } 847 847 848 848 static size_t ucs2_align(const void *base_ptr, const void *p, int flags) 849 849 { 850 if (flags & (STR_NOALIGN|STR_ASCII))851 return 0;852 return PTR_DIFF(p, base_ptr) & 1;850 if (flags & (STR_NOALIGN|STR_ASCII)) 851 return 0; 852 return PTR_DIFF(p, base_ptr) & 1; 853 853 } 854 854 … … 870 870 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags) 871 871 { 872 size_t src_len = strlen(src);873 pstring tmpbuf;874 875 /* treat a pstring as "unlimited" length */876 if (dest_len == (size_t)-1)877 dest_len = sizeof(pstring);878 879 if (flags & STR_UPPER) {880 pstrcpy(tmpbuf, src);881 strupper_m(tmpbuf);882 src = tmpbuf;883 }884 885 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))886 src_len++;887 888 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);872 size_t src_len = strlen(src); 873 pstring tmpbuf; 874 875 /* treat a pstring as "unlimited" length */ 876 if (dest_len == (size_t)-1) 877 dest_len = sizeof(pstring); 878 879 if (flags & STR_UPPER) { 880 pstrcpy(tmpbuf, src); 881 strupper_m(tmpbuf); 882 src = tmpbuf; 883 } 884 885 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) 886 src_len++; 887 888 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True); 889 889 } 890 890 891 891 size_t push_ascii_fstring(void *dest, const char *src) 892 892 { 893 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);893 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE); 894 894 } 895 895 896 896 size_t push_ascii_pstring(void *dest, const char *src) 897 897 { 898 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);898 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE); 899 899 } 900 900 … … 906 906 size_t push_ascii_nstring(void *dest, const char *src) 907 907 { 908 size_t i, buffer_len, dest_len;909 smb_ucs2_t *buffer;910 911 conv_silent = True;912 buffer_len = push_ucs2_allocate(&buffer, src);913 if (buffer_len == (size_t)-1) {914 smb_panic("failed to create UCS2 buffer");915 }916 917 /* We're using buffer_len below to count ucs2 characters, not bytes. */918 buffer_len /= sizeof(smb_ucs2_t);919 920 dest_len = 0;921 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {922 unsigned char mb[10];923 /* Convert one smb_ucs2_t character at a time. */924 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);925 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {926 memcpy((char *)dest + dest_len, mb, mb_len);927 dest_len += mb_len;928 } else {929 errno = E2BIG;930 break;931 }932 }933 ((char *)dest)[dest_len] = '\0';934 935 SAFE_FREE(buffer);936 conv_silent = False;937 return dest_len;908 size_t i, buffer_len, dest_len; 909 smb_ucs2_t *buffer; 910 911 conv_silent = True; 912 buffer_len = push_ucs2_allocate(&buffer, src); 913 if (buffer_len == (size_t)-1) { 914 smb_panic("failed to create UCS2 buffer"); 915 } 916 917 /* We're using buffer_len below to count ucs2 characters, not bytes. */ 918 buffer_len /= sizeof(smb_ucs2_t); 919 920 dest_len = 0; 921 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) { 922 unsigned char mb[10]; 923 /* Convert one smb_ucs2_t character at a time. */ 924 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False); 925 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) { 926 memcpy((char *)dest + dest_len, mb, mb_len); 927 dest_len += mb_len; 928 } else { 929 errno = E2BIG; 930 break; 931 } 932 } 933 ((char *)dest)[dest_len] = '\0'; 934 935 SAFE_FREE(buffer); 936 conv_silent = False; 937 return dest_len; 938 938 } 939 939 … … 955 955 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) 956 956 { 957 size_t ret;958 959 if (dest_len == (size_t)-1)960 dest_len = sizeof(pstring);961 962 if (flags & STR_TERMINATE) {963 if (src_len == (size_t)-1) {964 src_len = strlen((const char *)src) + 1;965 } else {966 size_t len = strnlen((const char *)src, src_len);967 if (len < src_len)968 len++;969 src_len = len;970 }971 }972 973 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);974 if (ret == (size_t)-1) {975 ret = 0;976 dest_len = 0;977 }978 979 if (dest_len && ret) {980 /* Did we already process the terminating zero ? */981 if (dest[MIN(ret-1, dest_len-1)] != 0) {982 dest[MIN(ret, dest_len-1)] = 0;983 }984 } else {985 dest[0] = 0;986 }987 988 return src_len;957 size_t ret; 958 959 if (dest_len == (size_t)-1) 960 dest_len = sizeof(pstring); 961 962 if (flags & STR_TERMINATE) { 963 if (src_len == (size_t)-1) { 964 src_len = strlen((const char *)src) + 1; 965 } else { 966 size_t len = strnlen((const char *)src, src_len); 967 if (len < src_len) 968 len++; 969 src_len = len; 970 } 971 } 972 973 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True); 974 if (ret == (size_t)-1) { 975 ret = 0; 976 dest_len = 0; 977 } 978 979 if (dest_len && ret) { 980 /* Did we already process the terminating zero ? */ 981 if (dest[MIN(ret-1, dest_len-1)] != 0) { 982 dest[MIN(ret, dest_len-1)] = 0; 983 } 984 } else { 985 dest[0] = 0; 986 } 987 988 return src_len; 989 989 } 990 990 991 991 size_t pull_ascii_pstring(char *dest, const void *src) 992 992 { 993 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);993 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE); 994 994 } 995 995 996 996 size_t pull_ascii_fstring(char *dest, const void *src) 997 997 { 998 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);998 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE); 999 999 } 1000 1000 … … 1003 1003 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src) 1004 1004 { 1005 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);1005 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE); 1006 1006 } 1007 1007 … … 1025 1025 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags) 1026 1026 { 1027 size_t len=0;1028 size_t src_len;1029 size_t ret;1030 1031 /* treat a pstring as "unlimited" length */1032 if (dest_len == (size_t)-1)1033 dest_len = sizeof(pstring);1034 1035 if (flags & STR_TERMINATE)1036 src_len = (size_t)-1;1037 else1038 src_len = strlen(src);1039 1040 if (ucs2_align(base_ptr, dest, flags)) {1041 *(char *)dest = 0;1042 dest = (void *)((char *)dest + 1);1043 if (dest_len)1044 dest_len--;1045 len++;1046 }1047 1048 /* ucs2 is always a multiple of 2 bytes */1049 dest_len &= ~1;1050 1051 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);1052 if (ret == (size_t)-1) {1053 return 0;1054 }1055 1056 len += ret;1057 1058 if (flags & STR_UPPER) {1059 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;1060 size_t i;1061 1062 /* We check for i < (ret / 2) below as the dest string isn't null1063 terminated if STR_TERMINATE isn't set. */1064 1065 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {1066 smb_ucs2_t v = toupper_w(dest_ucs2[i]);1067 if (v != dest_ucs2[i]) {1068 dest_ucs2[i] = v;1069 }1070 }1071 }1072 1073 return len;1027 size_t len=0; 1028 size_t src_len; 1029 size_t ret; 1030 1031 /* treat a pstring as "unlimited" length */ 1032 if (dest_len == (size_t)-1) 1033 dest_len = sizeof(pstring); 1034 1035 if (flags & STR_TERMINATE) 1036 src_len = (size_t)-1; 1037 else 1038 src_len = strlen(src); 1039 1040 if (ucs2_align(base_ptr, dest, flags)) { 1041 *(char *)dest = 0; 1042 dest = (void *)((char *)dest + 1); 1043 if (dest_len) 1044 dest_len--; 1045 len++; 1046 } 1047 1048 /* ucs2 is always a multiple of 2 bytes */ 1049 dest_len &= ~1; 1050 1051 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True); 1052 if (ret == (size_t)-1) { 1053 return 0; 1054 } 1055 1056 len += ret; 1057 1058 if (flags & STR_UPPER) { 1059 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest; 1060 size_t i; 1061 1062 /* We check for i < (ret / 2) below as the dest string isn't null 1063 terminated if STR_TERMINATE isn't set. */ 1064 1065 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) { 1066 smb_ucs2_t v = toupper_w(dest_ucs2[i]); 1067 if (v != dest_ucs2[i]) { 1068 dest_ucs2[i] = v; 1069 } 1070 } 1071 } 1072 1073 return len; 1074 1074 } 1075 1075 … … 1079 1079 * allocating a buffer using talloc(). 1080 1080 * 1081 * @param dest always set at least to NULL 1081 * @param dest always set at least to NULL 1082 1082 * 1083 1083 * @returns The number of bytes occupied by the string in the destination … … 1086 1086 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) 1087 1087 { 1088 size_t src_len = strlen(src)+1;1089 1090 *dest = NULL;1091 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);1088 size_t src_len = strlen(src)+1; 1089 1090 *dest = NULL; 1091 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True); 1092 1092 } 1093 1093 … … 1096 1096 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer 1097 1097 * 1098 * @param dest always set at least to NULL 1098 * @param dest always set at least to NULL 1099 1099 * 1100 1100 * @returns The number of bytes occupied by the string in the destination … … 1104 1104 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src) 1105 1105 { 1106 size_t src_len = strlen(src)+1;1107 1108 *dest = NULL;1109 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);1106 size_t src_len = strlen(src)+1; 1107 1108 *dest = NULL; 1109 return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True); 1110 1110 } 1111 1111 … … 1122 1122 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags) 1123 1123 { 1124 size_t src_len = strlen(src);1125 pstring tmpbuf;1126 1127 /* treat a pstring as "unlimited" length */1128 if (dest_len == (size_t)-1)1129 dest_len = sizeof(pstring);1130 1131 if (flags & STR_UPPER) {1132 pstrcpy(tmpbuf, src);1133 strupper_m(tmpbuf);1134 src = tmpbuf;1135 }1136 1137 if (flags & STR_TERMINATE)1138 src_len++;1139 1140 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);1124 size_t src_len = strlen(src); 1125 pstring tmpbuf; 1126 1127 /* treat a pstring as "unlimited" length */ 1128 if (dest_len == (size_t)-1) 1129 dest_len = sizeof(pstring); 1130 1131 if (flags & STR_UPPER) { 1132 pstrcpy(tmpbuf, src); 1133 strupper_m(tmpbuf); 1134 src = tmpbuf; 1135 } 1136 1137 if (flags & STR_TERMINATE) 1138 src_len++; 1139 1140 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True); 1141 1141 } 1142 1142 1143 1143 size_t push_utf8_fstring(void *dest, const char *src) 1144 1144 { 1145 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);1145 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE); 1146 1146 } 1147 1147 … … 1149 1149 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc 1150 1150 * 1151 * @param dest always set at least to NULL 1151 * @param dest always set at least to NULL 1152 1152 * 1153 1153 * @returns The number of bytes occupied by the string in the destination … … 1156 1156 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1157 1157 { 1158 size_t src_len = strlen(src)+1;1159 1160 *dest = NULL;1161 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);1158 size_t src_len = strlen(src)+1; 1159 1160 *dest = NULL; 1161 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True); 1162 1162 } 1163 1163 … … 1165 1165 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer 1166 1166 * 1167 * @param dest always set at least to NULL 1167 * @param dest always set at least to NULL 1168 1168 * 1169 1169 * @returns The number of bytes occupied by the string in the destination … … 1172 1172 size_t push_utf8_allocate(char **dest, const char *src) 1173 1173 { 1174 size_t src_len = strlen(src)+1;1175 1176 *dest = NULL;1177 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True); 1174 size_t src_len = strlen(src)+1; 1175 1176 *dest = NULL; 1177 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True); 1178 1178 } 1179 1179 … … 1191 1191 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags) 1192 1192 { 1193 size_t ret;1194 1195 if (dest_len == (size_t)-1)1196 dest_len = sizeof(pstring);1197 1198 if (ucs2_align(base_ptr, src, flags)) {1199 src = (const void *)((const char *)src + 1);1200 if (src_len != (size_t)-1)1201 src_len--;1202 }1203 1204 if (flags & STR_TERMINATE) {1205 /* src_len -1 is the default for null terminated strings. */1206 if (src_len != (size_t)-1) {1207 size_t len = strnlen_w((const smb_ucs2_t *)src,1208 src_len/2);1209 if (len < src_len/2)1210 len++;1211 src_len = len*2;1212 }1213 }1214 1215 /* ucs2 is always a multiple of 2 bytes */1216 if (src_len != (size_t)-1)1217 src_len &= ~1;1218 1219 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);1220 if (ret == (size_t)-1) {1221 return 0;1222 }1223 1224 if (src_len == (size_t)-1)1225 src_len = ret*2;1226 1227 if (dest_len && ret) {1228 /* Did we already process the terminating zero ? */1229 if (dest[MIN(ret-1, dest_len-1)] != 0) {1230 dest[MIN(ret, dest_len-1)] = 0;1231 }1232 } else {1233 dest[0] = 0;1234 }1235 1236 return src_len;1193 size_t ret; 1194 1195 if (dest_len == (size_t)-1) 1196 dest_len = sizeof(pstring); 1197 1198 if (ucs2_align(base_ptr, src, flags)) { 1199 src = (const void *)((const char *)src + 1); 1200 if (src_len != (size_t)-1) 1201 src_len--; 1202 } 1203 1204 if (flags & STR_TERMINATE) { 1205 /* src_len -1 is the default for null terminated strings. */ 1206 if (src_len != (size_t)-1) { 1207 size_t len = strnlen_w((const smb_ucs2_t *)src, 1208 src_len/2); 1209 if (len < src_len/2) 1210 len++; 1211 src_len = len*2; 1212 } 1213 } 1214 1215 /* ucs2 is always a multiple of 2 bytes */ 1216 if (src_len != (size_t)-1) 1217 src_len &= ~1; 1218 1219 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True); 1220 if (ret == (size_t)-1) { 1221 return 0; 1222 } 1223 1224 if (src_len == (size_t)-1) 1225 src_len = ret*2; 1226 1227 if (dest_len && ret) { 1228 /* Did we already process the terminating zero ? */ 1229 if (dest[MIN(ret-1, dest_len-1)] != 0) { 1230 dest[MIN(ret, dest_len-1)] = 0; 1231 } 1232 } else { 1233 dest[0] = 0; 1234 } 1235 1236 return src_len; 1237 1237 } 1238 1238 1239 1239 size_t pull_ucs2_pstring(char *dest, const void *src) 1240 1240 { 1241 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);1241 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE); 1242 1242 } 1243 1243 1244 1244 size_t pull_ucs2_fstring(char *dest, const void *src) 1245 1245 { 1246 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);1246 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE); 1247 1247 } 1248 1248 … … 1250 1250 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc 1251 1251 * 1252 * @param dest always set at least to NULL 1252 * @param dest always set at least to NULL 1253 1253 * 1254 1254 * @returns The number of bytes occupied by the string in the destination … … 1257 1257 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src) 1258 1258 { 1259 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);1260 *dest = NULL;1261 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);1259 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); 1260 *dest = NULL; 1261 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True); 1262 1262 } 1263 1263 … … 1265 1265 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer 1266 1266 * 1267 * @param dest always set at least to NULL 1267 * @param dest always set at least to NULL 1268 1268 * 1269 1269 * @returns The number of bytes occupied by the string in the destination … … 1272 1272 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src) 1273 1273 { 1274 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);1275 *dest = NULL;1276 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);1274 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); 1275 *dest = NULL; 1276 return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True); 1277 1277 } 1278 1278 … … 1280 1280 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc 1281 1281 * 1282 * @param dest always set at least to NULL 1282 * @param dest always set at least to NULL 1283 1283 * 1284 1284 * @returns The number of bytes occupied by the string in the destination … … 1287 1287 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1288 1288 { 1289 size_t src_len = strlen(src)+1;1290 *dest = NULL;1291 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);1289 size_t src_len = strlen(src)+1; 1290 *dest = NULL; 1291 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True); 1292 1292 } 1293 1293 … … 1295 1295 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer 1296 1296 * 1297 * @param dest always set at least to NULL 1297 * @param dest always set at least to NULL 1298 1298 * 1299 1299 * @returns The number of bytes occupied by the string in the destination … … 1302 1302 size_t pull_utf8_allocate(char **dest, const char *src) 1303 1303 { 1304 size_t src_len = strlen(src)+1;1305 *dest = NULL;1306 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);1307 } 1308 1304 size_t src_len = strlen(src)+1; 1305 *dest = NULL; 1306 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True); 1307 } 1308 1309 1309 /** 1310 1310 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc 1311 1311 * 1312 * @param dest always set at least to NULL 1312 * @param dest always set at least to NULL 1313 1313 * 1314 1314 * @returns The number of bytes occupied by the string in the destination … … 1317 1317 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src) 1318 1318 { 1319 size_t src_len = strlen(src)+1;1320 *dest = NULL;1321 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);1319 size_t src_len = strlen(src)+1; 1320 *dest = NULL; 1321 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True); 1322 1322 } 1323 1323 1324 1324 /** 1325 1325 Copy a string from a char* src to a unicode or ascii 1326 dos codepage destination choosing unicode or ascii based on the 1326 dos codepage destination choosing unicode or ascii based on the 1327 1327 flags in the SMB buffer starting at base_ptr. 1328 1328 Return the number of bytes occupied by the string in the destination. … … 1339 1339 { 1340 1340 #ifdef DEVELOPER 1341 /* We really need to zero fill here, not clobber1342 * region, as we want to ensure that valgrind thinks1343 * all of the outgoing buffer has been written to1344 * so a send() or write() won't trap an error.1345 * JRA.1346 */1341 /* We really need to zero fill here, not clobber 1342 * region, as we want to ensure that valgrind thinks 1343 * all of the outgoing buffer has been written to 1344 * so a send() or write() won't trap an error. 1345 * JRA. 1346 */ 1347 1347 #if 0 1348 if (dest_len != (size_t)-1)1349 clobber_region(function, line, dest, dest_len);1348 if (dest_len != (size_t)-1) 1349 clobber_region(function, line, dest, dest_len); 1350 1350 #else 1351 if (dest_len != (size_t)-1)1352 memset(dest, '\0', dest_len);1351 if (dest_len != (size_t)-1) 1352 memset(dest, '\0', dest_len); 1353 1353 #endif 1354 1354 #endif 1355 1355 1356 if (!(flags & STR_ASCII) && \1357 ((flags & STR_UNICODE || \1358 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {1359 return push_ucs2(base_ptr, dest, src, dest_len, flags);1360 }1361 return push_ascii(dest, src, dest_len, flags);1356 if (!(flags & STR_ASCII) && \ 1357 ((flags & STR_UNICODE || \ 1358 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1359 return push_ucs2(base_ptr, dest, src, dest_len, flags); 1360 } 1361 return push_ascii(dest, src, dest_len, flags); 1362 1362 } 1363 1363 … … 1380 1380 { 1381 1381 #ifdef DEVELOPER 1382 if (dest_len != (size_t)-1)1383 clobber_region(function, line, dest, dest_len);1382 if (dest_len != (size_t)-1) 1383 clobber_region(function, line, dest, dest_len); 1384 1384 #endif 1385 1385 1386 if (!(flags & STR_ASCII) && \1387 ((flags & STR_UNICODE || \1388 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {1389 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);1390 }1391 return pull_ascii(dest, src, dest_len, src_len, flags);1386 if (!(flags & STR_ASCII) && \ 1387 ((flags & STR_UNICODE || \ 1388 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1389 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags); 1390 } 1391 return pull_ascii(dest, src, dest_len, src_len, flags); 1392 1392 } 1393 1393 1394 1394 size_t align_string(const void *base_ptr, const char *p, int flags) 1395 1395 { 1396 if (!(flags & STR_ASCII) && \1397 ((flags & STR_UNICODE || \1398 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {1399 return ucs2_align(base_ptr, p, flags);1400 }1401 return 0;1396 if (!(flags & STR_ASCII) && \ 1397 ((flags & STR_UNICODE || \ 1398 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { 1399 return ucs2_align(base_ptr, p, flags); 1400 } 1401 return 0; 1402 1402 } 1403 1403 … … 1414 1414 codepoint_t next_codepoint(const char *str, size_t *size) 1415 1415 { 1416 /* It cannot occupy more than 4 bytes in UTF16 format */ 1417 uint8_t buf[4]; 1418 smb_iconv_t descriptor; 1419 size_t ilen_orig; 1420 size_t ilen; 1421 size_t olen; 1422 char *outbuf; 1423 1424 if ((str[0] & 0x80) == 0) { 1425 *size = 1; 1426 return (codepoint_t)str[0]; 1427 } 1428 1429 /* We assume that no multi-byte character can take 1430 more than 5 bytes. This is OK as we only 1431 support codepoints up to 1M */ 1432 1433 ilen_orig = strnlen(str, 5); 1434 ilen = ilen_orig; 1435 1436 lazy_initialize_conv(); 1437 1438 descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; 1439 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 1440 *size = 1; 1441 return INVALID_CODEPOINT; 1442 } 1443 1444 /* This looks a little strange, but it is needed to cope 1445 with codepoints above 64k which are encoded as per RFC2781. */ 1446 olen = 2; 1447 outbuf = (char *)buf; 1448 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); 1449 if (olen == 2) { 1450 /* We failed to convert to a 2 byte character. 1451 See if we can convert to a 4 UTF16-LE byte char encoding. 1452 */ 1453 olen = 4; 1454 outbuf = (char *)buf; 1455 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); 1456 if (olen == 4) { 1457 /* We didn't convert any bytes */ 1458 *size = 1; 1459 return INVALID_CODEPOINT; 1460 } 1461 olen = 4 - olen; 1462 } else { 1463 olen = 2 - olen; 1464 } 1465 1466 *size = ilen_orig - ilen; 1467 1468 if (olen == 2) { 1469 /* 2 byte, UTF16-LE encoded value. */ 1470 return (codepoint_t)SVAL(buf, 0); 1471 } 1472 if (olen == 4) { 1473 /* Decode a 4 byte UTF16-LE character manually. 1474 See RFC2871 for the encoding machanism. 1475 */ 1476 codepoint_t w1 = SVAL(buf,0) & ~0xD800; 1477 codepoint_t w2 = SVAL(buf,2) & ~0xDC00; 1478 1479 return (codepoint_t)0x10000 + 1480 (w1 << 10) + w2; 1481 } 1482 1483 /* no other length is valid */ 1484 return INVALID_CODEPOINT; 1485 } 1416 /* It cannot occupy more than 4 bytes in UTF16 format */ 1417 uint8_t buf[4]; 1418 smb_iconv_t descriptor; 1419 size_t ilen_orig; 1420 size_t ilen; 1421 size_t olen_orig; 1422 size_t olen; 1423 const char *inbuf; 1424 char *outbuf; 1425 1426 if ((str[0] & 0x80) == 0) { 1427 *size = 1; 1428 return (codepoint_t)str[0]; 1429 } 1430 1431 lazy_initialize_conv(); 1432 1433 descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; 1434 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { 1435 *size = 1; 1436 return INVALID_CODEPOINT; 1437 } 1438 1439 *size = 1; 1440 ilen_orig = 1; 1441 olen_orig = 2; 1442 while( 1 ) 1443 { 1444 ilen = ilen_orig; 1445 olen = olen_orig; 1446 inbuf = str; 1447 outbuf = ( char * )buf; 1448 if( smb_iconv( descriptor, &inbuf, &ilen, &outbuf, &olen ) != ( size_t )-1 ) 1449 break; 1450 1451 switch( errno ) 1452 { 1453 case E2BIG : 1454 if( olen_orig == 2 ) 1455 olen_orig = 4; 1456 else 1457 return INVALID_CODEPOINT; 1458 break; 1459 1460 case EINVAL : 1461 /* We assume that no multi-byte character can take 1462 more than 5 bytes. This is OK as we only 1463 support codepoints up to 1M */ 1464 if( ilen_orig < 5 ) 1465 ilen_orig++; 1466 else 1467 return INVALID_CODEPOINT; 1468 break; 1469 1470 case EILSEQ : 1471 default : 1472 return INVALID_CODEPOINT; 1473 } 1474 } 1475 1476 olen = olen_orig - olen; 1477 1478 *size = ilen_orig - ilen; 1479 1480 if (olen == 2) { 1481 /* 2 byte, UTF16-LE encoded value. */ 1482 return (codepoint_t)SVAL(buf, 0); 1483 } 1484 if (olen == 4) { 1485 /* Decode a 4 byte UTF16-LE character manually. 1486 See RFC2871 for the encoding machanism. 1487 */ 1488 codepoint_t w1 = SVAL(buf,0) & ~0xD800; 1489 codepoint_t w2 = SVAL(buf,2) & ~0xDC00; 1490 1491 return (codepoint_t)0x10000 + 1492 (w1 << 10) + w2; 1493 } 1494 1495 /* no other length is valid */ 1496 return INVALID_CODEPOINT; 1497 }
Note:
See TracChangeset
for help on using the changeset viewer.
