| 1 | /* | 
|---|
| 2 | * WideCharToMultiByte implementation | 
|---|
| 3 | * | 
|---|
| 4 | * Copyright 2000 Alexandre Julliard | 
|---|
| 5 | */ | 
|---|
| 6 |  | 
|---|
| 7 | #include <string.h> | 
|---|
| 8 |  | 
|---|
| 9 | #include "winnls.h" | 
|---|
| 10 | #include "wine/unicode.h" | 
|---|
| 11 |  | 
|---|
| 12 | /* search for a character in the unicode_compose_table; helper for compose() */ | 
|---|
| 13 | static inline int binary_search( WCHAR ch, int low, int high ) | 
|---|
| 14 | { | 
|---|
| 15 | extern const WCHAR unicode_compose_table[]; | 
|---|
| 16 | while (low <= high) | 
|---|
| 17 | { | 
|---|
| 18 | int pos = (low + high) / 2; | 
|---|
| 19 | if (unicode_compose_table[2*pos] < ch) | 
|---|
| 20 | { | 
|---|
| 21 | low = pos + 1; | 
|---|
| 22 | continue; | 
|---|
| 23 | } | 
|---|
| 24 | if (unicode_compose_table[2*pos] > ch) | 
|---|
| 25 | { | 
|---|
| 26 | high = pos - 1; | 
|---|
| 27 | continue; | 
|---|
| 28 | } | 
|---|
| 29 | return pos; | 
|---|
| 30 | } | 
|---|
| 31 | return -1; | 
|---|
| 32 | } | 
|---|
| 33 |  | 
|---|
| 34 | /* return the result of the composition of two Unicode chars, or 0 if none */ | 
|---|
| 35 | static WCHAR compose( const WCHAR *str ) | 
|---|
| 36 | { | 
|---|
| 37 | extern const WCHAR unicode_compose_table[]; | 
|---|
| 38 | extern const unsigned int unicode_compose_table_size; | 
|---|
| 39 |  | 
|---|
| 40 | int idx = 1, low = 0, high = unicode_compose_table_size - 1; | 
|---|
| 41 | for (;;) | 
|---|
| 42 | { | 
|---|
| 43 | int pos = binary_search( str[idx], low, high ); | 
|---|
| 44 | if (pos == -1) return 0; | 
|---|
| 45 | if (!idx--) return unicode_compose_table[2*pos+1]; | 
|---|
| 46 | low = unicode_compose_table[2*pos+1]; | 
|---|
| 47 | high = unicode_compose_table[2*pos+3] - 1; | 
|---|
| 48 | } | 
|---|
| 49 | } | 
|---|
| 50 |  | 
|---|
| 51 |  | 
|---|
| 52 | /****************************************************************/ | 
|---|
| 53 | /* sbcs support */ | 
|---|
| 54 |  | 
|---|
| 55 | /* check if 'ch' is an acceptable sbcs mapping for 'wch' */ | 
|---|
| 56 | static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags, | 
|---|
| 57 | WCHAR wch, unsigned char ch ) | 
|---|
| 58 | { | 
|---|
| 59 | if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch); | 
|---|
| 60 | if (ch != (unsigned char)table->info.def_char) return 1; | 
|---|
| 61 | return (wch == table->info.def_unicode_char); | 
|---|
| 62 | } | 
|---|
| 63 |  | 
|---|
| 64 | /* query necessary dst length for src string */ | 
|---|
| 65 | static inline int get_length_sbcs( const struct sbcs_table *table, int flags, | 
|---|
| 66 | const WCHAR *src, unsigned int srclen ) | 
|---|
| 67 | { | 
|---|
| 68 | unsigned int ret = srclen; | 
|---|
| 69 |  | 
|---|
| 70 | if (flags & WC_COMPOSITECHECK) | 
|---|
| 71 | { | 
|---|
| 72 | const unsigned char  * const uni2cp_low = table->uni2cp_low; | 
|---|
| 73 | const unsigned short * const uni2cp_high = table->uni2cp_high; | 
|---|
| 74 | WCHAR composed; | 
|---|
| 75 |  | 
|---|
| 76 | for (ret = 0; srclen > 1; ret++, srclen--, src++) | 
|---|
| 77 | { | 
|---|
| 78 | if (!(composed = compose(src))) continue; | 
|---|
| 79 | /* check if we should skip the next char */ | 
|---|
| 80 |  | 
|---|
| 81 | /* in WC_DEFAULTCHAR and WC_DISCARDNS mode, we always skip */ | 
|---|
| 82 | /* the next char no matter if the composition is valid or not */ | 
|---|
| 83 | if (!(flags & (WC_DEFAULTCHAR|WC_DISCARDNS))) | 
|---|
| 84 | { | 
|---|
| 85 | unsigned char ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; | 
|---|
| 86 | if (!is_valid_sbcs_mapping( table, flags, composed, ch )) continue; | 
|---|
| 87 | } | 
|---|
| 88 | src++; | 
|---|
| 89 | srclen--; | 
|---|
| 90 | } | 
|---|
| 91 | if (srclen) ret++;  /* last char */ | 
|---|
| 92 | } | 
|---|
| 93 | return ret; | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | /* wcstombs for single-byte code page */ | 
|---|
| 97 | static inline int wcstombs_sbcs( const struct sbcs_table *table, | 
|---|
| 98 | const WCHAR *src, unsigned int srclen, | 
|---|
| 99 | char *dst, unsigned int dstlen ) | 
|---|
| 100 | { | 
|---|
| 101 | const unsigned char  * const uni2cp_low = table->uni2cp_low; | 
|---|
| 102 | const unsigned short * const uni2cp_high = table->uni2cp_high; | 
|---|
| 103 | int ret = srclen; | 
|---|
| 104 |  | 
|---|
| 105 | if (dstlen < srclen) | 
|---|
| 106 | { | 
|---|
| 107 | /* buffer too small: fill it up to dstlen and return error */ | 
|---|
| 108 | srclen = dstlen; | 
|---|
| 109 | ret = -1; | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | for (;;) | 
|---|
| 113 | { | 
|---|
| 114 | switch(srclen) | 
|---|
| 115 | { | 
|---|
| 116 | default: | 
|---|
| 117 | case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)]; | 
|---|
| 118 | case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)]; | 
|---|
| 119 | case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)]; | 
|---|
| 120 | case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)]; | 
|---|
| 121 | case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)]; | 
|---|
| 122 | case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)]; | 
|---|
| 123 | case 10: dst[9]  = uni2cp_low[uni2cp_high[src[9]  >> 8] + (src[9]  & 0xff)]; | 
|---|
| 124 | case 9:  dst[8]  = uni2cp_low[uni2cp_high[src[8]  >> 8] + (src[8]  & 0xff)]; | 
|---|
| 125 | case 8:  dst[7]  = uni2cp_low[uni2cp_high[src[7]  >> 8] + (src[7]  & 0xff)]; | 
|---|
| 126 | case 7:  dst[6]  = uni2cp_low[uni2cp_high[src[6]  >> 8] + (src[6]  & 0xff)]; | 
|---|
| 127 | case 6:  dst[5]  = uni2cp_low[uni2cp_high[src[5]  >> 8] + (src[5]  & 0xff)]; | 
|---|
| 128 | case 5:  dst[4]  = uni2cp_low[uni2cp_high[src[4]  >> 8] + (src[4]  & 0xff)]; | 
|---|
| 129 | case 4:  dst[3]  = uni2cp_low[uni2cp_high[src[3]  >> 8] + (src[3]  & 0xff)]; | 
|---|
| 130 | case 3:  dst[2]  = uni2cp_low[uni2cp_high[src[2]  >> 8] + (src[2]  & 0xff)]; | 
|---|
| 131 | case 2:  dst[1]  = uni2cp_low[uni2cp_high[src[1]  >> 8] + (src[1]  & 0xff)]; | 
|---|
| 132 | case 1:  dst[0]  = uni2cp_low[uni2cp_high[src[0]  >> 8] + (src[0]  & 0xff)]; | 
|---|
| 133 | case 0: break; | 
|---|
| 134 | } | 
|---|
| 135 | if (srclen < 16) return ret; | 
|---|
| 136 | dst += 16; | 
|---|
| 137 | src += 16; | 
|---|
| 138 | srclen -= 16; | 
|---|
| 139 | } | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | /* slow version of wcstombs_sbcs that handles the various flags */ | 
|---|
| 143 | static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags, | 
|---|
| 144 | const WCHAR *src, unsigned int srclen, | 
|---|
| 145 | char *dst, unsigned int dstlen, | 
|---|
| 146 | const char *defchar, int *used ) | 
|---|
| 147 | { | 
|---|
| 148 | const unsigned char  * const uni2cp_low = table->uni2cp_low; | 
|---|
| 149 | const unsigned short * const uni2cp_high = table->uni2cp_high; | 
|---|
| 150 | const unsigned char table_default = table->info.def_char & 0xff; | 
|---|
| 151 | unsigned int len; | 
|---|
| 152 | int tmp; | 
|---|
| 153 | WCHAR composed; | 
|---|
| 154 |  | 
|---|
| 155 | if (!defchar) defchar = (const char *)&table_default; | 
|---|
| 156 | if (!used) used = &tmp;  /* avoid checking on every char */ | 
|---|
| 157 |  | 
|---|
| 158 | for (len = dstlen; srclen && len; dst++, len--, src++, srclen--) | 
|---|
| 159 | { | 
|---|
| 160 | WCHAR wch = *src; | 
|---|
| 161 |  | 
|---|
| 162 | if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src))) | 
|---|
| 163 | { | 
|---|
| 164 | /* now check if we can use the composed char */ | 
|---|
| 165 | *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; | 
|---|
| 166 | if (is_valid_sbcs_mapping( table, flags, composed, *dst )) | 
|---|
| 167 | { | 
|---|
| 168 | /* we have a good mapping, use it */ | 
|---|
| 169 | src++; | 
|---|
| 170 | srclen--; | 
|---|
| 171 | continue; | 
|---|
| 172 | } | 
|---|
| 173 | /* no mapping for the composed char, check the other flags */ | 
|---|
| 174 | if (flags & WC_DEFAULTCHAR) /* use the default char instead */ | 
|---|
| 175 | { | 
|---|
| 176 | *dst = *defchar; | 
|---|
| 177 | *used = 1; | 
|---|
| 178 | src++;  /* skip the non-spacing char */ | 
|---|
| 179 | srclen--; | 
|---|
| 180 | continue; | 
|---|
| 181 | } | 
|---|
| 182 | if (flags & WC_DISCARDNS) /* skip the second char of the composition */ | 
|---|
| 183 | { | 
|---|
| 184 | src++; | 
|---|
| 185 | srclen--; | 
|---|
| 186 | } | 
|---|
| 187 | /* WC_SEPCHARS is the default */ | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; | 
|---|
| 191 | if (!is_valid_sbcs_mapping( table, flags, wch, *dst )) | 
|---|
| 192 | { | 
|---|
| 193 | *dst = *defchar; | 
|---|
| 194 | *used = 1; | 
|---|
| 195 | } | 
|---|
| 196 | } | 
|---|
| 197 | if (srclen) return -1;  /* overflow */ | 
|---|
| 198 | return dstlen - len; | 
|---|
| 199 | } | 
|---|
| 200 |  | 
|---|
| 201 |  | 
|---|
| 202 | /****************************************************************/ | 
|---|
| 203 | /* dbcs support */ | 
|---|
| 204 |  | 
|---|
| 205 | /* check if 'ch' is an acceptable dbcs mapping for 'wch' */ | 
|---|
| 206 | static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags, | 
|---|
| 207 | WCHAR wch, unsigned short ch ) | 
|---|
| 208 | { | 
|---|
| 209 | if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0; | 
|---|
| 210 | if (flags & WC_NO_BEST_FIT_CHARS) | 
|---|
| 211 | { | 
|---|
| 212 | /* check if char maps back to the same Unicode value */ | 
|---|
| 213 | if (ch & 0xff00) | 
|---|
| 214 | { | 
|---|
| 215 | unsigned char off = table->cp2uni_leadbytes[ch >> 8]; | 
|---|
| 216 | return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch); | 
|---|
| 217 | } | 
|---|
| 218 | return (table->cp2uni[ch & 0xff] == wch); | 
|---|
| 219 | } | 
|---|
| 220 | return 1; | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | /* query necessary dst length for src string */ | 
|---|
| 224 | static int get_length_dbcs( const struct dbcs_table *table, int flags, | 
|---|
| 225 | const WCHAR *src, unsigned int srclen, | 
|---|
| 226 | const char *defchar ) | 
|---|
| 227 | { | 
|---|
| 228 | const unsigned short * const uni2cp_low = table->uni2cp_low; | 
|---|
| 229 | const unsigned short * const uni2cp_high = table->uni2cp_high; | 
|---|
| 230 | WCHAR defchar_value = table->info.def_char; | 
|---|
| 231 | WCHAR composed; | 
|---|
| 232 | int len; | 
|---|
| 233 |  | 
|---|
| 234 | if (!defchar && !(flags & WC_COMPOSITECHECK)) | 
|---|
| 235 | { | 
|---|
| 236 | for (len = 0; srclen; srclen--, src++, len++) | 
|---|
| 237 | { | 
|---|
| 238 | if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++; | 
|---|
| 239 | } | 
|---|
| 240 | return len; | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0]; | 
|---|
| 244 | for (len = 0; srclen; len++, srclen--, src++) | 
|---|
| 245 | { | 
|---|
| 246 | unsigned short res; | 
|---|
| 247 | WCHAR wch = *src; | 
|---|
| 248 |  | 
|---|
| 249 | if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src))) | 
|---|
| 250 | { | 
|---|
| 251 | /* now check if we can use the composed char */ | 
|---|
| 252 | res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; | 
|---|
| 253 |  | 
|---|
| 254 | if (is_valid_dbcs_mapping( table, flags, composed, res )) | 
|---|
| 255 | { | 
|---|
| 256 | /* we have a good mapping for the composed char, use it */ | 
|---|
| 257 | if (res & 0xff00) len++; | 
|---|
| 258 | src++; | 
|---|
| 259 | srclen--; | 
|---|
| 260 | continue; | 
|---|
| 261 | } | 
|---|
| 262 | /* no mapping for the composed char, check the other flags */ | 
|---|
| 263 | if (flags & WC_DEFAULTCHAR) /* use the default char instead */ | 
|---|
| 264 | { | 
|---|
| 265 | if (defchar_value & 0xff00) len++; | 
|---|
| 266 | src++;  /* skip the non-spacing char */ | 
|---|
| 267 | srclen--; | 
|---|
| 268 | continue; | 
|---|
| 269 | } | 
|---|
| 270 | if (flags & WC_DISCARDNS) /* skip the second char of the composition */ | 
|---|
| 271 | { | 
|---|
| 272 | src++; | 
|---|
| 273 | srclen--; | 
|---|
| 274 | } | 
|---|
| 275 | /* WC_SEPCHARS is the default */ | 
|---|
| 276 | } | 
|---|
| 277 |  | 
|---|
| 278 | res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; | 
|---|
| 279 | if (!is_valid_dbcs_mapping( table, flags, wch, res )) res = defchar_value; | 
|---|
| 280 | if (res & 0xff00) len++; | 
|---|
| 281 | } | 
|---|
| 282 | return len; | 
|---|
| 283 | } | 
|---|
| 284 |  | 
|---|
| 285 | /* wcstombs for double-byte code page */ | 
|---|
| 286 | static inline int wcstombs_dbcs( const struct dbcs_table *table, | 
|---|
| 287 | const WCHAR *src, unsigned int srclen, | 
|---|
| 288 | char *dst, unsigned int dstlen ) | 
|---|
| 289 | { | 
|---|
| 290 | const unsigned short * const uni2cp_low = table->uni2cp_low; | 
|---|
| 291 | const unsigned short * const uni2cp_high = table->uni2cp_high; | 
|---|
| 292 | int len; | 
|---|
| 293 |  | 
|---|
| 294 | for (len = dstlen; srclen && len; len--, srclen--, src++) | 
|---|
| 295 | { | 
|---|
| 296 | unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)]; | 
|---|
| 297 | if (res & 0xff00) | 
|---|
| 298 | { | 
|---|
| 299 | if (len == 1) break;  /* do not output a partial char */ | 
|---|
| 300 | len--; | 
|---|
| 301 | *dst++ = res >> 8; | 
|---|
| 302 | } | 
|---|
| 303 | *dst++ = (char)res; | 
|---|
| 304 | } | 
|---|
| 305 | if (srclen) return -1;  /* overflow */ | 
|---|
| 306 | return dstlen - len; | 
|---|
| 307 | } | 
|---|
| 308 |  | 
|---|
| 309 | /* slow version of wcstombs_dbcs that handles the various flags */ | 
|---|
| 310 | static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags, | 
|---|
| 311 | const WCHAR *src, unsigned int srclen, | 
|---|
| 312 | char *dst, unsigned int dstlen, | 
|---|
| 313 | const char *defchar, int *used ) | 
|---|
| 314 | { | 
|---|
| 315 | const unsigned short * const uni2cp_low = table->uni2cp_low; | 
|---|
| 316 | const unsigned short * const uni2cp_high = table->uni2cp_high; | 
|---|
| 317 | WCHAR defchar_value = table->info.def_char; | 
|---|
| 318 | WCHAR composed; | 
|---|
| 319 | int len, tmp; | 
|---|
| 320 |  | 
|---|
| 321 | if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0]; | 
|---|
| 322 | if (!used) used = &tmp;  /* avoid checking on every char */ | 
|---|
| 323 |  | 
|---|
| 324 | for (len = dstlen; srclen && len; len--, srclen--, src++) | 
|---|
| 325 | { | 
|---|
| 326 | unsigned short res; | 
|---|
| 327 | WCHAR wch = *src; | 
|---|
| 328 |  | 
|---|
| 329 | if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src))) | 
|---|
| 330 | { | 
|---|
| 331 | /* now check if we can use the composed char */ | 
|---|
| 332 | res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; | 
|---|
| 333 |  | 
|---|
| 334 | if (is_valid_dbcs_mapping( table, flags, composed, res )) | 
|---|
| 335 | { | 
|---|
| 336 | /* we have a good mapping for the composed char, use it */ | 
|---|
| 337 | src++; | 
|---|
| 338 | srclen--; | 
|---|
| 339 | goto output_char; | 
|---|
| 340 | } | 
|---|
| 341 | /* no mapping for the composed char, check the other flags */ | 
|---|
| 342 | if (flags & WC_DEFAULTCHAR) /* use the default char instead */ | 
|---|
| 343 | { | 
|---|
| 344 | res = defchar_value; | 
|---|
| 345 | *used = 1; | 
|---|
| 346 | src++;  /* skip the non-spacing char */ | 
|---|
| 347 | srclen--; | 
|---|
| 348 | goto output_char; | 
|---|
| 349 | } | 
|---|
| 350 | if (flags & WC_DISCARDNS) /* skip the second char of the composition */ | 
|---|
| 351 | { | 
|---|
| 352 | src++; | 
|---|
| 353 | srclen--; | 
|---|
| 354 | } | 
|---|
| 355 | /* WC_SEPCHARS is the default */ | 
|---|
| 356 | } | 
|---|
| 357 |  | 
|---|
| 358 | res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; | 
|---|
| 359 | if (!is_valid_dbcs_mapping( table, flags, wch, res )) | 
|---|
| 360 | { | 
|---|
| 361 | res = defchar_value; | 
|---|
| 362 | *used = 1; | 
|---|
| 363 | } | 
|---|
| 364 |  | 
|---|
| 365 | output_char: | 
|---|
| 366 | if (res & 0xff00) | 
|---|
| 367 | { | 
|---|
| 368 | if (len == 1) break;  /* do not output a partial char */ | 
|---|
| 369 | len--; | 
|---|
| 370 | *dst++ = res >> 8; | 
|---|
| 371 | } | 
|---|
| 372 | *dst++ = (char)res; | 
|---|
| 373 | } | 
|---|
| 374 | if (srclen) return -1;  /* overflow */ | 
|---|
| 375 | return dstlen - len; | 
|---|
| 376 | } | 
|---|
| 377 |  | 
|---|
| 378 | /* wide char to multi byte string conversion */ | 
|---|
| 379 | /* return -1 on dst buffer overflow */ | 
|---|
| 380 | int cp_wcstombs( const union cptable *table, int flags, | 
|---|
| 381 | const WCHAR *src, int srclen, | 
|---|
| 382 | char *dst, int dstlen, const char *defchar, int *used ) | 
|---|
| 383 | { | 
|---|
| 384 | if (table->info.char_size == 1) | 
|---|
| 385 | { | 
|---|
| 386 | if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen ); | 
|---|
| 387 | if (flags || defchar || used) | 
|---|
| 388 | return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen, | 
|---|
| 389 | dst, dstlen, defchar, used ); | 
|---|
| 390 | return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen ); | 
|---|
| 391 | } | 
|---|
| 392 | else /* mbcs */ | 
|---|
| 393 | { | 
|---|
| 394 | if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar ); | 
|---|
| 395 | if (flags || defchar || used) | 
|---|
| 396 | return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen, | 
|---|
| 397 | dst, dstlen, defchar, used ); | 
|---|
| 398 | return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); | 
|---|
| 399 | } | 
|---|
| 400 | } | 
|---|