1 | /*
|
---|
2 | * WideCharToMultiByte implementation
|
---|
3 | *
|
---|
4 | * Copyright 2000 Alexandre Julliard
|
---|
5 | */
|
---|
6 |
|
---|
7 | #include <string.h>
|
---|
8 |
|
---|
9 | #include "winnls.h"
|
---|
10 | #include "wine/unicode.h"
|
---|
11 |
|
---|
12 | /* search for a character in the unicode_compose_table; helper for compose() */
|
---|
13 | static inline int binary_search( WCHAR ch, int low, int high )
|
---|
14 | {
|
---|
15 | extern const WCHAR unicode_compose_table[];
|
---|
16 | while (low <= high)
|
---|
17 | {
|
---|
18 | int pos = (low + high) / 2;
|
---|
19 | if (unicode_compose_table[2*pos] < ch)
|
---|
20 | {
|
---|
21 | low = pos + 1;
|
---|
22 | continue;
|
---|
23 | }
|
---|
24 | if (unicode_compose_table[2*pos] > ch)
|
---|
25 | {
|
---|
26 | high = pos - 1;
|
---|
27 | continue;
|
---|
28 | }
|
---|
29 | return pos;
|
---|
30 | }
|
---|
31 | return -1;
|
---|
32 | }
|
---|
33 |
|
---|
34 | /* return the result of the composition of two Unicode chars, or 0 if none */
|
---|
35 | static WCHAR compose( const WCHAR *str )
|
---|
36 | {
|
---|
37 | extern const WCHAR unicode_compose_table[];
|
---|
38 | extern const unsigned int unicode_compose_table_size;
|
---|
39 |
|
---|
40 | int idx = 1, low = 0, high = unicode_compose_table_size - 1;
|
---|
41 | for (;;)
|
---|
42 | {
|
---|
43 | int pos = binary_search( str[idx], low, high );
|
---|
44 | if (pos == -1) return 0;
|
---|
45 | if (!idx--) return unicode_compose_table[2*pos+1];
|
---|
46 | low = unicode_compose_table[2*pos+1];
|
---|
47 | high = unicode_compose_table[2*pos+3] - 1;
|
---|
48 | }
|
---|
49 | }
|
---|
50 |
|
---|
51 |
|
---|
52 | /****************************************************************/
|
---|
53 | /* sbcs support */
|
---|
54 |
|
---|
55 | /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
|
---|
56 | static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
|
---|
57 | WCHAR wch, unsigned char ch )
|
---|
58 | {
|
---|
59 | if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
|
---|
60 | if (ch != (unsigned char)table->info.def_char) return 1;
|
---|
61 | return (wch == table->info.def_unicode_char);
|
---|
62 | }
|
---|
63 |
|
---|
64 | /* query necessary dst length for src string */
|
---|
65 | static inline int get_length_sbcs( const struct sbcs_table *table, int flags,
|
---|
66 | const WCHAR *src, unsigned int srclen )
|
---|
67 | {
|
---|
68 | unsigned int ret = srclen;
|
---|
69 |
|
---|
70 | if (flags & WC_COMPOSITECHECK)
|
---|
71 | {
|
---|
72 | const unsigned char * const uni2cp_low = table->uni2cp_low;
|
---|
73 | const unsigned short * const uni2cp_high = table->uni2cp_high;
|
---|
74 | WCHAR composed;
|
---|
75 |
|
---|
76 | for (ret = 0; srclen > 1; ret++, srclen--, src++)
|
---|
77 | {
|
---|
78 | if (!(composed = compose(src))) continue;
|
---|
79 | /* check if we should skip the next char */
|
---|
80 |
|
---|
81 | /* in WC_DEFAULTCHAR and WC_DISCARDNS mode, we always skip */
|
---|
82 | /* the next char no matter if the composition is valid or not */
|
---|
83 | if (!(flags & (WC_DEFAULTCHAR|WC_DISCARDNS)))
|
---|
84 | {
|
---|
85 | unsigned char ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
---|
86 | if (!is_valid_sbcs_mapping( table, flags, composed, ch )) continue;
|
---|
87 | }
|
---|
88 | src++;
|
---|
89 | srclen--;
|
---|
90 | }
|
---|
91 | if (srclen) ret++; /* last char */
|
---|
92 | }
|
---|
93 | return ret;
|
---|
94 | }
|
---|
95 |
|
---|
96 | /* wcstombs for single-byte code page */
|
---|
97 | static inline int wcstombs_sbcs( const struct sbcs_table *table,
|
---|
98 | const WCHAR *src, unsigned int srclen,
|
---|
99 | char *dst, unsigned int dstlen )
|
---|
100 | {
|
---|
101 | const unsigned char * const uni2cp_low = table->uni2cp_low;
|
---|
102 | const unsigned short * const uni2cp_high = table->uni2cp_high;
|
---|
103 | int ret = srclen;
|
---|
104 |
|
---|
105 | if (dstlen < srclen)
|
---|
106 | {
|
---|
107 | /* buffer too small: fill it up to dstlen and return error */
|
---|
108 | srclen = dstlen;
|
---|
109 | ret = -1;
|
---|
110 | }
|
---|
111 |
|
---|
112 | for (;;)
|
---|
113 | {
|
---|
114 | switch(srclen)
|
---|
115 | {
|
---|
116 | default:
|
---|
117 | case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
|
---|
118 | case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
|
---|
119 | case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
|
---|
120 | case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
|
---|
121 | case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
|
---|
122 | case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
|
---|
123 | case 10: dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
|
---|
124 | case 9: dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
|
---|
125 | case 8: dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
|
---|
126 | case 7: dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
|
---|
127 | case 6: dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
|
---|
128 | case 5: dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
|
---|
129 | case 4: dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
|
---|
130 | case 3: dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
|
---|
131 | case 2: dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
|
---|
132 | case 1: dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
|
---|
133 | case 0: break;
|
---|
134 | }
|
---|
135 | if (srclen < 16) return ret;
|
---|
136 | dst += 16;
|
---|
137 | src += 16;
|
---|
138 | srclen -= 16;
|
---|
139 | }
|
---|
140 | }
|
---|
141 |
|
---|
142 | /* slow version of wcstombs_sbcs that handles the various flags */
|
---|
143 | static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
|
---|
144 | const WCHAR *src, unsigned int srclen,
|
---|
145 | char *dst, unsigned int dstlen,
|
---|
146 | const char *defchar, int *used )
|
---|
147 | {
|
---|
148 | const unsigned char * const uni2cp_low = table->uni2cp_low;
|
---|
149 | const unsigned short * const uni2cp_high = table->uni2cp_high;
|
---|
150 | const unsigned char table_default = table->info.def_char & 0xff;
|
---|
151 | unsigned int len;
|
---|
152 | int tmp;
|
---|
153 | WCHAR composed;
|
---|
154 |
|
---|
155 | if (!defchar) defchar = (const char *)&table_default;
|
---|
156 | if (!used) used = &tmp; /* avoid checking on every char */
|
---|
157 |
|
---|
158 | for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
|
---|
159 | {
|
---|
160 | WCHAR wch = *src;
|
---|
161 |
|
---|
162 | if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
|
---|
163 | {
|
---|
164 | /* now check if we can use the composed char */
|
---|
165 | *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
---|
166 | if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
|
---|
167 | {
|
---|
168 | /* we have a good mapping, use it */
|
---|
169 | src++;
|
---|
170 | srclen--;
|
---|
171 | continue;
|
---|
172 | }
|
---|
173 | /* no mapping for the composed char, check the other flags */
|
---|
174 | if (flags & WC_DEFAULTCHAR) /* use the default char instead */
|
---|
175 | {
|
---|
176 | *dst = *defchar;
|
---|
177 | *used = 1;
|
---|
178 | src++; /* skip the non-spacing char */
|
---|
179 | srclen--;
|
---|
180 | continue;
|
---|
181 | }
|
---|
182 | if (flags & WC_DISCARDNS) /* skip the second char of the composition */
|
---|
183 | {
|
---|
184 | src++;
|
---|
185 | srclen--;
|
---|
186 | }
|
---|
187 | /* WC_SEPCHARS is the default */
|
---|
188 | }
|
---|
189 |
|
---|
190 | *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
|
---|
191 | if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
|
---|
192 | {
|
---|
193 | *dst = *defchar;
|
---|
194 | *used = 1;
|
---|
195 | }
|
---|
196 | }
|
---|
197 | if (srclen) return -1; /* overflow */
|
---|
198 | return dstlen - len;
|
---|
199 | }
|
---|
200 |
|
---|
201 |
|
---|
202 | /****************************************************************/
|
---|
203 | /* dbcs support */
|
---|
204 |
|
---|
205 | /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
|
---|
206 | static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
|
---|
207 | WCHAR wch, unsigned short ch )
|
---|
208 | {
|
---|
209 | if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
|
---|
210 | if (flags & WC_NO_BEST_FIT_CHARS)
|
---|
211 | {
|
---|
212 | /* check if char maps back to the same Unicode value */
|
---|
213 | if (ch & 0xff00)
|
---|
214 | {
|
---|
215 | unsigned char off = table->cp2uni_leadbytes[ch >> 8];
|
---|
216 | return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
|
---|
217 | }
|
---|
218 | return (table->cp2uni[ch & 0xff] == wch);
|
---|
219 | }
|
---|
220 | return 1;
|
---|
221 | }
|
---|
222 |
|
---|
223 | /* query necessary dst length for src string */
|
---|
224 | static int get_length_dbcs( const struct dbcs_table *table, int flags,
|
---|
225 | const WCHAR *src, unsigned int srclen,
|
---|
226 | const char *defchar )
|
---|
227 | {
|
---|
228 | const unsigned short * const uni2cp_low = table->uni2cp_low;
|
---|
229 | const unsigned short * const uni2cp_high = table->uni2cp_high;
|
---|
230 | WCHAR defchar_value = table->info.def_char;
|
---|
231 | WCHAR composed;
|
---|
232 | int len;
|
---|
233 |
|
---|
234 | if (!defchar && !(flags & WC_COMPOSITECHECK))
|
---|
235 | {
|
---|
236 | for (len = 0; srclen; srclen--, src++, len++)
|
---|
237 | {
|
---|
238 | if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
|
---|
239 | }
|
---|
240 | return len;
|
---|
241 | }
|
---|
242 |
|
---|
243 | if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
|
---|
244 | for (len = 0; srclen; len++, srclen--, src++)
|
---|
245 | {
|
---|
246 | unsigned short res;
|
---|
247 | WCHAR wch = *src;
|
---|
248 |
|
---|
249 | if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
|
---|
250 | {
|
---|
251 | /* now check if we can use the composed char */
|
---|
252 | res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
---|
253 |
|
---|
254 | if (is_valid_dbcs_mapping( table, flags, composed, res ))
|
---|
255 | {
|
---|
256 | /* we have a good mapping for the composed char, use it */
|
---|
257 | if (res & 0xff00) len++;
|
---|
258 | src++;
|
---|
259 | srclen--;
|
---|
260 | continue;
|
---|
261 | }
|
---|
262 | /* no mapping for the composed char, check the other flags */
|
---|
263 | if (flags & WC_DEFAULTCHAR) /* use the default char instead */
|
---|
264 | {
|
---|
265 | if (defchar_value & 0xff00) len++;
|
---|
266 | src++; /* skip the non-spacing char */
|
---|
267 | srclen--;
|
---|
268 | continue;
|
---|
269 | }
|
---|
270 | if (flags & WC_DISCARDNS) /* skip the second char of the composition */
|
---|
271 | {
|
---|
272 | src++;
|
---|
273 | srclen--;
|
---|
274 | }
|
---|
275 | /* WC_SEPCHARS is the default */
|
---|
276 | }
|
---|
277 |
|
---|
278 | res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
|
---|
279 | if (!is_valid_dbcs_mapping( table, flags, wch, res )) res = defchar_value;
|
---|
280 | if (res & 0xff00) len++;
|
---|
281 | }
|
---|
282 | return len;
|
---|
283 | }
|
---|
284 |
|
---|
285 | /* wcstombs for double-byte code page */
|
---|
286 | static inline int wcstombs_dbcs( const struct dbcs_table *table,
|
---|
287 | const WCHAR *src, unsigned int srclen,
|
---|
288 | char *dst, unsigned int dstlen )
|
---|
289 | {
|
---|
290 | const unsigned short * const uni2cp_low = table->uni2cp_low;
|
---|
291 | const unsigned short * const uni2cp_high = table->uni2cp_high;
|
---|
292 | int len;
|
---|
293 |
|
---|
294 | for (len = dstlen; srclen && len; len--, srclen--, src++)
|
---|
295 | {
|
---|
296 | unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
|
---|
297 | if (res & 0xff00)
|
---|
298 | {
|
---|
299 | if (len == 1) break; /* do not output a partial char */
|
---|
300 | len--;
|
---|
301 | *dst++ = res >> 8;
|
---|
302 | }
|
---|
303 | *dst++ = (char)res;
|
---|
304 | }
|
---|
305 | if (srclen) return -1; /* overflow */
|
---|
306 | return dstlen - len;
|
---|
307 | }
|
---|
308 |
|
---|
309 | /* slow version of wcstombs_dbcs that handles the various flags */
|
---|
310 | static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
|
---|
311 | const WCHAR *src, unsigned int srclen,
|
---|
312 | char *dst, unsigned int dstlen,
|
---|
313 | const char *defchar, int *used )
|
---|
314 | {
|
---|
315 | const unsigned short * const uni2cp_low = table->uni2cp_low;
|
---|
316 | const unsigned short * const uni2cp_high = table->uni2cp_high;
|
---|
317 | WCHAR defchar_value = table->info.def_char;
|
---|
318 | WCHAR composed;
|
---|
319 | int len, tmp;
|
---|
320 |
|
---|
321 | if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
|
---|
322 | if (!used) used = &tmp; /* avoid checking on every char */
|
---|
323 |
|
---|
324 | for (len = dstlen; srclen && len; len--, srclen--, src++)
|
---|
325 | {
|
---|
326 | unsigned short res;
|
---|
327 | WCHAR wch = *src;
|
---|
328 |
|
---|
329 | if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
|
---|
330 | {
|
---|
331 | /* now check if we can use the composed char */
|
---|
332 | res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
---|
333 |
|
---|
334 | if (is_valid_dbcs_mapping( table, flags, composed, res ))
|
---|
335 | {
|
---|
336 | /* we have a good mapping for the composed char, use it */
|
---|
337 | src++;
|
---|
338 | srclen--;
|
---|
339 | goto output_char;
|
---|
340 | }
|
---|
341 | /* no mapping for the composed char, check the other flags */
|
---|
342 | if (flags & WC_DEFAULTCHAR) /* use the default char instead */
|
---|
343 | {
|
---|
344 | res = defchar_value;
|
---|
345 | *used = 1;
|
---|
346 | src++; /* skip the non-spacing char */
|
---|
347 | srclen--;
|
---|
348 | goto output_char;
|
---|
349 | }
|
---|
350 | if (flags & WC_DISCARDNS) /* skip the second char of the composition */
|
---|
351 | {
|
---|
352 | src++;
|
---|
353 | srclen--;
|
---|
354 | }
|
---|
355 | /* WC_SEPCHARS is the default */
|
---|
356 | }
|
---|
357 |
|
---|
358 | res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
|
---|
359 | if (!is_valid_dbcs_mapping( table, flags, wch, res ))
|
---|
360 | {
|
---|
361 | res = defchar_value;
|
---|
362 | *used = 1;
|
---|
363 | }
|
---|
364 |
|
---|
365 | output_char:
|
---|
366 | if (res & 0xff00)
|
---|
367 | {
|
---|
368 | if (len == 1) break; /* do not output a partial char */
|
---|
369 | len--;
|
---|
370 | *dst++ = res >> 8;
|
---|
371 | }
|
---|
372 | *dst++ = (char)res;
|
---|
373 | }
|
---|
374 | if (srclen) return -1; /* overflow */
|
---|
375 | return dstlen - len;
|
---|
376 | }
|
---|
377 |
|
---|
378 | /* wide char to multi byte string conversion */
|
---|
379 | /* return -1 on dst buffer overflow */
|
---|
380 | int cp_wcstombs( const union cptable *table, int flags,
|
---|
381 | const WCHAR *src, int srclen,
|
---|
382 | char *dst, int dstlen, const char *defchar, int *used )
|
---|
383 | {
|
---|
384 | if (table->info.char_size == 1)
|
---|
385 | {
|
---|
386 | if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen );
|
---|
387 | if (flags || defchar || used)
|
---|
388 | return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
|
---|
389 | dst, dstlen, defchar, used );
|
---|
390 | return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
|
---|
391 | }
|
---|
392 | else /* mbcs */
|
---|
393 | {
|
---|
394 | if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar );
|
---|
395 | if (flags || defchar || used)
|
---|
396 | return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
|
---|
397 | dst, dstlen, defchar, used );
|
---|
398 | return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
|
---|
399 | }
|
---|
400 | }
|
---|