source: trunk/src/kernel32/unicode/wctomb.c@ 8001

Last change on this file since 8001 was 5450, checked in by sandervl, 25 years ago

Wine's unicode translation functions & tables

File size: 14.0 KB
Line 
1/*
2 * WideCharToMultiByte implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 */
6
7#include <string.h>
8
9#include "winnls.h"
10#include "wine/unicode.h"
11
12/* search for a character in the unicode_compose_table; helper for compose() */
13static inline int binary_search( WCHAR ch, int low, int high )
14{
15 extern const WCHAR unicode_compose_table[];
16 while (low <= high)
17 {
18 int pos = (low + high) / 2;
19 if (unicode_compose_table[2*pos] < ch)
20 {
21 low = pos + 1;
22 continue;
23 }
24 if (unicode_compose_table[2*pos] > ch)
25 {
26 high = pos - 1;
27 continue;
28 }
29 return pos;
30 }
31 return -1;
32}
33
34/* return the result of the composition of two Unicode chars, or 0 if none */
35static WCHAR compose( const WCHAR *str )
36{
37 extern const WCHAR unicode_compose_table[];
38 extern const unsigned int unicode_compose_table_size;
39
40 int idx = 1, low = 0, high = unicode_compose_table_size - 1;
41 for (;;)
42 {
43 int pos = binary_search( str[idx], low, high );
44 if (pos == -1) return 0;
45 if (!idx--) return unicode_compose_table[2*pos+1];
46 low = unicode_compose_table[2*pos+1];
47 high = unicode_compose_table[2*pos+3] - 1;
48 }
49}
50
51
52/****************************************************************/
53/* sbcs support */
54
55/* check if 'ch' is an acceptable sbcs mapping for 'wch' */
56static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
57 WCHAR wch, unsigned char ch )
58{
59 if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
60 if (ch != (unsigned char)table->info.def_char) return 1;
61 return (wch == table->info.def_unicode_char);
62}
63
64/* query necessary dst length for src string */
65static inline int get_length_sbcs( const struct sbcs_table *table, int flags,
66 const WCHAR *src, unsigned int srclen )
67{
68 unsigned int ret = srclen;
69
70 if (flags & WC_COMPOSITECHECK)
71 {
72 const unsigned char * const uni2cp_low = table->uni2cp_low;
73 const unsigned short * const uni2cp_high = table->uni2cp_high;
74 WCHAR composed;
75
76 for (ret = 0; srclen > 1; ret++, srclen--, src++)
77 {
78 if (!(composed = compose(src))) continue;
79 /* check if we should skip the next char */
80
81 /* in WC_DEFAULTCHAR and WC_DISCARDNS mode, we always skip */
82 /* the next char no matter if the composition is valid or not */
83 if (!(flags & (WC_DEFAULTCHAR|WC_DISCARDNS)))
84 {
85 unsigned char ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
86 if (!is_valid_sbcs_mapping( table, flags, composed, ch )) continue;
87 }
88 src++;
89 srclen--;
90 }
91 if (srclen) ret++; /* last char */
92 }
93 return ret;
94}
95
96/* wcstombs for single-byte code page */
97static inline int wcstombs_sbcs( const struct sbcs_table *table,
98 const WCHAR *src, unsigned int srclen,
99 char *dst, unsigned int dstlen )
100{
101 const unsigned char * const uni2cp_low = table->uni2cp_low;
102 const unsigned short * const uni2cp_high = table->uni2cp_high;
103 int ret = srclen;
104
105 if (dstlen < srclen)
106 {
107 /* buffer too small: fill it up to dstlen and return error */
108 srclen = dstlen;
109 ret = -1;
110 }
111
112 for (;;)
113 {
114 switch(srclen)
115 {
116 default:
117 case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
118 case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
119 case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
120 case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
121 case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
122 case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
123 case 10: dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
124 case 9: dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
125 case 8: dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
126 case 7: dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
127 case 6: dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
128 case 5: dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
129 case 4: dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
130 case 3: dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
131 case 2: dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
132 case 1: dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
133 case 0: break;
134 }
135 if (srclen < 16) return ret;
136 dst += 16;
137 src += 16;
138 srclen -= 16;
139 }
140}
141
142/* slow version of wcstombs_sbcs that handles the various flags */
143static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
144 const WCHAR *src, unsigned int srclen,
145 char *dst, unsigned int dstlen,
146 const char *defchar, int *used )
147{
148 const unsigned char * const uni2cp_low = table->uni2cp_low;
149 const unsigned short * const uni2cp_high = table->uni2cp_high;
150 const unsigned char table_default = table->info.def_char & 0xff;
151 unsigned int len;
152 int tmp;
153 WCHAR composed;
154
155 if (!defchar) defchar = (const char *)&table_default;
156 if (!used) used = &tmp; /* avoid checking on every char */
157
158 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
159 {
160 WCHAR wch = *src;
161
162 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
163 {
164 /* now check if we can use the composed char */
165 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
166 if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
167 {
168 /* we have a good mapping, use it */
169 src++;
170 srclen--;
171 continue;
172 }
173 /* no mapping for the composed char, check the other flags */
174 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
175 {
176 *dst = *defchar;
177 *used = 1;
178 src++; /* skip the non-spacing char */
179 srclen--;
180 continue;
181 }
182 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
183 {
184 src++;
185 srclen--;
186 }
187 /* WC_SEPCHARS is the default */
188 }
189
190 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
191 if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
192 {
193 *dst = *defchar;
194 *used = 1;
195 }
196 }
197 if (srclen) return -1; /* overflow */
198 return dstlen - len;
199}
200
201
202/****************************************************************/
203/* dbcs support */
204
205/* check if 'ch' is an acceptable dbcs mapping for 'wch' */
206static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
207 WCHAR wch, unsigned short ch )
208{
209 if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
210 if (flags & WC_NO_BEST_FIT_CHARS)
211 {
212 /* check if char maps back to the same Unicode value */
213 if (ch & 0xff00)
214 {
215 unsigned char off = table->cp2uni_leadbytes[ch >> 8];
216 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
217 }
218 return (table->cp2uni[ch & 0xff] == wch);
219 }
220 return 1;
221}
222
223/* query necessary dst length for src string */
224static int get_length_dbcs( const struct dbcs_table *table, int flags,
225 const WCHAR *src, unsigned int srclen,
226 const char *defchar )
227{
228 const unsigned short * const uni2cp_low = table->uni2cp_low;
229 const unsigned short * const uni2cp_high = table->uni2cp_high;
230 WCHAR defchar_value = table->info.def_char;
231 WCHAR composed;
232 int len;
233
234 if (!defchar && !(flags & WC_COMPOSITECHECK))
235 {
236 for (len = 0; srclen; srclen--, src++, len++)
237 {
238 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
239 }
240 return len;
241 }
242
243 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
244 for (len = 0; srclen; len++, srclen--, src++)
245 {
246 unsigned short res;
247 WCHAR wch = *src;
248
249 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
250 {
251 /* now check if we can use the composed char */
252 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
253
254 if (is_valid_dbcs_mapping( table, flags, composed, res ))
255 {
256 /* we have a good mapping for the composed char, use it */
257 if (res & 0xff00) len++;
258 src++;
259 srclen--;
260 continue;
261 }
262 /* no mapping for the composed char, check the other flags */
263 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
264 {
265 if (defchar_value & 0xff00) len++;
266 src++; /* skip the non-spacing char */
267 srclen--;
268 continue;
269 }
270 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
271 {
272 src++;
273 srclen--;
274 }
275 /* WC_SEPCHARS is the default */
276 }
277
278 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
279 if (!is_valid_dbcs_mapping( table, flags, wch, res )) res = defchar_value;
280 if (res & 0xff00) len++;
281 }
282 return len;
283}
284
285/* wcstombs for double-byte code page */
286static inline int wcstombs_dbcs( const struct dbcs_table *table,
287 const WCHAR *src, unsigned int srclen,
288 char *dst, unsigned int dstlen )
289{
290 const unsigned short * const uni2cp_low = table->uni2cp_low;
291 const unsigned short * const uni2cp_high = table->uni2cp_high;
292 int len;
293
294 for (len = dstlen; srclen && len; len--, srclen--, src++)
295 {
296 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
297 if (res & 0xff00)
298 {
299 if (len == 1) break; /* do not output a partial char */
300 len--;
301 *dst++ = res >> 8;
302 }
303 *dst++ = (char)res;
304 }
305 if (srclen) return -1; /* overflow */
306 return dstlen - len;
307}
308
309/* slow version of wcstombs_dbcs that handles the various flags */
310static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
311 const WCHAR *src, unsigned int srclen,
312 char *dst, unsigned int dstlen,
313 const char *defchar, int *used )
314{
315 const unsigned short * const uni2cp_low = table->uni2cp_low;
316 const unsigned short * const uni2cp_high = table->uni2cp_high;
317 WCHAR defchar_value = table->info.def_char;
318 WCHAR composed;
319 int len, tmp;
320
321 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
322 if (!used) used = &tmp; /* avoid checking on every char */
323
324 for (len = dstlen; srclen && len; len--, srclen--, src++)
325 {
326 unsigned short res;
327 WCHAR wch = *src;
328
329 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
330 {
331 /* now check if we can use the composed char */
332 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
333
334 if (is_valid_dbcs_mapping( table, flags, composed, res ))
335 {
336 /* we have a good mapping for the composed char, use it */
337 src++;
338 srclen--;
339 goto output_char;
340 }
341 /* no mapping for the composed char, check the other flags */
342 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
343 {
344 res = defchar_value;
345 *used = 1;
346 src++; /* skip the non-spacing char */
347 srclen--;
348 goto output_char;
349 }
350 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
351 {
352 src++;
353 srclen--;
354 }
355 /* WC_SEPCHARS is the default */
356 }
357
358 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
359 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
360 {
361 res = defchar_value;
362 *used = 1;
363 }
364
365 output_char:
366 if (res & 0xff00)
367 {
368 if (len == 1) break; /* do not output a partial char */
369 len--;
370 *dst++ = res >> 8;
371 }
372 *dst++ = (char)res;
373 }
374 if (srclen) return -1; /* overflow */
375 return dstlen - len;
376}
377
378/* wide char to multi byte string conversion */
379/* return -1 on dst buffer overflow */
380int cp_wcstombs( const union cptable *table, int flags,
381 const WCHAR *src, int srclen,
382 char *dst, int dstlen, const char *defchar, int *used )
383{
384 if (table->info.char_size == 1)
385 {
386 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen );
387 if (flags || defchar || used)
388 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
389 dst, dstlen, defchar, used );
390 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
391 }
392 else /* mbcs */
393 {
394 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar );
395 if (flags || defchar || used)
396 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
397 dst, dstlen, defchar, used );
398 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
399 }
400}
Note: See TracBrowser for help on using the repository browser.