source: trunk/src/kernel32/unicode/mbtowc.c@ 5655

Last change on this file since 5655 was 5450, checked in by sandervl, 24 years ago

Wine's unicode translation functions & tables

File size: 8.4 KB
Line 
/*
 * MultiByteToWideChar implementation
 *
 * Copyright 2000 Alexandre Julliard
 */
6
7#include <string.h>
8
9#include "winnls.h"
10#include "wine/unicode.h"
11
12/* get the decomposition of a Unicode char */
13static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
14{
15 extern const WCHAR unicode_decompose_table[];
16 const WCHAR *ptr = unicode_decompose_table;
17 int res;
18
19 *dst = src;
20 ptr = unicode_decompose_table + ptr[src >> 8];
21 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
22 if (!*ptr) return 1;
23 if (dstlen <= 1) return 0;
24 /* apply the decomposition recursively to the first char */
25 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
26 return res;
27}
28
29/* check src string for invalid chars; return non-zero if invalid char found */
30static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
31 const unsigned char *src, unsigned int srclen )
32{
33 const WCHAR * const cp2uni = table->cp2uni;
34 while (srclen)
35 {
36 if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char)
37 break;
38 src++;
39 srclen--;
40 }
41 return srclen;
42}
43
44/* mbstowcs for single-byte code page */
45/* all lengths are in characters, not bytes */
46static inline int mbstowcs_sbcs( const struct sbcs_table *table,
47 const unsigned char *src, unsigned int srclen,
48 WCHAR *dst, unsigned int dstlen )
49{
50 const WCHAR * const cp2uni = table->cp2uni;
51 int ret = srclen;
52
53 if (dstlen < srclen)
54 {
55 /* buffer too small: fill it up to dstlen and return error */
56 srclen = dstlen;
57 ret = -1;
58 }
59
60 for (;;)
61 {
62 switch(srclen)
63 {
64 default:
65 case 16: dst[15] = cp2uni[src[15]];
66 case 15: dst[14] = cp2uni[src[14]];
67 case 14: dst[13] = cp2uni[src[13]];
68 case 13: dst[12] = cp2uni[src[12]];
69 case 12: dst[11] = cp2uni[src[11]];
70 case 11: dst[10] = cp2uni[src[10]];
71 case 10: dst[9] = cp2uni[src[9]];
72 case 9: dst[8] = cp2uni[src[8]];
73 case 8: dst[7] = cp2uni[src[7]];
74 case 7: dst[6] = cp2uni[src[6]];
75 case 6: dst[5] = cp2uni[src[5]];
76 case 5: dst[4] = cp2uni[src[4]];
77 case 4: dst[3] = cp2uni[src[3]];
78 case 3: dst[2] = cp2uni[src[2]];
79 case 2: dst[1] = cp2uni[src[1]];
80 case 1: dst[0] = cp2uni[src[0]];
81 case 0: break;
82 }
83 if (srclen < 16) return ret;
84 dst += 16;
85 src += 16;
86 srclen -= 16;
87 }
88}
89
90/* mbstowcs for single-byte code page with char decomposition */
91static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
92 const unsigned char *src, unsigned int srclen,
93 WCHAR *dst, unsigned int dstlen )
94{
95 const WCHAR * const cp2uni = table->cp2uni;
96 unsigned int len;
97
98 if (!dstlen) /* compute length */
99 {
100 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
101 for (len = 0; srclen; srclen--, src++)
102 len += get_decomposition( cp2uni[*src], dummy, 4 );
103 return len;
104 }
105
106 for (len = dstlen; srclen && len; srclen--, src++)
107 {
108 int res = get_decomposition( cp2uni[*src], dst, len );
109 if (!res) break;
110 len -= res;
111 dst += res;
112 }
113 if (srclen) return -1; /* overflow */
114 return dstlen - len;
115}
116
117/* query necessary dst length for src string */
118static inline int get_length_dbcs( const struct dbcs_table *table,
119 const unsigned char *src, unsigned int srclen )
120{
121 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
122 int len;
123
124 for (len = 0; srclen; srclen--, src++, len++)
125 {
126 if (cp2uni_lb[*src])
127 {
128 if (!--srclen) break; /* partial char, ignore it */
129 src++;
130 }
131 }
132 return len;
133}
134
135/* check src string for invalid chars; return non-zero if invalid char found */
136static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
137 const unsigned char *src, unsigned int srclen )
138{
139 const WCHAR * const cp2uni = table->cp2uni;
140 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
141
142 while (srclen)
143 {
144 unsigned char off = cp2uni_lb[*src];
145 if (off) /* multi-byte char */
146 {
147 if (srclen == 1) break; /* partial char, error */
148 if (cp2uni[(off << 8) + src[1]] == table->info.def_unicode_char &&
149 ((src[0] << 8) | src[1]) != table->info.def_char) break;
150 src++;
151 srclen--;
152 }
153 else if (cp2uni[*src] == table->info.def_unicode_char &&
154 *src != table->info.def_char) break;
155 src++;
156 srclen--;
157 }
158 return srclen;
159}
160
161/* mbstowcs for double-byte code page */
162/* all lengths are in characters, not bytes */
163static inline int mbstowcs_dbcs( const struct dbcs_table *table,
164 const unsigned char *src, unsigned int srclen,
165 WCHAR *dst, unsigned int dstlen )
166{
167 const WCHAR * const cp2uni = table->cp2uni;
168 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
169 unsigned int len;
170
171 if (!dstlen) return get_length_dbcs( table, src, srclen );
172
173 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
174 {
175 unsigned char off = cp2uni_lb[*src];
176 if (off)
177 {
178 if (!--srclen) break; /* partial char, ignore it */
179 src++;
180 *dst = cp2uni[(off << 8) + *src];
181 }
182 else *dst = cp2uni[*src];
183 }
184 if (srclen) return -1; /* overflow */
185 return dstlen - len;
186}
187
188
189/* mbstowcs for double-byte code page with character decomposition */
190static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
191 const unsigned char *src, unsigned int srclen,
192 WCHAR *dst, unsigned int dstlen )
193{
194 const WCHAR * const cp2uni = table->cp2uni;
195 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
196 unsigned int len;
197 WCHAR ch;
198 int res;
199
200 if (!dstlen) /* compute length */
201 {
202 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
203 for (len = 0; srclen; srclen--, src++)
204 {
205 unsigned char off = cp2uni_lb[*src];
206 if (off)
207 {
208 if (!--srclen) break; /* partial char, ignore it */
209 src++;
210 ch = cp2uni[(off << 8) + *src];
211 }
212 else ch = cp2uni[*src];
213 len += get_decomposition( ch, dummy, 4 );
214 }
215 return len;
216 }
217
218 for (len = dstlen; srclen && len; srclen--, src++)
219 {
220 unsigned char off = cp2uni_lb[*src];
221 if (off)
222 {
223 if (!--srclen) break; /* partial char, ignore it */
224 src++;
225 ch = cp2uni[(off << 8) + *src];
226 }
227 else ch = cp2uni[*src];
228 if (!(res = get_decomposition( ch, dst, len ))) break;
229 dst += res;
230 len -= res;
231 }
232 if (srclen) return -1; /* overflow */
233 return dstlen - len;
234}
235
236
237/* return -1 on dst buffer overflow, -2 on invalid input char */
238int cp_mbstowcs( const union cptable *table, int flags,
239 const char *src, int srclen,
240 WCHAR *dst, int dstlen )
241{
242 if (table->info.char_size == 1)
243 {
244 if (flags & MB_ERR_INVALID_CHARS)
245 {
246 if (check_invalid_chars_sbcs( &table->sbcs, (const unsigned char *)src, srclen )) return -2;
247 }
248 if (!(flags & MB_COMPOSITE))
249 {
250 if (!dstlen) return srclen;
251 return mbstowcs_sbcs( &table->sbcs, (const unsigned char *)src, srclen, dst, dstlen );
252 }
253 return mbstowcs_sbcs_decompose( &table->sbcs, (const unsigned char *)src, srclen, dst, dstlen );
254 }
255 else /* mbcs */
256 {
257 if (flags & MB_ERR_INVALID_CHARS)
258 {
259 if (check_invalid_chars_dbcs( &table->dbcs, (const unsigned char *)src, srclen )) return -2;
260 }
261 if (!(flags & MB_COMPOSITE))
262 return mbstowcs_dbcs( &table->dbcs, (const unsigned char *)src, srclen, dst, dstlen );
263 else
264 return mbstowcs_dbcs_decompose( &table->dbcs, (const unsigned char *)src, srclen, dst, dstlen );
265 }
266}
Note: See TracBrowser for help on using the repository browser.