source: trunk/src/kernel32/unicode/mbtowc.c@ 8752

Last change on this file since 8752 was 8752, checked in by sandervl, 23 years ago

KSO: Fancy 16 step fallthru case isn't cool if the src length is bad.

File size: 8.7 KB
Line 
1/*
2 * MultiByteToWideChar implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 */
6
7#include <string.h>
8
9#include "winnls.h"
10#include "wine/unicode.h"
11
12/* get the decomposition of a Unicode char */
13static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
14{
15 extern const WCHAR unicode_decompose_table[];
16 const WCHAR *ptr = unicode_decompose_table;
17 int res;
18
19 *dst = src;
20 ptr = unicode_decompose_table + ptr[src >> 8];
21 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
22 if (!*ptr) return 1;
23 if (dstlen <= 1) return 0;
24 /* apply the decomposition recursively to the first char */
25 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
26 return res;
27}
28
29/* check src string for invalid chars; return non-zero if invalid char found */
30static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
31 const unsigned char *src, unsigned int srclen )
32{
33 const WCHAR * const cp2uni = table->cp2uni;
34 while (srclen)
35 {
36 if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char)
37 break;
38 src++;
39 srclen--;
40 }
41 return srclen;
42}
43
44/* mbstowcs for single-byte code page */
45/* all lengths are in characters, not bytes */
46static inline int mbstowcs_sbcs( const struct sbcs_table *table,
47 const unsigned char *src, unsigned int srclen,
48 WCHAR *dst, unsigned int dstlen )
49{
50 const WCHAR * const cp2uni = table->cp2uni;
51 int ret = srclen;
52
53 if (dstlen < srclen)
54 {
55 /* buffer too small: fill it up to dstlen and return error */
56 srclen = dstlen;
57 ret = -1;
58 }
59
60 #ifndef __WIN32OS2__
61 for (;;)
62 {
63 switch(srclen)
64 {
65 default:
66 case 16: dst[15] = cp2uni[src[15]];
67 case 15: dst[14] = cp2uni[src[14]];
68 case 14: dst[13] = cp2uni[src[13]];
69 case 13: dst[12] = cp2uni[src[12]];
70 case 12: dst[11] = cp2uni[src[11]];
71 case 11: dst[10] = cp2uni[src[10]];
72 case 10: dst[9] = cp2uni[src[9]];
73 case 9: dst[8] = cp2uni[src[8]];
74 case 8: dst[7] = cp2uni[src[7]];
75 case 7: dst[6] = cp2uni[src[6]];
76 case 6: dst[5] = cp2uni[src[5]];
77 case 5: dst[4] = cp2uni[src[4]];
78 case 4: dst[3] = cp2uni[src[3]];
79 case 3: dst[2] = cp2uni[src[2]];
80 case 2: dst[1] = cp2uni[src[1]];
81 case 1: dst[0] = cp2uni[src[0]];
82 case 0: break;
83 }
84 if (srclen < 16) return ret;
85 dst += 16;
86 src += 16;
87 srclen -= 16;
88 }
89 #else
90 /* kso: The above code isn't that cool when srclen is bad.
91 * This might be a little bit slower but way safer!
92 */
93 ret = 1; /* seems to be 1 based, at least the code is... */
94 while (*src && srclen--)
95 *dst++ = cp2uni[*src++], ret++;
96 return ret - srclen;
97 #endif
98}
99
100/* mbstowcs for single-byte code page with char decomposition */
101static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
102 const unsigned char *src, unsigned int srclen,
103 WCHAR *dst, unsigned int dstlen )
104{
105 const WCHAR * const cp2uni = table->cp2uni;
106 unsigned int len;
107
108 if (!dstlen) /* compute length */
109 {
110 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
111 for (len = 0; srclen; srclen--, src++)
112 len += get_decomposition( cp2uni[*src], dummy, 4 );
113 return len;
114 }
115
116 for (len = dstlen; srclen && len; srclen--, src++)
117 {
118 int res = get_decomposition( cp2uni[*src], dst, len );
119 if (!res) break;
120 len -= res;
121 dst += res;
122 }
123 if (srclen) return -1; /* overflow */
124 return dstlen - len;
125}
126
127/* query necessary dst length for src string */
128static inline int get_length_dbcs( const struct dbcs_table *table,
129 const unsigned char *src, unsigned int srclen )
130{
131 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
132 int len;
133
134 for (len = 0; srclen; srclen--, src++, len++)
135 {
136 if (cp2uni_lb[*src])
137 {
138 if (!--srclen) break; /* partial char, ignore it */
139 src++;
140 }
141 }
142 return len;
143}
144
145/* check src string for invalid chars; return non-zero if invalid char found */
146static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
147 const unsigned char *src, unsigned int srclen )
148{
149 const WCHAR * const cp2uni = table->cp2uni;
150 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
151
152 while (srclen)
153 {
154 unsigned char off = cp2uni_lb[*src];
155 if (off) /* multi-byte char */
156 {
157 if (srclen == 1) break; /* partial char, error */
158 if (cp2uni[(off << 8) + src[1]] == table->info.def_unicode_char &&
159 ((src[0] << 8) | src[1]) != table->info.def_char) break;
160 src++;
161 srclen--;
162 }
163 else if (cp2uni[*src] == table->info.def_unicode_char &&
164 *src != table->info.def_char) break;
165 src++;
166 srclen--;
167 }
168 return srclen;
169}
170
171/* mbstowcs for double-byte code page */
172/* all lengths are in characters, not bytes */
173static inline int mbstowcs_dbcs( const struct dbcs_table *table,
174 const unsigned char *src, unsigned int srclen,
175 WCHAR *dst, unsigned int dstlen )
176{
177 const WCHAR * const cp2uni = table->cp2uni;
178 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
179 unsigned int len;
180
181 if (!dstlen) return get_length_dbcs( table, src, srclen );
182
183 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
184 {
185 unsigned char off = cp2uni_lb[*src];
186 if (off)
187 {
188 if (!--srclen) break; /* partial char, ignore it */
189 src++;
190 *dst = cp2uni[(off << 8) + *src];
191 }
192 else *dst = cp2uni[*src];
193 }
194 if (srclen) return -1; /* overflow */
195 return dstlen - len;
196}
197
198
199/* mbstowcs for double-byte code page with character decomposition */
200static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
201 const unsigned char *src, unsigned int srclen,
202 WCHAR *dst, unsigned int dstlen )
203{
204 const WCHAR * const cp2uni = table->cp2uni;
205 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
206 unsigned int len;
207 WCHAR ch;
208 int res;
209
210 if (!dstlen) /* compute length */
211 {
212 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
213 for (len = 0; srclen; srclen--, src++)
214 {
215 unsigned char off = cp2uni_lb[*src];
216 if (off)
217 {
218 if (!--srclen) break; /* partial char, ignore it */
219 src++;
220 ch = cp2uni[(off << 8) + *src];
221 }
222 else ch = cp2uni[*src];
223 len += get_decomposition( ch, dummy, 4 );
224 }
225 return len;
226 }
227
228 for (len = dstlen; srclen && len; srclen--, src++)
229 {
230 unsigned char off = cp2uni_lb[*src];
231 if (off)
232 {
233 if (!--srclen) break; /* partial char, ignore it */
234 src++;
235 ch = cp2uni[(off << 8) + *src];
236 }
237 else ch = cp2uni[*src];
238 if (!(res = get_decomposition( ch, dst, len ))) break;
239 dst += res;
240 len -= res;
241 }
242 if (srclen) return -1; /* overflow */
243 return dstlen - len;
244}
245
246
247/* return -1 on dst buffer overflow, -2 on invalid input char */
248int cp_mbstowcs( const union cptable *table, int flags,
249 const char *src, int srclen,
250 WCHAR *dst, int dstlen )
251{
252 if (table->info.char_size == 1)
253 {
254 if (flags & MB_ERR_INVALID_CHARS)
255 {
256 if (check_invalid_chars_sbcs( &table->sbcs, (const unsigned char *)src, srclen )) return -2;
257 }
258 if (!(flags & MB_COMPOSITE))
259 {
260 if (!dstlen) return srclen;
261 return mbstowcs_sbcs( &table->sbcs, (const unsigned char *)src, srclen, dst, dstlen );
262 }
263 return mbstowcs_sbcs_decompose( &table->sbcs, (const unsigned char *)src, srclen, dst, dstlen );
264 }
265 else /* mbcs */
266 {
267 if (flags & MB_ERR_INVALID_CHARS)
268 {
269 if (check_invalid_chars_dbcs( &table->dbcs, (const unsigned char *)src, srclen )) return -2;
270 }
271 if (!(flags & MB_COMPOSITE))
272 return mbstowcs_dbcs( &table->dbcs, (const unsigned char *)src, srclen, dst, dstlen );
273 else
274 return mbstowcs_dbcs_decompose( &table->dbcs, (const unsigned char *)src, srclen, dst, dstlen );
275 }
276}
Note: See TracBrowser for help on using the repository browser.