1 | /* shmbutil.h -- utility functions for multibyte characters. */
|
---|
2 |
|
---|
3 | /* Copyright (C) 2002-2004 Free Software Foundation, Inc.
|
---|
4 |
|
---|
5 | This file is part of GNU Bash, the Bourne Again SHell.
|
---|
6 |
|
---|
7 | Bash is free software; you can redistribute it and/or modify it under
|
---|
8 | the terms of the GNU General Public License as published by the Free
|
---|
9 | Software Foundation; either version 2, or (at your option) any later
|
---|
10 | version.
|
---|
11 |
|
---|
12 | Bash is distributed in the hope that it will be useful, but WITHOUT ANY
|
---|
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
---|
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
---|
15 | for more details.
|
---|
16 |
|
---|
17 | You should have received a copy of the GNU General Public License along
|
---|
18 | with Bash; see the file COPYING. If not, write to the Free Software
|
---|
19 | Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
|
---|
20 |
|
---|
21 | #if !defined (_SH_MBUTIL_H_)
|
---|
22 | #define _SH_MBUTIL_H_
|
---|
23 |
|
---|
24 | #include "stdc.h"
|
---|
25 |
|
---|
26 | /* Include config.h for HANDLE_MULTIBYTE */
|
---|
27 | #include <config.h>
|
---|
28 |
|
---|
#if defined (HANDLE_MULTIBYTE)

/* Multibyte string helpers.  Only the declarations live in this header. */
extern size_t xmbsrtowcs __P((wchar_t *, const char **, size_t, mbstate_t *));
extern size_t xdupmbstowcs __P((wchar_t **, char ***, const char *));

extern size_t mbstrlen __P((const char *));

extern char *xstrchr __P((const char *, int));

/* MB_INVALIDCH: X is (size_t)-1 or (size_t)-2, the two values this file
   treats as a failed mbrlen call.  MB_NULLWCH: X is 0.  Each macro has its
   own guard so that an earlier definition of one does not suppress the
   other. */
#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x)		((x) == (size_t)-1 || (x) == (size_t)-2)
#endif
#ifndef MB_NULLWCH
#define MB_NULLWCH(x)		((x) == 0)
#endif

/* Length of S as computed by mbstrlen, with shortcuts: 0 for a null pointer
   or an empty string and 1 for a one-byte string.  S is evaluated more than
   once. */
#define MBSLEN(s)	(((s) && (s)[0]) ? ((s)[1] ? mbstrlen (s) : 1) : 0)
/* MBSLEN when MB_CUR_MAX > 1, otherwise STRLEN (which is defined elsewhere). */
#define MB_STRLEN(s)	((MB_CUR_MAX > 1) ? MBSLEN (s) : STRLEN (s))

/* mblen/mbrlen when MB_CUR_MAX > 1; otherwise the constant 1. */
#define MBLEN(s, n)	((MB_CUR_MAX > 1) ? mblen ((s), (n)) : 1)
#define MBRLEN(s, n, p)	((MB_CUR_MAX > 1) ? mbrlen ((s), (n), (p)) : 1)

#else /* !HANDLE_MULTIBYTE */

/* Without multibyte support the limits are forced to one byte per
   character, whatever the system headers said. */
#undef MB_LEN_MAX
#undef MB_CUR_MAX

#define MB_LEN_MAX	1
#define MB_CUR_MAX	1

/* xstrchr is plain strchr in this configuration. */
#undef xstrchr
#define xstrchr(s, c)	strchr((s), (c))

/* Neither test can ever be true in this configuration. */
#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x)	(0)
#endif
#ifndef MB_NULLWCH
#define MB_NULLWCH(x)	(0)
#endif

#define MB_STRLEN(s)	(STRLEN(s))

#define MBLEN(s, n)	1
#define MBRLEN(s, n, p)	1

#endif /* !HANDLE_MULTIBYTE */
71 |
|
---|
/* Declare a multibyte conversion state named `state' and clear it to zero
   bytes.  The expansion is a declaration followed by a statement, and the
   call must be terminated with `;'.  Expands to nothing when
   HANDLE_MULTIBYTE is not defined. */
#if defined (HANDLE_MULTIBYTE)
#  define DECLARE_MBSTATE \
	mbstate_t state; \
	memset (&state, 0, sizeof (state))
#else
#  define DECLARE_MBSTATE
#endif /* !HANDLE_MULTIBYTE */
81 |
|
---|
/* Clear the multibyte conversion state named `state', which must already be
   in scope, to zero bytes.  Usable both for first-time initialization and
   for resetting.  The call must be terminated with `;'.  Expands to nothing
   when HANDLE_MULTIBYTE is not defined. */
#if defined (HANDLE_MULTIBYTE)
#  define INITIALIZE_MBSTATE	memset (&state, 0, sizeof (mbstate_t))
#else
#  define INITIALIZE_MBSTATE
#endif /* !HANDLE_MULTIBYTE */
89 |
|
---|
/* Step the index _I past one character of the string _STR, whose total
   length is _STRSIZE.  When MB_CUR_MAX > 1 the step is the byte count that
   mbrlen reports for the character at _STR + _I; a null character or an
   invalid/incomplete sequence counts as one byte, and in the latter case
   the conversion state is put back to what it was before the call.
   Otherwise _I is simply incremented.  A variable named `state' (an
   mbstate_t) must already be declared where the macro is used. */
#if defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR(_str, _strsize, _i) \
    do \
      { \
        if (MB_CUR_MAX <= 1) \
          (_i)++; \
        else \
          { \
            mbstate_t state_bak = state; \
            size_t mblength = mbrlen ((_str) + (_i), (_strsize) - (_i), &state); \
\
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak;	/* undo whatever mbrlen did to it */ \
                mblength = 1; \
              } \
            else if (mblength == 0) \
              mblength = 1;		/* step over a null character */ \
\
            (_i) += mblength; \
          } \
      } \
    while (0)
#else
#  define ADVANCE_CHAR(_str, _strsize, _i)	(_i)++
#endif /* !HANDLE_MULTIBYTE */
121 |
|
---|
/* Pointer flavour of ADVANCE_CHAR: move _STR (with _STRSIZE bytes available)
   to the LAST byte of the character it points at.
   SPECIAL: the caller is assumed to add 1 to _STR after this call, so a
   one-byte character, a null character, or an invalid/incomplete sequence
   leaves _STR where it is.  After an invalid/incomplete sequence the
   conversion state is restored.  Uses the variable `state', which must
   already be declared.  Expands to nothing without HANDLE_MULTIBYTE. */
#if defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR_P(_str, _strsize) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak = state; \
            size_t mblength = mbrlen ((_str), (_strsize), &state); \
\
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              state = state_bak; \
            else if (mblength > 1) \
              (_str) += mblength - 1; \
          } \
      } \
    while (0)
#else
#  define ADVANCE_CHAR_P(_str, _strsize)
#endif /* !HANDLE_MULTIBYTE */
150 |
|
---|
/* Move the index _I back one character in the string _STR of length
   _STRSIZE.  When MB_CUR_MAX > 1 the string is scanned forward from index 0
   with mbrlen, and _I is set to the start of the last multibyte character
   that begins before the old _I (0 if the scan found none).  Null
   characters and invalid/incomplete sequences are stepped over one byte at
   a time and are not recorded as a character start.  Otherwise _I is simply
   decremented.  A variable named `state' must already be declared. */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR(_str, _strsize, _i) \
    do \
      { \
        if (MB_CUR_MAX <= 1) \
          (_i)--; \
        else \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            int _x, _p; /* _x == scan index, _p == start of previous mbchar */ \
\
            _p = 0; \
            for (_x = 0; _x < (_i); ) \
              { \
                state_bak = state; \
                mblength = mbrlen ((_str) + (_x), (_strsize) - (_x), &state); \
\
                if (mblength == (size_t)-1 || mblength == (size_t)-2) \
                  state = state_bak; \
                else if (mblength != 0) \
                  { \
                    _p = _x; \
                    _x += mblength; \
                    continue; \
                  } \
                _x++;	/* bad sequence or null character: skip one byte */ \
              } \
            (_i) = _p; \
          } \
      } \
    while (0)
#else
#  define BACKUP_CHAR(_str, _strsize, _i)	(_i)--
#endif /* !HANDLE_MULTIBYTE */
191 |
|
---|
/* Back up one (possibly multibyte) character in the string _BASE of length
   _STRSIZE starting at _STR (_BASE <= _STR <= (_BASE + _STRSIZE) ).
   When MB_CUR_MAX > 1 the string is scanned forward from _BASE with mbrlen
   and _STR is set to the start of the last multibyte character that begins
   before the old _STR (_BASE if the scan found none); null characters and
   invalid/incomplete sequences are stepped over one byte at a time.
   Otherwise _STR is simply decremented.  A variable named `state' must
   already be declared.
   SPECIAL: DO NOT assume that _STR will be decremented by 1 after this call. */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR_P(_base, _strsize, _str) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            char *_x, *_p; /* _x == temp pointer into string, _p == prev pointer */ \
\
            _x = _p = (_base); \
            while (_x < (_str)) \
              { \
                state_bak = state; \
                /* bytes left == total size minus the distance already scanned */ \
                mblength = mbrlen (_x, (_strsize) - (_x - (_base)), &state); \
\
                if (mblength == (size_t)-2 || mblength == (size_t)-1) \
                  { \
                    state = state_bak; \
                    _x++; \
                  } \
                else if (mblength == 0) \
                  _x++; \
                else \
                  { \
                    _p = _x; /* _p == start of prev mbchar */ \
                    _x += mblength; \
                  } \
              } \
            (_str) = _p; \
          } \
        else \
          (_str)--; \
      } \
    while (0)
#else
#  define BACKUP_CHAR_P(_base, _strsize, _str)	(_str)--
#endif /* !HANDLE_MULTIBYTE */
233 |
|
---|
/* Copy one character from *_SRC to *_DST, advancing both pointers past it.
   _SRCEND points to the end of the source string.  When MB_CUR_MAX > 1 the
   number of bytes copied is what mbrlen reports for the character; a null
   character or an invalid/incomplete sequence is copied as a single byte,
   and in the latter case the conversion state is restored.  Otherwise
   exactly one byte is copied.  A variable named `state' must already be
   declared. */
#if defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_P(_dst, _src, _srcend) \
    do \
      { \
        if (MB_CUR_MAX <= 1) \
          *(_dst)++ = *(_src)++; \
        else \
          { \
            mbstate_t state_bak = state; \
            size_t mblength = mbrlen ((_src), (_srcend) - (_src), &state); \
\
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            else if (mblength == 0) \
              mblength = 1; \
\
            while (mblength-- > 0) \
              *(_dst)++ = *(_src)++; \
          } \
      } \
    while (0)
#else
#  define COPY_CHAR_P(_dst, _src, _srcend)	*(_dst)++ = *(_src)++
#endif /* !HANDLE_MULTIBYTE */
266 |
|
---|
/* Copy a single character from the string _SRC at index _SI to the string
   _DST at index _DI, advancing both indices past it.  _SRCEND is a pointer
   to the end of _SRC.  When MB_CUR_MAX > 1 the number of bytes copied is
   what mbrlen reports for the character; a null character or an
   invalid/incomplete sequence is copied as a single byte, and in the latter
   case the conversion state is restored.  Otherwise exactly one byte is
   copied.  A variable named `state' must already be declared.
   Every argument is parenthesized in the expansion, so expressions such as
   `buf + 1' are safe to pass; _DI and _SI must still be modifiable
   lvalues. */
#if defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            int _k; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src)+(_si)), &state); \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
\
            for (_k = 0; _k < mblength; _k++) \
              (_dst)[(_di)++] = (_src)[(_si)++]; \
          } \
        else \
          (_dst)[(_di)++] = (_src)[(_si)++]; \
      } \
    while (0)
#else
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si)	(_dst)[(_di)++] = (_src)[(_si)++]
#endif /* !HANDLE_MULTIBYTE */
299 |
|
---|
300 | /****************************************************************
|
---|
301 | * *
|
---|
302 | * The following are only guaranteed to work in subst.c *
|
---|
303 | * *
|
---|
304 | ****************************************************************/
|
---|
305 |
|
---|
/* Produce the escape character _ESCCHAR followed by one character.
   Single-byte case (MB_CUR_MAX <= 1, or no HANDLE_MULTIBYTE): store
   _ESCCHAR and _SC in _DST[0] and _DST[1]; _SI is left alone.
   Multibyte case: allocate a new string with xmalloc, assign it to the
   variable `temp', fill it with _ESCCHAR plus the character found at
   _SRC + _SI (advancing _SI past it; _SLEN is the length of _SRC), and jump
   to the label `add_string'.  The expansion site must therefore provide
   `state', `temp', xmalloc and the `add_string' label.
   Both variants are single statements, so the macro is safe as the body of
   an unbraced `if'/`else'; the call must be terminated with `;'. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            int _i; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_slen) - (_si), &state); \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
\
            temp = xmalloc (mblength + 2); \
            temp[0] = (_escchar); \
            for (_i = 0; _i < mblength; _i++) \
              temp[_i + 1] = (_src)[(_si)++]; \
            temp[mblength + 1] = '\0'; \
\
            goto add_string; \
          } \
        else \
          { \
            (_dst)[0] = (_escchar); \
            (_dst)[1] = (_sc); \
          } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
        (_dst)[0] = (_escchar); \
        (_dst)[1] = (_sc); \
      } \
    while (0)
#endif /* !HANDLE_MULTIBYTE */
346 |
|
---|
/* Copy one character from _SRC at index _SI to *_DST, advancing the pointer
   _DST and the index _SI past it.  _SRCEND is a pointer to the end of _SRC.
   When MB_CUR_MAX > 1 the byte count comes from mbrlen (a null character or
   an invalid/incomplete sequence counts as one byte, and in the latter case
   the conversion state is restored) and the bytes are moved with FASTCOPY,
   which must be defined at the expansion site along with `state'.
   Otherwise exactly one byte is copied.
   Both variants are single statements, so the macro is safe as the body of
   an unbraced `if'/`else'; the call must be terminated with `;'. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src) + (_si)), &state); \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
\
            FASTCOPY(((_src) + (_si)), (_dst), mblength); \
\
            (_dst) += mblength; \
            (_si) += mblength; \
          } \
        else \
          { \
            *(_dst)++ = (_src)[(_si)]; \
            (_si)++; \
          } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
        *(_dst)++ = (_src)[(_si)]; \
        (_si)++; \
      } \
    while (0)
#endif /* !HANDLE_MULTIBYTE */
383 |
|
---|
/* When MB_CUR_MAX > 1: allocate a new string with xmalloc, assign it to
   _DST, copy into it the character found at _SRC + _SI (advancing _SI past
   it; _SRCSIZE is the length of _SRC), null-terminate it, and jump to the
   label `add_string'.  A null character or an invalid/incomplete sequence
   is taken as a single byte, and in the latter case the conversion state is
   restored.  When MB_CUR_MAX <= 1, or without HANDLE_MULTIBYTE, the macro
   does nothing.  The expansion site must provide `state', xmalloc and the
   `add_string' label.
   The loop counter has a macro-private name so that an argument spelled
   `i' is not captured by it. */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            int _mbi; \
            mbstate_t state_bak; \
            size_t mblength; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            if (mblength < 1) \
              mblength = 1; \
\
            (_dst) = (char *)xmalloc (mblength + 1); \
            for (_mbi = 0; _mbi < mblength; _mbi++) \
              (_dst)[_mbi] = (_src)[(_si)++]; \
            (_dst)[mblength] = '\0'; \
\
            goto add_string; \
          } \
      } \
    while (0)

#else
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize)
#endif
417 |
|
---|
/* Watch out when using this -- it's just straight textual substitution */
/* Statement sequence, NOT wrapped in a block: it declares `i', `state_bak'
   and `mblength' in the scope where it is expanded, so it has to appear
   where declarations are allowed.  It allocates a new string with xmalloc,
   assigns it to _DST, stores CTLESC followed by the character found at
   _SRC + _SI (advancing _SI past it; _SRCSIZE is the length of _SRC),
   null-terminates it, and jumps to the label `add_string'.  A null
   character or an invalid/incomplete sequence is taken as a single byte,
   and in the latter case the conversion state is restored.  The expansion
   site must provide `state', xmalloc, CTLESC and the `add_string' label,
   and must supply the terminating `;'. */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBQCHAR_BODY(_dst, _src, _si, _srcsize) \
\
            mbstate_t state_bak; \
            size_t mblength; \
            int i; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            else if (mblength == 0) \
              mblength = 1; \
\
            (_dst) = (char *)xmalloc (mblength + 2); \
            (_dst)[0] = CTLESC; \
            i = 0; \
            while (i < mblength) \
              { \
                (_dst)[i + 1] = (_src)[(_si)++]; \
                i++; \
              } \
            (_dst)[mblength + 1] = '\0'; \
\
            goto add_string

#endif /* HANDLE_MULTIBYTE */
445 | #endif /* _SH_MBUTIL_H_ */
|
---|