Changeset 2066
- Timestamp:
- Jun 23, 2005, 8:03:42 AM (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/emx/src/lib/str/strxfrm.c
-
Property cvs2svn:cvs-rev
changed from
1.7
to 1.8
r2065 r2066 19 19 struct __strxfrm_arg 20 20 { 21 char *out;22 size_t size;21 char *out; 22 size_t size; 23 23 }; 24 24 25 static int __uni_strxfrm 25 static int __uni_strxfrm(UniChar *ucs, void *arg) 26 26 { 27 struct __strxfrm_arg *x = (struct __strxfrm_arg *)arg;27 struct __strxfrm_arg *x = (struct __strxfrm_arg *)arg; 28 28 29 /* BUG WARNING!30 * As far as I've observed Unicode DLL has a bug that UniStrxfrm returns31 * one character less than it really fills in the buffer. I haven't32 * implemented any workaround for that first because it can be fixed33 * in the future and second because the information at the end of the34 * buffer seems very seldom really needed. UniStrxfrm generates a lot35 * of output, and every character in the input buffer generates three36 * characters in the output buffer: one wchar_t and two bytes that37 * seems to be related to character type (e.g. similar to character38 * flags isXXX() works with).39 */29 /* BUG WARNING! 30 * As far as I've observed Unicode DLL has a bug that UniStrxfrm returns 31 * one character less than it really fills in the buffer. I haven't 32 * implemented any workaround for that first because it can be fixed 33 * in the future and second because the information at the end of the 34 * buffer seems very seldom really needed. UniStrxfrm generates a lot 35 * of output, and every character in the input buffer generates three 36 * characters in the output buffer: one wchar_t and two bytes that 37 * seems to be related to character type (e.g. similar to character 38 * flags isXXX() works with). 39 */ 40 40 41 size_t rs = UniStrxfrm (__libc_gLocaleCollate.lobj, (UniChar *)x->out, 42 ucs, x->size); 43 /* rs is in UniChar's without trailing zero */ 44 rs *= sizeof (UniChar); 45 if (rs < x->size) 46 { 47 /* The string returned by Unicode API often contain zero characters 48 (in the top or bottom 8 bits of a Unicode character). 
49 This is inappropiate for MBCS strings, so we increment all 50 character codes by one except code 0xff (which is very seldom 51 encountered). There is no other way to represent a Unicode 52 xfrm'ed string as a MBCS string, alas. */ 41 size_t rs = UniStrxfrm(__libc_gLocaleCollate.lobj, (UniChar *)x->out, ucs, x->size / sizeof(UniChar)); 42 if (!x->size) 43 x->size = rs * sizeof(UniChar) + 3; /* We add three byte so we'll get enough space for a UniChar null terminator 44 and any incorrect returns from UniStrxfrm. The caller will add 1 more byte for us. 45 This means that we'll return less on the actual job, but that's hopefully acceptable. */ 46 else 47 { 48 /* rs is in UniChar's without trailing zero. */ 49 rs *= sizeof(UniChar); 50 size_t size = x->size; 51 x->size = rs; 52 if (rs >= size) 53 rs = size - 1; 53 54 54 int i; 55 for (i = 0; i < rs; i++) 56 if (x->out [i] != -1) 57 x->out [i]++; 58 x->out [rs] = 0; 59 } 60 x->size = rs + 1; /* We need space for trailing zero too */ 61 return 0; 55 /* The string returned by Unicode API often contain zero characters 56 (in the top or bottom 8 bits of a Unicode character). 57 This is inappropiate for MBCS strings, so we increment all 58 character codes by one except code 0xff (which is very seldom 59 encountered). There is no other way to represent a Unicode 60 xfrm'ed string as a MBCS string, alas. */ 61 62 for (int i = 0; i < rs; i++) 63 if (x->out[i] != -1) 64 x->out[i]++; 65 x->out[rs] = '\0'; 66 } 67 return 0; 62 68 } 63 69 … … 65 71 size_t _STD(strxfrm) (char *s1, const char *s2, size_t size) 66 72 { 67 unsigned char c; 68 size_t ret = 1; /* We need at least space for trailing zero */ 73 if (__libc_gLocaleCollate.mbcs) 74 { 75 /* When using MBCS codepaes, we will convert the entire string to 76 Unicode and then apply the UniStrxfrm() function. 
The output strings 77 can be much longer than the original in this case, but if user program 78 is correctly written, it will work since strxfrm will return the 79 required output string length. */ 80 struct __strxfrm_arg x; 81 FS_VAR(); 82 FS_SAVE_LOAD(); 83 x.out = s1; 84 x.size = size; 85 __libc_ucs2Do (__libc_gLocaleCollate.uobj, (char *)s2, &x, __uni_strxfrm); 86 FS_RESTORE(); 87 return x.size; 88 } 69 89 70 if (__libc_gLocaleCollate.mbcs) 71 { 72 /* When using MBCS codepaes, we will convert the entire string to 73 Unicode and then apply the UniStrxfrm() function. The output strings 74 can be much longer than the original in this case, but if user program 75 is correctly written, it will work since strxfrm will return the 76 required output string length. */ 77 struct __strxfrm_arg x; 78 FS_VAR(); 79 FS_SAVE_LOAD(); 80 x.out = s1; 81 x.size = size / sizeof (UniChar); 82 __libc_ucs2Do (__libc_gLocaleCollate.uobj, (char *)s2, &x, __uni_strxfrm); 83 FS_RESTORE(); 84 return x.size; 85 } 90 /* buffer size query */ 91 if (!size) 92 return strlen(s2); 86 93 87 while ((c = *s2++)) 88 { 94 /* fill buffer as far as there is room */ 95 register const char *psz = s2; 96 register unsigned char c; 97 while ((c = *psz) && size) 98 { 99 size--; 100 *s1++ = __libc_gLocaleCollate.auchWeight[c]; 101 psz++; 102 } 103 104 /* if more input then skip to the end so we get the length right. */ 105 if (c) 106 psz = strchr(psz + 1, '\0'); 107 108 /* Append trailing zero, if there is space. */ 89 109 if (size) 90 { 91 *s1++ = __libc_gLocaleCollate.auchWeight [c]; 92 size--; 93 } 94 ret++; 95 } 96 97 /* Append trailing zero, if there is space. */ 98 if (size) 99 *s1 = 0; 100 101 return ret; 110 *s1 = '\0'; 111 return psz - s2; 102 112 } -
Property cvs2svn:cvs-rev
changed from
Note:
See TracChangeset
for help on using the changeset viewer.