Context Navigation

← Previous Changeset
Next Changeset →

Changeset 2066

Timestamp:

Jun 23, 2005, 8:03:42 AM (20 years ago)

Author:

bird

Message:

Corrected the strxfrm() return values.

File:

: 1 edited

trunk/src/emx/src/lib/str/strxfrm.c (modified) (2 diffs, 1 prop)

Legend:

: Unmodified
: Added
: Removed

trunk/src/emx/src/lib/str/strxfrm.c

Property cvs2svn:cvs-rev changed from 1.7 to 1.8

-              r2065
+              r2066
 struct __strxfrm_arg
+{
   char *out;
   size_t size;
+    char *out;
+    size_t size;
 };
 static int __uni_strxfrm (UniChar *ucs, void *arg)
+static int __uni_strxfrm(UniChar *ucs, void *arg)
+{
   struct __strxfrm_arg *x = (struct __strxfrm_arg *)arg;
+    struct __strxfrm_arg *x = (struct __strxfrm_arg *)arg;
   /* BUG WARNING!
    * As far as I've observed Unicode DLL has a bug that UniStrxfrm returns
    * one character less than it really fills in the buffer. I haven't
    * implemented any workaround for that first because it can be fixed
    * in the future and second because the information at the end of the
    * buffer seems very seldom really needed. UniStrxfrm generates a lot
    * of output, and every character in the input buffer generates three
    * characters in the output buffer: one wchar_t and two bytes that
    * seem to be related to character type (e.g. similar to character
    * flags isXXX() works with).
    */
+    /* BUG WARNING!
+     * As far as I've observed Unicode DLL has a bug that UniStrxfrm returns
+     * one character less than it really fills in the buffer. I haven't
+     * implemented any workaround for that first because it can be fixed
+     * in the future and second because the information at the end of the
+     * buffer seems very seldom really needed. UniStrxfrm generates a lot
+     * of output, and every character in the input buffer generates three
+     * characters in the output buffer: one wchar_t and two bytes that
+     * seem to be related to character type (e.g. similar to character
+     * flags isXXX() works with).
+     */
+  size_t rs = UniStrxfrm (__libc_gLocaleCollate.lobj, (UniChar *)x->out,
+    ucs, x->size);
+  /* rs is in UniChar's without trailing zero */
+  rs *= sizeof (UniChar);
+  if (rs < x->size)
+  {
+    /* The string returned by the Unicode API often contains zero characters
+       (in the top or bottom 8 bits of a Unicode character).
+       This is inappropriate for MBCS strings, so we increment all
+       character codes by one except code 0xff (which is very seldom
+       encountered). There is no other way to represent a Unicode
+       xfrm'ed string as an MBCS string, alas. */
+    size_t rs = UniStrxfrm(__libc_gLocaleCollate.lobj, (UniChar *)x->out, ucs, x->size / sizeof(UniChar));
+    if (!x->size)
+        x->size = rs * sizeof(UniChar) + 3; /* We add three byte so we'll get enough space for a UniChar null terminator
+                                               and any incorrect returns from UniStrxfrm. The caller will add 1 more byte for us.
+                                               This means that we'll return less on the actual job, but that's hopefully acceptable. */
+    else
+    {
+        /* rs is in UniChar's without trailing zero. */
+        rs *= sizeof(UniChar);
+        size_t size = x->size;
+        x->size = rs;
+        if (rs >= size)
+            rs = size - 1;
+    int i;
+    for (i = 0; i < rs; i++)
+      if (x->out [i] != -1)
+        x->out [i]++;
+    x->out [rs] = 0;
+  }
+  x->size = rs + 1; /* We need space for trailing zero too */
+  return 0;
+        /* The string returned by the Unicode API often contains zero characters
+           (in the top or bottom 8 bits of a Unicode character).
+           This is inappropriate for MBCS strings, so we increment all
+           character codes by one except code 0xff (which is very seldom
+           encountered). There is no other way to represent a Unicode
+           xfrm'ed string as an MBCS string, alas. */
+        for (int i = 0; i < rs; i++)
+            if (x->out[i] != -1)
+                x->out[i]++;
+        x->out[rs] = '\0';
+    }
+    return 0;
+}
 …
 size_t _STD(strxfrm) (char *s1, const char *s2, size_t size)
+{
+  unsigned char c;
+  size_t ret = 1; /* We need at least space for trailing zero */
+    if (__libc_gLocaleCollate.mbcs)
+    {
+        /* When using MBCS codepages, we will convert the entire string to
+           Unicode and then apply the UniStrxfrm() function. The output strings
+           can be much longer than the original in this case, but if the user program
+           is correctly written, it will work since strxfrm will return the
+           required output string length. */
+        struct __strxfrm_arg x;
+        FS_VAR();
+        FS_SAVE_LOAD();
+        x.out = s1;
+        x.size = size;
+        __libc_ucs2Do (__libc_gLocaleCollate.uobj, (char *)s2, &x, __uni_strxfrm);
+        FS_RESTORE();
+        return x.size;
+    }
+  if (__libc_gLocaleCollate.mbcs)
+  {
+    /* When using MBCS codepages, we will convert the entire string to
+       Unicode and then apply the UniStrxfrm() function. The output strings
+       can be much longer than the original in this case, but if the user program
+       is correctly written, it will work since strxfrm will return the
+       required output string length. */
+    struct __strxfrm_arg x;
+    FS_VAR();
+    FS_SAVE_LOAD();
+    x.out = s1;
+    x.size = size / sizeof (UniChar);
+    __libc_ucs2Do (__libc_gLocaleCollate.uobj, (char *)s2, &x, __uni_strxfrm);
+    FS_RESTORE();
+    return x.size;
+  }
+    /* buffer size query */
+    if (!size)
+        return strlen(s2);
+  while ((c = *s2++))
+  {
+    /* fill buffer as far as there is room */
+    register const char *psz = s2;
+    register unsigned char c;
+    while ((c = *psz) && size)
+    {
+        size--;
+        *s1++ = __libc_gLocaleCollate.auchWeight[c];
+        psz++;
+    }
+    /* if more input then skip to the end so we get the length right. */
+    if (c)
+        psz = strchr(psz + 1, '\0');
+    /* Append trailing zero, if there is space. */
     if (size)
+    {
+      *s1++ = __libc_gLocaleCollate.auchWeight [c];
+      size--;
+    }
+    ret++;
+  }
+  /* Append trailing zero, if there is space. */
+  if (size)
+    *s1 = 0;
+  return ret;
+        *s1 = '\0';
+    return psz - s2;
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 2066

Legend:

trunk/src/emx/src/lib/str/strxfrm.c

Download in other formats: