Changeset 2066


Ignore:
Timestamp:
Jun 23, 2005, 8:03:42 AM (20 years ago)
Author:
bird
Message:

Corrected the strxfrm() return values.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/emx/src/lib/str/strxfrm.c

    • Property cvs2svn:cvs-rev changed from 1.7 to 1.8
    r2065 r2066  
    1919struct __strxfrm_arg
    2020{
    21   char *out;
    22   size_t size;
     21    char *out;
     22    size_t size;
    2323};
    2424
    25 static int __uni_strxfrm (UniChar *ucs, void *arg)
     25static int __uni_strxfrm(UniChar *ucs, void *arg)
    2626{
    27   struct __strxfrm_arg *x = (struct __strxfrm_arg *)arg;
     27    struct __strxfrm_arg *x = (struct __strxfrm_arg *)arg;
    2828
    29   /* BUG WARNING!
    30    * As far as I've observed Unicode DLL has a bug that UniStrxfrm returns
    31    * one character less than it really fills in the buffer. I haven't
    32    * implemented any workaround for that first because it can be fixed
    33    * in the future and second because the information at the end of the
    34    * buffer seems very seldom really needed. UniStrxfrm generates a lot
    35    * of output, and every character in the input buffer generates three
    36    * characters in the output buffer: one wchar_t and two bytes that
    37    * seems to be related to character type (e.g. similar to character
    38    * flags isXXX() works with).
    39    */
     29    /* BUG WARNING!
     30     * As far as I've observed Unicode DLL has a bug that UniStrxfrm returns
     31     * one character less than it really fills in the buffer. I haven't
     32     * implemented any workaround for that first because it can be fixed
     33     * in the future and second because the information at the end of the
     34     * buffer seems very seldom really needed. UniStrxfrm generates a lot
     35     * of output, and every character in the input buffer generates three
     36     * characters in the output buffer: one wchar_t and two bytes that
     37     * seems to be related to character type (e.g. similar to character
     38     * flags isXXX() works with).
     39     */
    4040
    41   size_t rs = UniStrxfrm (__libc_gLocaleCollate.lobj, (UniChar *)x->out,
    42     ucs, x->size);
    43   /* rs is in UniChar's without trailing zero */
    44   rs *= sizeof (UniChar);
    45   if (rs < x->size)
    46   {
    47     /* The string returned by Unicode API often contain zero characters
    48        (in the top or bottom 8 bits of a Unicode character).
    49        This is inappropriate for MBCS strings, so we increment all
    50        character codes by one except code 0xff (which is very seldom
    51        encountered). There is no other way to represent a Unicode
    52        xfrm'ed string as a MBCS string, alas. */
     41    size_t rs = UniStrxfrm(__libc_gLocaleCollate.lobj, (UniChar *)x->out, ucs, x->size / sizeof(UniChar));
     42    if (!x->size)
     43        x->size = rs * sizeof(UniChar) + 3; /* We add three bytes so we'll get enough space for a UniChar null terminator
     44                                               and any incorrect returns from UniStrxfrm. The caller will add 1 more byte for us.
     45                                               This means that we'll return less on the actual job, but that's hopefully acceptable. */
     46    else
     47    {
     48        /* rs is in UniChar's without trailing zero. */
     49        rs *= sizeof(UniChar);
     50        size_t size = x->size;
     51        x->size = rs;
     52        if (rs >= size)
     53            rs = size - 1;
    5354
    54     int i;
    55     for (i = 0; i < rs; i++)
    56       if (x->out [i] != -1)
    57         x->out [i]++;
    58     x->out [rs] = 0;
    59   }
    60   x->size = rs + 1; /* We need space for trailing zero too */
    61   return 0;
     55        /* The string returned by Unicode API often contain zero characters
     56           (in the top or bottom 8 bits of a Unicode character).
     57           This is inappropriate for MBCS strings, so we increment all
     58           character codes by one except code 0xff (which is very seldom
     59           encountered). There is no other way to represent a Unicode
     60           xfrm'ed string as a MBCS string, alas. */
     61
     62        for (int i = 0; i < rs; i++)
     63            if (x->out[i] != -1)
     64                x->out[i]++;
     65        x->out[rs] = '\0';
     66    }
     67    return 0;
    6268}
    6369
     
    6571size_t _STD(strxfrm) (char *s1, const char *s2, size_t size)
    6672{
    67   unsigned char c;
    68   size_t ret = 1; /* We need at least space for trailing zero */
     73    if (__libc_gLocaleCollate.mbcs)
     74    {
     75        /* When using MBCS codepages, we will convert the entire string to
     76           Unicode and then apply the UniStrxfrm() function. The output strings
     77           can be much longer than the original in this case, but if user program
     78           is correctly written, it will work since strxfrm will return the
     79           required output string length. */
     80        struct __strxfrm_arg x;
     81        FS_VAR();
     82        FS_SAVE_LOAD();
     83        x.out = s1;
     84        x.size = size;
     85        __libc_ucs2Do (__libc_gLocaleCollate.uobj, (char *)s2, &x, __uni_strxfrm);
     86        FS_RESTORE();
     87        return x.size;
     88    }
    6989
    70   if (__libc_gLocaleCollate.mbcs)
    71   {
    72     /* When using MBCS codepages, we will convert the entire string to
    73        Unicode and then apply the UniStrxfrm() function. The output strings
    74        can be much longer than the original in this case, but if user program
    75        is correctly written, it will work since strxfrm will return the
    76        required output string length. */
    77     struct __strxfrm_arg x;
    78     FS_VAR();
    79     FS_SAVE_LOAD();
    80     x.out = s1;
    81     x.size = size / sizeof (UniChar);
    82     __libc_ucs2Do (__libc_gLocaleCollate.uobj, (char *)s2, &x, __uni_strxfrm);
    83     FS_RESTORE();
    84     return x.size;
    85   }
     90    /* buffer size query */
     91    if (!size)
     92        return strlen(s2);
    8693
    87   while ((c = *s2++))
    88   {
     94    /* fill buffer as far as there is room */
     95    register const char *psz = s2;
     96    register unsigned char c;
     97    while ((c = *psz) && size)
     98    {
     99        size--;
     100        *s1++ = __libc_gLocaleCollate.auchWeight[c];
     101        psz++;
     102    }
     103
     104    /* If there is more input, skip to the end so we get the length right. */
     105    if (c)
     106        psz = strchr(psz + 1, '\0');
     107
     108    /* Append trailing zero, if there is space. */
    89109    if (size)
    90     {
    91       *s1++ = __libc_gLocaleCollate.auchWeight [c];
    92       size--;
    93     }
    94     ret++;
    95   }
    96 
    97   /* Append trailing zero, if there is space. */
    98   if (size)
    99     *s1 = 0;
    100 
    101   return ret;
     110        *s1 = '\0';
     111    return psz - s2;
    102112}
Note: See TracChangeset for help on using the changeset viewer.