Ignore:
Timestamp:
Jul 6, 2002, 11:12:59 PM (23 years ago)
Author:
umoeller
Message:

Some Unicode fixes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/helpers/encodings.c

    r184 r186  
    4040
    4141#include "encodings\base.h"
    42 #include "encodings\alltables.h"
    43 // #include "encodings\collate.h"
     42
     43#include "encodings\unicase.h"
     44
     45#include "encodings\alltables.h"        // this takes a very long time
    4446
    4547#pragma hdrstop
     
    126128
    127129/*
     130 *@@ ENCCASEFOLD:
     131 *
     132 *@@added V0.9.20 (2002-07-03) [umoeller]
     133 */
     134
     135typedef struct _ENCCASEFOLD
     136{
     137    unsigned long   cEntries;
     138    unsigned long   aulFolds[1];
     139} ENCCASEFOLD, *PENCCASEFOLD;
     140
     141static PENCCASEFOLD G_pFold = NULL;
     142
     143/*
    128144 *@@ encGetTable:
    129145 *
     
    239255 *      systems (and Windows uses OS/2 codepage 1252),
    240256 *      so for conversion between those, codecs are needed.
     257 *
     258 *      This works and is presently used in WarpIN.
    241259 */
    242260
     
    325343 *      and sets the given pointer to NULL.
    326344 *
     345 *      This works and is presently used in WarpIN.
     346 *
    327347 *@@added V0.9.18 (2002-03-08) [umoeller]
    328348 */
     
    351371 *      with Unicode though.
    352372 *
     373 *      This works and is presently used in WarpIN.
     374 *
    353375 *@@added V0.9.18 (2002-03-08) [umoeller]
    354376 */
     
    374396 *      has no codepage equivalent.
    375397 *
     398 *      This works and is presently used in WarpIN.
     399 *
    376400 *@@added V0.9.18 (2002-03-08) [umoeller]
    377401 */
     
    409433 *      This returns 0 if **ppch points to a
    410434 *      null character.
     435 *
     436 *      This works and is presently used in WarpIN.
    411437 *
    412438 *@@added V0.9.14 (2001-08-09) [umoeller]
     
    511537}
    512538
    513 
     539/*
     540 *@@ CreateCaseFold:
     541 *      creates a casefold for later use with
     542 *      encToUpper.
     543 *
     544 *      This only uses one-byte sequences from
     545 *      the Unicode case folding table (see
     546 *      include\encodings\unicase.h), so this
     547 *      cannot be used for expanding characters
     548 *      at this point.
     549 *
     550 *      Returns 1 (TRUE) on success.
     551 *
     552 *      This works and is presently used in WarpIN.
     553 *
     554 *@@added V0.9.20 (2002-07-03) [umoeller]
     555 */
     556
     557int encInitCase(void)
     558{
     559    unsigned long   ul,
     560                    cEntries = 0,
     561                    cb;
     562
     563    for (ul = 0;
     564         ul < ARRAYITEMCOUNT(G_aCaseFolds);
     565         ++ul)
     566    {
     567        // ignore CASEFL_T (duplicate entries for i chars)
     568        // and CASEFL_F (expansions)
     569        if (    (G_aCaseFolds[ul].fl & (CASEFL_C | CASEFL_S))
     570             && (G_aCaseFolds[ul].ulLow > cEntries)
     571           )
     572            cEntries = G_aCaseFolds[ul].ulLow;
     573    }
     574
     575    cb = sizeof(ENCCASEFOLD) + cEntries * sizeof(unsigned long);
     576    if (G_pFold = (PENCCASEFOLD)malloc(cb))
     577    {
     578        memset(G_pFold, 0, cb);
     579        G_pFold->cEntries = cEntries;
     580
     581        for (ul = 0;
     582             ul < ARRAYITEMCOUNT(G_aCaseFolds);
     583             ++ul)
     584        {
     585            if (G_aCaseFolds[ul].fl & (CASEFL_C | CASEFL_S))
     586                G_pFold->aulFolds[G_aCaseFolds[ul].ulLow] = G_aCaseFolds[ul].c1;
     587        }
     588
     589        return 1;
     590    }
     591
     592    return 0;
     593}
     594
     595/*
     596 *@@ encToUpper:
     597 *      converts the given unicode character to
     598 *      upper case, if possible, or returns
     599 *      ulUni back if Unicode doesn't define
     600 *      an upper-case character for it.
     601 *
     602 *      Special cases:
     603 *
     604 *      --  Returns 0 for 0.
     605 *
     606 *      Preconditions:
     607 *
     608 *      --  You must call encInitCase before
     609 *          the first call.
     610 *
     611 *      This works and is presently used in WarpIN.
     612 *
     613 *@@added V0.9.20 (2002-07-03) [umoeller]
     614 */
     615
     616unsigned long encToUpper(unsigned long ulUni)
     617{
     618    unsigned long ulFold;
     619
     620    if (    (ulUni < G_pFold->cEntries)
     621         && (ulFold = G_pFold->aulFolds[ulUni])
     622       )
     623        return ulFold;
     624
     625    return ulUni;
     626}
     627
     628/*
     629 *@@ encicmp:
     630 *      like stricmp, but for UTF-8 strings.
     631 *      This uses encToUpper for the comparisons.
     632 *
     633 *      Like stricmp, this returns:
     634 *
     635 *      --  -1 if pcsz1 is less than pcsz2
     636 *      --  0 if pcsz1 is equal to pcsz2
     637 *      --  +1 if pcsz1 is greater than pcsz2
     638 *
     639 *      However, this does not crash on passing
     640 *      in NULL strings.
     641 *
     642 *      Preconditions:
     643 *
     644 *      --  You must call encInitCase before
     645 *          the first call.
     646 *
     647 *      This works and is presently used in WarpIN.
     648 *
     649 *@@added V0.9.20 (2002-07-03) [umoeller]
     650 */
     651
     652int encicmp(const char *pcsz1,
     653            const char *pcsz2)
     654{
     655    const char  *p1 = pcsz1,
     656                *p2 = pcsz2;
     657
     658    unsigned long ul1, ul2;
     659
     660    do
     661    {
     662        // encDecodeUTF8 returns null for null, so this is safe
     663        ul1 = encToUpper(encDecodeUTF8(&p1));
     664        ul2 = encToUpper(encDecodeUTF8(&p2));
     665
     666        if (ul1 < ul2)
     667            return -1;
     668        if (ul1 > ul2)
     669            return +1;
     670
     671        // both are equal: check for null bytes then
     672        if (!ul1)
     673            if (!ul2)
     674                return 0;
     675            else
     676                // ul1 is null, but ul2 isn't:
     677                return -1;
     678        else
     679            if (!ul2)
     680                // ul1 is not null, but ul2 is:
     681                return +1;
     682
     683        // both are non-null: continue
     684
     685    } while (1);
     686
     687    return 0;
     688}
     689
Note: See TracChangeset for help on using the changeset viewer.