Changeset 186 for trunk/src


Ignore:
Timestamp:
Jul 6, 2002, 11:12:59 PM (23 years ago)
Author:
umoeller
Message:

Some Unicode fixes.

Location:
trunk/src/helpers
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/helpers/encodings.c

    r184 r186  
    4040
    4141#include "encodings\base.h"
    42 #include "encodings\alltables.h"
    43 // #include "encodings\collate.h"
     42
     43#include "encodings\unicase.h"
     44
     45#include "encodings\alltables.h"        // this takes a very long time
    4446
    4547#pragma hdrstop
     
    126128
    127129/*
     130 *@@ ENCCASEFOLD:
     131 *
     132 *@@added V0.9.20 (2002-07-03) [umoeller]
     133 */
     134
     135typedef struct _ENCCASEFOLD     // lookup table which encInitCase builds from G_aCaseFolds
     136{
     137    unsigned long   cEntries;       // table bound stored by encInitCase; encToUpper only looks up code points below it
     138    unsigned long   aulFolds[1];    // indexed by code point, holds the mapped character (0 = no mapping); declared with one item, encInitCase allocates the remaining items behind the struct
     139} ENCCASEFOLD, *PENCCASEFOLD;
     140
     141static PENCCASEFOLD G_pFold = NULL;     // the one global table: assigned by encInitCase, read by encToUpper
     142
     143/*
    128144 *@@ encGetTable:
    129145 *
     
    239255 *      systems (and Windows uses OS/2 codepage 1252),
    240256 *      so for conversion between those, codecs are needed.
     257 *
     258 *      This works and is presently used in WarpIN.
    241259 */
    242260
     
    325343 *      and sets the given pointer to NULL.
    326344 *
     345 *      This works and is presently used in WarpIN.
     346 *
    327347 *@@added V0.9.18 (2002-03-08) [umoeller]
    328348 */
     
    351371 *      with Unicode though.
    352372 *
     373 *      This works and is presently used in WarpIN.
     374 *
    353375 *@@added V0.9.18 (2002-03-08) [umoeller]
    354376 */
     
    374396 *      has no codepage equivalent.
    375397 *
     398 *      This works and is presently used in WarpIN.
     399 *
    376400 *@@added V0.9.18 (2002-03-08) [umoeller]
    377401 */
     
    409433 *      This returns 0 if **ppch points to a
    410434 *      null character.
     435 *
     436 *      This works and is presently used in WarpIN.
    411437 *
    412438 *@@added V0.9.14 (2001-08-09) [umoeller]
     
    511537}
    512538
    513 
     539/*
     540 *@@ CreateCaseFold:
     541 *      creates a casefold for later use with
     542 *      encToUpper.
     543 *
     544 *      This only uses one-byte sequences from
     545 *      the Unicode case folding table (see
     546 *      include\encodings\unicase.h), so this
     547 *      cannot be used for expanding characters
     548 *      at this point.
     549 *
     550 *      Returns 1 (TRUE) on success.
     551 *
     552 *      This works and is presently used in WarpIN.
     553 *
     554 *@@added V0.9.20 (2002-07-03) [umoeller]
     555 */
     556
     557int encInitCase(void)
     558{
     559    unsigned long   ul,
     560                    cEntries = 0,
     561                    cb;
     562
     563    for (ul = 0;
     564         ul < ARRAYITEMCOUNT(G_aCaseFolds);
     565         ++ul)
     566    {
     567        // ignore CASEFL_T (duplicate entries for i chars)
     568        // and CASEFL_F (expansions)
     569        if (    (G_aCaseFolds[ul].fl & (CASEFL_C | CASEFL_S))
     570             && (G_aCaseFolds[ul].ulLow > cEntries)
     571           )
     572            cEntries = G_aCaseFolds[ul].ulLow;
     573    }
     574
     575    cb = sizeof(ENCCASEFOLD) + cEntries * sizeof(unsigned long);
     576    if (G_pFold = (PENCCASEFOLD)malloc(cb))
     577    {
     578        memset(G_pFold, 0, cb);
     579        G_pFold->cEntries = cEntries;
     580
     581        for (ul = 0;
     582             ul < ARRAYITEMCOUNT(G_aCaseFolds);
     583             ++ul)
     584        {
     585            if (G_aCaseFolds[ul].fl & (CASEFL_C | CASEFL_S))
     586                G_pFold->aulFolds[G_aCaseFolds[ul].ulLow] = G_aCaseFolds[ul].c1;
     587        }
     588
     589        return 1;
     590    }
     591
     592    return 0;
     593}
     594
     595/*
     596 *@@ encToUpper:
     597 *      converts the given unicode character to
     598 *      upper case, if possible, or returns
     599 *      ulUni back if Unicode doesn't define
     600 *      an upper-case character for it.
     601 *
     602 *      Special cases:
     603 *
     604 *      --  Returns 0 for 0.
     605 *
     606 *      Preconditions:
     607 *
     608 *      --  You must call encInitCase before
     609 *          the first call.
     610 *
     611 *      This works and is presently used in WarpIN.
     612 *
     613 *@@added V0.9.20 (2002-07-03) [umoeller]
     614 */
     615
     616unsigned long encToUpper(unsigned long ulUni)
     617{
     618    unsigned long ulFold;
     619
     620    if (    (ulUni < G_pFold->cEntries)
     621         && (ulFold = G_pFold->aulFolds[ulUni])
     622       )
     623        return ulFold;
     624
     625    return ulUni;
     626}
     627
     628/*
     629 *@@ encicmp:
     630 *      like stricmp, but for UTF-8 strings.
     631 *      This uses encToUpper for the comparisons.
     632 *
     633 *      Like stricmp, this returns:
     634 *
     635 *      --  -1 if pcsz1 is less than pcsz2
     636 *      --  0 if pcsz1 is equal to pcsz2
     637 *      --  +1 if pcsz1 is greater than pcsz2
     638 *
     639 *      However, this does not crash on passing
     640 *      in NULL strings.
     641 *
     642 *      Preconditions:
     643 *
     644 *      --  You must call encInitCase before
     645 *          the first call.
     646 *
     647 *      This works and is presently used in WarpIN.
     648 *
     649 *@@added V0.9.20 (2002-07-03) [umoeller]
     650 */
     651
     652int encicmp(const char *pcsz1,
     653            const char *pcsz2)
     654{
     655    const char  *p1 = pcsz1,
     656                *p2 = pcsz2;
     657
     658    unsigned long ul1, ul2;
     659
     660    do
     661    {
     662        // encDecodeUTF8 returns null for null, so this is safe
     663        ul1 = encToUpper(encDecodeUTF8(&p1));
     664        ul2 = encToUpper(encDecodeUTF8(&p2));
     665
     666        if (ul1 < ul2)
     667            return -1;
     668        if (ul1 > ul2)
     669            return +1;
     670
     671        // both are equal: check for null bytes then
     672        if (!ul1)
     673            if (!ul2)
     674                return 0;
     675            else
     676                // ul1 is null, but ul2 isn't:
     677                return -1;
     678        else
     679            if (!ul2)
     680                // ul1 is not null, but ul2 is:
     681                return +1;
     682
     683        // both are non-null: continue
     684
     685    } while (1);
     686
     687    return 0;
     688}
     689
  • trunk/src/helpers/xml.c

    r169 r186  
    26012601}
    26022602
     2603#ifdef __DEBUG__
     2604
     2605/*
     2606 *@@ Dump:
     2607 *
     2608 *@@added V0.9.20 (2002-07-03) [umoeller]
     2609 */
     2610
     2611static VOID Dump(int iIndent,
     2612                 PDOMNODE pDomNode)
     2613{
     2614    PLISTNODE pChildNode;
     2615    int i;
     2616    for (i = 0;
     2617         i < iIndent;
     2618         ++i)
     2619    {
     2620        printf(" ");
     2621    }
     2622
     2623    switch (pDomNode->NodeBase.ulNodeType)
     2624    {
     2625        #define DUMPTYPE(t) case t: printf(#t); break;
     2626        DUMPTYPE(DOMNODE_ELEMENT)
     2627        DUMPTYPE(DOMNODE_ATTRIBUTE)
     2628        DUMPTYPE(DOMNODE_TEXT)
     2629        DUMPTYPE(DOMNODE_PROCESSING_INSTRUCTION)
     2630        DUMPTYPE(DOMNODE_COMMENT)
     2631        DUMPTYPE(DOMNODE_DOCUMENT)
     2632        DUMPTYPE(DOMNODE_DOCUMENT_TYPE)
     2633        DUMPTYPE(ELEMENTPARTICLE_EMPTY)
     2634        DUMPTYPE(ELEMENTPARTICLE_ANY)
     2635        DUMPTYPE(ELEMENTPARTICLE_MIXED)
     2636        DUMPTYPE(ELEMENTPARTICLE_CHOICE)
     2637        DUMPTYPE(ELEMENTPARTICLE_SEQ)
     2638        DUMPTYPE(ELEMENTPARTICLE_NAME)
     2639        DUMPTYPE(ATTRIBUTE_DECLARATION_BASE)
     2640        DUMPTYPE(ATTRIBUTE_DECLARATION)
     2641        DUMPTYPE(ATTRIBUTE_DECLARATION_ENUM)
     2642    }
     2643
     2644    printf(" \"%s\"\n", STRINGORNULL(pDomNode->NodeBase.strNodeName.psz));
     2645
     2646    ++iIndent;
     2647    for (pChildNode = lstQueryFirstNode(&pDomNode->llChildren);
     2648         pChildNode;
     2649         pChildNode = pChildNode->pNext)
     2650    {
     2651        Dump(iIndent, (PDOMNODE)pChildNode->pItemData);
     2652    }
     2653    --iIndent;
     2654}
     2655
     2656#endif
     2657
     2658/*
     2659 *@@ xmlDump:
     2660 *      debug function which dumps the DOM to stdout.
     2661 *
     2662 *@@added V0.9.20 (2002-07-03) [umoeller]
     2663 */
     2664
     2665VOID xmlDump(PXMLDOM pDom)
     2666{
     2667#ifdef __DEBUG__
     2668    if (!pDom)
     2669    {
     2670        printf(__FUNCTION__ ": pDom is NULL\n");
     2671        return;
     2672    }
     2673
     2674    printf(__FUNCTION__ ": dumping document node ");
     2675
     2676    Dump(0, (PDOMNODE)pDom->pDocumentNode);
     2677#endif
     2678}
     2679
    26032680/*
    26042681 *@@ xmlFreeDOM:
     
    27462823 *
    27472824 *@@added V0.9.11 (2001-04-22) [umoeller]
     2825 *@@changed V0.9.20 (2002-07-03) [umoeller]: this never worked with DTDs, fixed
    27482826 */
    27492827
     
    27572835       )
    27582836    {
    2759         return ((PDOMNODE)pListNode->pItemData);
     2837        // V0.9.20 (2002-07-03) [umoeller]:
     2838        // we can't just return the first node on the
     2839        // list, because if we have DTD, this might
     2840        // be the doctype... so loop until we find
     2841        // an element, which must be the root element
     2842        while (pListNode)
     2843        {
     2844            PDOMNODE pDomNode = (PDOMNODE)pListNode->pItemData;
     2845            if (pDomNode->NodeBase.ulNodeType == DOMNODE_ELEMENT)
     2846                return (pDomNode);
     2847
     2848            pListNode = pListNode->pNext;
     2849        }
    27602850    }
    27612851
     
    28432933                                  const char *pcszName)
    28442934{
    2845     // APIRET arc = NO_ERROR;
    2846 
    28472935    PLINKLIST pll = lstCreate(FALSE);       // no free
    28482936    if (pll)
Note: See TracChangeset for help on using the changeset viewer.