Changeset 97 for trunk/src/helpers/xml.c
- Timestamp:
- Aug 12, 2001, 5:34:51 PM (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/helpers/xml.c
r91 r97 136 136 137 137 #include "helpers\linklist.h" 138 #include "helpers\standards.h" 138 139 #include "helpers\stringh.h" 139 140 #include "helpers\tree.h" … … 171 172 const char* xmlDescribeError(int code) 172 173 { 173 s tatic const char *message[] =174 switch (code) 174 175 { 175 176 // start of expat (parser) errors 176 "Out of memory", 177 "Syntax error", 178 "No element found", 179 "Not well-formed (invalid token)", 180 "Unclosed token", 181 "Unclosed token", 182 "Mismatched tag", 183 "Duplicate attribute", 184 "Junk after root element", 185 "Illegal parameter entity reference", 186 "Undefined entity", 187 "Recursive entity reference", 188 "Asynchronous entity", 189 "Reference to invalid character number", 190 "Reference to binary entity", 191 "Reference to external entity in attribute", 192 "XML processing instruction not at start of external entity", 193 "Unknown encoding", 194 "Encoding specified in XML declaration is incorrect", 195 "Unclosed CDATA section", 196 "Error in processing external entity reference", 197 "Document is not standalone", 198 "Unexpected parser state - please send a bug report", 177 case ERROR_EXPAT_NO_MEMORY: 178 return ("Out of memory"); 179 180 case ERROR_EXPAT_SYNTAX: 181 return ("Syntax error"); 182 case ERROR_EXPAT_NO_ELEMENTS: 183 return ("No element found"); 184 case ERROR_EXPAT_INVALID_TOKEN: 185 return ("Not well-formed (invalid token)"); 186 case ERROR_EXPAT_UNCLOSED_TOKEN: 187 return ("Unclosed token"); 188 case ERROR_EXPAT_PARTIAL_CHAR: 189 return ("Unclosed token"); 190 case ERROR_EXPAT_TAG_MISMATCH: 191 return ("Mismatched tag"); 192 case ERROR_EXPAT_DUPLICATE_ATTRIBUTE: 193 return ("Duplicate attribute"); 194 case ERROR_EXPAT_JUNK_AFTER_DOC_ELEMENT: 195 return ("Junk after root element"); 196 case ERROR_EXPAT_PARAM_ENTITY_REF: 197 return ("Illegal parameter entity reference"); 198 case ERROR_EXPAT_UNDEFINED_ENTITY: 199 return ("Undefined entity"); 200 case ERROR_EXPAT_RECURSIVE_ENTITY_REF: 201 return ("Recursive 
entity reference"); 202 case ERROR_EXPAT_ASYNC_ENTITY: 203 return ("Asynchronous entity"); 204 case ERROR_EXPAT_BAD_CHAR_REF: 205 return ("Reference to invalid character number"); 206 case ERROR_EXPAT_BINARY_ENTITY_REF: 207 return ("Reference to binary entity"); 208 case ERROR_EXPAT_ATTRIBUTE_EXTERNAL_ENTITY_REF: 209 return ("Reference to external entity in attribute"); 210 case ERROR_EXPAT_MISPLACED_XML_PI: 211 return ("XML processing instruction not at start of external entity"); 212 case ERROR_EXPAT_UNKNOWN_ENCODING: 213 return ("Unknown encoding"); 214 case ERROR_EXPAT_INCORRECT_ENCODING: 215 return ("Encoding specified in XML declaration is incorrect"); 216 case ERROR_EXPAT_UNCLOSED_CDATA_SECTION: 217 return ("Unclosed CDATA section"); 218 case ERROR_EXPAT_EXTERNAL_ENTITY_HANDLING: 219 return ("Error in processing external entity reference"); 220 case ERROR_EXPAT_NOT_STANDALONE: 221 return ("Document is not standalone"); 222 case ERROR_EXPAT_UNEXPECTED_STATE: 223 return ("Unexpected parser state - please send a bug report"); 199 224 // end of expat (parser) errors 200 225 201 226 // start of validation errors 202 "Element has not been declared", 203 "Root element name does not match DOCTYPE name", 204 "Invalid or duplicate root element", 205 "Invalid sub-element in parent element", 206 "Duplicate element declaration", 207 "Duplicate attribute declaration", 208 "Undeclared attribute in element", 209 "Element cannot have content", 210 "Invalid attribute value", 211 "Required attribute is missing", 212 "Subelement in empty element", 213 214 "Parsing error", 215 "Validity error", 216 217 "DOM node type not supported", 218 "No DOM document", 219 "No DOM element", 220 "Duplicate doctype", 221 "Root element doesn't match doctype name", 222 "DOM integrity error", 223 "Duplicate attribute", 224 225 "Validation error: Undeclared element name", 226 "Element declaration outside doctype", 227 "Attlist declaration outside doctype" 228 }; 229 230 int code2 = code - 
ERROR_XML_FIRST; 231 232 if ( code2 >= 0 233 && code2 < sizeof(message) / sizeof(message[0]) 234 ) 235 return message[code2]; 236 237 return 0; 227 228 case ERROR_DOM_UNDECLARED_ELEMENT: 229 return ("Element has not been declared"); 230 case ERROR_DOM_ROOT_ELEMENT_MISNAMED: 231 return ("Root element name does not match DOCTYPE name"); 232 case ERROR_DOM_INVALID_ROOT_ELEMENT: 233 return ("Invalid or duplicate root element"); 234 235 case ERROR_DOM_INVALID_SUBELEMENT: 236 return ("Invalid sub-element in parent element"); 237 case ERROR_DOM_DUPLICATE_ELEMENT_DECL: 238 return ("Duplicate element declaration"); 239 case ERROR_DOM_DUPLICATE_ATTRIBUTE_DECL: 240 return ("Duplicate attribute declaration"); 241 case ERROR_DOM_UNDECLARED_ATTRIBUTE: 242 return ("Undeclared attribute in element"); 243 case ERROR_ELEMENT_CANNOT_HAVE_CONTENT: 244 return ("Element cannot have content"); 245 case ERROR_DOM_INVALID_ATTRIB_VALUE: 246 return ("Invalid attribute value"); 247 case ERROR_DOM_REQUIRED_ATTRIBUTE_MISSING: 248 return ("Required attribute is missing"); 249 case ERROR_DOM_SUBELEMENT_IN_EMPTY_ELEMENT: 250 return ("Subelement in empty element"); 251 252 case ERROR_DOM_PARSING: 253 return ("Parsing error"); 254 case ERROR_DOM_VALIDITY: 255 return ("Validity error"); 256 257 case ERROR_DOM_NODETYPE_NOT_SUPPORTED: 258 return ("DOM node type not supported"); 259 case ERROR_DOM_NO_DOCUMENT: 260 return ("No DOM document"); 261 case ERROR_DOM_NO_ELEMENT: 262 return ("No DOM element"); 263 case ERROR_DOM_DUPLICATE_DOCTYPE: 264 return ("Duplicate doctype"); 265 case ERROR_DOM_DOCTYPE_ROOT_NAMES_MISMATCH: 266 return ("Root element doesn't match doctype name"); 267 case ERROR_DOM_INTEGRITY: 268 return ("DOM integrity error"); 269 case ERROR_DOM_DUPLICATE_ATTRIBUTE: 270 return ("Duplicate attribute"); 271 272 case ERROR_DOM_VALIDATE_INVALID_ELEMENT: 273 return ("Validation error: Undeclared element name"); 274 case ERROR_DOM_ELEMENT_DECL_OUTSIDE_DOCTYPE: 275 return ("Element declaration 
outside doctype"); 276 case ERROR_DOM_ATTLIST_DECL_OUTSIDE_DOCTYPE: 277 return ("Attlist declaration outside doctype"); 278 } 279 280 return NULL; 238 281 } 239 282 … … 291 334 * 292 335 *@@added V0.9.9 (2001-02-16) [umoeller] 336 *@@changed V0.9.14 (2001-08-09) [umoeller]: fixed map bug which caused the whole XML stuff to fail 293 337 */ 294 338 … … 297 341 { 298 342 return (strhcmp(((PXSTRING)ul1)->psz, 299 ((PXSTRING)ul 1)->psz));343 ((PXSTRING)ul2)->psz)); 300 344 } 301 345 … … 354 398 * 355 399 *@@added V0.9.9 (2001-02-16) [umoeller] 400 *@@changed V0.9.14 (2001-08-09) [umoeller]: fixed crash on string delete 356 401 */ 357 402 … … 501 546 lstClear(&llDeleteNodes); 502 547 503 xstr Free(((PXSTRING*)&pNode->Tree.ulKey));548 xstrClear(&pNode->strNodeName); 504 549 free(pNode); 505 550 } … … 557 602 } 558 603 559 arc = xmlCreateNodeBase(ulNodeType, 560 cb, 561 pcszNodeName, 562 ulNodeNameLength, 563 (PNODEBASE*)&pNewNode); 564 if (arc == NO_ERROR) 604 if (!(arc = xmlCreateNodeBase(ulNodeType, 605 cb, 606 pcszNodeName, 607 ulNodeNameLength, 608 (PNODEBASE*)&pNewNode))) 565 609 { 566 610 pNewNode->pParentNode = pParentNode; … … 635 679 PDOMNODE pNew = NULL; 636 680 APIRET arc = xmlCreateDomNode(pParent, 637 DOMNODE_ELEMENT,638 pcszElement,639 0,640 &pNew);681 DOMNODE_ELEMENT, 682 pcszElement, 683 0, 684 &pNew); 641 685 642 686 if (arc == NO_ERROR) … … 753 797 PDOMNODE pNew = NULL; 754 798 APIRET arc = xmlCreateDomNode(pParent, 755 DOMNODE_COMMENT,756 NULL,757 0,758 &pNew);799 DOMNODE_COMMENT, 800 NULL, 801 0, 802 &pNew); 759 803 if (arc == NO_ERROR) 760 804 { … … 1381 1425 1382 1426 /* 1427 *@@ UnknownEncodingHandler: 1428 * @expat handler called when the xml 1429 * @text_declaration has an @encoding that is not 1430 * one of the four encodings built into expat. 1431 * 1432 * See XML_SetUnknownEncodingHandler. 
1433 * 1434 *@@added V0.9.14 (2001-08-09) [umoeller] 1435 */ 1436 1437 int EXPATENTRY UnknownEncodingHandler(void *pUserData, // in: out PXMLDOM really 1438 const XML_Char *pcszName, 1439 XML_Encoding *pEncoding) 1440 { 1441 PXMLDOM pDom = (PXMLDOM)pUserData; 1442 1443 ULONG ulCP; 1444 if ( (pDom->pfnGetCPData) // callback exists? 1445 && (!strncmp(pcszName, "cp", 2)) 1446 && (strlen(pcszName) > 4) // at least five chars (e.g. "cp850") 1447 && (ulCP = atoi(pcszName + 2)) 1448 ) 1449 { 1450 // this is a PC codepage: 1451 /* typedef struct _XML_Encoding 1452 { 1453 int map[256]; 1454 void *data; 1455 int (* EXPATENTRY convert)(void *data, const char *s); 1456 void (* EXPATENTRY release)(void *data); 1457 } XML_Encoding; */ 1458 1459 // ZERO(pEncoding); 1460 1461 pEncoding->convert = NULL; 1462 pEncoding->release = NULL; 1463 1464 memset(&pEncoding->map, -1, sizeof(pEncoding->map)); 1465 1466 if (pDom->pfnGetCPData(pDom, 1467 ulCP, 1468 pEncoding->map)) 1469 { 1470 // go check if there's any -1 chars left 1471 ULONG ul; 1472 for (ul = 0; 1473 ul < 256; 1474 ul++) 1475 { 1476 if (pEncoding->map[ul] < 0) 1477 xmlSetError(pDom, 1478 ERROR_DOM_INCOMPLETE_ENCODING_MAP, 1479 NULL, 1480 FALSE); 1481 } 1482 // return success 1483 return (1); 1484 } 1485 } 1486 1487 // error 1488 return (0); 1489 } 1490 1491 /* 1383 1492 *@@ StartElementHandler: 1384 1493 * @expat handler called when a new element is … … 1449 1558 &pAttrib); 1450 1559 1451 // shall we validate? 1452 if (pDom->pDocTypeNode) 1453 ValidateAttributeType(pDom, 1454 pAttrib, 1455 &pAttribDeclBase); 1560 if (pDom->arcDOM) 1561 xmlSetError(pDom, 1562 pDom->arcDOM, 1563 papcszAttribs[i], 1564 TRUE); // validation 1565 else 1566 // shall we validate? 
1567 if (pDom->pDocTypeNode) 1568 ValidateAttributeType(pDom, 1569 pAttrib, 1570 &pAttribDeclBase); 1456 1571 } 1457 1572 … … 1524 1639 if (!pDom->arcDOM) 1525 1640 { 1526 // ULONG i;1527 1528 1641 if (len) 1529 1642 { … … 1535 1648 PDOMNODE pParent = pSI->pDomNode; 1536 1649 // pNew = NULL; 1650 1651 BOOL fIsWhitespace = FALSE; 1537 1652 1538 1653 // shall we validate? … … 1568 1683 ULONG ul; 1569 1684 const char *p = s; 1685 1686 if (pDom->flParserFlags & DF_DROP_WHITESPACE) 1687 fIsWhitespace = TRUE; 1688 1570 1689 for (ul = 0; 1571 1690 ul < len; … … 1578 1697 pParent->NodeBase.strNodeName.psz, 1579 1698 TRUE); 1699 fIsWhitespace = FALSE; 1580 1700 break; 1581 1701 } … … 1583 1703 } 1584 1704 } 1585 } 1705 1706 } // end if (pDom->pDocTypeNode) 1707 1708 if (!fIsWhitespace) 1709 // this is false if any of the following 1710 // is true: 1711 // -- we are not validating at all 1712 // -- we are validating, but the the element 1713 // can have mixed content 1714 // -- we are validating and the element does 1715 // _not_ have mixed content and DF_DROP_WHITESPACE 1716 // is set, but the string is whitespace only 1717 // --> drop it then 1586 1718 1587 1719 if (pDom->pLastWasTextNode) … … 1775 1907 * handler should be prepared to be called recursively. 
1776 1908 * 1777 *@@added V0.9.9 (2001-02-14) [umoeller] 1778 */ 1779 1780 int EXPATENTRY ExternalEntityRefHandler(XML_Parser parser, 1909 *@@added V0.9.14 (2001-08-09) [umoeller] 1910 */ 1911 1912 int EXPATENTRY ExternalEntityRefHandler(void *pUserData, // in: our PXMLDOM really 1913 XML_Parser parser, 1781 1914 const XML_Char *pcszContext, 1782 1915 const XML_Char *pcszBase, … … 1784 1917 const XML_Char *pcszPublicId) 1785 1918 { 1786 int i = 1; 1787 1788 // @@todo: allow caller to load external references some way 1789 1790 /* PXMLDOM pDom = (PXMLDOM)pUserData; 1791 1792 // continue parsing only if we had no errors so far 1793 if (!pDom->arcDOM) 1794 { 1795 } */ 1919 PXMLDOM pDom = (PXMLDOM)pUserData; 1920 1921 int i = 0; // return error per default 1922 1923 APIRET arc = NO_ERROR; 1924 1925 // store the previous parser because 1926 // all the callbacks use the parser pointer 1927 XML_Parser pOldParser = pDom->pParser; 1928 pDom->pParser = NULL; 1929 1930 if ( (pDom->pfnExternalHandler) 1931 // create sub-parser and replace the one 1932 // in the DOM with it 1933 && (pDom->pParser = XML_ExternalEntityParserCreate(parser, 1934 pcszContext, 1935 "US-ASCII")) 1936 ) 1937 { 1938 if ((arc = pDom->pfnExternalHandler(pDom, 1939 pDom->pParser, 1940 pcszSystemId, 1941 pcszPublicId))) 1942 { 1943 // error: 1944 // now this needs special handling, since we're 1945 // dealing with a sub-handler here... 
1946 1947 if (arc == -1) 1948 // parser error: well, then xmlSetError has been 1949 // called from somewhere in the callbacks already, 1950 // and we can safely ignore this 1951 ; 1952 else 1953 { 1954 pDom->arcDOM = arc; 1955 if (pcszSystemId) 1956 { 1957 if (!pDom->pxstrFailingNode) 1958 pDom->pxstrFailingNode = xstrCreate(0); 1959 xstrcpy(pDom->pxstrFailingNode, pcszSystemId, 0); 1960 } 1961 pDom->pcszErrorDescription = xmlDescribeError(arc); 1962 pDom->ulErrorLine = XML_GetCurrentLineNumber(pDom->pParser); 1963 pDom->ulErrorColumn = XML_GetCurrentColumnNumber(pDom->pParser); 1964 } 1965 } 1966 1967 i = 1; // success 1968 } 1969 else 1970 xmlSetError(pDom, 1971 (!arc) ? ERROR_DOM_INVALID_EXTERNAL_HANDLER : arc, 1972 NULL, 1973 FALSE); 1974 1975 if (pDom->pParser) 1976 XML_ParserFree(pDom->pParser); 1977 1978 pDom->pParser = pOldParser; 1796 1979 1797 1980 return (i); … … 2125 2308 * Pass the XMLDOM returned here to xmlParse afterwards. 2126 2309 * 2127 * ulFlags is any combination of the following: 2128 * 2129 * -- DF_PARSECOMMENTS: XML @comments are to be returned in 2130 * the DOM tree. Otherwise they are silently ignored. 2131 * 2132 * -- DF_PARSEDTD: add the @DTD of the document into the DOM tree 2133 * as well and validate the document, if a DTD was found. 2134 * Otherwise just parse and do not validate. 2135 * 2136 * -- DF_FAIL_IF_NO_DTD: fail if no @DTD was found. Useful 2137 * if you want to enforce validation. @@todo 2138 * 2139 * Usage: 2310 * Simplest possible usage: 2140 2311 * 2141 2312 * 1) Create a DOM instance. 2142 2313 * 2143 2314 + PXMLDOM pDom = NULL; 2144 + APIRET arc = xmlCreateDOM(flags, &pDom);2315 + APIRET arc = xmlCreateDOM(flags, NULL, NULL, NULL, &pDom); 2145 2316 + 2146 2317 * 2) Give chunks of data (or an entire buffer) … … 2164 2335 * 4) When done, call xmlFreeDOM, which will free all memory. 
2165 2336 * 2337 * The above code has limitations: only a few character 2338 * @encodings are supported, and @external_entities are 2339 * silently ignored. 2340 * 2341 * This function supports a number of callbacks and flags 2342 * to allow for maximum flexibility. Note however that 2343 * not all @expat features are supported yet. 2344 * 2345 * The following callbacks can be specified (any of these 2346 * can be NULL): 2347 * 2348 * -- pfnGetCPData should be specified if you want to 2349 * support character @encodings other than the 2350 * four that are built into expat itself (see 2351 * XML_SetUnknownEncodingHandler). This is probably 2352 * a good idea to do under OS/2 since most OS/2 2353 * documents are in a PC-specific codepage such as 2354 * CP 850. 2355 * 2356 * This callback must have the following prototype: 2357 * 2358 + int APIENTRY FNGETCPDATA(PXMLDOM pDom, ULONG ulCP, int *piMap) 2359 * 2360 * The callback will only be called once for each 2361 * document if the "encoding" attribute of the 2362 * XML @text_declaration starts with "cp" (e.g. 2363 * "cp850") and will then receive the following 2364 * parameters: 2365 * 2366 * -- "pDom" will be the XMLDOM created by this function. 2367 * 2368 * -- ulCP has the IBM code page number, such as "850". 2369 * 2370 * -- piMap is an array of 256 integer values which must 2371 * be filled by the callback. Each array item index 2372 * is the codepage value, and the value of each field 2373 * is the corresponding Unicode value, or -1 if the 2374 * character is invalid (shouldn't happen with codepages). 2375 * 2376 * For example, the German o-umlaut character is 2377 * 0x94 in CP850 and 0x00f6 in Unicode. So set 2378 * the int at index 0x94 to 0x00f6. 2379 * 2380 * pvCallbackUser is a user parameter which is simply stored 2381 * in the XMLDOM struct which is returned. Since the XMLDOM 2382 * is passed to all the callbacks, you can access that pointer 2383 * from them. 
2384 * 2385 * flParserFlags is any combination of the following: 2386 * 2387 * -- DF_PARSECOMMENTS: XML @comments are to be returned in 2388 * the DOM tree. Otherwise they are discarded. 2389 * 2390 * -- DF_PARSEDTD: add the @DTD of the document into the DOM tree 2391 * as well and validate the document, if a DTD was found. 2392 * Otherwise just parse and do not validate. 2393 * 2394 * DF_PARSEDTD is required for external entities to work 2395 * also. 2396 * 2397 * -- DF_FAIL_IF_NO_DTD: fail if no @DTD was found. Useful 2398 * if you want to enforce validation. @@todo 2399 * 2400 * -- DF_DROP_WHITESPACE: discard all @whitespace for those 2401 * elements that can only have element content. Whitespace 2402 * will be preserved only for elements that can have 2403 * mixed content. -- If this flag is not set, all whitespace 2404 * is preserved. 2405 * 2166 2406 *@@added V0.9.9 (2001-02-14) [umoeller] 2167 */ 2168 2169 APIRET xmlCreateDOM(ULONG flParserFlags, 2170 PXMLDOM *ppDom) 2407 *@@changed V0.9.14 (2001-08-09) [umoeller]: added DF_DROP_WHITESPACE support 2408 */ 2409 2410 APIRET xmlCreateDOM(ULONG flParserFlags, // in: DF_* parser flags 2411 PFNGETCPDATA pfnGetCPData, // in: codepage callback or NULL 2412 PFNEXTERNALHANDLER pfnExternalHandler, // in: external entity callback or NULL 2413 PVOID pvCallbackUser, // in: user param for callbacks 2414 PXMLDOM *ppDom) // out: XMLDOM struct created 2171 2415 { 2172 2416 APIRET arc = NO_ERROR; … … 2180 2424 2181 2425 memset(pDom, 0, sizeof(XMLDOM)); 2426 2427 pDom->flParserFlags = flParserFlags; 2428 pDom->pfnGetCPData = pfnGetCPData; 2429 pDom->pfnExternalHandler = pfnExternalHandler; 2430 pDom->pvCallbackUser = pvCallbackUser; 2182 2431 2183 2432 lstInit(&pDom->llElementStack, … … 2207 2456 else 2208 2457 { 2458 if (pfnGetCPData) 2459 XML_SetUnknownEncodingHandler(pDom->pParser, 2460 UnknownEncodingHandler, 2461 pDom); // user data 2462 2463 XML_SetParamEntityParsing(pDom->pParser, 2464 XML_PARAM_ENTITY_PARSING_ALWAYS); 
2465 2209 2466 XML_SetElementHandler(pDom->pParser, 2210 2467 StartElementHandler, … … 2221 2478 XML_SetCommentHandler(pDom->pParser, 2222 2479 CommentHandler); 2480 2481 if (pfnExternalHandler) 2482 XML_SetExternalEntityRefHandler(pDom->pParser, 2483 ExternalEntityRefHandler); 2223 2484 2224 2485 if (flParserFlags & DF_PARSEDTD) … … 2231 2492 NotationDeclHandler); 2232 2493 2233 XML_SetExternalEntityRefHandler(pDom->pParser,2234 ExternalEntityRefHandler);2235 2236 2494 XML_SetElementDeclHandler(pDom->pParser, 2237 2495 ElementDeclHandler); … … 2306 2564 else 2307 2565 { 2308 BOOL fSuccess = XML_Parse(pDom->pParser, 2309 pcszBuf, 2310 cb, 2311 fIsLast); 2312 2313 if (!fSuccess) 2566 // go parse then 2567 if (!XML_Parse(pDom->pParser, 2568 pcszBuf, 2569 cb, 2570 fIsLast)) 2314 2571 { 2315 2572 // expat parsing error: … … 2380 2637 2381 2638 xmlDeleteNode((PNODEBASE)pDom->pDocumentNode); 2639 2640 if (pDom->pxstrSystemID) 2641 xstrFree(&pDom->pxstrSystemID); 2642 if (pDom->pxstrFailingNode) 2643 xstrFree(&pDom->pxstrFailingNode); 2644 2645 lstClear(&pDom->llElementStack); 2382 2646 2383 2647 free(pDom);
Note:
See TracChangeset
for help on using the changeset viewer.