source: trunk/src/helpers/xml.c@ 38

Last change on this file since 38 was 38, checked in by umoeller, 25 years ago

Updates to XML.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 67.3 KB
Line 
1
2/*
3 *@@sourcefile xml.c:
4 * XML document handling.
5 *
6 * XML support in the XWorkplace Helpers is broken into two
7 * layers:
8 *
9 * -- The bottom layer is implemented by @expat, which I have
10 * ported and hacked to the xwphelpers. See xmlparse.c for
11 * an introduction.
12 *
13 * -- Because expat requires so many callbacks and is non-validating,
14 * I have added a top layer above the expat library
15 * which is vaguely modelled after the Document Object Model
16 * (DOM) standardized by the W3C. That's this file.
17 *
18 * <B>XML</B>
19 *
20 * In order to understand XML myself, I have written a couple of
21 * glossary entries for the complex XML terminology. See @XML
22 * for a start.
23 *
24 * <B>Document Object Model (DOM)</B>
25 *
26 * See @DOM for a general introduction.
27 *
28 * DOM really calls for object oriented programming so the various
29 * structs can inherit from each other. Since this implementation
30 * was supposed to be a C-only interface, we cannot implement
31 * inheritance at the language level. Instead, each XML document
32 * is broken up into a tree of node structures only (see _DOMNODE),
33 * each of which has a special type. The W3C DOM allows this
34 * (and calls this the "flattened" view, as opposed to the
35 * "inheritance view").
36 *
37 * The W3C DOM specification prescribes tons of methods, which I
38 * really had no use for, so I didn't implement them. This implementation
39 * is only a DOM insofar as it uses nodes which represent @documents,
40 * @elements, @attributes, @comments, and @processing_instructions.
41 *
42 * Most notably, there are the following differences:
43 *
44 * -- Not all node types are implemented. See _DOMNODE for
45 * the supported types.
46 *
47 * -- Only a small subset of the standardized methods is implemented,
48 * and they are called differently to adhere to the xwphelpers
49 * conventions.
50 *
51 * -- DOM uses UTF-16 for its DOMString type. @expat gives UTF-8
52 * strings to all the handlers though, so all data in the DOM nodes
53 * is UTF-8 encoded. This still needs to be fixed.
54 *
55 * -- DOM defines the DOMException class. This isn't supported in C.
56 * Instead, we use special error codes which add to the standard
57 * OS/2 error codes (APIRET). All our error codes are >= 40000
58 * to avoid conflicts.
59 *
60 * It shouldn't be too difficult to write a C++ encapsulation
61 * of this though which fully implements all the DOM methods.
62 *
63 * However, we do implement node management as in the standard.
64 * See xmlCreateNode and xmlDeleteNode.
65 *
66 * The main entry point into this is xmlCreateDOM. See remarks
67 * there for details.
68 *
69 * <B>Validation</B>
70 *
71 * @expat doesn't check XML documents for whether they are @valid.
72 * In other words, expat is a non-validating XML processor.
73 *
74 * By contrast, this pseudo-DOM implementation can validate. To
75 * do this, you must pass DF_PARSEDTD to xmlCreateDOM (otherwise
76 * the @DTD entries will not be stored in the DOM nodes). This
77 * will not validate yet; to do this, explicitly call xmlValidate.
78 *
79 *@@header "helpers\xml.h"
80 *@@added V0.9.6 (2000-10-29) [umoeller]
81 */
82
83/*
84 * Copyright (C) 2000-2001 Ulrich Möller.
85 * This file is part of the "XWorkplace helpers" source package.
86 * This is free software; you can redistribute it and/or modify
87 * it under the terms of the GNU General Public License as published
88 * by the Free Software Foundation, in version 2 as it comes in the
89 * "COPYING" file of the XWorkplace main distribution.
90 * This program is distributed in the hope that it will be useful,
91 * but WITHOUT ANY WARRANTY; without even the implied warranty of
92 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
93 * GNU General Public License for more details.
94 */
95
96#define OS2EMX_PLAIN_CHAR
97 // this is needed for "os2emx.h"; if this is defined,
98 // emx will define PSZ as _signed_ char, otherwise
99 // as unsigned char
100
101#define INCL_DOSERRORS
102#include <os2.h>
103
104#include <stdlib.h>
105#include <string.h>
106
107#include "setup.h" // code generation and debugging options
108
109#include "expat\expat.h"
110
111#include "helpers\linklist.h"
112#include "helpers\stringh.h"
113#include "helpers\tree.h"
114#include "helpers\xstring.h"
115#include "helpers\xml.h"
116
117#pragma hdrstop
118
119/*
120 *@@category: Helpers\C helpers\XML
121 * see xml.c.
122 */
123
124/*
125 *@@category: Helpers\C helpers\XML\Document Object Model (DOM)
126 * see xml.c.
127 */
128
129/* ******************************************************************
130 *
131 * Generic methods
132 *
133 ********************************************************************/
134
135/*
136 *@@ CompareCMNodeNodes:
137 * tree comparison func for CMNodes.
138 * This works for all trees which contain structures
139 * whose first item is a _NODEBASE because NODEBASE's first
140 * member is a TREE.
141 *
142 * Used in two places:
143 *
144 * -- to insert _CMELEMENTDECLNODE nodes into
145 * _DOMDOCTYPENODE.ElementDeclsTree;
146 *
147 * -- to insert _CMELEMENTPARTICLE nodes into
148 * _CMELEMENTDECLNODE.ElementNamesTree.
149 *
150 *@@added V0.9.9 (2001-02-16) [umoeller]
151 */
152
153int CompareCMNodeNodes(TREE *t1,
154 TREE *t2)
155{
156 PNODEBASE p1 = (PNODEBASE)t1,
157 p2 = (PNODEBASE)t2;
158 return (strhcmp(p1->strNodeName.psz, p2->strNodeName.psz));
159}
160
161/*
162 *@@ CompareCMNodeNodes:
163 * tree comparison func for element declarations.
164 * Used to find nodes in _DOMDOCTYPENODE.ElementDeclsTree.
165 *
166 *@@added V0.9.9 (2001-02-16) [umoeller]
167 */
168
169int CompareCMNodeData(TREE *t1,
170 void *pData)
171{
172 PNODEBASE p1 = (PNODEBASE)t1;
173 return (strhcmp(p1->strNodeName.psz, (const char*)pData));
174}
175
176/*
177 *@@ xmlCreateNode:
178 * creates a new DOMNODE with the specified
179 * type and parent. Other than that, the
180 * node fields are zeroed.
181 *
182 * If pParentNode is specified (which is required,
183 * unless you are creating a document node),
184 * its children list is automatically updated
185 * (unless this is an attribute node, which updates
186 * the attributes map).
187 *
188 * This returns the following errors:
189 *
190 * -- ERROR_NOT_ENOUGH_MEMORY
191 *
192 * -- ERROR_DOM_NOT_SUPPORTED: invalid ulNodeType
193 * specified.
194 *
195 * -- ERROR_DOM_WRONG_DOCUMENT: cannot find the
196 * document for this node. This happens if you do
197 * not have a document node at the root of your tree.
198 */
199
200APIRET xmlCreateNode(PDOMNODE pParentNode, // in: parent node or NULL if root
201 ULONG ulNodeType, // in: DOMNODE_* type
202 PDOMNODE *ppNew) // out: new node
203{
204 PDOMNODE pNewNode = NULL;
205 APIRET arc = NO_ERROR;
206
207 ULONG cb = 0;
208
209 switch (ulNodeType)
210 {
211 case DOMNODE_DOCUMENT:
212 cb = sizeof(DOMDOCUMENTNODE);
213 break;
214
215 case DOMNODE_DOCUMENT_TYPE:
216 cb = sizeof(DOMDOCTYPENODE);
217 break;
218
219 default:
220 cb = sizeof(DOMNODE);
221 break;
222 }
223
224 pNewNode = (PDOMNODE)malloc(cb);
225
226 if (!pNewNode)
227 arc = ERROR_NOT_ENOUGH_MEMORY;
228 else
229 {
230 memset(pNewNode, 0, cb);
231 pNewNode->NodeBase.ulNodeType = ulNodeType;
232 xstrInit(&pNewNode->NodeBase.strNodeName, 0);
233 pNewNode->pParentNode = pParentNode;
234
235 if (pParentNode)
236 {
237 // parent specified:
238 // check if this is an attribute
239 if (ulNodeType == DOMNODE_ATTRIBUTE)
240 {
241 // attribute:
242 // add to parent's attributes list
243 if (treeInsertNode(&pParentNode->AttributesMap,
244 (TREE*)pNewNode,
245 CompareCMNodeNodes,
246 FALSE) // no duplicates
247 == TREE_DUPLICATE)
248 arc = ERROR_DOM_DUPLICATE_ATTRIBUTE;
249 // shouldn't happen, because expat takes care of this
250 }
251 else
252 // append this new node to the parent's
253 // list of child nodes
254 lstAppendItem(&pParentNode->llChildren,
255 pNewNode);
256
257 if (!arc)
258 {
259 // set document pointer...
260 // if the parent node has a document pointer,
261 // we can copy that
262 if (pParentNode->pDocumentNode)
263 pNewNode->pDocumentNode = pParentNode->pDocumentNode;
264 else
265 // parent has no document pointer: then it is probably
266 // the document itself... check
267 if (pParentNode->NodeBase.ulNodeType == DOMNODE_DOCUMENT)
268 pNewNode->pDocumentNode = pParentNode;
269 else
270 arc = ERROR_DOM_NO_DOCUMENT;
271 }
272 }
273
274 lstInit(&pNewNode->llChildren, FALSE);
275 treeInit(&pNewNode->AttributesMap);
276 }
277
278 if (!arc)
279 *ppNew = pNewNode;
280 else
281 if (pNewNode)
282 free(pNewNode);
283
284 return (arc);
285}
286
287/*
288 *@@ xmlDeleteNode:
289 * deletes the specified node and updates the
290 * parent node's children list.
291 *
292 * If the node has child nodes, all of them are deleted
293 * as well. This recurses, if necessary.
294 *
295 * As a result, if the node is a document node, this
296 * deletes an entire document, including all of its
297 * child nodes.
298 *
299 * This returns the following errors:
300 *
301 * -- ERROR_DOM_NOT_FOUND
302 */
303
304APIRET xmlDeleteNode(PDOMNODE pNode)
305{
306 ULONG ulrc = 0;
307
308 if (!pNode)
309 {
310 ulrc = ERROR_INVALID_PARAMETER;
311 }
312 else
313 {
314 PLISTNODE pNodeThis;
315
316 // recurse into child nodes
317 while (pNodeThis = lstQueryFirstNode(&pNode->llChildren))
318 // recurse!!
319 xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
320 // this updates llChildren
321
322 // recurse into attribute nodes
323 // while (pNodeThis = lstQueryFirstNode(&pNode->llAttributes))
324 // recurse!! ###
325 // xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
326 // this updates llAttributes
327
328 if (pNode->pParentNode)
329 {
330 // node has a parent:
331 if (pNode->NodeBase.ulNodeType == DOMNODE_ATTRIBUTE)
332 // this is an attribute:
333 // remove from parent's attributes map
334 treeDelete(&pNode->pParentNode->AttributesMap,
335 (TREE*)pNode);
336 else
337 // remove this node from the parent's list
338 // of child nodes before deleting this node
339 lstRemoveItem(&pNode->pParentNode->llChildren,
340 pNode);
341
342 pNode->pParentNode = NULL;
343 }
344
345 xstrClear(&pNode->NodeBase.strNodeName);
346 xstrFree(pNode->pstrNodeValue);
347
348 lstClear(&pNode->llChildren);
349 // lstClear(&pNode->llAttributes); ###
350
351 free(pNode);
352 }
353
354 return (ulrc);
355}
356
357/*
358 *@@ xmlGetFirstChild:
359 * returns the first child node of pDomNode.
360 * See _DOMNODE for what a "child" can be for the
361 * various node types.
362 *
363 *@@added V0.9.9 (2001-02-14) [umoeller]
364 */
365
366PDOMNODE xmlGetFirstChild(PDOMNODE pDomNode)
367{
368 PLISTNODE pListNode = lstQueryFirstNode(&pDomNode->llChildren);
369 if (pListNode)
370 return ((PDOMNODE)pListNode->pItemData);
371
372 return (0);
373}
374
375/*
376 *@@ xmlGetLastChild:
377 * returns the last child node of pDomNode.
378 * See _DOMNODE for what a "child" can be for the
379 * various node types.
380 *
381 *@@added V0.9.9 (2001-02-14) [umoeller]
382 */
383
384PDOMNODE xmlGetLastChild(PDOMNODE pDomNode)
385{
386 PLISTNODE pListNode = lstQueryLastNode(&pDomNode->llChildren);
387 if (pListNode)
388 return ((PDOMNODE)pListNode->pItemData);
389
390 return (0);
391}
392
393/*
394 *@@ xmlDescribeError:
395 * returns a string describing the error corresponding to code.
396 * The code should be one of the enums that can be returned from
397 * XML_GetErrorCode.
398 *
399 *@@changed V0.9.9 (2001-02-14) [umoeller]: adjusted for new error codes
400 *@@changed V0.9.9 (2001-02-16) [umoeller]: moved this here from xmlparse.c
401 */
402
403const char* xmlDescribeError(int code)
404{
405 static const char *message[] =
406 {
407 // start of expat (parser) errors
408 "Out of memory",
409 "Syntax error",
410 "No element found",
411 "Not well-formed (invalid token)",
412 "Unclosed token",
413 "Unclosed token",
414 "Mismatched tag",
415 "Duplicate attribute",
416 "Junk after root element",
417 "Illegal parameter entity reference",
418 "Undefined entity",
419 "Recursive entity reference",
420 "Asynchronous entity",
421 "Reference to invalid character number",
422 "Reference to binary entity",
423 "Reference to external entity in attribute",
424 "XML processing instruction not at start of external entity",
425 "Unknown encoding",
426 "Encoding specified in XML declaration is incorrect",
427 "Unclosed CDATA section",
428 "Error in processing external entity reference",
429 "Document is not standalone",
430 "Unexpected parser state - please send a bug report",
431 // end of expat (parser) errors
432
433 // start of validation errors
434 "Element has not been declared",
435 "Root element name does not match DOCTYPE name",
436 "Invalid or duplicate root element",
437 "Invalid sub-element in parent element",
438 "Duplicate element declaration",
439 "Duplicate attribute declaration",
440 "Undeclared attribute in element"
441 };
442
443 int code2 = code - ERROR_XML_FIRST;
444
445 if ( code2 >= 0
446 && code2 < sizeof(message) / sizeof(message[0])
447 )
448 return message[code2];
449
450 return 0;
451}
452
453/*
454 *@@ xmlSetError:
455 * sets the DOM's error state and stores error information
456 * and parser position.
457 *
458 *@@added V0.9.9 (2001-02-16) [umoeller]
459 */
460
461VOID xmlSetError(PXMLDOM pDom,
462 APIRET arc,
463 const char *pcszFailing,
464 BOOL fValidityError) // in: if TRUE, this is a validation error;
465 // if FALSE, this is a parser error
466{
467 pDom->arcDOM = arc;
468 pDom->pcszErrorDescription = xmlDescribeError(pDom->arcDOM);
469 pDom->ulErrorLine = XML_GetCurrentLineNumber(pDom->pParser);
470 pDom->ulErrorColumn = XML_GetCurrentColumnNumber(pDom->pParser);
471
472 if (pcszFailing)
473 {
474 if (!pDom->pxstrFailingNode)
475 pDom->pxstrFailingNode = xstrCreate(0);
476
477 xstrcpy(pDom->pxstrFailingNode, pcszFailing, 0);
478 }
479
480 if (fValidityError)
481 pDom->fInvalid = TRUE;
482}
483
484/* ******************************************************************
485 *
486 * Specific DOM node methods
487 *
488 ********************************************************************/
489
490/*
491 *@@ xmlCreateElementNode:
492 * creates a new element node with the specified name.
493 *
494 *@@added V0.9.9 (2001-02-14) [umoeller]
495 */
496
497APIRET xmlCreateElementNode(PDOMNODE pParent, // in: parent node (either document or element)
498 const char *pcszElement, // in: element name (null-terminated)
499 PDOMNODE *ppNew)
500{
501 PDOMNODE pNew = NULL;
502 APIRET arc = xmlCreateNode(pParent,
503 DOMNODE_ELEMENT,
504 &pNew);
505
506 if (arc == NO_ERROR)
507 {
508 xstrcpy(&pNew->NodeBase.strNodeName, pcszElement, 0);
509
510 *ppNew = pNew;
511 }
512
513 return (arc);
514}
515
516/*
517 *@@ xmlCreateAttributeNode:
518 * creates a new attribute node with the specified data.
519 *
520 * NOTE: Attributes have no "parent" node, technically.
521 * They are added to a special, separate list in @DOM_ELEMENT
522 * nodes.
523 *
524 * This returns the following errors:
525 *
526 * -- Error codes from xmlCreateNode.
527 *
528 * -- ERROR_DOM_NO_ELEMENT: pElement is invalid or does
529 * not point to an @DOM_ELEMENT node.
530 *
531 *@@added V0.9.9 (2001-02-14) [umoeller]
532 */
533
534APIRET xmlCreateAttributeNode(PDOMNODE pElement, // in: element node
535 const char *pcszName, // in: attribute name (null-terminated)
536 const char *pcszValue, // in: attribute value (null-terminated)
537 PDOMNODE *ppNew)
538{
539 APIRET arc = NO_ERROR;
540
541 if ( !pElement
542 || pElement->NodeBase.ulNodeType != DOMNODE_ELEMENT
543 )
544 arc = ERROR_DOM_NO_ELEMENT;
545 else
546 {
547 PDOMNODE pNew = NULL;
548 arc = xmlCreateNode(pElement, // this takes care of adding to the list
549 DOMNODE_ATTRIBUTE,
550 &pNew);
551 if (arc == NO_ERROR)
552 {
553 xstrcpy(&pNew->NodeBase.strNodeName, pcszName, 0);
554 pNew->pstrNodeValue = xstrCreate(0);
555 xstrcpy(pNew->pstrNodeValue, pcszValue, 0);
556
557 *ppNew = pNew;
558 }
559 }
560
561 return (arc);
562}
563
564/*
565 *@@ xmlCreateTextNode:
566 * creates a new text node with the specified content.
567 *
568 * Note: This differs from the createText method
569 * as specified by DOM, which has no ulLength parameter.
570 * We need this for speed with @expat though.
571 *
572 *@@added V0.9.9 (2001-02-14) [umoeller]
573 */
574
575APIRET xmlCreateTextNode(PDOMNODE pParent, // in: parent element node
576 const char *pcszText, // in: ptr to start of text
577 ULONG ulLength, // in: length of *pcszText
578 PDOMNODE *ppNew)
579{
580 PDOMNODE pNew = NULL;
581 APIRET arc = xmlCreateNode(pParent,
582 DOMNODE_TEXT,
583 &pNew);
584 if (arc == NO_ERROR)
585 {
586 PSZ pszNodeValue = (PSZ)malloc(ulLength + 1);
587 if (!pszNodeValue)
588 {
589 arc = ERROR_NOT_ENOUGH_MEMORY;
590 xmlDeleteNode(pNew);
591 }
592 else
593 {
594 memcpy(pszNodeValue, pcszText, ulLength);
595 pszNodeValue[ulLength] = '\0';
596 pNew->pstrNodeValue = xstrCreate(0);
597 xstrset(pNew->pstrNodeValue, pszNodeValue);
598
599 *ppNew = pNew;
600 }
601 }
602
603 return (arc);
604}
605
606/*
607 *@@ xmlCreateCommentNode:
608 * creates a new comment node with the specified
609 * content.
610 *
611 *@@added V0.9.9 (2001-02-14) [umoeller]
612 */
613
614APIRET xmlCreateCommentNode(PDOMNODE pParent, // in: parent element node
615 const char *pcszText, // in: comment (null-terminated)
616 PDOMNODE *ppNew)
617{
618 PDOMNODE pNew = NULL;
619 APIRET arc = xmlCreateNode(pParent,
620 DOMNODE_COMMENT,
621 &pNew);
622 if (arc == NO_ERROR)
623 {
624 pNew->pstrNodeValue = xstrCreate(0);
625 xstrcpy(pNew->pstrNodeValue, pcszText, 0);
626 *ppNew = pNew;
627 }
628
629 return (arc);
630}
631
632/*
633 *@@ xmlCreatePINode:
634 * creates a new processing instruction node with the
635 * specified data.
636 *
637 *@@added V0.9.9 (2001-02-14) [umoeller]
638 */
639
640APIRET xmlCreatePINode(PDOMNODE pParent, // in: parent element node
641 const char *pcszTarget, // in: PI target (null-terminated)
642 const char *pcszData, // in: PI data (null-terminated)
643 PDOMNODE *ppNew)
644{
645 PDOMNODE pNew = NULL;
646 APIRET arc = xmlCreateNode(pParent,
647 DOMNODE_PROCESSING_INSTRUCTION,
648 &pNew);
649 if (arc == NO_ERROR)
650 {
651 xstrcpy(&pNew->NodeBase.strNodeName, pcszTarget, 0);
652 pNew->pstrNodeValue = xstrCreate(0);
653 xstrcpy(pNew->pstrNodeValue, pcszData, 0);
654
655 *ppNew = pNew;
656 }
657
658 return (arc);
659}
660
661/*
662 *@@ xmlCreateDocumentTypeNode:
663 * creates a new document type node with the
664 * specified data.
665 *
666 *@@added V0.9.9 (2001-02-14) [umoeller]
667 */
668
669APIRET xmlCreateDocumentTypeNode(PDOMDOCUMENTNODE pDocumentNode, // in: document node
670 const char *pcszDoctypeName,
671 const char *pcszSysid,
672 const char *pcszPubid,
673 int fHasInternalSubset,
674 PDOMDOCTYPENODE *ppNew)
675{
676 APIRET arc = NO_ERROR;
677
678 if (pDocumentNode->pDocType)
679 // we already have a doctype:
680 arc = ERROR_DOM_DUPLICATE_DOCTYPE;
681 else
682 {
683 // create doctype node
684 PDOMDOCTYPENODE pNew = NULL;
685 arc = xmlCreateNode((PDOMNODE)pDocumentNode,
686 DOMNODE_DOCUMENT_TYPE,
687 (PDOMNODE*)&pNew);
688
689 if (!arc)
690 {
691 // the node has already been added to the children
692 // list of the document node... in addition, set
693 // the doctype field in the document
694 pDocumentNode->pDocType = pNew;
695
696 // initialize the extra fields
697 xstrcpy(&pNew->strPublicID, pcszPubid, 0);
698 xstrcpy(&pNew->strSystemID, pcszSysid, 0);
699 pNew->fHasInternalSubset = fHasInternalSubset;
700
701 if (pcszDoctypeName)
702 {
703 ULONG ul = strlen(pcszDoctypeName);
704 if (ul)
705 {
706 xstrcpy(&pDocumentNode->DomNode.NodeBase.strNodeName,
707 pcszDoctypeName,
708 ul);
709 }
710 }
711
712 treeInit(&pNew->ElementDeclsTree);
713 treeInit(&pNew->AttribDeclBasesTree);
714
715 *ppNew = pNew;
716 }
717 }
718 return (arc);
719}
720
721/*
722 *@@ xmlGetElementsByTagName:
723 * returns a linked list of @DOM_ELEMENT nodes which
724 * match the specified element name. The special name
725 * "*" matches all elements.
726 *
727 * The caller must free the list by calling lstFree.
728 * Returns NULL if no such elements could be found.
729 *
730 *@@added V0.9.9 (2001-02-14) [umoeller]
731 */
732
733PLINKLIST xmlGetElementsByTagName(const char *pcszName)
734{
735 APIRET arc = NO_ERROR;
736
737 return (0);
738}
739
740/* ******************************************************************
741 *
742 * Content model methods
743 *
744 ********************************************************************/
745
746/*
747 *@@ SetupParticleAndSubs:
748 *
749 * This creates sub-particles and recurses to set them up,
750 * if necessary.
751 *
752 *@@added V0.9.9 (2001-02-16) [umoeller]
753 */
754
755APIRET SetupParticleAndSubs(PCMELEMENTPARTICLE pParticle,
756 PXMLCONTENT pModel,
757 TREE **ppElementNamesTree) // in: ptr to _CMELEMENTDECLNODE.ElementNamesTree
758 // (passed to all recursions)
759{
760 APIRET arc = NO_ERROR;
761
762 // set up member NODEBASE
763 switch (pModel->type)
764 {
765 case XML_CTYPE_EMPTY: // that's easy
766 pParticle->CMNode.ulNodeType = ELEMENTPARTICLE_EMPTY;
767 break;
768
769 case XML_CTYPE_ANY: // that's easy
770 pParticle->CMNode.ulNodeType = ELEMENTPARTICLE_ANY;
771 break;
772
773 case XML_CTYPE_NAME: // that's easy
774 pParticle->CMNode.ulNodeType = ELEMENTPARTICLE_NAME;
775 xstrInitCopy(&pParticle->CMNode.strNodeName, pModel->name, 0);
776 treeInsertNode(ppElementNamesTree,
777 &pParticle->CMNode.Tree,
778 CompareCMNodeNodes,
779 TRUE); // allow duplicates here
780 break;
781
782 case XML_CTYPE_MIXED:
783 pParticle->CMNode.ulNodeType = ELEMENTPARTICLE_MIXED;
784 break;
785
786 case XML_CTYPE_CHOICE:
787 pParticle->CMNode.ulNodeType = ELEMENTPARTICLE_CHOICE;
788 break;
789
790 case XML_CTYPE_SEQ:
791 pParticle->CMNode.ulNodeType = ELEMENTPARTICLE_SEQ;
792 break;
793 }
794
795 pParticle->ulRepeater = pModel->quant;
796
797 if (pModel->numchildren)
798 {
799 // these are the three cases where we have subnodes
800 // in the XMLCONTENT... go for these and recurse
801 ULONG ul;
802 pParticle->pllSubNodes = lstCreate(FALSE);
803 for (ul = 0;
804 ul < pModel->numchildren;
805 ul++)
806 {
807 PXMLCONTENT pSubModel = &pModel->children[ul];
808 PCMELEMENTPARTICLE pSubNew
809 = (PCMELEMENTPARTICLE)malloc(sizeof(*pSubNew));
810 if (!pSubNew)
811 arc = ERROR_NOT_ENOUGH_MEMORY;
812 else
813 {
814 memset(pSubNew, 0, sizeof(*pSubNew));
815
816 arc = SetupParticleAndSubs(pSubNew,
817 pSubModel,
818 ppElementNamesTree);
819
820 if (!arc)
821 // no error: append sub-particle to this particle's
822 // children list
823 lstAppendItem(pParticle->pllSubNodes,
824 pSubNew);
825 }
826
827 if (arc)
828 break;
829 }
830 }
831
832 return (arc);
833}
834
835/*
836 *@@ xmlCreateElementDecl:
837 * creates a new _CMELEMENTDECLNODE for the specified
838 * _XMLCONTENT content model (which is the @expat structure).
839 * This recurses, if necessary.
840 *
841 *@@added V0.9.9 (2001-02-16) [umoeller]
842 */
843
844APIRET xmlCreateElementDecl(const char *pcszName,
845 PXMLCONTENT pModel,
846 PCMELEMENTDECLNODE *ppNew)
847{
848 APIRET arc = NO_ERROR;
849 PCMELEMENTDECLNODE pNew = (PCMELEMENTDECLNODE)malloc(sizeof(*pNew));
850 if (!pNew)
851 arc = ERROR_NOT_ENOUGH_MEMORY;
852 else
853 {
854 memset(pNew, 0, sizeof(CMELEMENTDECLNODE));
855
856 // pNew->Particle.CMNode.ulNodeType = ELEMENT_DECLARATION;
857
858 xstrcpy(&pNew->Particle.CMNode.strNodeName, pcszName, 0);
859
860 treeInit(&pNew->ParticleNamesTree);
861
862 // set up the "particle" member and recurse into sub-particles
863 arc = SetupParticleAndSubs(&pNew->Particle,
864 pModel,
865 &pNew->ParticleNamesTree);
866
867 if (!arc)
868 *ppNew = pNew;
869 else
870 free(pNew);
871 }
872
873 return (arc);
874}
875
876/*
877 *@@ xmlFindElementDecl:
878 * returns the _CMELEMENTDECLNODE for the element
879 * with the specified name or NULL if there's none.
880 *
881 *@@added V0.9.9 (2001-02-16) [umoeller]
882 */
883
884PCMELEMENTDECLNODE xmlFindElementDecl(PXMLDOM pDom,
885 const XSTRING *pstrElementName)
886{
887 PCMELEMENTDECLNODE pElementDecl = NULL;
888
889 PDOMDOCTYPENODE pDocTypeNode = pDom->pDocTypeNode;
890 if ( (pDocTypeNode)
891 && (pstrElementName)
892 && (pstrElementName->ulLength)
893 )
894 {
895 pElementDecl = treeFindEQData(&pDocTypeNode->ElementDeclsTree,
896 (void*)pstrElementName->psz,
897 CompareCMNodeData);
898 }
899
900 return (pElementDecl);
901}
902
903/*
904 *@@ xmlFindAttribDeclBase:
905 * returns the _CMATTRIBUTEDEDECLBASE for the specified
906 * element name, or NULL if none exists.
907 *
908 * To find a specific attribute declaration from both
909 * an element and an attribute name, use xmlFindAttribDecl
910 * instead.
911 *
912 *@@added V0.9.9 (2001-02-16) [umoeller]
913 */
914
915PCMATTRIBUTEDEDECLBASE xmlFindAttribDeclBase(PXMLDOM pDom,
916 const XSTRING *pstrElementName)
917{
918 PCMATTRIBUTEDEDECLBASE pAttribDeclBase = NULL;
919
920 PDOMDOCTYPENODE pDocTypeNode = pDom->pDocTypeNode;
921 if ( (pDocTypeNode)
922 && (pstrElementName)
923 && (pstrElementName->ulLength)
924 )
925 {
926 pAttribDeclBase = treeFindEQData(&pDocTypeNode->AttribDeclBasesTree,
927 (void*)pstrElementName->psz,
928 CompareCMNodeData);
929 }
930
931 return (pAttribDeclBase);
932}
933
934/*
935 *@@ xmlFindAttribDecl:
936 * returns the _CMATTRIBUTEDEDECL for the specified
937 * element and attribute name, or NULL if none exists.
938 *
939 *@@added V0.9.9 (2001-02-16) [umoeller]
940 */
941
942PCMATTRIBUTEDECL xmlFindAttribDecl(PXMLDOM pDom,
943 const XSTRING *pstrElementName,
944 const XSTRING *pstrAttribName)
945{
946 PCMATTRIBUTEDECL pAttribDecl = NULL;
947 if (pstrElementName && pstrAttribName)
948 {
949 PCMATTRIBUTEDEDECLBASE pAttribDeclBase = xmlFindAttribDeclBase(pDom,
950 pstrElementName);
951 if (pAttribDeclBase)
952 {
953 pAttribDecl = treeFindEQData(&pAttribDeclBase->AttribDeclsTree,
954 (void*)pstrAttribName->psz,
955 CompareCMNodeData);
956 }
957 }
958
959 return (pAttribDecl);
960}
961
962/*
963 *@@ ValidateElement:
964 * validates the specified element against the document's
965 * @DTD.
966 *
967 * This sets arcDOM in XMLDOM on errors.
968 *
969 * According to the XML spec, an element is valid if there
970 * is a declaration matching the element declaration where the
971 * element's name matches the element type, and _one_ of the
972 * following holds: ###
973 *
974 * (1) The declaration matches EMPTY and the element has no @content.
975 *
976 * (2) The declaration matches (children) (see @element_declaration)
977 * and the sequence of child elements belongs to the language
978 * generated by the regular expression in the content model, with
979 * optional @white_space between the start-tag and the first child
980 * element, between child elements, or between the last
981 * child element and the end-tag. Note that a CDATA section
982 * is never considered "whitespace", even if it contains
983 * white space only.
984 *
985 * (3) The declaration matches (mixed) (see @element_declaration)
986 * and the content consists of @content and child elements
987 * whose types match names in the content model.
988 *
989 * (4) The declaration matches ANY, and the types of any child
990 * elements have been declared. (done)
991 *
992 *@@added V0.9.9 (2001-02-16) [umoeller]
993 */
994
995VOID ValidateElement(PXMLDOM pDom,
996 PDOMNODE pElement)
997{
998 // yes: get the element decl from the tree
999 PCMELEMENTDECLNODE pElementDecl = xmlFindElementDecl(pDom,
1000 &pElement->NodeBase.strNodeName);
1001 if (!pElementDecl)
1002 {
1003 xmlSetError(pDom,
1004 ERROR_DOM_UNDECLARED_ELEMENT,
1005 pElement->NodeBase.strNodeName.psz,
1006 TRUE);
1007 }
1008 else
1009 {
1010 // element has been declared:
1011 // check if it may appear in this element's parent...
1012 PDOMNODE pParentElement = pElement->pParentNode;
1013
1014 if (!pParentElement)
1015 pDom->arcDOM = ERROR_DOM_INTEGRITY;
1016 else switch (pParentElement->NodeBase.ulNodeType)
1017 {
1018 case DOMNODE_DOCUMENT:
1019 {
1020 // if this is the root element, compare its name
1021 // to the DOCTYPE name
1022 if (pParentElement != (PDOMNODE)pDom->pDocumentNode)
1023 xmlSetError(pDom,
1024 ERROR_DOM_INVALID_ROOT_ELEMENT,
1025 pElement->NodeBase.strNodeName.psz,
1026 TRUE);
1027 else if (strcmp(pDom->pDocumentNode->DomNode.NodeBase.strNodeName.psz,
1028 pElement->NodeBase.strNodeName.psz))
1029 // no match:
1030 xmlSetError(pDom,
1031 ERROR_DOM_ROOT_ELEMENT_MISNAMED,
1032 pElement->NodeBase.strNodeName.psz,
1033 TRUE);
1034 break; }
1035
1036 case DOMNODE_ELEMENT:
1037 {
1038 // parent of element is another element:
1039 // check the parent in the DTD and find out if
1040 // this element may appear in the parent element
1041 PCMELEMENTDECLNODE pParentElementDecl
1042 = xmlFindElementDecl(pDom,
1043 &pParentElement->NodeBase.strNodeName);
1044 if (!pParentElementDecl)
1045 pDom->arcDOM = ERROR_DOM_INTEGRITY;
1046 else
1047 {
1048 // now check the element names tree of the parent element decl
1049 // for whether this element is allowed as a sub-element at all
1050 PCMELEMENTPARTICLE pParticle
1051 = treeFindEQData(&pParentElementDecl->ParticleNamesTree,
1052 (void*)pElement->NodeBase.strNodeName.psz,
1053 CompareCMNodeData);
1054 if (!pParticle)
1055 // not found: then this element is not allowed within this
1056 // parent
1057 xmlSetError(pDom,
1058 ERROR_DOM_INVALID_SUBELEMENT,
1059 pElement->NodeBase.strNodeName.psz,
1060 TRUE);
1061 }
1062 break; }
1063 }
1064 }
1065}
1066
1067/*
1068 *@@ ValidateAttribute:
1069 * validates the specified element against the document's
1070 * @DTD.
1071 *
1072 * This sets arcDOM in XMLDOM on errors.
1073 *
1074 *@@added V0.9.9 (2001-02-16) [umoeller]
1075 */
1076
1077VOID ValidateAttribute(PXMLDOM pDom,
1078 PDOMNODE pAttrib)
1079{
1080 PDOMNODE pElement = pAttrib->pParentNode;
1081
1082 PCMATTRIBUTEDECL pAttribDecl = xmlFindAttribDecl(pDom,
1083 &pElement->NodeBase.strNodeName,
1084 &pAttrib->NodeBase.strNodeName);
1085 if (!pAttribDecl)
1086 xmlSetError(pDom,
1087 ERROR_DOM_UNDECLARED_ATTRIBUTE,
1088 pAttrib->NodeBase.strNodeName.psz,
1089 TRUE);
1090}
1091
1092/* ******************************************************************
1093 *
1094 * Expat handlers
1095 *
1096 ********************************************************************/
1097
1098/*
1099 *@@ StartElementHandler:
1100 * @expat handler called when a new element is
1101 * found.
1102 *
1103 * We create a new record in the container and
1104 * push it onto our stack so we can insert
1105 * children into it. We first start with the
1106 * attributes.
1107 */
1108
1109void EXPATENTRY StartElementHandler(void *pUserData, // in: our PXMLDOM really
1110 const char *pcszElement,
1111 const char **papcszAttribs)
1112{
1113 PXMLDOM pDom = (PXMLDOM)pUserData;
1114
1115 // continue parsing only if we had no errors so far
1116 if (!pDom->arcDOM)
1117 {
1118 ULONG i;
1119
1120 PDOMNODE pParent = NULL,
1121 pNew = NULL;
1122
1123 PLISTNODE pParentNode = lstPop(&pDom->llStack);
1124
1125 if (!pParentNode)
1126 pDom->arcDOM = ERROR_DOM_NO_DOCUMENT;
1127 else
1128 {
1129 // we have at least one node:
1130 pParent = (PDOMNODE)pParentNode->pItemData;
1131
1132 pDom->arcDOM = xmlCreateElementNode(pParent,
1133 pcszElement,
1134 &pNew);
1135
1136 if (!pDom->arcDOM)
1137 {
1138 // shall we validate?
1139 if (pDom->pDocTypeNode)
1140 {
1141 // yes:
1142 ValidateElement(pDom,
1143 pNew);
1144 }
1145
1146 if (!pDom->arcDOM)
1147 {
1148 // OK, node is valid:
1149 // push this on the stack so we can add child elements
1150 lstPush(&pDom->llStack, pNew);
1151
1152 // now for the attribs
1153 for (i = 0;
1154 (papcszAttribs[i]) && (!pDom->arcDOM);
1155 i += 2)
1156 {
1157 PDOMNODE pAttrib;
1158
1159 pDom->arcDOM = xmlCreateAttributeNode(pNew, // element,
1160 papcszAttribs[i], // attr name
1161 papcszAttribs[i + 1], // attr value
1162 &pAttrib);
1163
1164 // shall we validate?
1165 if (pDom->pDocTypeNode)
1166 {
1167 ValidateAttribute(pDom,
1168 pAttrib);
1169 }
1170 }
1171 }
1172 }
1173 }
1174
1175 pDom->pLastWasTextNode = NULL;
1176 }
1177}
1178
1179/*
1180 *@@ EndElementHandler:
1181 * @expat handler for when parsing an element is done.
1182 * We pop the element off of our stack then.
1183 */
1184
1185void EXPATENTRY EndElementHandler(void *pUserData, // in: our PXMLDOM really
1186 const XML_Char *name)
1187{
1188 PXMLDOM pDom = (PXMLDOM)pUserData;
1189 // continue parsing only if we had no errors so far
1190 if (!pDom->arcDOM)
1191 {
1192 PLISTNODE pNode = lstPop(&pDom->llStack);
1193 if (pNode)
1194 lstRemoveNode(&pDom->llStack, pNode);
1195
1196 pDom->pLastWasTextNode = NULL;
1197 }
1198}
1199
1200/*
1201 *@@ CharacterDataHandler:
1202 * @expat handler for character data (@content).
1203 *
1204 * Note: expat passes chunks of content without zero-terminating
1205 * them. We must concatenate the chunks to a full text node.
1206 */
1207
1208void EXPATENTRY CharacterDataHandler(void *pUserData, // in: our PXMLDOM really
1209 const XML_Char *s,
1210 int len)
1211{
1212 PXMLDOM pDom = (PXMLDOM)pUserData;
1213
1214 // continue parsing only if we had no errors so far
1215 if (!pDom->arcDOM)
1216 {
1217 ULONG i;
1218
1219 if (len)
1220 {
1221 if (pDom->pLastWasTextNode)
1222 {
1223 // we had a text node, and no elements or other
1224 // stuff in between:
1225 xstrcat(pDom->pLastWasTextNode->pstrNodeValue,
1226 s,
1227 len);
1228 }
1229 else
1230 {
1231 // we need a new text node:
1232 PDOMNODE pNew,
1233 pParent;
1234 // non-root level:
1235 PLISTNODE pParentNode = lstPop(&pDom->llStack);
1236 pParent = (PDOMNODE)pParentNode->pItemData;
1237
1238 pDom->arcDOM = xmlCreateTextNode(pParent,
1239 s,
1240 len,
1241 &pDom->pLastWasTextNode);
1242 }
1243 }
1244 }
1245}
1246
1247/*
1248 *@@ CommentHandler:
1249 * @expat handler for @comments.
1250 *
1251 * Note: This is only set if DF_PARSECOMMENTS is
1252 * flagged with xmlCreateDOM.
1253 *
1254 *@@added V0.9.9 (2001-02-14) [umoeller]
1255 */
1256
1257void EXPATENTRY CommentHandler(void *pUserData, // in: our PXMLDOM really
1258 const XML_Char *data)
1259{
1260 PXMLDOM pDom = (PXMLDOM)pUserData;
1261
1262 // continue parsing only if we had no errors so far
1263 if (!pDom->arcDOM)
1264 {
1265 PLISTNODE pParentNode = lstPop(&pDom->llStack);
1266
1267 if (pParentNode)
1268 {
1269 // non-root level:
1270 PDOMNODE pParent = (PDOMNODE)pParentNode->pItemData;
1271 PDOMNODE pComment;
1272
1273 pDom->arcDOM = xmlCreateCommentNode(pParent,
1274 data,
1275 &pComment);
1276 }
1277 }
1278}
1279
1280/*
1281 *@@ StartDoctypeDeclHandler:
1282 * @expat handler that is called at the start of a DOCTYPE
1283 * declaration, before any external or internal subset is
1284 * parsed.
1285 *
1286 * Both pcszSysid and pcszPubid may be NULL. "fHasInternalSubset"
1287 * will be non-zero if the DOCTYPE declaration has an internal subset.
1288 *
1289 *@@added V0.9.9 (2001-02-14) [umoeller]
1290 */
1291
1292void EXPATENTRY StartDoctypeDeclHandler(void *pUserData,
1293 const XML_Char *pcszDoctypeName,
1294 const XML_Char *pcszSysid,
1295 const XML_Char *pcszPubid,
1296 int fHasInternalSubset)
1297{
1298 PXMLDOM pDom = (PXMLDOM)pUserData;
1299
1300 // continue parsing only if we had no errors so far
1301 if (!pDom->arcDOM)
1302 {
1303 // get the document node
1304 PDOMDOCUMENTNODE pDocumentNode = pDom->pDocumentNode;
1305 if (!pDocumentNode)
1306 pDom->arcDOM = ERROR_DOM_NO_DOCUMENT;
1307 else
1308 {
1309 pDom->arcDOM = xmlCreateDocumentTypeNode(pDocumentNode,
1310 pcszDoctypeName,
1311 pcszSysid,
1312 pcszPubid,
1313 fHasInternalSubset,
1314 &pDom->pDocTypeNode);
1315
1316 // push this on the stack so we can add child elements
1317 lstPush(&pDom->llStack, pDom->pDocTypeNode);
1318 }
1319 }
1320}
1321
1322/*
1323 *@@ EndDoctypeDeclHandler:
1324 * @expat handler that is called at the end of a DOCTYPE
1325 * declaration, after parsing any external subset.
1326 *
1327 *@@added V0.9.9 (2001-02-14) [umoeller]
1328 */
1329
1330void EXPATENTRY EndDoctypeDeclHandler(void *pUserData) // in: our PXMLDOM really
1331{
1332 PXMLDOM pDom = (PXMLDOM)pUserData;
1333
1334 PLISTNODE pListNode = lstPop(&pDom->llStack);
1335 if (!pListNode)
1336 pDom->arcDOM = ERROR_DOM_DOCTYPE_STRUCTURE;
1337 else
1338 {
1339 PDOMNODE pDomNode = (PDOMNODE)pListNode->pItemData;
1340 if (pDomNode->NodeBase.ulNodeType != DOMNODE_DOCUMENT_TYPE)
1341 pDom->arcDOM = ERROR_DOM_DOCTYPE_STRUCTURE;
1342
1343 lstRemoveNode(&pDom->llStack, pListNode);
1344 }
1345
1346 // continue parsing only if we had no errors so far
1347 if (!pDom->arcDOM)
1348 {
1349
1350 }
1351}
1352
1353/*
1354 *@@ NotationDeclHandler:
1355 * @expat handler for @notation_declarations.
1356 *
1357 *@@added V0.9.9 (2001-02-14) [umoeller]
1358 */
1359
1360void EXPATENTRY NotationDeclHandler(void *pUserData, // in: our PXMLDOM really
1361 const XML_Char *pcszNotationName,
1362 const XML_Char *pcszBase,
1363 const XML_Char *pcszSystemId,
1364 const XML_Char *pcszPublicId)
1365{
1366 PXMLDOM pDom = (PXMLDOM)pUserData;
1367
1368 // continue parsing only if we had no errors so far
1369 if (!pDom->arcDOM)
1370 {
1371 }
1372}
1373
1374/*
1375 *@@ ExternalEntityRefHandler:
1376 * @expat handler for references to @external_entities.
1377 *
1378 * This handler is also called for processing an external DTD
1379 * subset if parameter entity parsing is in effect.
1380 * (See XML_SetParamEntityParsing.)
1381 *
1382 * The pcszContext argument specifies the parsing context in the
1383 * format expected by the context argument to
1384 * XML_ExternalEntityParserCreate; pcszContext is valid only until
1385 * the handler returns, so if the referenced entity is to be
1386 * parsed later, it must be copied.
1387 *
1388 * The pcszBase parameter is the base to use for relative system
1389 * identifiers. It is set by XML_SetBase and may be null.
1390 *
1391 * The pcszPublicId parameter is the public id given in the entity
1392 * declaration and may be null.
1393 *
1394 * The pcszSystemId is the system identifier specified in the
1395 * entity declaration and is never null.
1396 *
1397 * There are a couple of ways in which this handler differs
1398 * from others. First, this handler returns an integer. A
1399 * non-zero value should be returned for successful handling
1400 * of the external entity reference. Returning a zero indicates
1401 * failure, and causes the calling parser to return an
1402 * ERROR_EXPAT_EXTERNAL_ENTITY_HANDLING error.
1403 *
1404 * Second, instead of having pUserData as its first argument,
1405 * it receives the parser that encountered the entity reference.
1406 * This, along with the context parameter, may be used as
1407 * arguments to a call to XML_ExternalEntityParserCreate.
1408 * Using the returned parser, the body of the external entity
1409 * can be recursively parsed.
1410 *
1411 * Since this handler may be called recursively, it should not
1412 * be saving information into global or static variables.
1413 *
1414 * Your handler isn't actually responsible for parsing the entity,
1415 * but it is responsible for creating a subsidiary parser with
1416 * XML_ExternalEntityParserCreate that will do the job. That returns
1417 * an instance of XML_Parser that has handlers and other data
1418 * structures initialized from the parent parser. You may then use
1419 * XML_Parse or XML_ParseBuffer calls against that parser. Since
1420 * external entities may refer to other external entities, your
1421 * handler should be prepared to be called recursively.
1422 *
1423 *@@added V0.9.9 (2001-02-14) [umoeller]
1424 */
1425
1426int EXPATENTRY ExternalEntityRefHandler(XML_Parser parser,
1427 const XML_Char *pcszContext,
1428 const XML_Char *pcszBase,
1429 const XML_Char *pcszSystemId,
1430 const XML_Char *pcszPublicId)
1431{
1432 int i = 1;
1433
1434 /* PXMLDOM pDom = (PXMLDOM)pUserData;
1435
1436 // continue parsing only if we had no errors so far
1437 if (!pDom->arcDOM)
1438 {
1439 } */
1440
1441 return (i);
1442}
1443
1444/*
1445 *@@ ElementDeclHandler:
1446 * @expat handler for element declarations in a DTD. The
1447 * handler gets called with the name of the element in
1448 * the declaration and a pointer to a structure that contains
1449 * the element model.
1450 *
1451 * It is the application's responsibility to free this data
1452 * structure. ###
1453 *
1454 * The XML spec defines that no element may be declared more
1455 * than once.
1456 *
1457 *@@added V0.9.9 (2001-02-14) [umoeller]
1458 */
1459
1460void EXPATENTRY ElementDeclHandler(void *pUserData, // in: our PXMLDOM really
1461 const XML_Char *pcszName,
1462 XMLCONTENT *pModel)
1463{
1464 PXMLDOM pDom = (PXMLDOM)pUserData;
1465
1466 // continue parsing only if we had no errors so far
1467 if (!pDom->arcDOM)
1468 {
1469 // pop the last DOMNODE off the stack and check if it's a DOCTYPE
1470 PLISTNODE pListNode = lstPop(&pDom->llStack);
1471 if (!pListNode)
1472 pDom->arcDOM = ERROR_DOM_DOCTYPE_STRUCTURE;
1473 else
1474 {
1475 PDOMNODE pDomNode = (PDOMNODE)pListNode->pItemData;
1476 if (pDomNode->NodeBase.ulNodeType != DOMNODE_DOCUMENT_TYPE)
1477 pDom->arcDOM = ERROR_DOM_DOCTYPE_STRUCTURE;
1478 else
1479 {
1480 // OK, we're in a DOCTYPE node:
1481 PDOMDOCTYPENODE pDocType = (PDOMDOCTYPENODE)pDomNode;
1482
1483 // create an element declaration and push it unto the
1484 // declarations tree
1485 PCMELEMENTDECLNODE pNew = NULL;
1486 pDom->arcDOM = xmlCreateElementDecl(pcszName,
1487 pModel,
1488 &pNew);
1489 // this recurses!!
1490
1491 if (pDom->arcDOM == NO_ERROR)
1492 {
1493 // add this to the doctype's declarations tree
1494 if (treeInsertNode(&pDocType->ElementDeclsTree,
1495 (TREE*)pNew,
1496 CompareCMNodeNodes,
1497 FALSE)
1498 == TREE_DUPLICATE)
1499 // element already declared:
1500 // according to the XML specs, this is a validity
1501 // constraint, so we report a validation error
1502 xmlSetError(pDom,
1503 ERROR_DOM_DUPLICATE_ELEMENT_DECL,
1504 pNew->Particle.CMNode.strNodeName.psz,
1505 TRUE);
1506 }
1507 }
1508 }
1509 }
1510}
1511
1512/*
1513 *@@ AddEnum:
1514 *
1515 *@@added V0.9.9 (2001-02-16) [umoeller]
1516 */
1517
1518VOID AddEnum(PCMATTRIBUTEDECL pNew,
1519 const char *p,
1520 const char *pNext)
1521{
1522 PSZ pszType = strhSubstr(p, pNext);
1523 PNODEBASE pCMNode = (PNODEBASE)malloc(sizeof(*pCMNode));
1524 memset(pCMNode, 0, sizeof(*pCMNode));
1525 pCMNode->ulNodeType = ATTRIBUTE_DECLARATION_ENUM;
1526 xstrInitSet(&pCMNode->strNodeName, pszType);
1527
1528 treeInsertNode(&pNew->ValuesTree,
1529 (TREE*)pCMNode,
1530 CompareCMNodeNodes,
1531 FALSE);
1532}
1533
1534/*
1535 *@@ AttlistDeclHandler:
1536 * @expat handler for attlist declarations in the DTD.
1537 *
1538 * This handler is called for each attribute. So a single attlist
1539 * declaration with multiple attributes declared will generate
1540 * multiple calls to this handler.
1541 *
1542 * -- pcszElementName is the name of the element for which the
1543 * attribute is being declared.
1544 *
1545 * -- pcszAttribName has the attribute name being declared.
1546 *
1547 * -- pcszAttribType is the attribute type.
1548 * It is the string representing the type in the declaration
1549 * with whitespace removed.
1550 *
1551 * -- pcszDefault holds the default value. It will be
1552 * NULL in the case of "#IMPLIED" or "#REQUIRED" attributes.
1553 * You can distinguish these two cases by checking the
1554 * fIsRequired parameter, which will be true in the case of
1555 * "#REQUIRED" attributes. Attributes which are "#FIXED"
1556 * will have also have a TRUE fIsRequired, but they will have
1557 * the non-NULL fixed value in the pcszDefault parameter.
1558 *
1559 *@@added V0.9.9 (2001-02-14) [umoeller]
1560 */
1561
1562void EXPATENTRY AttlistDeclHandler(void *pUserData, // in: our PXMLDOM really
1563 const XML_Char *pcszElementName,
1564 const XML_Char *pcszAttribName,
1565 const XML_Char *pcszAttribType,
1566 const XML_Char *pcszDefault,
1567 int fIsRequired)
1568{
1569 PXMLDOM pDom = (PXMLDOM)pUserData;
1570
1571 // continue parsing only if we had no errors so far
1572 if (!pDom->arcDOM)
1573 {
1574 // pop the last DOMNODE off the stack and check if it's a DOCTYPE
1575 PLISTNODE pListNode = lstPop(&pDom->llStack);
1576 if (!pListNode)
1577 pDom->arcDOM = ERROR_DOM_DOCTYPE_STRUCTURE;
1578 else
1579 {
1580 PDOMNODE pDomNode = (PDOMNODE)pListNode->pItemData;
1581 if (pDomNode->NodeBase.ulNodeType != DOMNODE_DOCUMENT_TYPE)
1582 pDom->arcDOM = ERROR_DOM_DOCTYPE_STRUCTURE;
1583 else
1584 {
1585 // OK, we're in a DOCTYPE node:
1586 PDOMDOCTYPENODE pDocType = (PDOMDOCTYPENODE)pDomNode;
1587 PCMATTRIBUTEDEDECLBASE pThis = NULL,
1588 pCache = pDom->pAttListDeclCache;
1589
1590 // check if this is for the same attlist as the previous
1591 // call (we cache the pointer for speed)
1592 if ( (pCache)
1593 && (!strhcmp(pCache->CMNode.strNodeName.psz,
1594 pcszElementName))
1595 )
1596 // this attdecl is for the same element:
1597 // use that (we won't have to search the tree)
1598 pThis = pDom->pAttListDeclCache;
1599
1600 if (!pThis)
1601 {
1602 // cache didn't match: look up attributes tree then
1603 pThis = treeFindEQData(&pDocType->AttribDeclBasesTree,
1604 (void*)pcszElementName,
1605 CompareCMNodeData);
1606
1607 if (!pThis)
1608 {
1609 // still not found:
1610 // we need a new node then
1611 pThis = (PCMATTRIBUTEDEDECLBASE)malloc(sizeof(*pThis));
1612 if (!pThis)
1613 pDom->arcDOM = ERROR_NOT_ENOUGH_MEMORY;
1614 else
1615 {
1616 pThis->CMNode.ulNodeType = ATTRIBUTE_DECLARATION_BASE;
1617 xstrInitCopy(&pThis->CMNode.strNodeName, pcszElementName, 0);
1618
1619 // initialize the subtree
1620 treeInit(&pThis->AttribDeclsTree);
1621
1622 treeInsertNode(&pDocType->AttribDeclBasesTree,
1623 (TREE*)pThis,
1624 CompareCMNodeNodes,
1625 FALSE);
1626 }
1627 }
1628
1629 pDom->pAttListDeclCache = pThis;
1630 }
1631
1632 if (pThis)
1633 {
1634 // pThis now has either an existing or a new CMATTRIBUTEDEDECLBASE;
1635 // add a new attribute def (CMATTRIBUTEDEDECL) to that
1636 PCMATTRIBUTEDECL pNew = (PCMATTRIBUTEDECL)malloc(sizeof(*pNew));
1637 if (!pNew)
1638 pDom->arcDOM = ERROR_NOT_ENOUGH_MEMORY;
1639 else
1640 {
1641 memset(pNew, 0, sizeof(*pNew));
1642 pNew->CMNode.ulNodeType = ATTRIBUTE_DECLARATION;
1643
1644 xstrInitCopy(&pNew->CMNode.strNodeName,
1645 pcszAttribName,
1646 0);
1647
1648 // fill the other fields
1649 /* xstrInitCopy(&pNew->strType,
1650 pcszAttribType,
1651 0); */
1652
1653 treeInit(&pNew->ValuesTree);
1654
1655 // check the type... expat is too lazy to parse this for
1656 // us, so we must check manually. Expat only normalizes
1657 // the "type" string to kick out whitespace, so we get:
1658 // (TYPE1|TYPE2|TYPE3)
1659 if (*pcszAttribType == '(')
1660 {
1661 // enumeration:
1662 const char *p = pcszAttribType + 1,
1663 *pNext;
1664 while (pNext = strchr(p, '|'))
1665 {
1666 AddEnum(pNew, p, pNext);
1667 p = pNext + 1;
1668 }
1669
1670 pNext = strchr(p, ')');
1671 AddEnum(pNew, p, pNext);
1672
1673 pNew->ulAttrType = CMAT_ENUM;
1674 }
1675 else if (!strcmp(pcszAttribType, "CDATA"))
1676 pNew->ulAttrType = CMAT_CDATA;
1677 else if (!strcmp(pcszAttribType, "ID"))
1678 pNew->ulAttrType = CMAT_ID;
1679 else if (!strcmp(pcszAttribType, "IDREF"))
1680 pNew->ulAttrType = CMAT_IDREF;
1681 else if (!strcmp(pcszAttribType, "IDREFS"))
1682 pNew->ulAttrType = CMAT_IDREFS;
1683 else if (!strcmp(pcszAttribType, "ENTITY"))
1684 pNew->ulAttrType = CMAT_ENTITY;
1685 else if (!strcmp(pcszAttribType, "ENTITIES"))
1686 pNew->ulAttrType = CMAT_ENTITIES;
1687 else if (!strcmp(pcszAttribType, "NMTOKEN"))
1688 pNew->ulAttrType = CMAT_NMTOKEN;
1689 else if (!strcmp(pcszAttribType, "NMTOKENS"))
1690 pNew->ulAttrType = CMAT_NMTOKENS;
1691
1692 if (pcszDefault)
1693 {
1694 // fixed or default:
1695 if (fIsRequired)
1696 // fixed:
1697 pNew->ulConstraint = CMAT_FIXED_VALUE;
1698 else
1699 pNew->ulConstraint = CMAT_DEFAULT_VALUE;
1700
1701 pNew->pstrDefaultValue = xstrCreate(0);
1702 xstrcpy(pNew->pstrDefaultValue, pcszDefault, 0);
1703 }
1704 else
1705 // implied or required:
1706 if (fIsRequired)
1707 pNew->ulConstraint = CMAT_REQUIRED;
1708 else
1709 pNew->ulConstraint = CMAT_IMPLIED;
1710
1711 if (treeInsertNode(&pThis->AttribDeclsTree,
1712 (TREE*)pNew,
1713 CompareCMNodeNodes,
1714 FALSE)
1715 == TREE_DUPLICATE)
1716 xmlSetError(pDom,
1717 ERROR_DOM_DUPLICATE_ATTRIBUTE_DECL,
1718 pcszAttribName,
1719 TRUE);
1720 }
1721 }
1722 }
1723 }
1724 }
1725}
1726
1727/*
1728 *@@ EntityDeclHandler:
1729 * @expat handler that will be called for all entity declarations.
1730 *
1731 * The fIsParameterEntity argument will be non-zero in the case
1732 * of parameter entities and zero otherwise.
1733 *
1734 * For internal entities (<!ENTITY foo "bar">), pcszValue will be
1735 * non-NULL and pcszSystemId, pcszPublicId, and pcszNotationName
1736 * will all be NULL. The value string is not NULL terminated; the
1737 * length is provided in the iValueLength parameter. Do not use
1738 * iValueLength to test for internal entities, since it is legal
1739 * to have zero-length values. Instead check for whether or not
1740 * pcszValue is NULL.
1741 *
1742 * The pcszNotationName argument will have a non-NULL value only
1743 * for unparsed entity declarations.
1744 *
1745 *@@added V0.9.9 (2001-02-14) [umoeller]
1746 */
1747
1748void EXPATENTRY EntityDeclHandler(void *pUserData, // in: our PXMLDOM really
1749 const XML_Char *pcszEntityName,
1750 int fIsParameterEntity,
1751 const XML_Char *pcszValue,
1752 int iValueLength,
1753 const XML_Char *pcszBase,
1754 const XML_Char *pcszSystemId,
1755 const XML_Char *pcszPublicId,
1756 const XML_Char *pcszNotationName)
1757{
1758 PXMLDOM pDom = (PXMLDOM)pUserData;
1759
1760 // continue parsing only if we had no errors so far
1761 if (!pDom->arcDOM)
1762 {
1763 }
1764}
1765
1766/* ******************************************************************
1767 *
1768 * DOM APIs
1769 *
1770 ********************************************************************/
1771
1772/*
1773 *@@ xmlCreateDOM:
1774 * creates an XMLDOM instance, which can be used for parsing
1775 * an XML document and building a @DOM tree from it at the
1776 * same time.
1777 *
1778 * Pass the XMLDOM returned here to xmlParse afterwards.
1779 *
1780 * ulFlags is any combination of the following:
1781 *
1782 * -- DF_PARSECOMMENTS: XML @comments are to be returned in
1783 * the DOM tree. Otherwise they are silently ignored.
1784 *
1785 * -- DF_PARSEDTD: add the @DTD of the document into the DOM tree
1786 * as well and validate the document.
1787 *
1788 * Usage:
1789 *
1790 * 1) Create a DOM instance.
1791 *
1792 + PXMLDOM pDom = NULL;
1793 + APIRET arc = xmlCreateDom(flags, &pDom);
1794 +
1795 * 2) Give chunks of data (or an entire buffer)
1796 * to the DOM instance for parsing.
1797 *
1798 + arc = xmlParse(pDom,
1799 + pBuf,
1800 + TRUE); // if last, this will clean up the parser
1801 *
1802 * 3) Process the data in the DOM tree. When done,
1803 * call xmlFreeDOM, which will free all memory.
1804 *
1805 *@@added V0.9.9 (2001-02-14) [umoeller]
1806 */
1807
1808APIRET xmlCreateDOM(ULONG flParserFlags,
1809 PXMLDOM *ppDom)
1810{
1811 APIRET arc = NO_ERROR;
1812
1813 PXMLDOM pDom = (PXMLDOM)malloc(sizeof(*pDom));
1814 if (!pDom)
1815 arc = ERROR_NOT_ENOUGH_MEMORY;
1816 else
1817 {
1818 PDOMNODE pDocument = NULL;
1819
1820 memset(pDom, 0, sizeof(XMLDOM));
1821
1822 lstInit(&pDom->llStack,
1823 FALSE); // no auto-free
1824
1825 // create the document node
1826 arc = xmlCreateNode(NULL, // no parent
1827 DOMNODE_DOCUMENT,
1828 &pDocument);
1829
1830 if (arc == NO_ERROR)
1831 {
1832 // store the document in the DOM
1833 pDom->pDocumentNode = (PDOMDOCUMENTNODE)pDocument;
1834
1835 // push the document on the stack so the handlers
1836 // will append to that
1837 lstPush(&pDom->llStack,
1838 pDom->pDocumentNode);
1839
1840 pDom->pParser = XML_ParserCreate(NULL);
1841
1842 if (!pDom->pParser)
1843 arc = ERROR_NOT_ENOUGH_MEMORY;
1844 else
1845 {
1846 XML_SetElementHandler(pDom->pParser,
1847 StartElementHandler,
1848 EndElementHandler);
1849
1850 XML_SetCharacterDataHandler(pDom->pParser,
1851 CharacterDataHandler);
1852
1853 // XML_SetProcessingInstructionHandler(XML_Parser parser,
1854 // XML_ProcessingInstructionHandler handler);
1855
1856
1857 if (flParserFlags & DF_PARSECOMMENTS)
1858 XML_SetCommentHandler(pDom->pParser,
1859 CommentHandler);
1860
1861 if (flParserFlags & DF_PARSEDTD)
1862 {
1863 XML_SetDoctypeDeclHandler(pDom->pParser,
1864 StartDoctypeDeclHandler,
1865 EndDoctypeDeclHandler);
1866
1867 XML_SetNotationDeclHandler(pDom->pParser,
1868 NotationDeclHandler);
1869
1870 XML_SetExternalEntityRefHandler(pDom->pParser,
1871 ExternalEntityRefHandler);
1872
1873 XML_SetElementDeclHandler(pDom->pParser,
1874 ElementDeclHandler);
1875
1876 XML_SetAttlistDeclHandler(pDom->pParser,
1877 AttlistDeclHandler);
1878
1879 XML_SetEntityDeclHandler(pDom->pParser,
1880 EntityDeclHandler);
1881
1882 XML_SetParamEntityParsing(pDom->pParser,
1883 XML_PARAM_ENTITY_PARSING_ALWAYS);
1884 }
1885
1886 // XML_SetXmlDeclHandler ... do we care for this? I guess not
1887
1888 // pass the XMLDOM as user data to the handlers
1889 XML_SetUserData(pDom->pParser,
1890 pDom);
1891 }
1892 }
1893 }
1894
1895 if (arc == NO_ERROR)
1896 *ppDom = pDom;
1897 else
1898 xmlFreeDOM(pDom);
1899
1900 return (arc);
1901}
1902
1903/*
1904 *@@ xmlParse:
1905 * parses another piece of XML data.
1906 *
1907 * If (fIsLast == TRUE), the internal @expat parser
1908 * will be freed, but not the DOM itself.
1909 *
1910 * You can pass an XML document to this function
1911 * in one flush. Set fIsLast = TRUE on the first
1912 * and only call then.
1913 *
1914 * This returns NO_ERROR if the chunk was successfully
1915 * parsed. Otherwise one of the following errors is
1916 * returned:
1917 *
1918 * -- ERROR_INVALID_PARAMETER
1919 *
1920 * -- ERROR_DOM_PARSING: an @expat parsing error occured.
1921 * This might also be memory problems.
1922 * With this error code, you will find specific
1923 * error information in the XMLDOM fields.
1924 *
1925 * -- ERROR_DOM_PARSING: the document is not @valid.
1926 * This can only happen if @DTD parsing was enabled
1927 * with xmlCreateDOM.
1928 * With this error code, you will find specific
1929 * error information in the XMLDOM fields.
1930 *
1931 *@@added V0.9.9 (2001-02-14) [umoeller]
1932 */
1933
1934APIRET xmlParse(PXMLDOM pDom,
1935 const char *pcszBuf,
1936 ULONG cb,
1937 BOOL fIsLast)
1938{
1939 APIRET arc = NO_ERROR;
1940
1941 if (!pDom)
1942 arc = ERROR_INVALID_PARAMETER;
1943 else
1944 {
1945 BOOL fSuccess = XML_Parse(pDom->pParser,
1946 pcszBuf,
1947 cb,
1948 fIsLast);
1949
1950 if (!fSuccess)
1951 {
1952 // expat parsing error:
1953 xmlSetError(pDom,
1954 XML_GetErrorCode(pDom->pParser),
1955 NULL,
1956 FALSE);
1957
1958 if (pDom->pDocumentNode)
1959 {
1960 xmlDeleteNode((PDOMNODE)pDom->pDocumentNode);
1961 pDom->pDocumentNode = NULL;
1962 }
1963
1964 arc = ERROR_DOM_PARSING;
1965 }
1966 else if (pDom->fInvalid)
1967 {
1968 // expat was doing OK, but the handlers' validation failed:
1969 arc = ERROR_DOM_VALIDITY;
1970 // error info has already been set
1971 }
1972 else
1973 // expat was doing OK, but maybe we have integrity errors
1974 // from our DOM callbacks:
1975 if (pDom->arcDOM)
1976 arc = pDom->arcDOM;
1977
1978 if (arc != NO_ERROR || fIsLast)
1979 {
1980 // last call or error: clean up
1981 XML_ParserFree(pDom->pParser);
1982 pDom->pParser = NULL;
1983
1984 // clean up the stack (but not the DOM itself)
1985 lstClear(&pDom->llStack);
1986 }
1987 }
1988
1989 return (arc);
1990}
1991
1992/*
1993 *@@ xmlFreeDOM:
1994 * cleans up all resources allocated by
1995 * xmlCreateDOM and xmlParse, including
1996 * the entire DOM tree.
1997 *
1998 * If you wish to keep any data, make
1999 * a copy of the respective pointers in pDom
2000 * or subitems and set them to NULL before
2001 * calling this function.
2002 *
2003 *@@added V0.9.9 (2001-02-14) [umoeller]
2004 */
2005
2006APIRET xmlFreeDOM(PXMLDOM pDom)
2007{
2008 APIRET arc = NO_ERROR;
2009 if (pDom)
2010 {
2011 // if the parser is still alive for some reason, close it.
2012 if (pDom->pParser)
2013 {
2014 XML_ParserFree(pDom->pParser);
2015 pDom->pParser = NULL;
2016 }
2017
2018 free(pDom);
2019 }
2020
2021 return (arc);
2022}
Note: See TracBrowser for help on using the repository browser.