source: trunk/include/helpers/xml.h@ 53

Last change on this file since 53 was 39, checked in by umoeller, 24 years ago

Misc. fixes.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 23.8 KB
Line 
1
2/*
3 *@@sourcefile xml.h:
4 * header file for xml.c (XML parsing).
5 *
6 * See remarks there.
7 *
8 *@@added V0.9.6 (2000-10-29) [umoeller]
9 *@@include #include <os2.h>
10 *@@include #include "expat\expat.h" // must come before xml.h
11 *@@include #include "helpers\linklist.h"
12 *@@include #include "helpers\tree.h"
13 *@@include #include "helpers\xstring.h"
14 *@@include #include "helpers\xml.h"
15 */
16
17#if __cplusplus
18extern "C" {
19#endif
20
21#ifndef XML_HEADER_INCLUDED
22 #define XML_HEADER_INCLUDED
23
24 // define some basic things to make this work even with standard C.
// This fallback is skipped if the OS/2 headers were included (OS2_INCLUDED
// or _OS2_H are defined) or if __SIMPLES_DEFINED is already set
// (presumably by another header that carries the same fallback block).
// NOTE(review): only BOOL, ULONG, PSZ, TRUE, FALSE and APIENTRY are
// provided here; APIRET and VOID, which the prototypes further down in
// this header use, are not part of this fallback.
25 #if (!defined OS2_INCLUDED) && (!defined _OS2_H) && (!defined __SIMPLES_DEFINED) // changed V0.9.0 (99-10-22) [umoeller]
26 typedef unsigned long BOOL;
27 typedef unsigned long ULONG;
28 typedef unsigned char *PSZ;
// TRUE and FALSE expand to a cast expression without outer parentheses
29 #define TRUE (BOOL)1
30 #define FALSE (BOOL)0
31
// APIENTRY is mapped to _System for the IBM C++ compiler only
32 #ifdef __IBMCPP__ // added V0.9.0 (99-10-22) [umoeller]
33 #define APIENTRY _System
34 #endif
35
// mark the fallback as done so that it is not repeated
36 #define __SIMPLES_DEFINED
37 #endif
38
// DOMERROR: error codes of the DOM layer.
// The first enumerator is set to ERROR_EXPAT_AFTER_LAST, which is not
// declared in this header (presumably it comes from the expat header that
// must be included first); all following enumerators count up from there.
39 typedef enum _DOMERROR
40 {
41 // validity errors:
42// START MATCHING ERROR MESSAGES (xmlDescribeError)
// NOTE(review): per the START/END markers, the enumerators in between
// correspond to the messages in xmlDescribeError; presumably adding or
// reordering entries here requires the same change there -- confirm in xml.c.
43 ERROR_DOM_UNDECLARED_ELEMENT = ERROR_EXPAT_AFTER_LAST,
44 // invalidity: element is undeclared
45 ERROR_DOM_ROOT_ELEMENT_MISNAMED,
46 ERROR_DOM_INVALID_ROOT_ELEMENT,
47 ERROR_DOM_INVALID_SUBELEMENT,
48 // subelement may not appear in its parent element
49 ERROR_DOM_DUPLICATE_ELEMENT_DECL,
50 // more than one declaration for an element type
51 ERROR_DOM_DUPLICATE_ATTRIBUTE_DECL,
52 // more than one declaration for an attribute type
53 ERROR_DOM_UNDECLARED_ATTRIBUTE,
54 ERROR_ELEMENT_CANNOT_HAVE_CONTENT,
55 // element was declared "empty" and contains text anyway,
56 // or was declared "children" and contains something other
57 // than whitespace
// (this is the only enumerator here without the ERROR_DOM_ prefix)
58 ERROR_DOM_INVALID_ATTRIB_VALUE,
59 ERROR_DOM_REQUIRED_ATTRIBUTE_MISSING,
60 ERROR_DOM_SUBELEMENT_IN_EMPTY_ELEMENT,
61// END MATCHING ERROR MESSAGES (xmlDescribeError)
62
63 // error categories:
64 ERROR_DOM_PARSING,
65 ERROR_DOM_VALIDITY,
66
67 // additional DOM errors
68 ERROR_DOM_NODETYPE_NOT_SUPPORTED,
69 // invalid node type in xmlCreateDomNode
70 ERROR_DOM_NO_DOCUMENT,
71 // cannot find document node
72 ERROR_DOM_NO_ELEMENT,
73 ERROR_DOM_DUPLICATE_DOCTYPE,
74 ERROR_DOM_DOCTYPE_STRUCTURE,
75 // DOCTYPE is given and root element name does not match doctype name
76 ERROR_DOM_INTEGRITY,
77 ERROR_DOM_DUPLICATE_ATTRIBUTE
78 } DOMERROR;
79
80 /* ******************************************************************
81 *
82 * Most basic node management
83 *
84 ********************************************************************/
85
86 // node types for everything that starts with a NODEBASE:
// DOM nodes (DOMNODE_*) as well as content model nodes
// (ELEMENTPARTICLE_*, ATTRIBUTE_DECLARATION*).
// The enumerators have no explicit values, so they are numbered 0, 1, 2, ...
// in the order given. Only ELEMENT (1), ATTRIBUTE (2) and TEXT (3) therefore
// coincide with the W3C DOM nodeType codes; the numbers shown in the
// commented-out entries below are the W3C codes, not values of this enum.
87 typedef enum _NODEBASETYPE
88 {
89 TYPE_UNKNOWN,
90
91 DOMNODE_ELEMENT, // node is a DOM ELEMENT
92 DOMNODE_ATTRIBUTE, // node is a DOM ATTRIBUTE
93 DOMNODE_TEXT, // node is a DOM TEXT node
94 // DOMNODE_CDATA_SECTION 4 (not implemented)
95 // DOMNODE_ENTITY_REFERENCE 5 (not implemented)
96 // DOMNODE_ENTITY 6 (not implemented)
97 DOMNODE_PROCESSING_INSTRUCTION, // node is a DOM PI
98 DOMNODE_COMMENT, // node is a DOM COMMENT
99 DOMNODE_DOCUMENT, // node is a DOM document
100 DOMNODE_DOCUMENT_TYPE, // node is a DOM DOCUMENTTYPE
101 // #define DOMNODE_DOCUMENT_FRAGMENT 11 (not implemented)
102 // #define DOMNODE_NOTATION 12 (not implemented)
103
104 // the following are all CMELEMENTPARTICLE nodes
105 ELEMENTPARTICLE_EMPTY,
106 ELEMENTPARTICLE_ANY,
107 ELEMENTPARTICLE_MIXED,
108 ELEMENTPARTICLE_CHOICE,
109 ELEMENTPARTICLE_SEQ,
110 ELEMENTPARTICLE_NAME,
111
112 ATTRIBUTE_DECLARATION_BASE, // node is a CMATTRIBUTEDECLBASE
113 ATTRIBUTE_DECLARATION, // node is a CMATTRIBUTEDECL
114 ATTRIBUTE_DECLARATION_ENUM // node is a plain NODEBASE, part of an attr value enum
115 } NODEBASETYPE;
116
117 /*
118 *@@ NODEBASE:
119 * "content model" node. With the DOM content models,
120 * this represents an entry in a DTD or XML schema.
121 *
122 *@@added V0.9.9 (2001-02-14) [umoeller]
123 */
124
// NODEBASE is the common first member of DOMNODE and of the content model
// structures in this header (CMELEMENTPARTICLE, CMATTRIBUTEDECL,
// CMATTRIBUTEDECLBASE, CMENTITYDECLNODE, CMNOTATIONDECLNODE).
125 typedef struct _NODEBASE
126 {
127 TREE Tree;
// tree linkage, embedded by value as the very first member
128
129 NODEBASETYPE ulNodeType;
// tells which of the structures that start with a NODEBASE this one is
130
131 XSTRING strNodeName;
132 // node name;
133 // -- for the various DOMNODE_* items, see _DOMNODE;
134 // -- for CMELEMENTPARTICLE nodes, this is the particle's name
135 // -- for CMELEMENTDECLNODE nodes, element name being declared
136 // -- for CMATTRIBUTEDECLBASE nodes, name of element to which this
137 // attrib decl belongs
138 // -- for CMATTRIBUTEDECL nodes, name of attribute;
139 // -- for ATTRIBUTE_DECLARATION_ENUM, attribute value in the
140 // possible values list.
141
142 } NODEBASE, *PNODEBASE;
143
144 /* ******************************************************************
145 *
146 * DOM level 1
147 *
148 ********************************************************************/
149
150 /*
151 *@@ DOMNODE:
152 * this represents one @DOM node in an @XML document.
153 *
154 * The document itself is represented by a node with the
155 * DOMNODE_DOCUMENT type, which is the root of a tree as
156 * shown with xmlParse.
157 *
158 * The contents of the members vary according
159 * to NodeBase.ulNodeType. In the overview below, "0" means
160 * that the field does not apply to that node type.
161 *
162 * The first member of a DOMNODE is a NODEBASE to allow
163 * inserting these things in a tree. NODEBASE.ulNodeType
164 * _always_ specifies the various types that are using
165 * that structure to allow for type-safety (if we watch out).
166 * This is for faking inheritance.
167 *
168 * Note that we also implement specialized sub-structures of
169 * DOMNODE, whose first member is the DOMNODE (and therefore
170 * a NODEBASE as well):
171 *
172 * -- DOCUMENT nodes are given a _DOMDOCUMENTNODE structure.
173 *
174 * -- DOCTYPE nodes are given a _DOMDOCTYPENODE structure.
175 *
176 * Overview of member fields usage:
177 +
178 + ulNodeType | strNodeName | strNodeValue | llChildren | llAttributes
179 + =======================================================================
180 + | | | |
181 + DOCUMENT | name from | 0 | 1 root | 0
182 + | DOCTYPE or | | ELEMENT |
183 + | NULL | | |
184 + | | | |
185 + --------------+-------------+--------------+------------+--------------
186 + | | | |
187 + ELEMENT | tag name | 0 8 | ELEMENT | ATTRIBUTE
188 + | | | nodes | nodes
189 + | | | |
190 + --------------+-------------+--------------+------------+--------------
191 + | | | |
192 + ATTRIBUTE | attribute | attribute | 0 | 0
193 + | name | value | |
194 + | | | |
195 + --------------+-------------+--------------+------------+--------------
196 + | | | |
197 + TEXT | 0 | text | 0 | 0
198 + | | contents | |
199 + | | | |
200 + --------------+-------------+--------------+------------+--------------
201 + | | | |
202 + COMMENT | 0 | comment | 0 | 0
203 + | | contents | |
204 + | | | |
205 + --------------+-------------+--------------+------------+--------------
206 + | | | |
207 + PI | PI target | PI data | 0 | 0
208 + | | | |
209 + | | | |
210 + --------------+-------------+--------------+------------+--------------
211 + | | | |
212 + DOCTYPE | doctype | | 0 | 0
213 + | name | | |
214 + | | | |
215 +
216 * The xwphelpers implementation does not implement CDATA sections,
217 * for which we have no need because @expat properly converts these
218 * into plain @content.
219 *
220 * In addition, W3C DOM specifies that the "node name" members contain
221 * "#document", "#text", and "#comment" strings for DOCUMENT,
222 * TEXT, and COMMENT nodes, respectively. I see no point in this other
223 * than consuming memory, so these fields are empty with this implementation.
224 */
225
226 typedef struct _DOMNODE
227 {
228 NODEBASE NodeBase;
// first member; node type and node name are stored here
// (NodeBase.ulNodeType, NodeBase.strNodeName)
229
230 PXSTRING pstrNodeValue; // ptr is NULL if none
231
232 struct _DOMNODE *pParentNode;
233 // the parent node;
234 // NULL for DOCUMENT, DOCUMENT_FRAGMENT.
// (NODEBASETYPE has no enumerator for DOCUMENT_FRAGMENT; it is
// only present there as a commented-out entry)
235 // The DOM spec says that attribs have no parent,
236 // but even though the attribute is not added to
237 // the "children" list of an element (but to the
238 // attributes map instead), we specify the element
239 // as the attribute's parent here.
240
241 struct _DOMNODE *pDocumentNode;
242 // the document node, unless this is a DOCUMENT in itself.
243
244 LINKLIST llChildren; // of DOMNODE* pointers, no auto-free
245
246 TREE *AttributesMap; // of DOMNODE* pointers
// attribute nodes go here instead of into llChildren
247
248 } DOMNODE, *PDOMNODE;
249
250 /*
251 *@@ DOMDOCTYPENODE:
252 * specific _DOMNODE replacement structure which
253 * is used for DOCTYPE nodes.
254 *
255 * The DOMDOCTYPENODE is special (other than having
256 * extra fields) in that it is stored both in
257 * the document node's children list and in its
258 * pDocType field.
259 *
260 * DomNode.NodeBase.strNodeName is set to the name in the
261 * DOCTYPE statement by xmlCreateDocumentTypeNode,
262 * or is NULL if there's no DOCTYPE.
263 *
264 *@@added V0.9.9 (2001-02-14) [umoeller]
265 */
266
267 typedef struct _DOMDOCTYPENODE
268 {
269 DOMNODE DomNode;
// first member, so that the struct starts with a DOMNODE
// (and thus with a NODEBASE)
270
271 XSTRING strPublicID;
272 XSTRING strSystemID;
// presumably the public and system identifiers from the DOCTYPE
// statement -- confirm in xml.c
273
274 BOOL fHasInternalSubset;
275
276 TREE *ElementDeclsTree;
277 // tree with pointers to _CMELEMENTDECLNODE nodes
278
279 TREE *AttribDeclBasesTree;
280 // tree with pointers to _CMATTRIBUTEDECLBASE nodes
281
282 } DOMDOCTYPENODE, *PDOMDOCTYPENODE;
283
284 /*
285 *@@ DOMDOCUMENTNODE:
286 * specific _DOMNODE replacement structure which
287 * is used for DOCUMENT nodes.
288 *
289 *@@added V0.9.9 (2001-02-14) [umoeller]
290 */
291
292 typedef struct _DOMDOCUMENTNODE
293 {
294 DOMNODE DomNode;
// first member, so that the struct starts with a DOMNODE
// (and thus with a NODEBASE)
295
296 PDOMDOCTYPENODE pDocType;
297 // != NULL if DOCTYPE was found
298
299 } DOMDOCUMENTNODE, *PDOMDOCUMENTNODE;
300
// xmlCreateDomNode: prototype only, implemented outside this header.
// From the signature: takes a parent node, the node type, and the node
// name together with its length; ppNew is presumably an output parameter
// receiving the new node -- confirm in xml.c.
// Returns an APIRET; ERROR_DOM_NODETYPE_NOT_SUPPORTED in DOMERROR is
// documented as "invalid node type in xmlCreateDomNode".
301 APIRET xmlCreateDomNode(PDOMNODE pParentNode,
302 NODEBASETYPE ulNodeType,
303 const char *pcszNodeName,
304 ULONG ulNodeNameLength,
305 PDOMNODE *ppNew);
306
// xmlDeleteNode: prototype only, implemented outside this header.
// Takes a PNODEBASE, i.e. a pointer to the common base part shared by
// DOMNODE and the content model structures.
307 VOID xmlDeleteNode(PNODEBASE pNode);
308
309 /* ******************************************************************
310 *
311 * DOM level 3 content models
312 *
313 ********************************************************************/
314
315 // data types (XML schemas):
// NOTE(review): none of these constants is referenced elsewhere in this
// header.
316 #define STRING_DATATYPE 1
317 #define BOOLEAN_DATATYPE 2
318 #define FLOAT_DATATYPE 3
319 #define DOUBLE_DATATYPE 4
320 #define LONG_DATATYPE 5
321 #define INT_DATATYPE 6
322 #define SHORT_DATATYPE 7
323 #define BYTE_DATATYPE 8
324
325 /*
326 *@@ CMELEMENTPARTICLE:
327 * element declaration particle in a
328 * _CMELEMENTDECLNODE.
329 *
330 * One of these structures is a full
331 * (non-pointer) member in _CMELEMENTDECLNODE.
332 * This struct in turn has a linked list with
333 * possible subnodes. See _CMELEMENTDECLNODE.
334 *
335 *@@added V0.9.9 (2001-02-16) [umoeller]
336 */
337
338 typedef struct _CMELEMENTPARTICLE
339 {
340 NODEBASE NodeBase; // first member; NODEBASE in turn starts with a TREE
341 // NODEBASE.ulNodeType may be one of these:
342 // -- ELEMENTPARTICLE_EMPTY:
343 // ulRepeater will be XML_CQUANT_NONE, rest is NULL
344 // -- ELEMENTPARTICLE_ANY:
345 // ulRepeater will be XML_CQUANT_NONE, rest is NULL
346 // -- ELEMENTPARTICLE_MIXED:
347 // mixed content (with PCDATA); if the list contains
348 // something, the element may have PCDATA and sub-elements
349 // mixed
350 // -- ELEMENTPARTICLE_CHOICE:
351 // list is a choicelist
352 // -- ELEMENTPARTICLE_SEQ:
353 // list is a seqlist
354 // -- ELEMENTPARTICLE_NAME:
355 // used for terminal particles in a parent particle's
356 // list, which finally specifies the name of a sub-particle.
357 // This can never appear in a root particle.
358
359 ULONG ulRepeater;
360 // one of:
361 // -- XML_CQUANT_NONE --> all fields below are NULL
362 // -- XML_CQUANT_OPT,
363 // -- XML_CQUANT_REP,
364 // -- XML_CQUANT_PLUS
// (the XML_CQUANT_* constants are not declared in this header;
// presumably they are expat's content quantifiers)
365
366 struct _CMELEMENTPARTICLE *pParentParticle; // or NULL if this is in the
367 // CMELEMENTDECLNODE
368
369 PLINKLIST pllSubNodes;
370 // linked list of sub-CMELEMENTPARTICLE structs
371 // (for mixed, choice, seq types);
372 // if NULL, there's no sub-CMELEMENTPARTICLE
373
374 } CMELEMENTPARTICLE, *PCMELEMENTPARTICLE;
375
376 /*
377 *@@ CMELEMENTDECLNODE:
378 * representation of an @element_declaration within a
379 * _DOMDOCTYPENODE (a document @DTD).
380 *
381 * This is complicated because element declarations
382 * are complicated with nested lists and content
383 * particles. For this, we introduce the representation
384 * of a _CMELEMENTPARTICLE, which is contained in the
385 * "Particle" member.
386 *
387 * For minimal memory consumption, the _CMELEMENTDECLNODE
388 * is an _CMELEMENTPARTICLE with extra fields, while the
389 * list in _CMELEMENTPARTICLE points to plain
390 * _CMELEMENTPARTICLE structs only.
391 *
392 * For the "root" element declaration in the DTD,
393 * Particle.NodeBase.ulNodeType will always be one of the following:
394 *
395 * -- ELEMENTPARTICLE_EMPTY: element must be empty.
396 *
397 * -- ELEMENTPARTICLE_ANY: element can have any content.
398 *
399 * -- ELEMENTPARTICLE_CHOICE: _CMELEMENTPARTICLE has a choicelist with
400 * more _CMELEMENTPARTICLE structs.
401 *
402 * -- ELEMENTPARTICLE_SEQ: _CMELEMENTPARTICLE has a seqlist with
403 * more _CMELEMENTPARTICLE structs.
404 *
405 * -- ELEMENTPARTICLE_MIXED: element can have mixed content including #PCDATA.
406 * If there is no content particle list, then the element may
407 * ONLY have PCDATA. If there's a content particle list, then the
408 * element may have both sub-elements and PCDATA. Oh my.
409 *
410 *@@added V0.9.9 (2001-02-14) [umoeller]
411 */
412
413 typedef struct _CMELEMENTDECLNODE
414 {
415 CMELEMENTPARTICLE Particle;
416 // root particle for this element decl; this may contain
417 // sub-particles...
418 // this has a NODEBASE as first member, which in turn has a
419 // TREE (embedded by value) as its first member
420
421 TREE *ParticleNamesTree;
422 // tree sorted by element names with all sub-particles,
423 // no matter how deeply nested; this is just for quickly
424 // checking if an element name is allowed as a sub-element
425 // at all. Tree items are _CMELEMENTPARTICLE nodes.
426
427 } CMELEMENTDECLNODE, *PCMELEMENTDECLNODE;
428
// ATTRIBCONSTRAINT: type of CMATTRIBUTEDECL.ulConstraint.
// Shares the CMAT_ prefix with ATTRIBTYPE, and both enums number their
// members from 0, so the numeric values of the two enums overlap.
429 typedef enum _ATTRIBCONSTRAINT
430 {
431 CMAT_IMPLIED,
432 CMAT_REQUIRED,
433 CMAT_DEFAULT_VALUE,
434 CMAT_FIXED_VALUE
435 } ATTRIBCONSTRAINT;
436
// ATTRIBTYPE: type of CMATTRIBUTEDECL.ulAttrType.
// Shares the CMAT_ prefix with ATTRIBCONSTRAINT, and both enums number
// their members from 0, so the numeric values of the two enums overlap.
437 typedef enum _ATTRIBTYPE
438 {
439 CMAT_CDATA,
440 CMAT_ID,
441 CMAT_IDREF,
442 CMAT_IDREFS,
443 CMAT_ENTITY,
444 CMAT_ENTITIES,
445 CMAT_NMTOKEN,
446 CMAT_NMTOKENS,
447 CMAT_ENUM
448 } ATTRIBTYPE;
449
450 /*
451 *@@ CMATTRIBUTEDECL:
452 * single attribute declaration within the attribute
453 * declarations tree in _CMATTRIBUTEDECLBASE.
454 *
455 *@@added V0.9.9 (2001-02-16) [umoeller]
456 */
457
458 typedef struct _CMATTRIBUTEDECL
459 {
460 NODEBASE NodeBase; // first member; NODEBASE in turn starts with a TREE
461 // NodeBase.strNodeName is attribute name
462
463 ATTRIBTYPE ulAttrType;
464 // one of:
465 // -- CMAT_CDATA
466 // -- CMAT_ID
467 // -- CMAT_IDREF
468 // -- CMAT_IDREFS
469 // -- CMAT_ENTITY
470 // -- CMAT_ENTITIES
471 // -- CMAT_NMTOKEN
472 // -- CMAT_NMTOKENS
473 // -- CMAT_ENUM: ValuesTree lists the allowed values.
474 TREE *ValuesTree;
475 // enumeration of allowed values, if CMAT_ENUM;
476 // tree entries are plain NODEBASEs with
477 // ATTRIBUTE_DECLARATION_ENUM type
478
479 ATTRIBCONSTRAINT ulConstraint;
480 // one of:
481 // -- CMAT_IMPLIED: attrib can have any value.
// NOTE(review): in XML 1.0, #IMPLIED means that the attribute
// may be omitted and no default value is provided -- confirm
// how xml.c treats this.
482 // -- CMAT_REQUIRED: attrib must be specified.
483 // -- CMAT_DEFAULT_VALUE: attrib is optional and has default
484 // value as in pstrDefaultValue.
485 // -- CMAT_FIXED_VALUE: attrib is optional, but must have
486 // fixed value as in pstrDefaultValue.
487 PXSTRING pstrDefaultValue;
488 // default value of this attribute; NULL with implied or required
489
490 } CMATTRIBUTEDECL, *PCMATTRIBUTEDECL;
491
492 /*
493 *@@ CMATTRIBUTEDECLBASE:
494 * representation of an @attribute_declaration.
495 *
496 * I'd love to have stored the attribute declarations with
497 * the element specifications, but the XML spec says that
498 * attribute declarations are allowed even if no element
499 * declaration exists for the element to which the attribute
500 * belongs. Now, whatever this is good for... anyway, this
501 * forces us to do a second tree in the _DOMDOCTYPENODE node
502 * according to attribute's element names.
503 *
504 *@@added V0.9.9 (2001-02-14) [umoeller]
505 */
506
507 typedef struct _CMATTRIBUTEDECLBASE
508 {
509 NODEBASE NodeBase; // first member; NODEBASE in turn starts with a TREE
510 // NodeBase.strNodeName is element name
511
512 TREE *AttribDeclsTree;
513 // root of tree with CMATTRIBUTEDECL;
514
515 } CMATTRIBUTEDECLBASE, *PCMATTRIBUTEDECLBASE;
516
517 /*
518 *@@ CMENTITYDECLNODE:
519 *
520 * See @entity_declaration.
521 *
522 *@@added V0.9.9 (2001-02-14) [umoeller]
523 */
524
// Consists of the NODEBASE only, with no members of its own.
// NOTE(review): NODEBASETYPE has no enumerator for entity declarations.
525 typedef struct _CMENTITYDECLNODE
526 {
527 NODEBASE NodeBase;
528 } CMENTITYDECLNODE, *PCMENTITYDECLNODE;
529
530 /*
531 *@@ CMNOTATIONDECLNODE:
532 *
533 * See @notation_declaration.
534 *
535 *@@added V0.9.9 (2001-02-14) [umoeller]
536 */
537
// Consists of the NODEBASE only, with no members of its own.
// NOTE(review): NODEBASETYPE has no enumerator for notation declarations
// (DOMNODE_NOTATION only appears there as a commented-out entry).
538 typedef struct _CMNOTATIONDECLNODE
539 {
540 NODEBASE NodeBase;
541 } CMNOTATIONDECLNODE, *PCMNOTATIONDECLNODE;
542
543 /* ******************************************************************
544 *
545 * DOM APIs
546 *
547 ********************************************************************/
548
549 /*
550 *@@ XMLDOM:
551 * DOM instance returned by xmlCreateDOM.
552 *
553 *@@added V0.9.9 (2001-02-14) [umoeller]
554 */
555
556 typedef struct _XMLDOM
557 {
558 /*
559 * Public fields (should be read only)
560 */
561
562 PDOMDOCUMENTNODE pDocumentNode;
// the DOCUMENT node of this DOM instance
563
564 PDOMDOCTYPENODE pDocTypeNode;
565 // != NULL only if the document has a DOCTYPE
566
567 APIRET arcDOM; // validation errors etc.
// (presumably one of the DOMERROR codes -- confirm in xml.c)
568 BOOL fInvalid; // TRUE after validation failed
569
// the following four fields give details about a reported error
570 const char *pcszErrorDescription;
571 ULONG ulErrorLine;
572 ULONG ulErrorColumn;
573 PXSTRING pxstrFailingNode; // element or attribute name
574
575 /*
576 * Private fields (for xml* functions)
577 */
578
579 XML_Parser pParser;
580 // expat parser instance
581
582 LINKLIST llElementStack;
583 // stack for maintaining the current items;
584 // these point to DOMSTACKITEMs (auto-free)
// (DOMSTACKITEM is not declared in this header)
585
586 PDOMNODE pLastWasTextNode;
// NOTE(review): presumably remembers the TEXT node created last;
// the name is all this header reveals -- confirm in xml.c
587
588 PCMATTRIBUTEDECLBASE pAttListDeclCache;
589 // cache for attribute declarations according
590 // to attdecl element name
591 } XMLDOM, *PXMLDOM;
592
// Flag bits; presumably the values accepted for the flParserFlags
// parameter of xmlCreateDOM -- confirm in xml.c.
593 #define DF_PARSECOMMENTS 0x0001
594 #define DF_PARSEDTD 0x0002
595
// xmlCreateDOM: prototype only, implemented outside this header.
// XMLDOM is documented as the "DOM instance returned by xmlCreateDOM",
// so ppDom presumably receives the new instance.
596 APIRET xmlCreateDOM(ULONG flParserFlags,
597 PXMLDOM *ppDom);
598
// xmlParse: prototype only, implemented outside this header.
// From the signature: takes the DOM instance, a buffer with its size in
// cb, and a flag fIsLast (presumably TRUE for the final chunk of a
// document that is passed in pieces -- confirm in xml.c).
599 APIRET xmlParse(PXMLDOM pDom,
600 const char *pcszBuf,
601 ULONG cb,
602 BOOL fIsLast);
603
// xmlFreeDOM: prototype only, implemented outside this header.
// Presumably the counterpart to xmlCreateDOM -- confirm in xml.c.
604 APIRET xmlFreeDOM(PXMLDOM pDom);
605
// xmlFindElementDecl: prototype only, implemented outside this header.
// From the signature: looks up by element name and yields a
// CMELEMENTDECLNODE pointer (presumably NULL if none is found -- confirm).
606 PCMELEMENTDECLNODE xmlFindElementDecl(PXMLDOM pDom,
607 const XSTRING *pstrElementName);
608
// xmlFindAttribDeclBase: prototype only, implemented outside this header.
// From the signature: looks up by element name and yields a
// CMATTRIBUTEDECLBASE pointer (presumably NULL if none is found -- confirm).
609 PCMATTRIBUTEDECLBASE xmlFindAttribDeclBase(PXMLDOM pDom,
610 const XSTRING *pstrElementName);
611
// xmlFindAttribDecl: prototype only, implemented outside this header.
// From the signature: looks up by element name plus attribute name and
// yields a CMATTRIBUTEDECL pointer; ppAttribDeclBase is a pointer to a
// PCMATTRIBUTEDECLBASE (presumably an in/out cache of the attribute
// declaration base for the element -- confirm in xml.c).
612 PCMATTRIBUTEDECL xmlFindAttribDecl(PXMLDOM pDom,
613 const XSTRING *pstrElementName,
614 const XSTRING *pstrAttribName,
615 PCMATTRIBUTEDECLBASE *ppAttribDeclBase);
616
617#endif
618
619#if __cplusplus
620}
621#endif
622
Note: See TracBrowser for help on using the repository browser.