source: trunk/include/helpers/xml.h@ 38

Last change on this file since 38 was 38, checked in by umoeller, 25 years ago

Updates to XML.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 23.1 KB
Line 
1
/*
 *@@sourcefile xml.h:
 *      header file for xml.c (XML parsing).
 *
 *      See remarks there.
 *
 *@@added V0.9.6 (2000-10-29) [umoeller]
 *@@include #include <os2.h>
 *@@include #include "expat\expat.h" // must come before xml.h
 *@@include #include "helpers\linklist.h"
 *@@include #include "helpers\tree.h"
 *@@include #include "helpers\xstring.h"
 *@@include #include "helpers\xml.h"
 */
16
17#if __cplusplus
18extern "C" {
19#endif
20
21#ifndef XML_HEADER_INCLUDED
22 #define XML_HEADER_INCLUDED
23
// Fallback definitions of the basic OS/2 types, so that this header can
// also be used with standard C when the OS/2 headers were not included.
// __SIMPLES_DEFINED is set at the end so that the definitions are only
// emitted once.
// changed V0.9.0 (99-10-22) [umoeller]
// NOTE(review): APIRET, which the prototypes in this header use, is not
// defined here -- presumably it comes from another header; TODO confirm.
#if (!defined OS2_INCLUDED) && (!defined _OS2_H) && (!defined __SIMPLES_DEFINED)
    typedef unsigned long BOOL;
    typedef unsigned long ULONG;
    typedef unsigned char *PSZ;
    #define TRUE (BOOL)1
    #define FALSE (BOOL)0

    #ifdef __IBMCPP__               // added V0.9.0 (99-10-22) [umoeller]
        #define APIENTRY _System
    #endif

    #define __SIMPLES_DEFINED
#endif
38
39 typedef enum _DOMERROR
40 {
41 // validity errors:
42// START MATCHING ERROR MESSAGES (xmlDescribeError)
43 ERROR_DOM_UNDECLARED_ELEMENT = ERROR_EXPAT_AFTER_LAST,
44 // invalidity: element is undeclared
45 ERROR_DOM_ROOT_ELEMENT_MISNAMED,
46 ERROR_DOM_INVALID_ROOT_ELEMENT,
47 ERROR_DOM_INVALID_SUBELEMENT,
48 // subelement may not appear in its parent element
49 ERROR_DOM_DUPLICATE_ELEMENT_DECL,
50 // more than one declaration for an element type
51 ERROR_DOM_DUPLICATE_ATTRIBUTE_DECL,
52 // more than one declaration for an attribute type
53 ERROR_DOM_UNDECLARED_ATTRIBUTE,
54// END MATCHING ERROR MESSAGES (xmlDescribeError)
55
56 // error categories:
57 ERROR_DOM_PARSING,
58 ERROR_DOM_VALIDITY,
59
60 // additional DOM errors
61 ERROR_DOM_NODETYPE_NOT_SUPPORTED,
62 // invalid node type in xmlCreateNode
63 ERROR_DOM_NO_DOCUMENT,
64 // cannot find document node
65 ERROR_DOM_NO_ELEMENT,
66 ERROR_DOM_DUPLICATE_DOCTYPE,
67 ERROR_DOM_DOCTYPE_STRUCTURE,
68 // DOCTYPE is given and root element name does not match doctype name
69 ERROR_DOM_INTEGRITY,
70 ERROR_DOM_DUPLICATE_ATTRIBUTE
71 } DOMERROR;
72
73 /* ******************************************************************
74 *
75 * Most basic node management
76 *
77 ********************************************************************/
78
// Node types stored in NODEBASE.ulNodeType. One enumeration covers the
// DOM node types, the element particle types and the attribute
// declaration types, so every NODEBASE-derived structure can be
// identified from this one field.
typedef enum _NODEBASETYPE
{
    DOMNODE_ELEMENT,                    // node is a DOM ELEMENT
    DOMNODE_ATTRIBUTE,                  // node is a DOM ATTRIBUTE
    DOMNODE_TEXT,                       // node is a DOM TEXT node
    // not implemented: DOMNODE_CDATA_SECTION (4),
    //                  DOMNODE_ENTITY_REFERENCE (5),
    //                  DOMNODE_ENTITY (6)
    DOMNODE_PROCESSING_INSTRUCTION,     // node is a DOM PI
    DOMNODE_COMMENT,                    // node is a DOM COMMENT
    DOMNODE_DOCUMENT,                   // node is a DOM document
    DOMNODE_DOCUMENT_TYPE,              // node is a DOM DOCUMENTTYPE
    // not implemented: DOMNODE_DOCUMENT_FRAGMENT (11),
    //                  DOMNODE_NOTATION (12)

    // the following are all CMELEMENTPARTICLE nodes
    ELEMENTPARTICLE_EMPTY,
    ELEMENTPARTICLE_ANY,
    ELEMENTPARTICLE_MIXED,
    ELEMENTPARTICLE_CHOICE,
    ELEMENTPARTICLE_SEQ,
    ELEMENTPARTICLE_NAME,

    ATTRIBUTE_DECLARATION_BASE,         // node is a CMATTRIBUTEDECLBASE
    ATTRIBUTE_DECLARATION,              // node is a CMATTRIBUTEDECL
    ATTRIBUTE_DECLARATION_ENUM          // node is a plain NODEBASE, part of an attr value enum
} NODEBASETYPE;
107
108 /*
109 *@@ NODEBASE:
110 * "content model" node. With the DOM content models,
111 * this represents an entry in a DTD or XML schema.
112 *
113 *@@added V0.9.9 (2001-02-14) [umoeller]
114 */
115
116 typedef struct _NODEBASE
117 {
118 TREE Tree;
119
120 NODEBASETYPE ulNodeType;
121
122 XSTRING strNodeName;
123 // node name;
124 // -- for the various DOMNODE_* items, see _DOMNODE;
125 // -- for CMELEMENTPARTICLE nodes, this is the particle's name
126 // -- for CMELEMENTDECLNODE nodes, element name being declared
127 // -- for CMATTRIBUTEDECLBASE nodes, name of element to which this
128 // attrib decl belongs
129 // -- for CMATTRIBUTEDECL nodes, name of attribute;
130 // -- for ATTRIBUTE_DECLARATION_ENUM, attribute value in the
131 // possible values list.
132
133 } NODEBASE, *PNODEBASE;
134
135 /* ******************************************************************
136 *
137 * DOM level 1
138 *
139 ********************************************************************/
140
141 /*
142 *@@ DOMNODE:
143 * this represents one @DOM node in an @XML document.
144 *
145 * The document itself is represented by a node with the
146 * DOMNODE_DOCUMENT type, which is the root of a tree as
147 * shown with xmlParse.
148 *
149 * The contents of the members vary according
150 * to ulNodeType (0 specifies that the field does not
151 * apply to that type).
152 *
153 * The first member of a DOMNODE is a NODEBASE to allow
154 * inserting these things in a tree. NODEBASE.ulNodeType
155 * _always_ specifies the various types that are using
156 * that structure to allow for type-safety (if we watch out).
157 * This is for faking inheritance.
158 *
159 * Note that we also implement specialized sub-structures of
160 * DOMNODE, whose first member is the DOMNODE (and therefore
161 * a NODEBASE as well):
162 *
163 * -- DOCUMENT nodes are given a _DOMDOCUMENTNODE structure.
164 *
165 * -- DOCTYPE nodes are given a _DOMDOCTYPENODE structure.
166 *
167 * Overview of member fields usage:
168 +
169 + ulNodeType | strNodeName | strNodeValue | llChildren | llAttributes
170 + =======================================================================
171 + | | | |
172 + DOCUMENT | name from | 0 | 1 root | 0
173 + | DOCTYPE or | | ELEMENT |
174 + | NULL | | |
175 + | | | |
176 + --------------+-------------+--------------+------------+--------------
177 + | | | |
178 + ELEMENT | tag name | 0 8 | ELEMENT | ATTRIBUTE
179 + | | | nodes | nodes
180 + | | | |
181 + --------------+-------------+--------------+------------+--------------
182 + | | | |
183 + ATTRIBUTE | attribute | attribute | 0 | 0
184 + | name | value | |
185 + | | | |
186 + --------------+-------------+--------------+------------+--------------
187 + | | | |
188 + TEXT | 0 | text | 0 | 0
189 + | | contents | |
190 + | | | |
191 + --------------+-------------+--------------+------------+--------------
192 + | | | |
193 + COMMENT | 0 | comment | 0 | 0
194 + | | contents | |
195 + | | | |
196 + --------------+-------------+--------------+------------+--------------
197 + | | | |
198 + PI | PI target | PI data | 0 | 0
199 + | | | |
200 + | | | |
201 + --------------+-------------+--------------+------------+--------------
202 + | | | |
203 + DOCTYPE | doctype | | 0 | 0
204 + | name | | |
205 + | | | |
206 +
207 * The xwphelpers implementation does not implement CDATA sections,
208 * for which we have no need because @expat properly converts these
209 * into plain @content.
210 *
211 * In addition, W3C DOM specifies that the "node name" members contain
212 * "#document", "#text", and "#comment" strings for DOCUMENT,
213 * TEXT, and COMMENT nodes, respectively. I see no point in this other
214 * than consuming memory, so these fields are empty with this implementation.
215 */
216
217 typedef struct _DOMNODE
218 {
219 NODEBASE NodeBase;
220
221 // PXSTRING pstrNodeName; // ptr is NULL if none
222 PXSTRING pstrNodeValue; // ptr is NULL if none
223
224 struct _DOMNODE *pParentNode;
225 // the parent node;
226 // NULL for DOCUMENT, DOCUMENT_FRAGMENT.
227 // The DOM spec says that attribs have no parent,
228 // but even though the attribute is not added to
229 // the "children" list of an element (but to the
230 // attributes map instead), we specify the element
231 // as the attribute's parent here.
232
233 struct _DOMNODE *pDocumentNode;
234 // the document node, unless this is a DOCUMENT in itself.
235
236 LINKLIST llChildren; // of DOMNODE* pointers, no auto-free
237
238 TREE *AttributesMap; // of DOMNODE* pointers
239
240 } DOMNODE, *PDOMNODE;
241
242 /*
243 *@@ DOMDOCTYPENODE:
244 * specific _DOMNODE replacement structure which
245 * is used for DOCTYPE nodes.
246 *
247 * The DOMDOCTYPENODE is special (other than having
248 * extra fields) in that it is stored both in
249 * the document node's children list and in its
250 * pDocType field.
251 *
252 * DOMNODE.pstrNodeName is set to the name in the
253 * DOCTYPE statement by xmlCreateDocumentTypeNode,
254 * or is NULL if there's no DOCTYPE.
255 *
256 *@@added V0.9.9 (2001-02-14) [umoeller]
257 */
258
259 typedef struct _DOMDOCTYPENODE
260 {
261 DOMNODE DomNode;
262
263 XSTRING strPublicID;
264 XSTRING strSystemID;
265
266 BOOL fHasInternalSubset;
267
268 TREE *ElementDeclsTree;
269 // tree with pointers to _CMELEMENTDECLNODE nodes
270
271 TREE *AttribDeclBasesTree;
272 // tree with pointers to _CMATTRIBUTEDEDECLBASE nodes
273
274 } DOMDOCTYPENODE, *PDOMDOCTYPENODE;
275
276 /*
277 *@@ DOMDOCUMENTNODE:
278 * specific _DOMNODE replacement structure which
279 * is used for DOCUMENT nodes.
280 *
281 *@@added V0.9.9 (2001-02-14) [umoeller]
282 */
283
284 typedef struct _DOMDOCUMENTNODE
285 {
286 DOMNODE DomNode;
287
288 PDOMDOCTYPENODE pDocType;
289 // != NULL if DOCTYPE was found
290
291 } DOMDOCUMENTNODE, *PDOMDOCUMENTNODE;
292
293 APIRET xmlCreateNode(PDOMNODE pParentNode,
294 ULONG ulNodeType,
295 PDOMNODE *ppNew);
296
297 ULONG xmlDeleteNode(PDOMNODE pNode);
298
299 /* ******************************************************************
300 *
301 * DOM level 3 content models
302 *
303 ********************************************************************/
304
// data types (XML Schema):
// NOTE(review): nothing else in this header refers to these constants.
#define STRING_DATATYPE     1
#define BOOLEAN_DATATYPE    2
#define FLOAT_DATATYPE      3
#define DOUBLE_DATATYPE     4
#define LONG_DATATYPE       5
#define INT_DATATYPE        6
#define SHORT_DATATYPE      7
#define BYTE_DATATYPE       8
314
315 /*
316 *@@ CMELEMENTPARTICLE:
317 * element declaration particle in a
318 * _CMELEMENTDECLNODE.
319 *
320 * One of these structures is a full
321 * (non-pointer) member in _CMELEMENTDECLNODE.
322 * This struct in turn has a linked list with
323 * possible subnodes. See _CMELEMENTDECLNODE.
324 *
325 *@@added V0.9.9 (2001-02-16) [umoeller]
326 */
327
328 typedef struct _CMELEMENTPARTICLE
329 {
330 NODEBASE CMNode; // has TREE* as first item in turn
331 // NODEBASE.ulCMNodeType may be one of these:
332 // -- ELEMENTPARTICLE_EMPTY:
333 // ulRepeater will be XML_CQUANT_NONE, rest is NULL
334 // -- ELEMENTPARTICLE_ANY:
335 // ulRepeater will be XML_CQUANT_NONE, rest is NULL
336 // -- ELEMENTPARTICLE_MIXED:
337 // mixed content (with PCDATA); if the list contains
338 // something, the element may have PCDATA and sub-elements
339 // mixed
340 // -- ELEMENTPARTICLE_CHOICE:
341 // list is a choicelist
342 // -- ELEMENTPARTICLE_SEQ:
343 // list is a seqlist
344 // -- ELEMENTPARTICLE_NAME:
345 // used for terminal particles in a parent particle's
346 // list, which finally specifies the name of a sub-particle.
347 // This can never appear in a root particle.
348
349 ULONG ulRepeater;
350 // one of:
351 // -- XML_CQUANT_NONE --> all fields below are NULL
352 // -- XML_CQUANT_OPT,
353 // -- XML_CQUANT_REP,
354 // -- XML_CQUANT_PLUS
355
356 PLINKLIST pllSubNodes;
357 // linked list of sub-CMELEMENTPARTICLE structs
358 // (for mixed, choice, seq types);
359 // if NULL, there's no sub-CMELEMENTPARTICLE
360
361 } CMELEMENTPARTICLE, *PCMELEMENTPARTICLE;
362
363 /*
364 *@@ CMELEMENTDECLNODE:
365 * representation of an @element_declaration within a
366 * _DOMDOCTYPENODE (a document @DTD).
367 *
368 * This is complicated because element declarations
369 * are complicated with nested lists and content
370 * particles. For this, we introduce the representation
371 * of a _CMELEMENTPARTICLE, which is contained in the
372 * "Particle" member.
373 *
374 * For minimal memory consumption, the _CMELEMENTDECLNODE
375 * is an _CMELEMENTPARTICLE with extra fields, while the
376 * list in _CMELEMENTPARTICLE points to plain
377 * _CMELEMENTPARTICLE structs only.
378 *
379 * For the "root" element declaration in the DTD,
380 * Particle.NODEBASE.ulCMNodeType will always be one of the following:
381 *
382 * -- ELEMENTPARTICLE_EMPTY: element must be empty.
383 *
384 * -- ELEMENTPARTICLE_ANY: element can have any content.
385 *
386 * -- ELEMENTPARTICLE_CHOICE: _CMELEMENTPARTICLE has a choicelist with
387 * more _CMELEMENTPARTICLE structs.
388 *
389 * -- ELEMENTPARTICLE_SEQ: _CMELEMENTPARTICLE has a seqlist with
390 * more _CMELEMENTPARTICLE structs.
391 *
392 * -- ELEMENTPARTICLE_MIXED: element can have mixed content including #PCDATA.
393 * If there is no content particle list, then the element may
394 * ONLY have PCDATA. If there's a content particle list, then the
395 * element may have both sub-elements and PCDATA. Oh my.
396 *
397 *@@added V0.9.9 (2001-02-14) [umoeller]
398 */
399
400 typedef struct _CMELEMENTDECLNODE
401 {
402 CMELEMENTPARTICLE Particle;
403 // root particle for this element decl; this may contain
404 // sub-particles...
405 // this has a NODEBASE as first member, which has TREE* as
406 // first item in turn
407
408 TREE *ParticleNamesTree;
409 // tree sorted by element names with all sub-particles,
410 // no matter how deeply nested; this is just for quickly
411 // checking if an element name is allowed as a sub-element
412 // at all. Tree items are _CMELEMENTPARTICLE nodes.
413
414 } CMELEMENTDECLNODE, *PCMELEMENTDECLNODE;
415
// Constraint of an attribute declaration; stored in
// CMATTRIBUTEDECL.ulConstraint.
typedef enum _ATTRIBCONSTRAINT
{
    CMAT_IMPLIED,           // attrib can have any value
    CMAT_REQUIRED,          // attrib must be specified
    CMAT_DEFAULT_VALUE,     // attrib is optional and has a default value
    CMAT_FIXED_VALUE        // attrib is optional, but must have the fixed value
} ATTRIBCONSTRAINT;
423
// Type of an attribute declaration; stored in
// CMATTRIBUTEDECL.ulAttrType.
typedef enum _ATTRIBTYPE
{
    CMAT_CDATA,
    CMAT_ID,
    CMAT_IDREF,
    CMAT_IDREFS,
    CMAT_ENTITY,
    CMAT_ENTITIES,
    CMAT_NMTOKEN,
    CMAT_NMTOKENS,
    CMAT_ENUM               // allowed values are enumerated in the declaration
} ATTRIBTYPE;
436
437 /*
438 *@@ CMATTRIBUTEDECL:
439 * single attribute declaration within the attribute
440 * declarations tree in _CMATTRIBUTEDEDECLBASE.
441 *
442 *@@added V0.9.9 (2001-02-16) [umoeller]
443 */
444
445 typedef struct _CMATTRIBUTEDECL
446 {
447 NODEBASE CMNode; // has TREE* as first item in turn
448 // CMNode.strName is attribute name
449
450 // XSTRING strType;
451
452 ATTRIBTYPE ulAttrType;
453 // one of:
454 // -- CMAT_CDATA
455 // -- CMAT_ID
456 // -- CMAT_IDREF
457 // -- CMAT_IDREFS
458 // -- CMAT_ENTITY
459 // -- CMAT_ENTITIES
460 // -- CMAT_NMTOKEN
461 // -- CMAT_NMTOKENS
462 // -- CMAT_ENUM: pllEnum lists the allowed values.
463 TREE *ValuesTree;
464 // enumeration of allowed values, if CMAT_ENUM;
465 // tree entries are plain NODEBASEs
466
467 ATTRIBCONSTRAINT ulConstraint;
468 // one of:
469 // -- CMAT_IMPLIED: attrib can have any value.
470 // -- CMAT_REQUIRED: attrib must be specified.
471 // -- CMAT_DEFAULT_VALUE: attrib is optional and has default
472 // value as in pstrDefaultValue.
473 // -- CMAT_FIXED_VALUE: attrib is optional, but must have
474 // fixed value as in pstrDefaultValue.
475 PXSTRING pstrDefaultValue;
476 // default value of this attribute; NULL with implied or required
477
478 } CMATTRIBUTEDECL, *PCMATTRIBUTEDECL;
479
480 /*
481 *@@ CMATTRIBUTEDEDECLBASE:
482 * representation of an @attribute_declaration.
483 *
484 * I'd love to have stored the attribute declarations with
485 * the element specifications, but the XML spec says that
486 * attribute declarations are allowed even if no element
487 * declaration exists for the element to which the attribute
488 * belongs. Now, whatever this is good for... anyway, this
489 * forces us to do a second tree in the _DOMDOCTYPENODE node
490 * according to attribute's element names.
491 *
492 *@@added V0.9.9 (2001-02-14) [umoeller]
493 */
494
495 typedef struct _CMATTRIBUTEDEDECLBASE
496 {
497 NODEBASE CMNode; // has TREE* as first item in turn
498 // CMNode.strName is element name
499
500 TREE *AttribDeclsTree;
501 // root of tree with CMATTRIBUTEDECL;
502
503 } CMATTRIBUTEDEDECLBASE, *PCMATTRIBUTEDEDECLBASE;
504
505 /*
506 *@@ CMENTITYDECLNODE:
507 *
508 * See @entity_declaration.
509 *
510 *@@added V0.9.9 (2001-02-14) [umoeller]
511 */
512
513 typedef struct _CMENTITYDECLNODE
514 {
515 NODEBASE CMNode;
516 } CMENTITYDECLNODE, *PCMENTITYDECLNODE;
517
518 /*
519 *@@ CMNOTATIONDECLNODE:
520 *
521 * See @notation_declaration.
522 *
523 *@@added V0.9.9 (2001-02-14) [umoeller]
524 */
525
526 typedef struct _CMNOTATIONDECLNODE
527 {
528 NODEBASE CMNode;
529 } CMNOTATIONDECLNODE, *PCMNOTATIONDECLNODE;
530
531 /* ******************************************************************
532 *
533 * DOM APIs
534 *
535 ********************************************************************/
536
537 /*
538 *@@ XMLDOM:
539 * DOM instance returned by xmlCreateDOM.
540 *
541 *@@added V0.9.9 (2001-02-14) [umoeller]
542 */
543
544 typedef struct _XMLDOM
545 {
546 /*
547 * Public fields (should be read only)
548 */
549
550 PDOMDOCUMENTNODE pDocumentNode;
551
552 PDOMDOCTYPENODE pDocTypeNode;
553 // != NULL only if the document has a DOCTYPE
554
555 APIRET arcDOM; // validation errors etc.
556 BOOL fInvalid; // TRUE after validation failed
557
558 const char *pcszErrorDescription;
559 ULONG ulErrorLine;
560 ULONG ulErrorColumn;
561 PXSTRING pxstrFailingNode; // element or attribute name
562
563 /*
564 * Private fields (for xml* functions)
565 */
566
567 XML_Parser pParser;
568 // expat parser instance
569
570 LINKLIST llStack;
571 // stack for maintaining the current items;
572 // these point to the NODERECORDs (no auto-free)
573
574 PDOMNODE pLastWasTextNode;
575
576 PCMATTRIBUTEDEDECLBASE pAttListDeclCache;
577 // cache for attribute declarations according
578 // to attdecl element name
579 } XMLDOM, *PXMLDOM;
580
// Parser flag bits.
// NOTE(review): presumably these are the values for the flParserFlags
// parameter of xmlCreateDOM -- confirm in xml.c.
#define DF_PARSECOMMENTS        0x0001
#define DF_PARSEDTD             0x0002
583
584 APIRET xmlCreateDOM(ULONG flParserFlags,
585 PXMLDOM *ppDom);
586
587 APIRET xmlParse(PXMLDOM pDom,
588 const char *pcszBuf,
589 ULONG cb,
590 BOOL fIsLast);
591
592 APIRET xmlFreeDOM(PXMLDOM pDom);
593
594 PCMELEMENTDECLNODE xmlFindElementDecl(PXMLDOM pDom,
595 const XSTRING *pstrElementName);
596
597 PCMATTRIBUTEDEDECLBASE xmlFindAttribDeclBase(PXMLDOM pDom,
598 const XSTRING *pstrElementName);
599
600 PCMATTRIBUTEDECL xmlFindAttribDecl(PXMLDOM pDom,
601 const XSTRING *pstrElementName,
602 const XSTRING *pstrAttribName);
603
604#endif
605
606#if __cplusplus
607}
608#endif
609
Note: See TracBrowser for help on using the repository browser.