source: trunk/src/helpers/xml.c@ 35

Last change on this file since 35 was 35, checked in by umoeller, 25 years ago

Added XML.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 16.8 KB
Line 
1
2/*
3 *@@sourcefile xml.c:
4 * XML document handling.
5 *
6 * XML support in the XWorkplace Helpers is broken into two
7 * layers:
8 *
9 * -- The bottom layer is implemented by expat, which I have
10 * ported to this library. See xmlparse.c for an introduction.
11 *
12 * -- Because expat requires so many callbacks and is non-validating,
13 * I have added a top layer above the expat library
14 * which is vaguely modelled after the Document Object Model
15 * (DOM) standardized by the W3C. That's this file.
16 *
17 * To understand and use this code, you should be familiar with
18 * the following:
19 *
20 * -- XML parsers operate on XML @documents.
21 *
22 * -- Each XML document has both a logical and a physical
23 * structure.
24 *
25 * Physically, the document is composed of units called
26 * @entities.
27 *
28 * Logically, the document is composed of @markup and
29 * @content. Among other things, markup separates the content
30 * into @elements.
31 *
32 * -- The logical and physical structures must nest properly (be
33 * @well-formed) for each entity, which results in the entire
34 * XML document being well-formed as well.
35 *
36 * <B>Document Object Model (DOM)</B>
37 *
38 * In short, DOM specifies that an XML document is broken
39 * up into a tree of nodes, representing the various parts
40 * of an XML document. The W3C calls this "a platform- and
41 * language-neutral interface that allows programs and scripts
42 * to dynamically access and update the content, structure
43 * and style of documents. The Document Object Model provides
44 * a standard set of objects for representing HTML and XML
45 * documents, a standard model of how these objects can
46 * be combined, and a standard interface for accessing and
47 * manipulating them. Vendors can support the DOM as an
48 * interface to their proprietary data structures and APIs,
49 * and content authors can write to the standard DOM
50 * interfaces rather than product-specific APIs, thus
51 * increasing interoperability on the Web."
52 *
53 * Example: Take this HTML table definition:
54 +
55 + <TABLE>
56 + <TBODY>
57 + <TR>
58 + <TD>Column 1-1</TD>
59 + <TD>Column 1-2</TD>
60 + </TR>
61 + <TR>
62 + <TD>Column 2-1</TD>
63 + <TD>Column 2-2</TD>
64 + </TR>
65 + </TBODY>
66 + </TABLE>
67 *
68 * This function will create a tree as follows:
69 +
70 +                   +-----------+
71 +                   |   TABLE   |  (only ELEMENT node in root DOCUMENT node)
72 +                   +-----+-----+
73 +                         |
74 +                   +-----+-----+
75 +                   |   TBODY   |  (only ELEMENT node in root "TABLE" node)
76 +                   +-----+-----+
77 +             +-----------+------------+
78 +         +---+---+                +---+---+
79 +         |  TR   |                |  TR   |
80 +         +---+---+                +---+---+
81 +       +-----+-----+            +-----+------+
82 +    +--+--+     +--+--+      +--+--+      +--+--+
83 +    |  TD |     |  TD |      |  TD |      |  TD |
84 +    +--+--+     +--+--+      +--+--+      +--+--+
85 + +=====+====+ +====+=====+ +====+=====+ +====+=====+
86 + |Column 1-1| |Column 1-2| |Column 2-1| |Column 2-2|  (one TEXT node in each parent node)
87 + +==========+ +==========+ +==========+ +==========+
88 *
89 * DOM really calls for object oriented programming so the various
90 * structs can inherit from each other. Since this implementation
91 * was supposed to be a C-only interface, we do not implement
92 * inheritance. Instead, each XML document is broken up into a tree
93 * of DOMNODE's only, each of which has a special type.
94 *
95 * It shouldn't be too difficult to write a C++ encapsulation
96 * of this which implements all the methods required by the DOM
97 * standard.
98 *
99 * The main entry point into this is xmlParse or
100 * xmlCreateDocumentFromString. See remarks there for details.
101 *
102 * Limitations:
103 *
104 * 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
105 * and COMMENT nodes.
106 *
107 * 2) This doesn't use 16-bit characters, but 8-bit characters.
108 *
109 *@@header "helpers\xml.h"
110 *@@added V0.9.6 (2000-10-29) [umoeller]
111 */
112
113/*
114 * Copyright (C) 2000-2001 Ulrich Möller.
115 * This file is part of the "XWorkplace helpers" source package.
116 * This is free software; you can redistribute it and/or modify
117 * it under the terms of the GNU General Public License as published
118 * by the Free Software Foundation, in version 2 as it comes in the
119 * "COPYING" file of the XWorkplace main distribution.
120 * This program is distributed in the hope that it will be useful,
121 * but WITHOUT ANY WARRANTY; without even the implied warranty of
122 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
123 * GNU General Public License for more details.
124 */
125
126#define OS2EMX_PLAIN_CHAR
127 // this is needed for "os2emx.h"; if this is defined,
128 // emx will define PSZ as _signed_ char, otherwise
129 // as unsigned char
130
131#define INCL_DOSERRORS
132#include <os2.h>
133
134#include <stdlib.h>
135#include <string.h>
136
137#include "setup.h" // code generation and debugging options
138
139#include "expat\expat.h"
140
141#include "helpers\linklist.h"
142#include "helpers\stringh.h"
143#include "helpers\xstring.h"
144#include "helpers\xml.h"
145
146#pragma hdrstop
147
148/*
149 *@@category: Helpers\C helpers\XML
150 * see xml.c.
151 */
152
153/*
154 *@@category: Helpers\C helpers\XML\Document Object Model (DOM)
155 * see xml.c.
156 */
157
158/* ******************************************************************
159 *
160 * Node management
161 *
162 ********************************************************************/
163
164/*
165 *@@ xmlCreateNode:
166 * creates a new DOMNODE with the specified
167 * type and parent. Other than that, the
168 * node is zeroed.
169 */
170
171PDOMNODE xmlCreateNode(PDOMNODE pParentNode, // in: parent node or NULL if root
172 ULONG ulNodeType) // in: DOMNODE_* type
173{
174 PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
175 if (pNewNode)
176 {
177 memset(pNewNode, 0, sizeof(DOMNODE));
178 pNewNode->ulNodeType = ulNodeType;
179 pNewNode->pParentNode = pParentNode;
180 if (pParentNode)
181 {
182 // parent specified:
183 // append this new node to the parent's
184 // list of child nodes
185 lstAppendItem(&pParentNode->llChildNodes,
186 pNewNode);
187 }
188
189 lstInit(&pNewNode->llChildNodes, FALSE);
190 lstInit(&pNewNode->llAttributeNodes, FALSE);
191 }
192
193 return (pNewNode);
194}
195
196/*
197 *@@ xmlDeleteNode:
198 * deletes the specified node.
199 *
200 * If the node has child nodes, all of them are deleted
201 * as well. This recurses, if necessary.
202 *
203 * As a result, if the node is a document node, this
204 * deletes an entire document, including all of its
205 * child nodes.
206 *
207 * Returns:
208 *
209 * -- 0: NO_ERROR.
210 */
211
212ULONG xmlDeleteNode(PDOMNODE pNode)
213{
214 ULONG ulrc = 0;
215
216 if (!pNode)
217 {
218 ulrc = ERROR_DOM_NOT_FOUND;
219 }
220 else
221 {
222 // recurse into child nodes
223 PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->llChildNodes);
224 while (pNodeThis)
225 {
226 // recurse!!
227 xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
228
229 pNodeThis = pNodeThis->pNext;
230 }
231
232 // delete attribute nodes
233 pNodeThis = lstQueryFirstNode(&pNode->llAttributeNodes);
234 while (pNodeThis)
235 {
236 // recurse!!
237 xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
238
239 pNodeThis = pNodeThis->pNext;
240 }
241
242 if (pNode->pParentNode)
243 {
244 // node has a parent:
245 // remove this node from the parent's list
246 // of child nodes before deleting this node
247 lstRemoveItem(&pNode->pParentNode->llChildNodes,
248 pNode);
249 pNode->pParentNode = NULL;
250 }
251
252 xstrClear(&pNode->strNodeName);
253 xstrClear(&pNode->strNodeValue);
254
255 lstClear(&pNode->llChildNodes);
256 lstClear(&pNode->llAttributeNodes);
257
258 free(pNode);
259 }
260
261 return (ulrc);
262}
263
264/* ******************************************************************
265 *
266 * Expat handlers
267 *
268 ********************************************************************/
269
270/*
271 *@@ StartElementHandler:
272 * expat handler called when a new element is
273 * found.
274 *
275 * We create a new record in the container and
276 * push it onto our stack so we can insert
277 * children into it. We first start with the
278 * attributes.
279 */
280
281void EXPATENTRY StartElementHandler(void *data, // in: our PXMLFILE really
282 const char *pcszElement,
283 const char **papcszAttribs)
284{
285 PXMLDOM pDom = (PXMLDOM)data;
286
287 ULONG i;
288
289 PDOMNODE pParent = NULL,
290 pNew = NULL;
291
292 PLISTNODE pParentNode = lstPop(&pDom->llStack);
293
294 if (pParentNode)
295 {
296 // non-root level:
297 pParent = (PDOMNODE)pParentNode->pItemData;
298
299 pNew = xmlCreateNode(pParent,
300 DOMNODE_ELEMENT);
301
302 if (pNew)
303 xstrcpy(&pNew->strNodeName, pcszElement, 0);
304
305 // push this on the stack so we can add child elements
306 lstPush(&pDom->llStack, pNew);
307
308 // now for the attribs
309 for (i = 0;
310 papcszAttribs[i];
311 i += 2)
312 {
313 PDOMNODE pAttrNode = xmlCreateNode(pNew, // element
314 DOMNODE_ATTRIBUTE);
315 if (pAttrNode)
316 {
317 xstrcpy(&pAttrNode->strNodeName, papcszAttribs[i], 0);
318 xstrcpy(&pAttrNode->strNodeValue, papcszAttribs[i + 1], 0);
319 }
320 }
321 }
322
323 pDom->pLastWasTextNode = NULL;
324}
325
326/*
327 *@@ EndElementHandler:
328 *
329 */
330
331void EXPATENTRY EndElementHandler(void *data, // in: our PXMLFILE really
332 const XML_Char *name)
333{
334 PXMLDOM pDom = (PXMLDOM)data;
335 PLISTNODE pNode = lstPop(&pDom->llStack);
336 if (pNode)
337 lstRemoveNode(&pDom->llStack, pNode);
338
339 pDom->pLastWasTextNode = NULL;
340}
341
342/*
343 *@@ CharacterDataHandler:
344 *
345 */
346
347void EXPATENTRY CharacterDataHandler(void *userData, // in: our PXMLFILE really
348 const XML_Char *s,
349 int len)
350{
351 PXMLDOM pDom = (PXMLDOM)userData;
352
353 ULONG i;
354
355 if (len)
356 {
357 if (pDom->pLastWasTextNode)
358 {
359 // we had a text node, and no elements or other
360 // stuff in between:
361 xstrcat(&pDom->pLastWasTextNode->strNodeValue,
362 s,
363 len);
364 }
365 else
366 {
367 // we need a new text node:
368 PDOMNODE pNew,
369 pParent;
370 // non-root level:
371 PLISTNODE pParentNode = lstPop(&pDom->llStack);
372 pParent = (PDOMNODE)pParentNode->pItemData;
373
374 pNew = xmlCreateNode(pParent,
375 DOMNODE_TEXT);
376 if (pNew)
377 {
378 PSZ pszNodeValue = (PSZ)malloc(len + 1);
379 memcpy(pszNodeValue, s, len);
380 pszNodeValue[len] = '\0';
381 xstrInitSet(&pNew->strNodeValue, pszNodeValue);
382 }
383
384 pDom->pLastWasTextNode = pNew;
385 }
386 }
387}
388
389/* ******************************************************************
390 *
391 * DOM APIs
392 *
393 ********************************************************************/
394
395/*
396 *@@ xmlCreateDOM:
397 *
398 * Usage:
399 *
400 * 1) Create a DOM instance.
401 *
402 + PXMLDOM pDom = NULL;
403 + APIRET arc = xmlCreateDom(flags, &pDom);
404 +
405 * 2) Give chunks of data (or an entire buffer)
406 * to the DOM instance for parsing.
407 *
408 + arc = xmlParse(pDom,
409 + pBuf,
410 + TRUE); // if last, this will clean up the parser
411 *
412 * 3) Process the data in the DOM tree. When done,
413 * call xmlFreeDOM, which will free all memory.
414 *
415 *@@added V0.9.9 (2000-02-14) [umoeller]
416 */
417
418APIRET xmlCreateDOM(ULONG flParserFlags,
419 PXMLDOM *ppDom)
420{
421 APIRET arc = NO_ERROR;
422
423 PXMLDOM pDom = (PXMLDOM)malloc(sizeof(XMLDOM));
424 if (!pDom)
425 arc = ERROR_NOT_ENOUGH_MEMORY;
426 else
427 {
428 memset(pDom, 0, sizeof(XMLDOM));
429
430 lstInit(&pDom->llStack,
431 FALSE); // no auto-free
432
433 // create the document node
434 pDom->pDocumentNode = xmlCreateNode(NULL, // no parent
435 DOMNODE_DOCUMENT);
436
437 if (!pDom->pDocumentNode)
438 arc = ERROR_NOT_ENOUGH_MEMORY;
439 else
440 {
441 // push the document on the stack so the handlers
442 // will append to that
443 lstPush(&pDom->llStack,
444 pDom->pDocumentNode);
445
446 pDom->pParser = XML_ParserCreate(NULL);
447
448 if (!pDom->pParser)
449 arc = ERROR_NOT_ENOUGH_MEMORY;
450 else
451 {
452 XML_SetElementHandler(pDom->pParser,
453 StartElementHandler,
454 EndElementHandler);
455
456 XML_SetCharacterDataHandler(pDom->pParser,
457 CharacterDataHandler);
458
459 // pass the XMLDOM as user data to the handlers
460 XML_SetUserData(pDom->pParser,
461 pDom);
462
463 }
464 }
465 }
466
467 if (arc == NO_ERROR)
468 *ppDom = pDom;
469 else
470 xmlFreeDOM(pDom);
471
472 return (arc);
473}
474
475/*
476 *@@ xmlParse:
477 * parses another piece of XML data.
478 *
479 * If (fIsLast == TRUE), the internal expat parser
480 * will be freed, but not the DOM itself.
481 *
482 * You can pass an XML document to this function
483 * in one flush. Set fIsLast = TRUE on the first
484 * and only call then.
485 *
486 * This returns NO_ERROR if the chunk was successfully
487 * parsed. Otherwise ERROR_DOM_PARSING is returned,
488 * and you will find error information in the XMLDOM
489 * fields.
490 *
491 *@@added V0.9.9 (2000-02-14) [umoeller]
492 */
493
494APIRET xmlParse(PXMLDOM pDom,
495 const char *pcszBuf,
496 ULONG cb,
497 BOOL fIsLast)
498{
499 APIRET arc = NO_ERROR;
500
501 if (!pDom)
502 arc = ERROR_INVALID_PARAMETER;
503 else
504 {
505 BOOL fSuccess = XML_Parse(pDom->pParser,
506 pcszBuf,
507 cb,
508 fIsLast);
509
510 if (!fSuccess)
511 {
512 // error:
513 pDom->Error = XML_GetErrorCode(pDom->pParser);
514 pDom->pcszErrorDescription = XML_ErrorString(pDom->Error);
515 pDom->ulErrorLine = XML_GetCurrentLineNumber(pDom->pParser);
516 pDom->ulErrorColumn = XML_GetCurrentColumnNumber(pDom->pParser);
517
518 if (pDom->pDocumentNode)
519 {
520 xmlDeleteNode(pDom->pDocumentNode);
521 pDom->pDocumentNode = NULL;
522 }
523
524 arc = ERROR_DOM_PARSING;
525 }
526
527
528 if (!fSuccess && fIsLast)
529 {
530 // last call or error: clean up
531 XML_ParserFree(pDom->pParser);
532 pDom->pParser = NULL;
533
534 // clean up the stack (but not the DOM itself)
535 lstClear(&pDom->llStack);
536 }
537 }
538
539 return (arc);
540}
541
542/*
543 *@@ xmlFreeDOM:
544 * cleans up all resources allocated by
545 * xmlCreateDOM and xmlParse, including
546 * the entire DOM tree.
547 *
548 * If you wish to keep any data, make
549 * a copy of the respective pointers in pDom
550 * or subitems and set them to NULL before
551 * calling this function.
552 *
553 *@@added V0.9.9 (2000-02-14) [umoeller]
554 */
555
556APIRET xmlFreeDOM(PXMLDOM pDom)
557{
558 APIRET arc = NO_ERROR;
559 if (pDom)
560 {
561 // if the parser is still alive for some reason, close it.
562 if (pDom->pParser)
563 {
564 XML_ParserFree(pDom->pParser);
565 pDom->pParser = NULL;
566 }
567
568 free(pDom);
569 }
570
571 return (arc);
572}
Note: See TracBrowser for help on using the repository browser.