1 |
|
---|
2 | /*
|
---|
3 | *@@sourcefile xml.c:
|
---|
4 | * XML parsing.
|
---|
5 | *
|
---|
6 | * This is vaguely modelled after the Document Object Model
|
---|
7 | * (DOM) standardized by the W3C.
|
---|
8 | *
|
---|
9 | * In short, DOM specifies that an XML document is broken
|
---|
10 | * up into a tree of nodes, representing the various parts
|
---|
11 | * of an XML document. Most importantly, we have:
|
---|
12 | *
|
---|
13 | * -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
|
---|
14 | *
|
---|
15 | * -- ATTRIBUTE: an attribute to an element.
|
---|
16 | *
|
---|
17 | * -- TEXT: a piece of, well, text.
|
---|
18 | *
|
---|
19 | * -- COMMENT: a comment.
|
---|
20 | *
|
---|
21 | * See xmlParse() for a more detailed explanation.
|
---|
22 | *
|
---|
23 | * However, since this implementation was supposed to be a
|
---|
24 | * C-only interface, we do not implement inheritance. Instead,
|
---|
25 | * each XML document is broken up into a tree of DOMNODE's only,
|
---|
26 | * each of which has a special type.
|
---|
27 | *
|
---|
28 | * It shouldn't be too difficult to write a C++ encapsulation
|
---|
29 | * of this which implements all the methods required by the DOM
|
---|
30 | * standard.
|
---|
31 | *
|
---|
32 | * The main entry point into this is xmlParse or
|
---|
33 | * xmlCreateDocumentFromString. See remarks there for details.
|
---|
34 | *
|
---|
35 | * Limitations:
|
---|
36 | *
|
---|
37 | * 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
|
---|
38 | * and COMMENT nodes.
|
---|
39 | *
|
---|
40 | * 2) This doesn't use 16-bit characters, but 8-bit characters.
|
---|
41 | *
|
---|
42 | *@@header "helpers\xml.h"
|
---|
43 | *@@added V0.9.6 (2000-10-29) [umoeller]
|
---|
44 | */
|
---|
45 |
|
---|
46 | /*
|
---|
47 | * Copyright (C) 2000 Ulrich Möller.
|
---|
48 | * This file is part of the "XWorkplace helpers" source package.
|
---|
49 | * This is free software; you can redistribute it and/or modify
|
---|
50 | * it under the terms of the GNU General Public License as published
|
---|
51 | * by the Free Software Foundation, in version 2 as it comes in the
|
---|
52 | * "COPYING" file of the XWorkplace main distribution.
|
---|
53 | * This program is distributed in the hope that it will be useful,
|
---|
54 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
55 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
56 | * GNU General Public License for more details.
|
---|
57 | */
|
---|
58 |
|
---|
59 | #define OS2EMX_PLAIN_CHAR
|
---|
60 | // this is needed for "os2emx.h"; if this is defined,
|
---|
61 | // emx will define PSZ as _signed_ char, otherwise
|
---|
62 | // as unsigned char
|
---|
63 |
|
---|
64 | #define INCL_DOSERRORS
|
---|
65 | #include <os2.h>
|
---|
66 |
|
---|
67 | #include <stdlib.h>
|
---|
68 | #include <string.h>
|
---|
69 |
|
---|
70 | #include "setup.h" // code generation and debugging options
|
---|
71 |
|
---|
72 | #include "helpers\linklist.h"
|
---|
73 | #include "helpers\stringh.h"
|
---|
74 | #include "helpers\xml.h"
|
---|
75 |
|
---|
76 | #pragma hdrstop
|
---|
77 |
|
---|
78 | /*
|
---|
79 | *@@category: Helpers\C helpers\XML\Node management
|
---|
80 | */
|
---|
81 |
|
---|
82 | /* ******************************************************************
|
---|
83 | *
|
---|
84 | * Node Management
|
---|
85 | *
|
---|
86 | ********************************************************************/
|
---|
87 |
|
---|
88 | /*
|
---|
89 | *@@ xmlCreateNode:
|
---|
90 | * creates a new DOMNODE with the specified
|
---|
91 | * type and parent.
|
---|
92 | */
|
---|
93 |
|
---|
94 | PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
|
---|
95 | ULONG ulNodeType)
|
---|
96 | {
|
---|
97 | PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
|
---|
98 | if (pNewNode)
|
---|
99 | {
|
---|
100 | memset(pNewNode, 0, sizeof(DOMNODE));
|
---|
101 | pNewNode->ulNodeType = ulNodeType;
|
---|
102 | pNewNode->pParentNode = pParentNode;
|
---|
103 | if (pParentNode)
|
---|
104 | {
|
---|
105 | // parent specified:
|
---|
106 | // append this new node to the parent's
|
---|
107 | // list of child nodes
|
---|
108 | lstAppendItem(&pParentNode->listChildNodes,
|
---|
109 | pNewNode);
|
---|
110 | }
|
---|
111 |
|
---|
112 | lstInit(&pNewNode->listChildNodes, FALSE);
|
---|
113 | lstInit(&pNewNode->listAttributeNodes, FALSE);
|
---|
114 | }
|
---|
115 |
|
---|
116 | return (pNewNode);
|
---|
117 | }
|
---|
118 |
|
---|
119 | /*
|
---|
120 | *@@ xmlDeleteNode:
|
---|
121 | * deletes the specified node.
|
---|
122 | *
|
---|
123 | * If the node has child nodes, all of them are deleted
|
---|
124 | * as well. This recurses, if necessary.
|
---|
125 | *
|
---|
126 | * As a result, if the node is a document node, this
|
---|
127 | * deletes an entire document, including all of its
|
---|
128 | * child nodes.
|
---|
129 | *
|
---|
130 | * Returns:
|
---|
131 | *
|
---|
132 | * -- 0: NO_ERROR.
|
---|
133 | */
|
---|
134 |
|
---|
135 | ULONG xmlDeleteNode(PDOMNODE pNode)
|
---|
136 | {
|
---|
137 | ULONG ulrc = 0;
|
---|
138 |
|
---|
139 | if (!pNode)
|
---|
140 | {
|
---|
141 | ulrc = DOMERR_NOT_FOUND;
|
---|
142 | }
|
---|
143 | else
|
---|
144 | {
|
---|
145 | // recurse into child nodes
|
---|
146 | PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
|
---|
147 | while (pNodeThis)
|
---|
148 | {
|
---|
149 | // recurse!!
|
---|
150 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
151 |
|
---|
152 | pNodeThis = pNodeThis->pNext;
|
---|
153 | }
|
---|
154 |
|
---|
155 | // delete attribute nodes
|
---|
156 | pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
|
---|
157 | while (pNodeThis)
|
---|
158 | {
|
---|
159 | // recurse!!
|
---|
160 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
161 |
|
---|
162 | pNodeThis = pNodeThis->pNext;
|
---|
163 | }
|
---|
164 |
|
---|
165 | if (pNode->pParentNode)
|
---|
166 | {
|
---|
167 | // node has a parent:
|
---|
168 | // remove this node from the parent's list
|
---|
169 | // of child nodes before deleting this node
|
---|
170 | lstRemoveItem(&pNode->pParentNode->listChildNodes,
|
---|
171 | pNode);
|
---|
172 | pNode->pParentNode = NULL;
|
---|
173 | }
|
---|
174 |
|
---|
175 | if (pNode->pszNodeName)
|
---|
176 | {
|
---|
177 | free(pNode->pszNodeName);
|
---|
178 | pNode->pszNodeName = NULL;
|
---|
179 | }
|
---|
180 | if (pNode->pszNodeValue)
|
---|
181 | {
|
---|
182 | free(pNode->pszNodeValue);
|
---|
183 | pNode->pszNodeValue = NULL;
|
---|
184 | }
|
---|
185 |
|
---|
186 | free(pNode);
|
---|
187 | }
|
---|
188 |
|
---|
189 | return (ulrc);
|
---|
190 | }
|
---|
191 |
|
---|
192 | /*
|
---|
193 | *@@category: Helpers\C helpers\XML\Parsing
|
---|
194 | */
|
---|
195 |
|
---|
196 | /* ******************************************************************
|
---|
197 | *
|
---|
198 | * Tokenizing (Compiling)
|
---|
199 | *
|
---|
200 | ********************************************************************/
|
---|
201 |
|
---|
202 | /*
|
---|
203 | *@@ xmlTokenize:
|
---|
204 | * this takes any block of XML text and "tokenizes"
|
---|
205 | * it.
|
---|
206 | *
|
---|
207 | * Tokenizing (or compiling, or "scanning" in bison/flex
|
---|
208 | * terms) means preparing the XML code for parsing later.
|
---|
209 | * This finds all tags and tag attributes and creates
|
---|
210 | * special codes for them in the output buffer.
|
---|
211 | *
|
---|
212 | * For example:
|
---|
213 | +
|
---|
214 | + <TAG ATTR="text"> block </TAG>
|
---|
215 | +
|
---|
216 | * becomes
|
---|
217 | *
|
---|
218 | + 0xFF escape code
|
---|
219 | + 0x01 tag start code
|
---|
220 | + "TAG" tag name
|
---|
221 | + 0xFF end of tag name code
|
---|
222 | +
|
---|
223 | + 0xFF escape code
|
---|
224 | + 0x03 attribute name code
|
---|
225 | + "ATTR" attribute name
|
---|
226 | + 0xFF
|
---|
227 | + "text" attribute value (without quotes)
|
---|
228 | + 0xFF end of attribute code
|
---|
229 | +
|
---|
230 | + " block " regular text
|
---|
231 | +
|
---|
232 | + 0xFF escape code
|
---|
233 | + 0x01 tag start code
|
---|
234 | + "/TAG" tag name
|
---|
235 | + 0xFF end of tag name code
|
---|
236 | *
|
---|
237 | *@@added V0.9.6 (2000-11-01) [umoeller]
|
---|
238 | */
|
---|
239 |
|
---|
240 | PSZ xmlTokenize(const char *pcszXML)
|
---|
241 | {
|
---|
242 | return (0);
|
---|
243 | }
|
---|
244 |
|
---|
245 | /* ******************************************************************
|
---|
246 | *
|
---|
247 | * Parsing
|
---|
248 | *
|
---|
249 | ********************************************************************/
|
---|
250 |
|
---|
251 | /*
|
---|
252 | * TAGFOUND:
|
---|
253 | * structure created for each tag by BuildTagsList.
|
---|
254 | */
|
---|
255 |
|
---|
256 | typedef struct _TAGFOUND
|
---|
257 | {
|
---|
258 | BOOL fIsComment;
|
---|
259 | const char *pOpenBrck;
|
---|
260 | const char *pStartOfTagName;
|
---|
261 | const char *pFirstAfterTagName;
|
---|
262 | const char *pCloseBrck; // ptr to '>' char; this plus one should
|
---|
263 | // point to after the tag
|
---|
264 | } TAGFOUND, *PTAGFOUND;
|
---|
265 |
|
---|
266 | /*
|
---|
267 | * BuildTagsList:
|
---|
268 | * builds a LINKLIST containing TAGFOUND structs for
|
---|
269 | * each tag found in the specified buffer.
|
---|
270 | *
|
---|
271 | * This is a flat list without any tree structure. This
|
---|
272 | * only searches for the tags and doesn't create any
|
---|
273 | * hierarchy.
|
---|
274 | *
|
---|
275 | * The tags are simply added to the list in the order
|
---|
276 | * in which they are found in pcszBuffer.
|
---|
277 | *
|
---|
278 | * The list is auto-free, you can simply do a lstFree
|
---|
279 | * to clean up.
|
---|
280 | */
|
---|
281 |
|
---|
282 | PLINKLIST BuildTagsList(const char *pcszBuffer)
|
---|
283 | {
|
---|
284 | PLINKLIST pllTags = lstCreate(TRUE);
|
---|
285 |
|
---|
286 | const char *pSearchPos = pcszBuffer;
|
---|
287 |
|
---|
288 | while ((pSearchPos) && (*pSearchPos))
|
---|
289 | {
|
---|
290 | // find first '<'
|
---|
291 | PSZ pOpenBrck = strchr(pSearchPos, '<');
|
---|
292 | if (!pOpenBrck)
|
---|
293 | // no open bracket found: stop search
|
---|
294 | pSearchPos = 0;
|
---|
295 | else
|
---|
296 | {
|
---|
297 | if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
|
---|
298 | {
|
---|
299 | // it's a comment:
|
---|
300 | // treat that differently
|
---|
301 | const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
|
---|
302 | const char *pCloseBrck = 0;
|
---|
303 | const char *pFirstAfterTagName = 0;
|
---|
304 | PTAGFOUND pTagFound;
|
---|
305 | if (!pEndOfComment)
|
---|
306 | {
|
---|
307 | // no end of comment found:
|
---|
308 | // skip entire rest of string
|
---|
309 | pCloseBrck = pOpenBrck + strlen(pOpenBrck);
|
---|
310 | pFirstAfterTagName = pCloseBrck;
|
---|
311 | pSearchPos = 0;
|
---|
312 | }
|
---|
313 | else
|
---|
314 | {
|
---|
315 | pCloseBrck = pEndOfComment + 2; // point directly to '>'
|
---|
316 | pFirstAfterTagName = pCloseBrck + 1;
|
---|
317 | }
|
---|
318 |
|
---|
319 | // append it to the list
|
---|
320 | pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
321 | if (!pTagFound)
|
---|
322 | // error:
|
---|
323 | pSearchPos = 0;
|
---|
324 | else
|
---|
325 | {
|
---|
326 | pTagFound->fIsComment = TRUE;
|
---|
327 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
328 | pTagFound->pStartOfTagName = pOpenBrck + 1;
|
---|
329 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
330 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
331 |
|
---|
332 | lstAppendItem(pllTags, pTagFound);
|
---|
333 | }
|
---|
334 |
|
---|
335 | pSearchPos = pFirstAfterTagName;
|
---|
336 | }
|
---|
337 | else
|
---|
338 | {
|
---|
339 | // no comment:
|
---|
340 | // find matching closing bracket
|
---|
341 | const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
|
---|
342 | if (!pCloseBrck)
|
---|
343 | pSearchPos = 0;
|
---|
344 | else
|
---|
345 | {
|
---|
346 | const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
|
---|
347 | // if we have another opening bracket before the closing bracket,
|
---|
348 | if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
|
---|
349 | // ignore this one
|
---|
350 | pSearchPos = pNextOpenBrck;
|
---|
351 | else
|
---|
352 | {
|
---|
353 | // OK, apparently we have a tag.
|
---|
354 | // Skip all spaces after the tag.
|
---|
355 | const char *pTagName = pOpenBrck + 1;
|
---|
356 | while ( (*pTagName)
|
---|
357 | && ( (*pTagName == ' ')
|
---|
358 | || (*pTagName == '\r')
|
---|
359 | || (*pTagName == '\n')
|
---|
360 | )
|
---|
361 | )
|
---|
362 | pTagName++;
|
---|
363 | if (!*pTagName)
|
---|
364 | // no tag name: stop
|
---|
365 | pSearchPos = 0;
|
---|
366 | else
|
---|
367 | {
|
---|
368 | // ookaaayyy, we got a tag now.
|
---|
369 | // Find first space or ">" after tag name:
|
---|
370 | const char *pFirstAfterTagName = pTagName + 1;
|
---|
371 | while ( (*pFirstAfterTagName)
|
---|
372 | && (*pFirstAfterTagName != ' ')
|
---|
373 | && (*pFirstAfterTagName != '\n')
|
---|
374 | && (*pFirstAfterTagName != '\r')
|
---|
375 | && (*pFirstAfterTagName != '\t') // tab
|
---|
376 | && (*pFirstAfterTagName != '>')
|
---|
377 | )
|
---|
378 | pFirstAfterTagName++;
|
---|
379 | if (!*pFirstAfterTagName)
|
---|
380 | // no closing bracket found:
|
---|
381 | pSearchPos = 0;
|
---|
382 | else
|
---|
383 | {
|
---|
384 | // got a tag name:
|
---|
385 | // append it to the list
|
---|
386 | PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
387 | if (!pTagFound)
|
---|
388 | // error:
|
---|
389 | pSearchPos = 0;
|
---|
390 | else
|
---|
391 | {
|
---|
392 | pTagFound->fIsComment = FALSE;
|
---|
393 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
394 | pTagFound->pStartOfTagName = pTagName;
|
---|
395 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
396 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
397 |
|
---|
398 | lstAppendItem(pllTags, pTagFound);
|
---|
399 |
|
---|
400 | // search on after closing bracket
|
---|
401 | pSearchPos = pCloseBrck + 1;
|
---|
402 | }
|
---|
403 | }
|
---|
404 | }
|
---|
405 | }
|
---|
406 | } // end else if (!pCloseBrck)
|
---|
407 | } // end else if (strncmp(pOpenBrck + 1, "!--"))
|
---|
408 | } // end if (pOpenBrck)
|
---|
409 | } // end while
|
---|
410 |
|
---|
411 | return (pllTags);
|
---|
412 | }
|
---|
413 |
|
---|
414 | /*
|
---|
415 | *@@ CreateTextNode:
|
---|
416 | * shortcut for creating a TEXT node. Calls
|
---|
417 | * xmlCreateNode in turn.
|
---|
418 | *
|
---|
419 | * The text is extracted from in between the
|
---|
420 | * two pointers using strhSubstr.
|
---|
421 | */
|
---|
422 |
|
---|
423 | PDOMNODE CreateTextNode(PDOMNODE pParentNode,
|
---|
424 | const char *pStart,
|
---|
425 | const char *pEnd)
|
---|
426 | {
|
---|
427 | PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
|
---|
428 | DOMNODE_TEXT);
|
---|
429 | if (pNewTextNode)
|
---|
430 | pNewTextNode->pszNodeValue = strhSubstr(pStart,
|
---|
431 | pEnd);
|
---|
432 |
|
---|
433 | return (pNewTextNode);
|
---|
434 | }
|
---|
435 |
|
---|
436 | /*
|
---|
437 | *@@ CreateElementNode:
|
---|
438 | * shortcut for creating a new ELEMENT node and
|
---|
439 | * parsing attributes at the same time.
|
---|
440 | *
|
---|
441 | * pszTagName is assumed to be static (no copy
|
---|
442 | * is made).
|
---|
443 | *
|
---|
444 | * pAttribs is assumed to point to an attributes
|
---|
445 | * string. This function creates ATTRIBUTE nodes
|
---|
446 | * from that string until either a null character
|
---|
447 | * or '>' is found.
|
---|
448 | */
|
---|
449 |
|
---|
450 | PDOMNODE CreateElementNode(PDOMNODE pParentNode,
|
---|
451 | PSZ pszTagName,
|
---|
452 | const char *pAttribs) // in: ptr to attribs; can be NULL
|
---|
453 | {
|
---|
454 | PDOMNODE pNewNode = xmlCreateNode(pParentNode,
|
---|
455 | DOMNODE_ELEMENT);
|
---|
456 | if (pNewNode)
|
---|
457 | {
|
---|
458 | const char *p = pAttribs;
|
---|
459 |
|
---|
460 | pNewNode->pszNodeName = pszTagName;
|
---|
461 |
|
---|
462 | // find-start-of-attribute loop
|
---|
463 | while (p)
|
---|
464 | {
|
---|
465 | switch (*p)
|
---|
466 | {
|
---|
467 | case 0:
|
---|
468 | case '>':
|
---|
469 | p = 0;
|
---|
470 | break;
|
---|
471 |
|
---|
472 | case ' ':
|
---|
473 | case '\t': // tab
|
---|
474 | case '\n':
|
---|
475 | case '\r':
|
---|
476 | p++;
|
---|
477 | break;
|
---|
478 |
|
---|
479 | default:
|
---|
480 | {
|
---|
481 | // first (or next) non-space:
|
---|
482 | // that's the start of an attrib, probably
|
---|
483 | // go until we find a space or '>'
|
---|
484 |
|
---|
485 | const char *pNameStart = p,
|
---|
486 | *p2 = p;
|
---|
487 |
|
---|
488 | const char *pEquals = 0,
|
---|
489 | *pFirstQuote = 0,
|
---|
490 | *pEnd = 0; // last char... non-inclusive!
|
---|
491 |
|
---|
492 | // copy-rest-of-attribute loop
|
---|
493 | while (p2)
|
---|
494 | {
|
---|
495 | switch (*p2)
|
---|
496 | {
|
---|
497 | case '"':
|
---|
498 | if (!pEquals)
|
---|
499 | {
|
---|
500 | // '"' cannot appear before '='
|
---|
501 | p2 = 0;
|
---|
502 | p = 0;
|
---|
503 | }
|
---|
504 | else
|
---|
505 | {
|
---|
506 | if (pFirstQuote)
|
---|
507 | {
|
---|
508 | // second quote:
|
---|
509 | // get value between quotes
|
---|
510 | pEnd = p2;
|
---|
511 | // we're done with this one
|
---|
512 | p = p2 + 1;
|
---|
513 | p2 = 0;
|
---|
514 | }
|
---|
515 | else
|
---|
516 | {
|
---|
517 | // first quote:
|
---|
518 | pFirstQuote = p2;
|
---|
519 | p2++;
|
---|
520 | }
|
---|
521 | }
|
---|
522 | break;
|
---|
523 |
|
---|
524 | case '=':
|
---|
525 | if (!pEquals)
|
---|
526 | {
|
---|
527 | // first equals sign:
|
---|
528 | pEquals = p2;
|
---|
529 | // extract name
|
---|
530 | p2++;
|
---|
531 | }
|
---|
532 | else
|
---|
533 | if (pFirstQuote)
|
---|
534 | p2++;
|
---|
535 | else
|
---|
536 | {
|
---|
537 | // error
|
---|
538 | p2 = 0;
|
---|
539 | p = 0;
|
---|
540 | }
|
---|
541 | break;
|
---|
542 |
|
---|
543 | case ' ':
|
---|
544 | case '\t': // tab
|
---|
545 | case '\n':
|
---|
546 | case '\r':
|
---|
547 | // spaces can appear in quotes
|
---|
548 | if (pFirstQuote)
|
---|
549 | // just continue
|
---|
550 | p2++;
|
---|
551 | else
|
---|
552 | {
|
---|
553 | // end of it!
|
---|
554 | pEnd = p2;
|
---|
555 | p = p2 + 1;
|
---|
556 | p2 = 0;
|
---|
557 | }
|
---|
558 | break;
|
---|
559 |
|
---|
560 | case 0:
|
---|
561 | case '>':
|
---|
562 | {
|
---|
563 | pEnd = p2;
|
---|
564 | // quit inner AND outer loop
|
---|
565 | p2 = 0;
|
---|
566 | p = 0;
|
---|
567 | break; }
|
---|
568 |
|
---|
569 | default:
|
---|
570 | p2++;
|
---|
571 | }
|
---|
572 | } // end while (p2)
|
---|
573 |
|
---|
574 | if (pEnd)
|
---|
575 | {
|
---|
576 | PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
|
---|
577 | DOMNODE_ATTRIBUTE);
|
---|
578 | if (pAttribNode)
|
---|
579 | {
|
---|
580 | if (pEquals)
|
---|
581 | {
|
---|
582 | pAttribNode->pszNodeName
|
---|
583 | = strhSubstr(pNameStart, pEquals);
|
---|
584 |
|
---|
585 | // did we have quotes?
|
---|
586 | if (pFirstQuote)
|
---|
587 | pAttribNode->pszNodeValue
|
---|
588 | = strhSubstr(pFirstQuote + 1, pEnd);
|
---|
589 | else
|
---|
590 | pAttribNode->pszNodeValue
|
---|
591 | = strhSubstr(pEquals + 1, pEnd);
|
---|
592 | }
|
---|
593 | else
|
---|
594 | // no "equals":
|
---|
595 | pAttribNode->pszNodeName
|
---|
596 | = strhSubstr(pNameStart, pEnd);
|
---|
597 | }
|
---|
598 | }
|
---|
599 | break; }
|
---|
600 | }
|
---|
601 | }
|
---|
602 | }
|
---|
603 |
|
---|
604 | return (pNewNode);
|
---|
605 | }
|
---|
606 |
|
---|
607 | /*
|
---|
608 | *@@ CreateNodesForBuf:
|
---|
609 | * this gets called (recursively) for a piece of text
|
---|
610 | * for which we need to create TEXT and ELEMENT DOMNODE's.
|
---|
611 | *
|
---|
612 | * This does the heavy work for xmlParse.
|
---|
613 | *
|
---|
614 | * If an error (!= 0) is returned, *ppError points to
|
---|
615 | * the code part that failed.
|
---|
616 | */
|
---|
617 |
|
---|
618 | ULONG CreateNodesForBuf(const char *pcszBufStart,
|
---|
619 | const char *pcszBufEnd, // in: can be NULL
|
---|
620 | PLINKLIST pllTagsList,
|
---|
621 | PDOMNODE pParentNode,
|
---|
622 | PFNVALIDATE pfnValidateTag,
|
---|
623 | const char **ppError)
|
---|
624 | {
|
---|
625 | ULONG ulrc = 0;
|
---|
626 | PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
|
---|
627 | const char *pBufCurrent = pcszBufStart;
|
---|
628 | BOOL fContinue = TRUE;
|
---|
629 |
|
---|
630 | if (pcszBufEnd == NULL)
|
---|
631 | pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
|
---|
632 |
|
---|
633 | while (fContinue)
|
---|
634 | {
|
---|
635 | if ( (!*pBufCurrent)
|
---|
636 | || (pBufCurrent == pcszBufEnd)
|
---|
637 | )
|
---|
638 | // end of buf reached:
|
---|
639 | fContinue = FALSE;
|
---|
640 |
|
---|
641 | else if (!pCurrentTagListNode)
|
---|
642 | {
|
---|
643 | // no (more) tags for this buffer:
|
---|
644 | CreateTextNode(pParentNode,
|
---|
645 | pBufCurrent,
|
---|
646 | pcszBufEnd);
|
---|
647 | fContinue = FALSE;
|
---|
648 | }
|
---|
649 | else
|
---|
650 | {
|
---|
651 | // another tag found:
|
---|
652 | PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
|
---|
653 | const char *pStartOfTag = pFoundTag->pOpenBrck;
|
---|
654 | if (pStartOfTag > pBufCurrent + 1)
|
---|
655 | {
|
---|
656 | // we have text before the opening tag:
|
---|
657 | // make a DOMTEXT out of this
|
---|
658 | CreateTextNode(pParentNode,
|
---|
659 | pBufCurrent,
|
---|
660 | pStartOfTag);
|
---|
661 | pBufCurrent = pStartOfTag;
|
---|
662 | }
|
---|
663 | else
|
---|
664 | {
|
---|
665 | // OK, go for this tag...
|
---|
666 |
|
---|
667 | if (*(pFoundTag->pStartOfTagName) == '/')
|
---|
668 | {
|
---|
669 | // this is a closing tag: that's an error
|
---|
670 | ulrc = 1;
|
---|
671 | *ppError = pFoundTag->pStartOfTagName;
|
---|
672 | fContinue = FALSE;
|
---|
673 | }
|
---|
674 | else if (pFoundTag->fIsComment)
|
---|
675 | {
|
---|
676 | // it's a comment: that's simple
|
---|
677 | PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
|
---|
678 | DOMNODE_COMMENT);
|
---|
679 | if (!pCommentNode)
|
---|
680 | ulrc = ERROR_NOT_ENOUGH_MEMORY;
|
---|
681 | else
|
---|
682 | {
|
---|
683 | pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
|
---|
684 | pFoundTag->pCloseBrck - 2);
|
---|
685 | }
|
---|
686 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
687 | }
|
---|
688 | else
|
---|
689 | {
|
---|
690 | BOOL fKeepTagName = FALSE; // free pszTagName below
|
---|
691 | PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
|
---|
692 | pFoundTag->pFirstAfterTagName);
|
---|
693 | if (!pszTagName)
|
---|
694 | // zero-length string:
|
---|
695 | // go ahead after that
|
---|
696 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
697 | else
|
---|
698 | {
|
---|
699 | // XML knows two types of elements:
|
---|
700 |
|
---|
701 | // a) Element pairs, which have opening and closing tags
|
---|
702 | // (<TAG> and </TAG>
|
---|
703 | // b) Single elements, which must have "/" as their last
|
---|
704 | // character; these have no closing tag
|
---|
705 | // (<TAG/>)
|
---|
706 |
|
---|
707 | // However, HTML doesn't usually tag single elements
|
---|
708 | // with a trailing '/'. To maintain compatibility,
|
---|
709 | // if we don't find a matching closing tag, we extract
|
---|
710 | // everything up to the end of the buffer.
|
---|
711 |
|
---|
712 | ULONG ulTagNameLen = strlen(pszTagName);
|
---|
713 |
|
---|
714 | // search for closing tag first...
|
---|
715 | // create string with closing tag to search for;
|
---|
716 | // that's '/' plus opening tag name
|
---|
717 | ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
|
---|
718 | PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
|
---|
719 | PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
|
---|
720 | PLISTNODE pTagListNodeForChildren = pTagListNode2;
|
---|
721 |
|
---|
722 | BOOL fClosingTagFound = FALSE;
|
---|
723 |
|
---|
724 | *pszClosingTag2Find = '/';
|
---|
725 | strcpy(pszClosingTag2Find + 1, pszTagName);
|
---|
726 |
|
---|
727 | // now find matching closing tag
|
---|
728 | while (pTagListNode2)
|
---|
729 | {
|
---|
730 | PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
|
---|
731 | ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
|
---|
732 | // compare tag name lengths
|
---|
733 | if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
734 | {
|
---|
735 | // same length:
|
---|
736 | // compare
|
---|
737 | if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
738 | pszClosingTag2Find,
|
---|
739 | ulClosingTagLen2Find)
|
---|
740 | == 0)
|
---|
741 | {
|
---|
742 | // found matching closing tag:
|
---|
743 |
|
---|
744 | // we now have
|
---|
745 | // -- pCurrentTagListNode pointing to the opening tag
|
---|
746 | // (pFoundTag has its PTAGFOUND item data)
|
---|
747 | // -- pTagListNode2 pointing to the closing tag
|
---|
748 | // (pFoundTag2 has its PTAGFOUND item data)
|
---|
749 |
|
---|
750 | // create DOM node
|
---|
751 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
752 | pszTagName,
|
---|
753 | pFoundTag->pFirstAfterTagName);
|
---|
754 | if (pNewNode)
|
---|
755 | {
|
---|
756 | ULONG ulAction = XMLACTION_BREAKUP;
|
---|
757 |
|
---|
758 | fKeepTagName = TRUE; // do not free below
|
---|
759 |
|
---|
760 | // validate tag
|
---|
761 | if (pfnValidateTag)
|
---|
762 | {
|
---|
763 | // validator specified:
|
---|
764 | ulAction = pfnValidateTag(pszTagName);
|
---|
765 | }
|
---|
766 |
|
---|
767 | if (ulAction == XMLACTION_COPYASTEXT)
|
---|
768 | {
|
---|
769 | CreateTextNode(pNewNode,
|
---|
770 | pFoundTag->pCloseBrck + 1,
|
---|
771 | pFoundTag2->pOpenBrck - 1);
|
---|
772 | }
|
---|
773 | else if (ulAction == XMLACTION_BREAKUP)
|
---|
774 | {
|
---|
775 | PLINKLIST pllSubList = lstCreate(FALSE);
|
---|
776 | PLISTNODE pSubNode = 0;
|
---|
777 | ULONG cSubNodes = 0;
|
---|
778 |
|
---|
779 | // text buffer to search
|
---|
780 | const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
|
---|
781 | const char *pSubBufEnd = pFoundTag2->pOpenBrck;
|
---|
782 |
|
---|
783 | // create a child list containing
|
---|
784 | // all tags from the first tag after
|
---|
785 | // the current opening tag to the closing tag
|
---|
786 | for (pSubNode = pTagListNodeForChildren;
|
---|
787 | pSubNode != pTagListNode2;
|
---|
788 | pSubNode = pSubNode->pNext)
|
---|
789 | {
|
---|
790 | lstAppendItem(pllSubList,
|
---|
791 | pSubNode->pItemData);
|
---|
792 | cSubNodes++;
|
---|
793 | }
|
---|
794 |
|
---|
795 | // now recurse to build child nodes
|
---|
796 | // (text and elements), even if the
|
---|
797 | // list is empty, we can have text!
|
---|
798 | CreateNodesForBuf(pSubBufStart,
|
---|
799 | pSubBufEnd,
|
---|
800 | pllSubList,
|
---|
801 | pNewNode,
|
---|
802 | pfnValidateTag,
|
---|
803 | ppError);
|
---|
804 |
|
---|
805 | lstFree(pllSubList);
|
---|
806 | } // end if (ulAction == XMLACTION_BREAKUP)
|
---|
807 |
|
---|
808 | // now search on after the closing tag
|
---|
809 | // we've found; the next tag will be set below
|
---|
810 | pCurrentTagListNode = pTagListNode2;
|
---|
811 | pBufCurrent = pFoundTag2->pCloseBrck + 1;
|
---|
812 |
|
---|
813 | fClosingTagFound = TRUE;
|
---|
814 |
|
---|
815 | break; // // while (pTagListNode2)
|
---|
816 | } // end if (pNewNode)
|
---|
817 | } // end if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
818 | } // if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
819 |
|
---|
820 | pTagListNode2 = pTagListNode2->pNext;
|
---|
821 |
|
---|
822 | } // while (pTagListNode2)
|
---|
823 |
|
---|
824 | if (!fClosingTagFound)
|
---|
825 | {
|
---|
826 | // no matching closing tag found:
|
---|
827 | // that's maybe a block of not well-formed XML
|
---|
828 |
|
---|
829 | // e.g. with WarpIN:
|
---|
830 | // <README> <-- we start after this
|
---|
831 | // block of plain HTML with <P> tags and such
|
---|
832 | // </README>
|
---|
833 |
|
---|
834 | // just create an element
|
---|
835 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
836 | pszTagName,
|
---|
837 | pFoundTag->pFirstAfterTagName);
|
---|
838 | if (pNewNode)
|
---|
839 | fKeepTagName = TRUE;
|
---|
840 |
|
---|
841 | // now search on after the closing tag
|
---|
842 | // we've found; the next tag will be set below
|
---|
843 | // pCurrentTagListNode = pTagListNodeForChildren;
|
---|
844 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
845 | }
|
---|
846 |
|
---|
847 | free(pszClosingTag2Find);
|
---|
848 |
|
---|
849 | if (!fKeepTagName)
|
---|
850 | free(pszTagName);
|
---|
851 | } // end if (pszTagName)
|
---|
852 | }
|
---|
853 |
|
---|
854 | pCurrentTagListNode = pCurrentTagListNode->pNext;
|
---|
855 | }
|
---|
856 | }
|
---|
857 | }
|
---|
858 |
|
---|
859 | return (ulrc);
|
---|
860 | }
|
---|
861 |
|
---|
862 | /*
|
---|
863 | * xmlParse:
|
---|
864 | * generic XML parser.
|
---|
865 | *
|
---|
866 | * This takes the specified zero-terminated string
|
---|
867 | * in pcszBuf and parses it, adding DOMNODE's as
|
---|
868 | * children to pNode.
|
---|
869 | *
|
---|
870 | * This recurses, if necessary, to build a node tree.
|
---|
871 | *
|
---|
872 | * Example: Take this HTML table definition:
|
---|
873 | +
|
---|
874 | + <TABLE>
|
---|
875 | + <TBODY>
|
---|
876 | + <TR>
|
---|
877 | + <TD>Column 1-1</TD>
|
---|
878 | + <TD>Column 1-2</TD>
|
---|
879 | + </TR>
|
---|
880 | + <TR>
|
---|
881 | + <TD>Column 2-1</TD>
|
---|
882 | + <TD>Column 2-2</TD>
|
---|
883 | + </TR>
|
---|
884 | + </TBODY>
|
---|
885 | + </TABLE>
|
---|
886 | *
|
---|
887 | * This function will create a tree as follows:
|
---|
888 | +
|
---|
889 | + ÚÄÄÄÄÄÄÄÄÄÄÄÄ¿
|
---|
890 | + ³ TABLE ³ (only ELEMENT node in root DOCUMENT node)
|
---|
891 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
892 | + ³
|
---|
893 | + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
|
---|
894 | + ³ TBODY ³ (only ELEMENT node in root "TABLE" node)
|
---|
895 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
896 | + ÚÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÄÄÄÄ¿
|
---|
897 | + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
|
---|
898 | + ³ TR ³ ³ TR ³
|
---|
899 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
900 | + ÚÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÁÄÄÄÄÄÄ¿
|
---|
901 | + ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿
|
---|
902 | + ³ TD ³ ³ TD ³ ³ TD ³ ³ TD ³
|
---|
903 | + ÀÄÄÂÄÄÙ ÀÄÄÂÄÄÙ ÀÄÄÄÂÄÙ ÀÄÄÂÄÄÙ
|
---|
904 | + ÉÍÍÍÍÍÊÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÊÍÍÍÍÍÍÍ»
|
---|
905 | + ºColumn 1-1º ºColumn 1-2º ºColumn 2-1º ºColumn 2-2º (one TEXT node in each parent node)
|
---|
906 | + ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ
|
---|
907 | */
|
---|
908 |
|
---|
909 | ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
|
---|
910 | const char *pcszBuf, // in: buffer to search
|
---|
911 | PFNVALIDATE pfnValidateTag)
|
---|
912 | {
|
---|
913 | ULONG ulrc = 0;
|
---|
914 |
|
---|
915 | PLINKLIST pllTags = BuildTagsList(pcszBuf);
|
---|
916 |
|
---|
917 | // now create DOMNODE's according to that list...
|
---|
918 | const char *pcszError = 0;
|
---|
919 | CreateNodesForBuf(pcszBuf,
|
---|
920 | NULL, // enitre buffer
|
---|
921 | pllTags,
|
---|
922 | pParentNode,
|
---|
923 | pfnValidateTag,
|
---|
924 | &pcszError);
|
---|
925 |
|
---|
926 | lstFree(pllTags);
|
---|
927 |
|
---|
928 | return (ulrc);
|
---|
929 | }
|
---|
930 |
|
---|
931 | /*
|
---|
932 | *@@ xmlCreateDocumentFromString:
|
---|
933 | * creates a DOCUMENT DOMNODE and calls xmlParse
|
---|
934 | * to break down the specified buffer into that
|
---|
935 | * node.
|
---|
936 | */
|
---|
937 |
|
---|
938 | PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
|
---|
939 | PFNVALIDATE pfnValidateTag)
|
---|
940 | {
|
---|
941 | PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
|
---|
942 | DOMNODE_DOCUMENT);
|
---|
943 | xmlParse(pDocument,
|
---|
944 | pcszXML,
|
---|
945 | pfnValidateTag);
|
---|
946 |
|
---|
947 | return (pDocument);
|
---|
948 | }
|
---|
949 |
|
---|
950 |
|
---|