1 |
|
---|
2 | /*
|
---|
3 | *@@sourcefile xml.c:
|
---|
4 | * XML parsing.
|
---|
5 | *
|
---|
6 | * This is vaguely modelled after the Document Object Model
|
---|
7 | * (DOM) standardized by the W3C.
|
---|
8 | *
|
---|
9 | * In short, DOM specifies that an XML document is broken
|
---|
10 | * up into a tree of nodes, representing the various parts
|
---|
11 | * of an XML document. Most importantly, we have:
|
---|
12 | *
|
---|
13 | * -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
|
---|
14 | *
|
---|
15 | * -- ATTRIBUTE: an attribute to an element.
|
---|
16 | *
|
---|
17 | * -- TEXT: a piece of, well, text.
|
---|
18 | *
|
---|
19 | * -- COMMENT: a comment.
|
---|
20 | *
|
---|
21 | * See xmlParse() for a more detailed explanation.
|
---|
22 | *
|
---|
23 | * However, since this implementation was supposed to be a
|
---|
24 | * C-only interface, we do not implement inheritance. Instead,
|
---|
25 | * each XML document is broken up into a tree of DOMNODE's only,
|
---|
26 | * each of which has a special type.
|
---|
27 | *
|
---|
28 | * It shouldn't be too difficult to write a C++ encapsulation
|
---|
29 | * of this which implements all the methods required by the DOM
|
---|
30 | * standard.
|
---|
31 | *
|
---|
32 | * The main entry point into this is xmlParse or
|
---|
33 | * xmlCreateDocumentFromString. See remarks there for details.
|
---|
34 | *
|
---|
35 | * Limitations:
|
---|
36 | *
|
---|
37 | * 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
|
---|
38 | * and COMMENT nodes.
|
---|
39 | *
|
---|
40 | * 2) This doesn't use 16-bit characters, but 8-bit characters.
|
---|
41 | *
|
---|
42 | *@@header "helpers\xml.h"
|
---|
43 | *@@added V0.9.6 (2000-10-29) [umoeller]
|
---|
44 | */
|
---|
45 |
|
---|
46 | /*
|
---|
47 | * Copyright (C) 2000 Ulrich Möller.
|
---|
48 | * This file is part of the XWorkplace source package.
|
---|
49 | * XWorkplace is free software; you can redistribute it and/or modify
|
---|
50 | * it under the terms of the GNU General Public License as published
|
---|
51 | * by the Free Software Foundation, in version 2 as it comes in the
|
---|
52 | * "COPYING" file of the XWorkplace main distribution.
|
---|
53 | * This program is distributed in the hope that it will be useful,
|
---|
54 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
55 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
56 | * GNU General Public License for more details.
|
---|
57 | */
|
---|
58 |
|
---|
59 | #define OS2EMX_PLAIN_CHAR
|
---|
60 | // this is needed for "os2emx.h"; if this is defined,
|
---|
61 | // emx will define PSZ as _signed_ char, otherwise
|
---|
62 | // as unsigned char
|
---|
63 |
|
---|
64 | #define INCL_DOSERRORS
|
---|
65 | #include <os2.h>
|
---|
66 |
|
---|
67 | #include <stdlib.h>
|
---|
68 | #include <string.h>
|
---|
69 |
|
---|
70 | #include "setup.h" // code generation and debugging options
|
---|
71 |
|
---|
72 | #include "helpers\linklist.h"
|
---|
73 | #include "helpers\stringh.h"
|
---|
74 | #include "helpers\xml.h"
|
---|
75 |
|
---|
76 | #pragma hdrstop
|
---|
77 |
|
---|
78 | /*
|
---|
79 | *@@category: Helpers\C helpers\XML\Node management
|
---|
80 | */
|
---|
81 |
|
---|
82 | /* ******************************************************************
|
---|
83 | *
|
---|
84 | * Node Management
|
---|
85 | *
|
---|
86 | ********************************************************************/
|
---|
87 |
|
---|
88 | /*
|
---|
89 | *@@ xmlCreateNode:
|
---|
90 | * creates a new DOMNODE with the specified
|
---|
91 | * type and parent.
|
---|
92 | */
|
---|
93 |
|
---|
94 | PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
|
---|
95 | ULONG ulNodeType)
|
---|
96 | {
|
---|
97 | PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
|
---|
98 | if (pNewNode)
|
---|
99 | {
|
---|
100 | memset(pNewNode, 0, sizeof(DOMNODE));
|
---|
101 | pNewNode->ulNodeType = ulNodeType;
|
---|
102 | pNewNode->pParentNode = pParentNode;
|
---|
103 | if (pParentNode)
|
---|
104 | {
|
---|
105 | // parent specified:
|
---|
106 | // append this new node to the parent's
|
---|
107 | // list of child nodes
|
---|
108 | lstAppendItem(&pParentNode->listChildNodes,
|
---|
109 | pNewNode);
|
---|
110 | }
|
---|
111 |
|
---|
112 | lstInit(&pNewNode->listChildNodes, FALSE);
|
---|
113 | lstInit(&pNewNode->listAttributeNodes, FALSE);
|
---|
114 | }
|
---|
115 |
|
---|
116 | return (pNewNode);
|
---|
117 | }
|
---|
118 |
|
---|
119 | /*
|
---|
120 | *@@ xmlDeleteNode:
|
---|
121 | * deletes the specified node.
|
---|
122 | *
|
---|
123 | * If the node has child nodes, all of them are deleted
|
---|
124 | * as well. This recurses, if necessary.
|
---|
125 | *
|
---|
126 | * As a result, if the node is a document node, this
|
---|
127 | * deletes an entire document, including all of its
|
---|
128 | * child nodes.
|
---|
129 | *
|
---|
130 | * Returns:
|
---|
131 | *
|
---|
132 | * -- 0: NO_ERROR.
|
---|
133 | */
|
---|
134 |
|
---|
135 | ULONG xmlDeleteNode(PDOMNODE pNode)
|
---|
136 | {
|
---|
137 | ULONG ulrc = 0;
|
---|
138 |
|
---|
139 | if (!pNode)
|
---|
140 | {
|
---|
141 | ulrc = DOMERR_NOT_FOUND;
|
---|
142 | }
|
---|
143 | else
|
---|
144 | {
|
---|
145 | // recurse into child nodes
|
---|
146 | PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
|
---|
147 | while (pNodeThis)
|
---|
148 | {
|
---|
149 | // recurse!!
|
---|
150 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
151 |
|
---|
152 | pNodeThis = pNodeThis->pNext;
|
---|
153 | }
|
---|
154 |
|
---|
155 | // delete attribute nodes
|
---|
156 | pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
|
---|
157 | while (pNodeThis)
|
---|
158 | {
|
---|
159 | // recurse!!
|
---|
160 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
161 |
|
---|
162 | pNodeThis = pNodeThis->pNext;
|
---|
163 | }
|
---|
164 |
|
---|
165 | if (pNode->pParentNode)
|
---|
166 | {
|
---|
167 | // node has a parent:
|
---|
168 | // remove this node from the parent's list
|
---|
169 | // of child nodes before deleting this node
|
---|
170 | lstRemoveItem(&pNode->pParentNode->listChildNodes,
|
---|
171 | pNode);
|
---|
172 | pNode->pParentNode = NULL;
|
---|
173 | }
|
---|
174 |
|
---|
175 | if (pNode->pszNodeName)
|
---|
176 | {
|
---|
177 | free(pNode->pszNodeName);
|
---|
178 | pNode->pszNodeName = NULL;
|
---|
179 | }
|
---|
180 | if (pNode->pszNodeValue)
|
---|
181 | {
|
---|
182 | free(pNode->pszNodeValue);
|
---|
183 | pNode->pszNodeValue = NULL;
|
---|
184 | }
|
---|
185 |
|
---|
186 | free(pNode);
|
---|
187 | }
|
---|
188 |
|
---|
189 | return (ulrc);
|
---|
190 | }
|
---|
191 |
|
---|
192 | /*
|
---|
193 | *@@category: Helpers\C helpers\XML\Parsing
|
---|
194 | */
|
---|
195 |
|
---|
196 | /* ******************************************************************
|
---|
197 | *
|
---|
198 | * Tokenizing (Compiling)
|
---|
199 | *
|
---|
200 | ********************************************************************/
|
---|
201 |
|
---|
202 | /*
|
---|
203 | *@@ xmlTokenize:
|
---|
204 | * this takes any block of XML text and "tokenizes"
|
---|
205 | * it.
|
---|
206 | *
|
---|
207 | * Tokenizing (or compiling, or "scanning" in bison/flex
|
---|
208 | * terms) means preparing the XML code for parsing later.
|
---|
209 | * This finds all tags and tag attributes and creates
|
---|
210 | * special codes for them in the output buffer.
|
---|
211 | *
|
---|
212 | * For example:
|
---|
213 | +
|
---|
214 | + <TAG ATTR="text"> block </TAG>
|
---|
215 | +
|
---|
216 | * becomes
|
---|
217 | *
|
---|
218 | + 0xFF escape code
|
---|
219 | + 0x01 tag start code
|
---|
220 | + "TAG" tag name
|
---|
221 | + 0xFF end of tag name code
|
---|
222 | +
|
---|
223 | + 0xFF escape code
|
---|
224 | + 0x03 attribute name code
|
---|
225 | + "ATTR" attribute name
|
---|
226 | + 0xFF
|
---|
227 | + "text" attribute value (without quotes)
|
---|
228 | + 0xFF end of attribute code
|
---|
229 | +
|
---|
230 | + " block " regular text
|
---|
231 | +
|
---|
232 | + 0xFF escape code
|
---|
233 | + 0x01 tag start code
|
---|
234 | + "/TAG" tag name
|
---|
235 | + 0xFF end of tag name code
|
---|
236 | *
|
---|
237 | *@@added V0.9.6 (2000-11-01) [umoeller]
|
---|
238 | */
|
---|
239 |
|
---|
240 | PSZ xmlTokenize(const char *pcszXML)
|
---|
241 | {
|
---|
242 | }
|
---|
243 |
|
---|
244 | /* ******************************************************************
|
---|
245 | *
|
---|
246 | * Parsing
|
---|
247 | *
|
---|
248 | ********************************************************************/
|
---|
249 |
|
---|
250 | /*
|
---|
251 | * TAGFOUND:
|
---|
252 | * structure created for each tag by BuildTagsList.
|
---|
253 | */
|
---|
254 |
|
---|
255 | typedef struct _TAGFOUND
|
---|
256 | {
|
---|
257 | BOOL fIsComment;
|
---|
258 | const char *pOpenBrck;
|
---|
259 | const char *pStartOfTagName;
|
---|
260 | const char *pFirstAfterTagName;
|
---|
261 | const char *pCloseBrck; // ptr to '>' char; this plus one should
|
---|
262 | // point to after the tag
|
---|
263 | } TAGFOUND, *PTAGFOUND;
|
---|
264 |
|
---|
265 | /*
|
---|
266 | * BuildTagsList:
|
---|
267 | * builds a LINKLIST containing TAGFOUND structs for
|
---|
268 | * each tag found in the specified buffer.
|
---|
269 | *
|
---|
270 | * This is a flat list without any tree structure. This
|
---|
271 | * only searches for the tags and doesn't create any
|
---|
272 | * hierarchy.
|
---|
273 | *
|
---|
274 | * The tags are simply added to the list in the order
|
---|
275 | * in which they are found in pcszBuffer.
|
---|
276 | *
|
---|
277 | * The list is auto-free, you can simply do a lstFree
|
---|
278 | * to clean up.
|
---|
279 | */
|
---|
280 |
|
---|
281 | PLINKLIST BuildTagsList(const char *pcszBuffer)
|
---|
282 | {
|
---|
283 | PLINKLIST pllTags = lstCreate(TRUE);
|
---|
284 |
|
---|
285 | const char *pSearchPos = pcszBuffer;
|
---|
286 |
|
---|
287 | while ((pSearchPos) && (*pSearchPos))
|
---|
288 | {
|
---|
289 | // find first '<'
|
---|
290 | PSZ pOpenBrck = strchr(pSearchPos, '<');
|
---|
291 | if (!pOpenBrck)
|
---|
292 | // no open bracket found: stop search
|
---|
293 | pSearchPos = 0;
|
---|
294 | else
|
---|
295 | {
|
---|
296 | if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
|
---|
297 | {
|
---|
298 | // it's a comment:
|
---|
299 | // treat that differently
|
---|
300 | const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
|
---|
301 | const char *pCloseBrck = 0;
|
---|
302 | const char *pFirstAfterTagName = 0;
|
---|
303 | PTAGFOUND pTagFound;
|
---|
304 | if (!pEndOfComment)
|
---|
305 | {
|
---|
306 | // no end of comment found:
|
---|
307 | // skip entire rest of string
|
---|
308 | pCloseBrck = pOpenBrck + strlen(pOpenBrck);
|
---|
309 | pFirstAfterTagName = pCloseBrck;
|
---|
310 | pSearchPos = 0;
|
---|
311 | }
|
---|
312 | else
|
---|
313 | {
|
---|
314 | pCloseBrck = pEndOfComment + 2; // point directly to '>'
|
---|
315 | pFirstAfterTagName = pCloseBrck + 1;
|
---|
316 | }
|
---|
317 |
|
---|
318 | // append it to the list
|
---|
319 | pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
320 | if (!pTagFound)
|
---|
321 | // error:
|
---|
322 | pSearchPos = 0;
|
---|
323 | else
|
---|
324 | {
|
---|
325 | pTagFound->fIsComment = TRUE;
|
---|
326 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
327 | pTagFound->pStartOfTagName = pOpenBrck + 1;
|
---|
328 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
329 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
330 |
|
---|
331 | lstAppendItem(pllTags, pTagFound);
|
---|
332 | }
|
---|
333 |
|
---|
334 | pSearchPos = pFirstAfterTagName;
|
---|
335 | }
|
---|
336 | else
|
---|
337 | {
|
---|
338 | // no comment:
|
---|
339 | // find matching closing bracket
|
---|
340 | const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
|
---|
341 | if (!pCloseBrck)
|
---|
342 | pSearchPos = 0;
|
---|
343 | else
|
---|
344 | {
|
---|
345 | const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
|
---|
346 | // if we have another opening bracket before the closing bracket,
|
---|
347 | if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
|
---|
348 | // ignore this one
|
---|
349 | pSearchPos = pNextOpenBrck;
|
---|
350 | else
|
---|
351 | {
|
---|
352 | // OK, apparently we have a tag.
|
---|
353 | // Skip all spaces after the tag.
|
---|
354 | const char *pTagName = pOpenBrck + 1;
|
---|
355 | while ( (*pTagName)
|
---|
356 | && ( (*pTagName == ' ')
|
---|
357 | || (*pTagName == '\r')
|
---|
358 | || (*pTagName == '\n')
|
---|
359 | )
|
---|
360 | )
|
---|
361 | pTagName++;
|
---|
362 | if (!*pTagName)
|
---|
363 | // no tag name: stop
|
---|
364 | pSearchPos = 0;
|
---|
365 | else
|
---|
366 | {
|
---|
367 | // ookaaayyy, we got a tag now.
|
---|
368 | // Find first space or ">" after tag name:
|
---|
369 | const char *pFirstAfterTagName = pTagName + 1;
|
---|
370 | while ( (*pFirstAfterTagName)
|
---|
371 | && (*pFirstAfterTagName != ' ')
|
---|
372 | && (*pFirstAfterTagName != '\n')
|
---|
373 | && (*pFirstAfterTagName != '\r')
|
---|
374 | && (*pFirstAfterTagName != '\t') // tab
|
---|
375 | && (*pFirstAfterTagName != '>')
|
---|
376 | )
|
---|
377 | pFirstAfterTagName++;
|
---|
378 | if (!*pFirstAfterTagName)
|
---|
379 | // no closing bracket found:
|
---|
380 | pSearchPos = 0;
|
---|
381 | else
|
---|
382 | {
|
---|
383 | // got a tag name:
|
---|
384 | // append it to the list
|
---|
385 | PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
386 | if (!pTagFound)
|
---|
387 | // error:
|
---|
388 | pSearchPos = 0;
|
---|
389 | else
|
---|
390 | {
|
---|
391 | pTagFound->fIsComment = FALSE;
|
---|
392 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
393 | pTagFound->pStartOfTagName = pTagName;
|
---|
394 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
395 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
396 |
|
---|
397 | lstAppendItem(pllTags, pTagFound);
|
---|
398 |
|
---|
399 | // search on after closing bracket
|
---|
400 | pSearchPos = pCloseBrck + 1;
|
---|
401 | }
|
---|
402 | }
|
---|
403 | }
|
---|
404 | }
|
---|
405 | } // end else if (!pCloseBrck)
|
---|
406 | } // end else if (strncmp(pOpenBrck + 1, "!--"))
|
---|
407 | } // end if (pOpenBrck)
|
---|
408 | } // end while
|
---|
409 |
|
---|
410 | return (pllTags);
|
---|
411 | }
|
---|
412 |
|
---|
413 | /*
|
---|
414 | *@@ CreateTextNode:
|
---|
415 | * shortcut for creating a TEXT node. Calls
|
---|
416 | * xmlCreateNode in turn.
|
---|
417 | *
|
---|
418 | * The text is extracted from in between the
|
---|
419 | * two pointers using strhSubstr.
|
---|
420 | */
|
---|
421 |
|
---|
422 | PDOMNODE CreateTextNode(PDOMNODE pParentNode,
|
---|
423 | const char *pStart,
|
---|
424 | const char *pEnd)
|
---|
425 | {
|
---|
426 | PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
|
---|
427 | DOMNODE_TEXT);
|
---|
428 | if (pNewTextNode)
|
---|
429 | pNewTextNode->pszNodeValue = strhSubstr(pStart,
|
---|
430 | pEnd);
|
---|
431 |
|
---|
432 | return (pNewTextNode);
|
---|
433 | }
|
---|
434 |
|
---|
435 | /*
|
---|
436 | *@@ CreateElementNode:
|
---|
437 | * shortcut for creating a new ELEMENT node and
|
---|
438 | * parsing attributes at the same time.
|
---|
439 | *
|
---|
440 | * pszTagName is assumed to be static (no copy
|
---|
441 | * is made).
|
---|
442 | *
|
---|
443 | * pAttribs is assumed to point to an attributes
|
---|
444 | * string. This function creates ATTRIBUTE nodes
|
---|
445 | * from that string until either a null character
|
---|
446 | * or '>' is found.
|
---|
447 | */
|
---|
448 |
|
---|
449 | PDOMNODE CreateElementNode(PDOMNODE pParentNode,
|
---|
450 | PSZ pszTagName,
|
---|
451 | const char *pAttribs) // in: ptr to attribs; can be NULL
|
---|
452 | {
|
---|
453 | PDOMNODE pNewNode = xmlCreateNode(pParentNode,
|
---|
454 | DOMNODE_ELEMENT);
|
---|
455 | if (pNewNode)
|
---|
456 | {
|
---|
457 | const char *p = pAttribs;
|
---|
458 |
|
---|
459 | pNewNode->pszNodeName = pszTagName;
|
---|
460 |
|
---|
461 | // find-start-of-attribute loop
|
---|
462 | while (p)
|
---|
463 | {
|
---|
464 | switch (*p)
|
---|
465 | {
|
---|
466 | case 0:
|
---|
467 | case '>':
|
---|
468 | p = 0;
|
---|
469 | break;
|
---|
470 |
|
---|
471 | case ' ':
|
---|
472 | case '\t': // tab
|
---|
473 | case '\n':
|
---|
474 | case '\r':
|
---|
475 | p++;
|
---|
476 | break;
|
---|
477 |
|
---|
478 | default:
|
---|
479 | {
|
---|
480 | // first (or next) non-space:
|
---|
481 | // that's the start of an attrib, probably
|
---|
482 | // go until we find a space or '>'
|
---|
483 |
|
---|
484 | const char *pNameStart = p,
|
---|
485 | *p2 = p;
|
---|
486 |
|
---|
487 | const char *pEquals = 0,
|
---|
488 | *pFirstQuote = 0,
|
---|
489 | *pEnd = 0; // last char... non-inclusive!
|
---|
490 |
|
---|
491 | // copy-rest-of-attribute loop
|
---|
492 | while (p2)
|
---|
493 | {
|
---|
494 | switch (*p2)
|
---|
495 | {
|
---|
496 | case '"':
|
---|
497 | if (!pEquals)
|
---|
498 | {
|
---|
499 | // '"' cannot appear before '='
|
---|
500 | p2 = 0;
|
---|
501 | p = 0;
|
---|
502 | }
|
---|
503 | else
|
---|
504 | {
|
---|
505 | if (pFirstQuote)
|
---|
506 | {
|
---|
507 | // second quote:
|
---|
508 | // get value between quotes
|
---|
509 | pEnd = p2;
|
---|
510 | // we're done with this one
|
---|
511 | p = p2 + 1;
|
---|
512 | p2 = 0;
|
---|
513 | }
|
---|
514 | else
|
---|
515 | {
|
---|
516 | // first quote:
|
---|
517 | pFirstQuote = p2;
|
---|
518 | p2++;
|
---|
519 | }
|
---|
520 | }
|
---|
521 | break;
|
---|
522 |
|
---|
523 | case '=':
|
---|
524 | if (!pEquals)
|
---|
525 | {
|
---|
526 | // first equals sign:
|
---|
527 | pEquals = p2;
|
---|
528 | // extract name
|
---|
529 | p2++;
|
---|
530 | }
|
---|
531 | else
|
---|
532 | if (pFirstQuote)
|
---|
533 | p2++;
|
---|
534 | else
|
---|
535 | {
|
---|
536 | // error
|
---|
537 | p2 = 0;
|
---|
538 | p = 0;
|
---|
539 | }
|
---|
540 | break;
|
---|
541 |
|
---|
542 | case ' ':
|
---|
543 | case '\t': // tab
|
---|
544 | case '\n':
|
---|
545 | case '\r':
|
---|
546 | // spaces can appear in quotes
|
---|
547 | if (pFirstQuote)
|
---|
548 | // just continue
|
---|
549 | p2++;
|
---|
550 | else
|
---|
551 | {
|
---|
552 | // end of it!
|
---|
553 | pEnd = p2;
|
---|
554 | p = p2 + 1;
|
---|
555 | p2 = 0;
|
---|
556 | }
|
---|
557 | break;
|
---|
558 |
|
---|
559 | case 0:
|
---|
560 | case '>':
|
---|
561 | {
|
---|
562 | pEnd = p2;
|
---|
563 | // quit inner AND outer loop
|
---|
564 | p2 = 0;
|
---|
565 | p = 0;
|
---|
566 | break; }
|
---|
567 |
|
---|
568 | default:
|
---|
569 | p2++;
|
---|
570 | }
|
---|
571 | } // end while (p2)
|
---|
572 |
|
---|
573 | if (pEnd)
|
---|
574 | {
|
---|
575 | PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
|
---|
576 | DOMNODE_ATTRIBUTE);
|
---|
577 | if (pAttribNode)
|
---|
578 | {
|
---|
579 | if (pEquals)
|
---|
580 | {
|
---|
581 | pAttribNode->pszNodeName
|
---|
582 | = strhSubstr(pNameStart, pEquals);
|
---|
583 |
|
---|
584 | // did we have quotes?
|
---|
585 | if (pFirstQuote)
|
---|
586 | pAttribNode->pszNodeValue
|
---|
587 | = strhSubstr(pFirstQuote + 1, pEnd);
|
---|
588 | else
|
---|
589 | pAttribNode->pszNodeValue
|
---|
590 | = strhSubstr(pEquals + 1, pEnd);
|
---|
591 | }
|
---|
592 | else
|
---|
593 | // no "equals":
|
---|
594 | pAttribNode->pszNodeName
|
---|
595 | = strhSubstr(pNameStart, pEnd);
|
---|
596 | }
|
---|
597 | }
|
---|
598 | break; }
|
---|
599 | }
|
---|
600 | }
|
---|
601 | }
|
---|
602 |
|
---|
603 | return (pNewNode);
|
---|
604 | }
|
---|
605 |
|
---|
606 | /*
|
---|
607 | *@@ CreateNodesForBuf:
|
---|
608 | * this gets called (recursively) for a piece of text
|
---|
609 | * for which we need to create TEXT and ELEMENT DOMNODE's.
|
---|
610 | *
|
---|
611 | * This does the heavy work for xmlParse.
|
---|
612 | *
|
---|
613 | * If an error (!= 0) is returned, *ppError points to
|
---|
614 | * the code part that failed.
|
---|
615 | */
|
---|
616 |
|
---|
617 | ULONG CreateNodesForBuf(const char *pcszBufStart,
|
---|
618 | const char *pcszBufEnd, // in: can be NULL
|
---|
619 | PLINKLIST pllTagsList,
|
---|
620 | PDOMNODE pParentNode,
|
---|
621 | PFNVALIDATE pfnValidateTag,
|
---|
622 | const char **ppError)
|
---|
623 | {
|
---|
624 | ULONG ulrc = 0;
|
---|
625 | PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
|
---|
626 | const char *pBufCurrent = pcszBufStart;
|
---|
627 | BOOL fContinue = TRUE;
|
---|
628 |
|
---|
629 | if (pcszBufEnd == NULL)
|
---|
630 | pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
|
---|
631 |
|
---|
632 | while (fContinue)
|
---|
633 | {
|
---|
634 | if ( (!*pBufCurrent)
|
---|
635 | || (pBufCurrent == pcszBufEnd)
|
---|
636 | )
|
---|
637 | // end of buf reached:
|
---|
638 | fContinue = FALSE;
|
---|
639 |
|
---|
640 | else if (!pCurrentTagListNode)
|
---|
641 | {
|
---|
642 | // no (more) tags for this buffer:
|
---|
643 | CreateTextNode(pParentNode,
|
---|
644 | pBufCurrent,
|
---|
645 | pcszBufEnd);
|
---|
646 | fContinue = FALSE;
|
---|
647 | }
|
---|
648 | else
|
---|
649 | {
|
---|
650 | // another tag found:
|
---|
651 | PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
|
---|
652 | const char *pStartOfTag = pFoundTag->pOpenBrck;
|
---|
653 | if (pStartOfTag > pBufCurrent + 1)
|
---|
654 | {
|
---|
655 | // we have text before the opening tag:
|
---|
656 | // make a DOMTEXT out of this
|
---|
657 | CreateTextNode(pParentNode,
|
---|
658 | pBufCurrent,
|
---|
659 | pStartOfTag);
|
---|
660 | pBufCurrent = pStartOfTag;
|
---|
661 | }
|
---|
662 | else
|
---|
663 | {
|
---|
664 | // OK, go for this tag...
|
---|
665 |
|
---|
666 | if (*(pFoundTag->pStartOfTagName) == '/')
|
---|
667 | {
|
---|
668 | // this is a closing tag: that's an error
|
---|
669 | ulrc = 1;
|
---|
670 | *ppError = pFoundTag->pStartOfTagName;
|
---|
671 | fContinue = FALSE;
|
---|
672 | }
|
---|
673 | else if (pFoundTag->fIsComment)
|
---|
674 | {
|
---|
675 | // it's a comment: that's simple
|
---|
676 | PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
|
---|
677 | DOMNODE_COMMENT);
|
---|
678 | if (!pCommentNode)
|
---|
679 | ulrc = ERROR_NOT_ENOUGH_MEMORY;
|
---|
680 | else
|
---|
681 | {
|
---|
682 | pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
|
---|
683 | pFoundTag->pCloseBrck - 2);
|
---|
684 | }
|
---|
685 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
686 | }
|
---|
687 | else
|
---|
688 | {
|
---|
689 | BOOL fKeepTagName = FALSE; // free pszTagName below
|
---|
690 | PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
|
---|
691 | pFoundTag->pFirstAfterTagName);
|
---|
692 | if (!pszTagName)
|
---|
693 | // zero-length string:
|
---|
694 | // go ahead after that
|
---|
695 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
696 | else
|
---|
697 | {
|
---|
698 | // XML knows two types of elements:
|
---|
699 |
|
---|
700 | // a) Element pairs, which have opening and closing tags
|
---|
701 | // (<TAG> and </TAG>
|
---|
702 | // b) Single elements, which must have "/" as their last
|
---|
703 | // character; these have no closing tag
|
---|
704 | // (<TAG/>)
|
---|
705 |
|
---|
706 | // However, HTML doesn't usually tag single elements
|
---|
707 | // with a trailing '/'. To maintain compatibility,
|
---|
708 | // if we don't find a matching closing tag, we extract
|
---|
709 | // everything up to the end of the buffer.
|
---|
710 |
|
---|
711 | ULONG ulTagNameLen = strlen(pszTagName);
|
---|
712 |
|
---|
713 | // search for closing tag first...
|
---|
714 | // create string with closing tag to search for;
|
---|
715 | // that's '/' plus opening tag name
|
---|
716 | ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
|
---|
717 | PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
|
---|
718 | PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
|
---|
719 | PLISTNODE pTagListNodeForChildren = pTagListNode2;
|
---|
720 |
|
---|
721 | BOOL fClosingTagFound = FALSE;
|
---|
722 |
|
---|
723 | *pszClosingTag2Find = '/';
|
---|
724 | strcpy(pszClosingTag2Find + 1, pszTagName);
|
---|
725 |
|
---|
726 | // now find matching closing tag
|
---|
727 | while (pTagListNode2)
|
---|
728 | {
|
---|
729 | PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
|
---|
730 | ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
|
---|
731 | // compare tag name lengths
|
---|
732 | if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
733 | {
|
---|
734 | // same length:
|
---|
735 | // compare
|
---|
736 | if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
737 | pszClosingTag2Find,
|
---|
738 | ulClosingTagLen2Find)
|
---|
739 | == 0)
|
---|
740 | {
|
---|
741 | // found matching closing tag:
|
---|
742 |
|
---|
743 | // we now have
|
---|
744 | // -- pCurrentTagListNode pointing to the opening tag
|
---|
745 | // (pFoundTag has its PTAGFOUND item data)
|
---|
746 | // -- pTagListNode2 pointing to the closing tag
|
---|
747 | // (pFoundTag2 has its PTAGFOUND item data)
|
---|
748 |
|
---|
749 | // create DOM node
|
---|
750 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
751 | pszTagName,
|
---|
752 | pFoundTag->pFirstAfterTagName);
|
---|
753 | if (pNewNode)
|
---|
754 | {
|
---|
755 | ULONG ulAction = XMLACTION_BREAKUP;
|
---|
756 |
|
---|
757 | fKeepTagName = TRUE; // do not free below
|
---|
758 |
|
---|
759 | // validate tag
|
---|
760 | if (pfnValidateTag)
|
---|
761 | {
|
---|
762 | // validator specified:
|
---|
763 | ulAction = pfnValidateTag(pszTagName);
|
---|
764 | }
|
---|
765 |
|
---|
766 | if (ulAction == XMLACTION_COPYASTEXT)
|
---|
767 | {
|
---|
768 | CreateTextNode(pNewNode,
|
---|
769 | pFoundTag->pCloseBrck + 1,
|
---|
770 | pFoundTag2->pOpenBrck - 1);
|
---|
771 | }
|
---|
772 | else if (ulAction == XMLACTION_BREAKUP)
|
---|
773 | {
|
---|
774 | PLINKLIST pllSubList = lstCreate(FALSE);
|
---|
775 | PLISTNODE pSubNode = 0;
|
---|
776 | ULONG cSubNodes = 0;
|
---|
777 |
|
---|
778 | // text buffer to search
|
---|
779 | const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
|
---|
780 | const char *pSubBufEnd = pFoundTag2->pOpenBrck;
|
---|
781 |
|
---|
782 | // create a child list containing
|
---|
783 | // all tags from the first tag after
|
---|
784 | // the current opening tag to the closing tag
|
---|
785 | for (pSubNode = pTagListNodeForChildren;
|
---|
786 | pSubNode != pTagListNode2;
|
---|
787 | pSubNode = pSubNode->pNext)
|
---|
788 | {
|
---|
789 | lstAppendItem(pllSubList,
|
---|
790 | pSubNode->pItemData);
|
---|
791 | cSubNodes++;
|
---|
792 | }
|
---|
793 |
|
---|
794 | // now recurse to build child nodes
|
---|
795 | // (text and elements), even if the
|
---|
796 | // list is empty, we can have text!
|
---|
797 | CreateNodesForBuf(pSubBufStart,
|
---|
798 | pSubBufEnd,
|
---|
799 | pllSubList,
|
---|
800 | pNewNode,
|
---|
801 | pfnValidateTag,
|
---|
802 | ppError);
|
---|
803 |
|
---|
804 | lstFree(pllSubList);
|
---|
805 | } // end if (ulAction == XMLACTION_BREAKUP)
|
---|
806 |
|
---|
807 | // now search on after the closing tag
|
---|
808 | // we've found; the next tag will be set below
|
---|
809 | pCurrentTagListNode = pTagListNode2;
|
---|
810 | pBufCurrent = pFoundTag2->pCloseBrck + 1;
|
---|
811 |
|
---|
812 | fClosingTagFound = TRUE;
|
---|
813 |
|
---|
814 | break; // // while (pTagListNode2)
|
---|
815 | } // end if (pNewNode)
|
---|
816 | } // end if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
817 | } // if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
818 |
|
---|
819 | pTagListNode2 = pTagListNode2->pNext;
|
---|
820 |
|
---|
821 | } // while (pTagListNode2)
|
---|
822 |
|
---|
823 | if (!fClosingTagFound)
|
---|
824 | {
|
---|
825 | // no matching closing tag found:
|
---|
826 | // that's maybe a block of not well-formed XML
|
---|
827 |
|
---|
828 | // e.g. with WarpIN:
|
---|
829 | // <README> <-- we start after this
|
---|
830 | // block of plain HTML with <P> tags and such
|
---|
831 | // </README>
|
---|
832 |
|
---|
833 | // just create an element
|
---|
834 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
835 | pszTagName,
|
---|
836 | pFoundTag->pFirstAfterTagName);
|
---|
837 | if (pNewNode)
|
---|
838 | fKeepTagName = TRUE;
|
---|
839 |
|
---|
840 | // now search on after the closing tag
|
---|
841 | // we've found; the next tag will be set below
|
---|
842 | // pCurrentTagListNode = pTagListNodeForChildren;
|
---|
843 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
844 | }
|
---|
845 |
|
---|
846 | free(pszClosingTag2Find);
|
---|
847 |
|
---|
848 | if (!fKeepTagName)
|
---|
849 | free(pszTagName);
|
---|
850 | } // end if (pszTagName)
|
---|
851 | }
|
---|
852 |
|
---|
853 | pCurrentTagListNode = pCurrentTagListNode->pNext;
|
---|
854 | }
|
---|
855 | }
|
---|
856 | }
|
---|
857 |
|
---|
858 | return (ulrc);
|
---|
859 | }
|
---|
860 |
|
---|
861 | /*
|
---|
862 | * xmlParse:
|
---|
863 | * generic XML parser.
|
---|
864 | *
|
---|
865 | * This takes the specified zero-terminated string
|
---|
866 | * in pcszBuf and parses it, adding DOMNODE's as
|
---|
867 | * children to pNode.
|
---|
868 | *
|
---|
869 | * This recurses, if necessary, to build a node tree.
|
---|
870 | *
|
---|
871 | * Example: Take this HTML table definition:
|
---|
872 | +
|
---|
873 | + <TABLE>
|
---|
874 | + <TBODY>
|
---|
875 | + <TR>
|
---|
876 | + <TD>Column 1-1</TD>
|
---|
877 | + <TD>Column 1-2</TD>
|
---|
878 | + </TR>
|
---|
879 | + <TR>
|
---|
880 | + <TD>Column 2-1</TD>
|
---|
881 | + <TD>Column 2-2</TD>
|
---|
882 | + </TR>
|
---|
883 | + </TBODY>
|
---|
884 | + </TABLE>
|
---|
885 | *
|
---|
886 | * This function will create a tree as follows:
|
---|
887 | +
|
---|
888 | +                          ┌────────────┐
|
---|
889 | +                          │   TABLE    │        (only ELEMENT node in root DOCUMENT node)
|
---|
890 | +                          └─────┬──────┘
|
---|
891 | +                                │
|
---|
892 | +                          ┌─────┴──────┐
|
---|
893 | +                          │   TBODY    │        (only ELEMENT node in root "TABLE" node)
|
---|
894 | +                          └─────┬──────┘
|
---|
895 | +                    ┌───────────┴───────────┐
|
---|
896 | +              ┌─────┴──────┐          ┌─────┴──────┐
|
---|
897 | +              │     TR     │          │     TR     │
|
---|
898 | +              └─────┬──────┘          └─────┬──────┘
|
---|
899 | +                ┌───┴──────┐            ┌───┴──────┐
|
---|
900 | +            ┌───┴─┐     ┌──┴──┐     ┌───┴─┐     ┌──┴──┐
|
---|
901 | +            │ TD  │     │ TD  │     │ TD  │     │ TD  │
|
---|
902 | +            └──┬──┘     └──┬──┘     └───┬─┘     └──┬──┘
|
---|
903 | +         ╔═════╩════╗ ╔════╩═════╗ ╔════╩═════╗ ╔══╩═══════╗
|
---|
904 | +         ║Column 1-1║ ║Column 1-2║ ║Column 2-1║ ║Column 2-2║    (one TEXT node in each parent node)
|
---|
905 | +         ╚══════════╝ ╚══════════╝ ╚══════════╝ ╚══════════╝
|
---|
906 | */
|
---|
907 |
|
---|
908 | ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
|
---|
909 | const char *pcszBuf, // in: buffer to search
|
---|
910 | PFNVALIDATE pfnValidateTag)
|
---|
911 | {
|
---|
912 | ULONG ulrc = 0;
|
---|
913 |
|
---|
914 | PLINKLIST pllTags = BuildTagsList(pcszBuf);
|
---|
915 |
|
---|
916 | // now create DOMNODE's according to that list...
|
---|
917 | const char *pcszError = 0;
|
---|
918 | CreateNodesForBuf(pcszBuf,
|
---|
919 | NULL, // enitre buffer
|
---|
920 | pllTags,
|
---|
921 | pParentNode,
|
---|
922 | pfnValidateTag,
|
---|
923 | &pcszError);
|
---|
924 |
|
---|
925 | lstFree(pllTags);
|
---|
926 |
|
---|
927 | return (ulrc);
|
---|
928 | }
|
---|
929 |
|
---|
930 | /*
|
---|
931 | *@@ xmlCreateDocumentFromString:
|
---|
932 | * creates a DOCUMENT DOMNODE and calls xmlParse
|
---|
933 | * to break down the specified buffer into that
|
---|
934 | * node.
|
---|
935 | */
|
---|
936 |
|
---|
937 | PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
|
---|
938 | PFNVALIDATE pfnValidateTag)
|
---|
939 | {
|
---|
940 | PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
|
---|
941 | DOMNODE_DOCUMENT);
|
---|
942 | xmlParse(pDocument,
|
---|
943 | pcszXML,
|
---|
944 | pfnValidateTag);
|
---|
945 |
|
---|
946 | return (pDocument);
|
---|
947 | }
|
---|
948 |
|
---|
949 |
|
---|