1 |
|
---|
2 | /*
|
---|
3 | *@@sourcefile xml.c:
|
---|
4 | * XML parsing.
|
---|
5 | *
|
---|
6 | * This is vaguely modelled after the Document Object Model
|
---|
7 | * (DOM) standardized by the W3C.
|
---|
8 | *
|
---|
9 | * In short, DOM specifies that an XML document is broken
|
---|
10 | * up into a tree of nodes, representing the various parts
|
---|
11 | * of an XML document. Most importantly, we have:
|
---|
12 | *
|
---|
13 | * -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
|
---|
14 | *
|
---|
15 | * -- ATTRIBUTE: an attribute to an element.
|
---|
16 | *
|
---|
17 | * -- TEXT: a piece of, well, text.
|
---|
18 | *
|
---|
19 | * -- COMMENT: a comment.
|
---|
20 | *
|
---|
21 | * See xmlParse() for a more detailed explanation.
|
---|
22 | *
|
---|
23 | * However, since this implementation was supposed to be a
|
---|
24 | * C-only interface, we do not implement inheritance. Instead,
|
---|
25 | * each XML document is broken up into a tree of DOMNODE's only,
|
---|
26 | * each of which has a special type.
|
---|
27 | *
|
---|
28 | * It shouldn't be too difficult to write a C++ encapsulation
|
---|
29 | * of this which implements all the methods required by the DOM
|
---|
30 | * standard.
|
---|
31 | *
|
---|
32 | * The main entry point into this is xmlParse or
|
---|
33 | * xmlCreateDocumentFromString. See remarks there for details.
|
---|
34 | *
|
---|
35 | * Limitations:
|
---|
36 | *
|
---|
37 | * 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
|
---|
38 | * and COMMENT nodes.
|
---|
39 | *
|
---|
40 | * 2) This doesn't use 16-bit characters, but 8-bit characters.
|
---|
41 | *
|
---|
42 | *@@header "xml.h"
|
---|
43 | *@@added V0.9.6 (2000-10-29) [umoeller]
|
---|
44 | */
|
---|
45 |
|
---|
46 | /*
|
---|
47 | * Copyright (C) 2000 Ulrich Möller.
|
---|
48 | * This file is part of the XWorkplace source package.
|
---|
49 | * XWorkplace is free software; you can redistribute it and/or modify
|
---|
50 | * it under the terms of the GNU General Public License as published
|
---|
51 | * by the Free Software Foundation, in version 2 as it comes in the
|
---|
52 | * "COPYING" file of the XWorkplace main distribution.
|
---|
53 | * This program is distributed in the hope that it will be useful,
|
---|
54 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
55 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
56 | * GNU General Public License for more details.
|
---|
57 | */
|
---|
58 |
|
---|
59 | #define OS2EMX_PLAIN_CHAR
|
---|
60 | // this is needed for "os2emx.h"; if this is defined,
|
---|
61 | // emx will define PSZ as _signed_ char, otherwise
|
---|
62 | // as unsigned char
|
---|
63 |
|
---|
64 | #define INCL_DOSERRORS
|
---|
65 | #include <os2.h>
|
---|
66 |
|
---|
67 | #include <stdlib.h>
|
---|
68 | #include <string.h>
|
---|
69 |
|
---|
70 | #include "setup.h" // code generation and debugging options
|
---|
71 |
|
---|
72 | #include "helpers\linklist.h"
|
---|
73 | #include "helpers\stringh.h"
|
---|
74 | #include "helpers\xml.h"
|
---|
75 |
|
---|
76 | #pragma hdrstop
|
---|
77 |
|
---|
78 | /*
|
---|
79 | *@@category: Helpers\C helpers\XML parsing
|
---|
80 | */
|
---|
81 |
|
---|
82 | /*
|
---|
83 | *@@ xmlCreateNode:
|
---|
84 | * creates a new DOMNODE with the specified
|
---|
85 | * type and parent.
|
---|
86 | */
|
---|
87 |
|
---|
88 | PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
|
---|
89 | ULONG ulNodeType)
|
---|
90 | {
|
---|
91 | PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
|
---|
92 | if (pNewNode)
|
---|
93 | {
|
---|
94 | memset(pNewNode, 0, sizeof(DOMNODE));
|
---|
95 | pNewNode->ulNodeType = ulNodeType;
|
---|
96 | pNewNode->pParentNode = pParentNode;
|
---|
97 | if (pParentNode)
|
---|
98 | {
|
---|
99 | // parent specified:
|
---|
100 | // append this new node to the parent's
|
---|
101 | // list of child nodes
|
---|
102 | lstAppendItem(&pParentNode->listChildNodes,
|
---|
103 | pNewNode);
|
---|
104 | }
|
---|
105 |
|
---|
106 | lstInit(&pNewNode->listChildNodes, FALSE);
|
---|
107 | lstInit(&pNewNode->listAttributeNodes, FALSE);
|
---|
108 | }
|
---|
109 |
|
---|
110 | return (pNewNode);
|
---|
111 | }
|
---|
112 |
|
---|
113 | /*
|
---|
114 | *@@ xmlDeleteNode:
|
---|
115 | * deletes the specified node.
|
---|
116 | *
|
---|
117 | * If the node has child nodes, all of them are deleted
|
---|
118 | * as well. This recurses, if necessary.
|
---|
119 | *
|
---|
120 | * As a result, if the node is a document node, this
|
---|
121 | * deletes an entire document, including all of its
|
---|
122 | * child nodes.
|
---|
123 | *
|
---|
124 | * Returns:
|
---|
125 | *
|
---|
126 | * -- 0: NO_ERROR.
|
---|
127 | */
|
---|
128 |
|
---|
129 | ULONG xmlDeleteNode(PDOMNODE pNode)
|
---|
130 | {
|
---|
131 | ULONG ulrc = 0;
|
---|
132 |
|
---|
133 | if (!pNode)
|
---|
134 | {
|
---|
135 | ulrc = DOMERR_NOT_FOUND;
|
---|
136 | }
|
---|
137 | else
|
---|
138 | {
|
---|
139 | // recurse into child nodes
|
---|
140 | PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
|
---|
141 | while (pNodeThis)
|
---|
142 | {
|
---|
143 | // recurse!!
|
---|
144 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
145 |
|
---|
146 | pNodeThis = pNodeThis->pNext;
|
---|
147 | }
|
---|
148 |
|
---|
149 | // delete attribute nodes
|
---|
150 | pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
|
---|
151 | while (pNodeThis)
|
---|
152 | {
|
---|
153 | // recurse!!
|
---|
154 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
155 |
|
---|
156 | pNodeThis = pNodeThis->pNext;
|
---|
157 | }
|
---|
158 |
|
---|
159 | if (pNode->pParentNode)
|
---|
160 | {
|
---|
161 | // node has a parent:
|
---|
162 | // remove this node from the parent's list
|
---|
163 | // of child nodes before deleting this node
|
---|
164 | lstRemoveItem(&pNode->pParentNode->listChildNodes,
|
---|
165 | pNode);
|
---|
166 | pNode->pParentNode = NULL;
|
---|
167 | }
|
---|
168 |
|
---|
169 | if (pNode->pszNodeName)
|
---|
170 | {
|
---|
171 | free(pNode->pszNodeName);
|
---|
172 | pNode->pszNodeName = NULL;
|
---|
173 | }
|
---|
174 | if (pNode->pszNodeValue)
|
---|
175 | {
|
---|
176 | free(pNode->pszNodeValue);
|
---|
177 | pNode->pszNodeValue = NULL;
|
---|
178 | }
|
---|
179 |
|
---|
180 | free(pNode);
|
---|
181 | }
|
---|
182 |
|
---|
183 | return (ulrc);
|
---|
184 | }
|
---|
185 |
|
---|
186 | /*
|
---|
187 | * TAGFOUND:
|
---|
188 | * structure created for each tag by BuildTagsList.
|
---|
189 | */
|
---|
190 |
|
---|
191 | typedef struct _TAGFOUND
|
---|
192 | {
|
---|
193 | BOOL fIsComment;
|
---|
194 | const char *pOpenBrck;
|
---|
195 | const char *pStartOfTagName;
|
---|
196 | const char *pFirstAfterTagName;
|
---|
197 | const char *pCloseBrck; // ptr to '>' char; this plus one should
|
---|
198 | // point to after the tag
|
---|
199 | } TAGFOUND, *PTAGFOUND;
|
---|
200 |
|
---|
201 | /*
|
---|
202 | * BuildTagsList:
|
---|
203 | * builds a LINKLIST containing TAGFOUND structs for
|
---|
204 | * each tag found in the specified buffer.
|
---|
205 | *
|
---|
206 | * This is a flat list without any tree structure. This
|
---|
207 | * only searches for the tags and doesn't create any
|
---|
208 | * hierarchy.
|
---|
209 | *
|
---|
210 | * The tags are simply added to the list in the order
|
---|
211 | * in which they are found in pcszBuffer.
|
---|
212 | *
|
---|
213 | * The list is auto-free, you can simply do a lstFree
|
---|
214 | * to clean up.
|
---|
215 | */
|
---|
216 |
|
---|
217 | PLINKLIST BuildTagsList(const char *pcszBuffer)
|
---|
218 | {
|
---|
219 | PLINKLIST pllTags = lstCreate(TRUE);
|
---|
220 |
|
---|
221 | const char *pSearchPos = pcszBuffer;
|
---|
222 |
|
---|
223 | while ((pSearchPos) && (*pSearchPos))
|
---|
224 | {
|
---|
225 | // find first '<'
|
---|
226 | PSZ pOpenBrck = strchr(pSearchPos, '<');
|
---|
227 | if (!pOpenBrck)
|
---|
228 | // no open bracket found: stop search
|
---|
229 | pSearchPos = 0;
|
---|
230 | else
|
---|
231 | {
|
---|
232 | if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
|
---|
233 | {
|
---|
234 | // it's a comment:
|
---|
235 | // treat that differently
|
---|
236 | const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
|
---|
237 | const char *pCloseBrck = 0;
|
---|
238 | const char *pFirstAfterTagName = 0;
|
---|
239 | PTAGFOUND pTagFound;
|
---|
240 | if (!pEndOfComment)
|
---|
241 | {
|
---|
242 | // no end of comment found:
|
---|
243 | // skip entire rest of string
|
---|
244 | pCloseBrck = pOpenBrck + strlen(pOpenBrck);
|
---|
245 | pFirstAfterTagName = pCloseBrck;
|
---|
246 | pSearchPos = 0;
|
---|
247 | }
|
---|
248 | else
|
---|
249 | {
|
---|
250 | pCloseBrck = pEndOfComment + 2; // point directly to '>'
|
---|
251 | pFirstAfterTagName = pCloseBrck + 1;
|
---|
252 | }
|
---|
253 |
|
---|
254 | // append it to the list
|
---|
255 | pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
256 | if (!pTagFound)
|
---|
257 | // error:
|
---|
258 | pSearchPos = 0;
|
---|
259 | else
|
---|
260 | {
|
---|
261 | pTagFound->fIsComment = TRUE;
|
---|
262 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
263 | pTagFound->pStartOfTagName = pOpenBrck + 1;
|
---|
264 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
265 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
266 |
|
---|
267 | lstAppendItem(pllTags, pTagFound);
|
---|
268 | }
|
---|
269 |
|
---|
270 | pSearchPos = pFirstAfterTagName;
|
---|
271 | }
|
---|
272 | else
|
---|
273 | {
|
---|
274 | // no comment:
|
---|
275 | // find matching closing bracket
|
---|
276 | const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
|
---|
277 | if (!pCloseBrck)
|
---|
278 | pSearchPos = 0;
|
---|
279 | else
|
---|
280 | {
|
---|
281 | const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
|
---|
282 | // if we have another opening bracket before the closing bracket,
|
---|
283 | if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
|
---|
284 | // ignore this one
|
---|
285 | pSearchPos = pNextOpenBrck;
|
---|
286 | else
|
---|
287 | {
|
---|
288 | // OK, apparently we have a tag.
|
---|
289 | // Skip all spaces after the tag.
|
---|
290 | const char *pTagName = pOpenBrck + 1;
|
---|
291 | while ( (*pTagName)
|
---|
292 | && ( (*pTagName == ' ')
|
---|
293 | || (*pTagName == '\r')
|
---|
294 | || (*pTagName == '\n')
|
---|
295 | )
|
---|
296 | )
|
---|
297 | pTagName++;
|
---|
298 | if (!*pTagName)
|
---|
299 | // no tag name: stop
|
---|
300 | pSearchPos = 0;
|
---|
301 | else
|
---|
302 | {
|
---|
303 | // ookaaayyy, we got a tag now.
|
---|
304 | // Find first space or ">" after tag name:
|
---|
305 | const char *pFirstAfterTagName = pTagName + 1;
|
---|
306 | while ( (*pFirstAfterTagName)
|
---|
307 | && (*pFirstAfterTagName != ' ')
|
---|
308 | && (*pFirstAfterTagName != '\n')
|
---|
309 | && (*pFirstAfterTagName != '\r')
|
---|
310 | && (*pFirstAfterTagName != '>')
|
---|
311 | )
|
---|
312 | pFirstAfterTagName++;
|
---|
313 | if (!*pFirstAfterTagName)
|
---|
314 | // no closing bracket found:
|
---|
315 | pSearchPos = 0;
|
---|
316 | else
|
---|
317 | {
|
---|
318 | // got a tag name:
|
---|
319 | // append it to the list
|
---|
320 | PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
321 | if (!pTagFound)
|
---|
322 | // error:
|
---|
323 | pSearchPos = 0;
|
---|
324 | else
|
---|
325 | {
|
---|
326 | pTagFound->fIsComment = FALSE;
|
---|
327 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
328 | pTagFound->pStartOfTagName = pTagName;
|
---|
329 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
330 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
331 |
|
---|
332 | lstAppendItem(pllTags, pTagFound);
|
---|
333 |
|
---|
334 | // search on after closing bracket
|
---|
335 | pSearchPos = pCloseBrck + 1;
|
---|
336 | }
|
---|
337 | }
|
---|
338 | }
|
---|
339 | }
|
---|
340 | } // end else if (!pCloseBrck)
|
---|
341 | } // end else if (strncmp(pOpenBrck + 1, "!--"))
|
---|
342 | } // end if (pOpenBrck)
|
---|
343 | } // end while
|
---|
344 |
|
---|
345 | return (pllTags);
|
---|
346 | }
|
---|
347 |
|
---|
348 | /*
|
---|
349 | *@@ CreateTextNode:
|
---|
350 | * shortcut for creating a TEXT node. Calls
|
---|
351 | * xmlCreateNode in turn.
|
---|
352 | *
|
---|
353 | * The text is extracted from in between the
|
---|
354 | * two pointers using strhSubstr.
|
---|
355 | */
|
---|
356 |
|
---|
357 | PDOMNODE CreateTextNode(PDOMNODE pParentNode,
|
---|
358 | const char *pStart,
|
---|
359 | const char *pEnd)
|
---|
360 | {
|
---|
361 | PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
|
---|
362 | DOMNODE_TEXT);
|
---|
363 | if (pNewTextNode)
|
---|
364 | pNewTextNode->pszNodeValue = strhSubstr(pStart,
|
---|
365 | pEnd);
|
---|
366 |
|
---|
367 | return (pNewTextNode);
|
---|
368 | }
|
---|
369 |
|
---|
370 | /*
|
---|
371 | *@@ CreateElementNode:
|
---|
372 | * shortcut for creating a new ELEMENT node and
|
---|
373 | * parsing attributes at the same time.
|
---|
374 | *
|
---|
375 | * pszTagName is assumed to be static (no copy
|
---|
376 | * is made).
|
---|
377 | *
|
---|
378 | * pAttribs is assumed to point to an attributes
|
---|
379 | * string. This function creates ATTRIBUTE nodes
|
---|
380 | * from that string until either a null character
|
---|
381 | * or '>' is found.
|
---|
382 | */
|
---|
383 |
|
---|
384 | PDOMNODE CreateElementNode(PDOMNODE pParentNode,
|
---|
385 | PSZ pszTagName,
|
---|
386 | const char *pAttribs) // in: ptr to attribs; can be NULL
|
---|
387 | {
|
---|
388 | PDOMNODE pNewNode = xmlCreateNode(pParentNode,
|
---|
389 | DOMNODE_ELEMENT);
|
---|
390 | if (pNewNode)
|
---|
391 | {
|
---|
392 | const char *p = pAttribs;
|
---|
393 |
|
---|
394 | pNewNode->pszNodeName = pszTagName;
|
---|
395 |
|
---|
396 | // find-start-of-attribute loop
|
---|
397 | while (p)
|
---|
398 | {
|
---|
399 | switch (*p)
|
---|
400 | {
|
---|
401 | case 0:
|
---|
402 | case '>':
|
---|
403 | p = 0;
|
---|
404 | break;
|
---|
405 |
|
---|
406 | case ' ':
|
---|
407 | case '\n':
|
---|
408 | case '\r':
|
---|
409 | p++;
|
---|
410 | break;
|
---|
411 |
|
---|
412 | default:
|
---|
413 | {
|
---|
414 | // first (or next) non-space:
|
---|
415 | // that's the start of an attrib, probably
|
---|
416 | // go until we find a space or '>'
|
---|
417 |
|
---|
418 | const char *pNameStart = p,
|
---|
419 | *p2 = p;
|
---|
420 |
|
---|
421 | const char *pEquals = 0,
|
---|
422 | *pFirstQuote = 0,
|
---|
423 | *pEnd = 0; // last char... non-inclusive!
|
---|
424 |
|
---|
425 | // copy-rest-of-attribute loop
|
---|
426 | while (p2)
|
---|
427 | {
|
---|
428 | switch (*p2)
|
---|
429 | {
|
---|
430 | case '"':
|
---|
431 | if (!pEquals)
|
---|
432 | {
|
---|
433 | // '"' cannot appear before '='
|
---|
434 | p2 = 0;
|
---|
435 | p = 0;
|
---|
436 | }
|
---|
437 | else
|
---|
438 | {
|
---|
439 | if (pFirstQuote)
|
---|
440 | {
|
---|
441 | // second quote:
|
---|
442 | // get value between quotes
|
---|
443 | pEnd = p2;
|
---|
444 | // we're done with this one
|
---|
445 | p = p2 + 1;
|
---|
446 | p2 = 0;
|
---|
447 | }
|
---|
448 | else
|
---|
449 | {
|
---|
450 | // first quote:
|
---|
451 | pFirstQuote = p2;
|
---|
452 | p2++;
|
---|
453 | }
|
---|
454 | }
|
---|
455 | break;
|
---|
456 |
|
---|
457 | case '=':
|
---|
458 | if (!pEquals)
|
---|
459 | {
|
---|
460 | // first equals sign:
|
---|
461 | pEquals = p2;
|
---|
462 | // extract name
|
---|
463 | p2++;
|
---|
464 | }
|
---|
465 | else
|
---|
466 | if (pFirstQuote)
|
---|
467 | p2++;
|
---|
468 | else
|
---|
469 | {
|
---|
470 | // error
|
---|
471 | p2 = 0;
|
---|
472 | p = 0;
|
---|
473 | }
|
---|
474 | break;
|
---|
475 |
|
---|
476 | case ' ':
|
---|
477 | case '\n':
|
---|
478 | case '\r':
|
---|
479 | // spaces can appear in quotes
|
---|
480 | if (pFirstQuote)
|
---|
481 | // just continue
|
---|
482 | p2++;
|
---|
483 | else
|
---|
484 | {
|
---|
485 | // end of it!
|
---|
486 | pEnd = p2;
|
---|
487 | p = p2 + 1;
|
---|
488 | p2 = 0;
|
---|
489 | }
|
---|
490 | break;
|
---|
491 |
|
---|
492 | case 0:
|
---|
493 | case '>':
|
---|
494 | {
|
---|
495 | pEnd = p2;
|
---|
496 | // quit inner AND outer loop
|
---|
497 | p2 = 0;
|
---|
498 | p = 0;
|
---|
499 | break; }
|
---|
500 |
|
---|
501 | default:
|
---|
502 | p2++;
|
---|
503 | }
|
---|
504 | } // end while (p2)
|
---|
505 |
|
---|
506 | if (pEnd)
|
---|
507 | {
|
---|
508 | PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
|
---|
509 | DOMNODE_ATTRIBUTE);
|
---|
510 | if (pAttribNode)
|
---|
511 | {
|
---|
512 | if (pEquals)
|
---|
513 | {
|
---|
514 | pAttribNode->pszNodeName
|
---|
515 | = strhSubstr(pNameStart, pEquals);
|
---|
516 |
|
---|
517 | // did we have quotes?
|
---|
518 | if (pFirstQuote)
|
---|
519 | pAttribNode->pszNodeValue
|
---|
520 | = strhSubstr(pFirstQuote + 1, pEnd);
|
---|
521 | else
|
---|
522 | pAttribNode->pszNodeValue
|
---|
523 | = strhSubstr(pEquals + 1, pEnd);
|
---|
524 | }
|
---|
525 | else
|
---|
526 | // no "equals":
|
---|
527 | pAttribNode->pszNodeName
|
---|
528 | = strhSubstr(pNameStart, pEnd);
|
---|
529 | }
|
---|
530 | }
|
---|
531 | break; }
|
---|
532 | }
|
---|
533 | }
|
---|
534 | }
|
---|
535 |
|
---|
536 | return (pNewNode);
|
---|
537 | }
|
---|
538 |
|
---|
539 | /*
|
---|
540 | *@@ CreateNodesForBuf:
|
---|
541 | * this gets called (recursively) for a piece of text
|
---|
542 | * for which we need to create TEXT and ELEMENT DOMNODE's.
|
---|
543 | *
|
---|
544 | * This does the heavy work for xmlParse.
|
---|
545 | *
|
---|
546 | * If an error (!= 0) is returned, *ppError points to
|
---|
547 | * the code part that failed.
|
---|
548 | */
|
---|
549 |
|
---|
550 | ULONG CreateNodesForBuf(const char *pcszBufStart,
|
---|
551 | const char *pcszBufEnd, // in: can be NULL
|
---|
552 | PLINKLIST pllTagsList,
|
---|
553 | PDOMNODE pParentNode,
|
---|
554 | PFNVALIDATE pfnValidateTag,
|
---|
555 | const char **ppError)
|
---|
556 | {
|
---|
557 | ULONG ulrc = 0;
|
---|
558 | PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
|
---|
559 | const char *pBufCurrent = pcszBufStart;
|
---|
560 | BOOL fContinue = TRUE;
|
---|
561 |
|
---|
562 | if (pcszBufEnd == NULL)
|
---|
563 | pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
|
---|
564 |
|
---|
565 | while (fContinue)
|
---|
566 | {
|
---|
567 | if ( (!*pBufCurrent)
|
---|
568 | || (pBufCurrent == pcszBufEnd)
|
---|
569 | )
|
---|
570 | // end of buf reached:
|
---|
571 | fContinue = FALSE;
|
---|
572 |
|
---|
573 | else if (!pCurrentTagListNode)
|
---|
574 | {
|
---|
575 | // no (more) tags for this buffer:
|
---|
576 | CreateTextNode(pParentNode,
|
---|
577 | pBufCurrent,
|
---|
578 | pcszBufEnd);
|
---|
579 | fContinue = FALSE;
|
---|
580 | }
|
---|
581 | else
|
---|
582 | {
|
---|
583 | // another tag found:
|
---|
584 | PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
|
---|
585 | const char *pStartOfTag = pFoundTag->pOpenBrck;
|
---|
586 | if (pStartOfTag > pBufCurrent + 1)
|
---|
587 | {
|
---|
588 | // we have text before the opening tag:
|
---|
589 | // make a DOMTEXT out of this
|
---|
590 | CreateTextNode(pParentNode,
|
---|
591 | pBufCurrent,
|
---|
592 | pStartOfTag);
|
---|
593 | pBufCurrent = pStartOfTag;
|
---|
594 | }
|
---|
595 | else
|
---|
596 | {
|
---|
597 | // OK, go for this tag...
|
---|
598 |
|
---|
599 | if (*(pFoundTag->pStartOfTagName) == '/')
|
---|
600 | {
|
---|
601 | // this is a closing tag: that's an error
|
---|
602 | ulrc = 1;
|
---|
603 | *ppError = pFoundTag->pStartOfTagName;
|
---|
604 | fContinue = FALSE;
|
---|
605 | }
|
---|
606 | else if (pFoundTag->fIsComment)
|
---|
607 | {
|
---|
608 | // it's a comment: that's simple
|
---|
609 | PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
|
---|
610 | DOMNODE_COMMENT);
|
---|
611 | if (!pCommentNode)
|
---|
612 | ulrc = ERROR_NOT_ENOUGH_MEMORY;
|
---|
613 | else
|
---|
614 | {
|
---|
615 | pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
|
---|
616 | pFoundTag->pCloseBrck - 2);
|
---|
617 | }
|
---|
618 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
619 | }
|
---|
620 | else
|
---|
621 | {
|
---|
622 | BOOL fKeepTagName = FALSE; // free pszTagName below
|
---|
623 | PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
|
---|
624 | pFoundTag->pFirstAfterTagName);
|
---|
625 | if (!pszTagName)
|
---|
626 | // zero-length string:
|
---|
627 | // go ahead after that
|
---|
628 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
629 | else
|
---|
630 | {
|
---|
631 | // XML knows two types of elements:
|
---|
632 |
|
---|
633 | // a) Element pairs, which have opening and closing tags
|
---|
634 | // (<TAG> and </TAG>
|
---|
635 | // b) Single elements, which must have "/" as their last
|
---|
636 | // character; these have no closing tag
|
---|
637 | // (<TAG/>)
|
---|
638 |
|
---|
639 | // However, HTML doesn't usually tag single elements
|
---|
640 | // with a trailing '/'. To maintain compatibility,
|
---|
641 | // if we don't find a matching closing tag, we extract
|
---|
642 | // everything up to the end of the buffer.
|
---|
643 |
|
---|
644 | ULONG ulTagNameLen = strlen(pszTagName);
|
---|
645 |
|
---|
646 | // search for closing tag first...
|
---|
647 | // create string with closing tag to search for;
|
---|
648 | // that's '/' plus opening tag name
|
---|
649 | ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
|
---|
650 | PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
|
---|
651 | PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
|
---|
652 | PLISTNODE pTagListNodeForChildren = pTagListNode2;
|
---|
653 |
|
---|
654 | BOOL fClosingTagFound = FALSE;
|
---|
655 |
|
---|
656 | *pszClosingTag2Find = '/';
|
---|
657 | strcpy(pszClosingTag2Find + 1, pszTagName);
|
---|
658 |
|
---|
659 | // now find matching closing tag
|
---|
660 | while (pTagListNode2)
|
---|
661 | {
|
---|
662 | PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
|
---|
663 | ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
|
---|
664 | // compare tag name lengths
|
---|
665 | if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
666 | {
|
---|
667 | // same length:
|
---|
668 | // compare
|
---|
669 | if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
670 | pszClosingTag2Find,
|
---|
671 | ulClosingTagLen2Find)
|
---|
672 | == 0)
|
---|
673 | {
|
---|
674 | // found matching closing tag:
|
---|
675 |
|
---|
676 | // we now have
|
---|
677 | // -- pCurrentTagListNode pointing to the opening tag
|
---|
678 | // (pFoundTag has its PTAGFOUND item data)
|
---|
679 | // -- pTagListNode2 pointing to the closing tag
|
---|
680 | // (pFoundTag2 has its PTAGFOUND item data)
|
---|
681 |
|
---|
682 | // create DOM node
|
---|
683 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
684 | pszTagName,
|
---|
685 | pFoundTag->pFirstAfterTagName);
|
---|
686 | if (pNewNode)
|
---|
687 | {
|
---|
688 | ULONG ulAction = XMLACTION_BREAKUP;
|
---|
689 |
|
---|
690 | fKeepTagName = TRUE; // do not free below
|
---|
691 |
|
---|
692 | // validate tag
|
---|
693 | if (pfnValidateTag)
|
---|
694 | {
|
---|
695 | // validator specified:
|
---|
696 | ulAction = pfnValidateTag(pszTagName);
|
---|
697 | }
|
---|
698 |
|
---|
699 | if (ulAction == XMLACTION_COPYASTEXT)
|
---|
700 | {
|
---|
701 | CreateTextNode(pNewNode,
|
---|
702 | pFoundTag->pCloseBrck + 1,
|
---|
703 | pFoundTag2->pOpenBrck - 1);
|
---|
704 | }
|
---|
705 | else if (ulAction == XMLACTION_BREAKUP)
|
---|
706 | {
|
---|
707 | PLINKLIST pllSubList = lstCreate(FALSE);
|
---|
708 | PLISTNODE pSubNode = 0;
|
---|
709 | ULONG cSubNodes = 0;
|
---|
710 |
|
---|
711 | // text buffer to search
|
---|
712 | const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
|
---|
713 | const char *pSubBufEnd = pFoundTag2->pOpenBrck;
|
---|
714 |
|
---|
715 | // create a child list containing
|
---|
716 | // all tags from the first tag after
|
---|
717 | // the current opening tag to the closing tag
|
---|
718 | for (pSubNode = pTagListNodeForChildren;
|
---|
719 | pSubNode != pTagListNode2;
|
---|
720 | pSubNode = pSubNode->pNext)
|
---|
721 | {
|
---|
722 | lstAppendItem(pllSubList,
|
---|
723 | pSubNode->pItemData);
|
---|
724 | cSubNodes++;
|
---|
725 | }
|
---|
726 |
|
---|
727 | // now recurse to build child nodes
|
---|
728 | // (text and elements), even if the
|
---|
729 | // list is empty, we can have text!
|
---|
730 | CreateNodesForBuf(pSubBufStart,
|
---|
731 | pSubBufEnd,
|
---|
732 | pllSubList,
|
---|
733 | pNewNode,
|
---|
734 | pfnValidateTag,
|
---|
735 | ppError);
|
---|
736 |
|
---|
737 | lstFree(pllSubList);
|
---|
738 | } // end if (ulAction == XMLACTION_BREAKUP)
|
---|
739 |
|
---|
740 | // now search on after the closing tag
|
---|
741 | // we've found; the next tag will be set below
|
---|
742 | pCurrentTagListNode = pTagListNode2;
|
---|
743 | pBufCurrent = pFoundTag2->pCloseBrck + 1;
|
---|
744 |
|
---|
745 | fClosingTagFound = TRUE;
|
---|
746 |
|
---|
747 | break; // // while (pTagListNode2)
|
---|
748 | } // end if (pNewNode)
|
---|
749 | } // end if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
750 | } // if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
751 |
|
---|
752 | pTagListNode2 = pTagListNode2->pNext;
|
---|
753 |
|
---|
754 | } // while (pTagListNode2)
|
---|
755 |
|
---|
756 | if (!fClosingTagFound)
|
---|
757 | {
|
---|
758 | // no matching closing tag found:
|
---|
759 | // that's maybe a block of not well-formed XML
|
---|
760 |
|
---|
761 | // e.g. with WarpIN:
|
---|
762 | // <README> <-- we start after this
|
---|
763 | // block of plain HTML with <P> tags and such
|
---|
764 | // </README>
|
---|
765 |
|
---|
766 | // just create an element
|
---|
767 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
768 | pszTagName,
|
---|
769 | pFoundTag->pFirstAfterTagName);
|
---|
770 | if (pNewNode)
|
---|
771 | fKeepTagName = TRUE;
|
---|
772 |
|
---|
773 | // now search on after the closing tag
|
---|
774 | // we've found; the next tag will be set below
|
---|
775 | // pCurrentTagListNode = pTagListNodeForChildren;
|
---|
776 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
777 | }
|
---|
778 |
|
---|
779 | free(pszClosingTag2Find);
|
---|
780 |
|
---|
781 | if (!fKeepTagName)
|
---|
782 | free(pszTagName);
|
---|
783 | } // end if (pszTagName)
|
---|
784 | }
|
---|
785 |
|
---|
786 | pCurrentTagListNode = pCurrentTagListNode->pNext;
|
---|
787 | }
|
---|
788 | }
|
---|
789 | }
|
---|
790 |
|
---|
791 | return (ulrc);
|
---|
792 | }
|
---|
793 |
|
---|
794 | /*
|
---|
795 | * xmlParse:
|
---|
796 | * generic XML parser.
|
---|
797 | *
|
---|
798 | * This takes the specified zero-terminated string
|
---|
799 | * in pcszBuf and parses it, adding DOMNODE's as
|
---|
800 | * children to pNode.
|
---|
801 | *
|
---|
802 | * This recurses, if necessary, to build a node tree.
|
---|
803 | *
|
---|
804 | * Example: Take this HTML table definition:
|
---|
805 | +
|
---|
806 | + <TABLE>
|
---|
807 | + <TBODY>
|
---|
808 | + <TR>
|
---|
809 | + <TD>Column 1-1</TD>
|
---|
810 | + <TD>Column 1-2</TD>
|
---|
811 | + </TR>
|
---|
812 | + <TR>
|
---|
813 | + <TD>Column 2-1</TD>
|
---|
814 | + <TD>Column 2-2</TD>
|
---|
815 | + </TR>
|
---|
816 | + </TBODY>
|
---|
817 | + </TABLE>
|
---|
818 | *
|
---|
819 | * This function will create a tree as follows:
|
---|
820 | +
|
---|
821 | + ÚÄÄÄÄÄÄÄÄÄÄÄÄ¿
|
---|
822 | + ³ TABLE ³ (only ELEMENT node in root DOCUMENT node)
|
---|
823 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
824 | + ³
|
---|
825 | + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
|
---|
826 | + ³ TBODY ³ (only ELEMENT node in root "TABLE" node)
|
---|
827 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
828 | + ÚÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÄÄÄÄ¿
|
---|
829 | + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
|
---|
830 | + ³ TR ³ ³ TR ³
|
---|
831 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
832 | + ÚÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÁÄÄÄÄÄÄ¿
|
---|
833 | + ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿
|
---|
834 | + ³ TD ³ ³ TD ³ ³ TD ³ ³ TD ³
|
---|
835 | + ÀÄÄÂÄÄÙ ÀÄÄÂÄÄÙ ÀÄÄÄÂÄÙ ÀÄÄÂÄÄÙ
|
---|
836 | + ÉÍÍÍÍÍÊÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÊÍÍÍÍÍÍÍ»
|
---|
837 | + ºColumn 1-1º ºColumn 1-2º ºColumn 2-1º ºColumn 2-2º (one TEXT node in each parent node)
|
---|
838 | + ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ
|
---|
839 | */
|
---|
840 |
|
---|
841 | ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
|
---|
842 | const char *pcszBuf, // in: buffer to search
|
---|
843 | PFNVALIDATE pfnValidateTag)
|
---|
844 | {
|
---|
845 | ULONG ulrc = 0;
|
---|
846 |
|
---|
847 | PLINKLIST pllTags = BuildTagsList(pcszBuf);
|
---|
848 |
|
---|
849 | // now create DOMNODE's according to that list...
|
---|
850 | const char *pcszError = 0;
|
---|
851 | CreateNodesForBuf(pcszBuf,
|
---|
852 | NULL, // enitre buffer
|
---|
853 | pllTags,
|
---|
854 | pParentNode,
|
---|
855 | pfnValidateTag,
|
---|
856 | &pcszError);
|
---|
857 |
|
---|
858 | lstFree(pllTags);
|
---|
859 |
|
---|
860 | return (ulrc);
|
---|
861 | }
|
---|
862 |
|
---|
863 | /*
|
---|
864 | *@@ xmlCreateDocumentFromString:
|
---|
865 | * creates a DOCUMENT DOMNODE and calls xmlParse
|
---|
866 | * to break down the specified buffer into that
|
---|
867 | * node.
|
---|
868 | */
|
---|
869 |
|
---|
870 | PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
|
---|
871 | PFNVALIDATE pfnValidateTag)
|
---|
872 | {
|
---|
873 | PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
|
---|
874 | DOMNODE_DOCUMENT);
|
---|
875 | xmlParse(pDocument,
|
---|
876 | pcszXML,
|
---|
877 | pfnValidateTag);
|
---|
878 |
|
---|
879 | return (pDocument);
|
---|
880 | }
|
---|
881 |
|
---|
882 |
|
---|