1 |
|
---|
2 | /*
|
---|
3 | *@@sourcefile xml.c:
|
---|
4 | * XML parsing.
|
---|
5 | *
|
---|
6 | * This is vaguely modelled after the Document Object Model
|
---|
7 | * (DOM) standardized by the W3C.
|
---|
8 | *
|
---|
9 | * In short, DOM specifies that an XML document is broken
|
---|
10 | * up into a tree of nodes, representing the various parts
|
---|
11 | * of an XML document. Most importantly, we have:
|
---|
12 | *
|
---|
13 | * -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
|
---|
14 | *
|
---|
15 | * -- ATTRIBUTE: an attribute to an element.
|
---|
16 | *
|
---|
17 | * -- TEXT: a piece of, well, text.
|
---|
18 | *
|
---|
19 | * -- COMMENT: a comment.
|
---|
20 | *
|
---|
21 | * See xmlParse() for a more detailed explanation.
|
---|
22 | *
|
---|
23 | * However, since this implementation was supposed to be a
|
---|
24 | * C-only interface, we do not implement inheritance. Instead,
|
---|
25 | * each XML document is broken up into a tree of DOMNODE's only,
|
---|
26 | * each of which has a special type.
|
---|
27 | *
|
---|
28 | * It shouldn't be too difficult to write a C++ encapsulation
|
---|
29 | * of this which implements all the methods required by the DOM
|
---|
30 | * standard.
|
---|
31 | *
|
---|
32 | * The main entry point into this is xmlParse or
|
---|
33 | * xmlCreateDocumentFromString. See remarks there for details.
|
---|
34 | *
|
---|
35 | * Limitations:
|
---|
36 | *
|
---|
37 | * 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
|
---|
38 | * and COMMENT nodes.
|
---|
39 | *
|
---|
40 | * 2) This doesn't use 16-bit characters, but 8-bit characters.
|
---|
41 | *
|
---|
42 | *@@header "xml.h"
|
---|
43 | *@@added V0.9.6 (2000-10-29) [umoeller]
|
---|
44 | */
|
---|
45 |
|
---|
46 | /*
|
---|
47 | * Copyright (C) 2000 Ulrich Möller.
|
---|
48 | * This file is part of the XWorkplace source package.
|
---|
49 | * XWorkplace is free software; you can redistribute it and/or modify
|
---|
50 | * it under the terms of the GNU General Public License as published
|
---|
51 | * by the Free Software Foundation, in version 2 as it comes in the
|
---|
52 | * "COPYING" file of the XWorkplace main distribution.
|
---|
53 | * This program is distributed in the hope that it will be useful,
|
---|
54 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
55 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
56 | * GNU General Public License for more details.
|
---|
57 | */
|
---|
58 |
|
---|
59 | #include <stdlib.h>
|
---|
60 | #include <string.h>
|
---|
61 |
|
---|
62 | #include <os2.h>
|
---|
63 |
|
---|
64 | #include "setup.h" // code generation and debugging options
|
---|
65 |
|
---|
66 | #include "helpers\linklist.h"
|
---|
67 | #include "helpers\stringh.h"
|
---|
68 | #include "helpers\xml.h"
|
---|
69 |
|
---|
70 | #pragma hdrstop
|
---|
71 |
|
---|
72 | /*
|
---|
73 | *@@category: Helpers\C helpers\XML parsing
|
---|
74 | */
|
---|
75 |
|
---|
76 | /*
|
---|
77 | *@@ xmlCreateNode:
|
---|
78 | * creates a new DOMNODE with the specified
|
---|
79 | * type and parent.
|
---|
80 | */
|
---|
81 |
|
---|
82 | PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
|
---|
83 | ULONG ulNodeType)
|
---|
84 | {
|
---|
85 | PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
|
---|
86 | if (pNewNode)
|
---|
87 | {
|
---|
88 | memset(pNewNode, 0, sizeof(DOMNODE));
|
---|
89 | pNewNode->ulNodeType = ulNodeType;
|
---|
90 | pNewNode->pParentNode = pParentNode;
|
---|
91 | if (pParentNode)
|
---|
92 | {
|
---|
93 | // parent specified:
|
---|
94 | // append this new node to the parent's
|
---|
95 | // list of child nodes
|
---|
96 | lstAppendItem(&pParentNode->listChildNodes,
|
---|
97 | pNewNode);
|
---|
98 | }
|
---|
99 |
|
---|
100 | lstInit(&pNewNode->listChildNodes, FALSE);
|
---|
101 | lstInit(&pNewNode->listAttributeNodes, FALSE);
|
---|
102 | }
|
---|
103 |
|
---|
104 | return (pNewNode);
|
---|
105 | }
|
---|
106 |
|
---|
107 | /*
|
---|
108 | *@@ xmlDeleteNode:
|
---|
109 | * deletes the specified node.
|
---|
110 | *
|
---|
111 | * If the node has child nodes, all of them are deleted
|
---|
112 | * as well. This recurses, if necessary.
|
---|
113 | *
|
---|
114 | * As a result, if the node is a document node, this
|
---|
115 | * deletes an entire document, including all of its
|
---|
116 | * child nodes.
|
---|
117 | *
|
---|
118 | * Returns:
|
---|
119 | *
|
---|
120 | * -- 0: NO_ERROR.
|
---|
121 | */
|
---|
122 |
|
---|
123 | ULONG xmlDeleteNode(PDOMNODE pNode)
|
---|
124 | {
|
---|
125 | ULONG ulrc = 0;
|
---|
126 |
|
---|
127 | if (!pNode)
|
---|
128 | {
|
---|
129 | ulrc = DOMERR_NOT_FOUND;
|
---|
130 | }
|
---|
131 | else
|
---|
132 | {
|
---|
133 | // recurse into child nodes
|
---|
134 | PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
|
---|
135 | while (pNodeThis)
|
---|
136 | {
|
---|
137 | // recurse!!
|
---|
138 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
139 |
|
---|
140 | pNodeThis = pNodeThis->pNext;
|
---|
141 | }
|
---|
142 |
|
---|
143 | // delete attribute nodes
|
---|
144 | pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
|
---|
145 | while (pNodeThis)
|
---|
146 | {
|
---|
147 | // recurse!!
|
---|
148 | xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
|
---|
149 |
|
---|
150 | pNodeThis = pNodeThis->pNext;
|
---|
151 | }
|
---|
152 |
|
---|
153 | if (pNode->pParentNode)
|
---|
154 | {
|
---|
155 | // node has a parent:
|
---|
156 | // remove this node from the parent's list
|
---|
157 | // of child nodes before deleting this node
|
---|
158 | lstRemoveItem(&pNode->pParentNode->listChildNodes,
|
---|
159 | pNode);
|
---|
160 | pNode->pParentNode = NULL;
|
---|
161 | }
|
---|
162 |
|
---|
163 | if (pNode->pszNodeName)
|
---|
164 | {
|
---|
165 | free(pNode->pszNodeName);
|
---|
166 | pNode->pszNodeName = NULL;
|
---|
167 | }
|
---|
168 | if (pNode->pszNodeValue)
|
---|
169 | {
|
---|
170 | free(pNode->pszNodeValue);
|
---|
171 | pNode->pszNodeValue = NULL;
|
---|
172 | }
|
---|
173 |
|
---|
174 | free(pNode);
|
---|
175 | }
|
---|
176 |
|
---|
177 | return (ulrc);
|
---|
178 | }
|
---|
179 |
|
---|
180 | /*
|
---|
181 | * TAGFOUND:
|
---|
182 | * structure created for each tag by BuildTagsList.
|
---|
183 | */
|
---|
184 |
|
---|
185 | typedef struct _TAGFOUND
|
---|
186 | {
|
---|
187 | BOOL fIsComment;
|
---|
188 | const char *pOpenBrck;
|
---|
189 | const char *pStartOfTagName;
|
---|
190 | const char *pFirstAfterTagName;
|
---|
191 | const char *pCloseBrck; // ptr to '>' char; this plus one should
|
---|
192 | // point to after the tag
|
---|
193 | } TAGFOUND, *PTAGFOUND;
|
---|
194 |
|
---|
195 | /*
|
---|
196 | * BuildTagsList:
|
---|
197 | * builds a LINKLIST containing TAGFOUND structs for
|
---|
198 | * each tag found in the specified buffer.
|
---|
199 | *
|
---|
200 | * This is a flat list without any tree structure. This
|
---|
201 | * only searches for the tags and doesn't create any
|
---|
202 | * hierarchy.
|
---|
203 | *
|
---|
204 | * The tags are simply added to the list in the order
|
---|
205 | * in which they are found in pcszBuffer.
|
---|
206 | *
|
---|
207 | * The list is auto-free, you can simply do a lstFree
|
---|
208 | * to clean up.
|
---|
209 | */
|
---|
210 |
|
---|
211 | PLINKLIST BuildTagsList(const char *pcszBuffer)
|
---|
212 | {
|
---|
213 | PLINKLIST pllTags = lstCreate(TRUE);
|
---|
214 |
|
---|
215 | const char *pSearchPos = pcszBuffer;
|
---|
216 |
|
---|
217 | while ((pSearchPos) && (*pSearchPos))
|
---|
218 | {
|
---|
219 | // find first '<'
|
---|
220 | PSZ pOpenBrck = strchr(pSearchPos, '<');
|
---|
221 | if (!pOpenBrck)
|
---|
222 | // no open bracket found: stop search
|
---|
223 | pSearchPos = 0;
|
---|
224 | else
|
---|
225 | {
|
---|
226 | if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
|
---|
227 | {
|
---|
228 | // it's a comment:
|
---|
229 | // treat that differently
|
---|
230 | const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
|
---|
231 | const char *pCloseBrck = 0;
|
---|
232 | const char *pFirstAfterTagName = 0;
|
---|
233 | PTAGFOUND pTagFound;
|
---|
234 | if (!pEndOfComment)
|
---|
235 | {
|
---|
236 | // no end of comment found:
|
---|
237 | // skip entire rest of string
|
---|
238 | pCloseBrck = pOpenBrck + strlen(pOpenBrck);
|
---|
239 | pFirstAfterTagName = pCloseBrck;
|
---|
240 | pSearchPos = 0;
|
---|
241 | }
|
---|
242 | else
|
---|
243 | {
|
---|
244 | pCloseBrck = pEndOfComment + 2; // point directly to '>'
|
---|
245 | pFirstAfterTagName = pCloseBrck + 1;
|
---|
246 | }
|
---|
247 |
|
---|
248 | // append it to the list
|
---|
249 | pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
250 | if (!pTagFound)
|
---|
251 | // error:
|
---|
252 | pSearchPos = 0;
|
---|
253 | else
|
---|
254 | {
|
---|
255 | pTagFound->fIsComment = TRUE;
|
---|
256 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
257 | pTagFound->pStartOfTagName = pOpenBrck + 1;
|
---|
258 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
259 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
260 |
|
---|
261 | lstAppendItem(pllTags, pTagFound);
|
---|
262 | }
|
---|
263 |
|
---|
264 | pSearchPos = pFirstAfterTagName;
|
---|
265 | }
|
---|
266 | else
|
---|
267 | {
|
---|
268 | // no comment:
|
---|
269 | // find matching closing bracket
|
---|
270 | const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
|
---|
271 | if (!pCloseBrck)
|
---|
272 | pSearchPos = 0;
|
---|
273 | else
|
---|
274 | {
|
---|
275 | const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
|
---|
276 | // if we have another opening bracket before the closing bracket,
|
---|
277 | if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
|
---|
278 | // ignore this one
|
---|
279 | pSearchPos = pNextOpenBrck;
|
---|
280 | else
|
---|
281 | {
|
---|
282 | // OK, apparently we have a tag.
|
---|
283 | // Skip all spaces after the tag.
|
---|
284 | const char *pTagName = pOpenBrck + 1;
|
---|
285 | while ( (*pTagName)
|
---|
286 | && ( (*pTagName == ' ')
|
---|
287 | || (*pTagName == '\r')
|
---|
288 | || (*pTagName == '\n')
|
---|
289 | )
|
---|
290 | )
|
---|
291 | pTagName++;
|
---|
292 | if (!*pTagName)
|
---|
293 | // no tag name: stop
|
---|
294 | pSearchPos = 0;
|
---|
295 | else
|
---|
296 | {
|
---|
297 | // ookaaayyy, we got a tag now.
|
---|
298 | // Find first space or ">" after tag name:
|
---|
299 | const char *pFirstAfterTagName = pTagName + 1;
|
---|
300 | while ( (*pFirstAfterTagName)
|
---|
301 | && (*pFirstAfterTagName != ' ')
|
---|
302 | && (*pFirstAfterTagName != '\n')
|
---|
303 | && (*pFirstAfterTagName != '\r')
|
---|
304 | && (*pFirstAfterTagName != '>')
|
---|
305 | )
|
---|
306 | pFirstAfterTagName++;
|
---|
307 | if (!*pFirstAfterTagName)
|
---|
308 | // no closing bracket found:
|
---|
309 | pSearchPos = 0;
|
---|
310 | else
|
---|
311 | {
|
---|
312 | // got a tag name:
|
---|
313 | // append it to the list
|
---|
314 | PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
|
---|
315 | if (!pTagFound)
|
---|
316 | // error:
|
---|
317 | pSearchPos = 0;
|
---|
318 | else
|
---|
319 | {
|
---|
320 | pTagFound->fIsComment = FALSE;
|
---|
321 | pTagFound->pOpenBrck = pOpenBrck;
|
---|
322 | pTagFound->pStartOfTagName = pTagName;
|
---|
323 | pTagFound->pFirstAfterTagName = pFirstAfterTagName;
|
---|
324 | pTagFound->pCloseBrck = pCloseBrck;
|
---|
325 |
|
---|
326 | lstAppendItem(pllTags, pTagFound);
|
---|
327 |
|
---|
328 | // search on after closing bracket
|
---|
329 | pSearchPos = pCloseBrck + 1;
|
---|
330 | }
|
---|
331 | }
|
---|
332 | }
|
---|
333 | }
|
---|
334 | } // end else if (!pCloseBrck)
|
---|
335 | } // end else if (strncmp(pOpenBrck + 1, "!--"))
|
---|
336 | } // end if (pOpenBrck)
|
---|
337 | } // end while
|
---|
338 |
|
---|
339 | return (pllTags);
|
---|
340 | }
|
---|
341 |
|
---|
342 | /*
|
---|
343 | *@@ CreateTextNode:
|
---|
344 | * shortcut for creating a TEXT node. Calls
|
---|
345 | * xmlCreateNode in turn.
|
---|
346 | *
|
---|
347 | * The text is extracted from in between the
|
---|
348 | * two pointers using strhSubstr.
|
---|
349 | */
|
---|
350 |
|
---|
351 | PDOMNODE CreateTextNode(PDOMNODE pParentNode,
|
---|
352 | const char *pStart,
|
---|
353 | const char *pEnd)
|
---|
354 | {
|
---|
355 | PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
|
---|
356 | DOMNODE_TEXT);
|
---|
357 | if (pNewTextNode)
|
---|
358 | pNewTextNode->pszNodeValue = strhSubstr(pStart,
|
---|
359 | pEnd);
|
---|
360 |
|
---|
361 | return (pNewTextNode);
|
---|
362 | }
|
---|
363 |
|
---|
364 | /*
|
---|
365 | *@@ CreateElementNode:
|
---|
366 | * shortcut for creating a new ELEMENT node and
|
---|
367 | * parsing attributes at the same time.
|
---|
368 | *
|
---|
369 | * pszTagName is assumed to be static (no copy
|
---|
370 | * is made).
|
---|
371 | *
|
---|
372 | * pAttribs is assumed to point to an attributes
|
---|
373 | * string. This function creates ATTRIBUTE nodes
|
---|
374 | * from that string until either a null character
|
---|
375 | * or '>' is found.
|
---|
376 | */
|
---|
377 |
|
---|
378 | PDOMNODE CreateElementNode(PDOMNODE pParentNode,
|
---|
379 | PSZ pszTagName,
|
---|
380 | const char *pAttribs) // in: ptr to attribs; can be NULL
|
---|
381 | {
|
---|
382 | PDOMNODE pNewNode = xmlCreateNode(pParentNode,
|
---|
383 | DOMNODE_ELEMENT);
|
---|
384 | if (pNewNode)
|
---|
385 | {
|
---|
386 | const char *p = pAttribs;
|
---|
387 |
|
---|
388 | pNewNode->pszNodeName = pszTagName;
|
---|
389 |
|
---|
390 | // find-start-of-attribute loop
|
---|
391 | while (p)
|
---|
392 | {
|
---|
393 | switch (*p)
|
---|
394 | {
|
---|
395 | case 0:
|
---|
396 | case '>':
|
---|
397 | p = 0;
|
---|
398 | break;
|
---|
399 |
|
---|
400 | case ' ':
|
---|
401 | case '\n':
|
---|
402 | case '\r':
|
---|
403 | p++;
|
---|
404 | break;
|
---|
405 |
|
---|
406 | default:
|
---|
407 | {
|
---|
408 | // first (or next) non-space:
|
---|
409 | // that's the start of an attrib, probably
|
---|
410 | // go until we find a space or '>'
|
---|
411 |
|
---|
412 | const char *pNameStart = p,
|
---|
413 | *p2 = p;
|
---|
414 |
|
---|
415 | const char *pEquals = 0,
|
---|
416 | *pFirstQuote = 0,
|
---|
417 | *pEnd = 0; // last char... non-inclusive!
|
---|
418 |
|
---|
419 | // copy-rest-of-attribute loop
|
---|
420 | while (p2)
|
---|
421 | {
|
---|
422 | switch (*p2)
|
---|
423 | {
|
---|
424 | case '"':
|
---|
425 | if (!pEquals)
|
---|
426 | {
|
---|
427 | // '"' cannot appear before '='
|
---|
428 | p2 = 0;
|
---|
429 | p = 0;
|
---|
430 | }
|
---|
431 | else
|
---|
432 | {
|
---|
433 | if (pFirstQuote)
|
---|
434 | {
|
---|
435 | // second quote:
|
---|
436 | // get value between quotes
|
---|
437 | pEnd = p2;
|
---|
438 | // we're done with this one
|
---|
439 | p = p2 + 1;
|
---|
440 | p2 = 0;
|
---|
441 | }
|
---|
442 | else
|
---|
443 | {
|
---|
444 | // first quote:
|
---|
445 | pFirstQuote = p2;
|
---|
446 | p2++;
|
---|
447 | }
|
---|
448 | }
|
---|
449 | break;
|
---|
450 |
|
---|
451 | case '=':
|
---|
452 | if (!pEquals)
|
---|
453 | {
|
---|
454 | // first equals sign:
|
---|
455 | pEquals = p2;
|
---|
456 | // extract name
|
---|
457 | p2++;
|
---|
458 | }
|
---|
459 | else
|
---|
460 | if (pFirstQuote)
|
---|
461 | p2++;
|
---|
462 | else
|
---|
463 | {
|
---|
464 | // error
|
---|
465 | p2 = 0;
|
---|
466 | p = 0;
|
---|
467 | }
|
---|
468 | break;
|
---|
469 |
|
---|
470 | case ' ':
|
---|
471 | case '\n':
|
---|
472 | case '\r':
|
---|
473 | // spaces can appear in quotes
|
---|
474 | if (pFirstQuote)
|
---|
475 | // just continue
|
---|
476 | p2++;
|
---|
477 | else
|
---|
478 | {
|
---|
479 | // end of it!
|
---|
480 | pEnd = p2;
|
---|
481 | p = p2 + 1;
|
---|
482 | p2 = 0;
|
---|
483 | }
|
---|
484 | break;
|
---|
485 |
|
---|
486 | case 0:
|
---|
487 | case '>':
|
---|
488 | {
|
---|
489 | pEnd = p2;
|
---|
490 | // quit inner AND outer loop
|
---|
491 | p2 = 0;
|
---|
492 | p = 0;
|
---|
493 | break; }
|
---|
494 |
|
---|
495 | default:
|
---|
496 | p2++;
|
---|
497 | }
|
---|
498 | } // end while (p2)
|
---|
499 |
|
---|
500 | if (pEnd)
|
---|
501 | {
|
---|
502 | PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
|
---|
503 | DOMNODE_ATTRIBUTE);
|
---|
504 | if (pAttribNode)
|
---|
505 | {
|
---|
506 | if (pEquals)
|
---|
507 | {
|
---|
508 | pAttribNode->pszNodeName
|
---|
509 | = strhSubstr(pNameStart, pEquals);
|
---|
510 |
|
---|
511 | // did we have quotes?
|
---|
512 | if (pFirstQuote)
|
---|
513 | pAttribNode->pszNodeValue
|
---|
514 | = strhSubstr(pFirstQuote + 1, pEnd);
|
---|
515 | else
|
---|
516 | pAttribNode->pszNodeValue
|
---|
517 | = strhSubstr(pEquals + 1, pEnd);
|
---|
518 | }
|
---|
519 | else
|
---|
520 | // no "equals":
|
---|
521 | pAttribNode->pszNodeName
|
---|
522 | = strhSubstr(pNameStart, pEnd);
|
---|
523 | }
|
---|
524 | }
|
---|
525 | break; }
|
---|
526 | }
|
---|
527 | }
|
---|
528 | }
|
---|
529 |
|
---|
530 | return (pNewNode);
|
---|
531 | }
|
---|
532 |
|
---|
533 | /*
|
---|
534 | *@@ CreateNodesForBuf:
|
---|
535 | * this gets called (recursively) for a piece of text
|
---|
536 | * for which we need to create TEXT and ELEMENT DOMNODE's.
|
---|
537 | *
|
---|
538 | * This does the heavy work for xmlParse.
|
---|
539 | *
|
---|
540 | * If an error (!= 0) is returned, *ppError points to
|
---|
541 | * the code part that failed.
|
---|
542 | */
|
---|
543 |
|
---|
544 | ULONG CreateNodesForBuf(const char *pcszBufStart,
|
---|
545 | const char *pcszBufEnd, // in: can be NULL
|
---|
546 | PLINKLIST pllTagsList,
|
---|
547 | PDOMNODE pParentNode,
|
---|
548 | PFNVALIDATE pfnValidateTag,
|
---|
549 | const char **ppError);
|
---|
550 | {
|
---|
551 | ULONG ulrc = 0;
|
---|
552 | PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
|
---|
553 | const char *pBufCurrent = pcszBufStart;
|
---|
554 | BOOL fContinue = TRUE;
|
---|
555 |
|
---|
556 | if (pcszBufEnd == NULL)
|
---|
557 | pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
|
---|
558 |
|
---|
559 | while (fContinue)
|
---|
560 | {
|
---|
561 | if ( (!*pBufCurrent)
|
---|
562 | || (pBufCurrent == pcszBufEnd)
|
---|
563 | )
|
---|
564 | // end of buf reached:
|
---|
565 | fContinue = FALSE;
|
---|
566 |
|
---|
567 | else if (!pCurrentTagListNode)
|
---|
568 | {
|
---|
569 | // no (more) tags for this buffer:
|
---|
570 | CreateTextNode(pParentNode,
|
---|
571 | pBufCurrent,
|
---|
572 | pcszBufEnd);
|
---|
573 | fContinue = FALSE;
|
---|
574 | }
|
---|
575 | else
|
---|
576 | {
|
---|
577 | // another tag found:
|
---|
578 | PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
|
---|
579 | const char *pStartOfTag = pFoundTag->pOpenBrck;
|
---|
580 | if (pStartOfTag > pBufCurrent + 1)
|
---|
581 | {
|
---|
582 | // we have text before the opening tag:
|
---|
583 | // make a DOMTEXT out of this
|
---|
584 | CreateTextNode(pParentNode,
|
---|
585 | pBufCurrent,
|
---|
586 | pStartOfTag);
|
---|
587 | pBufCurrent = pStartOfTag;
|
---|
588 | }
|
---|
589 | else
|
---|
590 | {
|
---|
591 | // OK, go for this tag...
|
---|
592 |
|
---|
593 | if (*(pFoundTag->pStartOfTagName) == '/')
|
---|
594 | {
|
---|
595 | // this is a closing tag: that's an error
|
---|
596 | ulrc = 1;
|
---|
597 | *ppError = pFoundTag->pStartOfTagName;
|
---|
598 | fContinue = FALSE;
|
---|
599 | }
|
---|
600 | else if (pFoundTag->fIsComment)
|
---|
601 | {
|
---|
602 | // it's a comment: that's simple
|
---|
603 | PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
|
---|
604 | DOMNODE_COMMENT);
|
---|
605 | if (!pCommentNode)
|
---|
606 | ulrc = ERROR_NOT_ENOUGH_MEMORY;
|
---|
607 | else
|
---|
608 | {
|
---|
609 | pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
|
---|
610 | pFoundTag->pCloseBrck - 2);
|
---|
611 | }
|
---|
612 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
613 | }
|
---|
614 | else
|
---|
615 | {
|
---|
616 | BOOL fKeepTagName = FALSE; // free pszTagName below
|
---|
617 | PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
|
---|
618 | pFoundTag->pFirstAfterTagName);
|
---|
619 | if (!pszTagName)
|
---|
620 | // zero-length string:
|
---|
621 | // go ahead after that
|
---|
622 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
623 | else
|
---|
624 | {
|
---|
625 | // XML knows two types of elements:
|
---|
626 |
|
---|
627 | // a) Element pairs, which have opening and closing tags
|
---|
628 | // (<TAG> and </TAG>
|
---|
629 | // b) Single elements, which must have "/" as their last
|
---|
630 | // character; these have no closing tag
|
---|
631 | // (<TAG/>)
|
---|
632 |
|
---|
633 | // However, HTML doesn't usually tag single elements
|
---|
634 | // with a trailing '/'. To maintain compatibility,
|
---|
635 | // if we don't find a matching closing tag, we extract
|
---|
636 | // everything up to the end of the buffer.
|
---|
637 |
|
---|
638 | ULONG ulTagNameLen = strlen(pszTagName);
|
---|
639 |
|
---|
640 | // search for closing tag first...
|
---|
641 | // create string with closing tag to search for;
|
---|
642 | // that's '/' plus opening tag name
|
---|
643 | ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
|
---|
644 | PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
|
---|
645 | PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
|
---|
646 | PLISTNODE pTagListNodeForChildren = pTagListNode2;
|
---|
647 |
|
---|
648 | BOOL fClosingTagFound = FALSE;
|
---|
649 |
|
---|
650 | *pszClosingTag2Find = '/';
|
---|
651 | strcpy(pszClosingTag2Find + 1, pszTagName);
|
---|
652 |
|
---|
653 | // now find matching closing tag
|
---|
654 | while (pTagListNode2)
|
---|
655 | {
|
---|
656 | PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
|
---|
657 | ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
|
---|
658 | // compare tag name lengths
|
---|
659 | if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
660 | {
|
---|
661 | // same length:
|
---|
662 | // compare
|
---|
663 | if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
664 | pszClosingTag2Find,
|
---|
665 | ulClosingTagLen2Find)
|
---|
666 | == 0)
|
---|
667 | {
|
---|
668 | // found matching closing tag:
|
---|
669 |
|
---|
670 | // we now have
|
---|
671 | // -- pCurrentTagListNode pointing to the opening tag
|
---|
672 | // (pFoundTag has its PTAGFOUND item data)
|
---|
673 | // -- pTagListNode2 pointing to the closing tag
|
---|
674 | // (pFoundTag2 has its PTAGFOUND item data)
|
---|
675 |
|
---|
676 | // create DOM node
|
---|
677 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
678 | pszTagName,
|
---|
679 | pFoundTag->pFirstAfterTagName);
|
---|
680 | if (pNewNode)
|
---|
681 | {
|
---|
682 | ULONG ulAction = XMLACTION_BREAKUP;
|
---|
683 |
|
---|
684 | fKeepTagName = TRUE; // do not free below
|
---|
685 |
|
---|
686 | // validate tag
|
---|
687 | if (pfnValidateTag)
|
---|
688 | {
|
---|
689 | // validator specified:
|
---|
690 | ulAction = pfnValidateTag(pszTagName);
|
---|
691 | }
|
---|
692 |
|
---|
693 | if (ulAction == XMLACTION_COPYASTEXT)
|
---|
694 | {
|
---|
695 | CreateTextNode(pNewNode,
|
---|
696 | pFoundTag->pCloseBrck + 1,
|
---|
697 | pFoundTag2->pOpenBrck - 1);
|
---|
698 | }
|
---|
699 | else if (ulAction == XMLACTION_BREAKUP)
|
---|
700 | {
|
---|
701 | PLINKLIST pllSubList = lstCreate(FALSE);
|
---|
702 | PLISTNODE pSubNode = 0;
|
---|
703 | ULONG cSubNodes = 0;
|
---|
704 |
|
---|
705 | // text buffer to search
|
---|
706 | const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
|
---|
707 | const char *pSubBufEnd = pFoundTag2->pOpenBrck;
|
---|
708 |
|
---|
709 | // create a child list containing
|
---|
710 | // all tags from the first tag after
|
---|
711 | // the current opening tag to the closing tag
|
---|
712 | for (pSubNode = pTagListNodeForChildren;
|
---|
713 | pSubNode != pTagListNode2;
|
---|
714 | pSubNode = pSubNode->pNext)
|
---|
715 | {
|
---|
716 | lstAppendItem(pllSubList,
|
---|
717 | pSubNode->pItemData);
|
---|
718 | cSubNodes++;
|
---|
719 | }
|
---|
720 |
|
---|
721 | // now recurse to build child nodes
|
---|
722 | // (text and elements), even if the
|
---|
723 | // list is empty, we can have text!
|
---|
724 | CreateNodesForBuf(pSubBufStart,
|
---|
725 | pSubBufEnd,
|
---|
726 | pllSubList,
|
---|
727 | pNewNode,
|
---|
728 | pfnValidateTag),
|
---|
729 |
|
---|
730 | lstFree(pllSubList);
|
---|
731 | } // end if (ulAction == XMLACTION_BREAKUP)
|
---|
732 |
|
---|
733 | // now search on after the closing tag
|
---|
734 | // we've found; the next tag will be set below
|
---|
735 | pCurrentTagListNode = pTagListNode2;
|
---|
736 | pBufCurrent = pFoundTag2->pCloseBrck + 1;
|
---|
737 |
|
---|
738 | fClosingTagFound = TRUE;
|
---|
739 |
|
---|
740 | break; // // while (pTagListNode2)
|
---|
741 | } // end if (pNewNode)
|
---|
742 | } // end if (memcmp(pFoundTag2->pStartOfTagName,
|
---|
743 | } // if (ulFoundTag2Len == ulClosingTagLen2Find)
|
---|
744 |
|
---|
745 | pTagListNode2 = pTagListNode2->pNext;
|
---|
746 |
|
---|
747 | } // while (pTagListNode2)
|
---|
748 |
|
---|
749 | if (!fClosingTagFound)
|
---|
750 | {
|
---|
751 | // no matching closing tag found:
|
---|
752 | // that's maybe a block of not well-formed XML
|
---|
753 |
|
---|
754 | // e.g. with WarpIN:
|
---|
755 | // <README> <-- we start after this
|
---|
756 | // block of plain HTML with <P> tags and such
|
---|
757 | // </README>
|
---|
758 |
|
---|
759 | // just create an element
|
---|
760 | PDOMNODE pNewNode = CreateElementNode(pParentNode,
|
---|
761 | pszTagName,
|
---|
762 | pFoundTag->pFirstAfterTagName);
|
---|
763 | if (pNewNode)
|
---|
764 | fKeepTagName = TRUE;
|
---|
765 |
|
---|
766 | // now search on after the closing tag
|
---|
767 | // we've found; the next tag will be set below
|
---|
768 | // pCurrentTagListNode = pTagListNodeForChildren;
|
---|
769 | pBufCurrent = pFoundTag->pCloseBrck + 1;
|
---|
770 | }
|
---|
771 |
|
---|
772 | free(pszClosingTag2Find);
|
---|
773 |
|
---|
774 | if (!fKeepTagName)
|
---|
775 | free(pszTagName);
|
---|
776 | } // end if (pszTagName)
|
---|
777 | }
|
---|
778 |
|
---|
779 | pCurrentTagListNode = pCurrentTagListNode->pNext;
|
---|
780 | }
|
---|
781 | }
|
---|
782 | }
|
---|
783 |
|
---|
784 | return (ulrc);
|
---|
785 | }
|
---|
786 |
|
---|
787 | /*
|
---|
788 | * xmlParse:
|
---|
789 | * generic XML parser.
|
---|
790 | *
|
---|
791 | * This takes the specified zero-terminated string
|
---|
792 | * in pcszBuf and parses it, adding DOMNODE's as
|
---|
793 | * children to pNode.
|
---|
794 | *
|
---|
795 | * This recurses, if necessary, to build a node tree.
|
---|
796 | *
|
---|
797 | * Example: Take this HTML table definition:
|
---|
798 | +
|
---|
799 | + <TABLE>
|
---|
800 | + <TBODY>
|
---|
801 | + <TR>
|
---|
802 | + <TD>Column 1-1</TD>
|
---|
803 | + <TD>Column 1-2</TD>
|
---|
804 | + </TR>
|
---|
805 | + <TR>
|
---|
806 | + <TD>Column 2-1</TD>
|
---|
807 | + <TD>Column 2-2</TD>
|
---|
808 | + </TR>
|
---|
809 | + </TBODY>
|
---|
810 | + </TABLE>
|
---|
811 | *
|
---|
812 | * This function will create a tree as follows:
|
---|
813 | +
|
---|
814 | + ÚÄÄÄÄÄÄÄÄÄÄÄÄ¿
|
---|
815 | + ³ TABLE ³ (only ELEMENT node in root DOCUMENT node)
|
---|
816 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
817 | + ³
|
---|
818 | + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
|
---|
819 | + ³ TBODY ³ (only ELEMENT node in root "TABLE" node)
|
---|
820 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
821 | + ÚÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÄÄÄÄ¿
|
---|
822 | + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
|
---|
823 | + ³ TR ³ ³ TR ³
|
---|
824 | + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
|
---|
825 | + ÚÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÁÄÄÄÄÄÄ¿
|
---|
826 | + ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿
|
---|
827 | + ³ TD ³ ³ TD ³ ³ TD ³ ³ TD ³
|
---|
828 | + ÀÄÄÂÄÄÙ ÀÄÄÂÄÄÙ ÀÄÄÄÂÄÙ ÀÄÄÂÄÄÙ
|
---|
829 | + ÉÍÍÍÍÍÊÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÊÍÍÍÍÍÍÍ»
|
---|
830 | + ºColumn 1-1º ºColumn 1-2º ºColumn 2-1º ºColumn 2-2º (one TEXT node in each parent node)
|
---|
831 | + ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ
|
---|
832 | */
|
---|
833 |
|
---|
834 | ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
|
---|
835 | const char *pcszBuf, // in: buffer to search
|
---|
836 | PFNVALIDATE pfnValidateTag)
|
---|
837 | {
|
---|
838 | ULONG ulrc = 0;
|
---|
839 |
|
---|
840 | const char *pSearchPos = pcszBuf;
|
---|
841 |
|
---|
842 | PLINKLIST pllTags = BuildTagsList(pcszBuf);
|
---|
843 |
|
---|
844 | // now create DOMNODE's according to that list...
|
---|
845 | CreateNodesForBuf(pcszBuf,
|
---|
846 | NULL, // enitre buffer
|
---|
847 | pllTags,
|
---|
848 | pParentNode,
|
---|
849 | pfnValidateTag);
|
---|
850 |
|
---|
851 | lstFree(pllTags);
|
---|
852 |
|
---|
853 | return (ulrc);
|
---|
854 | }
|
---|
855 |
|
---|
856 | /*
|
---|
857 | *@@ xmlCreateDocumentFromString:
|
---|
858 | * creates a DOCUMENT DOMNODE and calls xmlParse
|
---|
859 | * to break down the specified buffer into that
|
---|
860 | * node.
|
---|
861 | */
|
---|
862 |
|
---|
863 | PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
|
---|
864 | PFNVALIDATE pfnValidateTag)
|
---|
865 | {
|
---|
866 | PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
|
---|
867 | DOMNODE_DOCUMENT);
|
---|
868 | xmlParse(pDocument,
|
---|
869 | pcszXML,
|
---|
870 | pfnValidateTag);
|
---|
871 |
|
---|
872 | return (pDocument);
|
---|
873 | }
|
---|
874 |
|
---|
875 |
|
---|