source: trunk/src/helpers/xml.c@ 12

Last change on this file since 12 was 12, checked in by umoeller, 25 years ago

Updated string helpers.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 34.9 KB
Line 
1
2/*
3 *@@sourcefile xml.c:
4 * XML parsing.
5 *
6 * This is vaguely modelled after the Document Object Model
7 * (DOM) standardized by the W3C.
8 *
9 * In short, DOM specifies that an XML document is broken
10 * up into a tree of nodes, representing the various parts
11 * of an XML document. Most importantly, we have:
12 *
13 * -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
14 *
15 * -- ATTRIBUTE: an attribute to an element.
16 *
17 * -- TEXT: a piece of, well, text.
18 *
19 * -- COMMENT: a comment.
20 *
21 * See xmlParse() for a more detailed explanation.
22 *
23 * However, since this implementation was supposed to be a
24 * C-only interface, we do not implement inheritance. Instead,
25 * each XML document is broken up into a tree of DOMNODE's only,
26 * each of which has a special type.
27 *
28 * It shouldn't be too difficult to write a C++ encapsulation
29 * of this which implements all the methods required by the DOM
30 * standard.
31 *
32 * The main entry point into this is xmlParse or
33 * xmlCreateDocumentFromString. See remarks there for details.
34 *
35 * Limitations:
36 *
37 * 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
38 * and COMMENT nodes.
39 *
40 * 2) This doesn't use 16-bit characters, but 8-bit characters.
41 *
42 *@@header "helpers\xml.h"
43 *@@added V0.9.6 (2000-10-29) [umoeller]
44 */
45
46/*
47 * Copyright (C) 2000 Ulrich Möller.
48 * This file is part of the XWorkplace source package.
49 * XWorkplace is free software; you can redistribute it and/or modify
50 * it under the terms of the GNU General Public License as published
51 * by the Free Software Foundation, in version 2 as it comes in the
52 * "COPYING" file of the XWorkplace main distribution.
53 * This program is distributed in the hope that it will be useful,
54 * but WITHOUT ANY WARRANTY; without even the implied warranty of
55 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 * GNU General Public License for more details.
57 */
58
59#define OS2EMX_PLAIN_CHAR
60 // this is needed for "os2emx.h"; if this is defined,
61 // emx will define PSZ as _signed_ char, otherwise
62 // as unsigned char
63
64#define INCL_DOSERRORS
65#include <os2.h>
66
67#include <stdlib.h>
68#include <string.h>
69
70#include "setup.h" // code generation and debugging options
71
72#include "helpers\linklist.h"
73#include "helpers\stringh.h"
74#include "helpers\xml.h"
75
76#pragma hdrstop
77
78/*
79 *@@category: Helpers\C helpers\XML\Node management
80 */
81
82/* ******************************************************************
83 *
84 * Node Management
85 *
86 ********************************************************************/
87
88/*
89 *@@ xmlCreateNode:
90 * creates a new DOMNODE with the specified
91 * type and parent.
92 */
93
94PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
95 ULONG ulNodeType)
96{
97 PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
98 if (pNewNode)
99 {
100 memset(pNewNode, 0, sizeof(DOMNODE));
101 pNewNode->ulNodeType = ulNodeType;
102 pNewNode->pParentNode = pParentNode;
103 if (pParentNode)
104 {
105 // parent specified:
106 // append this new node to the parent's
107 // list of child nodes
108 lstAppendItem(&pParentNode->listChildNodes,
109 pNewNode);
110 }
111
112 lstInit(&pNewNode->listChildNodes, FALSE);
113 lstInit(&pNewNode->listAttributeNodes, FALSE);
114 }
115
116 return (pNewNode);
117}
118
119/*
120 *@@ xmlDeleteNode:
121 * deletes the specified node.
122 *
123 * If the node has child nodes, all of them are deleted
124 * as well. This recurses, if necessary.
125 *
126 * As a result, if the node is a document node, this
127 * deletes an entire document, including all of its
128 * child nodes.
129 *
130 * Returns:
131 *
132 * -- 0: NO_ERROR.
133 */
134
135ULONG xmlDeleteNode(PDOMNODE pNode)
136{
137 ULONG ulrc = 0;
138
139 if (!pNode)
140 {
141 ulrc = DOMERR_NOT_FOUND;
142 }
143 else
144 {
145 // recurse into child nodes
146 PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
147 while (pNodeThis)
148 {
149 // recurse!!
150 xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
151
152 pNodeThis = pNodeThis->pNext;
153 }
154
155 // delete attribute nodes
156 pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
157 while (pNodeThis)
158 {
159 // recurse!!
160 xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
161
162 pNodeThis = pNodeThis->pNext;
163 }
164
165 if (pNode->pParentNode)
166 {
167 // node has a parent:
168 // remove this node from the parent's list
169 // of child nodes before deleting this node
170 lstRemoveItem(&pNode->pParentNode->listChildNodes,
171 pNode);
172 pNode->pParentNode = NULL;
173 }
174
175 if (pNode->pszNodeName)
176 {
177 free(pNode->pszNodeName);
178 pNode->pszNodeName = NULL;
179 }
180 if (pNode->pszNodeValue)
181 {
182 free(pNode->pszNodeValue);
183 pNode->pszNodeValue = NULL;
184 }
185
186 free(pNode);
187 }
188
189 return (ulrc);
190}
191
192/*
193 *@@category: Helpers\C helpers\XML\Parsing
194 */
195
196/* ******************************************************************
197 *
198 * Tokenizing (Compiling)
199 *
200 ********************************************************************/
201
202/*
203 *@@ xmlTokenize:
204 * this takes any block of XML text and "tokenizes"
205 * it.
206 *
207 * Tokenizing (or compiling, or "scanning" in bison/flex
208 * terms) means preparing the XML code for parsing later.
209 * This finds all tags and tag attributes and creates
210 * special codes for them in the output buffer.
211 *
212 * For example:
213 +
214 + <TAG ATTR="text"> block </TAG>
215 +
216 * becomes
217 *
218 + 0xFF escape code
219 + 0x01 tag start code
220 + "TAG" tag name
221 + 0xFF end of tag name code
222 +
223 + 0xFF escape code
224 + 0x03 attribute name code
225 + "ATTR" attribute name
226 + 0xFF
227 + "text" attribute value (without quotes)
228 + 0xFF end of attribute code
229 +
230 + " block " regular text
231 +
232 + 0xFF escape code
233 + 0x01 tag start code
234 + "/TAG" tag name
235 + 0xFF end of tag name code
236 *
237 *@@added V0.9.6 (2000-11-01) [umoeller]
238 */
239
240PSZ xmlTokenize(const char *pcszXML)
241{
242}
243
244/* ******************************************************************
245 *
246 * Parsing
247 *
248 ********************************************************************/
249
250/*
251 * TAGFOUND:
252 * structure created for each tag by BuildTagsList.
253 */
254
255typedef struct _TAGFOUND
256{
257 BOOL fIsComment;
258 const char *pOpenBrck;
259 const char *pStartOfTagName;
260 const char *pFirstAfterTagName;
261 const char *pCloseBrck; // ptr to '>' char; this plus one should
262 // point to after the tag
263} TAGFOUND, *PTAGFOUND;
264
265/*
266 * BuildTagsList:
267 * builds a LINKLIST containing TAGFOUND structs for
268 * each tag found in the specified buffer.
269 *
270 * This is a flat list without any tree structure. This
271 * only searches for the tags and doesn't create any
272 * hierarchy.
273 *
274 * The tags are simply added to the list in the order
275 * in which they are found in pcszBuffer.
276 *
277 * The list is auto-free, you can simply do a lstFree
278 * to clean up.
279 */
280
281PLINKLIST BuildTagsList(const char *pcszBuffer)
282{
283 PLINKLIST pllTags = lstCreate(TRUE);
284
285 const char *pSearchPos = pcszBuffer;
286
287 while ((pSearchPos) && (*pSearchPos))
288 {
289 // find first '<'
290 PSZ pOpenBrck = strchr(pSearchPos, '<');
291 if (!pOpenBrck)
292 // no open bracket found: stop search
293 pSearchPos = 0;
294 else
295 {
296 if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
297 {
298 // it's a comment:
299 // treat that differently
300 const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
301 const char *pCloseBrck = 0;
302 const char *pFirstAfterTagName = 0;
303 PTAGFOUND pTagFound;
304 if (!pEndOfComment)
305 {
306 // no end of comment found:
307 // skip entire rest of string
308 pCloseBrck = pOpenBrck + strlen(pOpenBrck);
309 pFirstAfterTagName = pCloseBrck;
310 pSearchPos = 0;
311 }
312 else
313 {
314 pCloseBrck = pEndOfComment + 2; // point directly to '>'
315 pFirstAfterTagName = pCloseBrck + 1;
316 }
317
318 // append it to the list
319 pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
320 if (!pTagFound)
321 // error:
322 pSearchPos = 0;
323 else
324 {
325 pTagFound->fIsComment = TRUE;
326 pTagFound->pOpenBrck = pOpenBrck;
327 pTagFound->pStartOfTagName = pOpenBrck + 1;
328 pTagFound->pFirstAfterTagName = pFirstAfterTagName;
329 pTagFound->pCloseBrck = pCloseBrck;
330
331 lstAppendItem(pllTags, pTagFound);
332 }
333
334 pSearchPos = pFirstAfterTagName;
335 }
336 else
337 {
338 // no comment:
339 // find matching closing bracket
340 const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
341 if (!pCloseBrck)
342 pSearchPos = 0;
343 else
344 {
345 const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
346 // if we have another opening bracket before the closing bracket,
347 if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
348 // ignore this one
349 pSearchPos = pNextOpenBrck;
350 else
351 {
352 // OK, apparently we have a tag.
353 // Skip all spaces after the tag.
354 const char *pTagName = pOpenBrck + 1;
355 while ( (*pTagName)
356 && ( (*pTagName == ' ')
357 || (*pTagName == '\r')
358 || (*pTagName == '\n')
359 )
360 )
361 pTagName++;
362 if (!*pTagName)
363 // no tag name: stop
364 pSearchPos = 0;
365 else
366 {
367 // ookaaayyy, we got a tag now.
368 // Find first space or ">" after tag name:
369 const char *pFirstAfterTagName = pTagName + 1;
370 while ( (*pFirstAfterTagName)
371 && (*pFirstAfterTagName != ' ')
372 && (*pFirstAfterTagName != '\n')
373 && (*pFirstAfterTagName != '\r')
374 && (*pFirstAfterTagName != '\t') // tab
375 && (*pFirstAfterTagName != '>')
376 )
377 pFirstAfterTagName++;
378 if (!*pFirstAfterTagName)
379 // no closing bracket found:
380 pSearchPos = 0;
381 else
382 {
383 // got a tag name:
384 // append it to the list
385 PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
386 if (!pTagFound)
387 // error:
388 pSearchPos = 0;
389 else
390 {
391 pTagFound->fIsComment = FALSE;
392 pTagFound->pOpenBrck = pOpenBrck;
393 pTagFound->pStartOfTagName = pTagName;
394 pTagFound->pFirstAfterTagName = pFirstAfterTagName;
395 pTagFound->pCloseBrck = pCloseBrck;
396
397 lstAppendItem(pllTags, pTagFound);
398
399 // search on after closing bracket
400 pSearchPos = pCloseBrck + 1;
401 }
402 }
403 }
404 }
405 } // end else if (!pCloseBrck)
406 } // end else if (strncmp(pOpenBrck + 1, "!--"))
407 } // end if (pOpenBrck)
408 } // end while
409
410 return (pllTags);
411}
412
413/*
414 *@@ CreateTextNode:
415 * shortcut for creating a TEXT node. Calls
416 * xmlCreateNode in turn.
417 *
418 * The text is extracted from in between the
419 * two pointers using strhSubstr.
420 */
421
422PDOMNODE CreateTextNode(PDOMNODE pParentNode,
423 const char *pStart,
424 const char *pEnd)
425{
426 PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
427 DOMNODE_TEXT);
428 if (pNewTextNode)
429 pNewTextNode->pszNodeValue = strhSubstr(pStart,
430 pEnd);
431
432 return (pNewTextNode);
433}
434
435/*
436 *@@ CreateElementNode:
437 * shortcut for creating a new ELEMENT node and
438 * parsing attributes at the same time.
439 *
440 * pszTagName is assumed to be static (no copy
441 * is made).
442 *
443 * pAttribs is assumed to point to an attributes
444 * string. This function creates ATTRIBUTE nodes
445 * from that string until either a null character
446 * or '>' is found.
447 */
448
449PDOMNODE CreateElementNode(PDOMNODE pParentNode,
450 PSZ pszTagName,
451 const char *pAttribs) // in: ptr to attribs; can be NULL
452{
453 PDOMNODE pNewNode = xmlCreateNode(pParentNode,
454 DOMNODE_ELEMENT);
455 if (pNewNode)
456 {
457 const char *p = pAttribs;
458
459 pNewNode->pszNodeName = pszTagName;
460
461 // find-start-of-attribute loop
462 while (p)
463 {
464 switch (*p)
465 {
466 case 0:
467 case '>':
468 p = 0;
469 break;
470
471 case ' ':
472 case '\t': // tab
473 case '\n':
474 case '\r':
475 p++;
476 break;
477
478 default:
479 {
480 // first (or next) non-space:
481 // that's the start of an attrib, probably
482 // go until we find a space or '>'
483
484 const char *pNameStart = p,
485 *p2 = p;
486
487 const char *pEquals = 0,
488 *pFirstQuote = 0,
489 *pEnd = 0; // last char... non-inclusive!
490
491 // copy-rest-of-attribute loop
492 while (p2)
493 {
494 switch (*p2)
495 {
496 case '"':
497 if (!pEquals)
498 {
499 // '"' cannot appear before '='
500 p2 = 0;
501 p = 0;
502 }
503 else
504 {
505 if (pFirstQuote)
506 {
507 // second quote:
508 // get value between quotes
509 pEnd = p2;
510 // we're done with this one
511 p = p2 + 1;
512 p2 = 0;
513 }
514 else
515 {
516 // first quote:
517 pFirstQuote = p2;
518 p2++;
519 }
520 }
521 break;
522
523 case '=':
524 if (!pEquals)
525 {
526 // first equals sign:
527 pEquals = p2;
528 // extract name
529 p2++;
530 }
531 else
532 if (pFirstQuote)
533 p2++;
534 else
535 {
536 // error
537 p2 = 0;
538 p = 0;
539 }
540 break;
541
542 case ' ':
543 case '\t': // tab
544 case '\n':
545 case '\r':
546 // spaces can appear in quotes
547 if (pFirstQuote)
548 // just continue
549 p2++;
550 else
551 {
552 // end of it!
553 pEnd = p2;
554 p = p2 + 1;
555 p2 = 0;
556 }
557 break;
558
559 case 0:
560 case '>':
561 {
562 pEnd = p2;
563 // quit inner AND outer loop
564 p2 = 0;
565 p = 0;
566 break; }
567
568 default:
569 p2++;
570 }
571 } // end while (p2)
572
573 if (pEnd)
574 {
575 PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
576 DOMNODE_ATTRIBUTE);
577 if (pAttribNode)
578 {
579 if (pEquals)
580 {
581 pAttribNode->pszNodeName
582 = strhSubstr(pNameStart, pEquals);
583
584 // did we have quotes?
585 if (pFirstQuote)
586 pAttribNode->pszNodeValue
587 = strhSubstr(pFirstQuote + 1, pEnd);
588 else
589 pAttribNode->pszNodeValue
590 = strhSubstr(pEquals + 1, pEnd);
591 }
592 else
593 // no "equals":
594 pAttribNode->pszNodeName
595 = strhSubstr(pNameStart, pEnd);
596 }
597 }
598 break; }
599 }
600 }
601 }
602
603 return (pNewNode);
604}
605
606/*
607 *@@ CreateNodesForBuf:
608 * this gets called (recursively) for a piece of text
609 * for which we need to create TEXT and ELEMENT DOMNODE's.
610 *
611 * This does the heavy work for xmlParse.
612 *
613 * If an error (!= 0) is returned, *ppError points to
614 * the code part that failed.
615 */
616
617ULONG CreateNodesForBuf(const char *pcszBufStart,
618 const char *pcszBufEnd, // in: can be NULL
619 PLINKLIST pllTagsList,
620 PDOMNODE pParentNode,
621 PFNVALIDATE pfnValidateTag,
622 const char **ppError)
623{
624 ULONG ulrc = 0;
625 PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
626 const char *pBufCurrent = pcszBufStart;
627 BOOL fContinue = TRUE;
628
629 if (pcszBufEnd == NULL)
630 pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
631
632 while (fContinue)
633 {
634 if ( (!*pBufCurrent)
635 || (pBufCurrent == pcszBufEnd)
636 )
637 // end of buf reached:
638 fContinue = FALSE;
639
640 else if (!pCurrentTagListNode)
641 {
642 // no (more) tags for this buffer:
643 CreateTextNode(pParentNode,
644 pBufCurrent,
645 pcszBufEnd);
646 fContinue = FALSE;
647 }
648 else
649 {
650 // another tag found:
651 PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
652 const char *pStartOfTag = pFoundTag->pOpenBrck;
653 if (pStartOfTag > pBufCurrent + 1)
654 {
655 // we have text before the opening tag:
656 // make a DOMTEXT out of this
657 CreateTextNode(pParentNode,
658 pBufCurrent,
659 pStartOfTag);
660 pBufCurrent = pStartOfTag;
661 }
662 else
663 {
664 // OK, go for this tag...
665
666 if (*(pFoundTag->pStartOfTagName) == '/')
667 {
668 // this is a closing tag: that's an error
669 ulrc = 1;
670 *ppError = pFoundTag->pStartOfTagName;
671 fContinue = FALSE;
672 }
673 else if (pFoundTag->fIsComment)
674 {
675 // it's a comment: that's simple
676 PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
677 DOMNODE_COMMENT);
678 if (!pCommentNode)
679 ulrc = ERROR_NOT_ENOUGH_MEMORY;
680 else
681 {
682 pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
683 pFoundTag->pCloseBrck - 2);
684 }
685 pBufCurrent = pFoundTag->pCloseBrck + 1;
686 }
687 else
688 {
689 BOOL fKeepTagName = FALSE; // free pszTagName below
690 PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
691 pFoundTag->pFirstAfterTagName);
692 if (!pszTagName)
693 // zero-length string:
694 // go ahead after that
695 pBufCurrent = pFoundTag->pCloseBrck + 1;
696 else
697 {
698 // XML knows two types of elements:
699
700 // a) Element pairs, which have opening and closing tags
701 // (<TAG> and </TAG>
702 // b) Single elements, which must have "/" as their last
703 // character; these have no closing tag
704 // (<TAG/>)
705
706 // However, HTML doesn't usually tag single elements
707 // with a trailing '/'. To maintain compatibility,
708 // if we don't find a matching closing tag, we extract
709 // everything up to the end of the buffer.
710
711 ULONG ulTagNameLen = strlen(pszTagName);
712
713 // search for closing tag first...
714 // create string with closing tag to search for;
715 // that's '/' plus opening tag name
716 ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
717 PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
718 PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
719 PLISTNODE pTagListNodeForChildren = pTagListNode2;
720
721 BOOL fClosingTagFound = FALSE;
722
723 *pszClosingTag2Find = '/';
724 strcpy(pszClosingTag2Find + 1, pszTagName);
725
726 // now find matching closing tag
727 while (pTagListNode2)
728 {
729 PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
730 ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
731 // compare tag name lengths
732 if (ulFoundTag2Len == ulClosingTagLen2Find)
733 {
734 // same length:
735 // compare
736 if (memcmp(pFoundTag2->pStartOfTagName,
737 pszClosingTag2Find,
738 ulClosingTagLen2Find)
739 == 0)
740 {
741 // found matching closing tag:
742
743 // we now have
744 // -- pCurrentTagListNode pointing to the opening tag
745 // (pFoundTag has its PTAGFOUND item data)
746 // -- pTagListNode2 pointing to the closing tag
747 // (pFoundTag2 has its PTAGFOUND item data)
748
749 // create DOM node
750 PDOMNODE pNewNode = CreateElementNode(pParentNode,
751 pszTagName,
752 pFoundTag->pFirstAfterTagName);
753 if (pNewNode)
754 {
755 ULONG ulAction = XMLACTION_BREAKUP;
756
757 fKeepTagName = TRUE; // do not free below
758
759 // validate tag
760 if (pfnValidateTag)
761 {
762 // validator specified:
763 ulAction = pfnValidateTag(pszTagName);
764 }
765
766 if (ulAction == XMLACTION_COPYASTEXT)
767 {
768 CreateTextNode(pNewNode,
769 pFoundTag->pCloseBrck + 1,
770 pFoundTag2->pOpenBrck - 1);
771 }
772 else if (ulAction == XMLACTION_BREAKUP)
773 {
774 PLINKLIST pllSubList = lstCreate(FALSE);
775 PLISTNODE pSubNode = 0;
776 ULONG cSubNodes = 0;
777
778 // text buffer to search
779 const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
780 const char *pSubBufEnd = pFoundTag2->pOpenBrck;
781
782 // create a child list containing
783 // all tags from the first tag after
784 // the current opening tag to the closing tag
785 for (pSubNode = pTagListNodeForChildren;
786 pSubNode != pTagListNode2;
787 pSubNode = pSubNode->pNext)
788 {
789 lstAppendItem(pllSubList,
790 pSubNode->pItemData);
791 cSubNodes++;
792 }
793
794 // now recurse to build child nodes
795 // (text and elements), even if the
796 // list is empty, we can have text!
797 CreateNodesForBuf(pSubBufStart,
798 pSubBufEnd,
799 pllSubList,
800 pNewNode,
801 pfnValidateTag,
802 ppError);
803
804 lstFree(pllSubList);
805 } // end if (ulAction == XMLACTION_BREAKUP)
806
807 // now search on after the closing tag
808 // we've found; the next tag will be set below
809 pCurrentTagListNode = pTagListNode2;
810 pBufCurrent = pFoundTag2->pCloseBrck + 1;
811
812 fClosingTagFound = TRUE;
813
814 break; // // while (pTagListNode2)
815 } // end if (pNewNode)
816 } // end if (memcmp(pFoundTag2->pStartOfTagName,
817 } // if (ulFoundTag2Len == ulClosingTagLen2Find)
818
819 pTagListNode2 = pTagListNode2->pNext;
820
821 } // while (pTagListNode2)
822
823 if (!fClosingTagFound)
824 {
825 // no matching closing tag found:
826 // that's maybe a block of not well-formed XML
827
828 // e.g. with WarpIN:
829 // <README> <-- we start after this
830 // block of plain HTML with <P> tags and such
831 // </README>
832
833 // just create an element
834 PDOMNODE pNewNode = CreateElementNode(pParentNode,
835 pszTagName,
836 pFoundTag->pFirstAfterTagName);
837 if (pNewNode)
838 fKeepTagName = TRUE;
839
840 // now search on after the closing tag
841 // we've found; the next tag will be set below
842 // pCurrentTagListNode = pTagListNodeForChildren;
843 pBufCurrent = pFoundTag->pCloseBrck + 1;
844 }
845
846 free(pszClosingTag2Find);
847
848 if (!fKeepTagName)
849 free(pszTagName);
850 } // end if (pszTagName)
851 }
852
853 pCurrentTagListNode = pCurrentTagListNode->pNext;
854 }
855 }
856 }
857
858 return (ulrc);
859}
860
861/*
862 * xmlParse:
863 * generic XML parser.
864 *
865 * This takes the specified zero-terminated string
866 * in pcszBuf and parses it, adding DOMNODE's as
867 * children to pNode.
868 *
869 * This recurses, if necessary, to build a node tree.
870 *
871 * Example: Take this HTML table definition:
872 +
873 + <TABLE>
874 + <TBODY>
875 + <TR>
876 + <TD>Column 1-1</TD>
877 + <TD>Column 1-2</TD>
878 + </TR>
879 + <TR>
880 + <TD>Column 2-1</TD>
881 + <TD>Column 2-2</TD>
882 + </TR>
883 + </TBODY>
884 + </TABLE>
885 *
886 * This function will create a tree as follows:
887 +
888 + ÚÄÄÄÄÄÄÄÄÄÄÄÄ¿
889 + ³ TABLE ³ (only ELEMENT node in root DOCUMENT node)
890 + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
891 + ³
892 + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
893 + ³ TBODY ³ (only ELEMENT node in root "TABLE" node)
894 + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
895 + ÚÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÄÄÄÄ¿
896 + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
897 + ³ TR ³ ³ TR ³
898 + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
899 + ÚÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÁÄÄÄÄÄÄ¿
900 + ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿
901 + ³ TD ³ ³ TD ³ ³ TD ³ ³ TD ³
902 + ÀÄÄÂÄÄÙ ÀÄÄÂÄÄÙ ÀÄÄÄÂÄÙ ÀÄÄÂÄÄÙ
903 + ÉÍÍÍÍÍÊÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÊÍÍÍÍÍÍÍ»
904 + ºColumn 1-1º ºColumn 1-2º ºColumn 2-1º ºColumn 2-2º (one TEXT node in each parent node)
905 + ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ
906 */
907
908ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
909 const char *pcszBuf, // in: buffer to search
910 PFNVALIDATE pfnValidateTag)
911{
912 ULONG ulrc = 0;
913
914 PLINKLIST pllTags = BuildTagsList(pcszBuf);
915
916 // now create DOMNODE's according to that list...
917 const char *pcszError = 0;
918 CreateNodesForBuf(pcszBuf,
919 NULL, // enitre buffer
920 pllTags,
921 pParentNode,
922 pfnValidateTag,
923 &pcszError);
924
925 lstFree(pllTags);
926
927 return (ulrc);
928}
929
930/*
931 *@@ xmlCreateDocumentFromString:
932 * creates a DOCUMENT DOMNODE and calls xmlParse
933 * to break down the specified buffer into that
934 * node.
935 */
936
937PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
938 PFNVALIDATE pfnValidateTag)
939{
940 PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
941 DOMNODE_DOCUMENT);
942 xmlParse(pDocument,
943 pcszXML,
944 pfnValidateTag);
945
946 return (pDocument);
947}
948
949
Note: See TracBrowser for help on using the repository browser.