source: trunk/src/helpers/xml.c@ 15

Last change on this file since 15 was 14, checked in by umoeller, 25 years ago

Major updates; timers, LVM, miscellaneous.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 34.9 KB
Line 
1
2/*
3 *@@sourcefile xml.c:
4 * XML parsing.
5 *
6 * This is vaguely modelled after the Document Object Model
7 * (DOM) standardized by the W3C.
8 *
9 * In short, DOM specifies that an XML document is broken
10 * up into a tree of nodes, representing the various parts
11 * of an XML document. Most importantly, we have:
12 *
13 * -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
14 *
15 * -- ATTRIBUTE: an attribute to an element.
16 *
17 * -- TEXT: a piece of, well, text.
18 *
19 * -- COMMENT: a comment.
20 *
21 * See xmlParse() for a more detailed explanation.
22 *
23 * However, since this implementation was supposed to be a
24 * C-only interface, we do not implement inheritance. Instead,
25 * each XML document is broken up into a tree of DOMNODE's only,
26 * each of which has a special type.
27 *
28 * It shouldn't be too difficult to write a C++ encapsulation
29 * of this which implements all the methods required by the DOM
30 * standard.
31 *
32 * The main entry point into this is xmlParse or
33 * xmlCreateDocumentFromString. See remarks there for details.
34 *
35 * Limitations:
36 *
37 * 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
38 * and COMMENT nodes.
39 *
40 * 2) This doesn't use 16-bit characters, but 8-bit characters.
41 *
42 *@@header "helpers\xml.h"
43 *@@added V0.9.6 (2000-10-29) [umoeller]
44 */
45
46/*
47 * Copyright (C) 2000 Ulrich Möller.
48 * This file is part of the "XWorkplace helpers" source package.
49 * This is free software; you can redistribute it and/or modify
50 * it under the terms of the GNU General Public License as published
51 * by the Free Software Foundation, in version 2 as it comes in the
52 * "COPYING" file of the XWorkplace main distribution.
53 * This program is distributed in the hope that it will be useful,
54 * but WITHOUT ANY WARRANTY; without even the implied warranty of
55 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 * GNU General Public License for more details.
57 */
58
59#define OS2EMX_PLAIN_CHAR
60 // this is needed for "os2emx.h"; if this is defined,
61 // emx will define PSZ as _signed_ char, otherwise
62 // as unsigned char
63
64#define INCL_DOSERRORS
65#include <os2.h>
66
67#include <stdlib.h>
68#include <string.h>
69
70#include "setup.h" // code generation and debugging options
71
72#include "helpers\linklist.h"
73#include "helpers\stringh.h"
74#include "helpers\xml.h"
75
76#pragma hdrstop
77
78/*
79 *@@category: Helpers\C helpers\XML\Node management
80 */
81
82/* ******************************************************************
83 *
84 * Node Management
85 *
86 ********************************************************************/
87
88/*
89 *@@ xmlCreateNode:
90 * creates a new DOMNODE with the specified
91 * type and parent.
92 */
93
94PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
95 ULONG ulNodeType)
96{
97 PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
98 if (pNewNode)
99 {
100 memset(pNewNode, 0, sizeof(DOMNODE));
101 pNewNode->ulNodeType = ulNodeType;
102 pNewNode->pParentNode = pParentNode;
103 if (pParentNode)
104 {
105 // parent specified:
106 // append this new node to the parent's
107 // list of child nodes
108 lstAppendItem(&pParentNode->listChildNodes,
109 pNewNode);
110 }
111
112 lstInit(&pNewNode->listChildNodes, FALSE);
113 lstInit(&pNewNode->listAttributeNodes, FALSE);
114 }
115
116 return (pNewNode);
117}
118
119/*
120 *@@ xmlDeleteNode:
121 * deletes the specified node.
122 *
123 * If the node has child nodes, all of them are deleted
124 * as well. This recurses, if necessary.
125 *
126 * As a result, if the node is a document node, this
127 * deletes an entire document, including all of its
128 * child nodes.
129 *
130 * Returns:
131 *
132 * -- 0: NO_ERROR.
133 */
134
135ULONG xmlDeleteNode(PDOMNODE pNode)
136{
137 ULONG ulrc = 0;
138
139 if (!pNode)
140 {
141 ulrc = DOMERR_NOT_FOUND;
142 }
143 else
144 {
145 // recurse into child nodes
146 PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
147 while (pNodeThis)
148 {
149 // recurse!!
150 xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
151
152 pNodeThis = pNodeThis->pNext;
153 }
154
155 // delete attribute nodes
156 pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
157 while (pNodeThis)
158 {
159 // recurse!!
160 xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
161
162 pNodeThis = pNodeThis->pNext;
163 }
164
165 if (pNode->pParentNode)
166 {
167 // node has a parent:
168 // remove this node from the parent's list
169 // of child nodes before deleting this node
170 lstRemoveItem(&pNode->pParentNode->listChildNodes,
171 pNode);
172 pNode->pParentNode = NULL;
173 }
174
175 if (pNode->pszNodeName)
176 {
177 free(pNode->pszNodeName);
178 pNode->pszNodeName = NULL;
179 }
180 if (pNode->pszNodeValue)
181 {
182 free(pNode->pszNodeValue);
183 pNode->pszNodeValue = NULL;
184 }
185
186 free(pNode);
187 }
188
189 return (ulrc);
190}
191
192/*
193 *@@category: Helpers\C helpers\XML\Parsing
194 */
195
196/* ******************************************************************
197 *
198 * Tokenizing (Compiling)
199 *
200 ********************************************************************/
201
202/*
203 *@@ xmlTokenize:
204 * this takes any block of XML text and "tokenizes"
205 * it.
206 *
207 * Tokenizing (or compiling, or "scanning" in bison/flex
208 * terms) means preparing the XML code for parsing later.
209 * This finds all tags and tag attributes and creates
210 * special codes for them in the output buffer.
211 *
212 * For example:
213 +
214 + <TAG ATTR="text"> block </TAG>
215 +
216 * becomes
217 *
218 + 0xFF escape code
219 + 0x01 tag start code
220 + "TAG" tag name
221 + 0xFF end of tag name code
222 +
223 + 0xFF escape code
224 + 0x03 attribute name code
225 + "ATTR" attribute name
226 + 0xFF
227 + "text" attribute value (without quotes)
228 + 0xFF end of attribute code
229 +
230 + " block " regular text
231 +
232 + 0xFF escape code
233 + 0x01 tag start code
234 + "/TAG" tag name
235 + 0xFF end of tag name code
236 *
237 *@@added V0.9.6 (2000-11-01) [umoeller]
238 */
239
240PSZ xmlTokenize(const char *pcszXML)
241{
242 return (0);
243}
244
245/* ******************************************************************
246 *
247 * Parsing
248 *
249 ********************************************************************/
250
251/*
252 * TAGFOUND:
253 * structure created for each tag by BuildTagsList.
254 */
255
256typedef struct _TAGFOUND
257{
258 BOOL fIsComment;
259 const char *pOpenBrck;
260 const char *pStartOfTagName;
261 const char *pFirstAfterTagName;
262 const char *pCloseBrck; // ptr to '>' char; this plus one should
263 // point to after the tag
264} TAGFOUND, *PTAGFOUND;
265
266/*
267 * BuildTagsList:
268 * builds a LINKLIST containing TAGFOUND structs for
269 * each tag found in the specified buffer.
270 *
271 * This is a flat list without any tree structure. This
272 * only searches for the tags and doesn't create any
273 * hierarchy.
274 *
275 * The tags are simply added to the list in the order
276 * in which they are found in pcszBuffer.
277 *
278 * The list is auto-free, you can simply do a lstFree
279 * to clean up.
280 */
281
282PLINKLIST BuildTagsList(const char *pcszBuffer)
283{
284 PLINKLIST pllTags = lstCreate(TRUE);
285
286 const char *pSearchPos = pcszBuffer;
287
288 while ((pSearchPos) && (*pSearchPos))
289 {
290 // find first '<'
291 PSZ pOpenBrck = strchr(pSearchPos, '<');
292 if (!pOpenBrck)
293 // no open bracket found: stop search
294 pSearchPos = 0;
295 else
296 {
297 if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
298 {
299 // it's a comment:
300 // treat that differently
301 const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
302 const char *pCloseBrck = 0;
303 const char *pFirstAfterTagName = 0;
304 PTAGFOUND pTagFound;
305 if (!pEndOfComment)
306 {
307 // no end of comment found:
308 // skip entire rest of string
309 pCloseBrck = pOpenBrck + strlen(pOpenBrck);
310 pFirstAfterTagName = pCloseBrck;
311 pSearchPos = 0;
312 }
313 else
314 {
315 pCloseBrck = pEndOfComment + 2; // point directly to '>'
316 pFirstAfterTagName = pCloseBrck + 1;
317 }
318
319 // append it to the list
320 pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
321 if (!pTagFound)
322 // error:
323 pSearchPos = 0;
324 else
325 {
326 pTagFound->fIsComment = TRUE;
327 pTagFound->pOpenBrck = pOpenBrck;
328 pTagFound->pStartOfTagName = pOpenBrck + 1;
329 pTagFound->pFirstAfterTagName = pFirstAfterTagName;
330 pTagFound->pCloseBrck = pCloseBrck;
331
332 lstAppendItem(pllTags, pTagFound);
333 }
334
335 pSearchPos = pFirstAfterTagName;
336 }
337 else
338 {
339 // no comment:
340 // find matching closing bracket
341 const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
342 if (!pCloseBrck)
343 pSearchPos = 0;
344 else
345 {
346 const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
347 // if we have another opening bracket before the closing bracket,
348 if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
349 // ignore this one
350 pSearchPos = pNextOpenBrck;
351 else
352 {
353 // OK, apparently we have a tag.
354 // Skip all spaces after the tag.
355 const char *pTagName = pOpenBrck + 1;
356 while ( (*pTagName)
357 && ( (*pTagName == ' ')
358 || (*pTagName == '\r')
359 || (*pTagName == '\n')
360 )
361 )
362 pTagName++;
363 if (!*pTagName)
364 // no tag name: stop
365 pSearchPos = 0;
366 else
367 {
368 // ookaaayyy, we got a tag now.
369 // Find first space or ">" after tag name:
370 const char *pFirstAfterTagName = pTagName + 1;
371 while ( (*pFirstAfterTagName)
372 && (*pFirstAfterTagName != ' ')
373 && (*pFirstAfterTagName != '\n')
374 && (*pFirstAfterTagName != '\r')
375 && (*pFirstAfterTagName != '\t') // tab
376 && (*pFirstAfterTagName != '>')
377 )
378 pFirstAfterTagName++;
379 if (!*pFirstAfterTagName)
380 // no closing bracket found:
381 pSearchPos = 0;
382 else
383 {
384 // got a tag name:
385 // append it to the list
386 PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
387 if (!pTagFound)
388 // error:
389 pSearchPos = 0;
390 else
391 {
392 pTagFound->fIsComment = FALSE;
393 pTagFound->pOpenBrck = pOpenBrck;
394 pTagFound->pStartOfTagName = pTagName;
395 pTagFound->pFirstAfterTagName = pFirstAfterTagName;
396 pTagFound->pCloseBrck = pCloseBrck;
397
398 lstAppendItem(pllTags, pTagFound);
399
400 // search on after closing bracket
401 pSearchPos = pCloseBrck + 1;
402 }
403 }
404 }
405 }
406 } // end else if (!pCloseBrck)
407 } // end else if (strncmp(pOpenBrck + 1, "!--"))
408 } // end if (pOpenBrck)
409 } // end while
410
411 return (pllTags);
412}
413
414/*
415 *@@ CreateTextNode:
416 * shortcut for creating a TEXT node. Calls
417 * xmlCreateNode in turn.
418 *
419 * The text is extracted from in between the
420 * two pointers using strhSubstr.
421 */
422
423PDOMNODE CreateTextNode(PDOMNODE pParentNode,
424 const char *pStart,
425 const char *pEnd)
426{
427 PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
428 DOMNODE_TEXT);
429 if (pNewTextNode)
430 pNewTextNode->pszNodeValue = strhSubstr(pStart,
431 pEnd);
432
433 return (pNewTextNode);
434}
435
436/*
437 *@@ CreateElementNode:
438 * shortcut for creating a new ELEMENT node and
439 * parsing attributes at the same time.
440 *
441 * pszTagName is assumed to be static (no copy
442 * is made).
443 *
444 * pAttribs is assumed to point to an attributes
445 * string. This function creates ATTRIBUTE nodes
446 * from that string until either a null character
447 * or '>' is found.
448 */
449
450PDOMNODE CreateElementNode(PDOMNODE pParentNode,
451 PSZ pszTagName,
452 const char *pAttribs) // in: ptr to attribs; can be NULL
453{
454 PDOMNODE pNewNode = xmlCreateNode(pParentNode,
455 DOMNODE_ELEMENT);
456 if (pNewNode)
457 {
458 const char *p = pAttribs;
459
460 pNewNode->pszNodeName = pszTagName;
461
462 // find-start-of-attribute loop
463 while (p)
464 {
465 switch (*p)
466 {
467 case 0:
468 case '>':
469 p = 0;
470 break;
471
472 case ' ':
473 case '\t': // tab
474 case '\n':
475 case '\r':
476 p++;
477 break;
478
479 default:
480 {
481 // first (or next) non-space:
482 // that's the start of an attrib, probably
483 // go until we find a space or '>'
484
485 const char *pNameStart = p,
486 *p2 = p;
487
488 const char *pEquals = 0,
489 *pFirstQuote = 0,
490 *pEnd = 0; // last char... non-inclusive!
491
492 // copy-rest-of-attribute loop
493 while (p2)
494 {
495 switch (*p2)
496 {
497 case '"':
498 if (!pEquals)
499 {
500 // '"' cannot appear before '='
501 p2 = 0;
502 p = 0;
503 }
504 else
505 {
506 if (pFirstQuote)
507 {
508 // second quote:
509 // get value between quotes
510 pEnd = p2;
511 // we're done with this one
512 p = p2 + 1;
513 p2 = 0;
514 }
515 else
516 {
517 // first quote:
518 pFirstQuote = p2;
519 p2++;
520 }
521 }
522 break;
523
524 case '=':
525 if (!pEquals)
526 {
527 // first equals sign:
528 pEquals = p2;
529 // extract name
530 p2++;
531 }
532 else
533 if (pFirstQuote)
534 p2++;
535 else
536 {
537 // error
538 p2 = 0;
539 p = 0;
540 }
541 break;
542
543 case ' ':
544 case '\t': // tab
545 case '\n':
546 case '\r':
547 // spaces can appear in quotes
548 if (pFirstQuote)
549 // just continue
550 p2++;
551 else
552 {
553 // end of it!
554 pEnd = p2;
555 p = p2 + 1;
556 p2 = 0;
557 }
558 break;
559
560 case 0:
561 case '>':
562 {
563 pEnd = p2;
564 // quit inner AND outer loop
565 p2 = 0;
566 p = 0;
567 break; }
568
569 default:
570 p2++;
571 }
572 } // end while (p2)
573
574 if (pEnd)
575 {
576 PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
577 DOMNODE_ATTRIBUTE);
578 if (pAttribNode)
579 {
580 if (pEquals)
581 {
582 pAttribNode->pszNodeName
583 = strhSubstr(pNameStart, pEquals);
584
585 // did we have quotes?
586 if (pFirstQuote)
587 pAttribNode->pszNodeValue
588 = strhSubstr(pFirstQuote + 1, pEnd);
589 else
590 pAttribNode->pszNodeValue
591 = strhSubstr(pEquals + 1, pEnd);
592 }
593 else
594 // no "equals":
595 pAttribNode->pszNodeName
596 = strhSubstr(pNameStart, pEnd);
597 }
598 }
599 break; }
600 }
601 }
602 }
603
604 return (pNewNode);
605}
606
607/*
608 *@@ CreateNodesForBuf:
609 * this gets called (recursively) for a piece of text
610 * for which we need to create TEXT and ELEMENT DOMNODE's.
611 *
612 * This does the heavy work for xmlParse.
613 *
614 * If an error (!= 0) is returned, *ppError points to
615 * the code part that failed.
616 */
617
618ULONG CreateNodesForBuf(const char *pcszBufStart,
619 const char *pcszBufEnd, // in: can be NULL
620 PLINKLIST pllTagsList,
621 PDOMNODE pParentNode,
622 PFNVALIDATE pfnValidateTag,
623 const char **ppError)
624{
625 ULONG ulrc = 0;
626 PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
627 const char *pBufCurrent = pcszBufStart;
628 BOOL fContinue = TRUE;
629
630 if (pcszBufEnd == NULL)
631 pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
632
633 while (fContinue)
634 {
635 if ( (!*pBufCurrent)
636 || (pBufCurrent == pcszBufEnd)
637 )
638 // end of buf reached:
639 fContinue = FALSE;
640
641 else if (!pCurrentTagListNode)
642 {
643 // no (more) tags for this buffer:
644 CreateTextNode(pParentNode,
645 pBufCurrent,
646 pcszBufEnd);
647 fContinue = FALSE;
648 }
649 else
650 {
651 // another tag found:
652 PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
653 const char *pStartOfTag = pFoundTag->pOpenBrck;
654 if (pStartOfTag > pBufCurrent + 1)
655 {
656 // we have text before the opening tag:
657 // make a DOMTEXT out of this
658 CreateTextNode(pParentNode,
659 pBufCurrent,
660 pStartOfTag);
661 pBufCurrent = pStartOfTag;
662 }
663 else
664 {
665 // OK, go for this tag...
666
667 if (*(pFoundTag->pStartOfTagName) == '/')
668 {
669 // this is a closing tag: that's an error
670 ulrc = 1;
671 *ppError = pFoundTag->pStartOfTagName;
672 fContinue = FALSE;
673 }
674 else if (pFoundTag->fIsComment)
675 {
676 // it's a comment: that's simple
677 PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
678 DOMNODE_COMMENT);
679 if (!pCommentNode)
680 ulrc = ERROR_NOT_ENOUGH_MEMORY;
681 else
682 {
683 pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
684 pFoundTag->pCloseBrck - 2);
685 }
686 pBufCurrent = pFoundTag->pCloseBrck + 1;
687 }
688 else
689 {
690 BOOL fKeepTagName = FALSE; // free pszTagName below
691 PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
692 pFoundTag->pFirstAfterTagName);
693 if (!pszTagName)
694 // zero-length string:
695 // go ahead after that
696 pBufCurrent = pFoundTag->pCloseBrck + 1;
697 else
698 {
699 // XML knows two types of elements:
700
701 // a) Element pairs, which have opening and closing tags
702 // (<TAG> and </TAG>
703 // b) Single elements, which must have "/" as their last
704 // character; these have no closing tag
705 // (<TAG/>)
706
707 // However, HTML doesn't usually tag single elements
708 // with a trailing '/'. To maintain compatibility,
709 // if we don't find a matching closing tag, we extract
710 // everything up to the end of the buffer.
711
712 ULONG ulTagNameLen = strlen(pszTagName);
713
714 // search for closing tag first...
715 // create string with closing tag to search for;
716 // that's '/' plus opening tag name
717 ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
718 PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
719 PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
720 PLISTNODE pTagListNodeForChildren = pTagListNode2;
721
722 BOOL fClosingTagFound = FALSE;
723
724 *pszClosingTag2Find = '/';
725 strcpy(pszClosingTag2Find + 1, pszTagName);
726
727 // now find matching closing tag
728 while (pTagListNode2)
729 {
730 PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
731 ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
732 // compare tag name lengths
733 if (ulFoundTag2Len == ulClosingTagLen2Find)
734 {
735 // same length:
736 // compare
737 if (memcmp(pFoundTag2->pStartOfTagName,
738 pszClosingTag2Find,
739 ulClosingTagLen2Find)
740 == 0)
741 {
742 // found matching closing tag:
743
744 // we now have
745 // -- pCurrentTagListNode pointing to the opening tag
746 // (pFoundTag has its PTAGFOUND item data)
747 // -- pTagListNode2 pointing to the closing tag
748 // (pFoundTag2 has its PTAGFOUND item data)
749
750 // create DOM node
751 PDOMNODE pNewNode = CreateElementNode(pParentNode,
752 pszTagName,
753 pFoundTag->pFirstAfterTagName);
754 if (pNewNode)
755 {
756 ULONG ulAction = XMLACTION_BREAKUP;
757
758 fKeepTagName = TRUE; // do not free below
759
760 // validate tag
761 if (pfnValidateTag)
762 {
763 // validator specified:
764 ulAction = pfnValidateTag(pszTagName);
765 }
766
767 if (ulAction == XMLACTION_COPYASTEXT)
768 {
769 CreateTextNode(pNewNode,
770 pFoundTag->pCloseBrck + 1,
771 pFoundTag2->pOpenBrck - 1);
772 }
773 else if (ulAction == XMLACTION_BREAKUP)
774 {
775 PLINKLIST pllSubList = lstCreate(FALSE);
776 PLISTNODE pSubNode = 0;
777 ULONG cSubNodes = 0;
778
779 // text buffer to search
780 const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
781 const char *pSubBufEnd = pFoundTag2->pOpenBrck;
782
783 // create a child list containing
784 // all tags from the first tag after
785 // the current opening tag to the closing tag
786 for (pSubNode = pTagListNodeForChildren;
787 pSubNode != pTagListNode2;
788 pSubNode = pSubNode->pNext)
789 {
790 lstAppendItem(pllSubList,
791 pSubNode->pItemData);
792 cSubNodes++;
793 }
794
795 // now recurse to build child nodes
796 // (text and elements), even if the
797 // list is empty, we can have text!
798 CreateNodesForBuf(pSubBufStart,
799 pSubBufEnd,
800 pllSubList,
801 pNewNode,
802 pfnValidateTag,
803 ppError);
804
805 lstFree(pllSubList);
806 } // end if (ulAction == XMLACTION_BREAKUP)
807
808 // now search on after the closing tag
809 // we've found; the next tag will be set below
810 pCurrentTagListNode = pTagListNode2;
811 pBufCurrent = pFoundTag2->pCloseBrck + 1;
812
813 fClosingTagFound = TRUE;
814
815 break; // // while (pTagListNode2)
816 } // end if (pNewNode)
817 } // end if (memcmp(pFoundTag2->pStartOfTagName,
818 } // if (ulFoundTag2Len == ulClosingTagLen2Find)
819
820 pTagListNode2 = pTagListNode2->pNext;
821
822 } // while (pTagListNode2)
823
824 if (!fClosingTagFound)
825 {
826 // no matching closing tag found:
827 // that's maybe a block of not well-formed XML
828
829 // e.g. with WarpIN:
830 // <README> <-- we start after this
831 // block of plain HTML with <P> tags and such
832 // </README>
833
834 // just create an element
835 PDOMNODE pNewNode = CreateElementNode(pParentNode,
836 pszTagName,
837 pFoundTag->pFirstAfterTagName);
838 if (pNewNode)
839 fKeepTagName = TRUE;
840
841 // now search on after the closing tag
842 // we've found; the next tag will be set below
843 // pCurrentTagListNode = pTagListNodeForChildren;
844 pBufCurrent = pFoundTag->pCloseBrck + 1;
845 }
846
847 free(pszClosingTag2Find);
848
849 if (!fKeepTagName)
850 free(pszTagName);
851 } // end if (pszTagName)
852 }
853
854 pCurrentTagListNode = pCurrentTagListNode->pNext;
855 }
856 }
857 }
858
859 return (ulrc);
860}
861
862/*
863 * xmlParse:
864 * generic XML parser.
865 *
866 * This takes the specified zero-terminated string
867 * in pcszBuf and parses it, adding DOMNODE's as
868 * children to pNode.
869 *
870 * This recurses, if necessary, to build a node tree.
871 *
872 * Example: Take this HTML table definition:
873 +
874 + <TABLE>
875 + <TBODY>
876 + <TR>
877 + <TD>Column 1-1</TD>
878 + <TD>Column 1-2</TD>
879 + </TR>
880 + <TR>
881 + <TD>Column 2-1</TD>
882 + <TD>Column 2-2</TD>
883 + </TR>
884 + </TBODY>
885 + </TABLE>
886 *
887 * This function will create a tree as follows:
888 +
889 + ÚÄÄÄÄÄÄÄÄÄÄÄÄ¿
890 + ³ TABLE ³ (only ELEMENT node in root DOCUMENT node)
891 + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
892 + ³
893 + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
894 + ³ TBODY ³ (only ELEMENT node in root "TABLE" node)
895 + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
896 + ÚÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÄÄÄÄ¿
897 + ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
898 + ³ TR ³ ³ TR ³
899 + ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
900 + ÚÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÁÄÄÄÄÄÄ¿
901 + ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿
902 + ³ TD ³ ³ TD ³ ³ TD ³ ³ TD ³
903 + ÀÄÄÂÄÄÙ ÀÄÄÂÄÄÙ ÀÄÄÄÂÄÙ ÀÄÄÂÄÄÙ
904 + ÉÍÍÍÍÍÊÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÊÍÍÍÍÍÍÍ»
905 + ºColumn 1-1º ºColumn 1-2º ºColumn 2-1º ºColumn 2-2º (one TEXT node in each parent node)
906 + ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ
907 */
908
909ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
910 const char *pcszBuf, // in: buffer to search
911 PFNVALIDATE pfnValidateTag)
912{
913 ULONG ulrc = 0;
914
915 PLINKLIST pllTags = BuildTagsList(pcszBuf);
916
917 // now create DOMNODE's according to that list...
918 const char *pcszError = 0;
919 CreateNodesForBuf(pcszBuf,
920 NULL, // enitre buffer
921 pllTags,
922 pParentNode,
923 pfnValidateTag,
924 &pcszError);
925
926 lstFree(pllTags);
927
928 return (ulrc);
929}
930
931/*
932 *@@ xmlCreateDocumentFromString:
933 * creates a DOCUMENT DOMNODE and calls xmlParse
934 * to break down the specified buffer into that
935 * node.
936 */
937
938PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
939 PFNVALIDATE pfnValidateTag)
940{
941 PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
942 DOMNODE_DOCUMENT);
943 xmlParse(pDocument,
944 pcszXML,
945 pfnValidateTag);
946
947 return (pDocument);
948}
949
950
Note: See TracBrowser for help on using the repository browser.