Context Navigation

source: trunk/src/helpers/xml.c@ 12

Visit:

Last change on this file since 12 was 12, checked in by umoeller, 25 years ago
Updated string helpers.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 34.9 KB

Line
1
2	/*
3	*@@sourcefile xml.c:
4	* XML parsing.
5	*
6	* This is vaguely modelled after the Document Object Model
7	* (DOM) standardized by the W3C.
8	*
9	* In short, DOM specifies that an XML document is broken
10	* up into a tree of nodes, representing the various parts
11	* of an XML document. Most importantly, we have:
12	*
13	* -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
14	*
15	* -- ATTRIBUTE: an attribute to an element.
16	*
17	* -- TEXT: a piece of, well, text.
18	*
19	* -- COMMENT: a comment.
20	*
21	* See xmlParse() for a more detailed explanation.
22	*
23	* However, since this implementation was supposed to be a
24	* C-only interface, we do not implement inheritance. Instead,
25	* each XML document is broken up into a tree of DOMNODE's only,
26	* each of which has a special type.
27	*
28	* It shouldn't be too difficult to write a C++ encapsulation
29	* of this which implements all the methods required by the DOM
30	* standard.
31	*
32	* The main entry point into this is xmlParse or
33	* xmlCreateDocumentFromString. See remarks there for details.
34	*
35	* Limitations:
36	*
37	* 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
38	* and COMMENT nodes.
39	*
40	* 2) This doesn't use 16-bit characters, but 8-bit characters.
41	*
42	*@@header "helpers\xml.h"
43	*@@added V0.9.6 (2000-10-29) [umoeller]
44	*/
45
46	/*
47	* Copyright (C) 2000 Ulrich Möller.
48	* This file is part of the XWorkplace source package.
49	* XWorkplace is free software; you can redistribute it and/or modify
50	* it under the terms of the GNU General Public License as published
51	* by the Free Software Foundation, in version 2 as it comes in the
52	* "COPYING" file of the XWorkplace main distribution.
53	* This program is distributed in the hope that it will be useful,
54	* but WITHOUT ANY WARRANTY; without even the implied warranty of
55	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56	* GNU General Public License for more details.
57	*/
58
59	#define OS2EMX_PLAIN_CHAR
60	// this is needed for "os2emx.h"; if this is defined,
61	// emx will define PSZ as _signed_ char, otherwise
62	// as unsigned char
63
64	#define INCL_DOSERRORS
65	#include <os2.h>
66
67	#include <stdlib.h>
68	#include <string.h>
69
70	#include "setup.h" // code generation and debugging options
71
72	#include "helpers\linklist.h"
73	#include "helpers\stringh.h"
74	#include "helpers\xml.h"
75
76	#pragma hdrstop
77
78	/*
79	*@@category: Helpers\C helpers\XML\Node management
80	*/
81
82	/* ******************************************************************
83	*
84	* Node Management
85	*
86	********************************************************************/
87
88	/*
89	*@@ xmlCreateNode:
90	* creates a new DOMNODE with the specified
91	* type and parent.
92	*/
93
94	PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
95	ULONG ulNodeType)
96	{
97	PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
98	if (pNewNode)
99	{
100	memset(pNewNode, 0, sizeof(DOMNODE));
101	pNewNode->ulNodeType = ulNodeType;
102	pNewNode->pParentNode = pParentNode;
103	if (pParentNode)
104	{
105	// parent specified:
106	// append this new node to the parent's
107	// list of child nodes
108	lstAppendItem(&pParentNode->listChildNodes,
109	pNewNode);
110	}
111
112	lstInit(&pNewNode->listChildNodes, FALSE);
113	lstInit(&pNewNode->listAttributeNodes, FALSE);
114	}
115
116	return (pNewNode);
117	}
118
119	/*
120	*@@ xmlDeleteNode:
121	* deletes the specified node.
122	*
123	* If the node has child nodes, all of them are deleted
124	* as well. This recurses, if necessary.
125	*
126	* As a result, if the node is a document node, this
127	* deletes an entire document, including all of its
128	* child nodes.
129	*
130	* Returns:
131	*
132	* -- 0: NO_ERROR.
133	*/
134
135	ULONG xmlDeleteNode(PDOMNODE pNode)
136	{
137	ULONG ulrc = 0;
138
139	if (!pNode)
140	{
141	ulrc = DOMERR_NOT_FOUND;
142	}
143	else
144	{
145	// recurse into child nodes
146	PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
147	while (pNodeThis)
148	{
149	// recurse!!
150	xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
151
152	pNodeThis = pNodeThis->pNext;
153	}
154
155	// delete attribute nodes
156	pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
157	while (pNodeThis)
158	{
159	// recurse!!
160	xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
161
162	pNodeThis = pNodeThis->pNext;
163	}
164
165	if (pNode->pParentNode)
166	{
167	// node has a parent:
168	// remove this node from the parent's list
169	// of child nodes before deleting this node
170	lstRemoveItem(&pNode->pParentNode->listChildNodes,
171	pNode);
172	pNode->pParentNode = NULL;
173	}
174
175	if (pNode->pszNodeName)
176	{
177	free(pNode->pszNodeName);
178	pNode->pszNodeName = NULL;
179	}
180	if (pNode->pszNodeValue)
181	{
182	free(pNode->pszNodeValue);
183	pNode->pszNodeValue = NULL;
184	}
185
186	free(pNode);
187	}
188
189	return (ulrc);
190	}
191
192	/*
193	*@@category: Helpers\C helpers\XML\Parsing
194	*/
195
196	/* ******************************************************************
197	*
198	* Tokenizing (Compiling)
199	*
200	********************************************************************/
201
202	/*
203	*@@ xmlTokenize:
204	* this takes any block of XML text and "tokenizes"
205	* it.
206	*
207	* Tokenizing (or compiling, or "scanning" in bison/flex
208	* terms) means preparing the XML code for parsing later.
209	* This finds all tags and tag attributes and creates
210	* special codes for them in the output buffer.
211	*
212	* For example:
213	+
214	+ <TAG ATTR="text"> block </TAG>
215	+
216	* becomes
217	*
218	+ 0xFF escape code
219	+ 0x01 tag start code
220	+ "TAG" tag name
221	+ 0xFF end of tag name code
222	+
223	+ 0xFF escape code
224	+ 0x03 attribute name code
225	+ "ATTR" attribute name
226	+ 0xFF
227	+ "text" attribute value (without quotes)
228	+ 0xFF end of attribute code
229	+
230	+ " block " regular text
231	+
232	+ 0xFF escape code
233	+ 0x01 tag start code
234	+ "/TAG" tag name
235	+ 0xFF end of tag name code
236	*
237	*@@added V0.9.6 (2000-11-01) [umoeller]
238	*/
239
240	PSZ xmlTokenize(const char *pcszXML)
241	{
242	}
243
244	/* ******************************************************************
245	*
246	* Parsing
247	*
248	********************************************************************/
249
250	/*
251	* TAGFOUND:
252	* structure created for each tag by BuildTagsList.
253	*/
254
255	typedef struct _TAGFOUND
256	{
257	BOOL fIsComment;
258	const char *pOpenBrck;
259	const char *pStartOfTagName;
260	const char *pFirstAfterTagName;
261	const char *pCloseBrck; // ptr to '>' char; this plus one should
262	// point to after the tag
263	} TAGFOUND, *PTAGFOUND;
264
265	/*
266	* BuildTagsList:
267	* builds a LINKLIST containing TAGFOUND structs for
268	* each tag found in the specified buffer.
269	*
270	* This is a flat list without any tree structure. This
271	* only searches for the tags and doesn't create any
272	* hierarchy.
273	*
274	* The tags are simply added to the list in the order
275	* in which they are found in pcszBuffer.
276	*
277	* The list is auto-free, you can simply do a lstFree
278	* to clean up.
279	*/
280
281	PLINKLIST BuildTagsList(const char *pcszBuffer)
282	{
283	PLINKLIST pllTags = lstCreate(TRUE);
284
285	const char *pSearchPos = pcszBuffer;
286
287	while ((pSearchPos) && (*pSearchPos))
288	{
289	// find first '<'
290	PSZ pOpenBrck = strchr(pSearchPos, '<');
291	if (!pOpenBrck)
292	// no open bracket found: stop search
293	pSearchPos = 0;
294	else
295	{
296	if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
297	{
298	// it's a comment:
299	// treat that differently
300	const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
301	const char *pCloseBrck = 0;
302	const char *pFirstAfterTagName = 0;
303	PTAGFOUND pTagFound;
304	if (!pEndOfComment)
305	{
306	// no end of comment found:
307	// skip entire rest of string
308	pCloseBrck = pOpenBrck + strlen(pOpenBrck);
309	pFirstAfterTagName = pCloseBrck;
310	pSearchPos = 0;
311	}
312	else
313	{
314	pCloseBrck = pEndOfComment + 2; // point directly to '>'
315	pFirstAfterTagName = pCloseBrck + 1;
316	}
317
318	// append it to the list
319	pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
320	if (!pTagFound)
321	// error:
322	pSearchPos = 0;
323	else
324	{
325	pTagFound->fIsComment = TRUE;
326	pTagFound->pOpenBrck = pOpenBrck;
327	pTagFound->pStartOfTagName = pOpenBrck + 1;
328	pTagFound->pFirstAfterTagName = pFirstAfterTagName;
329	pTagFound->pCloseBrck = pCloseBrck;
330
331	lstAppendItem(pllTags, pTagFound);
332	}
333
334	pSearchPos = pFirstAfterTagName;
335	}
336	else
337	{
338	// no comment:
339	// find matching closing bracket
340	const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
341	if (!pCloseBrck)
342	pSearchPos = 0;
343	else
344	{
345	const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
346	// if we have another opening bracket before the closing bracket,
347	if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
348	// ignore this one
349	pSearchPos = pNextOpenBrck;
350	else
351	{
352	// OK, apparently we have a tag.
353	// Skip all spaces after the tag.
354	const char *pTagName = pOpenBrck + 1;
355	while ( (*pTagName)
356	&& ( (*pTagName == ' ')
357	\|\| (*pTagName == '\r')
358	\|\| (*pTagName == '\n')
359	)
360	)
361	pTagName++;
362	if (!*pTagName)
363	// no tag name: stop
364	pSearchPos = 0;
365	else
366	{
367	// ookaaayyy, we got a tag now.
368	// Find first space or ">" after tag name:
369	const char *pFirstAfterTagName = pTagName + 1;
370	while ( (*pFirstAfterTagName)
371	&& (*pFirstAfterTagName != ' ')
372	&& (*pFirstAfterTagName != '\n')
373	&& (*pFirstAfterTagName != '\r')
374	&& (*pFirstAfterTagName != '\t') // tab
375	&& (*pFirstAfterTagName != '>')
376	)
377	pFirstAfterTagName++;
378	if (!*pFirstAfterTagName)
379	// no closing bracket found:
380	pSearchPos = 0;
381	else
382	{
383	// got a tag name:
384	// append it to the list
385	PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
386	if (!pTagFound)
387	// error:
388	pSearchPos = 0;
389	else
390	{
391	pTagFound->fIsComment = FALSE;
392	pTagFound->pOpenBrck = pOpenBrck;
393	pTagFound->pStartOfTagName = pTagName;
394	pTagFound->pFirstAfterTagName = pFirstAfterTagName;
395	pTagFound->pCloseBrck = pCloseBrck;
396
397	lstAppendItem(pllTags, pTagFound);
398
399	// search on after closing bracket
400	pSearchPos = pCloseBrck + 1;
401	}
402	}
403	}
404	}
405	} // end else if (!pCloseBrck)
406	} // end else if (strncmp(pOpenBrck + 1, "!--"))
407	} // end if (pOpenBrck)
408	} // end while
409
410	return (pllTags);
411	}
412
413	/*
414	*@@ CreateTextNode:
415	* shortcut for creating a TEXT node. Calls
416	* xmlCreateNode in turn.
417	*
418	* The text is extracted from in between the
419	* two pointers using strhSubstr.
420	*/
421
422	PDOMNODE CreateTextNode(PDOMNODE pParentNode,
423	const char *pStart,
424	const char *pEnd)
425	{
426	PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
427	DOMNODE_TEXT);
428	if (pNewTextNode)
429	pNewTextNode->pszNodeValue = strhSubstr(pStart,
430	pEnd);
431
432	return (pNewTextNode);
433	}
434
435	/*
436	*@@ CreateElementNode:
437	* shortcut for creating a new ELEMENT node and
438	* parsing attributes at the same time.
439	*
440	* pszTagName is assumed to be static (no copy
441	* is made).
442	*
443	* pAttribs is assumed to point to an attributes
444	* string. This function creates ATTRIBUTE nodes
445	* from that string until either a null character
446	* or '>' is found.
447	*/
448
449	PDOMNODE CreateElementNode(PDOMNODE pParentNode,
450	PSZ pszTagName,
451	const char *pAttribs) // in: ptr to attribs; can be NULL
452	{
453	PDOMNODE pNewNode = xmlCreateNode(pParentNode,
454	DOMNODE_ELEMENT);
455	if (pNewNode)
456	{
457	const char *p = pAttribs;
458
459	pNewNode->pszNodeName = pszTagName;
460
461	// find-start-of-attribute loop
462	while (p)
463	{
464	switch (*p)
465	{
466	case 0:
467	case '>':
468	p = 0;
469	break;
470
471	case ' ':
472	case '\t': // tab
473	case '\n':
474	case '\r':
475	p++;
476	break;
477
478	default:
479	{
480	// first (or next) non-space:
481	// that's the start of an attrib, probably
482	// go until we find a space or '>'
483
484	const char *pNameStart = p,
485	*p2 = p;
486
487	const char *pEquals = 0,
488	*pFirstQuote = 0,
489	*pEnd = 0; // last char... non-inclusive!
490
491	// copy-rest-of-attribute loop
492	while (p2)
493	{
494	switch (*p2)
495	{
496	case '"':
497	if (!pEquals)
498	{
499	// '"' cannot appear before '='
500	p2 = 0;
501	p = 0;
502	}
503	else
504	{
505	if (pFirstQuote)
506	{
507	// second quote:
508	// get value between quotes
509	pEnd = p2;
510	// we're done with this one
511	p = p2 + 1;
512	p2 = 0;
513	}
514	else
515	{
516	// first quote:
517	pFirstQuote = p2;
518	p2++;
519	}
520	}
521	break;
522
523	case '=':
524	if (!pEquals)
525	{
526	// first equals sign:
527	pEquals = p2;
528	// extract name
529	p2++;
530	}
531	else
532	if (pFirstQuote)
533	p2++;
534	else
535	{
536	// error
537	p2 = 0;
538	p = 0;
539	}
540	break;
541
542	case ' ':
543	case '\t': // tab
544	case '\n':
545	case '\r':
546	// spaces can appear in quotes
547	if (pFirstQuote)
548	// just continue
549	p2++;
550	else
551	{
552	// end of it!
553	pEnd = p2;
554	p = p2 + 1;
555	p2 = 0;
556	}
557	break;
558
559	case 0:
560	case '>':
561	{
562	pEnd = p2;
563	// quit inner AND outer loop
564	p2 = 0;
565	p = 0;
566	break; }
567
568	default:
569	p2++;
570	}
571	} // end while (p2)
572
573	if (pEnd)
574	{
575	PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
576	DOMNODE_ATTRIBUTE);
577	if (pAttribNode)
578	{
579	if (pEquals)
580	{
581	pAttribNode->pszNodeName
582	= strhSubstr(pNameStart, pEquals);
583
584	// did we have quotes?
585	if (pFirstQuote)
586	pAttribNode->pszNodeValue
587	= strhSubstr(pFirstQuote + 1, pEnd);
588	else
589	pAttribNode->pszNodeValue
590	= strhSubstr(pEquals + 1, pEnd);
591	}
592	else
593	// no "equals":
594	pAttribNode->pszNodeName
595	= strhSubstr(pNameStart, pEnd);
596	}
597	}
598	break; }
599	}
600	}
601	}
602
603	return (pNewNode);
604	}
605
606	/*
607	*@@ CreateNodesForBuf:
608	* this gets called (recursively) for a piece of text
609	* for which we need to create TEXT and ELEMENT DOMNODE's.
610	*
611	* This does the heavy work for xmlParse.
612	*
613	* If an error (!= 0) is returned, *ppError points to
614	* the code part that failed.
615	*/
616
617	ULONG CreateNodesForBuf(const char *pcszBufStart,
618	const char *pcszBufEnd, // in: can be NULL
619	PLINKLIST pllTagsList,
620	PDOMNODE pParentNode,
621	PFNVALIDATE pfnValidateTag,
622	const char **ppError)
623	{
624	ULONG ulrc = 0;
625	PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
626	const char *pBufCurrent = pcszBufStart;
627	BOOL fContinue = TRUE;
628
629	if (pcszBufEnd == NULL)
630	pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
631
632	while (fContinue)
633	{
634	if ( (!*pBufCurrent)
635	\|\| (pBufCurrent == pcszBufEnd)
636	)
637	// end of buf reached:
638	fContinue = FALSE;
639
640	else if (!pCurrentTagListNode)
641	{
642	// no (more) tags for this buffer:
643	CreateTextNode(pParentNode,
644	pBufCurrent,
645	pcszBufEnd);
646	fContinue = FALSE;
647	}
648	else
649	{
650	// another tag found:
651	PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
652	const char *pStartOfTag = pFoundTag->pOpenBrck;
653	if (pStartOfTag > pBufCurrent + 1)
654	{
655	// we have text before the opening tag:
656	// make a DOMTEXT out of this
657	CreateTextNode(pParentNode,
658	pBufCurrent,
659	pStartOfTag);
660	pBufCurrent = pStartOfTag;
661	}
662	else
663	{
664	// OK, go for this tag...
665
666	if (*(pFoundTag->pStartOfTagName) == '/')
667	{
668	// this is a closing tag: that's an error
669	ulrc = 1;
670	*ppError = pFoundTag->pStartOfTagName;
671	fContinue = FALSE;
672	}
673	else if (pFoundTag->fIsComment)
674	{
675	// it's a comment: that's simple
676	PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
677	DOMNODE_COMMENT);
678	if (!pCommentNode)
679	ulrc = ERROR_NOT_ENOUGH_MEMORY;
680	else
681	{
682	pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
683	pFoundTag->pCloseBrck - 2);
684	}
685	pBufCurrent = pFoundTag->pCloseBrck + 1;
686	}
687	else
688	{
689	BOOL fKeepTagName = FALSE; // free pszTagName below
690	PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
691	pFoundTag->pFirstAfterTagName);
692	if (!pszTagName)
693	// zero-length string:
694	// go ahead after that
695	pBufCurrent = pFoundTag->pCloseBrck + 1;
696	else
697	{
698	// XML knows two types of elements:
699
700	// a) Element pairs, which have opening and closing tags
701	// (<TAG> and </TAG>
702	// b) Single elements, which must have "/" as their last
703	// character; these have no closing tag
704	// (<TAG/>)
705
706	// However, HTML doesn't usually tag single elements
707	// with a trailing '/'. To maintain compatibility,
708	// if we don't find a matching closing tag, we extract
709	// everything up to the end of the buffer.
710
711	ULONG ulTagNameLen = strlen(pszTagName);
712
713	// search for closing tag first...
714	// create string with closing tag to search for;
715	// that's '/' plus opening tag name
716	ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
717	PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
718	PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
719	PLISTNODE pTagListNodeForChildren = pTagListNode2;
720
721	BOOL fClosingTagFound = FALSE;
722
723	*pszClosingTag2Find = '/';
724	strcpy(pszClosingTag2Find + 1, pszTagName);
725
726	// now find matching closing tag
727	while (pTagListNode2)
728	{
729	PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
730	ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
731	// compare tag name lengths
732	if (ulFoundTag2Len == ulClosingTagLen2Find)
733	{
734	// same length:
735	// compare
736	if (memcmp(pFoundTag2->pStartOfTagName,
737	pszClosingTag2Find,
738	ulClosingTagLen2Find)
739	== 0)
740	{
741	// found matching closing tag:
742
743	// we now have
744	// -- pCurrentTagListNode pointing to the opening tag
745	// (pFoundTag has its PTAGFOUND item data)
746	// -- pTagListNode2 pointing to the closing tag
747	// (pFoundTag2 has its PTAGFOUND item data)
748
749	// create DOM node
750	PDOMNODE pNewNode = CreateElementNode(pParentNode,
751	pszTagName,
752	pFoundTag->pFirstAfterTagName);
753	if (pNewNode)
754	{
755	ULONG ulAction = XMLACTION_BREAKUP;
756
757	fKeepTagName = TRUE; // do not free below
758
759	// validate tag
760	if (pfnValidateTag)
761	{
762	// validator specified:
763	ulAction = pfnValidateTag(pszTagName);
764	}
765
766	if (ulAction == XMLACTION_COPYASTEXT)
767	{
768	CreateTextNode(pNewNode,
769	pFoundTag->pCloseBrck + 1,
770	pFoundTag2->pOpenBrck - 1);
771	}
772	else if (ulAction == XMLACTION_BREAKUP)
773	{
774	PLINKLIST pllSubList = lstCreate(FALSE);
775	PLISTNODE pSubNode = 0;
776	ULONG cSubNodes = 0;
777
778	// text buffer to search
779	const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
780	const char *pSubBufEnd = pFoundTag2->pOpenBrck;
781
782	// create a child list containing
783	// all tags from the first tag after
784	// the current opening tag to the closing tag
785	for (pSubNode = pTagListNodeForChildren;
786	pSubNode != pTagListNode2;
787	pSubNode = pSubNode->pNext)
788	{
789	lstAppendItem(pllSubList,
790	pSubNode->pItemData);
791	cSubNodes++;
792	}
793
794	// now recurse to build child nodes
795	// (text and elements), even if the
796	// list is empty, we can have text!
797	CreateNodesForBuf(pSubBufStart,
798	pSubBufEnd,
799	pllSubList,
800	pNewNode,
801	pfnValidateTag,
802	ppError);
803
804	lstFree(pllSubList);
805	} // end if (ulAction == XMLACTION_BREAKUP)
806
807	// now search on after the closing tag
808	// we've found; the next tag will be set below
809	pCurrentTagListNode = pTagListNode2;
810	pBufCurrent = pFoundTag2->pCloseBrck + 1;
811
812	fClosingTagFound = TRUE;
813
814	break; // // while (pTagListNode2)
815	} // end if (pNewNode)
816	} // end if (memcmp(pFoundTag2->pStartOfTagName,
817	} // if (ulFoundTag2Len == ulClosingTagLen2Find)
818
819	pTagListNode2 = pTagListNode2->pNext;
820
821	} // while (pTagListNode2)
822
823	if (!fClosingTagFound)
824	{
825	// no matching closing tag found:
826	// that's maybe a block of not well-formed XML
827
828	// e.g. with WarpIN:
829	// <README> <-- we start after this
830	// block of plain HTML with <P> tags and such
831	// </README>
832
833	// just create an element
834	PDOMNODE pNewNode = CreateElementNode(pParentNode,
835	pszTagName,
836	pFoundTag->pFirstAfterTagName);
837	if (pNewNode)
838	fKeepTagName = TRUE;
839
840	// now search on after the closing tag
841	// we've found; the next tag will be set below
842	// pCurrentTagListNode = pTagListNodeForChildren;
843	pBufCurrent = pFoundTag->pCloseBrck + 1;
844	}
845
846	free(pszClosingTag2Find);
847
848	if (!fKeepTagName)
849	free(pszTagName);
850	} // end if (pszTagName)
851	}
852
853	pCurrentTagListNode = pCurrentTagListNode->pNext;
854	}
855	}
856	}
857
858	return (ulrc);
859	}
860
861	/*
862	* xmlParse:
863	* generic XML parser.
864	*
865	* This takes the specified zero-terminated string
866	* in pcszBuf and parses it, adding DOMNODE's as
867	* children to pNode.
868	*
869	* This recurses, if necessary, to build a node tree.
870	*
871	* Example: Take this HTML table definition:
872	+
873	+ <TABLE>
874	+ <TBODY>
875	+ <TR>
876	+ <TD>Column 1-1</TD>
877	+ <TD>Column 1-2</TD>
878	+ </TR>
879	+ <TR>
880	+ <TD>Column 2-1</TD>
881	+ <TD>Column 2-2</TD>
882	+ </TR>
883	+ </TBODY>
884	+ </TABLE>
885	*
886	* This function will create a tree as follows:
887	+
888	+ ÚÄÄÄÄÄÄÄÄÄÄÄÄ¿
889	+ ³ TABLE ³ (only ELEMENT node in root DOCUMENT node)
890	+ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
891	+ ³
892	+ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
893	+ ³ TBODY ³ (only ELEMENT node in root "TABLE" node)
894	+ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
895	+ ÚÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÄÄÄÄ¿
896	+ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
897	+ ³ TR ³ ³ TR ³
898	+ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
899	+ ÚÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÁÄÄÄÄÄÄ¿
900	+ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿
901	+ ³ TD ³ ³ TD ³ ³ TD ³ ³ TD ³
902	+ ÀÄÄÂÄÄÙ ÀÄÄÂÄÄÙ ÀÄÄÄÂÄÙ ÀÄÄÂÄÄÙ
903	+ ÉÍÍÍÍÍÊÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÊÍÍÍÍÍÍÍ»
904	+ ºColumn 1-1º ºColumn 1-2º ºColumn 2-1º ºColumn 2-2º (one TEXT node in each parent node)
905	+ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ
906	*/
907
908	ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
909	const char *pcszBuf, // in: buffer to search
910	PFNVALIDATE pfnValidateTag)
911	{
912	ULONG ulrc = 0;
913
914	PLINKLIST pllTags = BuildTagsList(pcszBuf);
915
916	// now create DOMNODE's according to that list...
917	const char *pcszError = 0;
918	CreateNodesForBuf(pcszBuf,
919	NULL, // enitre buffer
920	pllTags,
921	pParentNode,
922	pfnValidateTag,
923	&pcszError);
924
925	lstFree(pllTags);
926
927	return (ulrc);
928	}
929
930	/*
931	*@@ xmlCreateDocumentFromString:
932	* creates a DOCUMENT DOMNODE and calls xmlParse
933	* to break down the specified buffer into that
934	* node.
935	*/
936
937	PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
938	PFNVALIDATE pfnValidateTag)
939	{
940	PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
941	DOMNODE_DOCUMENT);
942	xmlParse(pDocument,
943	pcszXML,
944	pfnValidateTag);
945
946	return (pDocument);
947	}
948
949

Note: See TracBrowser for help on using the repository browser.

Download in other formats: