Context Navigation

source: trunk/src/helpers/xml.c@ 15

Visit:

Last change on this file since 15 was 14, checked in by umoeller, 25 years ago
Major updates; timers, LVM, miscellaneous.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 34.9 KB

Line
1
2	/*
3	*@@sourcefile xml.c:
4	* XML parsing.
5	*
6	* This is vaguely modelled after the Document Object Model
7	* (DOM) standardized by the W3C.
8	*
9	* In short, DOM specifies that an XML document is broken
10	* up into a tree of nodes, representing the various parts
11	* of an XML document. Most importantly, we have:
12	*
13	* -- ELEMENT: some XML tag or a pair of tags (e.g. <LI>...</LI>).
14	*
15	* -- ATTRIBUTE: an attribute to an element.
16	*
17	* -- TEXT: a piece of, well, text.
18	*
19	* -- COMMENT: a comment.
20	*
21	* See xmlParse() for a more detailed explanation.
22	*
23	* However, since this implementation was supposed to be a
24	* C-only interface, we do not implement inheritance. Instead,
25	* each XML document is broken up into a tree of DOMNODE's only,
26	* each of which has a special type.
27	*
28	* It shouldn't be too difficult to write a C++ encapsulation
29	* of this which implements all the methods required by the DOM
30	* standard.
31	*
32	* The main entry point into this is xmlParse or
33	* xmlCreateDocumentFromString. See remarks there for details.
34	*
35	* Limitations:
36	*
37	* 1) This presently only parses ELEMENT, ATTRIBUTE, TEXT,
38	* and COMMENT nodes.
39	*
40	* 2) This doesn't use 16-bit characters, but 8-bit characters.
41	*
42	*@@header "helpers\xml.h"
43	*@@added V0.9.6 (2000-10-29) [umoeller]
44	*/
45
46	/*
47	* Copyright (C) 2000 Ulrich Möller.
48	* This file is part of the "XWorkplace helpers" source package.
49	* This is free software; you can redistribute it and/or modify
50	* it under the terms of the GNU General Public License as published
51	* by the Free Software Foundation, in version 2 as it comes in the
52	* "COPYING" file of the XWorkplace main distribution.
53	* This program is distributed in the hope that it will be useful,
54	* but WITHOUT ANY WARRANTY; without even the implied warranty of
55	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56	* GNU General Public License for more details.
57	*/
58
59	#define OS2EMX_PLAIN_CHAR
60	// this is needed for "os2emx.h"; if this is defined,
61	// emx will define PSZ as _signed_ char, otherwise
62	// as unsigned char
63
64	#define INCL_DOSERRORS
65	#include <os2.h>
66
67	#include <stdlib.h>
68	#include <string.h>
69
70	#include "setup.h" // code generation and debugging options
71
72	#include "helpers\linklist.h"
73	#include "helpers\stringh.h"
74	#include "helpers\xml.h"
75
76	#pragma hdrstop
77
78	/*
79	*@@category: Helpers\C helpers\XML\Node management
80	*/
81
82	/* ******************************************************************
83	*
84	* Node Management
85	*
86	********************************************************************/
87
88	/*
89	*@@ xmlCreateNode:
90	* creates a new DOMNODE with the specified
91	* type and parent.
92	*/
93
94	PDOMNODE xmlCreateNode(PDOMNODE pParentNode,
95	ULONG ulNodeType)
96	{
97	PDOMNODE pNewNode = (PDOMNODE)malloc(sizeof(DOMNODE));
98	if (pNewNode)
99	{
100	memset(pNewNode, 0, sizeof(DOMNODE));
101	pNewNode->ulNodeType = ulNodeType;
102	pNewNode->pParentNode = pParentNode;
103	if (pParentNode)
104	{
105	// parent specified:
106	// append this new node to the parent's
107	// list of child nodes
108	lstAppendItem(&pParentNode->listChildNodes,
109	pNewNode);
110	}
111
112	lstInit(&pNewNode->listChildNodes, FALSE);
113	lstInit(&pNewNode->listAttributeNodes, FALSE);
114	}
115
116	return (pNewNode);
117	}
118
119	/*
120	*@@ xmlDeleteNode:
121	* deletes the specified node.
122	*
123	* If the node has child nodes, all of them are deleted
124	* as well. This recurses, if necessary.
125	*
126	* As a result, if the node is a document node, this
127	* deletes an entire document, including all of its
128	* child nodes.
129	*
130	* Returns:
131	*
132	* -- 0: NO_ERROR.
133	*/
134
135	ULONG xmlDeleteNode(PDOMNODE pNode)
136	{
137	ULONG ulrc = 0;
138
139	if (!pNode)
140	{
141	ulrc = DOMERR_NOT_FOUND;
142	}
143	else
144	{
145	// recurse into child nodes
146	PLISTNODE pNodeThis = lstQueryFirstNode(&pNode->listChildNodes);
147	while (pNodeThis)
148	{
149	// recurse!!
150	xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
151
152	pNodeThis = pNodeThis->pNext;
153	}
154
155	// delete attribute nodes
156	pNodeThis = lstQueryFirstNode(&pNode->listAttributeNodes);
157	while (pNodeThis)
158	{
159	// recurse!!
160	xmlDeleteNode((PDOMNODE)(pNodeThis->pItemData));
161
162	pNodeThis = pNodeThis->pNext;
163	}
164
165	if (pNode->pParentNode)
166	{
167	// node has a parent:
168	// remove this node from the parent's list
169	// of child nodes before deleting this node
170	lstRemoveItem(&pNode->pParentNode->listChildNodes,
171	pNode);
172	pNode->pParentNode = NULL;
173	}
174
175	if (pNode->pszNodeName)
176	{
177	free(pNode->pszNodeName);
178	pNode->pszNodeName = NULL;
179	}
180	if (pNode->pszNodeValue)
181	{
182	free(pNode->pszNodeValue);
183	pNode->pszNodeValue = NULL;
184	}
185
186	free(pNode);
187	}
188
189	return (ulrc);
190	}
191
192	/*
193	*@@category: Helpers\C helpers\XML\Parsing
194	*/
195
196	/* ******************************************************************
197	*
198	* Tokenizing (Compiling)
199	*
200	********************************************************************/
201
202	/*
203	*@@ xmlTokenize:
204	* this takes any block of XML text and "tokenizes"
205	* it.
206	*
207	* Tokenizing (or compiling, or "scanning" in bison/flex
208	* terms) means preparing the XML code for parsing later.
209	* This finds all tags and tag attributes and creates
210	* special codes for them in the output buffer.
211	*
212	* For example:
213	+
214	+ <TAG ATTR="text"> block </TAG>
215	+
216	* becomes
217	*
218	+ 0xFF escape code
219	+ 0x01 tag start code
220	+ "TAG" tag name
221	+ 0xFF end of tag name code
222	+
223	+ 0xFF escape code
224	+ 0x03 attribute name code
225	+ "ATTR" attribute name
226	+ 0xFF
227	+ "text" attribute value (without quotes)
228	+ 0xFF end of attribute code
229	+
230	+ " block " regular text
231	+
232	+ 0xFF escape code
233	+ 0x01 tag start code
234	+ "/TAG" tag name
235	+ 0xFF end of tag name code
236	*
237	*@@added V0.9.6 (2000-11-01) [umoeller]
238	*/
239
240	PSZ xmlTokenize(const char *pcszXML)
241	{
242	return (0);
243	}
244
245	/* ******************************************************************
246	*
247	* Parsing
248	*
249	********************************************************************/
250
251	/*
252	* TAGFOUND:
253	* structure created for each tag by BuildTagsList.
254	*/
255
256	typedef struct _TAGFOUND
257	{
258	BOOL fIsComment;
259	const char *pOpenBrck;
260	const char *pStartOfTagName;
261	const char *pFirstAfterTagName;
262	const char *pCloseBrck; // ptr to '>' char; this plus one should
263	// point to after the tag
264	} TAGFOUND, *PTAGFOUND;
265
266	/*
267	* BuildTagsList:
268	* builds a LINKLIST containing TAGFOUND structs for
269	* each tag found in the specified buffer.
270	*
271	* This is a flat list without any tree structure. This
272	* only searches for the tags and doesn't create any
273	* hierarchy.
274	*
275	* The tags are simply added to the list in the order
276	* in which they are found in pcszBuffer.
277	*
278	* The list is auto-free, you can simply do a lstFree
279	* to clean up.
280	*/
281
282	PLINKLIST BuildTagsList(const char *pcszBuffer)
283	{
284	PLINKLIST pllTags = lstCreate(TRUE);
285
286	const char *pSearchPos = pcszBuffer;
287
288	while ((pSearchPos) && (*pSearchPos))
289	{
290	// find first '<'
291	PSZ pOpenBrck = strchr(pSearchPos, '<');
292	if (!pOpenBrck)
293	// no open bracket found: stop search
294	pSearchPos = 0;
295	else
296	{
297	if (strncmp(pOpenBrck + 1, "!--", 3) == 0)
298	{
299	// it's a comment:
300	// treat that differently
301	const char *pEndOfComment = strstr(pOpenBrck + 4, "-->");
302	const char *pCloseBrck = 0;
303	const char *pFirstAfterTagName = 0;
304	PTAGFOUND pTagFound;
305	if (!pEndOfComment)
306	{
307	// no end of comment found:
308	// skip entire rest of string
309	pCloseBrck = pOpenBrck + strlen(pOpenBrck);
310	pFirstAfterTagName = pCloseBrck;
311	pSearchPos = 0;
312	}
313	else
314	{
315	pCloseBrck = pEndOfComment + 2; // point directly to '>'
316	pFirstAfterTagName = pCloseBrck + 1;
317	}
318
319	// append it to the list
320	pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
321	if (!pTagFound)
322	// error:
323	pSearchPos = 0;
324	else
325	{
326	pTagFound->fIsComment = TRUE;
327	pTagFound->pOpenBrck = pOpenBrck;
328	pTagFound->pStartOfTagName = pOpenBrck + 1;
329	pTagFound->pFirstAfterTagName = pFirstAfterTagName;
330	pTagFound->pCloseBrck = pCloseBrck;
331
332	lstAppendItem(pllTags, pTagFound);
333	}
334
335	pSearchPos = pFirstAfterTagName;
336	}
337	else
338	{
339	// no comment:
340	// find matching closing bracket
341	const char *pCloseBrck = strchr(pOpenBrck + 1, '>');
342	if (!pCloseBrck)
343	pSearchPos = 0;
344	else
345	{
346	const char *pNextOpenBrck = strchr(pOpenBrck + 1, '<');
347	// if we have another opening bracket before the closing bracket,
348	if ((pNextOpenBrck) && (pNextOpenBrck < pCloseBrck))
349	// ignore this one
350	pSearchPos = pNextOpenBrck;
351	else
352	{
353	// OK, apparently we have a tag.
354	// Skip all spaces after the tag.
355	const char *pTagName = pOpenBrck + 1;
356	while ( (*pTagName)
357	&& ( (*pTagName == ' ')
358	\|\| (*pTagName == '\r')
359	\|\| (*pTagName == '\n')
360	)
361	)
362	pTagName++;
363	if (!*pTagName)
364	// no tag name: stop
365	pSearchPos = 0;
366	else
367	{
368	// ookaaayyy, we got a tag now.
369	// Find first space or ">" after tag name:
370	const char *pFirstAfterTagName = pTagName + 1;
371	while ( (*pFirstAfterTagName)
372	&& (*pFirstAfterTagName != ' ')
373	&& (*pFirstAfterTagName != '\n')
374	&& (*pFirstAfterTagName != '\r')
375	&& (*pFirstAfterTagName != '\t') // tab
376	&& (*pFirstAfterTagName != '>')
377	)
378	pFirstAfterTagName++;
379	if (!*pFirstAfterTagName)
380	// no closing bracket found:
381	pSearchPos = 0;
382	else
383	{
384	// got a tag name:
385	// append it to the list
386	PTAGFOUND pTagFound = (PTAGFOUND)malloc(sizeof(TAGFOUND));
387	if (!pTagFound)
388	// error:
389	pSearchPos = 0;
390	else
391	{
392	pTagFound->fIsComment = FALSE;
393	pTagFound->pOpenBrck = pOpenBrck;
394	pTagFound->pStartOfTagName = pTagName;
395	pTagFound->pFirstAfterTagName = pFirstAfterTagName;
396	pTagFound->pCloseBrck = pCloseBrck;
397
398	lstAppendItem(pllTags, pTagFound);
399
400	// search on after closing bracket
401	pSearchPos = pCloseBrck + 1;
402	}
403	}
404	}
405	}
406	} // end else if (!pCloseBrck)
407	} // end else if (strncmp(pOpenBrck + 1, "!--"))
408	} // end if (pOpenBrck)
409	} // end while
410
411	return (pllTags);
412	}
413
414	/*
415	*@@ CreateTextNode:
416	* shortcut for creating a TEXT node. Calls
417	* xmlCreateNode in turn.
418	*
419	* The text is extracted from in between the
420	* two pointers using strhSubstr.
421	*/
422
423	PDOMNODE CreateTextNode(PDOMNODE pParentNode,
424	const char *pStart,
425	const char *pEnd)
426	{
427	PDOMNODE pNewTextNode = xmlCreateNode(pParentNode,
428	DOMNODE_TEXT);
429	if (pNewTextNode)
430	pNewTextNode->pszNodeValue = strhSubstr(pStart,
431	pEnd);
432
433	return (pNewTextNode);
434	}
435
436	/*
437	*@@ CreateElementNode:
438	* shortcut for creating a new ELEMENT node and
439	* parsing attributes at the same time.
440	*
441	* pszTagName is assumed to be static (no copy
442	* is made).
443	*
444	* pAttribs is assumed to point to an attributes
445	* string. This function creates ATTRIBUTE nodes
446	* from that string until either a null character
447	* or '>' is found.
448	*/
449
450	PDOMNODE CreateElementNode(PDOMNODE pParentNode,
451	PSZ pszTagName,
452	const char *pAttribs) // in: ptr to attribs; can be NULL
453	{
454	PDOMNODE pNewNode = xmlCreateNode(pParentNode,
455	DOMNODE_ELEMENT);
456	if (pNewNode)
457	{
458	const char *p = pAttribs;
459
460	pNewNode->pszNodeName = pszTagName;
461
462	// find-start-of-attribute loop
463	while (p)
464	{
465	switch (*p)
466	{
467	case 0:
468	case '>':
469	p = 0;
470	break;
471
472	case ' ':
473	case '\t': // tab
474	case '\n':
475	case '\r':
476	p++;
477	break;
478
479	default:
480	{
481	// first (or next) non-space:
482	// that's the start of an attrib, probably
483	// go until we find a space or '>'
484
485	const char *pNameStart = p,
486	*p2 = p;
487
488	const char *pEquals = 0,
489	*pFirstQuote = 0,
490	*pEnd = 0; // last char... non-inclusive!
491
492	// copy-rest-of-attribute loop
493	while (p2)
494	{
495	switch (*p2)
496	{
497	case '"':
498	if (!pEquals)
499	{
500	// '"' cannot appear before '='
501	p2 = 0;
502	p = 0;
503	}
504	else
505	{
506	if (pFirstQuote)
507	{
508	// second quote:
509	// get value between quotes
510	pEnd = p2;
511	// we're done with this one
512	p = p2 + 1;
513	p2 = 0;
514	}
515	else
516	{
517	// first quote:
518	pFirstQuote = p2;
519	p2++;
520	}
521	}
522	break;
523
524	case '=':
525	if (!pEquals)
526	{
527	// first equals sign:
528	pEquals = p2;
529	// extract name
530	p2++;
531	}
532	else
533	if (pFirstQuote)
534	p2++;
535	else
536	{
537	// error
538	p2 = 0;
539	p = 0;
540	}
541	break;
542
543	case ' ':
544	case '\t': // tab
545	case '\n':
546	case '\r':
547	// spaces can appear in quotes
548	if (pFirstQuote)
549	// just continue
550	p2++;
551	else
552	{
553	// end of it!
554	pEnd = p2;
555	p = p2 + 1;
556	p2 = 0;
557	}
558	break;
559
560	case 0:
561	case '>':
562	{
563	pEnd = p2;
564	// quit inner AND outer loop
565	p2 = 0;
566	p = 0;
567	break; }
568
569	default:
570	p2++;
571	}
572	} // end while (p2)
573
574	if (pEnd)
575	{
576	PDOMNODE pAttribNode = xmlCreateNode(pNewNode,
577	DOMNODE_ATTRIBUTE);
578	if (pAttribNode)
579	{
580	if (pEquals)
581	{
582	pAttribNode->pszNodeName
583	= strhSubstr(pNameStart, pEquals);
584
585	// did we have quotes?
586	if (pFirstQuote)
587	pAttribNode->pszNodeValue
588	= strhSubstr(pFirstQuote + 1, pEnd);
589	else
590	pAttribNode->pszNodeValue
591	= strhSubstr(pEquals + 1, pEnd);
592	}
593	else
594	// no "equals":
595	pAttribNode->pszNodeName
596	= strhSubstr(pNameStart, pEnd);
597	}
598	}
599	break; }
600	}
601	}
602	}
603
604	return (pNewNode);
605	}
606
607	/*
608	*@@ CreateNodesForBuf:
609	* this gets called (recursively) for a piece of text
610	* for which we need to create TEXT and ELEMENT DOMNODE's.
611	*
612	* This does the heavy work for xmlParse.
613	*
614	* If an error (!= 0) is returned, *ppError points to
615	* the code part that failed.
616	*/
617
618	ULONG CreateNodesForBuf(const char *pcszBufStart,
619	const char *pcszBufEnd, // in: can be NULL
620	PLINKLIST pllTagsList,
621	PDOMNODE pParentNode,
622	PFNVALIDATE pfnValidateTag,
623	const char **ppError)
624	{
625	ULONG ulrc = 0;
626	PLISTNODE pCurrentTagListNode = lstQueryFirstNode(pllTagsList);
627	const char *pBufCurrent = pcszBufStart;
628	BOOL fContinue = TRUE;
629
630	if (pcszBufEnd == NULL)
631	pcszBufEnd = pcszBufStart + strlen(pcszBufStart);
632
633	while (fContinue)
634	{
635	if ( (!*pBufCurrent)
636	\|\| (pBufCurrent == pcszBufEnd)
637	)
638	// end of buf reached:
639	fContinue = FALSE;
640
641	else if (!pCurrentTagListNode)
642	{
643	// no (more) tags for this buffer:
644	CreateTextNode(pParentNode,
645	pBufCurrent,
646	pcszBufEnd);
647	fContinue = FALSE;
648	}
649	else
650	{
651	// another tag found:
652	PTAGFOUND pFoundTag = (PTAGFOUND)pCurrentTagListNode->pItemData;
653	const char *pStartOfTag = pFoundTag->pOpenBrck;
654	if (pStartOfTag > pBufCurrent + 1)
655	{
656	// we have text before the opening tag:
657	// make a DOMTEXT out of this
658	CreateTextNode(pParentNode,
659	pBufCurrent,
660	pStartOfTag);
661	pBufCurrent = pStartOfTag;
662	}
663	else
664	{
665	// OK, go for this tag...
666
667	if (*(pFoundTag->pStartOfTagName) == '/')
668	{
669	// this is a closing tag: that's an error
670	ulrc = 1;
671	*ppError = pFoundTag->pStartOfTagName;
672	fContinue = FALSE;
673	}
674	else if (pFoundTag->fIsComment)
675	{
676	// it's a comment: that's simple
677	PDOMNODE pCommentNode = xmlCreateNode(pParentNode,
678	DOMNODE_COMMENT);
679	if (!pCommentNode)
680	ulrc = ERROR_NOT_ENOUGH_MEMORY;
681	else
682	{
683	pCommentNode->pszNodeValue = strhSubstr(pFoundTag->pOpenBrck + 4,
684	pFoundTag->pCloseBrck - 2);
685	}
686	pBufCurrent = pFoundTag->pCloseBrck + 1;
687	}
688	else
689	{
690	BOOL fKeepTagName = FALSE; // free pszTagName below
691	PSZ pszTagName = strhSubstr(pFoundTag->pStartOfTagName,
692	pFoundTag->pFirstAfterTagName);
693	if (!pszTagName)
694	// zero-length string:
695	// go ahead after that
696	pBufCurrent = pFoundTag->pCloseBrck + 1;
697	else
698	{
699	// XML knows two types of elements:
700
701	// a) Element pairs, which have opening and closing tags
702	// (<TAG> and </TAG>
703	// b) Single elements, which must have "/" as their last
704	// character; these have no closing tag
705	// (<TAG/>)
706
707	// However, HTML doesn't usually tag single elements
708	// with a trailing '/'. To maintain compatibility,
709	// if we don't find a matching closing tag, we extract
710	// everything up to the end of the buffer.
711
712	ULONG ulTagNameLen = strlen(pszTagName);
713
714	// search for closing tag first...
715	// create string with closing tag to search for;
716	// that's '/' plus opening tag name
717	ULONG ulClosingTagLen2Find = ulTagNameLen + 1;
718	PSZ pszClosingTag2Find = (PSZ)malloc(ulClosingTagLen2Find + 1); // plus null byte
719	PLISTNODE pTagListNode2 = pCurrentTagListNode->pNext;
720	PLISTNODE pTagListNodeForChildren = pTagListNode2;
721
722	BOOL fClosingTagFound = FALSE;
723
724	*pszClosingTag2Find = '/';
725	strcpy(pszClosingTag2Find + 1, pszTagName);
726
727	// now find matching closing tag
728	while (pTagListNode2)
729	{
730	PTAGFOUND pFoundTag2 = (PTAGFOUND)pTagListNode2->pItemData;
731	ULONG ulFoundTag2Len = (pFoundTag2->pFirstAfterTagName - pFoundTag2->pStartOfTagName);
732	// compare tag name lengths
733	if (ulFoundTag2Len == ulClosingTagLen2Find)
734	{
735	// same length:
736	// compare
737	if (memcmp(pFoundTag2->pStartOfTagName,
738	pszClosingTag2Find,
739	ulClosingTagLen2Find)
740	== 0)
741	{
742	// found matching closing tag:
743
744	// we now have
745	// -- pCurrentTagListNode pointing to the opening tag
746	// (pFoundTag has its PTAGFOUND item data)
747	// -- pTagListNode2 pointing to the closing tag
748	// (pFoundTag2 has its PTAGFOUND item data)
749
750	// create DOM node
751	PDOMNODE pNewNode = CreateElementNode(pParentNode,
752	pszTagName,
753	pFoundTag->pFirstAfterTagName);
754	if (pNewNode)
755	{
756	ULONG ulAction = XMLACTION_BREAKUP;
757
758	fKeepTagName = TRUE; // do not free below
759
760	// validate tag
761	if (pfnValidateTag)
762	{
763	// validator specified:
764	ulAction = pfnValidateTag(pszTagName);
765	}
766
767	if (ulAction == XMLACTION_COPYASTEXT)
768	{
769	CreateTextNode(pNewNode,
770	pFoundTag->pCloseBrck + 1,
771	pFoundTag2->pOpenBrck - 1);
772	}
773	else if (ulAction == XMLACTION_BREAKUP)
774	{
775	PLINKLIST pllSubList = lstCreate(FALSE);
776	PLISTNODE pSubNode = 0;
777	ULONG cSubNodes = 0;
778
779	// text buffer to search
780	const char *pSubBufStart = pFoundTag->pCloseBrck + 1;
781	const char *pSubBufEnd = pFoundTag2->pOpenBrck;
782
783	// create a child list containing
784	// all tags from the first tag after
785	// the current opening tag to the closing tag
786	for (pSubNode = pTagListNodeForChildren;
787	pSubNode != pTagListNode2;
788	pSubNode = pSubNode->pNext)
789	{
790	lstAppendItem(pllSubList,
791	pSubNode->pItemData);
792	cSubNodes++;
793	}
794
795	// now recurse to build child nodes
796	// (text and elements), even if the
797	// list is empty, we can have text!
798	CreateNodesForBuf(pSubBufStart,
799	pSubBufEnd,
800	pllSubList,
801	pNewNode,
802	pfnValidateTag,
803	ppError);
804
805	lstFree(pllSubList);
806	} // end if (ulAction == XMLACTION_BREAKUP)
807
808	// now search on after the closing tag
809	// we've found; the next tag will be set below
810	pCurrentTagListNode = pTagListNode2;
811	pBufCurrent = pFoundTag2->pCloseBrck + 1;
812
813	fClosingTagFound = TRUE;
814
815	break; // // while (pTagListNode2)
816	} // end if (pNewNode)
817	} // end if (memcmp(pFoundTag2->pStartOfTagName,
818	} // if (ulFoundTag2Len == ulClosingTagLen2Find)
819
820	pTagListNode2 = pTagListNode2->pNext;
821
822	} // while (pTagListNode2)
823
824	if (!fClosingTagFound)
825	{
826	// no matching closing tag found:
827	// that's maybe a block of not well-formed XML
828
829	// e.g. with WarpIN:
830	// <README> <-- we start after this
831	// block of plain HTML with <P> tags and such
832	// </README>
833
834	// just create an element
835	PDOMNODE pNewNode = CreateElementNode(pParentNode,
836	pszTagName,
837	pFoundTag->pFirstAfterTagName);
838	if (pNewNode)
839	fKeepTagName = TRUE;
840
841	// now search on after the closing tag
842	// we've found; the next tag will be set below
843	// pCurrentTagListNode = pTagListNodeForChildren;
844	pBufCurrent = pFoundTag->pCloseBrck + 1;
845	}
846
847	free(pszClosingTag2Find);
848
849	if (!fKeepTagName)
850	free(pszTagName);
851	} // end if (pszTagName)
852	}
853
854	pCurrentTagListNode = pCurrentTagListNode->pNext;
855	}
856	}
857	}
858
859	return (ulrc);
860	}
861
862	/*
863	* xmlParse:
864	* generic XML parser.
865	*
866	* This takes the specified zero-terminated string
867	* in pcszBuf and parses it, adding DOMNODE's as
868	* children to pNode.
869	*
870	* This recurses, if necessary, to build a node tree.
871	*
872	* Example: Take this HTML table definition:
873	+
874	+ <TABLE>
875	+ <TBODY>
876	+ <TR>
877	+ <TD>Column 1-1</TD>
878	+ <TD>Column 1-2</TD>
879	+ </TR>
880	+ <TR>
881	+ <TD>Column 2-1</TD>
882	+ <TD>Column 2-2</TD>
883	+ </TR>
884	+ </TBODY>
885	+ </TABLE>
886	*
887	* This function will create a tree as follows:
888	+
889	+ ÚÄÄÄÄÄÄÄÄÄÄÄÄ¿
890	+ ³ TABLE ³ (only ELEMENT node in root DOCUMENT node)
891	+ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
892	+ ³
893	+ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
894	+ ³ TBODY ³ (only ELEMENT node in root "TABLE" node)
895	+ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
896	+ ÚÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÄÄÄÄ¿
897	+ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÄÄÁÄÄÄÄÄÄ¿
898	+ ³ TR ³ ³ TR ³
899	+ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ ÀÄÄÄÄÄÂÄÄÄÄÄÄÙ
900	+ ÚÄÄÄÁÄÄÄÄÄÄ¿ ÚÄÄÄÁÄÄÄÄÄÄ¿
901	+ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿ ÚÄÄÄÁÄ¿ ÚÄÄÁÄÄ¿
902	+ ³ TD ³ ³ TD ³ ³ TD ³ ³ TD ³
903	+ ÀÄÄÂÄÄÙ ÀÄÄÂÄÄÙ ÀÄÄÄÂÄÙ ÀÄÄÂÄÄÙ
904	+ ÉÍÍÍÍÍÊÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÍÍÊÍÍÍÍÍ» ÉÍÍÊÍÍÍÍÍÍÍ»
905	+ ºColumn 1-1º ºColumn 1-2º ºColumn 2-1º ºColumn 2-2º (one TEXT node in each parent node)
906	+ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ ÈÍÍÍÍÍÍÍÍÍÍŒ
907	*/
908
909	ULONG xmlParse(PDOMNODE pParentNode, // in: node to append children to; must not be NULL
910	const char *pcszBuf, // in: buffer to search
911	PFNVALIDATE pfnValidateTag)
912	{
913	ULONG ulrc = 0;
914
915	PLINKLIST pllTags = BuildTagsList(pcszBuf);
916
917	// now create DOMNODE's according to that list...
918	const char *pcszError = 0;
919	CreateNodesForBuf(pcszBuf,
920	NULL, // enitre buffer
921	pllTags,
922	pParentNode,
923	pfnValidateTag,
924	&pcszError);
925
926	lstFree(pllTags);
927
928	return (ulrc);
929	}
930
931	/*
932	*@@ xmlCreateDocumentFromString:
933	* creates a DOCUMENT DOMNODE and calls xmlParse
934	* to break down the specified buffer into that
935	* node.
936	*/
937
938	PDOMNODE xmlCreateDocumentFromString(const char *pcszXML,
939	PFNVALIDATE pfnValidateTag)
940	{
941	PDOMNODE pDocument = xmlCreateNode(NULL, // no parent
942	DOMNODE_DOCUMENT);
943	xmlParse(pDocument,
944	pcszXML,
945	pfnValidateTag);
946
947	return (pDocument);
948	}
949
950

Note: See TracBrowser for help on using the repository browser.

Download in other formats: