Context Navigation

source: branches/branch-1-0/src/helpers/stringh.c@ 365

Visit:

Last change on this file since 365 was 335, checked in by pr, 19 years ago
Allow EOL to terminate non-quoted attributes
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 59.4 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2006 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
73	/*
74	*@@ strhStoreDebug:
75	* memory debug version of strhStore.
76	*
77	*@@added V0.9.16 (2001-12-08) [umoeller]
78	*/
79
80	APIRET (strhStoreDebug)(PSZ *ppszTarget,
81	PCSZ pcszSource,
82	PULONG pulLength, // out: length of new string (ptr can be NULL)
83	PCSZ pcszSourceFile,
84	unsigned long ulLine,
85	PCSZ pcszFunction)
86	{
87	ULONG ulLength = 0;
88
89
90
91	if (ppszTarget)
92	{
93	if (*ppszTarget)
94	free(*ppszTarget);
95
96	if ( (pcszSource)
97	&& (ulLength = strlen(pcszSource))
98	)
99	{
100	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
101	pcszSourceFile,
102	ulLine,
103	pcszFunction))
104	memcpy(*ppszTarget, pcszSource, ulLength + 1);
105	else
106	return ERROR_NOT_ENOUGH_MEMORY;
107	}
108	else
109	*ppszTarget = NULL;
110	}
111
112	if (pulLength)
113	*pulLength = ulLength;
114
115	return NO_ERROR;
116	}
117
118	#endif
119
120	/*
121	*@@ strhStore:
122	* stores a copy of the given string in the specified
123	* buffer. Uses strdup internally.
124	*
125	* If *ppszTarget != NULL, the previous string is freed
126	* and set to NULL.
127	* If pcszSource != NULL, a copy of it is stored in the
128	* buffer.
129	*
130	*@@added V0.9.16 (2001-12-06) [umoeller]
131	*/
132
133	APIRET strhStore(PSZ *ppszTarget,
134	PCSZ pcszSource,
135	PULONG pulLength) // out: length of new string (ptr can be NULL)
136	{
137	ULONG ulLength = 0;
138
139	if (ppszTarget)
140	{
141	if (*ppszTarget)
142	free(*ppszTarget);
143
144	if ( (pcszSource)
145	&& (ulLength = strlen(pcszSource))
146	)
147	{
148	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
149	memcpy(*ppszTarget, pcszSource, ulLength + 1);
150	else
151	return ERROR_NOT_ENOUGH_MEMORY;
152	}
153	else
154	*ppszTarget = NULL;
155	}
156	else
157	return ERROR_INVALID_PARAMETER;
158
159	if (pulLength)
160	*pulLength = ulLength;
161
162	return NO_ERROR;
163	}
164
165	/*
166	*@@ strhcpy:
167	* like strdup, but this one doesn't crash if string2 is NULL,
168	* but sets the first byte in string1 to \0 instead.
169	*
170	*@@added V0.9.14 (2001-08-01) [umoeller]
171	*/
172
173	PSZ strhcpy(PSZ string1, PCSZ string2)
174	{
175	if (string2)
176	return strcpy(string1, string2);
177
178	*string1 = '\0';
179	return string1;
180	}
181
182	#ifdef __DEBUG_MALLOC_ENABLED__
183
184	/*
185	*@@ strhdupDebug:
186	* memory debug version of strhdup.
187	*
188	*@@added V0.9.0 [umoeller]
189	*/
190
191	PSZ strhdupDebug(PCSZ pcszSource,
192	unsigned long *pulLength,
193	PCSZ pcszSourceFile,
194	unsigned long ulLine,
195	PCSZ pcszFunction)
196	{
197	PSZ pszReturn = NULL;
198	ULONG ulLength = 0;
199
200	if ( (pcszSource)
201	&& (ulLength = strlen(pcszSource))
202	)
203	{
204	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
205	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
206	ulLine,
207	pcszFunction))
208	memcpy(pszReturn, pcszSource, ulLength + 1);
209	}
210
211	if (pulLength)
212	*pulLength = ulLength;
213
214	return pszReturn;
215	}
216
217	#endif // __DEBUG_MALLOC_ENABLED__
218
219	/*
220	*@@ strhdup:
221	* like strdup, but this one doesn't crash if pszSource
222	* is NULL, but returns NULL also. In addition, this
223	* can report the length of the string (V0.9.16).
224	*
225	*@@added V0.9.0 [umoeller]
226	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
227	*/
228
229	PSZ strhdup(PCSZ pcszSource,
230	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
231	{
232	PSZ pszReturn = NULL;
233	ULONG ulLength = 0;
234
235	if ( (pcszSource)
236	&& (ulLength = strlen(pcszSource))
237	)
238	{
239	if (pszReturn = (PSZ)malloc(ulLength + 1))
240	memcpy(pszReturn, pcszSource, ulLength + 1);
241	}
242
243	if (pulLength)
244	*pulLength = ulLength;
245
246	return pszReturn;
247	}
248
249	/*
250	*@@ strhcmp:
251	* better strcmp. This doesn't crash if any of the
252	* string pointers are NULL, but returns a proper
253	* value then.
254	*
255	* Besides, this is guaranteed to only return -1, 0,
256	* or +1, while strcmp can return any positive or
257	* negative value. This is useful for tree comparison
258	* funcs.
259	*
260	*@@added V0.9.9 (2001-02-16) [umoeller]
261	*/
262
263	int strhcmp(PCSZ p1, PCSZ p2)
264	{
265	if (p1 && p2)
266	{
267	int i = strcmp(p1, p2);
268	if (i < 0) return -1;
269	if (i > 0) return +1;
270	}
271	else if (p1)
272	// but p2 is NULL: p1 greater than p2 then
273	return +1;
274	else if (p2)
275	// but p1 is NULL: p1 less than p2 then
276	return -1;
277
278	// return 0 if strcmp returned 0 above or both strings are NULL
279	return 0;
280	}
281
282	/*
283	*@@ strhicmp:
284	* like strhcmp, but compares without respect
285	* to case.
286	*
287	*@@added V0.9.9 (2001-04-07) [umoeller]
288	*/
289
290	int strhicmp(PCSZ p1, PCSZ p2)
291	{
292	if (p1 && p2)
293	{
294	int i = stricmp(p1, p2);
295	if (i < 0) return -1;
296	if (i > 0) return +1;
297	}
298	else if (p1)
299	// but p2 is NULL: p1 greater than p2 then
300	return +1;
301	else if (p2)
302	// but p1 is NULL: p1 less than p2 then
303	return -1;
304
305	// return 0 if strcmp returned 0 above or both strings are NULL
306	return 0;
307	}
308
309	/*
310	*@@ strhistr:
311	* like strstr, but case-insensitive.
312	*
313	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
314	*/
315
316	PSZ strhistr(PCSZ string1, PCSZ string2)
317	{
318	PSZ prc = NULL;
319
320	if ((string1) && (string2))
321	{
322	PSZ pszSrchIn = strdup(string1);
323	PSZ pszSrchFor = strdup(string2);
324
325	if ((pszSrchIn) && (pszSrchFor))
326	{
327	strupr(pszSrchIn);
328	strupr(pszSrchFor);
329
330	if (prc = strstr(pszSrchIn, pszSrchFor))
331	{
332	// prc now has the first occurence of the string,
333	// but in pszSrchIn; we need to map this
334	// return value to the original string
335	prc = (prc-pszSrchIn) // offset in pszSrchIn
336	+ (PSZ)string1;
337	}
338	}
339	if (pszSrchFor)
340	free(pszSrchFor);
341	if (pszSrchIn)
342	free(pszSrchIn);
343	}
344
345	return prc;
346	}
347
348	/*
349	*@@ strhncpy0:
350	* like strncpy, but always appends a 0 character.
351	*
352	*@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
353	*/
354
355	ULONG strhncpy0(PSZ pszTarget,
356	PCSZ pszSource,
357	ULONG cbSource)
358	{
359	ULONG ul = 0;
360	PSZ pTarget = pszTarget,
361	pSource;
362
363	if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
364	{
365	for (ul = 0; ul < cbSource; ul++)
366	if (*pSource)
367	pTarget++ = pSource++;
368	else
369	break;
370	}
371
372	*pTarget = 0;
373
374	return ul;
375	}
376
377	/*
378	*@@ strhlen:
379	* like strlen, but doesn't crash on
380	* null strings, but returns 0 also.
381	*
382	*@@added V0.9.19 (2002-04-02) [umoeller]
383	*/
384
385	ULONG strhlen(PCSZ pcsz)
386	{
387	if (pcsz)
388	return strlen(pcsz);
389
390	return 0;
391	}
392
393	/*
394	*@@ strhSize:
395	* returns the size of the given string, which
396	* is the memory required to allocate a copy,
397	* including the null terminator.
398	*
399	* Returns 0 only if pcsz is NULL. If pcsz
400	* points to a null character, this returns 1.
401	*
402	*@@added V0.9.18 (2002-02-13) [umoeller]
403	*@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
404	*/
405
406	ULONG strhSize(PCSZ pcsz)
407	{
408	if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
409	return (strlen(pcsz) + 1);
410
411	return 0;
412	}
413
414	/*
415	* strhCount:
416	* this counts the occurences of c in pszSearch.
417	*/
418
419	ULONG strhCount(PCSZ pszSearch,
420	CHAR c)
421	{
422	PSZ p = (PSZ)pszSearch;
423	ULONG ulCount = 0;
424	while (TRUE)
425	{
426	p = strchr(p, c);
427	if (p)
428	{
429	ulCount++;
430	p++;
431	}
432	else
433	break;
434	}
435	return ulCount;
436	}
437
438	/*
439	*@@ strhIsDecimal:
440	* returns TRUE if psz consists of decimal digits only.
441	*/
442
443	BOOL strhIsDecimal(PSZ psz)
444	{
445	PSZ p = psz;
446	while (*p != 0)
447	{
448	if (isdigit(*p) == 0)
449	return FALSE;
450	p++;
451	}
452
453	return TRUE;
454	}
455
456	#ifdef __DEBUG_MALLOC_ENABLED__
457
458	/*
459	*@@ strhSubstrDebug:
460	* memory debug version of strhSubstr.
461	*
462	*@@added V0.9.14 (2001-08-01) [umoeller]
463	*/
464
465	PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
466	PCSZ pEnd, // in: last char (not included)
467	PCSZ pcszSourceFile,
468	unsigned long ulLine,
469	PCSZ pcszFunction)
470	{
471	PSZ pszSubstr = NULL;
472
473	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
474	{
475	ULONG cbSubstr = (pEnd - pBegin);
476	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
477	pcszSourceFile,
478	ulLine,
479	pcszFunction))
480	{
481	// strhncpy0(pszSubstr, pBegin, cbSubstr);
482	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
483	*(pszSubstr + cbSubstr) = '\0';
484	}
485	}
486
487	return pszSubstr;
488	}
489
490	#endif // __DEBUG_MALLOC_ENABLED__
491
492	/*
493	*@@ strhSubstr:
494	* this creates a new PSZ containing the string
495	* from pBegin to pEnd, excluding the pEnd character.
496	* The new string is null-terminated. The caller
497	* must free() the new string after use.
498	*
499	* Example:
500	+ "1234567890"
501	+ ^ ^
502	+ p1 p2
503	+ strhSubstr(p1, p2)
504	* would return a new string containing "2345678".
505	*
506	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
507	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
508	*/
509
510	PSZ strhSubstr(PCSZ pBegin, // in: first char
511	PCSZ pEnd) // in: last char (not included)
512	{
513	PSZ pszSubstr = NULL;
514
515	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
516	{
517	ULONG cbSubstr = (pEnd - pBegin);
518	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
519	{
520	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
521	*(pszSubstr + cbSubstr) = '\0';
522	}
523	}
524
525	return pszSubstr;
526	}
527
528	/*
529	*@@ strhExtract:
530	* searches pszBuf for the cOpen character and returns
531	* the data in between cOpen and cClose, excluding
532	* those two characters, in a newly allocated buffer
533	* which you must free() afterwards.
534	*
535	* Spaces and newlines/linefeeds are skipped.
536	*
537	* If the search was successful, the new buffer
538	* is returned and, if (ppEnd != NULL), *ppEnd points
539	* to the first character after the cClose character
540	* found in the buffer.
541	*
542	* If the search was not successful, NULL is
543	* returned, and *ppEnd is unchanged.
544	*
545	* If another cOpen character is found before
546	* cClose, matching cClose characters will be skipped.
547	* You can therefore nest the cOpen and cClose
548	* characters.
549	*
550	* This function ignores cOpen and cClose characters
551	* in C-style comments and strings surrounded by
552	* double quotes.
553	*
554	* Example:
555	*
556	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
557	+ pEnd;
558	+ strhExtract(pszBuf,
559	+ '{', '}',
560	+ &pEnd)
561	*
562	* would return a new buffer containing " --blah-- ",
563	* and ppEnd would afterwards point to the space
564	* before "next" in the static buffer.
565	*
566	*@@added V0.9.0 [umoeller]
567	*/
568
569	PSZ strhExtract(PCSZ pszBuf, // in: search buffer
570	CHAR cOpen, // in: opening char
571	CHAR cClose, // in: closing char
572	PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
573	{
574	PSZ pszReturn = NULL;
575	PCSZ pOpen;
576	if ( (pszBuf)
577	&& (pOpen = strchr(pszBuf, cOpen))
578	)
579	{
580	// opening char found:
581	// now go thru the whole rest of the buffer
582	PCSZ p = pOpen + 1;
583	LONG lLevel = 1; // if this goes 0, we're done
584	while (*p)
585	{
586	if (*p == cOpen)
587	lLevel++;
588	else if (*p == cClose)
589	{
590	lLevel--;
591	if (lLevel <= 0)
592	{
593	// matching closing bracket found:
594	// extract string
595	pszReturn = strhSubstr(pOpen + 1, // after cOpen
596	p); // excluding cClose
597	if (ppEnd)
598	*ppEnd = p + 1;
599	break; // while (*p)
600	}
601	}
602	else if (*p == '\"')
603	{
604	// beginning of string:
605	PCSZ p2 = p+1;
606	// find end of string
607	while ((p2) && (p2 != '\"'))
608	p2++;
609
610	if (*p2 == '\"')
611	// closing quote found:
612	// search on after that
613	p = p2; // raised below
614	else
615	break; // while (*p)
616	}
617
618	p++;
619	}
620	}
621
622	return pszReturn;
623	}
624
625	/*
626	*@@ strhQuote:
627	* similar to strhExtract, except that
628	* opening and closing chars are the same,
629	* and therefore no nesting is possible.
630	* Useful for extracting stuff between
631	* quotes.
632	*
633	*@@added V0.9.0 [umoeller]
634	*/
635
636	PSZ strhQuote(PSZ pszBuf,
637	CHAR cQuote,
638	PSZ *ppEnd)
639	{
640	PSZ pszReturn = NULL,
641	p1 = NULL;
642	if ((p1 = strchr(pszBuf, cQuote)))
643	{
644	PSZ p2;
645	if (p2 = strchr(p1+1, cQuote))
646	{
647	pszReturn = strhSubstr(p1+1, p2);
648	if (ppEnd)
649	// store closing char
650	*ppEnd = p2 + 1;
651	}
652	}
653
654	return pszReturn;
655	}
656
657	/*
658	*@@ strhStrip:
659	* removes all double spaces.
660	* This copies within the "psz" buffer.
661	* If any double spaces are found, the
662	* string will be shorter than before,
663	* but the buffer is _not_ reallocated,
664	* so there will be unused bytes at the
665	* end.
666	*
667	* Returns the number of spaces removed.
668	*
669	*@@added V0.9.0 [umoeller]
670	*/
671
672	ULONG strhStrip(PSZ psz) // in/out: string
673	{
674	PSZ p;
675	ULONG cb = strlen(psz),
676	ulrc = 0;
677
678	for (p = psz; p < psz+cb; p++)
679	{
680	if ((p == ' ') && ((p+1) == ' '))
681	{
682	PSZ p2 = p;
683	while (*p2)
684	{
685	p2 = (p2+1);
686	p2++;
687	}
688	cb--;
689	p--;
690	ulrc++;
691	}
692	}
693	return ulrc;
694	}
695
696	/*
697	*@@ strhins:
698	* this inserts one string into another.
699	*
700	* pszInsert is inserted into pszBuffer at offset
701	* ulInsertOfs (which counts from 0).
702	*
703	* A newly allocated string is returned. pszBuffer is
704	* not changed. The new string should be free()'d after
705	* use.
706	*
707	* Upon errors, NULL is returned.
708	*
709	*@@changed V0.9.0 [umoeller]: completely rewritten.
710	*/
711
712	PSZ strhins(PCSZ pcszBuffer,
713	ULONG ulInsertOfs,
714	PCSZ pcszInsert)
715	{
716	PSZ pszNew = NULL;
717
718	if ((pcszBuffer) && (pcszInsert))
719	{
720	do {
721	ULONG cbBuffer = strlen(pcszBuffer);
722	ULONG cbInsert = strlen(pcszInsert);
723
724	// check string length
725	if (ulInsertOfs > cbBuffer + 1)
726	break; // do
727
728	// OK, let's go.
729	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
730
731	// copy stuff before pInsertPos
732	memcpy(pszNew,
733	pcszBuffer,
734	ulInsertOfs);
735	// copy string to be inserted
736	memcpy(pszNew + ulInsertOfs,
737	pcszInsert,
738	cbInsert);
739	// copy stuff after pInsertPos
740	strcpy(pszNew + ulInsertOfs + cbInsert,
741	pcszBuffer + ulInsertOfs);
742	} while (FALSE);
743	}
744
745	return pszNew;
746	}
747
748	/*
749	*@@ strhFindReplace:
750	* wrapper around xstrFindReplace to work with C strings.
751	* Note that *ppszBuf can get reallocated and must
752	* be free()'able.
753	*
754	* Repetitive use of this wrapper is not recommended
755	* because it is considerably slower than xstrFindReplace.
756	*
757	*@@added V0.9.6 (2000-11-01) [umoeller]
758	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
759	*/
760
761	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
762	PULONG pulOfs, // in: where to begin search (0 = start);
763	// out: ofs of first char after replacement string
764	PCSZ pcszSearch, // in: search string; cannot be NULL
765	PCSZ pcszReplace) // in: replacement string; cannot be NULL
766	{
767	ULONG ulrc = 0;
768	XSTRING xstrBuf,
769	xstrFind,
770	xstrReplace;
771	size_t ShiftTable[256];
772	BOOL fRepeat = FALSE;
773	xstrInitSet(&xstrBuf, *ppszBuf);
774	// reallocated and returned, so we're safe
775	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
776	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
777	// these two are never freed, so we're safe too
778
779	if ((ulrc = xstrFindReplace(&xstrBuf,
780	pulOfs,
781	&xstrFind,
782	&xstrReplace,
783	ShiftTable,
784	&fRepeat)))
785	// replaced:
786	*ppszBuf = xstrBuf.psz;
787
788	return ulrc;
789	}
790
791	/*
792	* strhWords:
793	* returns the no. of words in "psz".
794	* A string is considered a "word" if
795	* it is surrounded by spaces only.
796	*
797	*@@added V0.9.0 [umoeller]
798	*/
799
800	ULONG strhWords(PSZ psz)
801	{
802	PSZ p;
803	ULONG cb = strlen(psz),
804	ulWords = 0;
805	if (cb > 1)
806	{
807	ulWords = 1;
808	for (p = psz; p < psz+cb; p++)
809	if (*p == ' ')
810	ulWords++;
811	}
812	return ulWords;
813	}
814
815	/*
816	*@@ strhGetWord:
817	* finds word boundaries.
818	*
819	* *ppszStart is used as the beginning of the
820	* search.
821	*
822	* If a word is found, *ppszStart is set to
823	* the first character of the word which was
824	* found and *ppszEnd receives the address
825	* of the first character _after_ the word,
826	* which is probably a space or a \n or \r char.
827	* We then return TRUE.
828	*
829	* The search is stopped if a null character
830	* is found or pLimit is reached. In that case,
831	* FALSE is returned.
832	*
833	*@@added V0.9.1 (2000-02-13) [umoeller]
834	*/
835
836	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
837	// out: start of word (if TRUE is returned)
838	PCSZ pLimit, // in: ptr to last char after *ppszStart to be
839	// searched; if the word does not end before
840	// or with this char, FALSE is returned
841	PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
842	PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
843	PSZ *ppszEnd) // out: first char _after_ word
844	// (if TRUE is returned)
845	{
846	// characters after which a word can be started
847	// PCSZ pcszBeginChars = "\x0d\x0a ";
848	// PCSZ pcszEndChars = "\x0d\x0a /-";
849
850	PSZ pStart = *ppszStart;
851
852	// find start of word
853	while ( (pStart < (PSZ)pLimit)
854	&& (strchr(pcszBeginChars, *pStart))
855	)
856	// if char is a "before word" char: go for next
857	pStart++;
858
859	if (pStart < (PSZ)pLimit)
860	{
861	// found a valid "word start" character
862	// (which is not in pcszBeginChars):
863
864	// find end of word
865	PSZ pEndOfWord = pStart;
866	while ( (pEndOfWord <= (PSZ)pLimit)
867	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
868	)
869	// if char is not an "end word" char: go for next
870	pEndOfWord++;
871
872	if (pEndOfWord <= (PSZ)pLimit)
873	{
874	// whoa, got a word:
875	*ppszStart = pStart;
876	*ppszEnd = pEndOfWord;
877	return TRUE;
878	}
879	}
880
881	return FALSE;
882	}
883
884	/*
885	*@@ strhIsWord:
886	* returns TRUE if p points to a "word"
887	* in pcszBuf.
888	*
889	* p is considered a word if the character _before_
890	* it is in pcszBeginChars and the char _after_
891	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
892	*
893	*@@added V0.9.6 (2000-11-12) [umoeller]
894	*@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
895	*/
896
897	BOOL strhIsWord(PCSZ pcszBuf,
898	PCSZ p, // in: start of word
899	ULONG cbSearch, // in: length of word
900	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
901	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
902	{
903	// check previous char
904	if ( (p == pcszBuf)
905	\|\| (strchr(pcszBeginChars, *(p-1)))
906	)
907	{
908	// OK, valid begin char:
909	// check end char
910	CHAR cNextChar;
911	if (!(cNextChar = p[cbSearch]))
912	// null terminator:
913	return TRUE;
914	else
915	{
916	// not null terminator: check if char is
917	// in the list of valid end chars
918	if (strchr(pcszEndChars, cNextChar))
919	{
920	// OK, is end char: avoid doubles of that char,
921	// but allow spaces
922	// fixed V0.9.18 (2002-02-23) [umoeller]
923	CHAR cNextNext = p[cbSearch + 1];
924	if ( (cNextNext != cNextChar)
925	\|\| (cNextNext == ' ')
926	\|\| (cNextNext == 0)
927	)
928	return TRUE;
929	}
930	}
931	}
932
933	return FALSE;
934	}
935
936	/*
937	*@@ strhFindWord:
938	* searches for pszSearch in pszBuf, which is
939	* returned if found (or NULL if not).
940	*
941	* As opposed to strstr, this finds pszSearch
942	* only if it is a "word". A search string is
943	* considered a word if the character _before_
944	* it is in pcszBeginChars and the char _after_
945	* it is in pcszEndChars.
946	*
947	* Example:
948	+ strhFindWord("This is an example.", "is");
949	+ returns ...........^ this, but not the "is" in "This".
950	*
951	* The algorithm here uses strstr to find pszSearch in pszBuf
952	* and performs additional "is-word" checks for each item found
953	* (by calling strhIsWord).
954	*
955	* Note that this function is fairly slow compared to xstrFindWord.
956	*
957	*@@added V0.9.0 (99-11-08) [umoeller]
958	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
959	*/
960
961	PSZ strhFindWord(PCSZ pszBuf,
962	PCSZ pszSearch,
963	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
964	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
965	{
966	PSZ pszReturn = 0;
967	ULONG cbBuf = strlen(pszBuf),
968	cbSearch = strlen(pszSearch);
969
970	if ((cbBuf) && (cbSearch))
971	{
972	PCSZ p = pszBuf;
973
974	do // while p
975	{
976	p = strstr(p, pszSearch);
977	if (p)
978	{
979	// string found:
980	// check if that's a word
981
982	if (strhIsWord(pszBuf,
983	p,
984	cbSearch,
985	pcszBeginChars,
986	pcszEndChars))
987	{
988	// valid end char:
989	pszReturn = (PSZ)p;
990	break;
991	}
992
993	p += cbSearch;
994	}
995	} while (p);
996
997	}
998	return pszReturn;
999	}
1000
1001	/*
1002	*@@ strhFindEOL:
1003	* returns a pointer to the next \r, \n or null character
1004	* following pszSearchIn. Stores the offset in *pulOffset.
1005	*
1006	* This should never return NULL because at some point,
1007	* there will be a null byte in your string.
1008	*
1009	*@@added V0.9.4 (2000-07-01) [umoeller]
1010	*/
1011
1012	PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
1013	PULONG pulOffset) // out: offset (ptr can be NULL)
1014	{
1015	PCSZ p = pcszSearchIn,
1016	prc = 0;
1017	while (TRUE)
1018	{
1019	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1020	{
1021	prc = p;
1022	break;
1023	}
1024	p++;
1025	}
1026
1027	if ((pulOffset) && (prc))
1028	*pulOffset = prc - pcszSearchIn;
1029
1030	return ((PSZ)prc);
1031	}
1032
1033	/*
1034	*@@ strhFindNextLine:
1035	* like strhFindEOL, but this returns the character
1036	* _after_ \r or \n. Note that this might return
1037	* a pointer to terminating NULL character also.
1038	*/
1039
1040	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1041	{
1042	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1043	// pEOL now points to the \r char or the terminating 0 byte;
1044	// if not null byte, advance pointer
1045	PSZ pNextLine = pEOL;
1046	if (*pNextLine == '\r')
1047	pNextLine++;
1048	if (*pNextLine == '\n')
1049	pNextLine++;
1050	if (pulOffset)
1051	*pulOffset = pNextLine - pszSearchIn;
1052	return pNextLine;
1053	}
1054
1055	/*
1056	*@@ strhBeautifyTitle:
1057	* replaces all line breaks (0xd, 0xa) with spaces.
1058	* Returns the new length of the string or 0 on
1059	* errors.
1060	*
1061	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1062	*@@changed V0.9.19 (2002-06-18) [umoeller]: now returning length
1063	*/
1064
1065	ULONG strhBeautifyTitle(PSZ psz)
1066	{
1067	ULONG ulrc;
1068	PSZ p = psz;
1069
1070	while (*p)
1071	{
1072	if ( (*p == '\r')
1073	\|\| (*p == '\n')
1074	)
1075	{
1076	if ( (p != psz)
1077	&& (p[-1] == ' ')
1078	)
1079	memmove(p, p + 1, strlen(p));
1080	else
1081	*p++ = ' ';
1082	}
1083	else
1084	p++;
1085	}
1086
1087	return (p - psz);
1088	}
1089
1090	/*
1091	*@@ strhBeautifyTitle:
1092	* like strhBeautifyTitle, but copies into
1093	* a new buffer. More efficient.
1094	*
1095	*@@added V0.9.19 (2002-06-18) [umoeller]
1096	*/
1097
1098	ULONG strhBeautifyTitle2(PSZ pszTarget, // out: beautified string
1099	PCSZ pcszSource) // in: string to be beautified (can be NULL)
1100	{
1101	ULONG ulrc;
1102	PCSZ pSource = pcszSource;
1103	PSZ pTarget = pszTarget;
1104	CHAR c;
1105	if (!pcszSource)
1106	{
1107	*pszTarget = '\0';
1108	return 0;
1109	}
1110
1111	while (c = *pSource++)
1112	{
1113	if ( (c == '\r')
1114	\|\| (c == '\n')
1115	)
1116	{
1117	if ( (pTarget == pszTarget)
1118	\|\| (pTarget[-1] != ' ')
1119	)
1120	*pTarget++ = ' ';
1121	}
1122	else
1123	*pTarget++ = c;
1124	}
1125
1126	// null-terminate
1127	*pTarget = '\0';
1128
1129	return (pTarget - pszTarget);
1130	}
1131
1132	/*
1133	* strhFindAttribValue:
1134	* searches for pszAttrib in pszSearchIn; if found,
1135	* returns the first character after the "=" char.
1136	* If "=" is not found, a space, \r, and \n are
1137	* also accepted. This function searches without
1138	* respecting case.
1139	*
1140	* <B>Example:</B>
1141	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1142	+
1143	+ returns ....................... ^ this address.
1144	*
1145	*@@added V0.9.0 [umoeller]
1146	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1147	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1148	*@@changed WarpIN V1.0.11 (2006-08-29) [pr]: handle attrib names in quoted strings @@fixes 718
1149	*@@changed WarpIN V1.0.12 (2006-09-07) [pr]: fix attrib handling again @@fixes 718 @@fixes 836
1150	*/
1151
1152	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1153	{
1154	PSZ prc = 0;
1155	PSZ pszSearchIn2, p, pszStart, pszName, pszValue;
1156	ULONG cbAttrib = strlen(pszAttrib),
1157	ulLength = strlen(pszSearchIn);
1158	BOOL fInQuote = FALSE;
1159
1160	// use alloca(), so memory is freed on function exit
1161	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1162	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1163
1164	// V1.0.12 (2006-09-07) [pr]: filter leading " and ' left over from the previous pass
1165	for (p = pszSearchIn2; p == '\'' \|\| p == '"' \|\| *p == ' '
1166	\|\| p == '\n' \|\| p == '\r' \|\| *p == '\t'; p++);
1167	for (pszStart = p; *p; p++)
1168	{
1169	if (fInQuote)
1170	{
1171	// V1.0.12 (2006-09-07) [pr]: allow end of line to terminate a (broken) quote
1172	if (p == '"' \|\| p == '\n' \|\| *p == '\r')
1173	fInQuote = FALSE;
1174	}
1175	else
1176	{
1177	if (*p == '"')
1178	fInQuote = TRUE;
1179	else
1180	{
1181	if (p == ' ' \|\| p == '\n' \|\| p == '\r' \|\| p == '\t')
1182	{
1183	*p = '\0';
1184	pszName = strtok(pszStart, "=>");
1185	pszStart = p + 1;
1186	if (pszName && !stricmp(pszName, pszAttrib))
1187	{
1188	pszValue = strtok(NULL, "");
1189	if (pszValue)
1190	prc = (PSZ)pszSearchIn + (pszValue - pszSearchIn2);
1191	else
1192	prc = (PSZ)pszSearchIn + (pszName - pszSearchIn2) + cbAttrib;
1193
1194	return(prc);
1195	}
1196	}
1197	}
1198	}
1199	}
1200
1201	if (pszStart != p)
1202	{
1203	pszName = strtok(pszStart, "=>");
1204	if (pszName && !stricmp(pszName, pszAttrib))
1205	{
1206	pszValue = strtok(NULL, "");
1207	if (pszValue)
1208	prc = (PSZ)pszSearchIn + (pszValue - pszSearchIn2);
1209	else
1210	prc = (PSZ)pszSearchIn + (pszName - pszSearchIn2) + cbAttrib;
1211	}
1212	}
1213
1214	return prc;
1215	}
1216
1217	/*
1218	* strhGetNumAttribValue:
1219	* stores the numerical parameter value of an HTML-style
1220	* tag in *pl.
1221	*
1222	* Returns the address of the tag parameter in the
1223	* search buffer, if found, or NULL.
1224	*
1225	* <B>Example:</B>
1226	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1227	*
1228	* stores 123 in the "l" variable.
1229	*
1230	*@@added V0.9.0 [umoeller]
1231	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1232	*/
1233
1234	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1235	const char *pszTag, // e.g. "INDEX"
1236	PLONG pl) // out: numerical value
1237	{
1238	PSZ pParam;
1239	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1240	{
1241	if ( (*pParam == '\"')
1242	\|\| (*pParam == '\'')
1243	)
1244	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1245
1246	sscanf(pParam, "%ld", pl);
1247	}
1248
1249	return pParam;
1250	}
1251
1252	/*
1253	* strhGetTextAttr:
1254	* retrieves the attribute value of a textual HTML-style tag
1255	* in a newly allocated buffer, which is returned,
1256	* or NULL if attribute not found.
1257	* If an attribute value is to contain spaces, it
1258	* must be enclosed in quotes.
1259	*
1260	* The offset of the attribute data in pszSearchIn is
1261	* returned in *pulOffset so that you can do multiple
1262	* searches.
1263	*
1264	* This returns a new buffer, which should be free()'d after use.
1265	*
1266	* <B>Example:</B>
1267	+ ULONG ulOfs = 0;
1268	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1269	+ ............^ ulOfs
1270	*
1271	* returns a new string with the value "blublub" (without
1272	* quotes) and sets ulOfs to 12.
1273	*
1274	*@@added V0.9.0 [umoeller]
1275	*@@changed V1.0.13 (2006-09-10) [pr]: improved parsing
1276	*/
1277
1278	PSZ strhGetTextAttr(const char *pszSearchIn,
1279	const char *pszTag,
1280	PULONG pulOffset) // out: offset where found
1281	{
1282	PSZ pParam,
1283	pParam2,
1284	prc = NULL;
1285	ULONG ulCount = 0;
1286	LONG lNestingLevel = 0;
1287
1288	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1289	{
1290	// determine end character to search for: a space
1291	CHAR cEnd = ' ';
1292	// V1.0.3 (2004-11-10) [pr]: @@fixes 461
1293	// V1.0.13 (2006-09-10) [pr]: optimized
1294	if ((pParam == '\"') \|\| (pParam == '\''))
1295	{
1296	// or, if the data is enclosed in quotes, a quote or single quote
1297	cEnd = *pParam;
1298	pParam++;
1299	}
1300
1301	if (pulOffset)
1302	// store the offset
1303	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1304
1305	// now find end of attribute
1306	pParam2 = pParam;
1307	while (*pParam)
1308	{
1309	// V1.0.13 (2006-09-10) [pr]: line end terminates non-quoted attribute
1310	if ( ( (cEnd == ' ')
1311	&& ((pParam == ' ') \|\| (pParam == '\r') \|\| (*pParam == '\n')))
1312	\|\| (*pParam == cEnd)
1313	)
1314	// end character found
1315	break;
1316	else if (*pParam == '<')
1317	// yet another opening tag found:
1318	// this is probably some "<" in the attributes
1319	lNestingLevel++;
1320	else if (*pParam == '>')
1321	{
1322	lNestingLevel--;
1323	if (lNestingLevel < 0)
1324	// end of tag found:
1325	break;
1326	}
1327	ulCount++;
1328	pParam++;
1329	}
1330
1331	// copy attribute to new buffer
1332	if (ulCount)
1333	{
1334	prc = (PSZ)malloc(ulCount+1);
1335	memcpy(prc, pParam2, ulCount);
1336	*(prc+ulCount) = 0;
1337	}
1338	}
1339	return prc;
1340	}
1341
1342	/*
1343	* strhFindEndOfTag:
1344	* returns a pointer to the ">" char
1345	* which seems to terminate the tag beginning
1346	* after pszBeginOfTag.
1347	*
1348	* If additional "<" chars are found, we look
1349	* for additional ">" characters too.
1350	*
1351	* Note: You must pass the address of the opening
1352	* '<' character to this function.
1353	*
1354	* Example:
1355	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1356	+ strhFindEndOfTag(pszTest)
1357	+ returns.................................^ this.
1358	*
1359	*@@added V0.9.0 [umoeller]
1360	*/
1361
1362	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1363	{
1364	PSZ p = (PSZ)pszBeginOfTag,
1365	prc = NULL;
1366	LONG lNestingLevel = 0;
1367
1368	while (*p)
1369	{
1370	if (*p == '<')
1371	// another opening tag found:
1372	lNestingLevel++;
1373	else if (*p == '>')
1374	{
1375	// closing tag found:
1376	lNestingLevel--;
1377	if (lNestingLevel < 1)
1378	{
1379	// corresponding: return this
1380	prc = p;
1381	break;
1382	}
1383	}
1384	p++;
1385	}
1386
1387	return prc;
1388	}
1389
1390	/*
1391	* strhGetBlock:
1392	* this complex function searches the given string
1393	* for a pair of opening/closing HTML-style tags.
1394	*
1395	* If found, this routine returns TRUE and does
1396	* the following:
1397	*
1398	* 1) allocate a new buffer, copy the text
1399	* enclosed by the opening/closing tags
1400	* into it and set *ppszBlock to that
1401	* buffer;
1402	*
1403	* 2) if the opening tag has any attributes,
1404	* allocate another buffer, copy the
1405	* attributes into it and set *ppszAttrs
1406	* to that buffer; if no attributes are
1407	* found, *ppszAttrs will be NULL;
1408	*
1409	* 3) set *pulOffset to the offset from the
1410	* beginning of *ppszSearchIn where the
1411	* opening tag was found;
1412	*
1413	* 4) advance *ppszSearchIn to after the
1414	* closing tag, so that you can do
1415	* multiple searches without finding the
1416	* same tags twice.
1417	*
1418	* All buffers should be freed using free().
1419	*
1420	* This returns the following:
1421	* -- 0: no error
1422	* -- 1: tag not found at all (doesn't have to be an error)
1423	* -- 2: begin tag found, but no corresponding end tag found. This
1424	* is a real error.
1425	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1426	*
1427	* <B>Example:</B>
1428	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1429	+ PSZ pszBlock, pszAttrs;
1430	+ ULONG ulOfs;
1431	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1432	*
1433	* would do the following:
1434	*
1435	* 1) set pszBlock to a new string containing "This is page 1."
1436	* without quotes;
1437	*
1438	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1439	*
1440	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1441	*
1442	* 4) pSearch would be advanced to point to the "More text"
1443	* string in the original buffer.
1444	*
1445	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1446	* for HTML parsing.
1447	*
1448	*@@added V0.9.0 [umoeller]
1449	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1450	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1451	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1452	*/
1453
1454	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1455	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1456	const char *pszTag,
1457	PSZ *ppszBlock, // out: block enclosed by the tags
1458	PSZ *ppszAttribs, // out: attributes of the opening tag
1459	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1460	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1461	{
1462	ULONG ulrc = 1;
1463	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1464	pszSearch2 = pszBeginTag,
1465	pszClosingTag;
1466	ULONG cbTag = strlen(pszTag);
1467
1468	// go thru the block and check all tags if it's the
1469	// begin tag we're looking for
1470	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1471	{
1472	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1473	// yes: stop
1474	break;
1475	else
1476	pszBeginTag++;
1477	}
1478
1479	if (pszBeginTag)
1480	{
1481	// we found <TAG>:
1482	ULONG ulNestingLevel = 0;
1483
1484	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1485	// strchr(pszBeginTag, '>');
1486	if (pszEndOfBeginTag)
1487	{
1488	// does the caller want the attributes?
1489	if (ppszAttribs)
1490	{
1491	// yes: then copy them
1492	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1493	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1494	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1495	// add terminating 0
1496	*(pszAttrs + ulAttrLen) = 0;
1497
1498	*ppszAttribs = pszAttrs;
1499	}
1500
1501	// output offset of where we found the begin tag
1502	if (pulOfsBeginTag)
1503	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1504
1505	// now find corresponding closing tag (e.g. "</BODY>"
1506	pszBeginTag = pszEndOfBeginTag+1;
1507	// now we're behind the '>' char of the opening tag
1508	// increase offset of that too
1509	if (pulOfsBeginBlock)
1510	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1511
1512	// find next closing tag;
1513	// for the first run, pszSearch2 points to right
1514	// after the '>' char of the opening tag
1515	pszSearch2 = pszBeginTag;
1516	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1517	&& (pszClosingTag = strstr(pszSearch2, "<"))
1518	)
1519	{
1520	// if we have another opening tag before our closing
1521	// tag, we need to have several closing tags before
1522	// we're done
1523	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1524	ulNestingLevel++;
1525	else
1526	{
1527	// is this ours?
1528	if ( (*(pszClosingTag+1) == '/')
1529	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1530	)
1531	{
1532	// we've found a matching closing tag; is
1533	// it ours?
1534	if (ulNestingLevel == 0)
1535	{
1536	// our closing tag found:
1537	// allocate mem for a new buffer
1538	// and extract all the text between
1539	// open and closing tags to it
1540	ULONG ulLen = pszClosingTag - pszBeginTag;
1541	if (ppszBlock)
1542	{
1543	PSZ pNew = (PSZ)malloc(ulLen + 1);
1544	strhncpy0(pNew, pszBeginTag, ulLen);
1545	*ppszBlock = pNew;
1546	}
1547
1548	// raise search offset to after the closing tag
1549	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1550
1551	ulrc = 0;
1552
1553	break;
1554	} else
1555	// not our closing tag:
1556	ulNestingLevel--;
1557	}
1558	}
1559	// no matching closing tag: search on after that
1560	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1561	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1562
1563	if (!pszClosingTag)
1564	// no matching closing tag found:
1565	// return 2 (closing tag not found)
1566	ulrc = 2;
1567	} // end if (pszBeginTag)
1568	else
1569	// no matching ">" for opening tag found:
1570	ulrc = 3;
1571	}
1572
1573	return ulrc;
1574	}
1575
1576	/* ******************************************************************
1577	*
1578	* Miscellaneous
1579	*
1580	********************************************************************/
1581
1582	/*
1583	*@@ strhArrayAppend:
1584	* this appends a string to a "string array".
1585	*
1586	* A string array is considered a sequence of
1587	* zero-terminated strings in memory. That is,
1588	* after each string's null-byte, the next
1589	* string comes up.
1590	*
1591	* This is useful for composing a single block
1592	* of memory from, say, list box entries, which
1593	* can then be written to OS2.INI in one flush.
1594	*
1595	* To append strings to such an array, call this
1596	* function for each string you wish to append.
1597	* This will re-allocate *ppszRoot with each call,
1598	* and update *pcbRoot, which then contains the
1599	* total size of all strings (including all null
1600	* terminators).
1601	*
1602	* Pass *pcbRoot to PrfSaveProfileData to have the
1603	* block saved.
1604	*
1605	* Note: On the first call, ppszRoot and pcbRoot
1606	* _must_ be both NULL, or this crashes.
1607	*
1608	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1609	*/
1610
1611	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1612	const char *pcszNew, // in: string to append
1613	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1614	PULONG pcbRoot) // in/out: size of array
1615	{
1616	PSZ pszTemp;
1617
1618	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1619	cbNew = strlen(pcszNew);
1620
1621	pszTemp = (PSZ)malloc(*pcbRoot
1622	+ cbNew
1623	+ 1); // two null bytes
1624	if (*ppszRoot)
1625	{
1626	// not first loop: copy old stuff
1627	memcpy(pszTemp,
1628	*ppszRoot,
1629	*pcbRoot);
1630	free(*ppszRoot);
1631	}
1632	// append new string
1633	strcpy(pszTemp + *pcbRoot,
1634	pcszNew);
1635	// update root
1636	*ppszRoot = pszTemp;
1637	// update length
1638	*pcbRoot += cbNew + 1;
1639	}
1640
1641	/*
1642	*@@ strhCreateDump:
1643	* this dumps a memory block into a string
1644	* and returns that string in a new buffer.
1645	*
1646	* You must free() the returned PSZ after use.
1647	*
1648	* The output looks like the following:
1649	*
1650	+ 0000: FE FF 0E 02 90 00 00 00 ........
1651	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1652	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1653	*
1654	* Each line is terminated with a newline (\n)
1655	* character only.
1656	*
1657	*@@added V0.9.1 (2000-01-22) [umoeller]
1658	*/
1659
1660	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1661	ULONG ulSize, // in: size of buffer
1662	ULONG ulIndent) // in: indentation of every line
1663	{
1664	PSZ pszReturn = 0;
1665	XSTRING strReturn;
1666	CHAR szTemp[1000];
1667
1668	PBYTE pbCurrent = pb; // current byte
1669	ULONG ulCount = 0,
1670	ulCharsInLine = 0; // if this grows > 7, a new line is started
1671	CHAR szLine[400] = "",
1672	szAscii[30] = " "; // ASCII representation; filled for every line
1673	PSZ pszLine = szLine,
1674	pszAscii = szAscii;
1675
1676	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1677
1678	for (pbCurrent = pb;
1679	ulCount < ulSize;
1680	pbCurrent++, ulCount++)
1681	{
1682	if (ulCharsInLine == 0)
1683	{
1684	memset(szLine, ' ', ulIndent);
1685	pszLine += ulIndent;
1686	}
1687	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1688
1689	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1690	// printable character:
1691	pszAscii = pbCurrent;
1692	else
1693	*pszAscii = '.';
1694	pszAscii++;
1695
1696	ulCharsInLine++;
1697	if ( (ulCharsInLine > 7) // 8 bytes added?
1698	\|\| (ulCount == ulSize-1) // end of buffer reached?
1699	)
1700	{
1701	// if we haven't had eight bytes yet,
1702	// fill buffer up to eight bytes with spaces
1703	ULONG ul2;
1704	for (ul2 = ulCharsInLine;
1705	ul2 < 8;
1706	ul2++)
1707	pszLine += sprintf(pszLine, " ");
1708
1709	sprintf(szTemp, "%04lX: %s %s\n",
1710	(ulCount & 0xFFFFFFF8), // offset in hex
1711	szLine, // bytes string
1712	szAscii); // ASCII string
1713	xstrcat(&strReturn, szTemp, 0);
1714
1715	// restart line buffer
1716	pszLine = szLine;
1717
1718	// clear ASCII buffer
1719	strcpy(szAscii, " ");
1720	pszAscii = szAscii;
1721
1722	// reset line counter
1723	ulCharsInLine = 0;
1724	}
1725	}
1726
1727	if (strReturn.cbAllocated)
1728	pszReturn = strReturn.psz;
1729
1730	return pszReturn;
1731	}
1732
1733	/* ******************************************************************
1734	*
1735	* Fast string searches
1736	*
1737	********************************************************************/
1738
// ASSERT is defined as an empty macro here, so every
// ASSERT (...) statement in this file expands to nothing
// and its argument is never evaluated.
#define ASSERT(a)
1740
1741	/*
1742	* The following code has been taken from the "Standard
1743	* Function Library", file sflfind.c, and only slightly
1744	* modified to conform to the rest of this file.
1745	*
1746	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1747	* Revised: 98/05/04
1748	*
1749	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1750	*
1751	* The SFL Licence allows incorporating SFL code into other
1752	* programs, as long as the copyright is reprinted and the
1753	* code is marked as modified, so this is what we do.
1754	*/
1755
1756	/*
1757	*@@ strhmemfind:
1758	* searches for a pattern in a block of memory using the
1759	* Boyer-Moore-Horspool-Sunday algorithm.
1760	*
1761	* The block and pattern may contain any values; you must
1762	* explicitly provide their lengths. If you search for strings,
1763	* use strlen() on the buffers.
1764	*
1765	* Returns a pointer to the pattern if found within the block,
1766	* or NULL if the pattern was not found.
1767	*
1768	* This algorithm needs a "shift table" to cache data for the
1769	* search pattern. This table can be reused when performing
1770	* several searches with the same pattern.
1771	*
1772	* "shift" must point to an array big enough to hold 256 (8**2)
1773	* "size_t" values.
1774	*
1775	* If (*repeat_find == FALSE), the shift table is initialized.
1776	* So on the first search with a given pattern, *repeat_find
1777	* should be FALSE. This function sets it to TRUE after the
1778	* shift table is initialised, allowing the initialisation
1779	* phase to be skipped on subsequent searches.
1780	*
1781	* This function is most effective when repeated searches are
1782	* made for the same pattern in one or more large buffers.
1783	*
1784	* Example:
1785	*
1786	+ PSZ pszHaystack = "This is a sample string.",
1787	+ pszNeedle = "string";
1788	+ size_t shift[256];
1789	+ BOOL fRepeat = FALSE;
1790	+
1791	+ PSZ pFound = strhmemfind(pszHaystack,
1792	+ strlen(pszHaystack), // block size
1793	+ pszNeedle,
1794	+ strlen(pszNeedle), // pattern size
1795	+ shift,
1796	+ &fRepeat);
1797	*
1798	* Taken from the "Standard Function Library", file sflfind.c.
1799	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1800	* Slightly modified by umoeller.
1801	*
1802	*@@added V0.9.3 (2000-05-08) [umoeller]
1803	*/
1804
1805	void* strhmemfind(const void *in_block, // in: block containing data
1806	size_t block_size, // in: size of block in bytes
1807	const void *in_pattern, // in: pattern to search for
1808	size_t pattern_size, // in: size of pattern block
1809	size_t *shift, // in/out: shift table (search buffer)
1810	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
1811	{
1812	size_t byte_nbr, // Distance through block
1813	match_size; // Size of matched part
1814	const unsigned char
1815	*match_base = NULL, // Base of match of pattern
1816	*match_ptr = NULL, // Point within current match
1817	*limit = NULL; // Last potiental match point
1818	const unsigned char
1819	block = (unsigned char ) in_block, // Concrete pointer to block data
1820	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
1821
1822	if ( (block == NULL)
1823	\|\| (pattern == NULL)
1824	\|\| (shift == NULL)
1825	)
1826	return NULL;
1827
1828	// Pattern must be smaller or equal in size to string
1829	if (block_size < pattern_size)
1830	return NULL; // Otherwise it's not found
1831
1832	if (pattern_size == 0) // Empty patterns match at start
1833	return ((void *)block);
1834
1835	// Build the shift table unless we're continuing a previous search
1836
1837	// The shift table determines how far to shift before trying to match
1838	// again, if a match at this point fails. If the byte after where the
1839	// end of our pattern falls is not in our pattern, then we start to
1840	// match again after that byte; otherwise we line up the last occurence
1841	// of that byte in our pattern under that byte, and try match again.
1842
1843	if (!repeat_find \|\| !*repeat_find)
1844	{
1845	for (byte_nbr = 0;
1846	byte_nbr < 256;
1847	byte_nbr++)
1848	shift[byte_nbr] = pattern_size + 1;
1849	for (byte_nbr = 0;
1850	byte_nbr < pattern_size;
1851	byte_nbr++)
1852	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1853
1854	if (repeat_find)
1855	*repeat_find = TRUE;
1856	}
1857
1858	// Search for the block, each time jumping up by the amount
1859	// computed in the shift table
1860
1861	limit = block + (block_size - pattern_size + 1);
1862	ASSERT (limit > block);
1863
1864	for (match_base = block;
1865	match_base < limit;
1866	match_base += shift[*(match_base + pattern_size)])
1867	{
1868	match_ptr = match_base;
1869	match_size = 0;
1870
1871	// Compare pattern until it all matches, or we find a difference
1872	while (*match_ptr++ == pattern[match_size++])
1873	{
1874	ASSERT (match_size <= pattern_size &&
1875	match_ptr == (match_base + match_size));
1876
1877	// If we found a match, return the start address
1878	if (match_size >= pattern_size)
1879	return ((void*)(match_base));
1880
1881	}
1882	}
1883	return NULL; // Found nothing
1884	}
1885
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      Will match strings irrespective of case. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      This returns NULL if string or pattern is NULL or if the
 *      pattern is longer than the string. An empty pattern matches
 *      at the start of the string.
 *
 *      Characters are converted with tolower() for the comparison.
 *      All characters are passed to tolower() as unsigned char
 *      values, so characters above 127 are safe even if "char"
 *      is a signed type.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t
        shift [256];                        // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                           // Index into byte array
        match_size;                         // Size of matched part
    const unsigned char
        *text = NULL,                       // String as unsigned bytes
        *pat = NULL,                        // Pattern as unsigned bytes
        *match_base = NULL,                 // Base of match of pattern
        *limit = NULL;                      // First address which cannot match

    // Expect non-NULL pointers, but fail gracefully if not
    if (string == NULL || pattern == NULL)
        return NULL;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return NULL;                        // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return (char *) string;

    // tolower() is only defined for unsigned char values and EOF,
    // so look at both strings as unsigned bytes from here on
    text = (const unsigned char *) string;
    pat = (const unsigned char *) pattern;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. At the last position, the byte after
    // the window is the string's null terminator, so reading it is safe.

    limit = text + (string_size - pattern_size + 1);

    for (match_base = text;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (match_base [pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        for (match_size = 0; match_size < pattern_size; match_size++)
            if (tolower (match_base [match_size]) != tolower (pat [match_size]))
                break;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *) match_base);
    }

    return NULL;                            // Found nothing
}
1980

Note: See TracBrowser for help on using the repository browser.

Download in other formats: