Context Navigation

source: branches/branch-1-0/src/helpers/stringh.c@ 231

Visit:

Last change on this file since 231 was 196, checked in by umoeller, 23 years ago
Misc fixes.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 59.3 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2002 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
73	/*
74	*@@ strhStoreDebug:
75	* memory debug version of strhStore.
76	*
77	*@@added V0.9.16 (2001-12-08) [umoeller]
78	*/
79
80	APIRET (strhStoreDebug)(PSZ *ppszTarget,
81	PCSZ pcszSource,
82	PULONG pulLength, // out: length of new string (ptr can be NULL)
83	PCSZ pcszSourceFile,
84	unsigned long ulLine,
85	PCSZ pcszFunction)
86	{
87	ULONG ulLength = 0;
88
89
90
91	if (ppszTarget)
92	{
93	if (*ppszTarget)
94	free(*ppszTarget);
95
96	if ( (pcszSource)
97	&& (ulLength = strlen(pcszSource))
98	)
99	{
100	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
101	pcszSourceFile,
102	ulLine,
103	pcszFunction))
104	memcpy(*ppszTarget, pcszSource, ulLength + 1);
105	else
106	return ERROR_NOT_ENOUGH_MEMORY;
107	}
108	else
109	*ppszTarget = NULL;
110	}
111
112	if (pulLength)
113	*pulLength = ulLength;
114
115	return NO_ERROR;
116	}
117
118	#endif
119
120	/*
121	*@@ strhStore:
122	* stores a copy of the given string in the specified
123	* buffer. Uses strdup internally.
124	*
125	* If *ppszTarget != NULL, the previous string is freed
126	* and set to NULL.
127	* If pcszSource != NULL, a copy of it is stored in the
128	* buffer.
129	*
130	*@@added V0.9.16 (2001-12-06) [umoeller]
131	*/
132
133	APIRET strhStore(PSZ *ppszTarget,
134	PCSZ pcszSource,
135	PULONG pulLength) // out: length of new string (ptr can be NULL)
136	{
137	ULONG ulLength = 0;
138
139	if (ppszTarget)
140	{
141	if (*ppszTarget)
142	free(*ppszTarget);
143
144	if ( (pcszSource)
145	&& (ulLength = strlen(pcszSource))
146	)
147	{
148	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
149	memcpy(*ppszTarget, pcszSource, ulLength + 1);
150	else
151	return ERROR_NOT_ENOUGH_MEMORY;
152	}
153	else
154	*ppszTarget = NULL;
155	}
156	else
157	return ERROR_INVALID_PARAMETER;
158
159	if (pulLength)
160	*pulLength = ulLength;
161
162	return NO_ERROR;
163	}
164
165	/*
166	*@@ strhcpy:
167	* like strdup, but this one doesn't crash if string2 is NULL,
168	* but sets the first byte in string1 to \0 instead.
169	*
170	*@@added V0.9.14 (2001-08-01) [umoeller]
171	*/
172
173	PSZ strhcpy(PSZ string1, PCSZ string2)
174	{
175	if (string2)
176	return strcpy(string1, string2);
177
178	*string1 = '\0';
179	return string1;
180	}
181
182	#ifdef __DEBUG_MALLOC_ENABLED__
183
184	/*
185	*@@ strhdupDebug:
186	* memory debug version of strhdup.
187	*
188	*@@added V0.9.0 [umoeller]
189	*/
190
191	PSZ strhdupDebug(PCSZ pcszSource,
192	unsigned long *pulLength,
193	PCSZ pcszSourceFile,
194	unsigned long ulLine,
195	PCSZ pcszFunction)
196	{
197	PSZ pszReturn = NULL;
198	ULONG ulLength = 0;
199
200	if ( (pcszSource)
201	&& (ulLength = strlen(pcszSource))
202	)
203	{
204	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
205	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
206	ulLine,
207	pcszFunction))
208	memcpy(pszReturn, pcszSource, ulLength + 1);
209	}
210
211	if (pulLength)
212	*pulLength = ulLength;
213
214	return pszReturn;
215	}
216
217	#endif // __DEBUG_MALLOC_ENABLED__
218
219	/*
220	*@@ strhdup:
221	* like strdup, but this one doesn't crash if pszSource
222	* is NULL, but returns NULL also. In addition, this
223	* can report the length of the string (V0.9.16).
224	*
225	*@@added V0.9.0 [umoeller]
226	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
227	*/
228
229	PSZ strhdup(PCSZ pcszSource,
230	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
231	{
232	PSZ pszReturn = NULL;
233	ULONG ulLength = 0;
234
235	if ( (pcszSource)
236	&& (ulLength = strlen(pcszSource))
237	)
238	{
239	if (pszReturn = (PSZ)malloc(ulLength + 1))
240	memcpy(pszReturn, pcszSource, ulLength + 1);
241	}
242
243	if (pulLength)
244	*pulLength = ulLength;
245
246	return pszReturn;
247	}
248
249	/*
250	*@@ strhcmp:
251	* better strcmp. This doesn't crash if any of the
252	* string pointers are NULL, but returns a proper
253	* value then.
254	*
255	* Besides, this is guaranteed to only return -1, 0,
256	* or +1, while strcmp can return any positive or
257	* negative value. This is useful for tree comparison
258	* funcs.
259	*
260	*@@added V0.9.9 (2001-02-16) [umoeller]
261	*/
262
263	int strhcmp(PCSZ p1, PCSZ p2)
264	{
265	if (p1 && p2)
266	{
267	int i = strcmp(p1, p2);
268	if (i < 0) return -1;
269	if (i > 0) return +1;
270	}
271	else if (p1)
272	// but p2 is NULL: p1 greater than p2 then
273	return +1;
274	else if (p2)
275	// but p1 is NULL: p1 less than p2 then
276	return -1;
277
278	// return 0 if strcmp returned 0 above or both strings are NULL
279	return 0;
280	}
281
282	/*
283	*@@ strhicmp:
284	* like strhcmp, but compares without respect
285	* to case.
286	*
287	*@@added V0.9.9 (2001-04-07) [umoeller]
288	*/
289
290	int strhicmp(PCSZ p1, PCSZ p2)
291	{
292	if (p1 && p2)
293	{
294	int i = stricmp(p1, p2);
295	if (i < 0) return -1;
296	if (i > 0) return +1;
297	}
298	else if (p1)
299	// but p2 is NULL: p1 greater than p2 then
300	return +1;
301	else if (p2)
302	// but p1 is NULL: p1 less than p2 then
303	return -1;
304
305	// return 0 if strcmp returned 0 above or both strings are NULL
306	return 0;
307	}
308
309	/*
310	*@@ strhistr:
311	* like strstr, but case-insensitive.
312	*
313	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
314	*/
315
316	PSZ strhistr(PCSZ string1, PCSZ string2)
317	{
318	PSZ prc = NULL;
319
320	if ((string1) && (string2))
321	{
322	PSZ pszSrchIn = strdup(string1);
323	PSZ pszSrchFor = strdup(string2);
324
325	if ((pszSrchIn) && (pszSrchFor))
326	{
327	strupr(pszSrchIn);
328	strupr(pszSrchFor);
329
330	if (prc = strstr(pszSrchIn, pszSrchFor))
331	{
332	// prc now has the first occurence of the string,
333	// but in pszSrchIn; we need to map this
334	// return value to the original string
335	prc = (prc-pszSrchIn) // offset in pszSrchIn
336	+ (PSZ)string1;
337	}
338	}
339	if (pszSrchFor)
340	free(pszSrchFor);
341	if (pszSrchIn)
342	free(pszSrchIn);
343	}
344
345	return prc;
346	}
347
348	/*
349	*@@ strhncpy0:
350	* like strncpy, but always appends a 0 character.
351	*
352	*@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
353	*/
354
355	ULONG strhncpy0(PSZ pszTarget,
356	PCSZ pszSource,
357	ULONG cbSource)
358	{
359	ULONG ul = 0;
360	PSZ pTarget = pszTarget,
361	pSource;
362
363	if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
364	{
365	for (ul = 0; ul < cbSource; ul++)
366	if (*pSource)
367	pTarget++ = pSource++;
368	else
369	break;
370	}
371
372	*pTarget = 0;
373
374	return ul;
375	}
376
377	/*
378	*@@ strhlen:
379	* like strlen, but doesn't crash on
380	* null strings, but returns 0 also.
381	*
382	*@@added V0.9.19 (2002-04-02) [umoeller]
383	*/
384
385	ULONG strhlen(PCSZ pcsz)
386	{
387	if (pcsz)
388	return strlen(pcsz);
389
390	return 0;
391	}
392
393	/*
394	*@@ strhSize:
395	* returns the size of the given string, which
396	* is the memory required to allocate a copy,
397	* including the null terminator.
398	*
399	* Returns 0 only if pcsz is NULL. If pcsz
400	* points to a null character, this returns 1.
401	*
402	*@@added V0.9.18 (2002-02-13) [umoeller]
403	*@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
404	*/
405
406	ULONG strhSize(PCSZ pcsz)
407	{
408	if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
409	return (strlen(pcsz) + 1);
410
411	return 0;
412	}
413
414	/*
415	* strhCount:
416	* this counts the occurences of c in pszSearch.
417	*/
418
419	ULONG strhCount(PCSZ pszSearch,
420	CHAR c)
421	{
422	PSZ p = (PSZ)pszSearch;
423	ULONG ulCount = 0;
424	while (TRUE)
425	{
426	p = strchr(p, c);
427	if (p)
428	{
429	ulCount++;
430	p++;
431	}
432	else
433	break;
434	}
435	return ulCount;
436	}
437
438	/*
439	*@@ strhIsDecimal:
440	* returns TRUE if psz consists of decimal digits only.
441	*/
442
443	BOOL strhIsDecimal(PSZ psz)
444	{
445	PSZ p = psz;
446	while (*p != 0)
447	{
448	if (isdigit(*p) == 0)
449	return FALSE;
450	p++;
451	}
452
453	return TRUE;
454	}
455
456	#ifdef __DEBUG_MALLOC_ENABLED__
457
458	/*
459	*@@ strhSubstrDebug:
460	* memory debug version of strhSubstr.
461	*
462	*@@added V0.9.14 (2001-08-01) [umoeller]
463	*/
464
465	PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
466	PCSZ pEnd, // in: last char (not included)
467	PCSZ pcszSourceFile,
468	unsigned long ulLine,
469	PCSZ pcszFunction)
470	{
471	PSZ pszSubstr = NULL;
472
473	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
474	{
475	ULONG cbSubstr = (pEnd - pBegin);
476	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
477	pcszSourceFile,
478	ulLine,
479	pcszFunction))
480	{
481	// strhncpy0(pszSubstr, pBegin, cbSubstr);
482	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
483	*(pszSubstr + cbSubstr) = '\0';
484	}
485	}
486
487	return pszSubstr;
488	}
489
490	#endif // __DEBUG_MALLOC_ENABLED__
491
492	/*
493	*@@ strhSubstr:
494	* this creates a new PSZ containing the string
495	* from pBegin to pEnd, excluding the pEnd character.
496	* The new string is null-terminated. The caller
497	* must free() the new string after use.
498	*
499	* Example:
500	+ "1234567890"
501	+ ^ ^
502	+ p1 p2
503	+ strhSubstr(p1, p2)
504	* would return a new string containing "2345678".
505	*
506	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
507	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
508	*/
509
510	PSZ strhSubstr(PCSZ pBegin, // in: first char
511	PCSZ pEnd) // in: last char (not included)
512	{
513	PSZ pszSubstr = NULL;
514
515	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
516	{
517	ULONG cbSubstr = (pEnd - pBegin);
518	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
519	{
520	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
521	*(pszSubstr + cbSubstr) = '\0';
522	}
523	}
524
525	return pszSubstr;
526	}
527
528	/*
529	*@@ strhExtract:
530	* searches pszBuf for the cOpen character and returns
531	* the data in between cOpen and cClose, excluding
532	* those two characters, in a newly allocated buffer
533	* which you must free() afterwards.
534	*
535	* Spaces and newlines/linefeeds are skipped.
536	*
537	* If the search was successful, the new buffer
538	* is returned and, if (ppEnd != NULL), *ppEnd points
539	* to the first character after the cClose character
540	* found in the buffer.
541	*
542	* If the search was not successful, NULL is
543	* returned, and *ppEnd is unchanged.
544	*
545	* If another cOpen character is found before
546	* cClose, matching cClose characters will be skipped.
547	* You can therefore nest the cOpen and cClose
548	* characters.
549	*
550	* This function ignores cOpen and cClose characters
551	* in C-style comments and strings surrounded by
552	* double quotes.
553	*
554	* Example:
555	*
556	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
557	+ pEnd;
558	+ strhExtract(pszBuf,
559	+ '{', '}',
560	+ &pEnd)
561	*
562	* would return a new buffer containing " --blah-- ",
563	* and ppEnd would afterwards point to the space
564	* before "next" in the static buffer.
565	*
566	*@@added V0.9.0 [umoeller]
567	*/
568
569	PSZ strhExtract(PCSZ pszBuf, // in: search buffer
570	CHAR cOpen, // in: opening char
571	CHAR cClose, // in: closing char
572	PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
573	{
574	PSZ pszReturn = NULL;
575	PCSZ pOpen;
576	if ( (pszBuf)
577	&& (pOpen = strchr(pszBuf, cOpen))
578	)
579	{
580	// opening char found:
581	// now go thru the whole rest of the buffer
582	PCSZ p = pOpen + 1;
583	LONG lLevel = 1; // if this goes 0, we're done
584	while (*p)
585	{
586	if (*p == cOpen)
587	lLevel++;
588	else if (*p == cClose)
589	{
590	lLevel--;
591	if (lLevel <= 0)
592	{
593	// matching closing bracket found:
594	// extract string
595	pszReturn = strhSubstr(pOpen + 1, // after cOpen
596	p); // excluding cClose
597	if (ppEnd)
598	*ppEnd = p + 1;
599	break; // while (*p)
600	}
601	}
602	else if (*p == '\"')
603	{
604	// beginning of string:
605	PCSZ p2 = p+1;
606	// find end of string
607	while ((p2) && (p2 != '\"'))
608	p2++;
609
610	if (*p2 == '\"')
611	// closing quote found:
612	// search on after that
613	p = p2; // raised below
614	else
615	break; // while (*p)
616	}
617
618	p++;
619	}
620	}
621
622	return pszReturn;
623	}
624
625	/*
626	*@@ strhQuote:
627	* similar to strhExtract, except that
628	* opening and closing chars are the same,
629	* and therefore no nesting is possible.
630	* Useful for extracting stuff between
631	* quotes.
632	*
633	*@@added V0.9.0 [umoeller]
634	*/
635
636	PSZ strhQuote(PSZ pszBuf,
637	CHAR cQuote,
638	PSZ *ppEnd)
639	{
640	PSZ pszReturn = NULL,
641	p1 = NULL;
642	if ((p1 = strchr(pszBuf, cQuote)))
643	{
644	PSZ p2;
645	if (p2 = strchr(p1+1, cQuote))
646	{
647	pszReturn = strhSubstr(p1+1, p2);
648	if (ppEnd)
649	// store closing char
650	*ppEnd = p2 + 1;
651	}
652	}
653
654	return pszReturn;
655	}
656
657	/*
658	*@@ strhStrip:
659	* removes all double spaces.
660	* This copies within the "psz" buffer.
661	* If any double spaces are found, the
662	* string will be shorter than before,
663	* but the buffer is _not_ reallocated,
664	* so there will be unused bytes at the
665	* end.
666	*
667	* Returns the number of spaces removed.
668	*
669	*@@added V0.9.0 [umoeller]
670	*/
671
672	ULONG strhStrip(PSZ psz) // in/out: string
673	{
674	PSZ p;
675	ULONG cb = strlen(psz),
676	ulrc = 0;
677
678	for (p = psz; p < psz+cb; p++)
679	{
680	if ((p == ' ') && ((p+1) == ' '))
681	{
682	PSZ p2 = p;
683	while (*p2)
684	{
685	p2 = (p2+1);
686	p2++;
687	}
688	cb--;
689	p--;
690	ulrc++;
691	}
692	}
693	return ulrc;
694	}
695
696	/*
697	*@@ strhins:
698	* this inserts one string into another.
699	*
700	* pszInsert is inserted into pszBuffer at offset
701	* ulInsertOfs (which counts from 0).
702	*
703	* A newly allocated string is returned. pszBuffer is
704	* not changed. The new string should be free()'d after
705	* use.
706	*
707	* Upon errors, NULL is returned.
708	*
709	*@@changed V0.9.0 [umoeller]: completely rewritten.
710	*/
711
712	PSZ strhins(PCSZ pcszBuffer,
713	ULONG ulInsertOfs,
714	PCSZ pcszInsert)
715	{
716	PSZ pszNew = NULL;
717
718	if ((pcszBuffer) && (pcszInsert))
719	{
720	do {
721	ULONG cbBuffer = strlen(pcszBuffer);
722	ULONG cbInsert = strlen(pcszInsert);
723
724	// check string length
725	if (ulInsertOfs > cbBuffer + 1)
726	break; // do
727
728	// OK, let's go.
729	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
730
731	// copy stuff before pInsertPos
732	memcpy(pszNew,
733	pcszBuffer,
734	ulInsertOfs);
735	// copy string to be inserted
736	memcpy(pszNew + ulInsertOfs,
737	pcszInsert,
738	cbInsert);
739	// copy stuff after pInsertPos
740	strcpy(pszNew + ulInsertOfs + cbInsert,
741	pcszBuffer + ulInsertOfs);
742	} while (FALSE);
743	}
744
745	return pszNew;
746	}
747
748	/*
749	*@@ strhFindReplace:
750	* wrapper around xstrFindReplace to work with C strings.
751	* Note that *ppszBuf can get reallocated and must
752	* be free()'able.
753	*
754	* Repetitive use of this wrapper is not recommended
755	* because it is considerably slower than xstrFindReplace.
756	*
757	*@@added V0.9.6 (2000-11-01) [umoeller]
758	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
759	*/
760
761	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
762	PULONG pulOfs, // in: where to begin search (0 = start);
763	// out: ofs of first char after replacement string
764	PCSZ pcszSearch, // in: search string; cannot be NULL
765	PCSZ pcszReplace) // in: replacement string; cannot be NULL
766	{
767	ULONG ulrc = 0;
768	XSTRING xstrBuf,
769	xstrFind,
770	xstrReplace;
771	size_t ShiftTable[256];
772	BOOL fRepeat = FALSE;
773	xstrInitSet(&xstrBuf, *ppszBuf);
774	// reallocated and returned, so we're safe
775	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
776	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
777	// these two are never freed, so we're safe too
778
779	if ((ulrc = xstrFindReplace(&xstrBuf,
780	pulOfs,
781	&xstrFind,
782	&xstrReplace,
783	ShiftTable,
784	&fRepeat)))
785	// replaced:
786	*ppszBuf = xstrBuf.psz;
787
788	return ulrc;
789	}
790
791	/*
792	* strhWords:
793	* returns the no. of words in "psz".
794	* A string is considered a "word" if
795	* it is surrounded by spaces only.
796	*
797	*@@added V0.9.0 [umoeller]
798	*/
799
800	ULONG strhWords(PSZ psz)
801	{
802	PSZ p;
803	ULONG cb = strlen(psz),
804	ulWords = 0;
805	if (cb > 1)
806	{
807	ulWords = 1;
808	for (p = psz; p < psz+cb; p++)
809	if (*p == ' ')
810	ulWords++;
811	}
812	return ulWords;
813	}
814
815	/*
816	*@@ strhGetWord:
817	* finds word boundaries.
818	*
819	* *ppszStart is used as the beginning of the
820	* search.
821	*
822	* If a word is found, *ppszStart is set to
823	* the first character of the word which was
824	* found and *ppszEnd receives the address
825	* of the first character _after_ the word,
826	* which is probably a space or a \n or \r char.
827	* We then return TRUE.
828	*
829	* The search is stopped if a null character
830	* is found or pLimit is reached. In that case,
831	* FALSE is returned.
832	*
833	*@@added V0.9.1 (2000-02-13) [umoeller]
834	*/
835
836	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
837	// out: start of word (if TRUE is returned)
838	PCSZ pLimit, // in: ptr to last char after *ppszStart to be
839	// searched; if the word does not end before
840	// or with this char, FALSE is returned
841	PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
842	PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
843	PSZ *ppszEnd) // out: first char _after_ word
844	// (if TRUE is returned)
845	{
846	// characters after which a word can be started
847	// PCSZ pcszBeginChars = "\x0d\x0a ";
848	// PCSZ pcszEndChars = "\x0d\x0a /-";
849
850	PSZ pStart = *ppszStart;
851
852	// find start of word
853	while ( (pStart < (PSZ)pLimit)
854	&& (strchr(pcszBeginChars, *pStart))
855	)
856	// if char is a "before word" char: go for next
857	pStart++;
858
859	if (pStart < (PSZ)pLimit)
860	{
861	// found a valid "word start" character
862	// (which is not in pcszBeginChars):
863
864	// find end of word
865	PSZ pEndOfWord = pStart;
866	while ( (pEndOfWord <= (PSZ)pLimit)
867	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
868	)
869	// if char is not an "end word" char: go for next
870	pEndOfWord++;
871
872	if (pEndOfWord <= (PSZ)pLimit)
873	{
874	// whoa, got a word:
875	*ppszStart = pStart;
876	*ppszEnd = pEndOfWord;
877	return TRUE;
878	}
879	}
880
881	return FALSE;
882	}
883
884	/*
885	*@@ strhIsWord:
886	* returns TRUE if p points to a "word"
887	* in pcszBuf.
888	*
889	* p is considered a word if the character _before_
890	* it is in pcszBeginChars and the char _after_
891	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
892	*
893	*@@added V0.9.6 (2000-11-12) [umoeller]
894	*@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
895	*/
896
897	BOOL strhIsWord(PCSZ pcszBuf,
898	PCSZ p, // in: start of word
899	ULONG cbSearch, // in: length of word
900	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
901	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
902	{
903	// check previous char
904	if ( (p == pcszBuf)
905	\|\| (strchr(pcszBeginChars, *(p-1)))
906	)
907	{
908	// OK, valid begin char:
909	// check end char
910	CHAR cNextChar;
911	if (!(cNextChar = p[cbSearch]))
912	// null terminator:
913	return TRUE;
914	else
915	{
916	// not null terminator: check if char is
917	// in the list of valid end chars
918	if (strchr(pcszEndChars, cNextChar))
919	{
920	// OK, is end char: avoid doubles of that char,
921	// but allow spaces
922	// fixed V0.9.18 (2002-02-23) [umoeller]
923	CHAR cNextNext = p[cbSearch + 1];
924	if ( (cNextNext != cNextChar)
925	\|\| (cNextNext == ' ')
926	\|\| (cNextNext == 0)
927	)
928	return TRUE;
929	}
930	}
931	}
932
933	return FALSE;
934	}
935
936	/*
937	*@@ strhFindWord:
938	* searches for pszSearch in pszBuf, which is
939	* returned if found (or NULL if not).
940	*
941	* As opposed to strstr, this finds pszSearch
942	* only if it is a "word". A search string is
943	* considered a word if the character _before_
944	* it is in pcszBeginChars and the char _after_
945	* it is in pcszEndChars.
946	*
947	* Example:
948	+ strhFindWord("This is an example.", "is");
949	+ returns ...........^ this, but not the "is" in "This".
950	*
951	* The algorithm here uses strstr to find pszSearch in pszBuf
952	* and performs additional "is-word" checks for each item found
953	* (by calling strhIsWord).
954	*
955	* Note that this function is fairly slow compared to xstrFindWord.
956	*
957	*@@added V0.9.0 (99-11-08) [umoeller]
958	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
959	*/
960
961	PSZ strhFindWord(PCSZ pszBuf,
962	PCSZ pszSearch,
963	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
964	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
965	{
966	PSZ pszReturn = 0;
967	ULONG cbBuf = strlen(pszBuf),
968	cbSearch = strlen(pszSearch);
969
970	if ((cbBuf) && (cbSearch))
971	{
972	PCSZ p = pszBuf;
973
974	do // while p
975	{
976	p = strstr(p, pszSearch);
977	if (p)
978	{
979	// string found:
980	// check if that's a word
981
982	if (strhIsWord(pszBuf,
983	p,
984	cbSearch,
985	pcszBeginChars,
986	pcszEndChars))
987	{
988	// valid end char:
989	pszReturn = (PSZ)p;
990	break;
991	}
992
993	p += cbSearch;
994	}
995	} while (p);
996
997	}
998	return pszReturn;
999	}
1000
1001	/*
1002	*@@ strhFindEOL:
1003	* returns a pointer to the next \r, \n or null character
1004	* following pszSearchIn. Stores the offset in *pulOffset.
1005	*
1006	* This should never return NULL because at some point,
1007	* there will be a null byte in your string.
1008	*
1009	*@@added V0.9.4 (2000-07-01) [umoeller]
1010	*/
1011
1012	PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
1013	PULONG pulOffset) // out: offset (ptr can be NULL)
1014	{
1015	PCSZ p = pcszSearchIn,
1016	prc = 0;
1017	while (TRUE)
1018	{
1019	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1020	{
1021	prc = p;
1022	break;
1023	}
1024	p++;
1025	}
1026
1027	if ((pulOffset) && (prc))
1028	*pulOffset = prc - pcszSearchIn;
1029
1030	return ((PSZ)prc);
1031	}
1032
1033	/*
1034	*@@ strhFindNextLine:
1035	* like strhFindEOL, but this returns the character
1036	* _after_ \r or \n. Note that this might return
1037	* a pointer to terminating NULL character also.
1038	*/
1039
1040	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1041	{
1042	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1043	// pEOL now points to the \r char or the terminating 0 byte;
1044	// if not null byte, advance pointer
1045	PSZ pNextLine = pEOL;
1046	if (*pNextLine == '\r')
1047	pNextLine++;
1048	if (*pNextLine == '\n')
1049	pNextLine++;
1050	if (pulOffset)
1051	*pulOffset = pNextLine - pszSearchIn;
1052	return pNextLine;
1053	}
1054
1055	/*
1056	*@@ strhBeautifyTitle:
1057	* replaces all line breaks (0xd, 0xa) with spaces.
1058	* Returns the new length of the string or 0 on
1059	* errors.
1060	*
1061	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1062	*@@changed V0.9.19 (2002-06-18) [umoeller]: now returning length
1063	*/
1064
1065	ULONG strhBeautifyTitle(PSZ psz)
1066	{
1067	ULONG ulrc;
1068	PSZ p = psz;
1069
1070	while (*p)
1071	{
1072	if ( (*p == '\r')
1073	\|\| (*p == '\n')
1074	)
1075	{
1076	if ( (p != psz)
1077	&& (p[-1] == ' ')
1078	)
1079	memmove(p, p + 1, strlen(p));
1080	else
1081	*p++ = ' ';
1082	}
1083	else
1084	p++;
1085	}
1086
1087	return (p - psz);
1088	}
1089
1090	/*
1091	*@@ strhBeautifyTitle:
1092	* like strhBeautifyTitle, but copies into
1093	* a new buffer. More efficient.
1094	*
1095	*@@added V0.9.19 (2002-06-18) [umoeller]
1096	*/
1097
1098	ULONG strhBeautifyTitle2(PSZ pszTarget, // out: beautified string
1099	PCSZ pcszSource) // in: string to be beautified (can be NULL)
1100	{
1101	ULONG ulrc;
1102	PCSZ pSource = pcszSource;
1103	PSZ pTarget = pszTarget;
1104	CHAR c;
1105	if (!pcszSource)
1106	{
1107	*pszTarget = '\0';
1108	return 0;
1109	}
1110
1111	while (c = *pSource++)
1112	{
1113	if ( (c == '\r')
1114	\|\| (c == '\n')
1115	)
1116	{
1117	if ( (pTarget == pszTarget)
1118	\|\| (pTarget[-1] != ' ')
1119	)
1120	*pTarget++ = ' ';
1121	}
1122	else
1123	*pTarget++ = c;
1124	}
1125
1126	// null-terminate
1127	*pTarget = '\0';
1128
1129	return (pTarget - pszTarget);
1130	}
1131
1132	/*
1133	* strhFindAttribValue:
1134	* searches for pszAttrib in pszSearchIn; if found,
1135	* returns the first character after the "=" char.
1136	* If "=" is not found, a space, \r, and \n are
1137	* also accepted. This function searches without
1138	* respecting case.
1139	*
1140	* <B>Example:</B>
1141	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1142	+
1143	+ returns ....................... ^ this address.
1144	*
1145	*@@added V0.9.0 [umoeller]
1146	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1147	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1148	*/
1149
1150	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1151	{
1152	PSZ prc = 0;
1153	PSZ pszSearchIn2, p;
1154	ULONG cbAttrib = strlen(pszAttrib),
1155	ulLength = strlen(pszSearchIn);
1156
1157	// use alloca(), so memory is freed on function exit
1158	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1159	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1160
1161	// 1) find token, (space char, \n, \r, \t)
1162	p = strtok(pszSearchIn2, " \n\r\t");
1163	while (p)
1164	{
1165	CHAR c2;
1166	PSZ pOrig;
1167
1168	// check tag name
1169	if (!strnicmp(p, pszAttrib, cbAttrib))
1170	{
1171	// position in original string
1172	pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1173
1174	// yes:
1175	prc = pOrig + cbAttrib;
1176	c2 = *prc;
1177	while ( ( (c2 == ' ')
1178	\|\| (c2 == '=')
1179	\|\| (c2 == '\n')
1180	\|\| (c2 == '\r')
1181	)
1182	&& (c2 != 0)
1183	)
1184	c2 = *++prc;
1185
1186	break;
1187	}
1188
1189	p = strtok(NULL, " \n\r\t");
1190	}
1191
1192	return prc;
1193	}
1194
1195	/* PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1196	{
1197	PSZ prc = 0;
1198	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1199	p,
1200	p2;
1201	ULONG cbAttrib = strlen(pszAttrib);
1202
1203	// 1) find space char
1204	while ((p = strchr(pszSearchIn2, ' ')))
1205	{
1206	CHAR c;
1207	p++;
1208	if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1209	{
1210	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1211	// now check whether the p+strlen(pszAttrib)
1212	// is a valid end-of-tag character
1213	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1214	&& ( (c == ' ')
1215	\|\| (c == '>')
1216	\|\| (c == '=')
1217	\|\| (c == '\r')
1218	\|\| (c == '\n')
1219	\|\| (c == 0)
1220	)
1221	)
1222	{
1223	// yes:
1224	CHAR c2;
1225	p2 = p + cbAttrib;
1226	c2 = *p2;
1227	while ( ( (c2 == ' ')
1228	\|\| (c2 == '=')
1229	\|\| (c2 == '\n')
1230	\|\| (c2 == '\r')
1231	)
1232	&& (c2 != 0)
1233	)
1234	c2 = *++p2;
1235
1236	prc = p2;
1237	break; // first while
1238	}
1239	}
1240	else
1241	break;
1242
1243	pszSearchIn2++;
1244	}
1245	return prc;
1246	} */
1247
1248	/*
1249	* strhGetNumAttribValue:
1250	* stores the numerical parameter value of an HTML-style
1251	* tag in *pl.
1252	*
1253	* Returns the address of the tag parameter in the
1254	* search buffer, if found, or NULL.
1255	*
1256	* <B>Example:</B>
1257	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1258	*
1259	* stores 123 in the "l" variable.
1260	*
1261	*@@added V0.9.0 [umoeller]
1262	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1263	*/
1264
1265	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1266	const char *pszTag, // e.g. "INDEX"
1267	PLONG pl) // out: numerical value
1268	{
1269	PSZ pParam;
1270	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1271	{
1272	if ( (*pParam == '\"')
1273	\|\| (*pParam == '\'')
1274	)
1275	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1276
1277	sscanf(pParam, "%ld", pl);
1278	}
1279
1280	return pParam;
1281	}
1282
1283	/*
1284	* strhGetTextAttr:
1285	* retrieves the attribute value of a textual HTML-style tag
1286	* in a newly allocated buffer, which is returned,
1287	* or NULL if attribute not found.
1288	* If an attribute value is to contain spaces, it
1289	* must be enclosed in quotes.
1290	*
1291	* The offset of the attribute data in pszSearchIn is
1292	* returned in *pulOffset so that you can do multiple
1293	* searches.
1294	*
1295	* This returns a new buffer, which should be free()'d after use.
1296	*
1297	* <B>Example:</B>
1298	+ ULONG ulOfs = 0;
1299	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1300	+ ............^ ulOfs
1301	*
1302	* returns a new string with the value "blublub" (without
1303	* quotes) and sets ulOfs to 12.
1304	*
1305	*@@added V0.9.0 [umoeller]
1306	*/
1307
1308	PSZ strhGetTextAttr(const char *pszSearchIn,
1309	const char *pszTag,
1310	PULONG pulOffset) // out: offset where found
1311	{
1312	PSZ pParam,
1313	pParam2,
1314	prc = NULL;
1315	ULONG ulCount = 0;
1316	LONG lNestingLevel = 0;
1317
1318	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1319	{
1320	// determine end character to search for: a space
1321	CHAR cEnd = ' ';
1322	if (*pParam == '\"')
1323	{
1324	// or, if the data is enclosed in quotes, a quote
1325	cEnd = '\"';
1326	pParam++;
1327	}
1328
1329	if (pulOffset)
1330	// store the offset
1331	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1332
1333	// now find end of attribute
1334	pParam2 = pParam;
1335	while (*pParam)
1336	{
1337	if (*pParam == cEnd)
1338	// end character found
1339	break;
1340	else if (*pParam == '<')
1341	// yet another opening tag found:
1342	// this is probably some "<" in the attributes
1343	lNestingLevel++;
1344	else if (*pParam == '>')
1345	{
1346	lNestingLevel--;
1347	if (lNestingLevel < 0)
1348	// end of tag found:
1349	break;
1350	}
1351	ulCount++;
1352	pParam++;
1353	}
1354
1355	// copy attribute to new buffer
1356	if (ulCount)
1357	{
1358	prc = (PSZ)malloc(ulCount+1);
1359	memcpy(prc, pParam2, ulCount);
1360	*(prc+ulCount) = 0;
1361	}
1362	}
1363	return prc;
1364	}
1365
1366	/*
1367	* strhFindEndOfTag:
1368	* returns a pointer to the ">" char
1369	* which seems to terminate the tag beginning
1370	* after pszBeginOfTag.
1371	*
1372	* If additional "<" chars are found, we look
1373	* for additional ">" characters too.
1374	*
1375	* Note: You must pass the address of the opening
1376	* '<' character to this function.
1377	*
1378	* Example:
1379	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1380	+ strhFindEndOfTag(pszTest)
1381	+ returns.................................^ this.
1382	*
1383	*@@added V0.9.0 [umoeller]
1384	*/
1385
1386	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1387	{
1388	PSZ p = (PSZ)pszBeginOfTag,
1389	prc = NULL;
1390	LONG lNestingLevel = 0;
1391
1392	while (*p)
1393	{
1394	if (*p == '<')
1395	// another opening tag found:
1396	lNestingLevel++;
1397	else if (*p == '>')
1398	{
1399	// closing tag found:
1400	lNestingLevel--;
1401	if (lNestingLevel < 1)
1402	{
1403	// corresponding: return this
1404	prc = p;
1405	break;
1406	}
1407	}
1408	p++;
1409	}
1410
1411	return prc;
1412	}
1413
1414	/*
1415	* strhGetBlock:
1416	* this complex function searches the given string
1417	* for a pair of opening/closing HTML-style tags.
1418	*
1419	* If found, this routine returns TRUE and does
1420	* the following:
1421	*
1422	* 1) allocate a new buffer, copy the text
1423	* enclosed by the opening/closing tags
1424	* into it and set *ppszBlock to that
1425	* buffer;
1426	*
1427	* 2) if the opening tag has any attributes,
1428	* allocate another buffer, copy the
1429	* attributes into it and set *ppszAttrs
1430	* to that buffer; if no attributes are
1431	* found, *ppszAttrs will be NULL;
1432	*
1433	* 3) set *pulOffset to the offset from the
1434	* beginning of *ppszSearchIn where the
1435	* opening tag was found;
1436	*
1437	* 4) advance *ppszSearchIn to after the
1438	* closing tag, so that you can do
1439	* multiple searches without finding the
1440	* same tags twice.
1441	*
1442	* All buffers should be freed using free().
1443	*
1444	* This returns the following:
1445	* -- 0: no error
1446	* -- 1: tag not found at all (doesn't have to be an error)
1447	* -- 2: begin tag found, but no corresponding end tag found. This
1448	* is a real error.
1449	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1450	*
1451	* <B>Example:</B>
1452	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1453	+ PSZ pszBlock, pszAttrs;
1454	+ ULONG ulOfs;
1455	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1456	*
1457	* would do the following:
1458	*
1459	* 1) set pszBlock to a new string containing "This is page 1."
1460	* without quotes;
1461	*
1462	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1463	*
1464	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1465	*
1466	* 4) pSearch would be advanced to point to the "More text"
1467	* string in the original buffer.
1468	*
1469	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1470	* for HTML parsing.
1471	*
1472	*@@added V0.9.0 [umoeller]
1473	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1474	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1475	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1476	*/
1477
1478	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1479	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1480	const char *pszTag,
1481	PSZ *ppszBlock, // out: block enclosed by the tags
1482	PSZ *ppszAttribs, // out: attributes of the opening tag
1483	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1484	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1485	{
1486	ULONG ulrc = 1;
1487	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1488	pszSearch2 = pszBeginTag,
1489	pszClosingTag;
1490	ULONG cbTag = strlen(pszTag);
1491
1492	// go thru the block and check all tags if it's the
1493	// begin tag we're looking for
1494	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1495	{
1496	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1497	// yes: stop
1498	break;
1499	else
1500	pszBeginTag++;
1501	}
1502
1503	if (pszBeginTag)
1504	{
1505	// we found <TAG>:
1506	ULONG ulNestingLevel = 0;
1507
1508	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1509	// strchr(pszBeginTag, '>');
1510	if (pszEndOfBeginTag)
1511	{
1512	// does the caller want the attributes?
1513	if (ppszAttribs)
1514	{
1515	// yes: then copy them
1516	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1517	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1518	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1519	// add terminating 0
1520	*(pszAttrs + ulAttrLen) = 0;
1521
1522	*ppszAttribs = pszAttrs;
1523	}
1524
1525	// output offset of where we found the begin tag
1526	if (pulOfsBeginTag)
1527	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1528
1529	// now find corresponding closing tag (e.g. "</BODY>"
1530	pszBeginTag = pszEndOfBeginTag+1;
1531	// now we're behind the '>' char of the opening tag
1532	// increase offset of that too
1533	if (pulOfsBeginBlock)
1534	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1535
1536	// find next closing tag;
1537	// for the first run, pszSearch2 points to right
1538	// after the '>' char of the opening tag
1539	pszSearch2 = pszBeginTag;
1540	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1541	&& (pszClosingTag = strstr(pszSearch2, "<"))
1542	)
1543	{
1544	// if we have another opening tag before our closing
1545	// tag, we need to have several closing tags before
1546	// we're done
1547	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1548	ulNestingLevel++;
1549	else
1550	{
1551	// is this ours?
1552	if ( (*(pszClosingTag+1) == '/')
1553	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1554	)
1555	{
1556	// we've found a matching closing tag; is
1557	// it ours?
1558	if (ulNestingLevel == 0)
1559	{
1560	// our closing tag found:
1561	// allocate mem for a new buffer
1562	// and extract all the text between
1563	// open and closing tags to it
1564	ULONG ulLen = pszClosingTag - pszBeginTag;
1565	if (ppszBlock)
1566	{
1567	PSZ pNew = (PSZ)malloc(ulLen + 1);
1568	strhncpy0(pNew, pszBeginTag, ulLen);
1569	*ppszBlock = pNew;
1570	}
1571
1572	// raise search offset to after the closing tag
1573	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1574
1575	ulrc = 0;
1576
1577	break;
1578	} else
1579	// not our closing tag:
1580	ulNestingLevel--;
1581	}
1582	}
1583	// no matching closing tag: search on after that
1584	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1585	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1586
1587	if (!pszClosingTag)
1588	// no matching closing tag found:
1589	// return 2 (closing tag not found)
1590	ulrc = 2;
1591	} // end if (pszBeginTag)
1592	else
1593	// no matching ">" for opening tag found:
1594	ulrc = 3;
1595	}
1596
1597	return ulrc;
1598	}
1599
1600	/* ******************************************************************
1601	*
1602	* Miscellaneous
1603	*
1604	********************************************************************/
1605
1606	/*
1607	*@@ strhArrayAppend:
1608	* this appends a string to a "string array".
1609	*
1610	* A string array is considered a sequence of
1611	* zero-terminated strings in memory. That is,
1612	* after each string's null-byte, the next
1613	* string comes up.
1614	*
1615	* This is useful for composing a single block
1616	* of memory from, say, list box entries, which
1617	* can then be written to OS2.INI in one flush.
1618	*
1619	* To append strings to such an array, call this
1620	* function for each string you wish to append.
1621	* This will re-allocate *ppszRoot with each call,
1622	* and update *pcbRoot, which then contains the
1623	* total size of all strings (including all null
1624	* terminators).
1625	*
1626	* Pass *pcbRoot to PrfSaveProfileData to have the
1627	* block saved.
1628	*
1629	* Note: On the first call, ppszRoot and pcbRoot
1630	* _must_ be both NULL, or this crashes.
1631	*
1632	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1633	*/
1634
1635	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1636	const char *pcszNew, // in: string to append
1637	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1638	PULONG pcbRoot) // in/out: size of array
1639	{
1640	PSZ pszTemp;
1641
1642	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1643	cbNew = strlen(pcszNew);
1644
1645	pszTemp = (PSZ)malloc(*pcbRoot
1646	+ cbNew
1647	+ 1); // two null bytes
1648	if (*ppszRoot)
1649	{
1650	// not first loop: copy old stuff
1651	memcpy(pszTemp,
1652	*ppszRoot,
1653	*pcbRoot);
1654	free(*ppszRoot);
1655	}
1656	// append new string
1657	strcpy(pszTemp + *pcbRoot,
1658	pcszNew);
1659	// update root
1660	*ppszRoot = pszTemp;
1661	// update length
1662	*pcbRoot += cbNew + 1;
1663	}
1664
1665	/*
1666	*@@ strhCreateDump:
1667	* this dumps a memory block into a string
1668	* and returns that string in a new buffer.
1669	*
1670	* You must free() the returned PSZ after use.
1671	*
1672	* The output looks like the following:
1673	*
1674	+ 0000: FE FF 0E 02 90 00 00 00 ........
1675	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1676	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1677	*
1678	* Each line is terminated with a newline (\n)
1679	* character only.
1680	*
1681	*@@added V0.9.1 (2000-01-22) [umoeller]
1682	*/
1683
1684	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1685	ULONG ulSize, // in: size of buffer
1686	ULONG ulIndent) // in: indentation of every line
1687	{
1688	PSZ pszReturn = 0;
1689	XSTRING strReturn;
1690	CHAR szTemp[1000];
1691
1692	PBYTE pbCurrent = pb; // current byte
1693	ULONG ulCount = 0,
1694	ulCharsInLine = 0; // if this grows > 7, a new line is started
1695	CHAR szLine[400] = "",
1696	szAscii[30] = " "; // ASCII representation; filled for every line
1697	PSZ pszLine = szLine,
1698	pszAscii = szAscii;
1699
1700	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1701
1702	for (pbCurrent = pb;
1703	ulCount < ulSize;
1704	pbCurrent++, ulCount++)
1705	{
1706	if (ulCharsInLine == 0)
1707	{
1708	memset(szLine, ' ', ulIndent);
1709	pszLine += ulIndent;
1710	}
1711	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1712
1713	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1714	// printable character:
1715	pszAscii = pbCurrent;
1716	else
1717	*pszAscii = '.';
1718	pszAscii++;
1719
1720	ulCharsInLine++;
1721	if ( (ulCharsInLine > 7) // 8 bytes added?
1722	\|\| (ulCount == ulSize-1) // end of buffer reached?
1723	)
1724	{
1725	// if we haven't had eight bytes yet,
1726	// fill buffer up to eight bytes with spaces
1727	ULONG ul2;
1728	for (ul2 = ulCharsInLine;
1729	ul2 < 8;
1730	ul2++)
1731	pszLine += sprintf(pszLine, " ");
1732
1733	sprintf(szTemp, "%04lX: %s %s\n",
1734	(ulCount & 0xFFFFFFF8), // offset in hex
1735	szLine, // bytes string
1736	szAscii); // ASCII string
1737	xstrcat(&strReturn, szTemp, 0);
1738
1739	// restart line buffer
1740	pszLine = szLine;
1741
1742	// clear ASCII buffer
1743	strcpy(szAscii, " ");
1744	pszAscii = szAscii;
1745
1746	// reset line counter
1747	ulCharsInLine = 0;
1748	}
1749	}
1750
1751	if (strReturn.cbAllocated)
1752	pszReturn = strReturn.psz;
1753
1754	return pszReturn;
1755	}
1756
1757	/* ******************************************************************
1758	*
1759	* Fast string searches
1760	*
1761	********************************************************************/
1762
// ASSERT expands to nothing here, so the ASSERT() calls in the
// search functions below generate no code and their arguments
// are never evaluated
#define ASSERT(a)
1764
1765	/*
1766	* The following code has been taken from the "Standard
1767	* Function Library", file sflfind.c, and only slightly
1768	* modified to conform to the rest of this file.
1769	*
1770	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1771	* Revised: 98/05/04
1772	*
1773	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1774	*
1775	* The SFL Licence allows incorporating SFL code into other
1776	* programs, as long as the copyright is reprinted and the
1777	* code is marked as modified, so this is what we do.
1778	*/
1779
1780	/*
1781	*@@ strhmemfind:
1782	* searches for a pattern in a block of memory using the
1783	* Boyer-Moore-Horspool-Sunday algorithm.
1784	*
1785	* The block and pattern may contain any values; you must
1786	* explicitly provide their lengths. If you search for strings,
1787	* use strlen() on the buffers.
1788	*
1789	* Returns a pointer to the pattern if found within the block,
1790	* or NULL if the pattern was not found.
1791	*
1792	* This algorithm needs a "shift table" to cache data for the
1793	* search pattern. This table can be reused when performing
1794	* several searches with the same pattern.
1795	*
1796	* "shift" must point to an array big enough to hold 256 (8**2)
1797	* "size_t" values.
1798	*
1799	* If (*repeat_find == FALSE), the shift table is initialized.
1800	* So on the first search with a given pattern, *repeat_find
1801	* should be FALSE. This function sets it to TRUE after the
1802	* shift table is initialised, allowing the initialisation
1803	* phase to be skipped on subsequent searches.
1804	*
1805	* This function is most effective when repeated searches are
1806	* made for the same pattern in one or more large buffers.
1807	*
1808	* Example:
1809	*
1810	+ PSZ pszHaystack = "This is a sample string.",
1811	+ pszNeedle = "string";
1812	+ size_t shift[256];
1813	+ BOOL fRepeat = FALSE;
1814	+
1815	+ PSZ pFound = strhmemfind(pszHaystack,
1816	+ strlen(pszHaystack), // block size
1817	+ pszNeedle,
1818	+ strlen(pszNeedle), // pattern size
1819	+ shift,
1820	+ &fRepeat);
1821	*
1822	* Taken from the "Standard Function Library", file sflfind.c.
1823	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1824	* Slightly modified by umoeller.
1825	*
1826	*@@added V0.9.3 (2000-05-08) [umoeller]
1827	*/
1828
1829	void* strhmemfind(const void *in_block, // in: block containing data
1830	size_t block_size, // in: size of block in bytes
1831	const void *in_pattern, // in: pattern to search for
1832	size_t pattern_size, // in: size of pattern block
1833	size_t *shift, // in/out: shift table (search buffer)
1834	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
1835	{
1836	size_t byte_nbr, // Distance through block
1837	match_size; // Size of matched part
1838	const unsigned char
1839	*match_base = NULL, // Base of match of pattern
1840	*match_ptr = NULL, // Point within current match
1841	*limit = NULL; // Last potiental match point
1842	const unsigned char
1843	block = (unsigned char ) in_block, // Concrete pointer to block data
1844	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
1845
1846	if ( (block == NULL)
1847	\|\| (pattern == NULL)
1848	\|\| (shift == NULL)
1849	)
1850	return NULL;
1851
1852	// Pattern must be smaller or equal in size to string
1853	if (block_size < pattern_size)
1854	return NULL; // Otherwise it's not found
1855
1856	if (pattern_size == 0) // Empty patterns match at start
1857	return ((void *)block);
1858
1859	// Build the shift table unless we're continuing a previous search
1860
1861	// The shift table determines how far to shift before trying to match
1862	// again, if a match at this point fails. If the byte after where the
1863	// end of our pattern falls is not in our pattern, then we start to
1864	// match again after that byte; otherwise we line up the last occurence
1865	// of that byte in our pattern under that byte, and try match again.
1866
1867	if (!repeat_find \|\| !*repeat_find)
1868	{
1869	for (byte_nbr = 0;
1870	byte_nbr < 256;
1871	byte_nbr++)
1872	shift[byte_nbr] = pattern_size + 1;
1873	for (byte_nbr = 0;
1874	byte_nbr < pattern_size;
1875	byte_nbr++)
1876	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1877
1878	if (repeat_find)
1879	*repeat_find = TRUE;
1880	}
1881
1882	// Search for the block, each time jumping up by the amount
1883	// computed in the shift table
1884
1885	limit = block + (block_size - pattern_size + 1);
1886	ASSERT (limit > block);
1887
1888	for (match_base = block;
1889	match_base < limit;
1890	match_base += shift[*(match_base + pattern_size)])
1891	{
1892	match_ptr = match_base;
1893	match_size = 0;
1894
1895	// Compare pattern until it all matches, or we find a difference
1896	while (*match_ptr++ == pattern[match_size++])
1897	{
1898	ASSERT (match_size <= pattern_size &&
1899	match_ptr == (match_base + match_size));
1900
1901	// If we found a match, return the start address
1902	if (match_size >= pattern_size)
1903	return ((void*)(match_base));
1904
1905	}
1906	}
1907	return NULL; // Found nothing
1908	}
1909
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      Will match strings irrespective of case. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      NULL is also returned if either argument is NULL. An empty
 *      pattern matches at the start of the string.
 *
 *      Case folding is done with tolower(), so it depends on the
 *      current C locale.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const char
        *match_base = NULL,             // Base of match of pattern
        *limit = NULL;                  // One past the last potential match point

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return NULL;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return NULL;                    // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    // Note: characters are converted to unsigned char before they are
    // passed to tolower(), which is undefined for negative values
    // other than EOF.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
                = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    limit = string + (string_size - pattern_size + 1);

    // match_base [pattern_size] is at worst the null terminator
    // of string, so the look-ahead in the increment is safe
    for (match_base = string;
         match_base < limit;
         match_base += shift [(unsigned char) tolower ((unsigned char) match_base [pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        for (match_size = 0;
             match_size < pattern_size;
             match_size++)
        {
            if (    tolower ((unsigned char) match_base [match_size])
                 != tolower ((unsigned char) pattern [match_size])
               )
                break;
        }

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *)(match_base));
    }

    return NULL;                        // Found nothing
}
2004

Note: See TracBrowser for help on using the repository browser.

Download in other formats: