Context Navigation

source: branches/branch-1-0/src/helpers/stringh.c@ 302

Visit:

Last change on this file since 302 was 263, checked in by pr, 21 years ago
Fixes WarpIN bug 461
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 59.6 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2002 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
73	/*
74	*@@ strhStoreDebug:
75	* memory debug version of strhStore.
76	*
77	*@@added V0.9.16 (2001-12-08) [umoeller]
78	*/
79
80	APIRET (strhStoreDebug)(PSZ *ppszTarget,
81	PCSZ pcszSource,
82	PULONG pulLength, // out: length of new string (ptr can be NULL)
83	PCSZ pcszSourceFile,
84	unsigned long ulLine,
85	PCSZ pcszFunction)
86	{
87	ULONG ulLength = 0;
88
89
90
91	if (ppszTarget)
92	{
93	if (*ppszTarget)
94	free(*ppszTarget);
95
96	if ( (pcszSource)
97	&& (ulLength = strlen(pcszSource))
98	)
99	{
100	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
101	pcszSourceFile,
102	ulLine,
103	pcszFunction))
104	memcpy(*ppszTarget, pcszSource, ulLength + 1);
105	else
106	return ERROR_NOT_ENOUGH_MEMORY;
107	}
108	else
109	*ppszTarget = NULL;
110	}
111
112	if (pulLength)
113	*pulLength = ulLength;
114
115	return NO_ERROR;
116	}
117
118	#endif
119
120	/*
121	*@@ strhStore:
122	* stores a copy of the given string in the specified
123	* buffer. Uses strdup internally.
124	*
125	* If *ppszTarget != NULL, the previous string is freed
126	* and set to NULL.
127	* If pcszSource != NULL, a copy of it is stored in the
128	* buffer.
129	*
130	*@@added V0.9.16 (2001-12-06) [umoeller]
131	*/
132
133	APIRET strhStore(PSZ *ppszTarget,
134	PCSZ pcszSource,
135	PULONG pulLength) // out: length of new string (ptr can be NULL)
136	{
137	ULONG ulLength = 0;
138
139	if (ppszTarget)
140	{
141	if (*ppszTarget)
142	free(*ppszTarget);
143
144	if ( (pcszSource)
145	&& (ulLength = strlen(pcszSource))
146	)
147	{
148	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
149	memcpy(*ppszTarget, pcszSource, ulLength + 1);
150	else
151	return ERROR_NOT_ENOUGH_MEMORY;
152	}
153	else
154	*ppszTarget = NULL;
155	}
156	else
157	return ERROR_INVALID_PARAMETER;
158
159	if (pulLength)
160	*pulLength = ulLength;
161
162	return NO_ERROR;
163	}
164
165	/*
166	*@@ strhcpy:
167	* like strdup, but this one doesn't crash if string2 is NULL,
168	* but sets the first byte in string1 to \0 instead.
169	*
170	*@@added V0.9.14 (2001-08-01) [umoeller]
171	*/
172
173	PSZ strhcpy(PSZ string1, PCSZ string2)
174	{
175	if (string2)
176	return strcpy(string1, string2);
177
178	*string1 = '\0';
179	return string1;
180	}
181
182	#ifdef __DEBUG_MALLOC_ENABLED__
183
184	/*
185	*@@ strhdupDebug:
186	* memory debug version of strhdup.
187	*
188	*@@added V0.9.0 [umoeller]
189	*/
190
191	PSZ strhdupDebug(PCSZ pcszSource,
192	unsigned long *pulLength,
193	PCSZ pcszSourceFile,
194	unsigned long ulLine,
195	PCSZ pcszFunction)
196	{
197	PSZ pszReturn = NULL;
198	ULONG ulLength = 0;
199
200	if ( (pcszSource)
201	&& (ulLength = strlen(pcszSource))
202	)
203	{
204	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
205	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
206	ulLine,
207	pcszFunction))
208	memcpy(pszReturn, pcszSource, ulLength + 1);
209	}
210
211	if (pulLength)
212	*pulLength = ulLength;
213
214	return pszReturn;
215	}
216
217	#endif // __DEBUG_MALLOC_ENABLED__
218
219	/*
220	*@@ strhdup:
221	* like strdup, but this one doesn't crash if pszSource
222	* is NULL, but returns NULL also. In addition, this
223	* can report the length of the string (V0.9.16).
224	*
225	*@@added V0.9.0 [umoeller]
226	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
227	*/
228
229	PSZ strhdup(PCSZ pcszSource,
230	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
231	{
232	PSZ pszReturn = NULL;
233	ULONG ulLength = 0;
234
235	if ( (pcszSource)
236	&& (ulLength = strlen(pcszSource))
237	)
238	{
239	if (pszReturn = (PSZ)malloc(ulLength + 1))
240	memcpy(pszReturn, pcszSource, ulLength + 1);
241	}
242
243	if (pulLength)
244	*pulLength = ulLength;
245
246	return pszReturn;
247	}
248
249	/*
250	*@@ strhcmp:
251	* better strcmp. This doesn't crash if any of the
252	* string pointers are NULL, but returns a proper
253	* value then.
254	*
255	* Besides, this is guaranteed to only return -1, 0,
256	* or +1, while strcmp can return any positive or
257	* negative value. This is useful for tree comparison
258	* funcs.
259	*
260	*@@added V0.9.9 (2001-02-16) [umoeller]
261	*/
262
263	int strhcmp(PCSZ p1, PCSZ p2)
264	{
265	if (p1 && p2)
266	{
267	int i = strcmp(p1, p2);
268	if (i < 0) return -1;
269	if (i > 0) return +1;
270	}
271	else if (p1)
272	// but p2 is NULL: p1 greater than p2 then
273	return +1;
274	else if (p2)
275	// but p1 is NULL: p1 less than p2 then
276	return -1;
277
278	// return 0 if strcmp returned 0 above or both strings are NULL
279	return 0;
280	}
281
282	/*
283	*@@ strhicmp:
284	* like strhcmp, but compares without respect
285	* to case.
286	*
287	*@@added V0.9.9 (2001-04-07) [umoeller]
288	*/
289
290	int strhicmp(PCSZ p1, PCSZ p2)
291	{
292	if (p1 && p2)
293	{
294	int i = stricmp(p1, p2);
295	if (i < 0) return -1;
296	if (i > 0) return +1;
297	}
298	else if (p1)
299	// but p2 is NULL: p1 greater than p2 then
300	return +1;
301	else if (p2)
302	// but p1 is NULL: p1 less than p2 then
303	return -1;
304
305	// return 0 if strcmp returned 0 above or both strings are NULL
306	return 0;
307	}
308
309	/*
310	*@@ strhistr:
311	* like strstr, but case-insensitive.
312	*
313	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
314	*/
315
316	PSZ strhistr(PCSZ string1, PCSZ string2)
317	{
318	PSZ prc = NULL;
319
320	if ((string1) && (string2))
321	{
322	PSZ pszSrchIn = strdup(string1);
323	PSZ pszSrchFor = strdup(string2);
324
325	if ((pszSrchIn) && (pszSrchFor))
326	{
327	strupr(pszSrchIn);
328	strupr(pszSrchFor);
329
330	if (prc = strstr(pszSrchIn, pszSrchFor))
331	{
332	// prc now has the first occurence of the string,
333	// but in pszSrchIn; we need to map this
334	// return value to the original string
335	prc = (prc-pszSrchIn) // offset in pszSrchIn
336	+ (PSZ)string1;
337	}
338	}
339	if (pszSrchFor)
340	free(pszSrchFor);
341	if (pszSrchIn)
342	free(pszSrchIn);
343	}
344
345	return prc;
346	}
347
348	/*
349	*@@ strhncpy0:
350	* like strncpy, but always appends a 0 character.
351	*
352	*@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
353	*/
354
355	ULONG strhncpy0(PSZ pszTarget,
356	PCSZ pszSource,
357	ULONG cbSource)
358	{
359	ULONG ul = 0;
360	PSZ pTarget = pszTarget,
361	pSource;
362
363	if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
364	{
365	for (ul = 0; ul < cbSource; ul++)
366	if (*pSource)
367	pTarget++ = pSource++;
368	else
369	break;
370	}
371
372	*pTarget = 0;
373
374	return ul;
375	}
376
377	/*
378	*@@ strhlen:
379	* like strlen, but doesn't crash on
380	* null strings, but returns 0 also.
381	*
382	*@@added V0.9.19 (2002-04-02) [umoeller]
383	*/
384
385	ULONG strhlen(PCSZ pcsz)
386	{
387	if (pcsz)
388	return strlen(pcsz);
389
390	return 0;
391	}
392
393	/*
394	*@@ strhSize:
395	* returns the size of the given string, which
396	* is the memory required to allocate a copy,
397	* including the null terminator.
398	*
399	* Returns 0 only if pcsz is NULL. If pcsz
400	* points to a null character, this returns 1.
401	*
402	*@@added V0.9.18 (2002-02-13) [umoeller]
403	*@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
404	*/
405
406	ULONG strhSize(PCSZ pcsz)
407	{
408	if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
409	return (strlen(pcsz) + 1);
410
411	return 0;
412	}
413
414	/*
415	* strhCount:
416	* this counts the occurences of c in pszSearch.
417	*/
418
419	ULONG strhCount(PCSZ pszSearch,
420	CHAR c)
421	{
422	PSZ p = (PSZ)pszSearch;
423	ULONG ulCount = 0;
424	while (TRUE)
425	{
426	p = strchr(p, c);
427	if (p)
428	{
429	ulCount++;
430	p++;
431	}
432	else
433	break;
434	}
435	return ulCount;
436	}
437
438	/*
439	*@@ strhIsDecimal:
440	* returns TRUE if psz consists of decimal digits only.
441	*/
442
443	BOOL strhIsDecimal(PSZ psz)
444	{
445	PSZ p = psz;
446	while (*p != 0)
447	{
448	if (isdigit(*p) == 0)
449	return FALSE;
450	p++;
451	}
452
453	return TRUE;
454	}
455
456	#ifdef __DEBUG_MALLOC_ENABLED__
457
458	/*
459	*@@ strhSubstrDebug:
460	* memory debug version of strhSubstr.
461	*
462	*@@added V0.9.14 (2001-08-01) [umoeller]
463	*/
464
465	PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
466	PCSZ pEnd, // in: last char (not included)
467	PCSZ pcszSourceFile,
468	unsigned long ulLine,
469	PCSZ pcszFunction)
470	{
471	PSZ pszSubstr = NULL;
472
473	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
474	{
475	ULONG cbSubstr = (pEnd - pBegin);
476	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
477	pcszSourceFile,
478	ulLine,
479	pcszFunction))
480	{
481	// strhncpy0(pszSubstr, pBegin, cbSubstr);
482	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
483	*(pszSubstr + cbSubstr) = '\0';
484	}
485	}
486
487	return pszSubstr;
488	}
489
490	#endif // __DEBUG_MALLOC_ENABLED__
491
492	/*
493	*@@ strhSubstr:
494	* this creates a new PSZ containing the string
495	* from pBegin to pEnd, excluding the pEnd character.
496	* The new string is null-terminated. The caller
497	* must free() the new string after use.
498	*
499	* Example:
500	+ "1234567890"
501	+ ^ ^
502	+ p1 p2
503	+ strhSubstr(p1, p2)
504	* would return a new string containing "2345678".
505	*
506	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
507	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
508	*/
509
510	PSZ strhSubstr(PCSZ pBegin, // in: first char
511	PCSZ pEnd) // in: last char (not included)
512	{
513	PSZ pszSubstr = NULL;
514
515	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
516	{
517	ULONG cbSubstr = (pEnd - pBegin);
518	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
519	{
520	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
521	*(pszSubstr + cbSubstr) = '\0';
522	}
523	}
524
525	return pszSubstr;
526	}
527
528	/*
529	*@@ strhExtract:
530	* searches pszBuf for the cOpen character and returns
531	* the data in between cOpen and cClose, excluding
532	* those two characters, in a newly allocated buffer
533	* which you must free() afterwards.
534	*
535	* Spaces and newlines/linefeeds are skipped.
536	*
537	* If the search was successful, the new buffer
538	* is returned and, if (ppEnd != NULL), *ppEnd points
539	* to the first character after the cClose character
540	* found in the buffer.
541	*
542	* If the search was not successful, NULL is
543	* returned, and *ppEnd is unchanged.
544	*
545	* If another cOpen character is found before
546	* cClose, matching cClose characters will be skipped.
547	* You can therefore nest the cOpen and cClose
548	* characters.
549	*
550	* This function ignores cOpen and cClose characters
551	* in C-style comments and strings surrounded by
552	* double quotes.
553	*
554	* Example:
555	*
556	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
557	+ pEnd;
558	+ strhExtract(pszBuf,
559	+ '{', '}',
560	+ &pEnd)
561	*
562	* would return a new buffer containing " --blah-- ",
563	* and ppEnd would afterwards point to the space
564	* before "next" in the static buffer.
565	*
566	*@@added V0.9.0 [umoeller]
567	*/
568
569	PSZ strhExtract(PCSZ pszBuf, // in: search buffer
570	CHAR cOpen, // in: opening char
571	CHAR cClose, // in: closing char
572	PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
573	{
574	PSZ pszReturn = NULL;
575	PCSZ pOpen;
576	if ( (pszBuf)
577	&& (pOpen = strchr(pszBuf, cOpen))
578	)
579	{
580	// opening char found:
581	// now go thru the whole rest of the buffer
582	PCSZ p = pOpen + 1;
583	LONG lLevel = 1; // if this goes 0, we're done
584	while (*p)
585	{
586	if (*p == cOpen)
587	lLevel++;
588	else if (*p == cClose)
589	{
590	lLevel--;
591	if (lLevel <= 0)
592	{
593	// matching closing bracket found:
594	// extract string
595	pszReturn = strhSubstr(pOpen + 1, // after cOpen
596	p); // excluding cClose
597	if (ppEnd)
598	*ppEnd = p + 1;
599	break; // while (*p)
600	}
601	}
602	else if (*p == '\"')
603	{
604	// beginning of string:
605	PCSZ p2 = p+1;
606	// find end of string
607	while ((p2) && (p2 != '\"'))
608	p2++;
609
610	if (*p2 == '\"')
611	// closing quote found:
612	// search on after that
613	p = p2; // raised below
614	else
615	break; // while (*p)
616	}
617
618	p++;
619	}
620	}
621
622	return pszReturn;
623	}
624
625	/*
626	*@@ strhQuote:
627	* similar to strhExtract, except that
628	* opening and closing chars are the same,
629	* and therefore no nesting is possible.
630	* Useful for extracting stuff between
631	* quotes.
632	*
633	*@@added V0.9.0 [umoeller]
634	*/
635
636	PSZ strhQuote(PSZ pszBuf,
637	CHAR cQuote,
638	PSZ *ppEnd)
639	{
640	PSZ pszReturn = NULL,
641	p1 = NULL;
642	if ((p1 = strchr(pszBuf, cQuote)))
643	{
644	PSZ p2;
645	if (p2 = strchr(p1+1, cQuote))
646	{
647	pszReturn = strhSubstr(p1+1, p2);
648	if (ppEnd)
649	// store closing char
650	*ppEnd = p2 + 1;
651	}
652	}
653
654	return pszReturn;
655	}
656
657	/*
658	*@@ strhStrip:
659	* removes all double spaces.
660	* This copies within the "psz" buffer.
661	* If any double spaces are found, the
662	* string will be shorter than before,
663	* but the buffer is _not_ reallocated,
664	* so there will be unused bytes at the
665	* end.
666	*
667	* Returns the number of spaces removed.
668	*
669	*@@added V0.9.0 [umoeller]
670	*/
671
672	ULONG strhStrip(PSZ psz) // in/out: string
673	{
674	PSZ p;
675	ULONG cb = strlen(psz),
676	ulrc = 0;
677
678	for (p = psz; p < psz+cb; p++)
679	{
680	if ((p == ' ') && ((p+1) == ' '))
681	{
682	PSZ p2 = p;
683	while (*p2)
684	{
685	p2 = (p2+1);
686	p2++;
687	}
688	cb--;
689	p--;
690	ulrc++;
691	}
692	}
693	return ulrc;
694	}
695
696	/*
697	*@@ strhins:
698	* this inserts one string into another.
699	*
700	* pszInsert is inserted into pszBuffer at offset
701	* ulInsertOfs (which counts from 0).
702	*
703	* A newly allocated string is returned. pszBuffer is
704	* not changed. The new string should be free()'d after
705	* use.
706	*
707	* Upon errors, NULL is returned.
708	*
709	*@@changed V0.9.0 [umoeller]: completely rewritten.
710	*/
711
712	PSZ strhins(PCSZ pcszBuffer,
713	ULONG ulInsertOfs,
714	PCSZ pcszInsert)
715	{
716	PSZ pszNew = NULL;
717
718	if ((pcszBuffer) && (pcszInsert))
719	{
720	do {
721	ULONG cbBuffer = strlen(pcszBuffer);
722	ULONG cbInsert = strlen(pcszInsert);
723
724	// check string length
725	if (ulInsertOfs > cbBuffer + 1)
726	break; // do
727
728	// OK, let's go.
729	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
730
731	// copy stuff before pInsertPos
732	memcpy(pszNew,
733	pcszBuffer,
734	ulInsertOfs);
735	// copy string to be inserted
736	memcpy(pszNew + ulInsertOfs,
737	pcszInsert,
738	cbInsert);
739	// copy stuff after pInsertPos
740	strcpy(pszNew + ulInsertOfs + cbInsert,
741	pcszBuffer + ulInsertOfs);
742	} while (FALSE);
743	}
744
745	return pszNew;
746	}
747
748	/*
749	*@@ strhFindReplace:
750	* wrapper around xstrFindReplace to work with C strings.
751	* Note that *ppszBuf can get reallocated and must
752	* be free()'able.
753	*
754	* Repetitive use of this wrapper is not recommended
755	* because it is considerably slower than xstrFindReplace.
756	*
757	*@@added V0.9.6 (2000-11-01) [umoeller]
758	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
759	*/
760
761	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
762	PULONG pulOfs, // in: where to begin search (0 = start);
763	// out: ofs of first char after replacement string
764	PCSZ pcszSearch, // in: search string; cannot be NULL
765	PCSZ pcszReplace) // in: replacement string; cannot be NULL
766	{
767	ULONG ulrc = 0;
768	XSTRING xstrBuf,
769	xstrFind,
770	xstrReplace;
771	size_t ShiftTable[256];
772	BOOL fRepeat = FALSE;
773	xstrInitSet(&xstrBuf, *ppszBuf);
774	// reallocated and returned, so we're safe
775	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
776	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
777	// these two are never freed, so we're safe too
778
779	if ((ulrc = xstrFindReplace(&xstrBuf,
780	pulOfs,
781	&xstrFind,
782	&xstrReplace,
783	ShiftTable,
784	&fRepeat)))
785	// replaced:
786	*ppszBuf = xstrBuf.psz;
787
788	return ulrc;
789	}
790
791	/*
792	* strhWords:
793	* returns the no. of words in "psz".
794	* A string is considered a "word" if
795	* it is surrounded by spaces only.
796	*
797	*@@added V0.9.0 [umoeller]
798	*/
799
800	ULONG strhWords(PSZ psz)
801	{
802	PSZ p;
803	ULONG cb = strlen(psz),
804	ulWords = 0;
805	if (cb > 1)
806	{
807	ulWords = 1;
808	for (p = psz; p < psz+cb; p++)
809	if (*p == ' ')
810	ulWords++;
811	}
812	return ulWords;
813	}
814
815	/*
816	*@@ strhGetWord:
817	* finds word boundaries.
818	*
819	* *ppszStart is used as the beginning of the
820	* search.
821	*
822	* If a word is found, *ppszStart is set to
823	* the first character of the word which was
824	* found and *ppszEnd receives the address
825	* of the first character _after_ the word,
826	* which is probably a space or a \n or \r char.
827	* We then return TRUE.
828	*
829	* The search is stopped if a null character
830	* is found or pLimit is reached. In that case,
831	* FALSE is returned.
832	*
833	*@@added V0.9.1 (2000-02-13) [umoeller]
834	*/
835
836	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
837	// out: start of word (if TRUE is returned)
838	PCSZ pLimit, // in: ptr to last char after *ppszStart to be
839	// searched; if the word does not end before
840	// or with this char, FALSE is returned
841	PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
842	PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
843	PSZ *ppszEnd) // out: first char _after_ word
844	// (if TRUE is returned)
845	{
846	// characters after which a word can be started
847	// PCSZ pcszBeginChars = "\x0d\x0a ";
848	// PCSZ pcszEndChars = "\x0d\x0a /-";
849
850	PSZ pStart = *ppszStart;
851
852	// find start of word
853	while ( (pStart < (PSZ)pLimit)
854	&& (strchr(pcszBeginChars, *pStart))
855	)
856	// if char is a "before word" char: go for next
857	pStart++;
858
859	if (pStart < (PSZ)pLimit)
860	{
861	// found a valid "word start" character
862	// (which is not in pcszBeginChars):
863
864	// find end of word
865	PSZ pEndOfWord = pStart;
866	while ( (pEndOfWord <= (PSZ)pLimit)
867	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
868	)
869	// if char is not an "end word" char: go for next
870	pEndOfWord++;
871
872	if (pEndOfWord <= (PSZ)pLimit)
873	{
874	// whoa, got a word:
875	*ppszStart = pStart;
876	*ppszEnd = pEndOfWord;
877	return TRUE;
878	}
879	}
880
881	return FALSE;
882	}
883
884	/*
885	*@@ strhIsWord:
886	* returns TRUE if p points to a "word"
887	* in pcszBuf.
888	*
889	* p is considered a word if the character _before_
890	* it is in pcszBeginChars and the char _after_
891	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
892	*
893	*@@added V0.9.6 (2000-11-12) [umoeller]
894	*@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
895	*/
896
897	BOOL strhIsWord(PCSZ pcszBuf,
898	PCSZ p, // in: start of word
899	ULONG cbSearch, // in: length of word
900	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
901	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
902	{
903	// check previous char
904	if ( (p == pcszBuf)
905	\|\| (strchr(pcszBeginChars, *(p-1)))
906	)
907	{
908	// OK, valid begin char:
909	// check end char
910	CHAR cNextChar;
911	if (!(cNextChar = p[cbSearch]))
912	// null terminator:
913	return TRUE;
914	else
915	{
916	// not null terminator: check if char is
917	// in the list of valid end chars
918	if (strchr(pcszEndChars, cNextChar))
919	{
920	// OK, is end char: avoid doubles of that char,
921	// but allow spaces
922	// fixed V0.9.18 (2002-02-23) [umoeller]
923	CHAR cNextNext = p[cbSearch + 1];
924	if ( (cNextNext != cNextChar)
925	\|\| (cNextNext == ' ')
926	\|\| (cNextNext == 0)
927	)
928	return TRUE;
929	}
930	}
931	}
932
933	return FALSE;
934	}
935
936	/*
937	*@@ strhFindWord:
938	* searches for pszSearch in pszBuf, which is
939	* returned if found (or NULL if not).
940	*
941	* As opposed to strstr, this finds pszSearch
942	* only if it is a "word". A search string is
943	* considered a word if the character _before_
944	* it is in pcszBeginChars and the char _after_
945	* it is in pcszEndChars.
946	*
947	* Example:
948	+ strhFindWord("This is an example.", "is");
949	+ returns ...........^ this, but not the "is" in "This".
950	*
951	* The algorithm here uses strstr to find pszSearch in pszBuf
952	* and performs additional "is-word" checks for each item found
953	* (by calling strhIsWord).
954	*
955	* Note that this function is fairly slow compared to xstrFindWord.
956	*
957	*@@added V0.9.0 (99-11-08) [umoeller]
958	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
959	*/
960
961	PSZ strhFindWord(PCSZ pszBuf,
962	PCSZ pszSearch,
963	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
964	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
965	{
966	PSZ pszReturn = 0;
967	ULONG cbBuf = strlen(pszBuf),
968	cbSearch = strlen(pszSearch);
969
970	if ((cbBuf) && (cbSearch))
971	{
972	PCSZ p = pszBuf;
973
974	do // while p
975	{
976	p = strstr(p, pszSearch);
977	if (p)
978	{
979	// string found:
980	// check if that's a word
981
982	if (strhIsWord(pszBuf,
983	p,
984	cbSearch,
985	pcszBeginChars,
986	pcszEndChars))
987	{
988	// valid end char:
989	pszReturn = (PSZ)p;
990	break;
991	}
992
993	p += cbSearch;
994	}
995	} while (p);
996
997	}
998	return pszReturn;
999	}
1000
1001	/*
1002	*@@ strhFindEOL:
1003	* returns a pointer to the next \r, \n or null character
1004	* following pszSearchIn. Stores the offset in *pulOffset.
1005	*
1006	* This should never return NULL because at some point,
1007	* there will be a null byte in your string.
1008	*
1009	*@@added V0.9.4 (2000-07-01) [umoeller]
1010	*/
1011
1012	PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
1013	PULONG pulOffset) // out: offset (ptr can be NULL)
1014	{
1015	PCSZ p = pcszSearchIn,
1016	prc = 0;
1017	while (TRUE)
1018	{
1019	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1020	{
1021	prc = p;
1022	break;
1023	}
1024	p++;
1025	}
1026
1027	if ((pulOffset) && (prc))
1028	*pulOffset = prc - pcszSearchIn;
1029
1030	return ((PSZ)prc);
1031	}
1032
1033	/*
1034	*@@ strhFindNextLine:
1035	* like strhFindEOL, but this returns the character
1036	* _after_ \r or \n. Note that this might return
1037	* a pointer to terminating NULL character also.
1038	*/
1039
1040	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1041	{
1042	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1043	// pEOL now points to the \r char or the terminating 0 byte;
1044	// if not null byte, advance pointer
1045	PSZ pNextLine = pEOL;
1046	if (*pNextLine == '\r')
1047	pNextLine++;
1048	if (*pNextLine == '\n')
1049	pNextLine++;
1050	if (pulOffset)
1051	*pulOffset = pNextLine - pszSearchIn;
1052	return pNextLine;
1053	}
1054
1055	/*
1056	*@@ strhBeautifyTitle:
1057	* replaces all line breaks (0xd, 0xa) with spaces.
1058	* Returns the new length of the string or 0 on
1059	* errors.
1060	*
1061	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1062	*@@changed V0.9.19 (2002-06-18) [umoeller]: now returning length
1063	*/
1064
1065	ULONG strhBeautifyTitle(PSZ psz)
1066	{
1067	ULONG ulrc;
1068	PSZ p = psz;
1069
1070	while (*p)
1071	{
1072	if ( (*p == '\r')
1073	\|\| (*p == '\n')
1074	)
1075	{
1076	if ( (p != psz)
1077	&& (p[-1] == ' ')
1078	)
1079	memmove(p, p + 1, strlen(p));
1080	else
1081	*p++ = ' ';
1082	}
1083	else
1084	p++;
1085	}
1086
1087	return (p - psz);
1088	}
1089
1090	/*
1091	*@@ strhBeautifyTitle:
1092	* like strhBeautifyTitle, but copies into
1093	* a new buffer. More efficient.
1094	*
1095	*@@added V0.9.19 (2002-06-18) [umoeller]
1096	*/
1097
1098	ULONG strhBeautifyTitle2(PSZ pszTarget, // out: beautified string
1099	PCSZ pcszSource) // in: string to be beautified (can be NULL)
1100	{
1101	ULONG ulrc;
1102	PCSZ pSource = pcszSource;
1103	PSZ pTarget = pszTarget;
1104	CHAR c;
1105	if (!pcszSource)
1106	{
1107	*pszTarget = '\0';
1108	return 0;
1109	}
1110
1111	while (c = *pSource++)
1112	{
1113	if ( (c == '\r')
1114	\|\| (c == '\n')
1115	)
1116	{
1117	if ( (pTarget == pszTarget)
1118	\|\| (pTarget[-1] != ' ')
1119	)
1120	*pTarget++ = ' ';
1121	}
1122	else
1123	*pTarget++ = c;
1124	}
1125
1126	// null-terminate
1127	*pTarget = '\0';
1128
1129	return (pTarget - pszTarget);
1130	}
1131
1132	/*
1133	* strhFindAttribValue:
1134	* searches for pszAttrib in pszSearchIn; if found,
1135	* returns the first character after the "=" char.
1136	* If "=" is not found, a space, \r, and \n are
1137	* also accepted. This function searches without
1138	* respecting case.
1139	*
1140	* <B>Example:</B>
1141	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1142	+
1143	+ returns ....................... ^ this address.
1144	*
1145	*@@added V0.9.0 [umoeller]
1146	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1147	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1148	*/
1149
1150	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1151	{
1152	PSZ prc = 0;
1153	PSZ pszSearchIn2, p;
1154	ULONG cbAttrib = strlen(pszAttrib),
1155	ulLength = strlen(pszSearchIn);
1156
1157	// use alloca(), so memory is freed on function exit
1158	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1159	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1160
1161	// 1) find token, (space char, \n, \r, \t)
1162	p = strtok(pszSearchIn2, " \n\r\t");
1163	while (p)
1164	{
1165	CHAR c2;
1166	PSZ pOrig;
1167
1168	// check tag name
1169	if (!strnicmp(p, pszAttrib, cbAttrib))
1170	{
1171	// position in original string
1172	pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1173
1174	// yes:
1175	prc = pOrig + cbAttrib;
1176	c2 = *prc;
1177	while ( ( (c2 == ' ')
1178	\|\| (c2 == '=')
1179	\|\| (c2 == '\n')
1180	\|\| (c2 == '\r')
1181	)
1182	&& (c2 != 0)
1183	)
1184	c2 = *++prc;
1185
1186	break;
1187	}
1188
1189	p = strtok(NULL, " \n\r\t");
1190	}
1191
1192	return prc;
1193	}
1194
1195	/* PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1196	{
1197	PSZ prc = 0;
1198	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1199	p,
1200	p2;
1201	ULONG cbAttrib = strlen(pszAttrib);
1202
1203	// 1) find space char
1204	while ((p = strchr(pszSearchIn2, ' ')))
1205	{
1206	CHAR c;
1207	p++;
1208	if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1209	{
1210	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1211	// now check whether the p+strlen(pszAttrib)
1212	// is a valid end-of-tag character
1213	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1214	&& ( (c == ' ')
1215	\|\| (c == '>')
1216	\|\| (c == '=')
1217	\|\| (c == '\r')
1218	\|\| (c == '\n')
1219	\|\| (c == 0)
1220	)
1221	)
1222	{
1223	// yes:
1224	CHAR c2;
1225	p2 = p + cbAttrib;
1226	c2 = *p2;
1227	while ( ( (c2 == ' ')
1228	\|\| (c2 == '=')
1229	\|\| (c2 == '\n')
1230	\|\| (c2 == '\r')
1231	)
1232	&& (c2 != 0)
1233	)
1234	c2 = *++p2;
1235
1236	prc = p2;
1237	break; // first while
1238	}
1239	}
1240	else
1241	break;
1242
1243	pszSearchIn2++;
1244	}
1245	return prc;
1246	} */
1247
1248	/*
1249	* strhGetNumAttribValue:
1250	* stores the numerical parameter value of an HTML-style
1251	* tag in *pl.
1252	*
1253	* Returns the address of the tag parameter in the
1254	* search buffer, if found, or NULL.
1255	*
1256	* <B>Example:</B>
1257	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1258	*
1259	* stores 123 in the "l" variable.
1260	*
1261	*@@added V0.9.0 [umoeller]
1262	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1263	*/
1264
1265	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1266	const char *pszTag, // e.g. "INDEX"
1267	PLONG pl) // out: numerical value
1268	{
1269	PSZ pParam;
1270	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1271	{
1272	if ( (*pParam == '\"')
1273	\|\| (*pParam == '\'')
1274	)
1275	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1276
1277	sscanf(pParam, "%ld", pl);
1278	}
1279
1280	return pParam;
1281	}
1282
1283	/*
1284	* strhGetTextAttr:
1285	* retrieves the attribute value of a textual HTML-style tag
1286	* in a newly allocated buffer, which is returned,
1287	* or NULL if attribute not found.
1288	* If an attribute value is to contain spaces, it
1289	* must be enclosed in quotes.
1290	*
1291	* The offset of the attribute data in pszSearchIn is
1292	* returned in *pulOffset so that you can do multiple
1293	* searches.
1294	*
1295	* This returns a new buffer, which should be free()'d after use.
1296	*
1297	* <B>Example:</B>
1298	+ ULONG ulOfs = 0;
1299	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1300	+ ............^ ulOfs
1301	*
1302	* returns a new string with the value "blublub" (without
1303	* quotes) and sets ulOfs to 12.
1304	*
1305	*@@added V0.9.0 [umoeller]
1306	*/
1307
1308	PSZ strhGetTextAttr(const char *pszSearchIn,
1309	const char *pszTag,
1310	PULONG pulOffset) // out: offset where found
1311	{
1312	PSZ pParam,
1313	pParam2,
1314	prc = NULL;
1315	ULONG ulCount = 0;
1316	LONG lNestingLevel = 0;
1317
1318	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1319	{
1320	// determine end character to search for: a space
1321	CHAR cEnd = ' ';
1322	if (*pParam == '\"')
1323	{
1324	// or, if the data is enclosed in quotes, a quote
1325	cEnd = '\"';
1326	pParam++;
1327	}
1328
1329	// V1.0.3 (2004-11-10) [pr]: @@fixes 461
1330	if (*pParam == '\'')
1331	{
1332	// or, if the data is enclosed in single quotes, a single quote
1333	cEnd = '\'';
1334	pParam++;
1335	}
1336
1337	if (pulOffset)
1338	// store the offset
1339	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1340
1341	// now find end of attribute
1342	pParam2 = pParam;
1343	while (*pParam)
1344	{
1345	if (*pParam == cEnd)
1346	// end character found
1347	break;
1348	else if (*pParam == '<')
1349	// yet another opening tag found:
1350	// this is probably some "<" in the attributes
1351	lNestingLevel++;
1352	else if (*pParam == '>')
1353	{
1354	lNestingLevel--;
1355	if (lNestingLevel < 0)
1356	// end of tag found:
1357	break;
1358	}
1359	ulCount++;
1360	pParam++;
1361	}
1362
1363	// copy attribute to new buffer
1364	if (ulCount)
1365	{
1366	prc = (PSZ)malloc(ulCount+1);
1367	memcpy(prc, pParam2, ulCount);
1368	*(prc+ulCount) = 0;
1369	}
1370	}
1371	return prc;
1372	}
1373
1374	/*
1375	* strhFindEndOfTag:
1376	* returns a pointer to the ">" char
1377	* which seems to terminate the tag beginning
1378	* after pszBeginOfTag.
1379	*
1380	* If additional "<" chars are found, we look
1381	* for additional ">" characters too.
1382	*
1383	* Note: You must pass the address of the opening
1384	* '<' character to this function.
1385	*
1386	* Example:
1387	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1388	+ strhFindEndOfTag(pszTest)
1389	+ returns.................................^ this.
1390	*
1391	*@@added V0.9.0 [umoeller]
1392	*/
1393
1394	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1395	{
1396	PSZ p = (PSZ)pszBeginOfTag,
1397	prc = NULL;
1398	LONG lNestingLevel = 0;
1399
1400	while (*p)
1401	{
1402	if (*p == '<')
1403	// another opening tag found:
1404	lNestingLevel++;
1405	else if (*p == '>')
1406	{
1407	// closing tag found:
1408	lNestingLevel--;
1409	if (lNestingLevel < 1)
1410	{
1411	// corresponding: return this
1412	prc = p;
1413	break;
1414	}
1415	}
1416	p++;
1417	}
1418
1419	return prc;
1420	}
1421
1422	/*
1423	* strhGetBlock:
1424	* this complex function searches the given string
1425	* for a pair of opening/closing HTML-style tags.
1426	*
1427	* If found, this routine returns TRUE and does
1428	* the following:
1429	*
1430	* 1) allocate a new buffer, copy the text
1431	* enclosed by the opening/closing tags
1432	* into it and set *ppszBlock to that
1433	* buffer;
1434	*
1435	* 2) if the opening tag has any attributes,
1436	* allocate another buffer, copy the
1437	* attributes into it and set *ppszAttrs
1438	* to that buffer; if no attributes are
1439	* found, *ppszAttrs will be NULL;
1440	*
1441	* 3) set *pulOffset to the offset from the
1442	* beginning of *ppszSearchIn where the
1443	* opening tag was found;
1444	*
1445	* 4) advance *ppszSearchIn to after the
1446	* closing tag, so that you can do
1447	* multiple searches without finding the
1448	* same tags twice.
1449	*
1450	* All buffers should be freed using free().
1451	*
1452	* This returns the following:
1453	* -- 0: no error
1454	* -- 1: tag not found at all (doesn't have to be an error)
1455	* -- 2: begin tag found, but no corresponding end tag found. This
1456	* is a real error.
1457	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1458	*
1459	* <B>Example:</B>
1460	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1461	+ PSZ pszBlock, pszAttrs;
1462	+ ULONG ulOfs;
1463	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1464	*
1465	* would do the following:
1466	*
1467	* 1) set pszBlock to a new string containing "This is page 1."
1468	* without quotes;
1469	*
1470	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1471	*
1472	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1473	*
1474	* 4) pSearch would be advanced to point to the "More text"
1475	* string in the original buffer.
1476	*
1477	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1478	* for HTML parsing.
1479	*
1480	*@@added V0.9.0 [umoeller]
1481	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1482	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1483	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1484	*/
1485
1486	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1487	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1488	const char *pszTag,
1489	PSZ *ppszBlock, // out: block enclosed by the tags
1490	PSZ *ppszAttribs, // out: attributes of the opening tag
1491	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1492	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1493	{
1494	ULONG ulrc = 1;
1495	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1496	pszSearch2 = pszBeginTag,
1497	pszClosingTag;
1498	ULONG cbTag = strlen(pszTag);
1499
1500	// go thru the block and check all tags if it's the
1501	// begin tag we're looking for
1502	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1503	{
1504	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1505	// yes: stop
1506	break;
1507	else
1508	pszBeginTag++;
1509	}
1510
1511	if (pszBeginTag)
1512	{
1513	// we found <TAG>:
1514	ULONG ulNestingLevel = 0;
1515
1516	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1517	// strchr(pszBeginTag, '>');
1518	if (pszEndOfBeginTag)
1519	{
1520	// does the caller want the attributes?
1521	if (ppszAttribs)
1522	{
1523	// yes: then copy them
1524	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1525	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1526	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1527	// add terminating 0
1528	*(pszAttrs + ulAttrLen) = 0;
1529
1530	*ppszAttribs = pszAttrs;
1531	}
1532
1533	// output offset of where we found the begin tag
1534	if (pulOfsBeginTag)
1535	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1536
1537	// now find corresponding closing tag (e.g. "</BODY>"
1538	pszBeginTag = pszEndOfBeginTag+1;
1539	// now we're behind the '>' char of the opening tag
1540	// increase offset of that too
1541	if (pulOfsBeginBlock)
1542	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1543
1544	// find next closing tag;
1545	// for the first run, pszSearch2 points to right
1546	// after the '>' char of the opening tag
1547	pszSearch2 = pszBeginTag;
1548	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1549	&& (pszClosingTag = strstr(pszSearch2, "<"))
1550	)
1551	{
1552	// if we have another opening tag before our closing
1553	// tag, we need to have several closing tags before
1554	// we're done
1555	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1556	ulNestingLevel++;
1557	else
1558	{
1559	// is this ours?
1560	if ( (*(pszClosingTag+1) == '/')
1561	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1562	)
1563	{
1564	// we've found a matching closing tag; is
1565	// it ours?
1566	if (ulNestingLevel == 0)
1567	{
1568	// our closing tag found:
1569	// allocate mem for a new buffer
1570	// and extract all the text between
1571	// open and closing tags to it
1572	ULONG ulLen = pszClosingTag - pszBeginTag;
1573	if (ppszBlock)
1574	{
1575	PSZ pNew = (PSZ)malloc(ulLen + 1);
1576	strhncpy0(pNew, pszBeginTag, ulLen);
1577	*ppszBlock = pNew;
1578	}
1579
1580	// raise search offset to after the closing tag
1581	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1582
1583	ulrc = 0;
1584
1585	break;
1586	} else
1587	// not our closing tag:
1588	ulNestingLevel--;
1589	}
1590	}
1591	// no matching closing tag: search on after that
1592	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1593	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1594
1595	if (!pszClosingTag)
1596	// no matching closing tag found:
1597	// return 2 (closing tag not found)
1598	ulrc = 2;
1599	} // end if (pszBeginTag)
1600	else
1601	// no matching ">" for opening tag found:
1602	ulrc = 3;
1603	}
1604
1605	return ulrc;
1606	}
1607
1608	/* ******************************************************************
1609	*
1610	* Miscellaneous
1611	*
1612	********************************************************************/
1613
1614	/*
1615	*@@ strhArrayAppend:
1616	* this appends a string to a "string array".
1617	*
1618	* A string array is considered a sequence of
1619	* zero-terminated strings in memory. That is,
1620	* after each string's null-byte, the next
1621	* string comes up.
1622	*
1623	* This is useful for composing a single block
1624	* of memory from, say, list box entries, which
1625	* can then be written to OS2.INI in one flush.
1626	*
1627	* To append strings to such an array, call this
1628	* function for each string you wish to append.
1629	* This will re-allocate *ppszRoot with each call,
1630	* and update *pcbRoot, which then contains the
1631	* total size of all strings (including all null
1632	* terminators).
1633	*
1634	* Pass *pcbRoot to PrfSaveProfileData to have the
1635	* block saved.
1636	*
1637	* Note: On the first call, ppszRoot and pcbRoot
1638	* _must_ be both NULL, or this crashes.
1639	*
1640	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1641	*/
1642
1643	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1644	const char *pcszNew, // in: string to append
1645	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1646	PULONG pcbRoot) // in/out: size of array
1647	{
1648	PSZ pszTemp;
1649
1650	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1651	cbNew = strlen(pcszNew);
1652
1653	pszTemp = (PSZ)malloc(*pcbRoot
1654	+ cbNew
1655	+ 1); // two null bytes
1656	if (*ppszRoot)
1657	{
1658	// not first loop: copy old stuff
1659	memcpy(pszTemp,
1660	*ppszRoot,
1661	*pcbRoot);
1662	free(*ppszRoot);
1663	}
1664	// append new string
1665	strcpy(pszTemp + *pcbRoot,
1666	pcszNew);
1667	// update root
1668	*ppszRoot = pszTemp;
1669	// update length
1670	*pcbRoot += cbNew + 1;
1671	}
1672
1673	/*
1674	*@@ strhCreateDump:
1675	* this dumps a memory block into a string
1676	* and returns that string in a new buffer.
1677	*
1678	* You must free() the returned PSZ after use.
1679	*
1680	* The output looks like the following:
1681	*
1682	+ 0000: FE FF 0E 02 90 00 00 00 ........
1683	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1684	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1685	*
1686	* Each line is terminated with a newline (\n)
1687	* character only.
1688	*
1689	*@@added V0.9.1 (2000-01-22) [umoeller]
1690	*/
1691
1692	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1693	ULONG ulSize, // in: size of buffer
1694	ULONG ulIndent) // in: indentation of every line
1695	{
1696	PSZ pszReturn = 0;
1697	XSTRING strReturn;
1698	CHAR szTemp[1000];
1699
1700	PBYTE pbCurrent = pb; // current byte
1701	ULONG ulCount = 0,
1702	ulCharsInLine = 0; // if this grows > 7, a new line is started
1703	CHAR szLine[400] = "",
1704	szAscii[30] = " "; // ASCII representation; filled for every line
1705	PSZ pszLine = szLine,
1706	pszAscii = szAscii;
1707
1708	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1709
1710	for (pbCurrent = pb;
1711	ulCount < ulSize;
1712	pbCurrent++, ulCount++)
1713	{
1714	if (ulCharsInLine == 0)
1715	{
1716	memset(szLine, ' ', ulIndent);
1717	pszLine += ulIndent;
1718	}
1719	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1720
1721	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1722	// printable character:
1723	pszAscii = pbCurrent;
1724	else
1725	*pszAscii = '.';
1726	pszAscii++;
1727
1728	ulCharsInLine++;
1729	if ( (ulCharsInLine > 7) // 8 bytes added?
1730	\|\| (ulCount == ulSize-1) // end of buffer reached?
1731	)
1732	{
1733	// if we haven't had eight bytes yet,
1734	// fill buffer up to eight bytes with spaces
1735	ULONG ul2;
1736	for (ul2 = ulCharsInLine;
1737	ul2 < 8;
1738	ul2++)
1739	pszLine += sprintf(pszLine, " ");
1740
1741	sprintf(szTemp, "%04lX: %s %s\n",
1742	(ulCount & 0xFFFFFFF8), // offset in hex
1743	szLine, // bytes string
1744	szAscii); // ASCII string
1745	xstrcat(&strReturn, szTemp, 0);
1746
1747	// restart line buffer
1748	pszLine = szLine;
1749
1750	// clear ASCII buffer
1751	strcpy(szAscii, " ");
1752	pszAscii = szAscii;
1753
1754	// reset line counter
1755	ulCharsInLine = 0;
1756	}
1757	}
1758
1759	if (strReturn.cbAllocated)
1760	pszReturn = strReturn.psz;
1761
1762	return pszReturn;
1763	}
1764
1765	/* ******************************************************************
1766	*
1767	* Fast string searches
1768	*
1769	********************************************************************/
1770
1771	#define ASSERT(a)
1772
1773	/*
1774	* The following code has been taken from the "Standard
1775	* Function Library", file sflfind.c, and only slightly
1776	* modified to conform to the rest of this file.
1777	*
1778	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1779	* Revised: 98/05/04
1780	*
1781	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1782	*
1783	* The SFL Licence allows incorporating SFL code into other
1784	* programs, as long as the copyright is reprinted and the
1785	* code is marked as modified, so this is what we do.
1786	*/
1787
1788	/*
1789	*@@ strhmemfind:
1790	* searches for a pattern in a block of memory using the
1791	* Boyer-Moore-Horspool-Sunday algorithm.
1792	*
1793	* The block and pattern may contain any values; you must
1794	* explicitly provide their lengths. If you search for strings,
1795	* use strlen() on the buffers.
1796	*
1797	* Returns a pointer to the pattern if found within the block,
1798	* or NULL if the pattern was not found.
1799	*
1800	* This algorithm needs a "shift table" to cache data for the
1801	* search pattern. This table can be reused when performing
1802	* several searches with the same pattern.
1803	*
1804	* "shift" must point to an array big enough to hold 256 (8**2)
1805	* "size_t" values.
1806	*
1807	* If (*repeat_find == FALSE), the shift table is initialized.
1808	* So on the first search with a given pattern, *repeat_find
1809	* should be FALSE. This function sets it to TRUE after the
1810	* shift table is initialised, allowing the initialisation
1811	* phase to be skipped on subsequent searches.
1812	*
1813	* This function is most effective when repeated searches are
1814	* made for the same pattern in one or more large buffers.
1815	*
1816	* Example:
1817	*
1818	+ PSZ pszHaystack = "This is a sample string.",
1819	+ pszNeedle = "string";
1820	+ size_t shift[256];
1821	+ BOOL fRepeat = FALSE;
1822	+
1823	+ PSZ pFound = strhmemfind(pszHaystack,
1824	+ strlen(pszHaystack), // block size
1825	+ pszNeedle,
1826	+ strlen(pszNeedle), // pattern size
1827	+ shift,
1828	+ &fRepeat);
1829	*
1830	* Taken from the "Standard Function Library", file sflfind.c.
1831	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1832	* Slightly modified by umoeller.
1833	*
1834	*@@added V0.9.3 (2000-05-08) [umoeller]
1835	*/
1836
1837	void* strhmemfind(const void *in_block, // in: block containing data
1838	size_t block_size, // in: size of block in bytes
1839	const void *in_pattern, // in: pattern to search for
1840	size_t pattern_size, // in: size of pattern block
1841	size_t *shift, // in/out: shift table (search buffer)
1842	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
1843	{
1844	size_t byte_nbr, // Distance through block
1845	match_size; // Size of matched part
1846	const unsigned char
1847	*match_base = NULL, // Base of match of pattern
1848	*match_ptr = NULL, // Point within current match
1849	*limit = NULL; // Last potiental match point
1850	const unsigned char
1851	block = (unsigned char ) in_block, // Concrete pointer to block data
1852	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
1853
1854	if ( (block == NULL)
1855	\|\| (pattern == NULL)
1856	\|\| (shift == NULL)
1857	)
1858	return NULL;
1859
1860	// Pattern must be smaller or equal in size to string
1861	if (block_size < pattern_size)
1862	return NULL; // Otherwise it's not found
1863
1864	if (pattern_size == 0) // Empty patterns match at start
1865	return ((void *)block);
1866
1867	// Build the shift table unless we're continuing a previous search
1868
1869	// The shift table determines how far to shift before trying to match
1870	// again, if a match at this point fails. If the byte after where the
1871	// end of our pattern falls is not in our pattern, then we start to
1872	// match again after that byte; otherwise we line up the last occurence
1873	// of that byte in our pattern under that byte, and try match again.
1874
1875	if (!repeat_find \|\| !*repeat_find)
1876	{
1877	for (byte_nbr = 0;
1878	byte_nbr < 256;
1879	byte_nbr++)
1880	shift[byte_nbr] = pattern_size + 1;
1881	for (byte_nbr = 0;
1882	byte_nbr < pattern_size;
1883	byte_nbr++)
1884	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1885
1886	if (repeat_find)
1887	*repeat_find = TRUE;
1888	}
1889
1890	// Search for the block, each time jumping up by the amount
1891	// computed in the shift table
1892
1893	limit = block + (block_size - pattern_size + 1);
1894	ASSERT (limit > block);
1895
1896	for (match_base = block;
1897	match_base < limit;
1898	match_base += shift[*(match_base + pattern_size)])
1899	{
1900	match_ptr = match_base;
1901	match_size = 0;
1902
1903	// Compare pattern until it all matches, or we find a difference
1904	while (*match_ptr++ == pattern[match_size++])
1905	{
1906	ASSERT (match_size <= pattern_size &&
1907	match_ptr == (match_base + match_size));
1908
1909	// If we found a match, return the start address
1910	if (match_size >= pattern_size)
1911	return ((void*)(match_base));
1912
1913	}
1914	}
1915	return NULL; // Found nothing
1916	}
1917
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      Will match strings irrespective of case. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      NULL is also returned if string or pattern is NULL. An empty
 *      pattern matches at the start of the string.
 *
 *      Characters are converted with tolower() as unsigned char values,
 *      so characters above 127 are safe to use whether plain char is
 *      signed or not.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_base,                     // Offset of current match attempt
        match_size,                     // Size of matched part
        last_base;                      // Last offset where the pattern still fits

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return NULL;

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return NULL;                    // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    // tolower() is only defined for unsigned char values and EOF,
    // so every char goes thru an unsigned char cast first
    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    last_base = string_size - pattern_size;

    for (match_base = 0;
         match_base <= last_base;
        )
    {
        // Compare pattern until it all matches, or we find a difference
        for (match_size = 0;
             match_size < pattern_size;
             match_size++)
            if (    tolower ((unsigned char) string [match_base + match_size])
                 != tolower ((unsigned char) pattern [match_size])
               )
                break;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *)(string + match_base));

        // The character behind the pattern window is at worst the
        // null terminator of the string, whose shift distance is
        // pattern_size + 1, which ends the loop
        match_base += shift [(unsigned char) tolower (
                        (unsigned char) string [match_base + pattern_size])];
    }

    return NULL;                        // Found nothing
}
2012

Note: See TracBrowser for help on using the repository browser.

Download in other formats: