Context Navigation

source: trunk/src/helpers/stringh.c@ 124

Visit:

Last change on this file since 124 was 123, checked in by umoeller, 24 years ago
Lots of changes for icons and refresh.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 76.7 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
73	/*
74	*@@ strhStoreDebug:
75	* memory debug version of strhStore.
76	*
77	*@@added V0.9.16 (2001-12-08) [umoeller]
78	*/
79
80	APIRET strhStoreDebug(PSZ *ppszTarget,
81	PCSZ pcszSource,
82	PULONG pulLength, // out: length of new string (ptr can be NULL)
83	const char *pcszSourceFile,
84	unsigned long ulLine,
85	const char *pcszFunction)
86	{
87	ULONG ulLength = 0;
88
89	if (ppszTarget)
90	{
91	if (*ppszTarget)
92	free(*ppszTarget);
93
94	if ( (pcszSource)
95	&& (ulLength = strlen(pcszSource))
96	)
97	{
98	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
99	pcszSourceFile,
100	ulLine,
101	pcszFunction))
102	memcpy(*ppszTarget, pcszSource, ulLength + 1);
103	else
104	return (ERROR_NOT_ENOUGH_MEMORY);
105	}
106	else
107	*ppszTarget = NULL;
108	}
109
110	if (pulLength)
111	*pulLength = ulLength;
112
113	return (NO_ERROR);
114	}
115
116	#endif
117
118	/*
119	*@@ strhStore:
120	* stores a copy of the given string in the specified
121	* buffer. Uses strdup internally.
122	*
123	* If *ppszTarget != NULL, the previous string is freed
124	* and set to NULL.
125	* If pcszSource != NULL, a copy of it is stored in the
126	* buffer.
127	*
128	*@@added V0.9.16 (2001-12-06) [umoeller]
129	*/
130
131	APIRET strhStore(PSZ *ppszTarget,
132	PCSZ pcszSource,
133	PULONG pulLength) // out: length of new string (ptr can be NULL)
134	{
135	ULONG ulLength = 0;
136
137	if (ppszTarget)
138	{
139	if (*ppszTarget)
140	free(*ppszTarget);
141
142	if ( (pcszSource)
143	&& (ulLength = strlen(pcszSource))
144	)
145	{
146	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
147	memcpy(*ppszTarget, pcszSource, ulLength + 1);
148	else
149	return (ERROR_NOT_ENOUGH_MEMORY);
150	}
151	else
152	*ppszTarget = NULL;
153	}
154
155	if (pulLength)
156	*pulLength = ulLength;
157
158	return (NO_ERROR);
159	}
160
161	/*
162	*@@ strhcpy:
163	* like strdup, but this one doesn't crash if string2 is NULL,
164	* but sets the first byte in string1 to \0 instead.
165	*
166	*@@added V0.9.14 (2001-08-01) [umoeller]
167	*/
168
169	PSZ strhcpy(PSZ string1, const char *string2)
170	{
171	if (string2)
172	return (strcpy(string1, string2));
173
174	*string1 = '\0';
175	return (string1);
176	}
177
178	#ifdef __DEBUG_MALLOC_ENABLED__
179
180	/*
181	*@@ strhdupDebug:
182	* memory debug version of strhdup.
183	*
184	*@@added V0.9.0 [umoeller]
185	*/
186
187	PSZ strhdupDebug(const char *pcszSource,
188	unsigned long *pulLength,
189	const char *pcszSourceFile,
190	unsigned long ulLine,
191	const char *pcszFunction)
192	{
193	PSZ pszReturn = NULL;
194	ULONG ulLength = 0;
195
196	if ( (pcszSource)
197	&& (ulLength = strlen(pcszSource))
198	)
199	{
200	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
201	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
202	ulLine,
203	pcszFunction))
204	memcpy(pszReturn, pcszSource, ulLength + 1);
205	}
206
207	if (pulLength)
208	*pulLength = ulLength;
209
210	return (pszReturn);
211	}
212
213	#endif // __DEBUG_MALLOC_ENABLED__
214
215	/*
216	*@@ strhdup:
217	* like strdup, but this one doesn't crash if pszSource
218	* is NULL, but returns NULL also. In addition, this
219	* can report the length of the string (V0.9.16).
220	*
221	*@@added V0.9.0 [umoeller]
222	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
223	*/
224
225	PSZ strhdup(const char *pcszSource,
226	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
227	{
228	PSZ pszReturn = NULL;
229	ULONG ulLength = 0;
230
231	if ( (pcszSource)
232	&& (ulLength = strlen(pcszSource))
233	)
234	{
235	if (pszReturn = (PSZ)malloc(ulLength + 1))
236	memcpy(pszReturn, pcszSource, ulLength + 1);
237	}
238
239	if (pulLength)
240	*pulLength = ulLength;
241
242	return (pszReturn);
243	}
244
/*
 *@@ strhcmp:
 *      better strcmp. This doesn't crash if any of the
 *      string pointers are NULL, but returns a proper
 *      value then: a NULL string sorts before any non-NULL
 *      string, and two NULL strings compare equal.
 *
 *      Besides, this is guaranteed to only return -1, 0,
 *      or +1, while strcmp can return any positive or
 *      negative value. This is useful for tree comparison
 *      funcs.
 *
 *@@added V0.9.9 (2001-02-16) [umoeller]
 */

int strhcmp(const char *p1, const char *p2)
{
    if (p1 && p2)
    {
        // normalize the strcmp result to -1, 0, +1
        int i = strcmp(p1, p2);
        if (i < 0)
            return (-1);
        if (i > 0)
            return (+1);
        return (0);
    }

    if (p1)
        // but p2 is NULL: p1 greater than p2 then
        return (+1);

    if (p2)
        // but p1 is NULL: p1 less than p2 then
        return (-1);

    // both strings are NULL
    return (0);
}
277
/*
 *@@ strhicmp:
 *      like strhcmp, but compares without respect
 *      to case (using stricmp).
 *
 *      Returns -1, 0, or +1 only. A NULL string sorts
 *      before any non-NULL string, and two NULL strings
 *      compare equal.
 *
 *@@added V0.9.9 (2001-04-07) [umoeller]
 */

int strhicmp(const char *p1, const char *p2)
{
    if (p1 && p2)
    {
        // normalize the stricmp result to -1, 0, +1
        int i = stricmp(p1, p2);
        if (i < 0)
            return (-1);
        if (i > 0)
            return (+1);
        return (0);
    }

    if (p1)
        // but p2 is NULL: p1 greater than p2 then
        return (+1);

    if (p2)
        // but p1 is NULL: p1 less than p2 then
        return (-1);

    // both strings are NULL
    return (0);
}
304
305	/*
306	*@@ strhistr:
307	* like strstr, but case-insensitive.
308	*
309	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
310	*/
311
312	PSZ strhistr(const char string1, const char string2)
313	{
314	PSZ prc = NULL;
315
316	if ((string1) && (string2))
317	{
318	PSZ pszSrchIn = strdup(string1);
319	PSZ pszSrchFor = strdup(string2);
320
321	if ((pszSrchIn) && (pszSrchFor))
322	{
323	strupr(pszSrchIn);
324	strupr(pszSrchFor);
325
326	prc = strstr(pszSrchIn, pszSrchFor);
327	if (prc)
328	{
329	// prc now has the first occurence of the string,
330	// but in pszSrchIn; we need to map this
331	// return value to the original string
332	prc = (prc-pszSrchIn) // offset in pszSrchIn
333	+ (PSZ)string1;
334	}
335	}
336	if (pszSrchFor)
337	free(pszSrchFor);
338	if (pszSrchIn)
339	free(pszSrchIn);
340	}
341	return (prc);
342	}
343
344	/*
345	*@@ strhncpy0:
346	* like strncpy, but always appends a 0 character.
347	*/
348
349	ULONG strhncpy0(PSZ pszTarget,
350	const char *pszSource,
351	ULONG cbSource)
352	{
353	ULONG ul = 0;
354	PSZ pTarget = pszTarget,
355	pSource = (PSZ)pszSource;
356
357	for (ul = 0; ul < cbSource; ul++)
358	if (*pSource)
359	pTarget++ = pSource++;
360	else
361	break;
362	*pTarget = 0;
363
364	return (ul);
365	}
366
367	/*
368	* strhCount:
369	* this counts the occurences of c in pszSearch.
370	*/
371
372	ULONG strhCount(const char *pszSearch,
373	CHAR c)
374	{
375	PSZ p = (PSZ)pszSearch;
376	ULONG ulCount = 0;
377	while (TRUE)
378	{
379	p = strchr(p, c);
380	if (p)
381	{
382	ulCount++;
383	p++;
384	}
385	else
386	break;
387	}
388	return (ulCount);
389	}
390
391	/*
392	*@@ strhIsDecimal:
393	* returns TRUE if psz consists of decimal digits only.
394	*/
395
396	BOOL strhIsDecimal(PSZ psz)
397	{
398	PSZ p = psz;
399	while (*p != 0)
400	{
401	if (isdigit(*p) == 0)
402	return (FALSE);
403	p++;
404	}
405
406	return (TRUE);
407	}
408
409	#ifdef __DEBUG_MALLOC_ENABLED__
410
411	/*
412	*@@ strhSubstrDebug:
413	* memory debug version of strhSubstr.
414	*
415	*@@added V0.9.14 (2001-08-01) [umoeller]
416	*/
417
418	PSZ strhSubstrDebug(const char *pBegin, // in: first char
419	const char *pEnd, // in: last char (not included)
420	const char *pcszSourceFile,
421	unsigned long ulLine,
422	const char *pcszFunction)
423	{
424	PSZ pszSubstr = NULL;
425
426	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
427	{
428	ULONG cbSubstr = (pEnd - pBegin);
429	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
430	pcszSourceFile,
431	ulLine,
432	pcszFunction))
433	{
434	// strhncpy0(pszSubstr, pBegin, cbSubstr);
435	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
436	*(pszSubstr + cbSubstr) = '\0';
437	}
438	}
439
440	return (pszSubstr);
441	}
442
443	#endif // __DEBUG_MALLOC_ENABLED__
444
445	/*
446	*@@ strhSubstr:
447	* this creates a new PSZ containing the string
448	* from pBegin to pEnd, excluding the pEnd character.
449	* The new string is null-terminated. The caller
450	* must free() the new string after use.
451	*
452	* Example:
453	+ "1234567890"
454	+ ^ ^
455	+ p1 p2
456	+ strhSubstr(p1, p2)
457	* would return a new string containing "2345678".
458	*
459	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
460	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
461	*/
462
463	PSZ strhSubstr(const char *pBegin, // in: first char
464	const char *pEnd) // in: last char (not included)
465	{
466	PSZ pszSubstr = NULL;
467
468	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
469	{
470	ULONG cbSubstr = (pEnd - pBegin);
471	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
472	{
473	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
474	*(pszSubstr + cbSubstr) = '\0';
475	}
476	}
477
478	return (pszSubstr);
479	}
480
481	/*
482	*@@ strhExtract:
483	* searches pszBuf for the cOpen character and returns
484	* the data in between cOpen and cClose, excluding
485	* those two characters, in a newly allocated buffer
486	* which you must free() afterwards.
487	*
488	* Spaces and newlines/linefeeds are skipped.
489	*
490	* If the search was successful, the new buffer
491	* is returned and, if (ppEnd != NULL), *ppEnd points
492	* to the first character after the cClose character
493	* found in the buffer.
494	*
495	* If the search was not successful, NULL is
496	* returned, and *ppEnd is unchanged.
497	*
498	* If another cOpen character is found before
499	* cClose, matching cClose characters will be skipped.
500	* You can therefore nest the cOpen and cClose
501	* characters.
502	*
503	* This function ignores cOpen and cClose characters
504	* in C-style comments and strings surrounded by
505	* double quotes.
506	*
507	* Example:
508	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
509	+ pEnd;
510	+ strhExtract(pszBuf,
511	+ '{', '}',
512	+ &pEnd)
513	* would return a new buffer containing " --blah-- ",
514	* and ppEnd would afterwards point to the space
515	* before "next" in the static buffer.
516	*
517	*@@added V0.9.0 [umoeller]
518	*/
519
520	PSZ strhExtract(PSZ pszBuf, // in: search buffer
521	CHAR cOpen, // in: opening char
522	CHAR cClose, // in: closing char
523	PSZ *ppEnd) // out: if != NULL, receives first character after closing char
524	{
525	PSZ pszReturn = NULL;
526
527	if (pszBuf)
528	{
529	PSZ pOpen = strchr(pszBuf, cOpen);
530	if (pOpen)
531	{
532	// opening char found:
533	// now go thru the whole rest of the buffer
534	PSZ p = pOpen+1;
535	LONG lLevel = 1; // if this goes 0, we're done
536	while (*p)
537	{
538	if (*p == cOpen)
539	lLevel++;
540	else if (*p == cClose)
541	{
542	lLevel--;
543	if (lLevel <= 0)
544	{
545	// matching closing bracket found:
546	// extract string
547	pszReturn = strhSubstr(pOpen+1, // after cOpen
548	p); // excluding cClose
549	if (ppEnd)
550	*ppEnd = p+1;
551	break; // while (*p)
552	}
553	}
554	else if (*p == '\"')
555	{
556	// beginning of string:
557	PSZ p2 = p+1;
558	// find end of string
559	while ((p2) && (p2 != '\"'))
560	p2++;
561
562	if (*p2 == '\"')
563	// closing quote found:
564	// search on after that
565	p = p2; // raised below
566	else
567	break; // while (*p)
568	}
569
570	p++;
571	}
572	}
573	}
574
575	return (pszReturn);
576	}
577
578	/*
579	*@@ strhQuote:
580	* similar to strhExtract, except that
581	* opening and closing chars are the same,
582	* and therefore no nesting is possible.
583	* Useful for extracting stuff between
584	* quotes.
585	*
586	*@@added V0.9.0 [umoeller]
587	*/
588
589	PSZ strhQuote(PSZ pszBuf,
590	CHAR cQuote,
591	PSZ *ppEnd)
592	{
593	PSZ pszReturn = NULL,
594	p1 = NULL;
595	if ((p1 = strchr(pszBuf, cQuote)))
596	{
597	PSZ p2 = strchr(p1+1, cQuote);
598	if (p2)
599	{
600	pszReturn = strhSubstr(p1+1, p2);
601	if (ppEnd)
602	// store closing char
603	*ppEnd = p2 + 1;
604	}
605	}
606
607	return (pszReturn);
608	}
609
610	/*
611	*@@ strhStrip:
612	* removes all double spaces.
613	* This copies within the "psz" buffer.
614	* If any double spaces are found, the
615	* string will be shorter than before,
616	* but the buffer is _not_ reallocated,
617	* so there will be unused bytes at the
618	* end.
619	*
620	* Returns the number of spaces removed.
621	*
622	*@@added V0.9.0 [umoeller]
623	*/
624
625	ULONG strhStrip(PSZ psz) // in/out: string
626	{
627	PSZ p;
628	ULONG cb = strlen(psz),
629	ulrc = 0;
630
631	for (p = psz; p < psz+cb; p++)
632	{
633	if ((p == ' ') && ((p+1) == ' '))
634	{
635	PSZ p2 = p;
636	while (*p2)
637	{
638	p2 = (p2+1);
639	p2++;
640	}
641	cb--;
642	p--;
643	ulrc++;
644	}
645	}
646	return (ulrc);
647	}
648
649	/*
650	*@@ strhins:
651	* this inserts one string into another.
652	*
653	* pszInsert is inserted into pszBuffer at offset
654	* ulInsertOfs (which counts from 0).
655	*
656	* A newly allocated string is returned. pszBuffer is
657	* not changed. The new string should be free()'d after
658	* use.
659	*
660	* Upon errors, NULL is returned.
661	*
662	*@@changed V0.9.0 [umoeller]: completely rewritten.
663	*/
664
665	PSZ strhins(const char *pcszBuffer,
666	ULONG ulInsertOfs,
667	const char *pcszInsert)
668	{
669	PSZ pszNew = NULL;
670
671	if ((pcszBuffer) && (pcszInsert))
672	{
673	do {
674	ULONG cbBuffer = strlen(pcszBuffer);
675	ULONG cbInsert = strlen(pcszInsert);
676
677	// check string length
678	if (ulInsertOfs > cbBuffer + 1)
679	break; // do
680
681	// OK, let's go.
682	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
683
684	// copy stuff before pInsertPos
685	memcpy(pszNew,
686	pcszBuffer,
687	ulInsertOfs);
688	// copy string to be inserted
689	memcpy(pszNew + ulInsertOfs,
690	pcszInsert,
691	cbInsert);
692	// copy stuff after pInsertPos
693	strcpy(pszNew + ulInsertOfs + cbInsert,
694	pcszBuffer + ulInsertOfs);
695	} while (FALSE);
696	}
697
698	return (pszNew);
699	}
700
701	/*
702	*@@ strhFindReplace:
703	* wrapper around xstrFindReplace to work with C strings.
704	* Note that *ppszBuf can get reallocated and must
705	* be free()'able.
706	*
707	* Repetitive use of this wrapper is not recommended
708	* because it is considerably slower than xstrFindReplace.
709	*
710	*@@added V0.9.6 (2000-11-01) [umoeller]
711	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
712	*/
713
714	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
715	PULONG pulOfs, // in: where to begin search (0 = start);
716	// out: ofs of first char after replacement string
717	const char *pcszSearch, // in: search string; cannot be NULL
718	const char *pcszReplace) // in: replacement string; cannot be NULL
719	{
720	ULONG ulrc = 0;
721	XSTRING xstrBuf,
722	xstrFind,
723	xstrReplace;
724	size_t ShiftTable[256];
725	BOOL fRepeat = FALSE;
726	xstrInitSet(&xstrBuf, *ppszBuf);
727	// reallocated and returned, so we're safe
728	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
729	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
730	// these two are never freed, so we're safe too
731
732	if ((ulrc = xstrFindReplace(&xstrBuf,
733	pulOfs,
734	&xstrFind,
735	&xstrReplace,
736	ShiftTable,
737	&fRepeat)))
738	// replaced:
739	*ppszBuf = xstrBuf.psz;
740
741	return (ulrc);
742	}
743
744	/*
745	* strhWords:
746	* returns the no. of words in "psz".
747	* A string is considered a "word" if
748	* it is surrounded by spaces only.
749	*
750	*@@added V0.9.0 [umoeller]
751	*/
752
753	ULONG strhWords(PSZ psz)
754	{
755	PSZ p;
756	ULONG cb = strlen(psz),
757	ulWords = 0;
758	if (cb > 1)
759	{
760	ulWords = 1;
761	for (p = psz; p < psz+cb; p++)
762	if (*p == ' ')
763	ulWords++;
764	}
765	return (ulWords);
766	}
767
768	/*
769	*@@ strhGetWord:
770	* finds word boundaries.
771	*
772	* *ppszStart is used as the beginning of the
773	* search.
774	*
775	* If a word is found, *ppszStart is set to
776	* the first character of the word which was
777	* found and *ppszEnd receives the address
778	* of the first character _after_ the word,
779	* which is probably a space or a \n or \r char.
780	* We then return TRUE.
781	*
782	* The search is stopped if a null character
783	* is found or pLimit is reached. In that case,
784	* FALSE is returned.
785	*
786	*@@added V0.9.1 (2000-02-13) [umoeller]
787	*/
788
789	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
790	// out: start of word (if TRUE is returned)
791	const char pLimit, // in: ptr to last char after ppszStart to be
792	// searched; if the word does not end before
793	// or with this char, FALSE is returned
794	const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
795	const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
796	PSZ *ppszEnd) // out: first char _after_ word
797	// (if TRUE is returned)
798	{
799	// characters after which a word can be started
800	// const char *pcszBeginChars = "\x0d\x0a ";
801	// const char *pcszEndChars = "\x0d\x0a /-";
802
803	PSZ pStart = *ppszStart;
804
805	// find start of word
806	while ( (pStart < (PSZ)pLimit)
807	&& (strchr(pcszBeginChars, *pStart))
808	)
809	// if char is a "before word" char: go for next
810	pStart++;
811
812	if (pStart < (PSZ)pLimit)
813	{
814	// found a valid "word start" character
815	// (which is not in pcszBeginChars):
816
817	// find end of word
818	PSZ pEndOfWord = pStart;
819	while ( (pEndOfWord <= (PSZ)pLimit)
820	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
821	)
822	// if char is not an "end word" char: go for next
823	pEndOfWord++;
824
825	if (pEndOfWord <= (PSZ)pLimit)
826	{
827	// whoa, got a word:
828	*ppszStart = pStart;
829	*ppszEnd = pEndOfWord;
830	return (TRUE);
831	}
832	}
833
834	return (FALSE);
835	}
836
837	/*
838	*@@ strhIsWord:
839	* returns TRUE if p points to a "word"
840	* in pcszBuf.
841	*
842	* p is considered a word if the character _before_
843	* it is in pcszBeginChars and the char _after_
844	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
845	*
846	*@@added V0.9.6 (2000-11-12) [umoeller]
847	*/
848
849	BOOL strhIsWord(const char *pcszBuf,
850	const char *p, // in: start of word
851	ULONG cbSearch, // in: length of word
852	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
853	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
854	{
855	BOOL fEndOK = FALSE;
856
857	// check previous char
858	if ( (p == pcszBuf)
859	\|\| (strchr(pcszBeginChars, *(p-1)))
860	)
861	{
862	// OK, valid begin char:
863	// check end char
864	CHAR cNextChar = *(p + cbSearch);
865	if (cNextChar == 0)
866	fEndOK = TRUE;
867	else
868	{
869	char *pc = strchr(pcszEndChars, cNextChar);
870	if (pc)
871	// OK, is end char: avoid doubles of that char,
872	// but allow spaces
873	if ( (cNextChar+1 != *pc)
874	\|\| (cNextChar+1 == ' ')
875	\|\| (cNextChar+1 == 0)
876	)
877	fEndOK = TRUE;
878	}
879	}
880
881	return (fEndOK);
882	}
883
884	/*
885	*@@ strhFindWord:
886	* searches for pszSearch in pszBuf, which is
887	* returned if found (or NULL if not).
888	*
889	* As opposed to strstr, this finds pszSearch
890	* only if it is a "word". A search string is
891	* considered a word if the character _before_
892	* it is in pcszBeginChars and the char _after_
893	* it is in pcszEndChars.
894	*
895	* Example:
896	+ strhFindWord("This is an example.", "is");
897	+ returns ...........^ this, but not the "is" in "This".
898	*
899	* The algorithm here uses strstr to find pszSearch in pszBuf
900	* and performs additional "is-word" checks for each item found
901	* (by calling strhIsWord).
902	*
903	* Note that this function is fairly slow compared to xstrFindWord.
904	*
905	*@@added V0.9.0 (99-11-08) [umoeller]
906	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
907	*/
908
909	PSZ strhFindWord(const char *pszBuf,
910	const char *pszSearch,
911	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
912	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
913	{
914	PSZ pszReturn = 0;
915	ULONG cbBuf = strlen(pszBuf),
916	cbSearch = strlen(pszSearch);
917
918	if ((cbBuf) && (cbSearch))
919	{
920	const char *p = pszBuf;
921
922	do // while p
923	{
924	p = strstr(p, pszSearch);
925	if (p)
926	{
927	// string found:
928	// check if that's a word
929
930	if (strhIsWord(pszBuf,
931	p,
932	cbSearch,
933	pcszBeginChars,
934	pcszEndChars))
935	{
936	// valid end char:
937	pszReturn = (PSZ)p;
938	break;
939	}
940
941	p += cbSearch;
942	}
943	} while (p);
944
945	}
946	return (pszReturn);
947	}
948
949	/*
950	*@@ strhFindEOL:
951	* returns a pointer to the next \r, \n or null character
952	* following pszSearchIn. Stores the offset in *pulOffset.
953	*
954	* This should never return NULL because at some point,
955	* there will be a null byte in your string.
956	*
957	*@@added V0.9.4 (2000-07-01) [umoeller]
958	*/
959
960	PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
961	PULONG pulOffset) // out: offset (ptr can be NULL)
962	{
963	const char *p = pcszSearchIn,
964	*prc = 0;
965	while (TRUE)
966	{
967	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
968	{
969	prc = p;
970	break;
971	}
972	p++;
973	}
974
975	if ((pulOffset) && (prc))
976	*pulOffset = prc - pcszSearchIn;
977
978	return ((PSZ)prc);
979	}
980
981	/*
982	*@@ strhFindNextLine:
983	* like strhFindEOL, but this returns the character
984	* _after_ \r or \n. Note that this might return
985	* a pointer to terminating NULL character also.
986	*/
987
988	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
989	{
990	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
991	// pEOL now points to the \r char or the terminating 0 byte;
992	// if not null byte, advance pointer
993	PSZ pNextLine = pEOL;
994	if (*pNextLine == '\r')
995	pNextLine++;
996	if (*pNextLine == '\n')
997	pNextLine++;
998	if (pulOffset)
999	*pulOffset = pNextLine - pszSearchIn;
1000	return (pNextLine);
1001	}
1002
1003	/*
1004	*@@ strhBeautifyTitle:
1005	* replaces all line breaks (0xd, 0xa) with spaces.
1006	*
1007	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1008	*/
1009
1010	BOOL strhBeautifyTitle(PSZ psz)
1011	{
1012	BOOL rc = FALSE;
1013	CHAR *p = psz;
1014
1015	while(*p)
1016	if ( (*p == '\r')
1017	\|\| (*p == '\n')
1018	)
1019	{
1020	rc = TRUE;
1021	if ( (p != psz)
1022	&& (p[-1] == ' ')
1023	)
1024	memmove(p, p + 1, strlen(p));
1025	else
1026	*p++ = ' ';
1027	}
1028	else
1029	p++;
1030
1031	return (rc);
1032	}
1033
1034	/*
1035	* strhFindAttribValue:
1036	* searches for pszAttrib in pszSearchIn; if found,
1037	* returns the first character after the "=" char.
1038	* If "=" is not found, a space, \r, and \n are
1039	* also accepted. This function searches without
1040	* respecting case.
1041	*
1042	* <B>Example:</B>
1043	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1044	+
1045	+ returns ....................... ^ this address.
1046	*
1047	*@@added V0.9.0 [umoeller]
1048	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1049	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1050	*/
1051
1052	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1053	{
1054	PSZ prc = 0;
1055	PSZ pszSearchIn2, p;
1056	ULONG cbAttrib = strlen(pszAttrib),
1057	ulLength = strlen(pszSearchIn);
1058
1059	// use alloca(), so memory is freed on function exit
1060	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1061	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1062
1063	// 1) find token, (space char, \n, \r, \t)
1064	p = strtok(pszSearchIn2, " \n\r\t");
1065	while (p)
1066	{
1067	CHAR c2;
1068	PSZ pOrig;
1069
1070	// check tag name
1071	if (!strnicmp(p, pszAttrib, cbAttrib))
1072	{
1073	// position in original string
1074	pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1075
1076	// yes:
1077	prc = pOrig + cbAttrib;
1078	c2 = *prc;
1079	while ( ( (c2 == ' ')
1080	\|\| (c2 == '=')
1081	\|\| (c2 == '\n')
1082	\|\| (c2 == '\r')
1083	)
1084	&& (c2 != 0)
1085	)
1086	c2 = *++prc;
1087
1088	break;
1089	}
1090
1091	p = strtok(NULL, " \n\r\t");
1092	}
1093
1094	return (prc);
1095	}
1096
1097	/* PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1098	{
1099	PSZ prc = 0;
1100	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1101	p,
1102	p2;
1103	ULONG cbAttrib = strlen(pszAttrib);
1104
1105	// 1) find space char
1106	while ((p = strchr(pszSearchIn2, ' ')))
1107	{
1108	CHAR c;
1109	p++;
1110	if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1111	{
1112	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1113	// now check whether the p+strlen(pszAttrib)
1114	// is a valid end-of-tag character
1115	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1116	&& ( (c == ' ')
1117	\|\| (c == '>')
1118	\|\| (c == '=')
1119	\|\| (c == '\r')
1120	\|\| (c == '\n')
1121	\|\| (c == 0)
1122	)
1123	)
1124	{
1125	// yes:
1126	CHAR c2;
1127	p2 = p + cbAttrib;
1128	c2 = *p2;
1129	while ( ( (c2 == ' ')
1130	\|\| (c2 == '=')
1131	\|\| (c2 == '\n')
1132	\|\| (c2 == '\r')
1133	)
1134	&& (c2 != 0)
1135	)
1136	c2 = *++p2;
1137
1138	prc = p2;
1139	break; // first while
1140	}
1141	}
1142	else
1143	break;
1144
1145	pszSearchIn2++;
1146	}
1147	return (prc);
1148	} */
1149
1150	/*
1151	* strhGetNumAttribValue:
1152	* stores the numerical parameter value of an HTML-style
1153	* tag in *pl.
1154	*
1155	* Returns the address of the tag parameter in the
1156	* search buffer, if found, or NULL.
1157	*
1158	* <B>Example:</B>
1159	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1160	*
1161	* stores 123 in the "l" variable.
1162	*
1163	*@@added V0.9.0 [umoeller]
1164	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1165	*/
1166
1167	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1168	const char *pszTag, // e.g. "INDEX"
1169	PLONG pl) // out: numerical value
1170	{
1171	PSZ pParam;
1172	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1173	{
1174	if ( (*pParam == '\"')
1175	\|\| (*pParam == '\'')
1176	)
1177	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1178
1179	sscanf(pParam, "%ld", pl);
1180	}
1181
1182	return (pParam);
1183	}
1184
1185	/*
1186	* strhGetTextAttr:
1187	* retrieves the attribute value of a textual HTML-style tag
1188	* in a newly allocated buffer, which is returned,
1189	* or NULL if attribute not found.
1190	* If an attribute value is to contain spaces, it
1191	* must be enclosed in quotes.
1192	*
1193	* The offset of the attribute data in pszSearchIn is
1194	* returned in *pulOffset so that you can do multiple
1195	* searches.
1196	*
1197	* This returns a new buffer, which should be free()'d after use.
1198	*
1199	* <B>Example:</B>
1200	+ ULONG ulOfs = 0;
1201	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1202	+ ............^ ulOfs
1203	*
1204	* returns a new string with the value "blublub" (without
1205	* quotes) and sets ulOfs to 12.
1206	*
1207	*@@added V0.9.0 [umoeller]
1208	*/
1209
1210	PSZ strhGetTextAttr(const char *pszSearchIn,
1211	const char *pszTag,
1212	PULONG pulOffset) // out: offset where found
1213	{
1214	PSZ pParam,
1215	pParam2,
1216	prc = NULL;
1217	ULONG ulCount = 0;
1218	LONG lNestingLevel = 0;
1219
1220	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1221	{
1222	// determine end character to search for: a space
1223	CHAR cEnd = ' ';
1224	if (*pParam == '\"')
1225	{
1226	// or, if the data is enclosed in quotes, a quote
1227	cEnd = '\"';
1228	pParam++;
1229	}
1230
1231	if (pulOffset)
1232	// store the offset
1233	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1234
1235	// now find end of attribute
1236	pParam2 = pParam;
1237	while (*pParam)
1238	{
1239	if (*pParam == cEnd)
1240	// end character found
1241	break;
1242	else if (*pParam == '<')
1243	// yet another opening tag found:
1244	// this is probably some "<" in the attributes
1245	lNestingLevel++;
1246	else if (*pParam == '>')
1247	{
1248	lNestingLevel--;
1249	if (lNestingLevel < 0)
1250	// end of tag found:
1251	break;
1252	}
1253	ulCount++;
1254	pParam++;
1255	}
1256
1257	// copy attribute to new buffer
1258	if (ulCount)
1259	{
1260	prc = (PSZ)malloc(ulCount+1);
1261	memcpy(prc, pParam2, ulCount);
1262	*(prc+ulCount) = 0;
1263	}
1264	}
1265	return (prc);
1266	}
1267
1268	/*
1269	* strhFindEndOfTag:
1270	* returns a pointer to the ">" char
1271	* which seems to terminate the tag beginning
1272	* after pszBeginOfTag.
1273	*
1274	* If additional "<" chars are found, we look
1275	* for additional ">" characters too.
1276	*
1277	* Note: You must pass the address of the opening
1278	* '<' character to this function.
1279	*
1280	* Example:
1281	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1282	+ strhFindEndOfTag(pszTest)
1283	+ returns.................................^ this.
1284	*
1285	*@@added V0.9.0 [umoeller]
1286	*/
1287
1288	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1289	{
1290	PSZ p = (PSZ)pszBeginOfTag,
1291	prc = NULL;
1292	LONG lNestingLevel = 0;
1293
1294	while (*p)
1295	{
1296	if (*p == '<')
1297	// another opening tag found:
1298	lNestingLevel++;
1299	else if (*p == '>')
1300	{
1301	// closing tag found:
1302	lNestingLevel--;
1303	if (lNestingLevel < 1)
1304	{
1305	// corresponding: return this
1306	prc = p;
1307	break;
1308	}
1309	}
1310	p++;
1311	}
1312
1313	return (prc);
1314	}
1315
1316	/*
1317	* strhGetBlock:
1318	* this complex function searches the given string
1319	* for a pair of opening/closing HTML-style tags.
1320	*
1321	* If found, this routine returns TRUE and does
1322	* the following:
1323	*
1324	* 1) allocate a new buffer, copy the text
1325	* enclosed by the opening/closing tags
1326	* into it and set *ppszBlock to that
1327	* buffer;
1328	*
1329	* 2) if the opening tag has any attributes,
1330	* allocate another buffer, copy the
1331	* attributes into it and set *ppszAttrs
1332	* to that buffer; if no attributes are
1333	* found, *ppszAttrs will be NULL;
1334	*
1335	* 3) set *pulOffset to the offset from the
1336	* beginning of *ppszSearchIn where the
1337	* opening tag was found;
1338	*
1339	* 4) advance *ppszSearchIn to after the
1340	* closing tag, so that you can do
1341	* multiple searches without finding the
1342	* same tags twice.
1343	*
1344	* All buffers should be freed using free().
1345	*
1346	* This returns the following:
1347	* -- 0: no error
1348	* -- 1: tag not found at all (doesn't have to be an error)
1349	* -- 2: begin tag found, but no corresponding end tag found. This
1350	* is a real error.
1351	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1352	*
1353	* <B>Example:</B>
1354	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1355	+ PSZ pszBlock, pszAttrs;
1356	+ ULONG ulOfs;
1357	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1358	*
1359	* would do the following:
1360	*
1361	* 1) set pszBlock to a new string containing "This is page 1."
1362	* without quotes;
1363	*
* 2) set pszAttrs to a new string containing "<PAGE INDEX=1"
* (the opening tag up to, but not including, its closing ">");
1365	*
1366	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1367	*
1368	* 4) pSearch would be advanced to point to the "More text"
1369	* string in the original buffer.
1370	*
1371	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1372	* for HTML parsing.
1373	*
1374	*@@added V0.9.0 [umoeller]
1375	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1376	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1377	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1378	*/
1379
1380	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1381	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1382	PSZ pszTag,
1383	PSZ *ppszBlock, // out: block enclosed by the tags
1384	PSZ *ppszAttribs, // out: attributes of the opening tag
1385	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1386	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1387	{
1388	ULONG ulrc = 1;
1389	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1390	pszSearch2 = pszBeginTag,
1391	pszClosingTag;
1392	ULONG cbTag = strlen(pszTag);
1393
1394	// go thru the block and check all tags if it's the
1395	// begin tag we're looking for
1396	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1397	{
1398	if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1399	// yes: stop
1400	break;
1401	else
1402	pszBeginTag++;
1403	}
1404
1405	if (pszBeginTag)
1406	{
1407	// we found <TAG>:
1408	ULONG ulNestingLevel = 0;
1409
1410	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1411	// strchr(pszBeginTag, '>');
1412	if (pszEndOfBeginTag)
1413	{
1414	// does the caller want the attributes?
1415	if (ppszAttribs)
1416	{
1417	// yes: then copy them
1418	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1419	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1420	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1421	// add terminating 0
1422	*(pszAttrs + ulAttrLen) = 0;
1423
1424	*ppszAttribs = pszAttrs;
1425	}
1426
1427	// output offset of where we found the begin tag
1428	if (pulOfsBeginTag)
1429	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1430
1431	// now find corresponding closing tag (e.g. "</BODY>"
1432	pszBeginTag = pszEndOfBeginTag+1;
1433	// now we're behind the '>' char of the opening tag
1434	// increase offset of that too
1435	if (pulOfsBeginBlock)
1436	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1437
1438	// find next closing tag;
1439	// for the first run, pszSearch2 points to right
1440	// after the '>' char of the opening tag
1441	pszSearch2 = pszBeginTag;
1442	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1443	&& (pszClosingTag = strstr(pszSearch2, "<"))
1444	)
1445	{
1446	// if we have another opening tag before our closing
1447	// tag, we need to have several closing tags before
1448	// we're done
1449	if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1450	ulNestingLevel++;
1451	else
1452	{
1453	// is this ours?
1454	if ( (*(pszClosingTag+1) == '/')
1455	&& (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1456	)
1457	{
1458	// we've found a matching closing tag; is
1459	// it ours?
1460	if (ulNestingLevel == 0)
1461	{
1462	// our closing tag found:
1463	// allocate mem for a new buffer
1464	// and extract all the text between
1465	// open and closing tags to it
1466	ULONG ulLen = pszClosingTag - pszBeginTag;
1467	if (ppszBlock)
1468	{
1469	PSZ pNew = (PSZ)malloc(ulLen + 1);
1470	strhncpy0(pNew, pszBeginTag, ulLen);
1471	*ppszBlock = pNew;
1472	}
1473
1474	// raise search offset to after the closing tag
1475	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1476
1477	ulrc = 0;
1478
1479	break;
1480	} else
1481	// not our closing tag:
1482	ulNestingLevel--;
1483	}
1484	}
1485	// no matching closing tag: search on after that
1486	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1487	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1488
1489	if (!pszClosingTag)
1490	// no matching closing tag found:
1491	// return 2 (closing tag not found)
1492	ulrc = 2;
1493	} // end if (pszBeginTag)
1494	else
1495	// no matching ">" for opening tag found:
1496	ulrc = 3;
1497	}
1498
1499	return (ulrc);
1500	}
1501
1502	/* ******************************************************************
1503	*
1504	* Miscellaneous
1505	*
1506	********************************************************************/
1507
1508	/*
1509	*@@ strhArrayAppend:
1510	* this appends a string to a "string array".
1511	*
1512	* A string array is considered a sequence of
1513	* zero-terminated strings in memory. That is,
1514	* after each string's null-byte, the next
1515	* string comes up.
1516	*
1517	* This is useful for composing a single block
1518	* of memory from, say, list box entries, which
1519	* can then be written to OS2.INI in one flush.
1520	*
1521	* To append strings to such an array, call this
1522	* function for each string you wish to append.
1523	* This will re-allocate *ppszRoot with each call,
1524	* and update *pcbRoot, which then contains the
1525	* total size of all strings (including all null
1526	* terminators).
1527	*
1528	* Pass *pcbRoot to PrfSaveProfileData to have the
1529	* block saved.
1530	*
* Note: On the first call, *ppszRoot _must_ be NULL and
* *pcbRoot _must_ be 0, or this crashes.
1533	*
1534	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1535	*/
1536
1537	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1538	const char *pcszNew, // in: string to append
1539	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1540	PULONG pcbRoot) // in/out: size of array
1541	{
1542	PSZ pszTemp;
1543
1544	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1545	cbNew = strlen(pcszNew);
1546
1547	pszTemp = (PSZ)malloc(*pcbRoot
1548	+ cbNew
1549	+ 1); // two null bytes
1550	if (*ppszRoot)
1551	{
1552	// not first loop: copy old stuff
1553	memcpy(pszTemp,
1554	*ppszRoot,
1555	*pcbRoot);
1556	free(*ppszRoot);
1557	}
1558	// append new string
1559	strcpy(pszTemp + *pcbRoot,
1560	pcszNew);
1561	// update root
1562	*ppszRoot = pszTemp;
1563	// update length
1564	*pcbRoot += cbNew + 1;
1565	}
1566
1567	/*
1568	*@@ strhCreateDump:
1569	* this dumps a memory block into a string
1570	* and returns that string in a new buffer.
1571	*
1572	* You must free() the returned PSZ after use.
1573	*
1574	* The output looks like the following:
1575	*
1576	+ 0000: FE FF 0E 02 90 00 00 00 ........
1577	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1578	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1579	*
1580	* Each line is terminated with a newline (\n)
1581	* character only.
1582	*
1583	*@@added V0.9.1 (2000-01-22) [umoeller]
1584	*/
1585
1586	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1587	ULONG ulSize, // in: size of buffer
1588	ULONG ulIndent) // in: indentation of every line
1589	{
1590	PSZ pszReturn = 0;
1591	XSTRING strReturn;
1592	CHAR szTemp[1000];
1593
1594	PBYTE pbCurrent = pb; // current byte
1595	ULONG ulCount = 0,
1596	ulCharsInLine = 0; // if this grows > 7, a new line is started
1597	CHAR szLine[400] = "",
1598	szAscii[30] = " "; // ASCII representation; filled for every line
1599	PSZ pszLine = szLine,
1600	pszAscii = szAscii;
1601
1602	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1603
1604	for (pbCurrent = pb;
1605	ulCount < ulSize;
1606	pbCurrent++, ulCount++)
1607	{
1608	if (ulCharsInLine == 0)
1609	{
1610	memset(szLine, ' ', ulIndent);
1611	pszLine += ulIndent;
1612	}
1613	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1614
1615	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1616	// printable character:
1617	pszAscii = pbCurrent;
1618	else
1619	*pszAscii = '.';
1620	pszAscii++;
1621
1622	ulCharsInLine++;
1623	if ( (ulCharsInLine > 7) // 8 bytes added?
1624	\|\| (ulCount == ulSize-1) // end of buffer reached?
1625	)
1626	{
1627	// if we haven't had eight bytes yet,
1628	// fill buffer up to eight bytes with spaces
1629	ULONG ul2;
1630	for (ul2 = ulCharsInLine;
1631	ul2 < 8;
1632	ul2++)
1633	pszLine += sprintf(pszLine, " ");
1634
1635	sprintf(szTemp, "%04lX: %s %s\n",
1636	(ulCount & 0xFFFFFFF8), // offset in hex
1637	szLine, // bytes string
1638	szAscii); // ASCII string
1639	xstrcat(&strReturn, szTemp, 0);
1640
1641	// restart line buffer
1642	pszLine = szLine;
1643
1644	// clear ASCII buffer
1645	strcpy(szAscii, " ");
1646	pszAscii = szAscii;
1647
1648	// reset line counter
1649	ulCharsInLine = 0;
1650	}
1651	}
1652
1653	if (strReturn.cbAllocated)
1654	pszReturn = strReturn.psz;
1655
1656	return (pszReturn);
1657	}
1658
1659	/* ******************************************************************
1660	*
1661	* Wildcard matching
1662	*
1663	********************************************************************/
1664
1665	/*
1666	* The following code has been taken from "fnmatch.zip".
1667	*
1668	* (c) 1994-1996 by Eberhard Mattes.
1669	*/
1670
1671	/* In OS/2 and DOS styles, both / and \ separate components of a path.
1672	* This macro returns true iff C is a separator. */
1673
#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the terminating null byte or a separator.
 * Note that both macros evaluate C more than once, so do not pass
 * expressions with side effects. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP(C))
1681
1682	/*
1683	* skip_comp_os2:
1684	* Return a pointer to the next component of the path SRC, for OS/2
1685	* and DOS styles. When the end of the string is reached, a pointer
1686	* to the terminating null character is returned.
1687	*
1688	* (c) 1994-1996 by Eberhard Mattes.
1689	*/
1690
static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *pNext = src;

    /* Walk to the character which ends this component: either a
     * separator or the terminating null byte. */

    for (;
         !IS_OS2_COMP_END(*pNext);
         pNext++)
        ;

    /* A separator is stepped over so that the result points to the
     * next component; the terminating null byte is returned as is. */

    return (*pNext == 0) ? pNext : pNext + 1;
}
1705
1706	/*
1707	* has_colon:
1708	* returns true iff the path P contains a colon.
1709	*
1710	* (c) 1994-1996 by Eberhard Mattes.
1711	*/
1712
static int has_colon(const unsigned char *p)
{
    const unsigned char *pScan;

    /* Look at every character up to the terminating null byte and
     * report the first colon. */

    for (pScan = p;
         *pScan != 0;
         ++pScan)
    {
        if (*pScan == ':')
            return 1;
    }

    /* End of string reached without seeing a colon. */

    return 0;
}
1722
1723	/*
1724	* match_comp_os2:
1725	* compares a single component (directory name or file name)
1726	* of the paths, for OS/2 and DOS styles. MASK and NAME point
1727	* into a component of the wildcard and the name to be checked,
1728	* respectively. Comparing stops at the next separator.
1729	* The FLAGS argument is the same as that of fnmatch().
1730	*
1731	* HAS_DOT is true if a dot is in the current component of NAME.
1732	* The number of dots is not restricted, even in DOS style.
1733	*
1734	* Returns FNM_MATCH iff MASK and NAME match.
1735	*
1736	* Note that this function is recursive.
1737	*
1738	* (c) 1994-1996 by Eberhard Mattes.
1739	*/
1740
1741	static int match_comp_os2(const unsigned char *mask,
1742	const unsigned char *name,
1743	unsigned flags,
1744	int has_dot)
1745	{
1746	int rc;
1747
1748	for (;;)
1749	switch (*mask)
1750	{
1751	case 0:
1752
1753	/* There must be no extra characters at the end of NAME when
1754	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1755	* in that case, NAME may point to a separator. */
1756
1757	if (*name == 0)
1758	return FNM_MATCH;
1759	if ((flags & FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1760	return FNM_MATCH;
1761	return FNM_NOMATCH;
1762
1763	case '/':
1764	case '\\':
1765
1766	/* Separators match separators. */
1767
1768	if (IS_OS2_COMP_SEP(*name))
1769	return FNM_MATCH;
1770
1771	/* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1772	* is ignored at the end of NAME. */
1773
1774	if ((flags & FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1775	return FNM_MATCH;
1776
1777	/* Stop comparing at the separator. */
1778
1779	return FNM_NOMATCH;
1780
1781	case '?':
1782
1783	/* A question mark matches one character. It does not match
1784	* a dot. At the end of the component (and before a dot),
1785	* it also matches zero characters. */
1786
1787	if (name != '.' && !IS_OS2_COMP_END(name))
1788	++name;
1789	++mask;
1790	break;
1791
1792	case '*':
1793
1794	/* An asterisk matches zero or more characters. In DOS
1795	* mode, dots are not matched. */
1796
1797	do
1798	{
1799	++mask;
1800	}
1801	while (mask == '');
1802	for (;;)
1803	{
1804	rc = match_comp_os2(mask, name, flags, has_dot);
1805	if (rc != FNM_NOMATCH)
1806	return rc;
1807	if (IS_OS2_COMP_END(*name))
1808	return FNM_NOMATCH;
1809	if (*name == '.' && (flags & FNM_STYLE_MASK) == FNM_DOS)
1810	return FNM_NOMATCH;
1811	++name;
1812	}
1813
1814	case '.':
1815
1816	/* A dot matches a dot. It also matches the implicit dot at
1817	* the end of a dot-less NAME. */
1818
1819	++mask;
1820	if (*name == '.')
1821	++name;
1822	else if (has_dot \|\| !IS_OS2_COMP_END(*name))
1823	return FNM_NOMATCH;
1824	break;
1825
1826	default:
1827
1828	/* All other characters match themselves. */
1829
1830	if (flags & FNM_IGNORECASE)
1831	{
1832	if (tolower(mask) != tolower(name))
1833	return FNM_NOMATCH;
1834	}
1835	else
1836	{
1837	if (mask != name)
1838	return FNM_NOMATCH;
1839	}
1840	++mask;
1841	++name;
1842	break;
1843	}
1844	}
1845
1846	/*
1847	* match_comp:
1848	* compares a single component (directory name or file
1849	* name) of the paths, for all styles which need
1850	* component-by-component matching. MASK and NAME point
1851	* to the start of a component of the wildcard and the
1852	* name to be checked, respectively. Comparing stops at
1853	* the next separator. The FLAGS argument is the same as
1854	* that of fnmatch().
1855	*
1856	* Return FNM_MATCH iff MASK and NAME match.
1857	*
1858	* (c) 1994-1996 by Eberhard Mattes.
1859	*/
1860
1861	static int match_comp(const unsigned char *mask,
1862	const unsigned char *name,
1863	unsigned flags)
1864	{
1865	const unsigned char *s;
1866
1867	switch (flags & FNM_STYLE_MASK)
1868	{
1869	case FNM_OS2:
1870	case FNM_DOS:
1871
1872	/* For OS/2 and DOS styles, we add an implicit dot at the end of
1873	* the component if the component doesn't include a dot. */
1874
1875	s = name;
1876	while (!IS_OS2_COMP_END(s) && s != '.')
1877	++s;
1878	return match_comp_os2(mask, name, flags, *s == '.');
1879
1880	default:
1881	return FNM_ERR;
1882	}
1883	}
1884
1885	/* In Unix styles, / separates components of a path. This macro
1886	* returns true iff C is a separator. */
1887
#define IS_UNIX_COMP_SEP(C)  ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the terminating null byte or a separator.
 * Note that C is evaluated more than once, so do not pass
 * expressions with side effects. */

#define IS_UNIX_COMP_END(C)  ((C) == 0 || IS_UNIX_COMP_SEP(C))
1895
1896	/*
1897	* match_unix:
1898	* matches complete paths for Unix styles.
1899	*
1900	* The FLAGS argument is the same as that of fnmatch().
1901	* COMP points to the start of the current component in
1902	* NAME. Return FNM_MATCH iff MASK and NAME match. The
1903	* backslash character is used for escaping ? and * unless
1904	* FNM_NOESCAPE is set.
1905	*
1906	* (c) 1994-1996 by Eberhard Mattes.
1907	*/
1908
1909	static int match_unix(const unsigned char *mask,
1910	const unsigned char *name,
1911	unsigned flags,
1912	const unsigned char *comp)
1913	{
1914	unsigned char c1, c2;
1915	char invert, matched;
1916	const unsigned char *start;
1917	int rc;
1918
1919	for (;;)
1920	switch (*mask)
1921	{
1922	case 0:
1923
1924	/* There must be no extra characters at the end of NAME when
1925	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1926	* in that case, NAME may point to a separator. */
1927
1928	if (*name == 0)
1929	return FNM_MATCH;
1930	if ((flags & FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1931	return FNM_MATCH;
1932	return FNM_NOMATCH;
1933
1934	case '?':
1935
1936	/* A question mark matches one character. It does not match
1937	* the component separator if FNM_PATHNAME is set. It does
1938	* not match a dot at the start of a component if FNM_PERIOD
1939	* is set. */
1940
1941	if (*name == 0)
1942	return FNM_NOMATCH;
1943	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1944	return FNM_NOMATCH;
1945	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1946	return FNM_NOMATCH;
1947	++mask;
1948	++name;
1949	break;
1950
1951	case '*':
1952
1953	/* An asterisk matches zero or more characters. It does not
1954	* match the component separator if FNM_PATHNAME is set. It
1955	* does not match a dot at the start of a component if
1956	* FNM_PERIOD is set. */
1957
1958	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1959	return FNM_NOMATCH;
1960	do
1961	{
1962	++mask;
1963	}
1964	while (mask == '');
1965	for (;;)
1966	{
1967	rc = match_unix(mask, name, flags, comp);
1968	if (rc != FNM_NOMATCH)
1969	return rc;
1970	if (*name == 0)
1971	return FNM_NOMATCH;
1972	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1973	return FNM_NOMATCH;
1974	++name;
1975	}
1976
1977	case '/':
1978
1979	/* Separators match only separators. If _FNM_PATHPREFIX is
1980	* set, a trailing separator in MASK is ignored at the end
1981	* of NAME. */
1982
1983	if (!(IS_UNIX_COMP_SEP(*name)
1984	\|\| ((flags & FNM_PATHPREFIX) && *name == 0
1985	&& (mask[1] == 0
1986	\|\| (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1987	&& mask[2] == 0)))))
1988	return FNM_NOMATCH;
1989
1990	++mask;
1991	if (*name != 0)
1992	++name;
1993
1994	/* This is the beginning of a new component if FNM_PATHNAME
1995	* is set. */
1996
1997	if (flags & FNM_PATHNAME)
1998	comp = name;
1999	break;
2000
2001	case '[':
2002
2003	/* A set of characters. Always case-sensitive. */
2004
2005	if (*name == 0)
2006	return FNM_NOMATCH;
2007	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2008	return FNM_NOMATCH;
2009	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2010	return FNM_NOMATCH;
2011
2012	invert = 0;
2013	matched = 0;
2014	++mask;
2015
2016	/* If the first character is a ! or ^, the set matches all
2017	* characters not listed in the set. */
2018
2019	if (mask == '!' \|\| mask == '^')
2020	{
2021	++mask;
2022	invert = 1;
2023	}
2024
2025	/* Loop over all the characters of the set. The loop ends
2026	* if the end of the string is reached or if a ] is
2027	* encountered unless it directly follows the initial [ or
2028	* [-. */
2029
2030	start = mask;
2031	while (!(mask == 0 \|\| (mask == ']' && mask != start)))
2032	{
2033	/* Get the next character which is optionally preceded
2034	* by a backslash. */
2035
2036	c1 = *mask++;
2037	if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2038	{
2039	if (*mask == 0)
2040	break;
2041	c1 = *mask++;
2042	}
2043
2044	/* Ranges of characters are written as a-z. Don't
2045	* forget to check for the end of the string and to
2046	* handle the backslash. If the character after - is a
2047	* ], it isn't a range. */
2048
2049	if (*mask == '-' && mask[1] != ']')
2050	{
2051	++mask; /* Skip the - character */
2052	if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2053	++mask;
2054	if (*mask == 0)
2055	break;
2056	c2 = *mask++;
2057	}
2058	else
2059	c2 = c1;
2060
2061	/* Now check whether this character or range matches NAME. */
2062
2063	if (c1 <= name && name <= c2)
2064	matched = 1;
2065	}
2066
2067	/* If the end of the string is reached before a ] is found,
2068	* back up to the [ and compare it to NAME. */
2069
2070	if (*mask == 0)
2071	{
2072	if (*name != '[')
2073	return FNM_NOMATCH;
2074	++name;
2075	mask = start;
2076	if (invert)
2077	--mask;
2078	}
2079	else
2080	{
2081	if (invert)
2082	matched = !matched;
2083	if (!matched)
2084	return FNM_NOMATCH;
2085	++mask; /* Skip the ] character */
2086	if (*name != 0)
2087	++name;
2088	}
2089	break;
2090
2091	case '\\':
2092	++mask;
2093	if (flags & FNM_NOESCAPE)
2094	{
2095	if (*name != '\\')
2096	return FNM_NOMATCH;
2097	++name;
2098	}
2099	else if (mask == '' \|\| *mask == '?')
2100	{
2101	if (mask != name)
2102	return FNM_NOMATCH;
2103	++mask;
2104	++name;
2105	}
2106	break;
2107
2108	default:
2109
2110	/* All other characters match themselves. */
2111
2112	if (flags & FNM_IGNORECASE)
2113	{
2114	if (tolower(mask) != tolower(name))
2115	return FNM_NOMATCH;
2116	}
2117	else
2118	{
2119	if (mask != name)
2120	return FNM_NOMATCH;
2121	}
2122	++mask;
2123	++name;
2124	break;
2125	}
2126	}
2127
2128	/*
2129	* _fnmatch_unsigned:
2130	* Check whether the path name NAME matches the wildcard MASK.
2131	*
2132	* Return:
2133	* -- 0 (FNM_MATCH) if it matches,
2134	* -- _FNM_NOMATCH if it doesn't,
2135	* -- FNM_ERR on error.
2136	*
2137	* The operation of this function is controlled by FLAGS.
2138	* This is an internal function, with unsigned arguments.
2139	*
2140	* (c) 1994-1996 by Eberhard Mattes.
2141	*/
2142
2143	static int _fnmatch_unsigned(const unsigned char *mask,
2144	const unsigned char *name,
2145	unsigned flags)
2146	{
2147	int m_drive,
2148	n_drive,
2149	rc;
2150
2151	/* Match and skip the drive name if present. */
2152
2153	m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2154	n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2155
2156	if (m_drive != n_drive)
2157	{
2158	if (m_drive == -1 \|\| n_drive == -1)
2159	return FNM_NOMATCH;
2160	if (!(flags & FNM_IGNORECASE))
2161	return FNM_NOMATCH;
2162	if (tolower(m_drive) != tolower(n_drive))
2163	return FNM_NOMATCH;
2164	}
2165
2166	if (m_drive != -1)
2167	mask += 2;
2168	if (n_drive != -1)
2169	name += 2;
2170
2171	/* Colons are not allowed in path names, except for the drive name,
2172	* which was skipped above. */
2173
2174	if (has_colon(mask) \|\| has_colon(name))
2175	return FNM_ERR;
2176
2177	/* The name "\\server\path" should not be matched by mask
2178	* "\\server\path". Ditto for /. /
2179
2180	switch (flags & FNM_STYLE_MASK)
2181	{
2182	case FNM_OS2:
2183	case FNM_DOS:
2184
2185	if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2186	{
2187	if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2188	return FNM_NOMATCH;
2189	name += 2;
2190	mask += 2;
2191	}
2192	break;
2193
2194	case FNM_POSIX:
2195
2196	if (name[0] == '/' && name[1] == '/')
2197	{
2198	int i;
2199
2200	name += 2;
2201	for (i = 0; i < 2; ++i)
2202	if (mask[0] == '/')
2203	++mask;
2204	else if (mask[0] == '\\' && mask[1] == '/')
2205	mask += 2;
2206	else
2207	return FNM_NOMATCH;
2208	}
2209
2210	/* In Unix styles, treating ? and * w.r.t. components is simple.
2211	* No need to do matching component by component. */
2212
2213	return match_unix(mask, name, flags, name);
2214	}
2215
2216	/* Now compare all the components of the path name, one by one.
2217	* Note that the path separator must not be enclosed in brackets. */
2218
2219	while (mask != 0 \|\| name != 0)
2220	{
2221
2222	/* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2223	* is reached even if there are components left in NAME. */
2224
2225	if (*mask == 0 && (flags & FNM_PATHPREFIX))
2226	return FNM_MATCH;
2227
2228	/* Compare a single component of the path name. */
2229
2230	rc = match_comp(mask, name, flags);
2231	if (rc != FNM_MATCH)
2232	return rc;
2233
2234	/* Skip to the next component or to the end of the path name. */
2235
2236	mask = skip_comp_os2(mask);
2237	name = skip_comp_os2(name);
2238	}
2239
2240	/* If we reached the ends of both strings, the names match. */
2241
2242	if (mask == 0 && name == 0)
2243	return FNM_MATCH;
2244
2245	/* The names do not match. */
2246
2247	return FNM_NOMATCH;
2248	}
2249
2250	/*
2251	*@@ strhMatchOS2:
2252	* this matches wildcards, similar to what DosEditName does.
2253	* However, this does not require a file to be present, but
2254	* works on strings only.
2255	*/
2256
2257	BOOL strhMatchOS2(const char pcszMask, // in: mask (e.g. ".txt")
2258	const char *pcszName) // in: string to check (e.g. "test.txt")
2259	{
2260	return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2261	(const unsigned char *)pcszName,
2262	FNM_OS2 \| FNM_IGNORECASE)
2263	== FNM_MATCH)
2264	);
2265	}
2266
2267	/*
2268	*@@ strhMatchExt:
2269	* like strhMatchOS2, but this takes all the flags
2270	* for input.
2271	*
2272	*@@added V0.9.15 (2001-09-14) [umoeller]
2273	*/
2274
2275	BOOL strhMatchExt(const char pcszMask, // in: mask (e.g. ".txt")
2276	const char *pcszName, // in: string to check (e.g. "test.txt")
2277	unsigned flags) // in: FNM_* flags
2278	{
2279	return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2280	(const unsigned char *)pcszName,
2281	flags)
2282	== FNM_MATCH)
2283	);
2284	}
2285
2286	/* ******************************************************************
2287	*
2288	* Fast string searches
2289	*
2290	********************************************************************/
2291
// Assertions are compiled out in this file. The macro still expands to
// a void expression (as the standard assert() does under NDEBUG) so that
// ASSERT(x) remains valid wherever an expression is expected; the
// argument is never evaluated.
#define ASSERT(a) ((void)0)
2293
2294	/*
2295	* The following code has been taken from the "Standard
2296	* Function Library", file sflfind.c, and only slightly
2297	* modified to conform to the rest of this file.
2298	*
2299	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2300	* Revised: 98/05/04
2301	*
2302	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2303	*
2304	* The SFL Licence allows incorporating SFL code into other
2305	* programs, as long as the copyright is reprinted and the
2306	* code is marked as modified, so this is what we do.
2307	*/
2308
2309	/*
2310	*@@ strhmemfind:
2311	* searches for a pattern in a block of memory using the
2312	* Boyer-Moore-Horspool-Sunday algorithm.
2313	*
2314	* The block and pattern may contain any values; you must
2315	* explicitly provide their lengths. If you search for strings,
2316	* use strlen() on the buffers.
2317	*
2318	* Returns a pointer to the pattern if found within the block,
2319	* or NULL if the pattern was not found.
2320	*
2321	* This algorithm needs a "shift table" to cache data for the
2322	* search pattern. This table can be reused when performing
2323	* several searches with the same pattern.
2324	*
* "shift" must point to an array big enough to hold 256 (2**8)
* "size_t" values, one for each possible byte value.
2327	*
2328	* If (*repeat_find == FALSE), the shift table is initialized.
2329	* So on the first search with a given pattern, *repeat_find
2330	* should be FALSE. This function sets it to TRUE after the
2331	* shift table is initialised, allowing the initialisation
2332	* phase to be skipped on subsequent searches.
2333	*
2334	* This function is most effective when repeated searches are
2335	* made for the same pattern in one or more large buffers.
2336	*
2337	* Example:
2338	*
2339	+ PSZ pszHaystack = "This is a sample string.",
2340	+ pszNeedle = "string";
2341	+ size_t shift[256];
2342	+ BOOL fRepeat = FALSE;
2343	+
2344	+ PSZ pFound = strhmemfind(pszHaystack,
2345	+ strlen(pszHaystack), // block size
2346	+ pszNeedle,
2347	+ strlen(pszNeedle), // pattern size
2348	+ shift,
2349	+ &fRepeat);
2350	*
2351	* Taken from the "Standard Function Library", file sflfind.c.
2352	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2353	* Slightly modified by umoeller.
2354	*
2355	*@@added V0.9.3 (2000-05-08) [umoeller]
2356	*/
2357
2358	void* strhmemfind(const void *in_block, // in: block containing data
2359	size_t block_size, // in: size of block in bytes
2360	const void *in_pattern, // in: pattern to search for
2361	size_t pattern_size, // in: size of pattern block
2362	size_t *shift, // in/out: shift table (search buffer)
2363	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
2364	{
2365	size_t byte_nbr, // Distance through block
2366	match_size; // Size of matched part
2367	const unsigned char
2368	*match_base = NULL, // Base of match of pattern
2369	*match_ptr = NULL, // Point within current match
2370	*limit = NULL; // Last potiental match point
2371	const unsigned char
2372	block = (unsigned char ) in_block, // Concrete pointer to block data
2373	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
2374
2375	if ( (block == NULL)
2376	\|\| (pattern == NULL)
2377	\|\| (shift == NULL)
2378	)
2379	return (NULL);
2380
2381	// Pattern must be smaller or equal in size to string
2382	if (block_size < pattern_size)
2383	return (NULL); // Otherwise it's not found
2384
2385	if (pattern_size == 0) // Empty patterns match at start
2386	return ((void *)block);
2387
2388	// Build the shift table unless we're continuing a previous search
2389
2390	// The shift table determines how far to shift before trying to match
2391	// again, if a match at this point fails. If the byte after where the
2392	// end of our pattern falls is not in our pattern, then we start to
2393	// match again after that byte; otherwise we line up the last occurence
2394	// of that byte in our pattern under that byte, and try match again.
2395
2396	if (!repeat_find \|\| !*repeat_find)
2397	{
2398	for (byte_nbr = 0;
2399	byte_nbr < 256;
2400	byte_nbr++)
2401	shift[byte_nbr] = pattern_size + 1;
2402	for (byte_nbr = 0;
2403	byte_nbr < pattern_size;
2404	byte_nbr++)
2405	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2406
2407	if (repeat_find)
2408	*repeat_find = TRUE;
2409	}
2410
2411	// Search for the block, each time jumping up by the amount
2412	// computed in the shift table
2413
2414	limit = block + (block_size - pattern_size + 1);
2415	ASSERT (limit > block);
2416
2417	for (match_base = block;
2418	match_base < limit;
2419	match_base += shift[*(match_base + pattern_size)])
2420	{
2421	match_ptr = match_base;
2422	match_size = 0;
2423
2424	// Compare pattern until it all matches, or we find a difference
2425	while (*match_ptr++ == pattern[match_size++])
2426	{
2427	ASSERT (match_size <= pattern_size &&
2428	match_ptr == (match_base + match_size));
2429
2430	// If we found a match, return the start address
2431	if (match_size >= pattern_size)
2432	return ((void*)(match_base));
2433
2434	}
2435	}
2436	return (NULL); // Found nothing
2437	}
2438
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the
 *      match within the string, or NULL if the pattern was not found.
 *      Will match strings irrespective of case. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      Special cases:
 *
 *      -- If either pointer is NULL, ASSERT fires (when debugging)
 *         and NULL is returned.
 *
 *      -- If the pattern is longer than the string, NULL is returned.
 *
 *      -- An empty pattern matches at the start, so "string" itself
 *         is returned.
 *
 *      The returned pointer points into "string"; the const qualifier
 *      is cast away for the caller's convenience.
 *
 *      Case folding is done with tolower(). Every character is converted
 *      to unsigned char before it is handed to tolower(), because passing
 *      a negative value (a byte >= 0x80 where plain char is signed)
 *      to the <ctype.h> functions is undefined behaviour.
 *
 *      Examples:
 *
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 *
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t
        shift [256];                        // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                           // Index into byte array
        match_size;                         // Size of matched part
    const char
        *match_base = NULL,                 // Base of match of pattern
        *limit = NULL;                      // One past the last potential match point

    ASSERT (string);                        // Expect non-NULL pointers, but
    ASSERT (pattern);                       // fail gracefully if not debugging
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                      // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    // The table is indexed by the lower-cased byte value; convert to
    // unsigned char BEFORE calling tolower() so the argument is never
    // negative.
    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    limit = string + (string_size - pattern_size + 1);
    ASSERT (limit > string);

    // match_base [pattern_size] is the byte just behind the current window.
    // It is at worst the terminating null byte, whose shift is
    // pattern_size + 1 and therefore ends the loop.
    for (match_base = string;
         match_base < limit;
         match_base += shift [(unsigned char)
                              tolower ((unsigned char) match_base [pattern_size])])
    {
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (    tolower ((unsigned char) match_base [match_size])
                == tolower ((unsigned char) pattern [match_size]))
        {
            // If we found a match, return the start address
            if (++match_size >= pattern_size)
                return ((char *) match_base);
        }
    }
    return (NULL);                          // Found nothing
}
2533

Note: See TracBrowser for help on using the repository browser.

Download in other formats: