Context Navigation

source: trunk/src/helpers/stringh.c@ 110

Visit:

Last change on this file since 110 was 108, checked in by umoeller, 24 years ago
Lots of updates from the last week for conditional compiles and other stuff.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 73.7 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#include <os2.h>
45
46	#include <stdlib.h>
47	#include <stdio.h>
48	#include <string.h>
49	#include <ctype.h>
50	#include <math.h>
51
52	#include "setup.h" // code generation and debugging options
53
54	#define DONT_REPLACE_STRINGH_MALLOC
55	#include "helpers\stringh.h"
56	#include "helpers\xstring.h" // extended string helpers
57
58	#pragma hdrstop
59
60	/*
61	*@@category: Helpers\C helpers\String management
62	* See stringh.c and xstring.c.
63	*/
64
65	/*
66	*@@category: Helpers\C helpers\String management\C string helpers
67	* See stringh.c.
68	*/
69
70	/*
71	*@@ strhcpy:
72	* like strdup, but this one doesn't crash if string2 is NULL,
73	* but sets the first byte in string1 to \0 instead.
74	*
75	*@@added V0.9.14 (2001-08-01) [umoeller]
76	*/
77
78	PSZ strhcpy(PSZ string1, const char *string2)
79	{
80	if (string2)
81	return (strcpy(string1, string2));
82
83	*string1 = '\0';
84	return (string1);
85	}
86
87	#ifdef __DEBUG_MALLOC_ENABLED__
88
89	/*
90	*@@ strhdup:
91	* memory debug version of strhdup.
92	*
93	*@@added V0.9.0 [umoeller]
94	*/
95
96	PSZ strhdupDebug(const char *pszSource,
97	const char *pcszSourceFile,
98	unsigned long ulLine,
99	const char *pcszFunction)
100	{
101	if (pszSource)
102	{
103	PSZ p = (PSZ)memdMalloc(strlen(pszSource) + 1,
104	pcszSourceFile,
105	ulLine,
106	pcszFunction);
107	strcpy(p, pszSource);
108	return (p);
109	}
110	else
111	return (0);
112	}
113
114	#endif // __DEBUG_MALLOC_ENABLED__
115
116	/*
117	*@@ strhdup:
118	* like strdup, but this one doesn't crash if pszSource is NULL,
119	* but returns NULL also.
120	*
121	*@@added V0.9.0 [umoeller]
122	*/
123
124	PSZ strhdup(const char *pszSource)
125	{
126	if (pszSource)
127	return (strdup(pszSource));
128	else
129	return (0);
130	}
131
/*
 *@@ strhcmp:
 *      better strcmp. This doesn't crash if any of the
 *      string pointers are NULL, but returns a proper
 *      value then: a NULL string sorts before any
 *      non-NULL string, and two NULL strings are equal.
 *
 *      Besides, this is guaranteed to only return -1, 0,
 *      or +1, while strcmp can return any positive or
 *      negative value. This is useful for tree comparison
 *      funcs.
 *
 *@@added V0.9.9 (2001-02-16) [umoeller]
 */

int strhcmp(const char *p1, const char *p2)
{
    if (p1 && p2)
    {
        int i = strcmp(p1, p2);
        // normalize to -1, 0, +1
        return ((i > 0) - (i < 0));
    }

    if (p1)
        // but p2 is NULL: p1 greater than p2 then
        return (+1);

    if (p2)
        // but p1 is NULL: p1 less than p2 then
        return (-1);

    // both strings are NULL
    return (0);
}
164
/*
 *@@ strhicmp:
 *      like strhcmp, but compares without respect
 *      to case (using stricmp).
 *
 *      Returns -1, 0, or +1 only. A NULL string sorts
 *      before any non-NULL string, and two NULL strings
 *      are equal.
 *
 *@@added V0.9.9 (2001-04-07) [umoeller]
 */

int strhicmp(const char *p1, const char *p2)
{
    if (p1 && p2)
    {
        int i = stricmp(p1, p2);
        // normalize to -1, 0, +1
        return ((i > 0) - (i < 0));
    }

    if (p1)
        // but p2 is NULL: p1 greater than p2 then
        return (+1);

    if (p2)
        // but p1 is NULL: p1 less than p2 then
        return (-1);

    // both strings are NULL
    return (0);
}
191
192	/*
193	*@@ strhistr:
194	* like strstr, but case-insensitive.
195	*
196	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
197	*/
198
199	PSZ strhistr(const char string1, const char string2)
200	{
201	PSZ prc = NULL;
202
203	if ((string1) && (string2))
204	{
205	PSZ pszSrchIn = strdup(string1);
206	PSZ pszSrchFor = strdup(string2);
207
208	if ((pszSrchIn) && (pszSrchFor))
209	{
210	strupr(pszSrchIn);
211	strupr(pszSrchFor);
212
213	prc = strstr(pszSrchIn, pszSrchFor);
214	if (prc)
215	{
216	// prc now has the first occurence of the string,
217	// but in pszSrchIn; we need to map this
218	// return value to the original string
219	prc = (prc-pszSrchIn) // offset in pszSrchIn
220	+ (PSZ)string1;
221	}
222	}
223	if (pszSrchFor)
224	free(pszSrchFor);
225	if (pszSrchIn)
226	free(pszSrchIn);
227	}
228	return (prc);
229	}
230
231	/*
232	*@@ strhncpy0:
233	* like strncpy, but always appends a 0 character.
234	*/
235
236	ULONG strhncpy0(PSZ pszTarget,
237	const char *pszSource,
238	ULONG cbSource)
239	{
240	ULONG ul = 0;
241	PSZ pTarget = pszTarget,
242	pSource = (PSZ)pszSource;
243
244	for (ul = 0; ul < cbSource; ul++)
245	if (*pSource)
246	pTarget++ = pSource++;
247	else
248	break;
249	*pTarget = 0;
250
251	return (ul);
252	}
253
254	/*
255	* strhCount:
256	* this counts the occurences of c in pszSearch.
257	*/
258
259	ULONG strhCount(const char *pszSearch,
260	CHAR c)
261	{
262	PSZ p = (PSZ)pszSearch;
263	ULONG ulCount = 0;
264	while (TRUE)
265	{
266	p = strchr(p, c);
267	if (p)
268	{
269	ulCount++;
270	p++;
271	}
272	else
273	break;
274	}
275	return (ulCount);
276	}
277
278	/*
279	*@@ strhIsDecimal:
280	* returns TRUE if psz consists of decimal digits only.
281	*/
282
283	BOOL strhIsDecimal(PSZ psz)
284	{
285	PSZ p = psz;
286	while (*p != 0)
287	{
288	if (isdigit(*p) == 0)
289	return (FALSE);
290	p++;
291	}
292
293	return (TRUE);
294	}
295
296	#ifdef __DEBUG_MALLOC_ENABLED__
297
298	/*
299	*@@ strhSubstrDebug:
300	* memory debug version of strhSubstr.
301	*
302	*@@added V0.9.14 (2001-08-01) [umoeller]
303	*/
304
305	PSZ strhSubstrDebug(const char *pBegin, // in: first char
306	const char *pEnd, // in: last char (not included)
307	const char *pcszSourceFile,
308	unsigned long ulLine,
309	const char *pcszFunction)
310	{
311	PSZ pszSubstr = NULL;
312
313	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
314	{
315	ULONG cbSubstr = (pEnd - pBegin);
316	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
317	pcszSourceFile,
318	ulLine,
319	pcszFunction))
320	{
321	// strhncpy0(pszSubstr, pBegin, cbSubstr);
322	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
323	*(pszSubstr + cbSubstr) = '\0';
324	}
325	}
326
327	return (pszSubstr);
328	}
329
330	#endif // __DEBUG_MALLOC_ENABLED__
331
332	/*
333	*@@ strhSubstr:
334	* this creates a new PSZ containing the string
335	* from pBegin to pEnd, excluding the pEnd character.
336	* The new string is null-terminated. The caller
337	* must free() the new string after use.
338	*
339	* Example:
340	+ "1234567890"
341	+ ^ ^
342	+ p1 p2
343	+ strhSubstr(p1, p2)
344	* would return a new string containing "2345678".
345	*
346	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
347	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
348	*/
349
350	PSZ strhSubstr(const char *pBegin, // in: first char
351	const char *pEnd) // in: last char (not included)
352	{
353	PSZ pszSubstr = NULL;
354
355	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
356	{
357	ULONG cbSubstr = (pEnd - pBegin);
358	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
359	{
360	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
361	*(pszSubstr + cbSubstr) = '\0';
362	}
363	}
364
365	return (pszSubstr);
366	}
367
368	/*
369	*@@ strhExtract:
370	* searches pszBuf for the cOpen character and returns
371	* the data in between cOpen and cClose, excluding
372	* those two characters, in a newly allocated buffer
373	* which you must free() afterwards.
374	*
375	* Spaces and newlines/linefeeds are skipped.
376	*
377	* If the search was successful, the new buffer
378	* is returned and, if (ppEnd != NULL), *ppEnd points
379	* to the first character after the cClose character
380	* found in the buffer.
381	*
382	* If the search was not successful, NULL is
383	* returned, and *ppEnd is unchanged.
384	*
385	* If another cOpen character is found before
386	* cClose, matching cClose characters will be skipped.
387	* You can therefore nest the cOpen and cClose
388	* characters.
389	*
390	* This function ignores cOpen and cClose characters
391	* in C-style comments and strings surrounded by
392	* double quotes.
393	*
394	* Example:
395	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
396	+ pEnd;
397	+ strhExtract(pszBuf,
398	+ '{', '}',
399	+ &pEnd)
400	* would return a new buffer containing " --blah-- ",
401	* and ppEnd would afterwards point to the space
402	* before "next" in the static buffer.
403	*
404	*@@added V0.9.0 [umoeller]
405	*/
406
407	PSZ strhExtract(PSZ pszBuf, // in: search buffer
408	CHAR cOpen, // in: opening char
409	CHAR cClose, // in: closing char
410	PSZ *ppEnd) // out: if != NULL, receives first character after closing char
411	{
412	PSZ pszReturn = NULL;
413
414	if (pszBuf)
415	{
416	PSZ pOpen = strchr(pszBuf, cOpen);
417	if (pOpen)
418	{
419	// opening char found:
420	// now go thru the whole rest of the buffer
421	PSZ p = pOpen+1;
422	LONG lLevel = 1; // if this goes 0, we're done
423	while (*p)
424	{
425	if (*p == cOpen)
426	lLevel++;
427	else if (*p == cClose)
428	{
429	lLevel--;
430	if (lLevel <= 0)
431	{
432	// matching closing bracket found:
433	// extract string
434	pszReturn = strhSubstr(pOpen+1, // after cOpen
435	p); // excluding cClose
436	if (ppEnd)
437	*ppEnd = p+1;
438	break; // while (*p)
439	}
440	}
441	else if (*p == '\"')
442	{
443	// beginning of string:
444	PSZ p2 = p+1;
445	// find end of string
446	while ((p2) && (p2 != '\"'))
447	p2++;
448
449	if (*p2 == '\"')
450	// closing quote found:
451	// search on after that
452	p = p2; // raised below
453	else
454	break; // while (*p)
455	}
456
457	p++;
458	}
459	}
460	}
461
462	return (pszReturn);
463	}
464
465	/*
466	*@@ strhQuote:
467	* similar to strhExtract, except that
468	* opening and closing chars are the same,
469	* and therefore no nesting is possible.
470	* Useful for extracting stuff between
471	* quotes.
472	*
473	*@@added V0.9.0 [umoeller]
474	*/
475
476	PSZ strhQuote(PSZ pszBuf,
477	CHAR cQuote,
478	PSZ *ppEnd)
479	{
480	PSZ pszReturn = NULL,
481	p1 = NULL;
482	if ((p1 = strchr(pszBuf, cQuote)))
483	{
484	PSZ p2 = strchr(p1+1, cQuote);
485	if (p2)
486	{
487	pszReturn = strhSubstr(p1+1, p2);
488	if (ppEnd)
489	// store closing char
490	*ppEnd = p2 + 1;
491	}
492	}
493
494	return (pszReturn);
495	}
496
497	/*
498	*@@ strhStrip:
499	* removes all double spaces.
500	* This copies within the "psz" buffer.
501	* If any double spaces are found, the
502	* string will be shorter than before,
503	* but the buffer is _not_ reallocated,
504	* so there will be unused bytes at the
505	* end.
506	*
507	* Returns the number of spaces removed.
508	*
509	*@@added V0.9.0 [umoeller]
510	*/
511
512	ULONG strhStrip(PSZ psz) // in/out: string
513	{
514	PSZ p;
515	ULONG cb = strlen(psz),
516	ulrc = 0;
517
518	for (p = psz; p < psz+cb; p++)
519	{
520	if ((p == ' ') && ((p+1) == ' '))
521	{
522	PSZ p2 = p;
523	while (*p2)
524	{
525	p2 = (p2+1);
526	p2++;
527	}
528	cb--;
529	p--;
530	ulrc++;
531	}
532	}
533	return (ulrc);
534	}
535
536	/*
537	*@@ strhins:
538	* this inserts one string into another.
539	*
540	* pszInsert is inserted into pszBuffer at offset
541	* ulInsertOfs (which counts from 0).
542	*
543	* A newly allocated string is returned. pszBuffer is
544	* not changed. The new string should be free()'d after
545	* use.
546	*
547	* Upon errors, NULL is returned.
548	*
549	*@@changed V0.9.0 [umoeller]: completely rewritten.
550	*/
551
552	PSZ strhins(const char *pcszBuffer,
553	ULONG ulInsertOfs,
554	const char *pcszInsert)
555	{
556	PSZ pszNew = NULL;
557
558	if ((pcszBuffer) && (pcszInsert))
559	{
560	do {
561	ULONG cbBuffer = strlen(pcszBuffer);
562	ULONG cbInsert = strlen(pcszInsert);
563
564	// check string length
565	if (ulInsertOfs > cbBuffer + 1)
566	break; // do
567
568	// OK, let's go.
569	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
570
571	// copy stuff before pInsertPos
572	memcpy(pszNew,
573	pcszBuffer,
574	ulInsertOfs);
575	// copy string to be inserted
576	memcpy(pszNew + ulInsertOfs,
577	pcszInsert,
578	cbInsert);
579	// copy stuff after pInsertPos
580	strcpy(pszNew + ulInsertOfs + cbInsert,
581	pcszBuffer + ulInsertOfs);
582	} while (FALSE);
583	}
584
585	return (pszNew);
586	}
587
588	/*
589	*@@ strhFindReplace:
590	* wrapper around xstrFindReplace to work with C strings.
591	* Note that *ppszBuf can get reallocated and must
592	* be free()'able.
593	*
594	* Repetitive use of this wrapper is not recommended
595	* because it is considerably slower than xstrFindReplace.
596	*
597	*@@added V0.9.6 (2000-11-01) [umoeller]
598	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
599	*/
600
601	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
602	PULONG pulOfs, // in: where to begin search (0 = start);
603	// out: ofs of first char after replacement string
604	const char *pcszSearch, // in: search string; cannot be NULL
605	const char *pcszReplace) // in: replacement string; cannot be NULL
606	{
607	ULONG ulrc = 0;
608	XSTRING xstrBuf,
609	xstrFind,
610	xstrReplace;
611	size_t ShiftTable[256];
612	BOOL fRepeat = FALSE;
613	xstrInitSet(&xstrBuf, *ppszBuf);
614	// reallocated and returned, so we're safe
615	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
616	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
617	// these two are never freed, so we're safe too
618
619	if ((ulrc = xstrFindReplace(&xstrBuf,
620	pulOfs,
621	&xstrFind,
622	&xstrReplace,
623	ShiftTable,
624	&fRepeat)))
625	// replaced:
626	*ppszBuf = xstrBuf.psz;
627
628	return (ulrc);
629	}
630
631	/*
632	* strhWords:
633	* returns the no. of words in "psz".
634	* A string is considered a "word" if
635	* it is surrounded by spaces only.
636	*
637	*@@added V0.9.0 [umoeller]
638	*/
639
640	ULONG strhWords(PSZ psz)
641	{
642	PSZ p;
643	ULONG cb = strlen(psz),
644	ulWords = 0;
645	if (cb > 1)
646	{
647	ulWords = 1;
648	for (p = psz; p < psz+cb; p++)
649	if (*p == ' ')
650	ulWords++;
651	}
652	return (ulWords);
653	}
654
655	/*
656	*@@ strhGetWord:
657	* finds word boundaries.
658	*
659	* *ppszStart is used as the beginning of the
660	* search.
661	*
662	* If a word is found, *ppszStart is set to
663	* the first character of the word which was
664	* found and *ppszEnd receives the address
665	* of the first character _after_ the word,
666	* which is probably a space or a \n or \r char.
667	* We then return TRUE.
668	*
669	* The search is stopped if a null character
670	* is found or pLimit is reached. In that case,
671	* FALSE is returned.
672	*
673	*@@added V0.9.1 (2000-02-13) [umoeller]
674	*/
675
676	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
677	// out: start of word (if TRUE is returned)
678	const char pLimit, // in: ptr to last char after ppszStart to be
679	// searched; if the word does not end before
680	// or with this char, FALSE is returned
681	const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
682	const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
683	PSZ *ppszEnd) // out: first char _after_ word
684	// (if TRUE is returned)
685	{
686	// characters after which a word can be started
687	// const char *pcszBeginChars = "\x0d\x0a ";
688	// const char *pcszEndChars = "\x0d\x0a /-";
689
690	PSZ pStart = *ppszStart;
691
692	// find start of word
693	while ( (pStart < (PSZ)pLimit)
694	&& (strchr(pcszBeginChars, *pStart))
695	)
696	// if char is a "before word" char: go for next
697	pStart++;
698
699	if (pStart < (PSZ)pLimit)
700	{
701	// found a valid "word start" character
702	// (which is not in pcszBeginChars):
703
704	// find end of word
705	PSZ pEndOfWord = pStart;
706	while ( (pEndOfWord <= (PSZ)pLimit)
707	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
708	)
709	// if char is not an "end word" char: go for next
710	pEndOfWord++;
711
712	if (pEndOfWord <= (PSZ)pLimit)
713	{
714	// whoa, got a word:
715	*ppszStart = pStart;
716	*ppszEnd = pEndOfWord;
717	return (TRUE);
718	}
719	}
720
721	return (FALSE);
722	}
723
724	/*
725	*@@ strhIsWord:
726	* returns TRUE if p points to a "word"
727	* in pcszBuf.
728	*
729	* p is considered a word if the character _before_
730	* it is in pcszBeginChars and the char _after_
731	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
732	*
733	*@@added V0.9.6 (2000-11-12) [umoeller]
734	*/
735
736	BOOL strhIsWord(const char *pcszBuf,
737	const char *p, // in: start of word
738	ULONG cbSearch, // in: length of word
739	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
740	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
741	{
742	BOOL fEndOK = FALSE;
743
744	// check previous char
745	if ( (p == pcszBuf)
746	\|\| (strchr(pcszBeginChars, *(p-1)))
747	)
748	{
749	// OK, valid begin char:
750	// check end char
751	CHAR cNextChar = *(p + cbSearch);
752	if (cNextChar == 0)
753	fEndOK = TRUE;
754	else
755	{
756	char *pc = strchr(pcszEndChars, cNextChar);
757	if (pc)
758	// OK, is end char: avoid doubles of that char,
759	// but allow spaces
760	if ( (cNextChar+1 != *pc)
761	\|\| (cNextChar+1 == ' ')
762	\|\| (cNextChar+1 == 0)
763	)
764	fEndOK = TRUE;
765	}
766	}
767
768	return (fEndOK);
769	}
770
771	/*
772	*@@ strhFindWord:
773	* searches for pszSearch in pszBuf, which is
774	* returned if found (or NULL if not).
775	*
776	* As opposed to strstr, this finds pszSearch
777	* only if it is a "word". A search string is
778	* considered a word if the character _before_
779	* it is in pcszBeginChars and the char _after_
780	* it is in pcszEndChars.
781	*
782	* Example:
783	+ strhFindWord("This is an example.", "is");
784	+ returns ...........^ this, but not the "is" in "This".
785	*
786	* The algorithm here uses strstr to find pszSearch in pszBuf
787	* and performs additional "is-word" checks for each item found
788	* (by calling strhIsWord).
789	*
790	* Note that this function is fairly slow compared to xstrFindWord.
791	*
792	*@@added V0.9.0 (99-11-08) [umoeller]
793	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
794	*/
795
796	PSZ strhFindWord(const char *pszBuf,
797	const char *pszSearch,
798	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
799	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
800	{
801	PSZ pszReturn = 0;
802	ULONG cbBuf = strlen(pszBuf),
803	cbSearch = strlen(pszSearch);
804
805	if ((cbBuf) && (cbSearch))
806	{
807	const char *p = pszBuf;
808
809	do // while p
810	{
811	p = strstr(p, pszSearch);
812	if (p)
813	{
814	// string found:
815	// check if that's a word
816
817	if (strhIsWord(pszBuf,
818	p,
819	cbSearch,
820	pcszBeginChars,
821	pcszEndChars))
822	{
823	// valid end char:
824	pszReturn = (PSZ)p;
825	break;
826	}
827
828	p += cbSearch;
829	}
830	} while (p);
831
832	}
833	return (pszReturn);
834	}
835
836	/*
837	*@@ strhFindEOL:
838	* returns a pointer to the next \r, \n or null character
839	* following pszSearchIn. Stores the offset in *pulOffset.
840	*
841	* This should never return NULL because at some point,
842	* there will be a null byte in your string.
843	*
844	*@@added V0.9.4 (2000-07-01) [umoeller]
845	*/
846
847	PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
848	PULONG pulOffset) // out: offset (ptr can be NULL)
849	{
850	const char *p = pcszSearchIn,
851	*prc = 0;
852	while (TRUE)
853	{
854	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
855	{
856	prc = p;
857	break;
858	}
859	p++;
860	}
861
862	if ((pulOffset) && (prc))
863	*pulOffset = prc - pcszSearchIn;
864
865	return ((PSZ)prc);
866	}
867
868	/*
869	*@@ strhFindNextLine:
870	* like strhFindEOL, but this returns the character
871	* _after_ \r or \n. Note that this might return
872	* a pointer to terminating NULL character also.
873	*/
874
875	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
876	{
877	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
878	// pEOL now points to the \r char or the terminating 0 byte;
879	// if not null byte, advance pointer
880	PSZ pNextLine = pEOL;
881	if (*pNextLine == '\r')
882	pNextLine++;
883	if (*pNextLine == '\n')
884	pNextLine++;
885	if (pulOffset)
886	*pulOffset = pNextLine - pszSearchIn;
887	return (pNextLine);
888	}
889
890	/*
891	*@@ strhBeautifyTitle:
892	* replaces all line breaks (0xd, 0xa) with spaces.
893	*
894	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
895	*/
896
897	BOOL strhBeautifyTitle(PSZ psz)
898	{
899	BOOL rc = FALSE;
900	CHAR *p = psz;
901
902	while(*p)
903	if ( (*p == '\r')
904	\|\| (*p == '\n')
905	)
906	{
907	rc = TRUE;
908	if ( (p != psz)
909	&& (p[-1] == ' ')
910	)
911	memmove(p, p + 1, strlen(p));
912	else
913	*p++ = ' ';
914	}
915	else
916	p++;
917
918	return (rc);
919	}
920
921	/*
922	* strhFindAttribValue:
923	* searches for pszAttrib in pszSearchIn; if found,
924	* returns the first character after the "=" char.
925	* If "=" is not found, a space, \r, and \n are
926	* also accepted. This function searches without
927	* respecting case.
928	*
929	* <B>Example:</B>
930	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
931	+
932	+ returns ....................... ^ this address.
933	*
934	*@@added V0.9.0 [umoeller]
935	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
936	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
937	*/
938
939	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
940	{
941	PSZ prc = 0;
942	PSZ pszSearchIn2, p;
943	ULONG cbAttrib = strlen(pszAttrib),
944	ulLength = strlen(pszSearchIn);
945
946	// use alloca(), so memory is freed on function exit
947	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
948	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
949
950	// 1) find token, (space char, \n, \r, \t)
951	p = strtok(pszSearchIn2, " \n\r\t");
952	while (p)
953	{
954	CHAR c2;
955	PSZ pOrig;
956
957	// check tag name
958	if (!strnicmp(p, pszAttrib, cbAttrib))
959	{
960	// position in original string
961	pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
962
963	// yes:
964	prc = pOrig + cbAttrib;
965	c2 = *prc;
966	while ( ( (c2 == ' ')
967	\|\| (c2 == '=')
968	\|\| (c2 == '\n')
969	\|\| (c2 == '\r')
970	)
971	&& (c2 != 0)
972	)
973	c2 = *++prc;
974
975	break;
976	}
977
978	p = strtok(NULL, " \n\r\t");
979	}
980
981	return (prc);
982	}
983
984	/* PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
985	{
986	PSZ prc = 0;
987	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
988	p,
989	p2;
990	ULONG cbAttrib = strlen(pszAttrib);
991
992	// 1) find space char
993	while ((p = strchr(pszSearchIn2, ' ')))
994	{
995	CHAR c;
996	p++;
997	if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
998	{
999	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1000	// now check whether the p+strlen(pszAttrib)
1001	// is a valid end-of-tag character
1002	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1003	&& ( (c == ' ')
1004	\|\| (c == '>')
1005	\|\| (c == '=')
1006	\|\| (c == '\r')
1007	\|\| (c == '\n')
1008	\|\| (c == 0)
1009	)
1010	)
1011	{
1012	// yes:
1013	CHAR c2;
1014	p2 = p + cbAttrib;
1015	c2 = *p2;
1016	while ( ( (c2 == ' ')
1017	\|\| (c2 == '=')
1018	\|\| (c2 == '\n')
1019	\|\| (c2 == '\r')
1020	)
1021	&& (c2 != 0)
1022	)
1023	c2 = *++p2;
1024
1025	prc = p2;
1026	break; // first while
1027	}
1028	}
1029	else
1030	break;
1031
1032	pszSearchIn2++;
1033	}
1034	return (prc);
1035	} */
1036
1037	/*
1038	* strhGetNumAttribValue:
1039	* stores the numerical parameter value of an HTML-style
1040	* tag in *pl.
1041	*
1042	* Returns the address of the tag parameter in the
1043	* search buffer, if found, or NULL.
1044	*
1045	* <B>Example:</B>
1046	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1047	*
1048	* stores 123 in the "l" variable.
1049	*
1050	*@@added V0.9.0 [umoeller]
1051	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1052	*/
1053
1054	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1055	const char *pszTag, // e.g. "INDEX"
1056	PLONG pl) // out: numerical value
1057	{
1058	PSZ pParam;
1059	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1060	{
1061	if ( (*pParam == '\"')
1062	\|\| (*pParam == '\'')
1063	)
1064	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1065
1066	sscanf(pParam, "%ld", pl);
1067	}
1068
1069	return (pParam);
1070	}
1071
1072	/*
1073	* strhGetTextAttr:
1074	* retrieves the attribute value of a textual HTML-style tag
1075	* in a newly allocated buffer, which is returned,
1076	* or NULL if attribute not found.
1077	* If an attribute value is to contain spaces, it
1078	* must be enclosed in quotes.
1079	*
1080	* The offset of the attribute data in pszSearchIn is
1081	* returned in *pulOffset so that you can do multiple
1082	* searches.
1083	*
1084	* This returns a new buffer, which should be free()'d after use.
1085	*
1086	* <B>Example:</B>
1087	+ ULONG ulOfs = 0;
1088	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1089	+ ............^ ulOfs
1090	*
1091	* returns a new string with the value "blublub" (without
1092	* quotes) and sets ulOfs to 12.
1093	*
1094	*@@added V0.9.0 [umoeller]
1095	*/
1096
1097	PSZ strhGetTextAttr(const char *pszSearchIn,
1098	const char *pszTag,
1099	PULONG pulOffset) // out: offset where found
1100	{
1101	PSZ pParam,
1102	pParam2,
1103	prc = NULL;
1104	ULONG ulCount = 0;
1105	LONG lNestingLevel = 0;
1106
1107	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1108	{
1109	// determine end character to search for: a space
1110	CHAR cEnd = ' ';
1111	if (*pParam == '\"')
1112	{
1113	// or, if the data is enclosed in quotes, a quote
1114	cEnd = '\"';
1115	pParam++;
1116	}
1117
1118	if (pulOffset)
1119	// store the offset
1120	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1121
1122	// now find end of attribute
1123	pParam2 = pParam;
1124	while (*pParam)
1125	{
1126	if (*pParam == cEnd)
1127	// end character found
1128	break;
1129	else if (*pParam == '<')
1130	// yet another opening tag found:
1131	// this is probably some "<" in the attributes
1132	lNestingLevel++;
1133	else if (*pParam == '>')
1134	{
1135	lNestingLevel--;
1136	if (lNestingLevel < 0)
1137	// end of tag found:
1138	break;
1139	}
1140	ulCount++;
1141	pParam++;
1142	}
1143
1144	// copy attribute to new buffer
1145	if (ulCount)
1146	{
1147	prc = (PSZ)malloc(ulCount+1);
1148	memcpy(prc, pParam2, ulCount);
1149	*(prc+ulCount) = 0;
1150	}
1151	}
1152	return (prc);
1153	}
1154
1155	/*
1156	* strhFindEndOfTag:
1157	* returns a pointer to the ">" char
1158	* which seems to terminate the tag beginning
1159	* after pszBeginOfTag.
1160	*
1161	* If additional "<" chars are found, we look
1162	* for additional ">" characters too.
1163	*
1164	* Note: You must pass the address of the opening
1165	* '<' character to this function.
1166	*
1167	* Example:
1168	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1169	+ strhFindEndOfTag(pszTest)
1170	+ returns.................................^ this.
1171	*
1172	*@@added V0.9.0 [umoeller]
1173	*/
1174
1175	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1176	{
1177	PSZ p = (PSZ)pszBeginOfTag,
1178	prc = NULL;
1179	LONG lNestingLevel = 0;
1180
1181	while (*p)
1182	{
1183	if (*p == '<')
1184	// another opening tag found:
1185	lNestingLevel++;
1186	else if (*p == '>')
1187	{
1188	// closing tag found:
1189	lNestingLevel--;
1190	if (lNestingLevel < 1)
1191	{
1192	// corresponding: return this
1193	prc = p;
1194	break;
1195	}
1196	}
1197	p++;
1198	}
1199
1200	return (prc);
1201	}
1202
1203	/*
1204	* strhGetBlock:
1205	* this complex function searches the given string
1206	* for a pair of opening/closing HTML-style tags.
1207	*
1208	* If found, this routine returns TRUE and does
1209	* the following:
1210	*
1211	* 1) allocate a new buffer, copy the text
1212	* enclosed by the opening/closing tags
1213	* into it and set *ppszBlock to that
1214	* buffer;
1215	*
1216	* 2) if the opening tag has any attributes,
1217	* allocate another buffer, copy the
1218	* attributes into it and set *ppszAttrs
1219	* to that buffer; if no attributes are
1220	* found, *ppszAttrs will be NULL;
1221	*
1222	* 3) set *pulOffset to the offset from the
1223	* beginning of *ppszSearchIn where the
1224	* opening tag was found;
1225	*
1226	* 4) advance *ppszSearchIn to after the
1227	* closing tag, so that you can do
1228	* multiple searches without finding the
1229	* same tags twice.
1230	*
1231	* All buffers should be freed using free().
1232	*
1233	* This returns the following:
1234	* -- 0: no error
1235	* -- 1: tag not found at all (doesn't have to be an error)
1236	* -- 2: begin tag found, but no corresponding end tag found. This
1237	* is a real error.
1238	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1239	*
1240	* <B>Example:</B>
1241	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1242	+ PSZ pszBlock, pszAttrs;
1243	+ ULONG ulOfs;
1244	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1245	*
1246	* would do the following:
1247	*
1248	* 1) set pszBlock to a new string containing "This is page 1."
1249	* without quotes;
1250	*
1251	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1252	*
1253	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1254	*
1255	* 4) pSearch would be advanced to point to the "More text"
1256	* string in the original buffer.
1257	*
1258	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1259	* for HTML parsing.
1260	*
1261	*@@added V0.9.0 [umoeller]
1262	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1263	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1264	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1265	*/
1266
1267	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1268	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1269	PSZ pszTag,
1270	PSZ *ppszBlock, // out: block enclosed by the tags
1271	PSZ *ppszAttribs, // out: attributes of the opening tag
1272	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1273	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1274	{
1275	ULONG ulrc = 1;
1276	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1277	pszSearch2 = pszBeginTag,
1278	pszClosingTag;
1279	ULONG cbTag = strlen(pszTag);
1280
1281	// go thru the block and check all tags if it's the
1282	// begin tag we're looking for
1283	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1284	{
1285	if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1286	// yes: stop
1287	break;
1288	else
1289	pszBeginTag++;
1290	}
1291
1292	if (pszBeginTag)
1293	{
1294	// we found <TAG>:
1295	ULONG ulNestingLevel = 0;
1296
1297	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1298	// strchr(pszBeginTag, '>');
1299	if (pszEndOfBeginTag)
1300	{
1301	// does the caller want the attributes?
1302	if (ppszAttribs)
1303	{
1304	// yes: then copy them
1305	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1306	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1307	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1308	// add terminating 0
1309	*(pszAttrs + ulAttrLen) = 0;
1310
1311	*ppszAttribs = pszAttrs;
1312	}
1313
1314	// output offset of where we found the begin tag
1315	if (pulOfsBeginTag)
1316	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1317
1318	// now find corresponding closing tag (e.g. "</BODY>"
1319	pszBeginTag = pszEndOfBeginTag+1;
1320	// now we're behind the '>' char of the opening tag
1321	// increase offset of that too
1322	if (pulOfsBeginBlock)
1323	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1324
1325	// find next closing tag;
1326	// for the first run, pszSearch2 points to right
1327	// after the '>' char of the opening tag
1328	pszSearch2 = pszBeginTag;
1329	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1330	&& (pszClosingTag = strstr(pszSearch2, "<"))
1331	)
1332	{
1333	// if we have another opening tag before our closing
1334	// tag, we need to have several closing tags before
1335	// we're done
1336	if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1337	ulNestingLevel++;
1338	else
1339	{
1340	// is this ours?
1341	if ( (*(pszClosingTag+1) == '/')
1342	&& (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1343	)
1344	{
1345	// we've found a matching closing tag; is
1346	// it ours?
1347	if (ulNestingLevel == 0)
1348	{
1349	// our closing tag found:
1350	// allocate mem for a new buffer
1351	// and extract all the text between
1352	// open and closing tags to it
1353	ULONG ulLen = pszClosingTag - pszBeginTag;
1354	if (ppszBlock)
1355	{
1356	PSZ pNew = (PSZ)malloc(ulLen + 1);
1357	strhncpy0(pNew, pszBeginTag, ulLen);
1358	*ppszBlock = pNew;
1359	}
1360
1361	// raise search offset to after the closing tag
1362	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1363
1364	ulrc = 0;
1365
1366	break;
1367	} else
1368	// not our closing tag:
1369	ulNestingLevel--;
1370	}
1371	}
1372	// no matching closing tag: search on after that
1373	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1374	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1375
1376	if (!pszClosingTag)
1377	// no matching closing tag found:
1378	// return 2 (closing tag not found)
1379	ulrc = 2;
1380	} // end if (pszBeginTag)
1381	else
1382	// no matching ">" for opening tag found:
1383	ulrc = 3;
1384	}
1385
1386	return (ulrc);
1387	}
1388
1389	/* ******************************************************************
1390	*
1391	* Miscellaneous
1392	*
1393	********************************************************************/
1394
1395	/*
1396	*@@ strhArrayAppend:
1397	* this appends a string to a "string array".
1398	*
1399	* A string array is considered a sequence of
1400	* zero-terminated strings in memory. That is,
1401	* after each string's null-byte, the next
1402	* string comes up.
1403	*
1404	* This is useful for composing a single block
1405	* of memory from, say, list box entries, which
1406	* can then be written to OS2.INI in one flush.
1407	*
1408	* To append strings to such an array, call this
1409	* function for each string you wish to append.
1410	* This will re-allocate *ppszRoot with each call,
1411	* and update *pcbRoot, which then contains the
1412	* total size of all strings (including all null
1413	* terminators).
1414	*
1415	* Pass *pcbRoot to PrfSaveProfileData to have the
1416	* block saved.
1417	*
1418	* Note: On the first call, *ppszRoot _must_ be NULL and
1419	* *pcbRoot _must_ be 0, or this crashes.
1420	*
1421	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1422	*/
1423
1424	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1425	const char *pcszNew, // in: string to append
1426	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1427	PULONG pcbRoot) // in/out: size of array
1428	{
1429	PSZ pszTemp;
1430
1431	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1432	cbNew = strlen(pcszNew);
1433
1434	pszTemp = (PSZ)malloc(*pcbRoot
1435	+ cbNew
1436	+ 1); // two null bytes
1437	if (*ppszRoot)
1438	{
1439	// not first loop: copy old stuff
1440	memcpy(pszTemp,
1441	*ppszRoot,
1442	*pcbRoot);
1443	free(*ppszRoot);
1444	}
1445	// append new string
1446	strcpy(pszTemp + *pcbRoot,
1447	pcszNew);
1448	// update root
1449	*ppszRoot = pszTemp;
1450	// update length
1451	*pcbRoot += cbNew + 1;
1452	}
1453
1454	/*
1455	*@@ strhCreateDump:
1456	* this dumps a memory block into a string
1457	* and returns that string in a new buffer.
1458	*
1459	* You must free() the returned PSZ after use.
1460	*
1461	* The output looks like the following:
1462	*
1463	+ 0000: FE FF 0E 02 90 00 00 00 ........
1464	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1465	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1466	*
1467	* Each line is terminated with a newline (\n)
1468	* character only.
1469	*
1470	*@@added V0.9.1 (2000-01-22) [umoeller]
1471	*/
1472
1473	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1474	ULONG ulSize, // in: size of buffer
1475	ULONG ulIndent) // in: indentation of every line
1476	{
1477	PSZ pszReturn = 0;
1478	XSTRING strReturn;
1479	CHAR szTemp[1000];
1480
1481	PBYTE pbCurrent = pb; // current byte
1482	ULONG ulCount = 0,
1483	ulCharsInLine = 0; // if this grows > 7, a new line is started
1484	CHAR szLine[400] = "",
1485	szAscii[30] = " "; // ASCII representation; filled for every line
1486	PSZ pszLine = szLine,
1487	pszAscii = szAscii;
1488
1489	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1490
1491	for (pbCurrent = pb;
1492	ulCount < ulSize;
1493	pbCurrent++, ulCount++)
1494	{
1495	if (ulCharsInLine == 0)
1496	{
1497	memset(szLine, ' ', ulIndent);
1498	pszLine += ulIndent;
1499	}
1500	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1501
1502	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1503	// printable character:
1504	pszAscii = pbCurrent;
1505	else
1506	*pszAscii = '.';
1507	pszAscii++;
1508
1509	ulCharsInLine++;
1510	if ( (ulCharsInLine > 7) // 8 bytes added?
1511	\|\| (ulCount == ulSize-1) // end of buffer reached?
1512	)
1513	{
1514	// if we haven't had eight bytes yet,
1515	// fill buffer up to eight bytes with spaces
1516	ULONG ul2;
1517	for (ul2 = ulCharsInLine;
1518	ul2 < 8;
1519	ul2++)
1520	pszLine += sprintf(pszLine, " ");
1521
1522	sprintf(szTemp, "%04lX: %s %s\n",
1523	(ulCount & 0xFFFFFFF8), // offset in hex
1524	szLine, // bytes string
1525	szAscii); // ASCII string
1526	xstrcat(&strReturn, szTemp, 0);
1527
1528	// restart line buffer
1529	pszLine = szLine;
1530
1531	// clear ASCII buffer
1532	strcpy(szAscii, " ");
1533	pszAscii = szAscii;
1534
1535	// reset line counter
1536	ulCharsInLine = 0;
1537	}
1538	}
1539
1540	if (strReturn.cbAllocated)
1541	pszReturn = strReturn.psz;
1542
1543	return (pszReturn);
1544	}
1545
1546	/* ******************************************************************
1547	*
1548	* Wildcard matching
1549	*
1550	********************************************************************/
1551
1552	/*
1553	* The following code has been taken from "fnmatch.zip".
1554	*
1555	* (c) 1994-1996 by Eberhard Mattes.
1556	*/
1557
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator.
 * Note that C is evaluated more than once, so it must not have
 * side effects. */

#define IS_OS2_COMP_SEP(C) ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, i.e. if it is the terminating null character or a separator.
 * C is evaluated more than once here as well. */

#define IS_OS2_COMP_END(C) ((C) == 0 || IS_OS2_COMP_SEP (C))
1568
1569	/*
1570	* skip_comp_os2:
1571	* Return a pointer to the next component of the path SRC, for OS/2
1572	* and DOS styles. When the end of the string is reached, a pointer
1573	* to the terminating null character is returned.
1574	*
1575	* (c) 1994-1996 by Eberhard Mattes.
1576	*/
1577
static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *p = src;

    /* Advance to whatever ends the current component: a separator
     * or the terminating null character. */

    for (; !IS_OS2_COMP_END(*p); ++p)
        ;

    /* Step over a separator, but stay on the terminating null. */

    return (*p != 0) ? p + 1 : p;
}
1592
1593	/*
1594	* has_colon:
1595	* returns true iff the path P contains a colon.
1596	*
1597	* (c) 1994-1996 by Eberhard Mattes.
1598	*/
1599
static int has_colon(const unsigned char *p)
{
    /* strchr() stops at the terminating null just like the
     * hand-written scan would; map its result to 1 / 0. */

    return (strchr((const char *)p, ':') != NULL) ? 1 : 0;
}
1609
1610	/*
1611	* match_comp_os2:
1612	* compares a single component (directory name or file name)
1613	* of the paths, for OS/2 and DOS styles. MASK and NAME point
1614	* into a component of the wildcard and the name to be checked,
1615	* respectively. Comparing stops at the next separator.
1616	* The FLAGS argument is the same as that of fnmatch().
1617	*
1618	* HAS_DOT is true if a dot is in the current component of NAME.
1619	* The number of dots is not restricted, even in DOS style.
1620	*
1621	* Returns FNM_MATCH iff MASK and NAME match.
1622	*
1623	* Note that this function is recursive.
1624	*
1625	* (c) 1994-1996 by Eberhard Mattes.
1626	*/
1627
1628	static int match_comp_os2(const unsigned char *mask,
1629	const unsigned char *name,
1630	unsigned flags,
1631	int has_dot)
1632	{
1633	int rc;
1634
1635	for (;;)
1636	switch (*mask)
1637	{
1638	case 0:
1639
1640	/* There must be no extra characters at the end of NAME when
1641	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1642	* in that case, NAME may point to a separator. */
1643
1644	if (*name == 0)
1645	return FNM_MATCH;
1646	if ((flags & FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1647	return FNM_MATCH;
1648	return FNM_NOMATCH;
1649
1650	case '/':
1651	case '\\':
1652
1653	/* Separators match separators. */
1654
1655	if (IS_OS2_COMP_SEP(*name))
1656	return FNM_MATCH;
1657
1658	/* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1659	* is ignored at the end of NAME. */
1660
1661	if ((flags & FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1662	return FNM_MATCH;
1663
1664	/* Stop comparing at the separator. */
1665
1666	return FNM_NOMATCH;
1667
1668	case '?':
1669
1670	/* A question mark matches one character. It does not match
1671	* a dot. At the end of the component (and before a dot),
1672	* it also matches zero characters. */
1673
1674	if (name != '.' && !IS_OS2_COMP_END(name))
1675	++name;
1676	++mask;
1677	break;
1678
1679	case '*':
1680
1681	/* An asterisk matches zero or more characters. In DOS
1682	* mode, dots are not matched. */
1683
1684	do
1685	{
1686	++mask;
1687	}
1688	while (mask == '');
1689	for (;;)
1690	{
1691	rc = match_comp_os2(mask, name, flags, has_dot);
1692	if (rc != FNM_NOMATCH)
1693	return rc;
1694	if (IS_OS2_COMP_END(*name))
1695	return FNM_NOMATCH;
1696	if (*name == '.' && (flags & FNM_STYLE_MASK) == FNM_DOS)
1697	return FNM_NOMATCH;
1698	++name;
1699	}
1700
1701	case '.':
1702
1703	/* A dot matches a dot. It also matches the implicit dot at
1704	* the end of a dot-less NAME. */
1705
1706	++mask;
1707	if (*name == '.')
1708	++name;
1709	else if (has_dot \|\| !IS_OS2_COMP_END(*name))
1710	return FNM_NOMATCH;
1711	break;
1712
1713	default:
1714
1715	/* All other characters match themselves. */
1716
1717	if (flags & FNM_IGNORECASE)
1718	{
1719	if (tolower(mask) != tolower(name))
1720	return FNM_NOMATCH;
1721	}
1722	else
1723	{
1724	if (mask != name)
1725	return FNM_NOMATCH;
1726	}
1727	++mask;
1728	++name;
1729	break;
1730	}
1731	}
1732
1733	/*
1734	* match_comp:
1735	* compares a single component (directory name or file
1736	* name) of the paths, for all styles which need
1737	* component-by-component matching. MASK and NAME point
1738	* to the start of a component of the wildcard and the
1739	* name to be checked, respectively. Comparing stops at
1740	* the next separator. The FLAGS argument is the same as
1741	* that of fnmatch().
1742	*
1743	* Return FNM_MATCH iff MASK and NAME match.
1744	*
1745	* (c) 1994-1996 by Eberhard Mattes.
1746	*/
1747
1748	static int match_comp(const unsigned char *mask,
1749	const unsigned char *name,
1750	unsigned flags)
1751	{
1752	const unsigned char *s;
1753
1754	switch (flags & FNM_STYLE_MASK)
1755	{
1756	case FNM_OS2:
1757	case FNM_DOS:
1758
1759	/* For OS/2 and DOS styles, we add an implicit dot at the end of
1760	* the component if the component doesn't include a dot. */
1761
1762	s = name;
1763	while (!IS_OS2_COMP_END(s) && s != '.')
1764	++s;
1765	return match_comp_os2(mask, name, flags, *s == '.');
1766
1767	default:
1768	return FNM_ERR;
1769	}
1770	}
1771
/* In Unix styles, / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C) ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, i.e. if it is the terminating null character or a separator.
 * Note that C is evaluated twice, so it must not have side effects. */

#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP (C))
1782
1783	/*
1784	* match_unix:
1785	* matches complete paths for Unix styles.
1786	*
1787	* The FLAGS argument is the same as that of fnmatch().
1788	* COMP points to the start of the current component in
1789	* NAME. Return FNM_MATCH iff MASK and NAME match. The
1790	* backslash character is used for escaping ? and * unless
1791	* FNM_NOESCAPE is set.
1792	*
1793	* (c) 1994-1996 by Eberhard Mattes.
1794	*/
1795
1796	static int match_unix(const unsigned char *mask,
1797	const unsigned char *name,
1798	unsigned flags,
1799	const unsigned char *comp)
1800	{
1801	unsigned char c1, c2;
1802	char invert, matched;
1803	const unsigned char *start;
1804	int rc;
1805
1806	for (;;)
1807	switch (*mask)
1808	{
1809	case 0:
1810
1811	/* There must be no extra characters at the end of NAME when
1812	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1813	* in that case, NAME may point to a separator. */
1814
1815	if (*name == 0)
1816	return FNM_MATCH;
1817	if ((flags & FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1818	return FNM_MATCH;
1819	return FNM_NOMATCH;
1820
1821	case '?':
1822
1823	/* A question mark matches one character. It does not match
1824	* the component separator if FNM_PATHNAME is set. It does
1825	* not match a dot at the start of a component if FNM_PERIOD
1826	* is set. */
1827
1828	if (*name == 0)
1829	return FNM_NOMATCH;
1830	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1831	return FNM_NOMATCH;
1832	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1833	return FNM_NOMATCH;
1834	++mask;
1835	++name;
1836	break;
1837
1838	case '*':
1839
1840	/* An asterisk matches zero or more characters. It does not
1841	* match the component separator if FNM_PATHNAME is set. It
1842	* does not match a dot at the start of a component if
1843	* FNM_PERIOD is set. */
1844
1845	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1846	return FNM_NOMATCH;
1847	do
1848	{
1849	++mask;
1850	}
1851	while (mask == '');
1852	for (;;)
1853	{
1854	rc = match_unix(mask, name, flags, comp);
1855	if (rc != FNM_NOMATCH)
1856	return rc;
1857	if (*name == 0)
1858	return FNM_NOMATCH;
1859	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1860	return FNM_NOMATCH;
1861	++name;
1862	}
1863
1864	case '/':
1865
1866	/* Separators match only separators. If _FNM_PATHPREFIX is
1867	* set, a trailing separator in MASK is ignored at the end
1868	* of NAME. */
1869
1870	if (!(IS_UNIX_COMP_SEP(*name)
1871	\|\| ((flags & FNM_PATHPREFIX) && *name == 0
1872	&& (mask[1] == 0
1873	\|\| (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1874	&& mask[2] == 0)))))
1875	return FNM_NOMATCH;
1876
1877	++mask;
1878	if (*name != 0)
1879	++name;
1880
1881	/* This is the beginning of a new component if FNM_PATHNAME
1882	* is set. */
1883
1884	if (flags & FNM_PATHNAME)
1885	comp = name;
1886	break;
1887
1888	case '[':
1889
1890	/* A set of characters. Always case-sensitive. */
1891
1892	if (*name == 0)
1893	return FNM_NOMATCH;
1894	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1895	return FNM_NOMATCH;
1896	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1897	return FNM_NOMATCH;
1898
1899	invert = 0;
1900	matched = 0;
1901	++mask;
1902
1903	/* If the first character is a ! or ^, the set matches all
1904	* characters not listed in the set. */
1905
1906	if (mask == '!' \|\| mask == '^')
1907	{
1908	++mask;
1909	invert = 1;
1910	}
1911
1912	/* Loop over all the characters of the set. The loop ends
1913	* if the end of the string is reached or if a ] is
1914	* encountered unless it directly follows the initial [ or
1915	* [-. */
1916
1917	start = mask;
1918	while (!(mask == 0 \|\| (mask == ']' && mask != start)))
1919	{
1920	/* Get the next character which is optionally preceded
1921	* by a backslash. */
1922
1923	c1 = *mask++;
1924	if (!(flags & FNM_NOESCAPE) && c1 == '\\')
1925	{
1926	if (*mask == 0)
1927	break;
1928	c1 = *mask++;
1929	}
1930
1931	/* Ranges of characters are written as a-z. Don't
1932	* forget to check for the end of the string and to
1933	* handle the backslash. If the character after - is a
1934	* ], it isn't a range. */
1935
1936	if (*mask == '-' && mask[1] != ']')
1937	{
1938	++mask; /* Skip the - character */
1939	if (!(flags & FNM_NOESCAPE) && *mask == '\\')
1940	++mask;
1941	if (*mask == 0)
1942	break;
1943	c2 = *mask++;
1944	}
1945	else
1946	c2 = c1;
1947
1948	/* Now check whether this character or range matches NAME. */
1949
1950	if (c1 <= name && name <= c2)
1951	matched = 1;
1952	}
1953
1954	/* If the end of the string is reached before a ] is found,
1955	* back up to the [ and compare it to NAME. */
1956
1957	if (*mask == 0)
1958	{
1959	if (*name != '[')
1960	return FNM_NOMATCH;
1961	++name;
1962	mask = start;
1963	if (invert)
1964	--mask;
1965	}
1966	else
1967	{
1968	if (invert)
1969	matched = !matched;
1970	if (!matched)
1971	return FNM_NOMATCH;
1972	++mask; /* Skip the ] character */
1973	if (*name != 0)
1974	++name;
1975	}
1976	break;
1977
1978	case '\\':
1979	++mask;
1980	if (flags & FNM_NOESCAPE)
1981	{
1982	if (*name != '\\')
1983	return FNM_NOMATCH;
1984	++name;
1985	}
1986	else if (mask == '' \|\| *mask == '?')
1987	{
1988	if (mask != name)
1989	return FNM_NOMATCH;
1990	++mask;
1991	++name;
1992	}
1993	break;
1994
1995	default:
1996
1997	/* All other characters match themselves. */
1998
1999	if (flags & FNM_IGNORECASE)
2000	{
2001	if (tolower(mask) != tolower(name))
2002	return FNM_NOMATCH;
2003	}
2004	else
2005	{
2006	if (mask != name)
2007	return FNM_NOMATCH;
2008	}
2009	++mask;
2010	++name;
2011	break;
2012	}
2013	}
2014
2015	/*
2016	* _fnmatch_unsigned:
2017	* Check whether the path name NAME matches the wildcard MASK.
2018	*
2019	* Return:
2020	* -- 0 (FNM_MATCH) if it matches,
2021	* -- _FNM_NOMATCH if it doesn't,
2022	* -- FNM_ERR on error.
2023	*
2024	* The operation of this function is controlled by FLAGS.
2025	* This is an internal function, with unsigned arguments.
2026	*
2027	* (c) 1994-1996 by Eberhard Mattes.
2028	*/
2029
2030	static int _fnmatch_unsigned(const unsigned char *mask,
2031	const unsigned char *name,
2032	unsigned flags)
2033	{
2034	int m_drive,
2035	n_drive,
2036	rc;
2037
2038	/* Match and skip the drive name if present. */
2039
2040	m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2041	n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2042
2043	if (m_drive != n_drive)
2044	{
2045	if (m_drive == -1 \|\| n_drive == -1)
2046	return FNM_NOMATCH;
2047	if (!(flags & FNM_IGNORECASE))
2048	return FNM_NOMATCH;
2049	if (tolower(m_drive) != tolower(n_drive))
2050	return FNM_NOMATCH;
2051	}
2052
2053	if (m_drive != -1)
2054	mask += 2;
2055	if (n_drive != -1)
2056	name += 2;
2057
2058	/* Colons are not allowed in path names, except for the drive name,
2059	* which was skipped above. */
2060
2061	if (has_colon(mask) \|\| has_colon(name))
2062	return FNM_ERR;
2063
2064	/* The name "\\server\path" should not be matched by mask
2065	* "\\server\path". Ditto for /. /
2066
2067	switch (flags & FNM_STYLE_MASK)
2068	{
2069	case FNM_OS2:
2070	case FNM_DOS:
2071
2072	if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2073	{
2074	if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2075	return FNM_NOMATCH;
2076	name += 2;
2077	mask += 2;
2078	}
2079	break;
2080
2081	case FNM_POSIX:
2082
2083	if (name[0] == '/' && name[1] == '/')
2084	{
2085	int i;
2086
2087	name += 2;
2088	for (i = 0; i < 2; ++i)
2089	if (mask[0] == '/')
2090	++mask;
2091	else if (mask[0] == '\\' && mask[1] == '/')
2092	mask += 2;
2093	else
2094	return FNM_NOMATCH;
2095	}
2096
2097	/* In Unix styles, treating ? and * w.r.t. components is simple.
2098	* No need to do matching component by component. */
2099
2100	return match_unix(mask, name, flags, name);
2101	}
2102
2103	/* Now compare all the components of the path name, one by one.
2104	* Note that the path separator must not be enclosed in brackets. */
2105
2106	while (mask != 0 \|\| name != 0)
2107	{
2108
2109	/* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2110	* is reached even if there are components left in NAME. */
2111
2112	if (*mask == 0 && (flags & FNM_PATHPREFIX))
2113	return FNM_MATCH;
2114
2115	/* Compare a single component of the path name. */
2116
2117	rc = match_comp(mask, name, flags);
2118	if (rc != FNM_MATCH)
2119	return rc;
2120
2121	/* Skip to the next component or to the end of the path name. */
2122
2123	mask = skip_comp_os2(mask);
2124	name = skip_comp_os2(name);
2125	}
2126
2127	/* If we reached the ends of both strings, the names match. */
2128
2129	if (mask == 0 && name == 0)
2130	return FNM_MATCH;
2131
2132	/* The names do not match. */
2133
2134	return FNM_NOMATCH;
2135	}
2136
2137	/*
2138	*@@ strhMatchOS2:
2139	* this matches wildcards, similar to what DosEditName does.
2140	* However, this does not require a file to be present, but
2141	* works on strings only.
2142	*/
2143
2144	BOOL strhMatchOS2(const char pcszMask, // in: mask (e.g. ".txt")
2145	const char *pcszName) // in: string to check (e.g. "test.txt")
2146	{
2147	return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2148	(const unsigned char *)pcszName,
2149	FNM_OS2 \| FNM_IGNORECASE)
2150	== FNM_MATCH)
2151	);
2152	}
2153
2154	/*
2155	*@@ strhMatchExt:
2156	* like strhMatchOS2, but this takes all the flags
2157	* for input.
2158	*
2159	*@@added V0.9.15 (2001-09-14) [umoeller]
2160	*/
2161
2162	BOOL strhMatchExt(const char pcszMask, // in: mask (e.g. ".txt")
2163	const char *pcszName, // in: string to check (e.g. "test.txt")
2164	unsigned flags) // in: FNM_* flags
2165	{
2166	return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2167	(const unsigned char *)pcszName,
2168	flags)
2169	== FNM_MATCH)
2170	);
2171	}
2172
2173	/* ******************************************************************
2174	*
2175	* Fast string searches
2176	*
2177	********************************************************************/
2178
2179	#define ASSERT(a)
2180
2181	/*
2182	* The following code has been taken from the "Standard
2183	* Function Library", file sflfind.c, and only slightly
2184	* modified to conform to the rest of this file.
2185	*
2186	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2187	* Revised: 98/05/04
2188	*
2189	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2190	*
2191	* The SFL Licence allows incorporating SFL code into other
2192	* programs, as long as the copyright is reprinted and the
2193	* code is marked as modified, so this is what we do.
2194	*/
2195
2196	/*
2197	*@@ strhmemfind:
2198	* searches for a pattern in a block of memory using the
2199	* Boyer-Moore-Horspool-Sunday algorithm.
2200	*
2201	* The block and pattern may contain any values; you must
2202	* explicitly provide their lengths. If you search for strings,
2203	* use strlen() on the buffers.
2204	*
2205	* Returns a pointer to the pattern if found within the block,
2206	* or NULL if the pattern was not found.
2207	*
2208	* This algorithm needs a "shift table" to cache data for the
2209	* search pattern. This table can be reused when performing
2210	* several searches with the same pattern.
2211	*
2212	* "shift" must point to an array big enough to hold 256 (2**8)
2213	* "size_t" values.
2214	*
2215	* If (*repeat_find == FALSE), the shift table is initialized.
2216	* So on the first search with a given pattern, *repeat_find
2217	* should be FALSE. This function sets it to TRUE after the
2218	* shift table is initialised, allowing the initialisation
2219	* phase to be skipped on subsequent searches.
2220	*
2221	* This function is most effective when repeated searches are
2222	* made for the same pattern in one or more large buffers.
2223	*
2224	* Example:
2225	*
2226	+ PSZ pszHaystack = "This is a sample string.",
2227	+ pszNeedle = "string";
2228	+ size_t shift[256];
2229	+ BOOL fRepeat = FALSE;
2230	+
2231	+ PSZ pFound = strhmemfind(pszHaystack,
2232	+ strlen(pszHaystack), // block size
2233	+ pszNeedle,
2234	+ strlen(pszNeedle), // pattern size
2235	+ shift,
2236	+ &fRepeat);
2237	*
2238	* Taken from the "Standard Function Library", file sflfind.c.
2239	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2240	* Slightly modified by umoeller.
2241	*
2242	*@@added V0.9.3 (2000-05-08) [umoeller]
2243	*/
2244
2245	void* strhmemfind(const void *in_block, // in: block containing data
2246	size_t block_size, // in: size of block in bytes
2247	const void *in_pattern, // in: pattern to search for
2248	size_t pattern_size, // in: size of pattern block
2249	size_t *shift, // in/out: shift table (search buffer)
2250	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
2251	{
2252	size_t byte_nbr, // Distance through block
2253	match_size; // Size of matched part
2254	const unsigned char
2255	*match_base = NULL, // Base of match of pattern
2256	*match_ptr = NULL, // Point within current match
2257	*limit = NULL; // Last potiental match point
2258	const unsigned char
2259	block = (unsigned char ) in_block, // Concrete pointer to block data
2260	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
2261
2262	if ( (block == NULL)
2263	\|\| (pattern == NULL)
2264	\|\| (shift == NULL)
2265	)
2266	return (NULL);
2267
2268	// Pattern must be smaller or equal in size to string
2269	if (block_size < pattern_size)
2270	return (NULL); // Otherwise it's not found
2271
2272	if (pattern_size == 0) // Empty patterns match at start
2273	return ((void *)block);
2274
2275	// Build the shift table unless we're continuing a previous search
2276
2277	// The shift table determines how far to shift before trying to match
2278	// again, if a match at this point fails. If the byte after where the
2279	// end of our pattern falls is not in our pattern, then we start to
2280	// match again after that byte; otherwise we line up the last occurence
2281	// of that byte in our pattern under that byte, and try match again.
2282
2283	if (!repeat_find \|\| !*repeat_find)
2284	{
2285	for (byte_nbr = 0;
2286	byte_nbr < 256;
2287	byte_nbr++)
2288	shift[byte_nbr] = pattern_size + 1;
2289	for (byte_nbr = 0;
2290	byte_nbr < pattern_size;
2291	byte_nbr++)
2292	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2293
2294	if (repeat_find)
2295	*repeat_find = TRUE;
2296	}
2297
2298	// Search for the block, each time jumping up by the amount
2299	// computed in the shift table
2300
2301	limit = block + (block_size - pattern_size + 1);
2302	ASSERT (limit > block);
2303
2304	for (match_base = block;
2305	match_base < limit;
2306	match_base += shift[*(match_base + pattern_size)])
2307	{
2308	match_ptr = match_base;
2309	match_size = 0;
2310
2311	// Compare pattern until it all matches, or we find a difference
2312	while (*match_ptr++ == pattern[match_size++])
2313	{
2314	ASSERT (match_size <= pattern_size &&
2315	match_ptr == (match_base + match_size));
2316
2317	// If we found a match, return the start address
2318	if (match_size >= pattern_size)
2319	return ((void*)(match_base));
2320
2321	}
2322	}
2323	return (NULL); // Found nothing
2324	}
2325
2326	/*
2327	*@@ strhtxtfind:
2328	* searches for a case-insensitive text pattern in a string
2329	* using the Boyer-Moore-Horspool-Sunday algorithm. The string and
2330	* pattern are null-terminated strings. Returns a pointer to the pattern
2331	* if found within the string, or NULL if the pattern was not found.
2332	* Will match strings irrespective of case. To match exact strings, use
2333	* strhfind(). Will not work on multibyte characters.
2334	*
2335	* Examples:
2336	+ char *result;
2337	+
2338	+ result = strhtxtfind ("AbracaDabra", "cad");
2339	+ if (result)
2340	+ puts (result);
2341	+
2342	* Taken from the "Standard Function Library", file sflfind.c.
2343	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2344	* Slightly modified.
2345	*
2346	*@@added V0.9.3 (2000-05-08) [umoeller]
2347	*/
2348
char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t
        shift [256];                        // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                           // Index into byte array
        match_size;                         // Size of matched part
    const char
        *match_base = NULL,                 // Base of match of pattern
        *limit = NULL;                      // First position where the pattern no longer fits

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                      // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    // Note: every character is converted to unsigned char before it is
    // handed to tolower(), because passing a negative plain char (any
    // character above 127 where char is signed) is undefined.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. The character used for the lookup is
    // the one right behind the current window; at the last position
    // this is the terminating null of the string.

    limit = string + (string_size - pattern_size + 1);

    for (match_base = string;
         match_base < limit;
         match_base += shift [(unsigned char) tolower ((unsigned char) match_base [pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        for (match_size = 0; match_size < pattern_size; match_size++)
        {
            if (    tolower ((unsigned char) match_base [match_size])
                 != tolower ((unsigned char) pattern [match_size])
               )
                break;
        }

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *)(match_base));
    }
    return (NULL);                          // Found nothing
}
2420

Note: See TracBrowser for help on using the repository browser.

Download in other formats: