Context Navigation

source: trunk/src/helpers/stringh.c@ 12

Visit:

Last change on this file since 12 was 12, checked in by umoeller, 25 years ago
Updated string helpers.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 93.6 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout XWorkplace.
6	*
7	* Usage: All OS/2 programs.
8	*
9	* Function prefixes (new with V0.81):
10	* -- strh* string helper functions.
11	*
12	* Note: Version numbering in this file relates to XWorkplace version
13	* numbering.
14	*
15	*@@header "helpers\stringh.h"
16	*/
17
18	/*
19	* Copyright (C) 1997-2000 Ulrich Möller.
20	* Parts Copyright (C) 1991-1999 iMatix Corporation.
21	* This file is part of the XWorkplace source package.
22	* XWorkplace is free software; you can redistribute it and/or modify
23	* it under the terms of the GNU General Public License as published
24	* by the Free Software Foundation, in version 2 as it comes in the
25	* "COPYING" file of the XWorkplace main distribution.
26	* This program is distributed in the hope that it will be useful,
27	* but WITHOUT ANY WARRANTY; without even the implied warranty of
28	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29	* GNU General Public License for more details.
30	*/
31
32	#define OS2EMX_PLAIN_CHAR
33	// this is needed for "os2emx.h"; if this is defined,
34	// emx will define PSZ as _signed_ char, otherwise
35	// as unsigned char
36
37	#define INCL_WINSHELLDATA
38	#include <os2.h>
39
40	#include <stdlib.h>
41	#include <stdio.h>
42	#include <string.h>
43	#include <ctype.h>
44	#include <math.h>
45
46	#include "setup.h" // code generation and debugging options
47
48	#include "helpers\stringh.h"
49	#include "helpers\xstring.h" // extended string helpers
50
51	#pragma hdrstop
52
53	/*
54	*@@category: Helpers\C helpers\String management
55	*/
56
57	/*
58	*@@ strhdup:
59	* like strdup, but this one
60	* doesn't crash if pszSource is NULL,
61	* but returns NULL also.
62	*
63	*@@added V0.9.0 [umoeller]
64	*/
65
66	PSZ strhdup(const char *pszSource)
67	{
68	if (pszSource)
69	return (strdup(pszSource));
70	else
71	return (0);
72	}
73
74	/*
75	*@@ strhistr:
76	* like strstr, but case-insensitive.
77	*
78	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
79	*/
80
81	PSZ strhistr(const char string1, const char string2)
82	{
83	PSZ prc = NULL;
84
85	if ((string1) && (string2))
86	{
87	PSZ pszSrchIn = strdup(string1);
88	PSZ pszSrchFor = strdup(string2);
89
90	if ((pszSrchIn) && (pszSrchFor))
91	{
92	strupr(pszSrchIn);
93	strupr(pszSrchFor);
94
95	prc = strstr(pszSrchIn, pszSrchFor);
96	if (prc)
97	{
98	// prc now has the first occurence of the string,
99	// but in pszSrchIn; we need to map this
100	// return value to the original string
101	prc = (prc-pszSrchIn) // offset in pszSrchIn
102	+ (PSZ)string1;
103	}
104	}
105	if (pszSrchFor)
106	free(pszSrchFor);
107	if (pszSrchIn)
108	free(pszSrchIn);
109	}
110	return (prc);
111	}
112
113	/*
114	*@@ strhncpy0:
115	* like strncpy, but always appends a 0 character.
116	*/
117
118	ULONG strhncpy0(PSZ pszTarget,
119	const char *pszSource,
120	ULONG cbSource)
121	{
122	ULONG ul = 0;
123	PSZ pTarget = pszTarget,
124	pSource = (PSZ)pszSource;
125
126	for (ul = 0; ul < cbSource; ul++)
127	if (*pSource)
128	pTarget++ = pSource++;
129	else
130	break;
131	*pTarget = 0;
132
133	return (ul);
134	}
135
136	/*
137	* strhCount:
138	* this counts the occurences of c in pszSearch.
139	*/
140
141	ULONG strhCount(const char *pszSearch,
142	CHAR c)
143	{
144	PSZ p = (PSZ)pszSearch;
145	ULONG ulCount = 0;
146	while (TRUE)
147	{
148	p = strchr(p, c);
149	if (p)
150	{
151	ulCount++;
152	p++;
153	}
154	else
155	break;
156	}
157	return (ulCount);
158	}
159
160	/*
161	*@@ strhIsDecimal:
162	* returns TRUE if psz consists of decimal digits only.
163	*/
164
165	BOOL strhIsDecimal(PSZ psz)
166	{
167	PSZ p = psz;
168	while (*p != 0)
169	{
170	if (isdigit(*p) == 0)
171	return (FALSE);
172	p++;
173	}
174
175	return (TRUE);
176	}
177
178	/*
179	*@@ strhSubstr:
180	* this creates a new PSZ containing the string
181	* from pBegin to pEnd, excluding the pEnd character.
182	* The new string is null-terminated.
183	*
184	* Example:
185	+ "1234567890"
186	+ ^ ^
187	+ p1 p2
188	+ strhSubstr(p1, p2)
189	* would return a new string containing "2345678".
190	*/
191
192	PSZ strhSubstr(const char pBegin, const char pEnd)
193	{
194	ULONG cbSubstr = (pEnd - pBegin);
195	PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
196	strhncpy0(pszSubstr, pBegin, cbSubstr);
197	return (pszSubstr);
198	}
199
200	/*
201	*@@ strhExtract:
202	* searches pszBuf for the cOpen character and returns
203	* the data in between cOpen and cClose, excluding
204	* those two characters, in a newly allocated buffer
205	* which you must free() afterwards.
206	*
207	* Spaces and newlines/linefeeds are skipped.
208	*
209	* If the search was successful, the new buffer
210	* is returned and, if (ppEnd != NULL), *ppEnd points
211	* to the first character after the cClose character
212	* found in the buffer.
213	*
214	* If the search was not successful, NULL is
215	* returned, and *ppEnd is unchanged.
216	*
217	* If another cOpen character is found before
218	* cClose, matching cClose characters will be skipped.
219	* You can therefore nest the cOpen and cClose
220	* characters.
221	*
222	* This function ignores cOpen and cClose characters
223	* in C-style comments and strings surrounded by
224	* double quotes.
225	*
226	* Example:
227	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
228	+ pEnd;
229	+ strhExtract(pszBuf,
230	+ '{', '}',
231	+ &pEnd)
232	* would return a new buffer containing " --blah-- ",
233	* and ppEnd would afterwards point to the space
234	* before "next" in the static buffer.
235	*
236	*@@added V0.9.0 [umoeller]
237	*/
238
239	PSZ strhExtract(PSZ pszBuf, // in: search buffer
240	CHAR cOpen, // in: opening char
241	CHAR cClose, // in: closing char
242	PSZ *ppEnd) // out: if != NULL, receives first character after closing char
243	{
244	PSZ pszReturn = NULL;
245
246	if (pszBuf)
247	{
248	PSZ pOpen = strchr(pszBuf, cOpen);
249	if (pOpen)
250	{
251	// opening char found:
252	// now go thru the whole rest of the buffer
253	PSZ p = pOpen+1;
254	LONG lLevel = 1; // if this goes 0, we're done
255	while (*p)
256	{
257	if (*p == cOpen)
258	lLevel++;
259	else if (*p == cClose)
260	{
261	lLevel--;
262	if (lLevel <= 0)
263	{
264	// matching closing bracket found:
265	// extract string
266	pszReturn = strhSubstr(pOpen+1, // after cOpen
267	p); // excluding cClose
268	if (ppEnd)
269	*ppEnd = p+1;
270	break; // while (*p)
271	}
272	}
273	else if (*p == '\"')
274	{
275	// beginning of string:
276	PSZ p2 = p+1;
277	// find end of string
278	while ((p2) && (p2 != '\"'))
279	p2++;
280
281	if (*p2 == '\"')
282	// closing quote found:
283	// search on after that
284	p = p2; // raised below
285	else
286	break; // while (*p)
287	}
288
289	p++;
290	}
291	}
292	}
293
294	return (pszReturn);
295	}
296
297	/*
298	*@@ strhQuote:
299	* similar to strhExtract, except that
300	* opening and closing chars are the same,
301	* and therefore no nesting is possible.
302	* Useful for extracting stuff between
303	* quotes.
304	*
305	*@@added V0.9.0 [umoeller]
306	*/
307
308	PSZ strhQuote(PSZ pszBuf,
309	CHAR cQuote,
310	PSZ *ppEnd)
311	{
312	PSZ pszReturn = NULL,
313	p1 = NULL;
314	if ((p1 = strchr(pszBuf, cQuote)))
315	{
316	PSZ p2 = strchr(p1+1, cQuote);
317	if (p2)
318	{
319	pszReturn = strhSubstr(p1+1, p2);
320	if (ppEnd)
321	// store closing char
322	*ppEnd = p2 + 1;
323	}
324	}
325
326	return (pszReturn);
327	}
328
329	/*
330	*@@ strhStrip:
331	* removes all double spaces.
332	* This copies within the "psz" buffer.
333	* If any double spaces are found, the
334	* string will be shorter than before,
335	* but the buffer is _not_ reallocated,
336	* so there will be unused bytes at the
337	* end.
338	*
339	* Returns the number of spaces removed.
340	*
341	*@@added V0.9.0 [umoeller]
342	*/
343
344	ULONG strhStrip(PSZ psz) // in/out: string
345	{
346	PSZ p;
347	ULONG cb = strlen(psz),
348	ulrc = 0;
349
350	for (p = psz; p < psz+cb; p++)
351	{
352	if ((p == ' ') && ((p+1) == ' '))
353	{
354	PSZ p2 = p;
355	while (*p2)
356	{
357	p2 = (p2+1);
358	p2++;
359	}
360	cb--;
361	p--;
362	ulrc++;
363	}
364	}
365	return (ulrc);
366	}
367
368	/*
369	*@@ strhins:
370	* this inserts one string into another.
371	*
372	* pszInsert is inserted into pszBuffer at offset
373	* ulInsertOfs (which counts from 0).
374	*
375	* A newly allocated string is returned. pszBuffer is
376	* not changed. The new string should be free()'d after
377	* use.
378	*
379	* Upon errors, NULL is returned.
380	*
381	*@@changed V0.9.0 [umoeller]: completely rewritten.
382	*/
383
384	PSZ strhins(const char *pcszBuffer,
385	ULONG ulInsertOfs,
386	const char *pcszInsert)
387	{
388	PSZ pszNew = NULL;
389
390	if ((pcszBuffer) && (pcszInsert))
391	{
392	do {
393	ULONG cbBuffer = strlen(pcszBuffer);
394	ULONG cbInsert = strlen(pcszInsert);
395
396	// check string length
397	if (ulInsertOfs > cbBuffer + 1)
398	break; // do
399
400	// OK, let's go.
401	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
402
403	// copy stuff before pInsertPos
404	memcpy(pszNew,
405	pcszBuffer,
406	ulInsertOfs);
407	// copy string to be inserted
408	memcpy(pszNew + ulInsertOfs,
409	pcszInsert,
410	cbInsert);
411	// copy stuff after pInsertPos
412	strcpy(pszNew + ulInsertOfs + cbInsert,
413	pcszBuffer + ulInsertOfs);
414	} while (FALSE);
415	}
416
417	return (pszNew);
418	}
419
420	/*
421	*@@ strhrpl:
422	* wrapper around xstrrpl to work with C strings.
423	* Note that *ppszBuf can get reallocated and must
424	* be free()'able.
425	*
426	* Use of this wrapper is not recommended because
427	* it is considerably slower than xstrrpl.
428	*
429	*@@added V0.9.6 (2000-11-01) [umoeller]
430	*/
431
432	ULONG strhrpl(PSZ *ppszBuf, // in/out: string
433	ULONG ulOfs, // in: where to begin search (0 = start)
434	const char *pcszSearch, // in: search string; cannot be NULL
435	const char *pcszReplace, // in: replacement string; cannot be NULL
436	PULONG pulAfterOfs) // out: offset where found (ptr can be NULL)
437	{
438	ULONG ulrc = 0;
439	XSTRING xstrBuf,
440	xstrFind,
441	xstrReplace;
442	xstrInit(&xstrBuf, 0);
443	xstrset(&xstrBuf, *ppszBuf);
444	xstrInit(&xstrFind, 0);
445	xstrset(&xstrFind, (PSZ)pcszSearch);
446	xstrInit(&xstrReplace, 0);
447	xstrset(&xstrReplace, (PSZ)pcszReplace);
448
449	if (ulrc = xstrrpl(&xstrBuf, ulOfs, &xstrFind, &xstrReplace, pulAfterOfs))
450	// replaced:
451	*ppszBuf = xstrBuf.psz;
452
453	return (ulrc);
454	}
455
456	/*
457	* strhWords:
458	* returns the no. of words in "psz".
459	* A string is considered a "word" if
460	* it is surrounded by spaces only.
461	*
462	*@@added V0.9.0 [umoeller]
463	*/
464
465	ULONG strhWords(PSZ psz)
466	{
467	PSZ p;
468	ULONG cb = strlen(psz),
469	ulWords = 0;
470	if (cb > 1)
471	{
472	ulWords = 1;
473	for (p = psz; p < psz+cb; p++)
474	if (*p == ' ')
475	ulWords++;
476	}
477	return (ulWords);
478	}
479
480	/*
481	*@@ strhThousandsULong:
482	* converts a ULONG into a decimal string, while
483	* inserting thousands separators into it. Specify
484	* the separator char in cThousands.
485	* Returns pszTarget so you can use it directly
486	* with sprintf and the "%s" flag.
487	* For cThousands, you should use the data in
488	* OS2.INI ("PM_National" application), which is
489	* always set according to the "Country" object.
490	* Use strhThousandsDouble for "double" values.
491	*/
492
493	PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
494	ULONG ul, // in: decimal to convert
495	CHAR cThousands) // in: separator char (e.g. '.')
496	{
497	USHORT ust, uss, usc;
498	CHAR szTemp[40];
499	sprintf(szTemp, "%d", ul);
500
501	ust = 0;
502	usc = strlen(szTemp);
503	for (uss = 0; uss < usc; uss++)
504	{
505	if (uss)
506	if (((usc - uss) % 3) == 0)
507	{
508	pszTarget[ust] = cThousands;
509	ust++;
510	}
511	pszTarget[ust] = szTemp[uss];
512	ust++;
513	}
514	pszTarget[ust] = '\0';
515
516	return (pszTarget);
517	}
518
519	/*
520	*@@ strhThousandsDouble:
521	* like strhThousandsULong, but for a "double"
522	* value. Note that after-comma values are truncated.
523	*/
524
525	PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
526	{
527	USHORT ust, uss, usc;
528	CHAR szTemp[40];
529	sprintf(szTemp, "%.0f", floor(dbl));
530
531	ust = 0;
532	usc = strlen(szTemp);
533	for (uss = 0; uss < usc; uss++)
534	{
535	if (uss)
536	if (((usc - uss) % 3) == 0)
537	{
538	pszTarget[ust] = cThousands;
539	ust++;
540	}
541	pszTarget[ust] = szTemp[uss];
542	ust++;
543	}
544	pszTarget[ust] = '\0';
545
546	return (pszTarget);
547	}
548
549	/*
550	*@@ strhFileDate:
551	* converts file date data to a string (to pszBuf).
552	* You can pass any FDATE structure to this function,
553	* which are returned in those FILEFINDBUF* or
554	* FILESTATUS* structs by the Dos* functions.
555	*
556	* ulDateFormat is the PM setting for the date format,
557	* as set in the "Country" object, and can be queried using
558	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
559	*
560	* meaning:
561	* -- 0 mm.dd.yyyy (English)
562	* -- 1 dd.mm.yyyy (e.g. German)
563	* -- 2 yyyy.mm.dd (Japanese, ISO)
564	* -- 3 yyyy.dd.mm
565	*
566	* cDateSep is used as a date separator (e.g. '.').
567	* This can be queried using:
568	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
569	*
570	* Alternatively, you can query all the country settings
571	* at once using prfhQueryCountrySettings (prfh.c, new with V0.9.0).
572	*
573	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
574	*/
575
576	VOID strhFileDate(PSZ pszBuf, // out: string returned
577	FDATE *pfDate, // in: date information
578	ULONG ulDateFormat, // in: date format (0-3)
579	CHAR cDateSep) // in: date separator (e.g. '.')
580	{
581	DATETIME dt;
582	dt.day = pfDate->day;
583	dt.month = pfDate->month;
584	dt.year = pfDate->year + 1980;
585
586	strhDateTime(pszBuf,
587	NULL, // no time
588	&dt,
589	ulDateFormat,
590	cDateSep,
591	0, 0); // no time
592	}
593
594	/*
595	*@@ strhFileTime:
596	* converts file time data to a string (to pszBuf).
597	* You can pass any FTIME structure to this function,
598	* which are returned in those FILEFINDBUF* or
599	* FILESTATUS* structs by the Dos* functions.
600	*
601	* ulTimeFormat is the PM setting for the time format,
602	* as set in the "Country" object, and can be queried using
603	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
604	* meaning:
605	* -- 0 12-hour clock
606	* -- >0 24-hour clock
607	*
608	* cDateSep is used as a time separator (e.g. ':').
609	* This can be queried using:
610	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
611	*
612	* Alternatively, you can query all the country settings
613	* at once using prfhQueryCountrySettings (prfh.c, new with V0.9.0).
614	*
615	*@@changed 99-03-15 fixed 12-hour crash
616	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
617	*/
618
619	VOID strhFileTime(PSZ pszBuf, // out: string returned
620	FTIME *pfTime, // in: time information
621	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
622	CHAR cTimeSep) // in: time separator (e.g. ':')
623	{
624	DATETIME dt;
625	dt.hours = pfTime->hours;
626	dt.minutes = pfTime->minutes;
627	dt.seconds = pfTime->twosecs * 2;
628
629	strhDateTime(NULL, // no date
630	pszBuf,
631	&dt,
632	0, 0, // no date
633	ulTimeFormat,
634	cTimeSep);
635	}
636
637	/*
638	*@@ strhDateTime:
639	* converts Control Programe DATETIME info
640	* to two strings. See strhFileDate and strhFileTime
641	* for more detailed parameter descriptions.
642	*
643	*@@added V0.9.0 (99-11-07) [umoeller]
644	*/
645
646	VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
647	PSZ pszTime, // out: time string returned (can be NULL)
648	DATETIME *pDateTime, // in: date/time information
649	ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
650	CHAR cDateSep, // in: date separator (e.g. '.')
651	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
652	CHAR cTimeSep) // in: time separator (e.g. ':')
653	{
654	if (pszDate)
655	{
656	switch (ulDateFormat)
657	{
658	case 0: // mm.dd.yyyy (English)
659	sprintf(pszDate, "%02d%c%02d%c%04d",
660	pDateTime->month,
661	cDateSep,
662	pDateTime->day,
663	cDateSep,
664	pDateTime->year);
665	break;
666
667	case 1: // dd.mm.yyyy (e.g. German)
668	sprintf(pszDate, "%02d%c%02d%c%04d",
669	pDateTime->day,
670	cDateSep,
671	pDateTime->month,
672	cDateSep,
673	pDateTime->year);
674	break;
675
676	case 2: // yyyy.mm.dd (Japanese)
677	sprintf(pszDate, "%04d%c%02d%c%02d",
678	pDateTime->year,
679	cDateSep,
680	pDateTime->month,
681	cDateSep,
682	pDateTime->day);
683	break;
684
685	default: // yyyy.dd.mm
686	sprintf(pszDate, "%04d%c%02d%c%02d",
687	pDateTime->year,
688	cDateSep,
689	pDateTime->day,
690	cDateSep,
691	pDateTime->month);
692	break;
693	}
694	}
695
696	if (pszTime)
697	{
698	if (ulTimeFormat == 0)
699	{
700	// for 12-hour clock, we need additional INI data
701	CHAR szAMPM[10] = "err";
702
703	if (pDateTime->hours > 12)
704	{
705	// > 12h: PM.
706
707	// Note: 12:xx noon is 12 AM, not PM (even though
708	// AM stands for "ante meridiam", but English is just
709	// not logical), so that's handled below.
710
711	PrfQueryProfileString(HINI_USER,
712	"PM_National",
713	"s2359", // key
714	"PM", // default
715	szAMPM, sizeof(szAMPM)-1);
716	sprintf(pszTime, "%02d%c%02d%c%02d %s",
717	// leave 12 == 12 (not 0)
718	pDateTime->hours % 12,
719	cTimeSep,
720	pDateTime->minutes,
721	cTimeSep,
722	pDateTime->seconds,
723	szAMPM);
724	}
725	else
726	{
727	// <= 12h: AM
728	PrfQueryProfileString(HINI_USER,
729	"PM_National",
730	"s1159", // key
731	"AM", // default
732	szAMPM, sizeof(szAMPM)-1);
733	sprintf(pszTime, "%02d%c%02d%c%02d %s",
734	pDateTime->hours,
735	cTimeSep,
736	pDateTime->minutes,
737	cTimeSep,
738	pDateTime->seconds,
739	szAMPM);
740	}
741	}
742	else
743	// 24-hour clock
744	sprintf(pszTime, "%02d%c%02d%c%02d",
745	pDateTime->hours,
746	cTimeSep,
747	pDateTime->minutes,
748	cTimeSep,
749	pDateTime->seconds);
750	}
751	}
752
753	/*
754	*@@ strhGetWord:
755	* finds word boundaries.
756	*
757	* *ppszStart is used as the beginning of the
758	* search.
759	*
760	* If a word is found, *ppszStart is set to
761	* the first character of the word which was
762	* found and *ppszEnd receives the address
763	* of the first character _after_ the word,
764	* which is probably a space or a \n or \r char.
765	* We then return TRUE.
766	*
767	* The search is stopped if a null character
768	* is found or pLimit is reached. In that case,
769	* FALSE is returned.
770	*
771	*@@added V0.9.1 (2000-02-13) [umoeller]
772	*/
773
774	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
775	// out: start of word (if TRUE is returned)
776	const char pLimit, // in: ptr to last char after ppszStart to be
777	// searched; if the word does not end before
778	// or with this char, FALSE is returned
779	const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
780	const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
781	PSZ *ppszEnd) // out: first char _after_ word
782	// (if TRUE is returned)
783	{
784	// characters after which a word can be started
785	// const char *pcszBeginChars = "\x0d\x0a ";
786	// const char *pcszEndChars = "\x0d\x0a /-";
787
788	PSZ pStart = *ppszStart;
789
790	// find start of word
791	while ( (pStart < (PSZ)pLimit)
792	&& (strchr(pcszBeginChars, *pStart))
793	)
794	// if char is a "before word" char: go for next
795	pStart++;
796
797	if (pStart < (PSZ)pLimit)
798	{
799	// found a valid "word start" character
800	// (which is not in pcszBeginChars):
801
802	// find end of word
803	PSZ pEndOfWord = pStart;
804	while ( (pEndOfWord <= (PSZ)pLimit)
805	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
806	)
807	// if char is not an "end word" char: go for next
808	pEndOfWord++;
809
810	if (pEndOfWord <= (PSZ)pLimit)
811	{
812	// whoa, got a word:
813	*ppszStart = pStart;
814	*ppszEnd = pEndOfWord;
815	return (TRUE);
816	}
817	}
818
819	return (FALSE);
820	}
821
822	/*
823	*@@ strhFindWord:
824	* searches for pszSearch in pszBuf, which is
825	* returned if found (or NULL if not).
826	*
827	* As opposed to strstr, this finds pszSearch
828	* only if it is a "word". A search string is
829	* considered a word if the character _before_
830	* it is in pcszBeginChars and the char _after_
831	* it is in pcszEndChars.
832	*
833	* Example:
834	+ strhFindWord("This is an example.", "is");
835	+ returns ...........^ this, but not the "is" in "This".
836	*
837	* The algorithm here uses strstr to find pszSearch in pszBuf
838	* and performs additional "is-word" checks for each item found.
839	* With VAC++ 3.0, this is still much faster than searching
840	* words first and then comparing each word with pszSearch.
841	* I've tried it that way too, and that took nearly double as
842	* long. Apparently, the VAC++ runtime library uses some
843	* optimized search algorithm here, so we better use that one.
844	*
845	*@@added V0.9.0 (99-11-08) [umoeller]
846	*@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
847	*/
848
849	PSZ strhFindWord(const char *pszBuf,
850	const char *pszSearch,
851	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
852	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
853	{
854	PSZ pszReturn = 0;
855	ULONG cbBuf = strlen(pszBuf),
856	cbSearch = strlen(pszSearch);
857
858	if ((cbBuf) && (cbSearch))
859	{
860	const char *p = pszBuf;
861
862	/* // go thru all characters
863	while (*p)
864	{
865	// check if current character is either the
866	// very first or a "begin word" character
867	if ( (p == pszBuf)
868	\|\| (strchr(pcszBeginChars, *p) == 0)
869	)
870	{
871	// yes: go for next
872	if (*(++p))
873	{
874	// compare with search string
875	if (strcmp(p, pszSearch) == 0)
876	{
877	// is the same:
878	// check if still in buffer
879	if (p < pszBuf + cbBuf)
880	{
881	CHAR cAfterEndOfWord = *(p + cbSearch);
882	if (cAfterEndOfWord == 0)
883	{
884	// end of string:
885	// that's ok
886	pszReturn = (PSZ)p;
887	break;
888	}
889	else
890	{
891	// check if in "end of word" list
892	char *pc2 = strchr(pcszEndChars, cAfterEndOfWord);
893	if (pc2)
894	// OK, is end char: avoid doubles of that char,
895	// but allow spaces
896	if ( (cAfterEndOfWord+1 != *pc2)
897	\|\| (cAfterEndOfWord+1 == ' ')
898	\|\| (cAfterEndOfWord+1 == 0)
899	)
900	{
901	// end of string:
902	// that's ok
903	pszReturn = (PSZ)p;
904	break;
905	}
906	}
907	}
908	}
909	}
910	else
911	// end of string:
912	break;
913	}
914
915	++p;
916	} // end while
917	*/
918
919	do // while p
920	{
921	p = strstr(p, pszSearch);
922	if (p)
923	{
924	// string found:
925	// check if that's a word
926
927	// check previous char
928	if ( (p == pszBuf)
929	\|\| (strchr(pcszBeginChars, *(p-1)))
930	)
931	{
932	// OK, valid begin char:
933	BOOL fEndOK = FALSE;
934	// check end char
935	CHAR cNextChar = *(p + cbSearch);
936	if (cNextChar == 0)
937	fEndOK = TRUE;
938	else
939	{
940	char *pc = strchr(pcszEndChars, cNextChar);
941	if (pc)
942	// OK, is end char: avoid doubles of that char,
943	// but allow spaces
944	if ( (cNextChar+1 != *pc)
945	\|\| (cNextChar+1 == ' ')
946	\|\| (cNextChar+1 == 0)
947	)
948	fEndOK = TRUE;
949	}
950
951	if (fEndOK)
952	{
953	// valid end char:
954	pszReturn = (PSZ)p;
955	break;
956	}
957	}
958	p += cbSearch;
959	}
960	} while (p);
961
962	}
963	return (pszReturn);
964	}
965
966	/*
967	*@@ strhFindEOL:
968	* returns a pointer to the next \r, \n or null character
969	* following pszSearchIn. Stores the offset in *pulOffset.
970	*
971	* This should never return NULL because at some point,
972	* there will be a null byte in your string.
973	*
974	*@@added V0.9.4 (2000-07-01) [umoeller]
975	*/
976
977	PSZ strhFindEOL(PSZ pszSearchIn, // in: where to search
978	PULONG pulOffset) // out: offset (ptr can be NULL)
979	{
980	PSZ p = pszSearchIn,
981	prc = NULL;
982	while (TRUE)
983	{
984	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
985	{
986	prc = p;
987	break;
988	}
989	p++;
990	}
991
992	if (pulOffset)
993	*pulOffset = prc - pszSearchIn;
994	return (prc);
995	}
996
997	/*
998	*@@ strhFindNextLine:
999	* like strhFindEOL, but this returns the character
1000	* _after_ \r or \n. Note that this might return
1001	* a pointer to terminating NULL character also.
1002	*/
1003
1004	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1005	{
1006	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1007	// pEOL now points to the \r char or the terminating 0 byte;
1008	// if not null byte, advance pointer
1009	PSZ pNextLine = pEOL;
1010	if (*pNextLine == '\r')
1011	pNextLine++;
1012	if (*pNextLine == '\n')
1013	pNextLine++;
1014	if (pulOffset)
1015	*pulOffset = pNextLine - pszSearchIn;
1016	return (pNextLine);
1017	}
1018
1019	/*
1020	*@@ strhFindKey:
1021	* finds pszKey in pszSearchIn; similar to strhistr,
1022	* but this one makes sure the key is at the beginning
1023	* of a line. Spaces before the key are tolerated.
1024	* Returns NULL if the key was not found.
1025	*
1026	* Used by strhGetParameter/strhSetParameter; useful
1027	* for analyzing CONFIG.SYS settings.
1028	*
1029	*@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1030	*@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1031	*/
1032
1033	PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1034	const char *pcszKey, // in: key to search for
1035	PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1036	// can be NULL if not needed
1037	{
1038	const char *p = NULL;
1039	PSZ pReturn = NULL;
1040	// BOOL fFound = FALSE;
1041
1042	p = pcszSearchIn;
1043	do {
1044	p = strhistr(p, pcszKey);
1045
1046	if ((p) && (p >= pcszSearchIn))
1047	{
1048	// make sure the key is at the beginning of a line
1049	// by going backwards until we find a char != " "
1050	const char *p2 = p;
1051	while ( (*p2 == ' ')
1052	&& (p2 > pcszSearchIn)
1053	)
1054	p2--;
1055
1056	// if previous char is an EOL sign, go on
1057	if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1058	\|\| (*(p2-1) == '\r')
1059	\|\| (*(p2-1) == '\n')
1060	)
1061	{
1062	// now check whether the char after the search
1063	// is a "=" char
1064	// ULONG cbKey = strlen(pszKey);
1065
1066	// tolerate spaces before "="
1067	/* PSZ p3 = p;
1068	while (*(p3+cbKey) == ' ')
1069	p3++;
1070
1071	if ((p3+cbKey) == '=') /
1072	{
1073	// found:
1074	pReturn = (PSZ)p; // go on, p contains found key
1075
1076	// test for all upper case?
1077	if (pfIsAllUpperCase)
1078	{
1079	ULONG cbKey2 = strlen(pcszKey),
1080	ul = 0;
1081	*pfIsAllUpperCase = TRUE;
1082	for (ul = 0; ul < cbKey2; ul++)
1083	if (islower(*(p+ul)))
1084	{
1085	*pfIsAllUpperCase = FALSE;
1086	break; // for
1087	}
1088	}
1089
1090	break; // do
1091	}
1092	} // else search next key
1093
1094	p++; // search on after this key
1095	}
1096	} while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1097
1098	return (pReturn);
1099	}
1100
1101	/*
1102	*@@ strhGetParameter:
1103	* searches pszSearchIn for the key pszKey; if found, it
1104	* returns a pointer to the following characters in pszSearchIn
1105	* and, if pszCopyTo != NULL, copies the rest of the line to
1106	* that buffer, of which cbCopyTo specified the size.
1107	* If the key is not found, NULL is returned.
1108	* String search is done by calling strhFindKey.
1109	* This is useful for querying CONFIG.SYS settings.
1110	*
1111	* <B>Example:</B> this would return "YES" if you searched
1112	* for "PAUSEONERROR=", and "PAUSEONERROR=YES" existed in pszSearchIn.
1113	*/
1114
1115	PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1116	const char *pcszKey, // in: key to search for
1117	PSZ pszCopyTo, // out: key value
1118	ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1119	{
1120	PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1121	prc = NULL;
1122	if (p)
1123	{
1124	prc = p + strlen(pcszKey);
1125	if (pszCopyTo)
1126	// copy to pszCopyTo
1127	{
1128	ULONG cb;
1129	PSZ pEOL = strhFindEOL(prc, &cb);
1130	if (pEOL)
1131	{
1132	if (cb > cbCopyTo)
1133	cb = cbCopyTo-1;
1134	strhncpy0(pszCopyTo, prc, cb);
1135	}
1136	}
1137	}
1138
1139	return (prc);
1140	}
1141
1142	/*
1143	*@@ strhSetParameter:
1144	* searches *ppszBuf for the key pszKey; if found, it
1145	* replaces the characters following this key up to the
1146	* end of the line with pszParam. If pszKey is not found in
1147	* *ppszBuf, it is appended to the file in a new line.
1148	*
1149	* If any changes are made, *ppszBuf is re-allocated.
1150	*
1151	* This function searches w/out case sensitivity.
1152	*
1153	* Returns a pointer to the new parameter inside the buffer.
1154	*
1155	@@changed V0.9.0 [umoeller]: changed function prototype to PSZ ppszSearchIn
1156	*/
1157
1158	PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1159	const char *pcszKey, // in: key to search for
1160	PSZ pszNewParam, // in: new parameter to set for key
1161	BOOL fRespectCase) // in: if TRUE, pszNewParam will
1162	// be converted to upper case if the found key is
1163	// in upper case also. pszNewParam should be in
1164	// lower case if you use this.
1165	{
1166	BOOL fIsAllUpperCase = FALSE;
1167	PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1168	prc = NULL;
1169
1170	if (pKey)
1171	{
1172	// key found in file:
1173	// replace existing parameter
1174	PSZ pOldParam = pKey + strlen(pcszKey);
1175
1176	prc = pOldParam;
1177	// pOldParam now has the old parameter, which we
1178	// will overwrite now
1179
1180	if (pOldParam)
1181	{
1182	ULONG cbOldParam;
1183	PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1184	// pEOL now has first end-of-line after the parameter
1185
1186	if (pEOL)
1187	{
1188	XSTRING strBuf,
1189	strFind,
1190	strReplace;
1191
1192	PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1193	strncpy(pszOldCopy, pOldParam, cbOldParam);
1194	pszOldCopy[cbOldParam] = '\0';
1195
1196	xstrInit(&strBuf, 0);
1197	xstrset(&strBuf, *ppszBuf); // this must not be freed!
1198	xstrInit(&strFind, 0);
1199	xstrset(&strFind, pszOldCopy); // this must not be freed!
1200	xstrInit(&strReplace, 0);
1201	xstrset(&strReplace, pszNewParam); // this must not be freed!
1202
1203	// check for upper case desired?
1204	if (fRespectCase)
1205	if (fIsAllUpperCase)
1206	strupr(pszNewParam);
1207
1208	xstrrpl(&strBuf, 0, &strFind, &strReplace, NULL);
1209
1210	free(pszOldCopy);
1211
1212	*ppszBuf = strBuf.psz;
1213	}
1214	}
1215	}
1216	else
1217	{
1218	PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1219	+ strlen(pcszKey)
1220	+ strlen(pszNewParam)
1221	+ 5); // 2 * \r\n + null byte
1222	// key not found: append to end of file
1223	sprintf(pszNew, "%s\r\n%s%s\r\n",
1224	*ppszBuf, pcszKey, pszNewParam);
1225	free(*ppszBuf);
1226	*ppszBuf = pszNew;
1227	}
1228
1229	return (prc);
1230	}
1231
1232	/*
1233	*@@ strhDeleteLine:
1234	* this deletes the line in pszSearchIn which starts with
1235	* the key pszKey. Returns TRUE if the line was found and
1236	* deleted.
1237	*
1238	* This copies within pszSearchIn.
1239	*/
1240
1241	BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1242	PSZ pszKey) // in: key to find
1243	{
1244	BOOL fIsAllUpperCase = FALSE;
1245	PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1246	BOOL brc = FALSE;
1247
1248	if (pKey) {
1249	PSZ pEOL = strhFindEOL(pKey, NULL);
1250	// pEOL now has first end-of-line after the key
1251	if (pEOL)
1252	{
1253	// delete line by overwriting it with
1254	// the next line
1255	strcpy(pKey, pEOL+2);
1256	}
1257	else
1258	{
1259	// EOL not found: we must be at the end of the file
1260	*pKey = '\0';
1261	}
1262	brc = TRUE;
1263	}
1264
1265	return (brc);
1266	}
1267
1268	/*
1269	*@@ strhBeautifyTitle:
1270	* replaces all line breaks (0xd, 0xa) with spaces.
1271	*/
1272
1273	BOOL strhBeautifyTitle(PSZ psz)
1274	{
1275	BOOL rc = FALSE;
1276	CHAR *p;
1277	while ((p = strchr(psz, 0xa)))
1278	{
1279	*p = ' ';
1280	rc = TRUE;
1281	}
1282	while ((p = strchr(psz, 0xd)))
1283	{
1284	*p = ' ';
1285	rc = TRUE;
1286	}
1287	return (rc);
1288	}
1289
1290	/*
1291	* strhFindAttribValue:
1292	* searches for pszAttrib in pszSearchIn; if found,
1293	* returns the first character after the "=" char.
1294	* If "=" is not found, a space, \r, and \n are
1295	* also accepted. This function searches without
1296	* respecting case.
1297	*
1298	* <B>Example:</B>
1299	+ strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1300	+
1301	+ returns ....................... ^ this address.
1302	*
1303	*@@added V0.9.0 [umoeller]
1304	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1305	*/
1306
1307	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1308	{
1309	PSZ prc = 0;
1310	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1311	p,
1312	p2;
1313	ULONG cbAttrib = strlen(pszAttrib);
1314
1315	// 1) find space char
1316	while ((p = strchr(pszSearchIn2, ' ')))
1317	{
1318	CHAR c;
1319	p++;
1320	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1321	// now check whether the p+strlen(pszAttrib)
1322	// is a valid end-of-tag character
1323	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1324	&& ( (c == ' ')
1325	\|\| (c == '>')
1326	\|\| (c == '=')
1327	\|\| (c == '\r')
1328	\|\| (c == '\n')
1329	\|\| (c == 0)
1330	)
1331	)
1332	{
1333	// yes:
1334	CHAR c2;
1335	p2 = p + cbAttrib;
1336	c2 = *p2;
1337	while ( ( (c2 == ' ')
1338	\|\| (c2 == '=')
1339	\|\| (c2 == '\n')
1340	\|\| (c2 == '\r')
1341	)
1342	&& (c2 != 0)
1343	)
1344	c2 = *++p2;
1345	prc = p2;
1346	break; // first while
1347	}
1348	pszSearchIn2++;
1349	}
1350	return (prc);
1351	}
1352
1353	/*
1354	* strhGetNumAttribValue:
1355	* stores the numerical parameter value of an HTML-style
1356	* tag in *pl.
1357	*
1358	* Returns the address of the tag parameter in the
1359	* search buffer, if found, or NULL.
1360	*
1361	* <B>Example:</B>
1362	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1363	*
1364	* stores 123 in the "l" variable.
1365	*
1366	*@@added V0.9.0 [umoeller]
1367	*/
1368
1369	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1370	const char *pszTag, // e.g. "INDEX"
1371	PLONG pl) // out: numerical value
1372	{
1373	PSZ pParam;
1374	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1375	sscanf(pParam, "%d", pl);
1376
1377	return (pParam);
1378	}
1379
1380	/*
1381	* strhGetTextAttr:
1382	* retrieves the attribute value of a textual HTML-style tag
1383	* in a newly allocated buffer, which is returned,
1384	* or NULL if attribute not found.
1385	* If an attribute value is to contain spaces, it
1386	* must be enclosed in quotes.
1387	*
1388	* The offset of the attribute data in pszSearchIn is
1389	* returned in *pulOffset so that you can do multiple
1390	* searches.
1391	*
1392	* This returns a new buffer, which should be free()'d after use.
1393	*
1394	* <B>Example:</B>
1395	+ ULONG ulOfs = 0;
1396	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1397	+ ............^ ulOfs
1398	*
1399	* returns a new string with the value "blublub" (without
1400	* quotes) and sets ulOfs to 12.
1401	*
1402	*@@added V0.9.0 [umoeller]
1403	*/
1404
1405	PSZ strhGetTextAttr(const char *pszSearchIn,
1406	const char *pszTag,
1407	PULONG pulOffset) // out: offset where found
1408	{
1409	PSZ pParam,
1410	pParam2,
1411	prc = NULL;
1412	ULONG ulCount = 0;
1413	LONG lNestingLevel = 0;
1414
1415	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1416	{
1417	// determine end character to search for: a space
1418	CHAR cEnd = ' ';
1419	if (*pParam == '\"')
1420	{
1421	// or, if the data is enclosed in quotes, a quote
1422	cEnd = '\"';
1423	pParam++;
1424	}
1425
1426	if (pulOffset)
1427	// store the offset
1428	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1429
1430	// now find end of attribute
1431	pParam2 = pParam;
1432	while (*pParam)
1433	{
1434	if (*pParam == cEnd)
1435	// end character found
1436	break;
1437	else if (*pParam == '<')
1438	// yet another opening tag found:
1439	// this is probably some "<" in the attributes
1440	lNestingLevel++;
1441	else if (*pParam == '>')
1442	{
1443	lNestingLevel--;
1444	if (lNestingLevel < 0)
1445	// end of tag found:
1446	break;
1447	}
1448	ulCount++;
1449	pParam++;
1450	}
1451
1452	// copy attribute to new buffer
1453	if (ulCount)
1454	{
1455	prc = (PSZ)malloc(ulCount+1);
1456	memcpy(prc, pParam2, ulCount);
1457	*(prc+ulCount) = 0;
1458	}
1459	}
1460	return (prc);
1461	}
1462
1463	/*
1464	* strhFindEndOfTag:
1465	* returns a pointer to the ">" char
1466	* which seems to terminate the tag beginning
1467	* after pszBeginOfTag.
1468	*
1469	* If additional "<" chars are found, we look
1470	* for additional ">" characters too.
1471	*
1472	* Note: You must pass the address of the opening
1473	* '<' character to this function.
1474	*
1475	* Example:
1476	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1477	+ strhFindEndOfTag(pszTest)
1478	+ returns.................................^ this.
1479	*
1480	*@@added V0.9.0 [umoeller]
1481	*/
1482
1483	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1484	{
1485	PSZ p = (PSZ)pszBeginOfTag,
1486	prc = NULL;
1487	LONG lNestingLevel = 0;
1488
1489	while (*p)
1490	{
1491	if (*p == '<')
1492	// another opening tag found:
1493	lNestingLevel++;
1494	else if (*p == '>')
1495	{
1496	// closing tag found:
1497	lNestingLevel--;
1498	if (lNestingLevel < 1)
1499	{
1500	// corresponding: return this
1501	prc = p;
1502	break;
1503	}
1504	}
1505	p++;
1506	}
1507
1508	return (prc);
1509	}
1510
1511	/*
1512	* strhGetBlock:
1513	* this complex function searches the given string
1514	* for a pair of opening/closing HTML-style tags.
1515	*
1516	* If found, this routine returns TRUE and does
1517	* the following:
1518	*
1519	* 1) allocate a new buffer, copy the text
1520	* enclosed by the opening/closing tags
1521	* into it and set *ppszBlock to that
1522	* buffer;
1523	*
1524	* 2) if the opening tag has any attributes,
1525	* allocate another buffer, copy the
1526	* attributes into it and set *ppszAttrs
1527	* to that buffer; if no attributes are
1528	* found, *ppszAttrs will be NULL;
1529	*
1530	* 3) set *pulOffset to the offset from the
1531	* beginning of *ppszSearchIn where the
1532	* opening tag was found;
1533	*
1534	* 4) advance *ppszSearchIn to after the
1535	* closing tag, so that you can do
1536	* multiple searches without finding the
1537	* same tags twice.
1538	*
1539	* All buffers should be freed using free().
1540	*
1541	* This returns the following:
1542	* -- 0: no error
1543	* -- 1: tag not found at all (doesn't have to be an error)
1544	* -- 2: begin tag found, but no corresponding end tag found. This
1545	* is a real error.
1546	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1547	*
1548	* <B>Example:</B>
1549	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1550	+ PSZ pszBlock, pszAttrs;
1551	+ ULONG ulOfs;
1552	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1553	*
1554	* would do the following:
1555	*
1556	* 1) set pszBlock to a new string containing "This is page 1."
1557	* without quotes;
1558	*
1559	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1560	*
1561	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1562	*
1563	* 4) pSearch would be advanced to point to the "More text"
1564	* string in the original buffer.
1565	*
1566	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1567	* for HTML parsing.
1568	*
1569	*@@added V0.9.0 [umoeller]
1570	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1571	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1572	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1573	*/
1574
1575	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1576	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1577	PSZ pszTag,
1578	PSZ *ppszBlock, // out: block enclosed by the tags
1579	PSZ *ppszAttribs, // out: attributes of the opening tag
1580	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1581	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1582	{
1583	ULONG ulrc = 1;
1584	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1585	pszSearch2 = pszBeginTag,
1586	pszClosingTag;
1587	ULONG cbTag = strlen(pszTag);
1588
1589	// go thru the block and check all tags if it's the
1590	// begin tag we're looking for
1591	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1592	{
1593	if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1594	// yes: stop
1595	break;
1596	else
1597	pszBeginTag++;
1598	}
1599
1600	if (pszBeginTag)
1601	{
1602	// we found <TAG>:
1603	ULONG ulNestingLevel = 0;
1604
1605	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1606	// strchr(pszBeginTag, '>');
1607	if (pszEndOfBeginTag)
1608	{
1609	// does the caller want the attributes?
1610	if (ppszAttribs)
1611	{
1612	// yes: then copy them
1613	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1614	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1615	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1616	// add terminating 0
1617	*(pszAttrs + ulAttrLen) = 0;
1618
1619	*ppszAttribs = pszAttrs;
1620	}
1621
1622	// output offset of where we found the begin tag
1623	if (pulOfsBeginTag)
1624	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1625
1626	// now find corresponding closing tag (e.g. "</BODY>"
1627	pszBeginTag = pszEndOfBeginTag+1;
1628	// now we're behind the '>' char of the opening tag
1629	// increase offset of that too
1630	if (pulOfsBeginBlock)
1631	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1632
1633	// find next closing tag;
1634	// for the first run, pszSearch2 points to right
1635	// after the '>' char of the opening tag
1636	pszSearch2 = pszBeginTag;
1637	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1638	&& (pszClosingTag = strstr(pszSearch2, "<"))
1639	)
1640	{
1641	// if we have another opening tag before our closing
1642	// tag, we need to have several closing tags before
1643	// we're done
1644	if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1645	ulNestingLevel++;
1646	else
1647	{
1648	// is this ours?
1649	if ( (*(pszClosingTag+1) == '/')
1650	&& (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1651	)
1652	{
1653	// we've found a matching closing tag; is
1654	// it ours?
1655	if (ulNestingLevel == 0)
1656	{
1657	// our closing tag found:
1658	// allocate mem for a new buffer
1659	// and extract all the text between
1660	// open and closing tags to it
1661	ULONG ulLen = pszClosingTag - pszBeginTag;
1662	if (ppszBlock)
1663	{
1664	PSZ pNew = (PSZ)malloc(ulLen + 1);
1665	strhncpy0(pNew, pszBeginTag, ulLen);
1666	*ppszBlock = pNew;
1667	}
1668
1669	// raise search offset to after the closing tag
1670	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1671
1672	ulrc = 0;
1673
1674	break;
1675	} else
1676	// not our closing tag:
1677	ulNestingLevel--;
1678	}
1679	}
1680	// no matching closing tag: search on after that
1681	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1682	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1683
1684	if (!pszClosingTag)
1685	// no matching closing tag found:
1686	// return 2 (closing tag not found)
1687	ulrc = 2;
1688	} // end if (pszBeginTag)
1689	else
1690	// no matching ">" for opening tag found:
1691	ulrc = 3;
1692	}
1693
1694	return (ulrc);
1695	}
1696
1697	/* ******************************************************************
1698	* *
1699	* Miscellaneous *
1700	* *
1701	********************************************************************/
1702
1703	/*
1704	*@@ strhArrayAppend:
1705	* this appends a string to a "string array".
1706	*
1707	* A string array is considered a sequence of
1708	* zero-terminated strings in memory. That is,
1709	* after each string's null-byte, the next
1710	* string comes up.
1711	*
1712	* This is useful for composing a single block
1713	* of memory from, say, list box entries, which
1714	* can then be written to OS2.INI in one flush.
1715	*
1716	* To append strings to such an array, call this
1717	* function for each string you wish to append.
1718	* This will re-allocate *ppszRoot with each call,
1719	* and update *pcbRoot, which then contains the
1720	* total size of all strings (including all null
1721	* terminators).
1722	*
1723	* Pass *pcbRoot to PrfSaveProfileData to have the
1724	* block saved.
1725	*
1726	* Note: On the first call, ppszRoot and pcbRoot
1727	* _must_ be both NULL, or this crashes.
1728	*/
1729
1730	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1731	PSZ pszNew, // in: string to append
1732	PULONG pcbRoot) // in/out: size of array
1733	{
1734	ULONG cbNew = strlen(pszNew);
1735	PSZ pszTemp = (PSZ)malloc(*pcbRoot
1736	+ cbNew
1737	+ 1); // two null bytes
1738	if (*ppszRoot)
1739	{
1740	// not first loop: copy old stuff
1741	memcpy(pszTemp,
1742	*ppszRoot,
1743	*pcbRoot);
1744	free(*ppszRoot);
1745	}
1746	// append new string
1747	strcpy(pszTemp + *pcbRoot,
1748	pszNew);
1749	// update root
1750	*ppszRoot = pszTemp;
1751	// update length
1752	*pcbRoot += cbNew + 1;
1753	}
1754
1755	/*
1756	*@@ strhCreateDump:
1757	* this dumps a memory block into a string
1758	* and returns that string in a new buffer.
1759	*
1760	* You must free() the returned PSZ after use.
1761	*
1762	* The output looks like the following:
1763	*
1764	+ 0000: FE FF 0E 02 90 00 00 00 ........
1765	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1766	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1767	*
1768	* Each line is terminated with a newline (\n)
1769	* character only.
1770	*
1771	*@@added V0.9.1 (2000-01-22) [umoeller]
1772	*/
1773
1774	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1775	ULONG ulSize, // in: size of buffer
1776	ULONG ulIndent) // in: indentation of every line
1777	{
1778	PSZ pszReturn = 0;
1779	XSTRING strReturn;
1780	CHAR szTemp[1000];
1781
1782	PBYTE pbCurrent = pb; // current byte
1783	ULONG ulCount = 0,
1784	ulCharsInLine = 0; // if this grows > 7, a new line is started
1785	CHAR szLine[400] = "",
1786	szAscii[30] = " "; // ASCII representation; filled for every line
1787	PSZ pszLine = szLine,
1788	pszAscii = szAscii;
1789
1790	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1791
1792	for (pbCurrent = pb;
1793	ulCount < ulSize;
1794	pbCurrent++, ulCount++)
1795	{
1796	if (ulCharsInLine == 0)
1797	{
1798	memset(szLine, ' ', ulIndent);
1799	pszLine += ulIndent;
1800	}
1801	pszLine += sprintf(pszLine, "%02lX ", *pbCurrent);
1802
1803	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1804	// printable character:
1805	pszAscii = pbCurrent;
1806	else
1807	*pszAscii = '.';
1808	pszAscii++;
1809
1810	ulCharsInLine++;
1811	if ( (ulCharsInLine > 7) // 8 bytes added?
1812	\|\| (ulCount == ulSize-1) // end of buffer reached?
1813	)
1814	{
1815	// if we haven't had eight bytes yet,
1816	// fill buffer up to eight bytes with spaces
1817	ULONG ul2;
1818	for (ul2 = ulCharsInLine;
1819	ul2 < 8;
1820	ul2++)
1821	pszLine += sprintf(pszLine, " ");
1822
1823	sprintf(szTemp, "%04lX: %s %s\n",
1824	(ulCount & 0xFFFFFFF8), // offset in hex
1825	szLine, // bytes string
1826	szAscii); // ASCII string
1827	xstrcat(&strReturn, szTemp);
1828
1829	// restart line buffer
1830	pszLine = szLine;
1831
1832	// clear ASCII buffer
1833	strcpy(szAscii, " ");
1834	pszAscii = szAscii;
1835
1836	// reset line counter
1837	ulCharsInLine = 0;
1838	}
1839	}
1840
1841	if (strReturn.cbAllocated)
1842	pszReturn = strReturn.psz;
1843
1844	return (pszReturn);
1845	}
1846
1847	/* ******************************************************************
1848	* *
1849	* Wildcard matching *
1850	* *
1851	********************************************************************/
1852
1853	/*
1854	* The following code has been taken from "fnmatch.zip".
1855	*
1856	* (c) 1994-1996 by Eberhard Mattes.
1857	*/
1858
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. C is evaluated
 * twice, so it must not have side effects. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the null terminator or a separator.
 * C is evaluated more than once. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))
1869
/*
 *@@ skip_comp_os2:
 *      returns the address of the path component which
 *      follows the one SRC points into, for OS/2 and DOS
 *      styles (both "/" and "\" are separators). If SRC
 *      has no more separators, the address of its null
 *      terminator is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    /* Number of characters before the next separator or the end
     * of the string, whichever comes first. */

    const unsigned char *next = src + strcspn((const char *)src, "/\\");

    /* Step over the separator, but never over the terminator. */

    return (*next != 0) ? next + 1 : next;
}
1893
/*
 * has_colon:
 *      tells whether the null-terminated path P has a ":"
 *      character anywhere. Returns 1 if so, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    return (strchr((const char *)p, ':') != NULL) ? 1 : 0;
}
1910
1911	/*
1912	* match_comp_os2:
1913	* Compare a single component (directory name or file name) of the
1914	* paths, for OS/2 and DOS styles. MASK and NAME point into a
1915	* component of the wildcard and the name to be checked, respectively.
1916	* Comparing stops at the next separator. The FLAGS argument is the
1917	* same as that of fnmatch(). HAS_DOT is true if a dot is in the
1918	* current component of NAME. The number of dots is not restricted,
1919	* even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1920	* Note that this function is recursive.
1921	*
1922	* (c) 1994-1996 by Eberhard Mattes.
1923	*/
1924
1925	static int match_comp_os2(const unsigned char *mask,
1926	const unsigned char *name,
1927	unsigned flags,
1928	int has_dot)
1929	{
1930	int rc;
1931
1932	for (;;)
1933	switch (*mask)
1934	{
1935	case 0:
1936
1937	/* There must be no extra characters at the end of NAME when
1938	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1939	* in that case, NAME may point to a separator. */
1940
1941	if (*name == 0)
1942	return FNM_MATCH;
1943	if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1944	return FNM_MATCH;
1945	return FNM_NOMATCH;
1946
1947	case '/':
1948	case '\\':
1949
1950	/* Separators match separators. */
1951
1952	if (IS_OS2_COMP_SEP(*name))
1953	return FNM_MATCH;
1954
1955	/* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1956	* is ignored at the end of NAME. */
1957
1958	if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1959	return FNM_MATCH;
1960
1961	/* Stop comparing at the separator. */
1962
1963	return FNM_NOMATCH;
1964
1965	case '?':
1966
1967	/* A question mark matches one character. It does not match
1968	* a dot. At the end of the component (and before a dot),
1969	* it also matches zero characters. */
1970
1971	if (name != '.' && !IS_OS2_COMP_END(name))
1972	++name;
1973	++mask;
1974	break;
1975
1976	case '*':
1977
1978	/* An asterisk matches zero or more characters. In DOS
1979	* mode, dots are not matched. */
1980
1981	do
1982	{
1983	++mask;
1984	}
1985	while (mask == '');
1986	for (;;)
1987	{
1988	rc = match_comp_os2(mask, name, flags, has_dot);
1989	if (rc != FNM_NOMATCH)
1990	return rc;
1991	if (IS_OS2_COMP_END(*name))
1992	return FNM_NOMATCH;
1993	if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
1994	return FNM_NOMATCH;
1995	++name;
1996	}
1997
1998	case '.':
1999
2000	/* A dot matches a dot. It also matches the implicit dot at
2001	* the end of a dot-less NAME. */
2002
2003	++mask;
2004	if (*name == '.')
2005	++name;
2006	else if (has_dot \|\| !IS_OS2_COMP_END(*name))
2007	return FNM_NOMATCH;
2008	break;
2009
2010	default:
2011
2012	/* All other characters match themselves. */
2013
2014	if (flags & _FNM_IGNORECASE)
2015	{
2016	if (tolower(mask) != tolower(name))
2017	return FNM_NOMATCH;
2018	}
2019	else
2020	{
2021	if (mask != name)
2022	return FNM_NOMATCH;
2023	}
2024	++mask;
2025	++name;
2026	break;
2027	}
2028	}
2029
2030	/*
2031	* match_comp:
2032	* compare a single component (directory name or file name) of the
2033	* paths, for all styles which need component-by-component matching.
2034	* MASK and NAME point to the start of a component of the wildcard and
2035	* the name to be checked, respectively. Comparing stops at the next
2036	* separator. The FLAGS argument is the same as that of fnmatch().
2037	* Return FNM_MATCH iff MASK and NAME match.
2038	*
2039	* (c) 1994-1996 by Eberhard Mattes.
2040	*/
2041
2042	static int match_comp(const unsigned char *mask,
2043	const unsigned char *name,
2044	unsigned flags)
2045	{
2046	const unsigned char *s;
2047
2048	switch (flags & _FNM_STYLE_MASK)
2049	{
2050	case _FNM_OS2:
2051	case _FNM_DOS:
2052
2053	/* For OS/2 and DOS styles, we add an implicit dot at the end of
2054	* the component if the component doesn't include a dot. */
2055
2056	s = name;
2057	while (!IS_OS2_COMP_END(s) && s != '.')
2058	++s;
2059	return match_comp_os2(mask, name, flags, *s == '.');
2060
2061	default:
2062	return FNM_ERR;
2063	}
2064	}
2065
/* In Unix styles, / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C)  ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the null terminator or a separator.
 * C is evaluated twice, so it must not have side effects. */

#define IS_UNIX_COMP_END(C)  ((C) == 0 || IS_UNIX_COMP_SEP (C))
2076
2077	/*
2078	* match_unix:
2079	* match complete paths for Unix styles. The FLAGS argument is the
2080	* same as that of fnmatch(). COMP points to the start of the current
2081	* component in NAME. Return FNM_MATCH iff MASK and NAME match. The
2082	* backslash character is used for escaping ? and * unless
2083	* FNM_NOESCAPE is set.
2084	*
2085	* (c) 1994-1996 by Eberhard Mattes.
2086	*/
2087
2088	static int match_unix(const unsigned char *mask,
2089	const unsigned char *name,
2090	unsigned flags,
2091	const unsigned char *comp)
2092	{
2093	unsigned char c1, c2;
2094	char invert, matched;
2095	const unsigned char *start;
2096	int rc;
2097
2098	for (;;)
2099	switch (*mask)
2100	{
2101	case 0:
2102
2103	/* There must be no extra characters at the end of NAME when
2104	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
2105	* in that case, NAME may point to a separator. */
2106
2107	if (*name == 0)
2108	return FNM_MATCH;
2109	if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
2110	return FNM_MATCH;
2111	return FNM_NOMATCH;
2112
2113	case '?':
2114
2115	/* A question mark matches one character. It does not match
2116	* the component separator if FNM_PATHNAME is set. It does
2117	* not match a dot at the start of a component if FNM_PERIOD
2118	* is set. */
2119
2120	if (*name == 0)
2121	return FNM_NOMATCH;
2122	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2123	return FNM_NOMATCH;
2124	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2125	return FNM_NOMATCH;
2126	++mask;
2127	++name;
2128	break;
2129
2130	case '*':
2131
2132	/* An asterisk matches zero or more characters. It does not
2133	* match the component separator if FNM_PATHNAME is set. It
2134	* does not match a dot at the start of a component if
2135	* FNM_PERIOD is set. */
2136
2137	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2138	return FNM_NOMATCH;
2139	do
2140	{
2141	++mask;
2142	}
2143	while (mask == '');
2144	for (;;)
2145	{
2146	rc = match_unix(mask, name, flags, comp);
2147	if (rc != FNM_NOMATCH)
2148	return rc;
2149	if (*name == 0)
2150	return FNM_NOMATCH;
2151	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2152	return FNM_NOMATCH;
2153	++name;
2154	}
2155
2156	case '/':
2157
2158	/* Separators match only separators. If _FNM_PATHPREFIX is
2159	* set, a trailing separator in MASK is ignored at the end
2160	* of NAME. */
2161
2162	if (!(IS_UNIX_COMP_SEP(*name)
2163	\|\| ((flags & _FNM_PATHPREFIX) && *name == 0
2164	&& (mask[1] == 0
2165	\|\| (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
2166	&& mask[2] == 0)))))
2167	return FNM_NOMATCH;
2168
2169	++mask;
2170	if (*name != 0)
2171	++name;
2172
2173	/* This is the beginning of a new component if FNM_PATHNAME
2174	* is set. */
2175
2176	if (flags & FNM_PATHNAME)
2177	comp = name;
2178	break;
2179
2180	case '[':
2181
2182	/* A set of characters. Always case-sensitive. */
2183
2184	if (*name == 0)
2185	return FNM_NOMATCH;
2186	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2187	return FNM_NOMATCH;
2188	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2189	return FNM_NOMATCH;
2190
2191	invert = 0;
2192	matched = 0;
2193	++mask;
2194
2195	/* If the first character is a ! or ^, the set matches all
2196	* characters not listed in the set. */
2197
2198	if (mask == '!' \|\| mask == '^')
2199	{
2200	++mask;
2201	invert = 1;
2202	}
2203
2204	/* Loop over all the characters of the set. The loop ends
2205	* if the end of the string is reached or if a ] is
2206	* encountered unless it directly follows the initial [ or
2207	* [-. */
2208
2209	start = mask;
2210	while (!(mask == 0 \|\| (mask == ']' && mask != start)))
2211	{
2212	/* Get the next character which is optionally preceded
2213	* by a backslash. */
2214
2215	c1 = *mask++;
2216	if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2217	{
2218	if (*mask == 0)
2219	break;
2220	c1 = *mask++;
2221	}
2222
2223	/* Ranges of characters are written as a-z. Don't
2224	* forget to check for the end of the string and to
2225	* handle the backslash. If the character after - is a
2226	* ], it isn't a range. */
2227
2228	if (*mask == '-' && mask[1] != ']')
2229	{
2230	++mask; /* Skip the - character */
2231	if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2232	++mask;
2233	if (*mask == 0)
2234	break;
2235	c2 = *mask++;
2236	}
2237	else
2238	c2 = c1;
2239
2240	/* Now check whether this character or range matches NAME. */
2241
2242	if (c1 <= name && name <= c2)
2243	matched = 1;
2244	}
2245
2246	/* If the end of the string is reached before a ] is found,
2247	* back up to the [ and compare it to NAME. */
2248
2249	if (*mask == 0)
2250	{
2251	if (*name != '[')
2252	return FNM_NOMATCH;
2253	++name;
2254	mask = start;
2255	if (invert)
2256	--mask;
2257	}
2258	else
2259	{
2260	if (invert)
2261	matched = !matched;
2262	if (!matched)
2263	return FNM_NOMATCH;
2264	++mask; /* Skip the ] character */
2265	if (*name != 0)
2266	++name;
2267	}
2268	break;
2269
2270	case '\\':
2271	++mask;
2272	if (flags & FNM_NOESCAPE)
2273	{
2274	if (*name != '\\')
2275	return FNM_NOMATCH;
2276	++name;
2277	}
2278	else if (mask == '' \|\| *mask == '?')
2279	{
2280	if (mask != name)
2281	return FNM_NOMATCH;
2282	++mask;
2283	++name;
2284	}
2285	break;
2286
2287	default:
2288
2289	/* All other characters match themselves. */
2290
2291	if (flags & _FNM_IGNORECASE)
2292	{
2293	if (tolower(mask) != tolower(name))
2294	return FNM_NOMATCH;
2295	}
2296	else
2297	{
2298	if (mask != name)
2299	return FNM_NOMATCH;
2300	}
2301	++mask;
2302	++name;
2303	break;
2304	}
2305	}
2306
2307	/*
2308	* _fnmatch_unsigned:
2309	* Check whether the path name NAME matches the wildcard MASK.
2310	*
2311	* Return:
2312	* -- 0 (FNM_MATCH) if it matches,
2313	* -- _FNM_NOMATCH if it doesn't,
2314	* -- FNM_ERR on error.
2315	*
2316	* The operation of this function is controlled by FLAGS.
2317	* This is an internal function, with unsigned arguments.
2318	*
2319	* (c) 1994-1996 by Eberhard Mattes.
2320	*/
2321
2322	static int _fnmatch_unsigned(const unsigned char *mask,
2323	const unsigned char *name,
2324	unsigned flags)
2325	{
2326	int m_drive, n_drive,
2327	rc;
2328
2329	/* Match and skip the drive name if present. */
2330
2331	m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2332	n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2333
2334	if (m_drive != n_drive)
2335	{
2336	if (m_drive == -1 \|\| n_drive == -1)
2337	return FNM_NOMATCH;
2338	if (!(flags & _FNM_IGNORECASE))
2339	return FNM_NOMATCH;
2340	if (tolower(m_drive) != tolower(n_drive))
2341	return FNM_NOMATCH;
2342	}
2343
2344	if (m_drive != -1)
2345	mask += 2;
2346	if (n_drive != -1)
2347	name += 2;
2348
2349	/* Colons are not allowed in path names, except for the drive name,
2350	* which was skipped above. */
2351
2352	if (has_colon(mask) \|\| has_colon(name))
2353	return FNM_ERR;
2354
2355	/* The name "\\server\path" should not be matched by mask
2356	* "\\server\path". Ditto for /. /
2357
2358	switch (flags & _FNM_STYLE_MASK)
2359	{
2360	case _FNM_OS2:
2361	case _FNM_DOS:
2362
2363	if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2364	{
2365	if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2366	return FNM_NOMATCH;
2367	name += 2;
2368	mask += 2;
2369	}
2370	break;
2371
2372	case _FNM_POSIX:
2373
2374	if (name[0] == '/' && name[1] == '/')
2375	{
2376	int i;
2377
2378	name += 2;
2379	for (i = 0; i < 2; ++i)
2380	if (mask[0] == '/')
2381	++mask;
2382	else if (mask[0] == '\\' && mask[1] == '/')
2383	mask += 2;
2384	else
2385	return FNM_NOMATCH;
2386	}
2387
2388	/* In Unix styles, treating ? and * w.r.t. components is simple.
2389	* No need to do matching component by component. */
2390
2391	return match_unix(mask, name, flags, name);
2392	}
2393
2394	/* Now compare all the components of the path name, one by one.
2395	* Note that the path separator must not be enclosed in brackets. */
2396
2397	while (mask != 0 \|\| name != 0)
2398	{
2399
2400	/* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2401	* is reached even if there are components left in NAME. */
2402
2403	if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2404	return FNM_MATCH;
2405
2406	/* Compare a single component of the path name. */
2407
2408	rc = match_comp(mask, name, flags);
2409	if (rc != FNM_MATCH)
2410	return rc;
2411
2412	/* Skip to the next component or to the end of the path name. */
2413
2414	mask = skip_comp_os2(mask);
2415	name = skip_comp_os2(name);
2416	}
2417
2418	/* If we reached the ends of both strings, the names match. */
2419
2420	if (mask == 0 && name == 0)
2421	return FNM_MATCH;
2422
2423	/* The names do not match. */
2424
2425	return FNM_NOMATCH;
2426	}
2427
2428	/*
2429	*@@ strhMatchOS2:
2430	* this matches wildcards, similar to what DosEditName does.
2431	* However, this does not require a file to be present, but
2432	* works on strings only.
2433	*/
2434
2435	BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2436	const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2437	{
2438	return ((BOOL)(_fnmatch_unsigned(pcszMask,
2439	pcszName,
2440	_FNM_OS2 \| _FNM_IGNORECASE)
2441	== FNM_MATCH)
2442	);
2443	}
2444
2445	/* ******************************************************************
2446	* *
2447	* Fast string searches *
2448	* *
2449	********************************************************************/
2450
// assertions are compiled out: the argument is dropped
// without being evaluated
#define ASSERT(a)   ((void)0)
2452
2453	/*
2454	* The following code has been taken from the "Standard
2455	* Function Library", file sflfind.c, and only slightly
2456	* modified to conform to the rest of this file.
2457	*
2458	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2459	* Revised: 98/05/04
2460	*
2461	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2462	*
2463	* The SFL Licence allows incorporating SFL code into other
2464	* programs, as long as the copyright is reprinted and the
2465	* code is marked as modified, so this is what we do.
2466	*/
2467
2468	/*
2469	*@@ strhfind:
2470	* searches for a pattern in a string using the Boyer-Moore-
2471	* Horspool-Sunday algorithm. The string and pattern are null-terminated
2472	* strings. Returns a pointer to the pattern if found within the string,
2473	* or NULL if the pattern was not found. If you repeatedly scan for the
2474	* same pattern, use the repeat_find argument. If this is TRUE, the
2475	* function does not re-parse the pattern. You must of course call the
2476	* function with repeat_find equal to FALSE the first time. This function
2477	* is meant to handle character data, and is most effective when you work
2478	* with large strings. To search binary data use strhmemfind(). Will not work
2479	* on multibyte characters.
2480	*
2481	* Examples:
2482	+ char *result;
2483	+
2484	+ result = strhfind ("abracadabra", "cad", FALSE);
2485	+ if (result)
2486	+ puts (result);
2487	+
2488	* Taken from the "Standard Function Library", file sflfind.c.
2489	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2490	* Slightly modified.
2491	*
2492	*@@added V0.9.3 (2000-05-08) [umoeller]
2493	*/
2494
2495	char* strhfind (const char *string, // String containing data
2496	const char *pattern, // Pattern to search for
2497	BOOL repeat_find) // Same pattern as last time
2498	{
2499	static size_t
2500	searchbuf [256]; // Fixed search buffer
2501
2502	ASSERT (string); // Expect non-NULL pointers, but
2503	ASSERT (pattern); // fall through if not debugging
2504
2505	return (char *) strhmemfind_rb (string, strlen (string),
2506	pattern, strlen (pattern),
2507	searchbuf, &repeat_find);
2508	}
2509
2510	/*
2511	*@@ strhfind_r:
2512	* searches for a pattern in a string using the Boyer-Moore-
2513	* Horspool-Sunday algorithm. The string and pattern are null-terminated
2514	* strings. Returns a pointer to the pattern if found within the string,
2515	* or NULL if the pattern was not found. This function is meant to handle
2516	* character data, and is most effective when you work with large strings.
2517	* To search binary data use strhmemfind(). Will not work on multibyte
2518	* characters. Reentrant.
2519	*
2520	* Examples:
2521	+ char *result;
2522	+
2523	+ result = strhfind_r ("abracadabra", "cad");
2524	+ if (result)
2525	+ puts (result);
2526	*
2527	* Taken from the "Standard Function Library", file sflfind.c.
2528	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2529	* Slightly modified.
2530	*
2531	*@@added V0.9.3 (2000-05-08) [umoeller]
2532	*/
2533
2534	char* strhfind_r (const char *string, // String containing data
2535	const char *pattern) // Pattern to search for
2536	{
2537	size_t
2538	searchbuf [256]; // One-time search buffer
2539	BOOL
2540	secondtime = FALSE; // Search buffer init needed
2541
2542	ASSERT (string); // Expect non-NULL pointers, but
2543	ASSERT (pattern); // fall through if not debugging
2544
2545	return (char *) strhmemfind_rb (string, strlen (string),
2546	pattern, strlen (pattern),
2547	searchbuf, &secondtime);
2548	}
2549
2550	/*
2551	*@@ strhfind_rb:
2552	* searches for a pattern in a string using the Boyer-Moore-
2553	* Horspool-Sunday algorithm. The string and pattern are null-terminated
2554	* strings. Returns a pointer to the pattern if found within the string,
2555	* or NULL if the pattern was not found. Supports more efficient repeat
2556	* searches (for the same pattern), through a supplied search buffer. The
2557	* search buffer must be long enough to contain 256 (2**8) size_t entries.
2558	* On the first call repeat_find must be set to FALSE. After the search
2559	* buffer has been initialised, repeat_find will be set to TRUE by the
2560	* function, avoiding the search buffer initialisation on later calls.
2561	*
2562	* This function is most effective when repeated searches are made for
2563	* the same pattern in one or more strings. This function is meant to
2564	* handle character data, and is most effective when you work with
2565	* large strings. To search binary data use strhmemfind(). Will not work
2566	* on multibyte characters. Reentrant.
2567	*
2568	* Examples:
2569	+ char *result;
2570	+ BOOL repeat_search = FALSE;
2571	+ size_t searchbuf[256];
2572	+
2573	+ result = strhfind_rb ("abracadabra", "cad", searchbuf, &repeat_search);
2574	+ if (result)
2575	+ {
2576	+ puts (result);
2577	+ result = strhfind_rb ("cad/cam", "cad", searchbuf, &repeat_search);
2578	+ if (result)
2579	+ puts (result);
2580	+ }
2581	*
2582	* Taken from the "Standard Function Library", file sflfind.c.
2583	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2584	* Slightly modified.
2585	*
2586	*@@added V0.9.3 (2000-05-08) [umoeller]
2587	*/
2588
2589	char* strhfind_rb (const char *string, // String containing data
2590	const char *pattern, // Pattern to search for
2591	size_t *shift, // Working buffer between searches
2592	BOOL *repeat_find) // Flag for first/later search
2593	{
2594	ASSERT (string); // Expect non-NULL pointers, but
2595	ASSERT (pattern); // fall through if not debugging
2596	ASSERT (shift);
2597	ASSERT (repeat_find);
2598
2599	return (char *) strhmemfind_rb (string, strlen (string),
2600	pattern, strlen (pattern),
2601	shift, repeat_find);
2602	}
2603
2604	/*
2605	*@@ strhmemfind:
2606	* searches for a pattern in a block of memory using the Boyer-
2607	* Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2608	* values; you must explicitly provide their lengths. Returns a pointer to
2609	* the pattern if found within the block, or NULL if the pattern was not
2610	* found. If you repeatedly scan for the same pattern, use the repeat_find
2611	* argument. If this is TRUE, the function does not re-parse the pattern.
2612	* This function is meant to handle binary data. If you need to search
2613	* strings, use the strhfind_r or strhfind_rb() functions. Non-Reentrant.
2614	*
2615	* Taken from the "Standard Function Library", file sflfind.c.
2616	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2617	* Slightly modified.
2618	*
2619	*@@added V0.9.3 (2000-05-08) [umoeller]
2620	*/
2621
2622	void* strhmemfind (const void *block, // Block containing data
2623	size_t block_size, // Size of block in bytes
2624	const void *pattern, // Pattern to search for
2625	size_t pattern_size, // Size of pattern block
2626	BOOL repeat_find) // Same pattern as last time
2627	{
2628	static size_t
2629	searchbuf [256]; // Static shared search buffer
2630
2631	ASSERT (block); // Expect non-NULL pointers, but
2632	ASSERT (pattern); // full through if not debugging
2633
2634	return strhmemfind_rb (block, block_size, pattern, pattern_size,
2635	searchbuf, &repeat_find);
2636	}
2637
2638	/*
2639	*@@ strhmemfind_r:
2640	* searches for a pattern in a block of memory using the Boyer-
2641	* Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2642	* values; you must explicitly provide their lengths. Returns a pointer to
2643	* the pattern if found within the block, or NULL if the pattern was not
2644	* found.
2645	*
2646	* This function is meant to handle binary data, for a single search for
2647	* a given pattern. If you need to search strings, use the strhfind_r()
2648	* or strhfind_rb() functions. If you want to do efficient repeated searches
2649	* for one pattern, use strhmemfind_rb(). Reentrant.
2650	*
2651	* Taken from the "Standard Function Library", file sflfind.c.
2652	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2653	* Slightly modified.
2654	*
2655	*@@added V0.9.3 (2000-05-08) [umoeller]
2656	*/
2657
2658	void* strhmemfind_r (const void *block, // Block containing data
2659	size_t block_size, // Size of block in bytes
2660	const void *pattern, // Pattern to search for
2661	size_t pattern_size) // Size of pattern block
2662	{
2663	size_t
2664	searchbuf [256]; // One-time search buffer
2665	BOOL
2666	secondtime = FALSE;
2667
2668	ASSERT (block); // Expect non-NULL pointers, but
2669	ASSERT (pattern); // full through if not debugging
2670
2671	return strhmemfind_rb (block, block_size, pattern, pattern_size,
2672	searchbuf, &secondtime);
2673	}
2674
2675	/*
2676	*@@ strhmemfind_rb:
2677	* searches for a pattern in a block of memory using the Boyer-
2678	* Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2679	* values; you must explicitly provide their lengths. Returns a pointer to
2680	* the pattern if found within the block, or NULL if the pattern was not
2681	* found. On the first search with a given pattern, *repeat_find should
2682	* be FALSE. It will be set to TRUE after the shift table is initialised,
2683	* allowing the initialisation phase to be skipped on subsequent searches.
2684	* shift must point to an array big enough to hold 256 (8**2) size_t values.
2685	*
2686	* This function is meant to handle binary data, for repeated searches
2687	* for the same pattern. If you need to search strings, use the
2688	* strhfind_r() or strhfind_rb() functions. If you wish to search for a
2689	* pattern only once consider using strhmemfind_r(). Reentrant.
2690	*
2691	* Taken from the "Standard Function Library", file sflfind.c.
2692	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2693	* Slightly modified.
2694	*
2695	*@@added V0.9.3 (2000-05-08) [umoeller]
2696	*/
2697
2698	void* strhmemfind_rb (const void *in_block, // Block containing data
2699	size_t block_size, // Size of block in bytes
2700	const void *in_pattern, // Pattern to search for
2701	size_t pattern_size, // Size of pattern block
2702	size_t *shift, // Shift table (search buffer)
2703	BOOL *repeat_find) // TRUE: search buffer already init
2704	{
2705	size_t
2706	byte_nbr, // Distance through block
2707	match_size; // Size of matched part
2708	const unsigned char
2709	*match_base = NULL, // Base of match of pattern
2710	*match_ptr = NULL, // Point within current match
2711	*limit = NULL; // Last potiental match point
2712	const unsigned char
2713	block = (unsigned char ) in_block, // Concrete pointer to block data
2714	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
2715
2716	ASSERT (block); // Expect non-NULL pointers, but
2717	ASSERT (pattern); // fail gracefully if not debugging
2718	ASSERT (shift); // NULL repeat_find => is false
2719	if (block == NULL \|\| pattern == NULL \|\| shift == NULL)
2720	return (NULL);
2721
2722	// Pattern must be smaller or equal in size to string
2723	if (block_size < pattern_size)
2724	return (NULL); // Otherwise it's not found
2725
2726	if (pattern_size == 0) // Empty patterns match at start
2727	return ((void *)block);
2728
2729	// Build the shift table unless we're continuing a previous search
2730
2731	// The shift table determines how far to shift before trying to match
2732	// again, if a match at this point fails. If the byte after where the
2733	// end of our pattern falls is not in our pattern, then we start to
2734	// match again after that byte; otherwise we line up the last occurence
2735	// of that byte in our pattern under that byte, and try match again.
2736
2737	if (!repeat_find \|\| !*repeat_find)
2738	{
2739	for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
2740	shift [byte_nbr] = pattern_size + 1;
2741	for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
2742	shift [(unsigned char) pattern [byte_nbr]] = pattern_size - byte_nbr;
2743
2744	if (repeat_find)
2745	*repeat_find = TRUE;
2746	}
2747
2748	// Search for the block, each time jumping up by the amount
2749	// computed in the shift table
2750
2751	limit = block + (block_size - pattern_size + 1);
2752	ASSERT (limit > block);
2753
2754	for (match_base = block;
2755	match_base < limit;
2756	match_base += shift [*(match_base + pattern_size)])
2757	{
2758	match_ptr = match_base;
2759	match_size = 0;
2760
2761	// Compare pattern until it all matches, or we find a difference
2762	while (*match_ptr++ == pattern [match_size++])
2763	{
2764	ASSERT (match_size <= pattern_size &&
2765	match_ptr == (match_base + match_size));
2766
2767	// If we found a match, return the start address
2768	if (match_size >= pattern_size)
2769	return ((void*)(match_base));
2770
2771	}
2772	}
2773	return (NULL); // Found nothing
2774	}
2775
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string
 *      and pattern are null-terminated strings. Returns a pointer to
 *      the first occurrence of the pattern within the string, or NULL
 *      if the pattern was not found, if the pattern is longer than
 *      the string, or if either argument is NULL. An empty pattern
 *      matches at the start of the string.
 *
 *      Characters are compared after folding them with tolower(),
 *      so the result depends on the current C locale. To match exact
 *      strings, use strhfind(). Will not work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,         // String containing data
                   const char *pattern)        // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const unsigned char
        *text = NULL,                   // string, viewed as unsigned bytes
        *pat = NULL,                    // pattern, viewed as unsigned bytes
        *match_base = NULL,             // Base of match of pattern
        *limit = NULL;                  // One past the last potential match point

    // Fail gracefully on NULL pointers
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // tolower() is only defined for values representable as unsigned
    // char (or EOF). Plain char may be signed, so every character is
    // read through an unsigned char pointer before it is folded.
    text = (const unsigned char *) string;
    pat = (const unsigned char *) pattern;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. At the last candidate position the byte
    // behind the window is the string's terminating null byte, so the
    // lookup in the loop increment never leaves the string.

    limit = text + (string_size - pattern_size + 1);

    for (match_base = text;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (match_base [pattern_size])])
    {
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (tolower (match_base [match_size]) == tolower (pat [match_size]))
        {
            // If we found a match, return the start address
            if (++match_size >= pattern_size)
                return (char *) (string + (match_base - text));
        }
    }
    return (NULL);                      // Found nothing
}
2870

Note: See TracBrowser for help on using the repository browser.

Download in other formats: