Context Navigation

source: trunk/src/helpers/stringh.c@ 18

Visit:

Last change on this file since 18 was 18, checked in by umoeller, 25 years ago
Tons of updates.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 84.3 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#include <os2.h>
45
46	#include <stdlib.h>
47	#include <stdio.h>
48	#include <string.h>
49	#include <ctype.h>
50	#include <math.h>
51
52	#include "setup.h" // code generation and debugging options
53
54	#include "helpers\stringh.h"
55	#include "helpers\xstring.h" // extended string helpers
56
57	#pragma hdrstop
58
59	/*
60	*@@category: Helpers\C helpers\String management
61	*/
62
63	/*
64	*@@ strhdup:
65	* like strdup, but this one
66	* doesn't crash if pszSource is NULL,
67	* but returns NULL also.
68	*
69	*@@added V0.9.0 [umoeller]
70	*/
71
72	PSZ strhdup(const char *pszSource)
73	{
74	if (pszSource)
75	return (strdup(pszSource));
76	else
77	return (0);
78	}
79
80	/*
81	*@@ strhistr:
82	* like strstr, but case-insensitive.
83	*
84	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
85	*/
86
87	PSZ strhistr(const char string1, const char string2)
88	{
89	PSZ prc = NULL;
90
91	if ((string1) && (string2))
92	{
93	PSZ pszSrchIn = strdup(string1);
94	PSZ pszSrchFor = strdup(string2);
95
96	if ((pszSrchIn) && (pszSrchFor))
97	{
98	strupr(pszSrchIn);
99	strupr(pszSrchFor);
100
101	prc = strstr(pszSrchIn, pszSrchFor);
102	if (prc)
103	{
104	// prc now has the first occurence of the string,
105	// but in pszSrchIn; we need to map this
106	// return value to the original string
107	prc = (prc-pszSrchIn) // offset in pszSrchIn
108	+ (PSZ)string1;
109	}
110	}
111	if (pszSrchFor)
112	free(pszSrchFor);
113	if (pszSrchIn)
114	free(pszSrchIn);
115	}
116	return (prc);
117	}
118
119	/*
120	*@@ strhncpy0:
121	* like strncpy, but always appends a 0 character.
122	*/
123
124	ULONG strhncpy0(PSZ pszTarget,
125	const char *pszSource,
126	ULONG cbSource)
127	{
128	ULONG ul = 0;
129	PSZ pTarget = pszTarget,
130	pSource = (PSZ)pszSource;
131
132	for (ul = 0; ul < cbSource; ul++)
133	if (*pSource)
134	pTarget++ = pSource++;
135	else
136	break;
137	*pTarget = 0;
138
139	return (ul);
140	}
141
142	/*
143	* strhCount:
144	* this counts the occurences of c in pszSearch.
145	*/
146
147	ULONG strhCount(const char *pszSearch,
148	CHAR c)
149	{
150	PSZ p = (PSZ)pszSearch;
151	ULONG ulCount = 0;
152	while (TRUE)
153	{
154	p = strchr(p, c);
155	if (p)
156	{
157	ulCount++;
158	p++;
159	}
160	else
161	break;
162	}
163	return (ulCount);
164	}
165
166	/*
167	*@@ strhIsDecimal:
168	* returns TRUE if psz consists of decimal digits only.
169	*/
170
171	BOOL strhIsDecimal(PSZ psz)
172	{
173	PSZ p = psz;
174	while (*p != 0)
175	{
176	if (isdigit(*p) == 0)
177	return (FALSE);
178	p++;
179	}
180
181	return (TRUE);
182	}
183
184	/*
185	*@@ strhSubstr:
186	* this creates a new PSZ containing the string
187	* from pBegin to pEnd, excluding the pEnd character.
188	* The new string is null-terminated. The caller
189	* must free() the new string after use.
190	*
191	* Example:
192	+ "1234567890"
193	+ ^ ^
194	+ p1 p2
195	+ strhSubstr(p1, p2)
196	* would return a new string containing "2345678".
197	*/
198
199	PSZ strhSubstr(const char pBegin, const char pEnd)
200	{
201	ULONG cbSubstr = (pEnd - pBegin);
202	PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
203	strhncpy0(pszSubstr, pBegin, cbSubstr);
204	return (pszSubstr);
205	}
206
207	/*
208	*@@ strhExtract:
209	* searches pszBuf for the cOpen character and returns
210	* the data in between cOpen and cClose, excluding
211	* those two characters, in a newly allocated buffer
212	* which you must free() afterwards.
213	*
214	* Spaces and newlines/linefeeds are skipped.
215	*
216	* If the search was successful, the new buffer
217	* is returned and, if (ppEnd != NULL), *ppEnd points
218	* to the first character after the cClose character
219	* found in the buffer.
220	*
221	* If the search was not successful, NULL is
222	* returned, and *ppEnd is unchanged.
223	*
224	* If another cOpen character is found before
225	* cClose, matching cClose characters will be skipped.
226	* You can therefore nest the cOpen and cClose
227	* characters.
228	*
229	* This function ignores cOpen and cClose characters
230	* in C-style comments and strings surrounded by
231	* double quotes.
232	*
233	* Example:
234	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
235	+ pEnd;
236	+ strhExtract(pszBuf,
237	+ '{', '}',
238	+ &pEnd)
239	* would return a new buffer containing " --blah-- ",
240	* and ppEnd would afterwards point to the space
241	* before "next" in the static buffer.
242	*
243	*@@added V0.9.0 [umoeller]
244	*/
245
246	PSZ strhExtract(PSZ pszBuf, // in: search buffer
247	CHAR cOpen, // in: opening char
248	CHAR cClose, // in: closing char
249	PSZ *ppEnd) // out: if != NULL, receives first character after closing char
250	{
251	PSZ pszReturn = NULL;
252
253	if (pszBuf)
254	{
255	PSZ pOpen = strchr(pszBuf, cOpen);
256	if (pOpen)
257	{
258	// opening char found:
259	// now go thru the whole rest of the buffer
260	PSZ p = pOpen+1;
261	LONG lLevel = 1; // if this goes 0, we're done
262	while (*p)
263	{
264	if (*p == cOpen)
265	lLevel++;
266	else if (*p == cClose)
267	{
268	lLevel--;
269	if (lLevel <= 0)
270	{
271	// matching closing bracket found:
272	// extract string
273	pszReturn = strhSubstr(pOpen+1, // after cOpen
274	p); // excluding cClose
275	if (ppEnd)
276	*ppEnd = p+1;
277	break; // while (*p)
278	}
279	}
280	else if (*p == '\"')
281	{
282	// beginning of string:
283	PSZ p2 = p+1;
284	// find end of string
285	while ((p2) && (p2 != '\"'))
286	p2++;
287
288	if (*p2 == '\"')
289	// closing quote found:
290	// search on after that
291	p = p2; // raised below
292	else
293	break; // while (*p)
294	}
295
296	p++;
297	}
298	}
299	}
300
301	return (pszReturn);
302	}
303
304	/*
305	*@@ strhQuote:
306	* similar to strhExtract, except that
307	* opening and closing chars are the same,
308	* and therefore no nesting is possible.
309	* Useful for extracting stuff between
310	* quotes.
311	*
312	*@@added V0.9.0 [umoeller]
313	*/
314
315	PSZ strhQuote(PSZ pszBuf,
316	CHAR cQuote,
317	PSZ *ppEnd)
318	{
319	PSZ pszReturn = NULL,
320	p1 = NULL;
321	if ((p1 = strchr(pszBuf, cQuote)))
322	{
323	PSZ p2 = strchr(p1+1, cQuote);
324	if (p2)
325	{
326	pszReturn = strhSubstr(p1+1, p2);
327	if (ppEnd)
328	// store closing char
329	*ppEnd = p2 + 1;
330	}
331	}
332
333	return (pszReturn);
334	}
335
336	/*
337	*@@ strhStrip:
338	* removes all double spaces.
339	* This copies within the "psz" buffer.
340	* If any double spaces are found, the
341	* string will be shorter than before,
342	* but the buffer is _not_ reallocated,
343	* so there will be unused bytes at the
344	* end.
345	*
346	* Returns the number of spaces removed.
347	*
348	*@@added V0.9.0 [umoeller]
349	*/
350
351	ULONG strhStrip(PSZ psz) // in/out: string
352	{
353	PSZ p;
354	ULONG cb = strlen(psz),
355	ulrc = 0;
356
357	for (p = psz; p < psz+cb; p++)
358	{
359	if ((p == ' ') && ((p+1) == ' '))
360	{
361	PSZ p2 = p;
362	while (*p2)
363	{
364	p2 = (p2+1);
365	p2++;
366	}
367	cb--;
368	p--;
369	ulrc++;
370	}
371	}
372	return (ulrc);
373	}
374
375	/*
376	*@@ strhins:
377	* this inserts one string into another.
378	*
379	* pszInsert is inserted into pszBuffer at offset
380	* ulInsertOfs (which counts from 0).
381	*
382	* A newly allocated string is returned. pszBuffer is
383	* not changed. The new string should be free()'d after
384	* use.
385	*
386	* Upon errors, NULL is returned.
387	*
388	*@@changed V0.9.0 [umoeller]: completely rewritten.
389	*/
390
391	PSZ strhins(const char *pcszBuffer,
392	ULONG ulInsertOfs,
393	const char *pcszInsert)
394	{
395	PSZ pszNew = NULL;
396
397	if ((pcszBuffer) && (pcszInsert))
398	{
399	do {
400	ULONG cbBuffer = strlen(pcszBuffer);
401	ULONG cbInsert = strlen(pcszInsert);
402
403	// check string length
404	if (ulInsertOfs > cbBuffer + 1)
405	break; // do
406
407	// OK, let's go.
408	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
409
410	// copy stuff before pInsertPos
411	memcpy(pszNew,
412	pcszBuffer,
413	ulInsertOfs);
414	// copy string to be inserted
415	memcpy(pszNew + ulInsertOfs,
416	pcszInsert,
417	cbInsert);
418	// copy stuff after pInsertPos
419	strcpy(pszNew + ulInsertOfs + cbInsert,
420	pcszBuffer + ulInsertOfs);
421	} while (FALSE);
422	}
423
424	return (pszNew);
425	}
426
427	/*
428	*@@ strhrpl:
429	* wrapper around xstrrpl to work with C strings.
430	* Note that *ppszBuf can get reallocated and must
431	* be free()'able.
432	*
433	* Repetitive use of this wrapper is not recommended
434	* because it is considerably slower than xstrrpl.
435	*
436	*@@added V0.9.6 (2000-11-01) [umoeller]
437	*/
438
439	ULONG strhrpl(PSZ *ppszBuf, // in/out: string
440	PULONG pulOfs, // in: where to begin search (0 = start);
441	// out: ofs of first char after replacement string
442	const char *pcszSearch, // in: search string; cannot be NULL
443	const char *pcszReplace) // in: replacement string; cannot be NULL
444	{
445	ULONG ulrc = 0;
446	XSTRING xstrBuf,
447	xstrFind,
448	xstrReplace;
449	size_t ShiftTable[256];
450	BOOL fRepeat = FALSE;
451	xstrInit(&xstrBuf, 0);
452	xstrset(&xstrBuf, *ppszBuf);
453	xstrInit(&xstrFind, 0);
454	xstrset(&xstrFind, (PSZ)pcszSearch);
455	xstrInit(&xstrReplace, 0);
456	xstrset(&xstrReplace, (PSZ)pcszReplace);
457
458	if ((ulrc = xstrrpl(&xstrBuf,
459	pulOfs,
460	&xstrFind,
461	&xstrReplace,
462	ShiftTable,
463	&fRepeat)))
464	// replaced:
465	*ppszBuf = xstrBuf.psz;
466
467	return (ulrc);
468	}
469
470	/*
471	* strhWords:
472	* returns the no. of words in "psz".
473	* A string is considered a "word" if
474	* it is surrounded by spaces only.
475	*
476	*@@added V0.9.0 [umoeller]
477	*/
478
479	ULONG strhWords(PSZ psz)
480	{
481	PSZ p;
482	ULONG cb = strlen(psz),
483	ulWords = 0;
484	if (cb > 1)
485	{
486	ulWords = 1;
487	for (p = psz; p < psz+cb; p++)
488	if (*p == ' ')
489	ulWords++;
490	}
491	return (ulWords);
492	}
493
494	/*
495	*@@ strhThousandsULong:
496	* converts a ULONG into a decimal string, while
497	* inserting thousands separators into it. Specify
498	* the separator character in cThousands.
499	*
500	* Returns pszTarget so you can use it directly
501	* with sprintf and the "%s" flag.
502	*
503	* For cThousands, you should use the data in
504	* OS2.INI ("PM_National" application), which is
505	* always set according to the "Country" object.
506	* You can use prfhQueryCountrySettings to
507	* retrieve this setting.
508	*
509	* Use strhThousandsDouble for "double" values.
510	*/
511
512	PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
513	ULONG ul, // in: decimal to convert
514	CHAR cThousands) // in: separator char (e.g. '.')
515	{
516	USHORT ust, uss, usc;
517	CHAR szTemp[40];
518	sprintf(szTemp, "%lu", ul);
519
520	ust = 0;
521	usc = strlen(szTemp);
522	for (uss = 0; uss < usc; uss++)
523	{
524	if (uss)
525	if (((usc - uss) % 3) == 0)
526	{
527	pszTarget[ust] = cThousands;
528	ust++;
529	}
530	pszTarget[ust] = szTemp[uss];
531	ust++;
532	}
533	pszTarget[ust] = '\0';
534
535	return (pszTarget);
536	}
537
538	/*
539	*@@ strhThousandsDouble:
540	* like strhThousandsULong, but for a "double"
541	* value. Note that after-comma values are truncated.
542	*/
543
544	PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
545	{
546	USHORT ust, uss, usc;
547	CHAR szTemp[40];
548	sprintf(szTemp, "%.0f", floor(dbl));
549
550	ust = 0;
551	usc = strlen(szTemp);
552	for (uss = 0; uss < usc; uss++)
553	{
554	if (uss)
555	if (((usc - uss) % 3) == 0)
556	{
557	pszTarget[ust] = cThousands;
558	ust++;
559	}
560	pszTarget[ust] = szTemp[uss];
561	ust++;
562	}
563	pszTarget[ust] = '\0';
564
565	return (pszTarget);
566	}
567
568	/*
569	*@@ strhVariableDouble:
570	* like strhThousandsULong, but for a "double" value, and
571	* with a variable number of decimal places depending on the
572	* size of the quantity.
573	*
574	*@@added V0.9.6 (2000-11-12) [pr]
575	*/
576
577	PSZ strhVariableDouble(PSZ pszTarget,
578	double dbl,
579	PSZ pszUnits,
580	CHAR cThousands)
581	{
582	if (dbl < 100.0)
583	sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
584	else
585	if (dbl < 1000.0)
586	sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
587	else
588	strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
589	pszUnits);
590
591	return(pszTarget);
592	}
593
594	/*
595	*@@ strhFileDate:
596	* converts file date data to a string (to pszBuf).
597	* You can pass any FDATE structure to this function,
598	* which are returned in those FILEFINDBUF* or
599	* FILESTATUS* structs by the Dos* functions.
600	*
601	* ulDateFormat is the PM setting for the date format,
602	* as set in the "Country" object, and can be queried using
603	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
604	*
605	* meaning:
606	* -- 0 mm.dd.yyyy (English)
607	* -- 1 dd.mm.yyyy (e.g. German)
608	* -- 2 yyyy.mm.dd (Japanese, ISO)
609	* -- 3 yyyy.dd.mm
610	*
611	* cDateSep is used as a date separator (e.g. '.').
612	* This can be queried using:
613	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
614	*
615	* Alternatively, you can query all the country settings
616	* at once using prfhQueryCountrySettings (prfh.c).
617	*
618	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
619	*/
620
621	VOID strhFileDate(PSZ pszBuf, // out: string returned
622	FDATE *pfDate, // in: date information
623	ULONG ulDateFormat, // in: date format (0-3)
624	CHAR cDateSep) // in: date separator (e.g. '.')
625	{
626	DATETIME dt;
627	dt.day = pfDate->day;
628	dt.month = pfDate->month;
629	dt.year = pfDate->year + 1980;
630
631	strhDateTime(pszBuf,
632	NULL, // no time
633	&dt,
634	ulDateFormat,
635	cDateSep,
636	0, 0); // no time
637	}
638
639	/*
640	*@@ strhFileTime:
641	* converts file time data to a string (to pszBuf).
642	* You can pass any FTIME structure to this function,
643	* which are returned in those FILEFINDBUF* or
644	* FILESTATUS* structs by the Dos* functions.
645	*
646	* ulTimeFormat is the PM setting for the time format,
647	* as set in the "Country" object, and can be queried using
648	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
649	* meaning:
650	* -- 0 12-hour clock
651	* -- >0 24-hour clock
652	*
653	* cDateSep is used as a time separator (e.g. ':').
654	* This can be queried using:
655	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
656	*
657	* Alternatively, you can query all the country settings
658	* at once using prfhQueryCountrySettings (prfh.c).
659	*
660	*@@changed 99-03-15 fixed 12-hour crash
661	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
662	*/
663
664	VOID strhFileTime(PSZ pszBuf, // out: string returned
665	FTIME *pfTime, // in: time information
666	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
667	CHAR cTimeSep) // in: time separator (e.g. ':')
668	{
669	DATETIME dt;
670	dt.hours = pfTime->hours;
671	dt.minutes = pfTime->minutes;
672	dt.seconds = pfTime->twosecs * 2;
673
674	strhDateTime(NULL, // no date
675	pszBuf,
676	&dt,
677	0, 0, // no date
678	ulTimeFormat,
679	cTimeSep);
680	}
681
682	/*
683	*@@ strhDateTime:
684	* converts Control Program DATETIME info
685	* into two strings. See strhFileDate and strhFileTime
686	* for more detailed parameter descriptions.
687	*
688	*@@added V0.9.0 (99-11-07) [umoeller]
689	*/
690
691	VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
692	PSZ pszTime, // out: time string returned (can be NULL)
693	DATETIME *pDateTime, // in: date/time information
694	ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
695	CHAR cDateSep, // in: date separator (e.g. '.')
696	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
697	CHAR cTimeSep) // in: time separator (e.g. ':')
698	{
699	if (pszDate)
700	{
701	switch (ulDateFormat)
702	{
703	case 0: // mm.dd.yyyy (English)
704	sprintf(pszDate, "%02d%c%02d%c%04d",
705	pDateTime->month,
706	cDateSep,
707	pDateTime->day,
708	cDateSep,
709	pDateTime->year);
710	break;
711
712	case 1: // dd.mm.yyyy (e.g. German)
713	sprintf(pszDate, "%02d%c%02d%c%04d",
714	pDateTime->day,
715	cDateSep,
716	pDateTime->month,
717	cDateSep,
718	pDateTime->year);
719	break;
720
721	case 2: // yyyy.mm.dd (Japanese)
722	sprintf(pszDate, "%04d%c%02d%c%02d",
723	pDateTime->year,
724	cDateSep,
725	pDateTime->month,
726	cDateSep,
727	pDateTime->day);
728	break;
729
730	default: // yyyy.dd.mm
731	sprintf(pszDate, "%04d%c%02d%c%02d",
732	pDateTime->year,
733	cDateSep,
734	pDateTime->day,
735	cDateSep,
736	pDateTime->month);
737	break;
738	}
739	}
740
741	if (pszTime)
742	{
743	if (ulTimeFormat == 0)
744	{
745	// for 12-hour clock, we need additional INI data
746	CHAR szAMPM[10] = "err";
747
748	if (pDateTime->hours > 12)
749	{
750	// > 12h: PM.
751
752	// Note: 12:xx noon is 12 AM, not PM (even though
753	// AM stands for "ante meridiam", but English is just
754	// not logical), so that's handled below.
755
756	PrfQueryProfileString(HINI_USER,
757	"PM_National",
758	"s2359", // key
759	"PM", // default
760	szAMPM, sizeof(szAMPM)-1);
761	sprintf(pszTime, "%02d%c%02d%c%02d %s",
762	// leave 12 == 12 (not 0)
763	pDateTime->hours % 12,
764	cTimeSep,
765	pDateTime->minutes,
766	cTimeSep,
767	pDateTime->seconds,
768	szAMPM);
769	}
770	else
771	{
772	// <= 12h: AM
773	PrfQueryProfileString(HINI_USER,
774	"PM_National",
775	"s1159", // key
776	"AM", // default
777	szAMPM, sizeof(szAMPM)-1);
778	sprintf(pszTime, "%02d%c%02d%c%02d %s",
779	pDateTime->hours,
780	cTimeSep,
781	pDateTime->minutes,
782	cTimeSep,
783	pDateTime->seconds,
784	szAMPM);
785	}
786	}
787	else
788	// 24-hour clock
789	sprintf(pszTime, "%02d%c%02d%c%02d",
790	pDateTime->hours,
791	cTimeSep,
792	pDateTime->minutes,
793	cTimeSep,
794	pDateTime->seconds);
795	}
796	}
797
798	/*
799	*@@ strhGetWord:
800	* finds word boundaries.
801	*
802	* *ppszStart is used as the beginning of the
803	* search.
804	*
805	* If a word is found, *ppszStart is set to
806	* the first character of the word which was
807	* found and *ppszEnd receives the address
808	* of the first character _after_ the word,
809	* which is probably a space or a \n or \r char.
810	* We then return TRUE.
811	*
812	* The search is stopped if a null character
813	* is found or pLimit is reached. In that case,
814	* FALSE is returned.
815	*
816	*@@added V0.9.1 (2000-02-13) [umoeller]
817	*/
818
819	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
820	// out: start of word (if TRUE is returned)
821	const char pLimit, // in: ptr to last char after ppszStart to be
822	// searched; if the word does not end before
823	// or with this char, FALSE is returned
824	const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
825	const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
826	PSZ *ppszEnd) // out: first char _after_ word
827	// (if TRUE is returned)
828	{
829	// characters after which a word can be started
830	// const char *pcszBeginChars = "\x0d\x0a ";
831	// const char *pcszEndChars = "\x0d\x0a /-";
832
833	PSZ pStart = *ppszStart;
834
835	// find start of word
836	while ( (pStart < (PSZ)pLimit)
837	&& (strchr(pcszBeginChars, *pStart))
838	)
839	// if char is a "before word" char: go for next
840	pStart++;
841
842	if (pStart < (PSZ)pLimit)
843	{
844	// found a valid "word start" character
845	// (which is not in pcszBeginChars):
846
847	// find end of word
848	PSZ pEndOfWord = pStart;
849	while ( (pEndOfWord <= (PSZ)pLimit)
850	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
851	)
852	// if char is not an "end word" char: go for next
853	pEndOfWord++;
854
855	if (pEndOfWord <= (PSZ)pLimit)
856	{
857	// whoa, got a word:
858	*ppszStart = pStart;
859	*ppszEnd = pEndOfWord;
860	return (TRUE);
861	}
862	}
863
864	return (FALSE);
865	}
866
867	/*
868	*@@ strhIsWord:
869	* returns TRUE if p points to a "word"
870	* in pcszBuf.
871	*
872	* p is considered a word if the character _before_
873	* it is in pcszBeginChars and the char _after_
874	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
875	*
876	*@@added V0.9.6 (2000-11-12) [umoeller]
877	*/
878
879	BOOL strhIsWord(const char *pcszBuf,
880	const char *p, // in: start of word
881	ULONG cbSearch, // in: length of word
882	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
883	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
884	{
885	BOOL fEndOK = FALSE;
886
887	// check previous char
888	if ( (p == pcszBuf)
889	\|\| (strchr(pcszBeginChars, *(p-1)))
890	)
891	{
892	// OK, valid begin char:
893	// check end char
894	CHAR cNextChar = *(p + cbSearch);
895	if (cNextChar == 0)
896	fEndOK = TRUE;
897	else
898	{
899	char *pc = strchr(pcszEndChars, cNextChar);
900	if (pc)
901	// OK, is end char: avoid doubles of that char,
902	// but allow spaces
903	if ( (cNextChar+1 != *pc)
904	\|\| (cNextChar+1 == ' ')
905	\|\| (cNextChar+1 == 0)
906	)
907	fEndOK = TRUE;
908	}
909	}
910
911	return (fEndOK);
912	}
913
914	/*
915	*@@ strhFindWord:
916	* searches for pszSearch in pszBuf, which is
917	* returned if found (or NULL if not).
918	*
919	* As opposed to strstr, this finds pszSearch
920	* only if it is a "word". A search string is
921	* considered a word if the character _before_
922	* it is in pcszBeginChars and the char _after_
923	* it is in pcszEndChars.
924	*
925	* Example:
926	+ strhFindWord("This is an example.", "is");
927	+ returns ...........^ this, but not the "is" in "This".
928	*
929	* The algorithm here uses strstr to find pszSearch in pszBuf
930	* and performs additional "is-word" checks for each item found
931	* (by calling strhIsWord).
932	*
933	* Note that this function is fairly slow compared to xstrFindWord.
934	*
935	*@@added V0.9.0 (99-11-08) [umoeller]
936	*@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
937	*/
938
939	PSZ strhFindWord(const char *pszBuf,
940	const char *pszSearch,
941	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
942	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
943	{
944	PSZ pszReturn = 0;
945	ULONG cbBuf = strlen(pszBuf),
946	cbSearch = strlen(pszSearch);
947
948	if ((cbBuf) && (cbSearch))
949	{
950	const char *p = pszBuf;
951
952	do // while p
953	{
954	p = strstr(p, pszSearch);
955	if (p)
956	{
957	// string found:
958	// check if that's a word
959
960	if (strhIsWord(pszBuf,
961	p,
962	cbSearch,
963	pcszBeginChars,
964	pcszEndChars))
965	{
966	// valid end char:
967	pszReturn = (PSZ)p;
968	break;
969	}
970
971	p += cbSearch;
972	}
973	} while (p);
974
975	}
976	return (pszReturn);
977	}
978
979	/*
980	*@@ strhFindEOL:
981	* returns a pointer to the next \r, \n or null character
982	* following pszSearchIn. Stores the offset in *pulOffset.
983	*
984	* This should never return NULL because at some point,
985	* there will be a null byte in your string.
986	*
987	*@@added V0.9.4 (2000-07-01) [umoeller]
988	*/
989
990	PSZ strhFindEOL(PSZ pszSearchIn, // in: where to search
991	PULONG pulOffset) // out: offset (ptr can be NULL)
992	{
993	PSZ p = pszSearchIn,
994	prc = NULL;
995	while (TRUE)
996	{
997	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
998	{
999	prc = p;
1000	break;
1001	}
1002	p++;
1003	}
1004
1005	if (pulOffset)
1006	*pulOffset = prc - pszSearchIn;
1007	return (prc);
1008	}
1009
1010	/*
1011	*@@ strhFindNextLine:
1012	* like strhFindEOL, but this returns the character
1013	* _after_ \r or \n. Note that this might return
1014	* a pointer to terminating NULL character also.
1015	*/
1016
1017	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1018	{
1019	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1020	// pEOL now points to the \r char or the terminating 0 byte;
1021	// if not null byte, advance pointer
1022	PSZ pNextLine = pEOL;
1023	if (*pNextLine == '\r')
1024	pNextLine++;
1025	if (*pNextLine == '\n')
1026	pNextLine++;
1027	if (pulOffset)
1028	*pulOffset = pNextLine - pszSearchIn;
1029	return (pNextLine);
1030	}
1031
1032	/*
1033	*@@ strhFindKey:
1034	* finds pszKey in pszSearchIn; similar to strhistr,
1035	* but this one makes sure the key is at the beginning
1036	* of a line. Spaces before the key are tolerated.
1037	* Returns NULL if the key was not found.
1038	*
1039	* Used by strhGetParameter/strhSetParameter; useful
1040	* for analyzing CONFIG.SYS settings.
1041	*
1042	*@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1043	*@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1044	*/
1045
1046	PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1047	const char *pcszKey, // in: key to search for
1048	PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1049	// can be NULL if not needed
1050	{
1051	const char *p = NULL;
1052	PSZ pReturn = NULL;
1053	// BOOL fFound = FALSE;
1054
1055	p = pcszSearchIn;
1056	do {
1057	p = strhistr(p, pcszKey);
1058
1059	if ((p) && (p >= pcszSearchIn))
1060	{
1061	// make sure the key is at the beginning of a line
1062	// by going backwards until we find a char != " "
1063	const char *p2 = p;
1064	while ( (*p2 == ' ')
1065	&& (p2 > pcszSearchIn)
1066	)
1067	p2--;
1068
1069	// if previous char is an EOL sign, go on
1070	if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1071	\|\| (*(p2-1) == '\r')
1072	\|\| (*(p2-1) == '\n')
1073	)
1074	{
1075	// now check whether the char after the search
1076	// is a "=" char
1077	// ULONG cbKey = strlen(pszKey);
1078
1079	// tolerate spaces before "="
1080	/* PSZ p3 = p;
1081	while (*(p3+cbKey) == ' ')
1082	p3++;
1083
1084	if ((p3+cbKey) == '=') /
1085	{
1086	// found:
1087	pReturn = (PSZ)p; // go on, p contains found key
1088
1089	// test for all upper case?
1090	if (pfIsAllUpperCase)
1091	{
1092	ULONG cbKey2 = strlen(pcszKey),
1093	ul = 0;
1094	*pfIsAllUpperCase = TRUE;
1095	for (ul = 0; ul < cbKey2; ul++)
1096	if (islower(*(p+ul)))
1097	{
1098	*pfIsAllUpperCase = FALSE;
1099	break; // for
1100	}
1101	}
1102
1103	break; // do
1104	}
1105	} // else search next key
1106
1107	p++; // search on after this key
1108	}
1109	} while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1110
1111	return (pReturn);
1112	}
1113
1114	/*
1115	*@@ strhGetParameter:
1116	* searches pszSearchIn for the key pszKey; if found, it
1117	* returns a pointer to the following characters in pszSearchIn
1118	* and, if pszCopyTo != NULL, copies the rest of the line to
1119	* that buffer, of which cbCopyTo specified the size.
1120	*
1121	* If the key is not found, NULL is returned.
1122	* String search is done by calling strhFindKey.
1123	* This is useful for querying CONFIG.SYS settings.
1124	*
1125	* <B>Example:</B>
1126	*
1127	* this would return "YES" if you searched for "PAUSEONERROR=",
1128	* and "PAUSEONERROR=YES" existed in pszSearchIn.
1129	*/
1130
1131	PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1132	const char *pcszKey, // in: key to search for
1133	PSZ pszCopyTo, // out: key value
1134	ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1135	{
1136	PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1137	prc = NULL;
1138	if (p)
1139	{
1140	prc = p + strlen(pcszKey);
1141	if (pszCopyTo)
1142	// copy to pszCopyTo
1143	{
1144	ULONG cb;
1145	PSZ pEOL = strhFindEOL(prc, &cb);
1146	if (pEOL)
1147	{
1148	if (cb > cbCopyTo)
1149	cb = cbCopyTo-1;
1150	strhncpy0(pszCopyTo, prc, cb);
1151	}
1152	}
1153	}
1154
1155	return (prc);
1156	}
1157
1158	/*
1159	*@@ strhSetParameter:
1160	* searches *ppszBuf for the key pszKey; if found, it
1161	* replaces the characters following this key up to the
1162	* end of the line with pszParam. If pszKey is not found in
1163	* *ppszBuf, it is appended to the file in a new line.
1164	*
1165	* If any changes are made, *ppszBuf is re-allocated.
1166	*
1167	* This function searches w/out case sensitivity.
1168	*
1169	* Returns a pointer to the new parameter inside the buffer.
1170	*
1171	@@changed V0.9.0 [umoeller]: changed function prototype to PSZ ppszSearchIn
1172	*/
1173
1174	PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1175	const char *pcszKey, // in: key to search for
1176	PSZ pszNewParam, // in: new parameter to set for key
1177	BOOL fRespectCase) // in: if TRUE, pszNewParam will
1178	// be converted to upper case if the found key is
1179	// in upper case also. pszNewParam should be in
1180	// lower case if you use this.
1181	{
1182	BOOL fIsAllUpperCase = FALSE;
1183	PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1184	prc = NULL;
1185
1186	if (pKey)
1187	{
1188	// key found in file:
1189	// replace existing parameter
1190	PSZ pOldParam = pKey + strlen(pcszKey);
1191
1192	prc = pOldParam;
1193	// pOldParam now has the old parameter, which we
1194	// will overwrite now
1195
1196	if (pOldParam)
1197	{
1198	ULONG cbOldParam;
1199	PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1200	// pEOL now has first end-of-line after the parameter
1201
1202	if (pEOL)
1203	{
1204	XSTRING strBuf;
1205	ULONG ulOfs = 0;
1206
1207	PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1208	strncpy(pszOldCopy, pOldParam, cbOldParam);
1209	pszOldCopy[cbOldParam] = '\0';
1210
1211	xstrInit(&strBuf, 0);
1212	xstrset(&strBuf, *ppszBuf); // this must not be freed!
1213	/* xstrInit(&strFind, 0);
1214	xstrset(&strFind, pszOldCopy); // this must not be freed!
1215	xstrInit(&strReplace, 0);
1216	xstrset(&strReplace, pszNewParam); // this must not be freed!
1217	*/
1218
1219	// check for upper case desired?
1220	if (fRespectCase)
1221	if (fIsAllUpperCase)
1222	strupr(pszNewParam);
1223
1224	xstrcrpl(&strBuf, &ulOfs, pszOldCopy, pszNewParam);
1225
1226	free(pszOldCopy);
1227
1228	*ppszBuf = strBuf.psz;
1229	}
1230	}
1231	}
1232	else
1233	{
1234	PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1235	+ strlen(pcszKey)
1236	+ strlen(pszNewParam)
1237	+ 5); // 2 * \r\n + null byte
1238	// key not found: append to end of file
1239	sprintf(pszNew, "%s\r\n%s%s\r\n",
1240	*ppszBuf, pcszKey, pszNewParam);
1241	free(*ppszBuf);
1242	*ppszBuf = pszNew;
1243	}
1244
1245	return (prc);
1246	}
1247
1248	/*
1249	*@@ strhDeleteLine:
1250	* this deletes the line in pszSearchIn which starts with
1251	* the key pszKey. Returns TRUE if the line was found and
1252	* deleted.
1253	*
1254	* This copies within pszSearchIn.
1255	*/
1256
1257	BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1258	PSZ pszKey) // in: key to find
1259	{
1260	BOOL fIsAllUpperCase = FALSE;
1261	PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1262	BOOL brc = FALSE;
1263
1264	if (pKey) {
1265	PSZ pEOL = strhFindEOL(pKey, NULL);
1266	// pEOL now has first end-of-line after the key
1267	if (pEOL)
1268	{
1269	// delete line by overwriting it with
1270	// the next line
1271	strcpy(pKey, pEOL+2);
1272	}
1273	else
1274	{
1275	// EOL not found: we must be at the end of the file
1276	*pKey = '\0';
1277	}
1278	brc = TRUE;
1279	}
1280
1281	return (brc);
1282	}
1283
1284	/*
1285	*@@ strhBeautifyTitle:
1286	* replaces all line breaks (0xd, 0xa) with spaces.
1287	*/
1288
1289	BOOL strhBeautifyTitle(PSZ psz)
1290	{
1291	BOOL rc = FALSE;
1292	CHAR *p;
1293	while ((p = strchr(psz, 0xa)))
1294	{
1295	*p = ' ';
1296	rc = TRUE;
1297	}
1298	while ((p = strchr(psz, 0xd)))
1299	{
1300	*p = ' ';
1301	rc = TRUE;
1302	}
1303	return (rc);
1304	}
1305
1306	/*
1307	* strhFindAttribValue:
1308	* searches for pszAttrib in pszSearchIn; if found,
1309	* returns the first character after the "=" char.
1310	* If "=" is not found, a space, \r, and \n are
1311	* also accepted. This function searches without
1312	* respecting case.
1313	*
1314	* <B>Example:</B>
1315	+ strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1316	+
1317	+ returns ....................... ^ this address.
1318	*
1319	*@@added V0.9.0 [umoeller]
1320	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1321	*/
1322
1323	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1324	{
1325	PSZ prc = 0;
1326	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1327	p,
1328	p2;
1329	ULONG cbAttrib = strlen(pszAttrib);
1330
1331	// 1) find space char
1332	while ((p = strchr(pszSearchIn2, ' ')))
1333	{
1334	CHAR c;
1335	p++;
1336	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1337	// now check whether the p+strlen(pszAttrib)
1338	// is a valid end-of-tag character
1339	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1340	&& ( (c == ' ')
1341	\|\| (c == '>')
1342	\|\| (c == '=')
1343	\|\| (c == '\r')
1344	\|\| (c == '\n')
1345	\|\| (c == 0)
1346	)
1347	)
1348	{
1349	// yes:
1350	CHAR c2;
1351	p2 = p + cbAttrib;
1352	c2 = *p2;
1353	while ( ( (c2 == ' ')
1354	\|\| (c2 == '=')
1355	\|\| (c2 == '\n')
1356	\|\| (c2 == '\r')
1357	)
1358	&& (c2 != 0)
1359	)
1360	c2 = *++p2;
1361	prc = p2;
1362	break; // first while
1363	}
1364	pszSearchIn2++;
1365	}
1366	return (prc);
1367	}
1368
1369	/*
1370	* strhGetNumAttribValue:
1371	* stores the numerical parameter value of an HTML-style
1372	* tag in *pl.
1373	*
1374	* Returns the address of the tag parameter in the
1375	* search buffer, if found, or NULL.
1376	*
1377	* <B>Example:</B>
1378	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1379	*
1380	* stores 123 in the "l" variable.
1381	*
1382	*@@added V0.9.0 [umoeller]
1383	*/
1384
1385	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1386	const char *pszTag, // e.g. "INDEX"
1387	PLONG pl) // out: numerical value
1388	{
1389	PSZ pParam;
1390	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1391	sscanf(pParam, "%ld", pl);
1392
1393	return (pParam);
1394	}
1395
1396	/*
1397	* strhGetTextAttr:
1398	* retrieves the attribute value of a textual HTML-style tag
1399	* in a newly allocated buffer, which is returned,
1400	* or NULL if attribute not found.
1401	* If an attribute value is to contain spaces, it
1402	* must be enclosed in quotes.
1403	*
1404	* The offset of the attribute data in pszSearchIn is
1405	* returned in *pulOffset so that you can do multiple
1406	* searches.
1407	*
1408	* This returns a new buffer, which should be free()'d after use.
1409	*
1410	* <B>Example:</B>
1411	+ ULONG ulOfs = 0;
1412	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1413	+ ............^ ulOfs
1414	*
1415	* returns a new string with the value "blublub" (without
1416	* quotes) and sets ulOfs to 12.
1417	*
1418	*@@added V0.9.0 [umoeller]
1419	*/
1420
1421	PSZ strhGetTextAttr(const char *pszSearchIn,
1422	const char *pszTag,
1423	PULONG pulOffset) // out: offset where found
1424	{
1425	PSZ pParam,
1426	pParam2,
1427	prc = NULL;
1428	ULONG ulCount = 0;
1429	LONG lNestingLevel = 0;
1430
1431	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1432	{
1433	// determine end character to search for: a space
1434	CHAR cEnd = ' ';
1435	if (*pParam == '\"')
1436	{
1437	// or, if the data is enclosed in quotes, a quote
1438	cEnd = '\"';
1439	pParam++;
1440	}
1441
1442	if (pulOffset)
1443	// store the offset
1444	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1445
1446	// now find end of attribute
1447	pParam2 = pParam;
1448	while (*pParam)
1449	{
1450	if (*pParam == cEnd)
1451	// end character found
1452	break;
1453	else if (*pParam == '<')
1454	// yet another opening tag found:
1455	// this is probably some "<" in the attributes
1456	lNestingLevel++;
1457	else if (*pParam == '>')
1458	{
1459	lNestingLevel--;
1460	if (lNestingLevel < 0)
1461	// end of tag found:
1462	break;
1463	}
1464	ulCount++;
1465	pParam++;
1466	}
1467
1468	// copy attribute to new buffer
1469	if (ulCount)
1470	{
1471	prc = (PSZ)malloc(ulCount+1);
1472	memcpy(prc, pParam2, ulCount);
1473	*(prc+ulCount) = 0;
1474	}
1475	}
1476	return (prc);
1477	}
1478
1479	/*
1480	* strhFindEndOfTag:
1481	* returns a pointer to the ">" char
1482	* which seems to terminate the tag beginning
1483	* after pszBeginOfTag.
1484	*
1485	* If additional "<" chars are found, we look
1486	* for additional ">" characters too.
1487	*
1488	* Note: You must pass the address of the opening
1489	* '<' character to this function.
1490	*
1491	* Example:
1492	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1493	+ strhFindEndOfTag(pszTest)
1494	+ returns.................................^ this.
1495	*
1496	*@@added V0.9.0 [umoeller]
1497	*/
1498
1499	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1500	{
1501	PSZ p = (PSZ)pszBeginOfTag,
1502	prc = NULL;
1503	LONG lNestingLevel = 0;
1504
1505	while (*p)
1506	{
1507	if (*p == '<')
1508	// another opening tag found:
1509	lNestingLevel++;
1510	else if (*p == '>')
1511	{
1512	// closing tag found:
1513	lNestingLevel--;
1514	if (lNestingLevel < 1)
1515	{
1516	// corresponding: return this
1517	prc = p;
1518	break;
1519	}
1520	}
1521	p++;
1522	}
1523
1524	return (prc);
1525	}
1526
1527	/*
1528	* strhGetBlock:
1529	* this complex function searches the given string
1530	* for a pair of opening/closing HTML-style tags.
1531	*
1532	* If found, this routine returns TRUE and does
1533	* the following:
1534	*
1535	* 1) allocate a new buffer, copy the text
1536	* enclosed by the opening/closing tags
1537	* into it and set *ppszBlock to that
1538	* buffer;
1539	*
1540	* 2) if the opening tag has any attributes,
1541	* allocate another buffer, copy the
1542	* attributes into it and set *ppszAttrs
1543	* to that buffer; if no attributes are
1544	* found, *ppszAttrs will be NULL;
1545	*
1546	* 3) set *pulOffset to the offset from the
1547	* beginning of *ppszSearchIn where the
1548	* opening tag was found;
1549	*
1550	* 4) advance *ppszSearchIn to after the
1551	* closing tag, so that you can do
1552	* multiple searches without finding the
1553	* same tags twice.
1554	*
1555	* All buffers should be freed using free().
1556	*
1557	* This returns the following:
1558	* -- 0: no error
1559	* -- 1: tag not found at all (doesn't have to be an error)
1560	* -- 2: begin tag found, but no corresponding end tag found. This
1561	* is a real error.
1562	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1563	*
1564	* <B>Example:</B>
1565	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1566	+ PSZ pszBlock, pszAttrs;
1567	+ ULONG ulOfs;
1568	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1569	*
1570	* would do the following:
1571	*
1572	* 1) set pszBlock to a new string containing "This is page 1."
1573	* without quotes;
1574	*
1575	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1576	*
1577	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1578	*
1579	* 4) pSearch would be advanced to point to the "More text"
1580	* string in the original buffer.
1581	*
1582	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1583	* for HTML parsing.
1584	*
1585	*@@added V0.9.0 [umoeller]
1586	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1587	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1588	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1589	*/
1590
1591	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1592	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1593	PSZ pszTag,
1594	PSZ *ppszBlock, // out: block enclosed by the tags
1595	PSZ *ppszAttribs, // out: attributes of the opening tag
1596	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1597	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1598	{
1599	ULONG ulrc = 1;
1600	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1601	pszSearch2 = pszBeginTag,
1602	pszClosingTag;
1603	ULONG cbTag = strlen(pszTag);
1604
1605	// go thru the block and check all tags if it's the
1606	// begin tag we're looking for
1607	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1608	{
1609	if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1610	// yes: stop
1611	break;
1612	else
1613	pszBeginTag++;
1614	}
1615
1616	if (pszBeginTag)
1617	{
1618	// we found <TAG>:
1619	ULONG ulNestingLevel = 0;
1620
1621	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1622	// strchr(pszBeginTag, '>');
1623	if (pszEndOfBeginTag)
1624	{
1625	// does the caller want the attributes?
1626	if (ppszAttribs)
1627	{
1628	// yes: then copy them
1629	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1630	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1631	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1632	// add terminating 0
1633	*(pszAttrs + ulAttrLen) = 0;
1634
1635	*ppszAttribs = pszAttrs;
1636	}
1637
1638	// output offset of where we found the begin tag
1639	if (pulOfsBeginTag)
1640	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1641
1642	// now find corresponding closing tag (e.g. "</BODY>"
1643	pszBeginTag = pszEndOfBeginTag+1;
1644	// now we're behind the '>' char of the opening tag
1645	// increase offset of that too
1646	if (pulOfsBeginBlock)
1647	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1648
1649	// find next closing tag;
1650	// for the first run, pszSearch2 points to right
1651	// after the '>' char of the opening tag
1652	pszSearch2 = pszBeginTag;
1653	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1654	&& (pszClosingTag = strstr(pszSearch2, "<"))
1655	)
1656	{
1657	// if we have another opening tag before our closing
1658	// tag, we need to have several closing tags before
1659	// we're done
1660	if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1661	ulNestingLevel++;
1662	else
1663	{
1664	// is this ours?
1665	if ( (*(pszClosingTag+1) == '/')
1666	&& (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1667	)
1668	{
1669	// we've found a matching closing tag; is
1670	// it ours?
1671	if (ulNestingLevel == 0)
1672	{
1673	// our closing tag found:
1674	// allocate mem for a new buffer
1675	// and extract all the text between
1676	// open and closing tags to it
1677	ULONG ulLen = pszClosingTag - pszBeginTag;
1678	if (ppszBlock)
1679	{
1680	PSZ pNew = (PSZ)malloc(ulLen + 1);
1681	strhncpy0(pNew, pszBeginTag, ulLen);
1682	*ppszBlock = pNew;
1683	}
1684
1685	// raise search offset to after the closing tag
1686	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1687
1688	ulrc = 0;
1689
1690	break;
1691	} else
1692	// not our closing tag:
1693	ulNestingLevel--;
1694	}
1695	}
1696	// no matching closing tag: search on after that
1697	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1698	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1699
1700	if (!pszClosingTag)
1701	// no matching closing tag found:
1702	// return 2 (closing tag not found)
1703	ulrc = 2;
1704	} // end if (pszBeginTag)
1705	else
1706	// no matching ">" for opening tag found:
1707	ulrc = 3;
1708	}
1709
1710	return (ulrc);
1711	}
1712
1713	/* ******************************************************************
1714	*
1715	* Miscellaneous
1716	*
1717	********************************************************************/
1718
1719	/*
1720	*@@ strhArrayAppend:
1721	* this appends a string to a "string array".
1722	*
1723	* A string array is considered a sequence of
1724	* zero-terminated strings in memory. That is,
1725	* after each string's null-byte, the next
1726	* string comes up.
1727	*
1728	* This is useful for composing a single block
1729	* of memory from, say, list box entries, which
1730	* can then be written to OS2.INI in one flush.
1731	*
1732	* To append strings to such an array, call this
1733	* function for each string you wish to append.
1734	* This will re-allocate *ppszRoot with each call,
1735	* and update *pcbRoot, which then contains the
1736	* total size of all strings (including all null
1737	* terminators).
1738	*
1739	* Pass *pcbRoot to PrfSaveProfileData to have the
1740	* block saved.
1741	*
1742	* Note: On the first call, ppszRoot and pcbRoot
1743	* _must_ be both NULL, or this crashes.
1744	*/
1745
1746	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1747	const char *pcszNew, // in: string to append
1748	PULONG pcbRoot) // in/out: size of array
1749	{
1750	ULONG cbNew = strlen(pcszNew);
1751	PSZ pszTemp = (PSZ)malloc(*pcbRoot
1752	+ cbNew
1753	+ 1); // two null bytes
1754	if (*ppszRoot)
1755	{
1756	// not first loop: copy old stuff
1757	memcpy(pszTemp,
1758	*ppszRoot,
1759	*pcbRoot);
1760	free(*ppszRoot);
1761	}
1762	// append new string
1763	strcpy(pszTemp + *pcbRoot,
1764	pcszNew);
1765	// update root
1766	*ppszRoot = pszTemp;
1767	// update length
1768	*pcbRoot += cbNew + 1;
1769	}
1770
1771	/*
1772	*@@ strhCreateDump:
1773	* this dumps a memory block into a string
1774	* and returns that string in a new buffer.
1775	*
1776	* You must free() the returned PSZ after use.
1777	*
1778	* The output looks like the following:
1779	*
1780	+ 0000: FE FF 0E 02 90 00 00 00 ........
1781	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1782	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1783	*
1784	* Each line is terminated with a newline (\n)
1785	* character only.
1786	*
1787	*@@added V0.9.1 (2000-01-22) [umoeller]
1788	*/
1789
1790	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1791	ULONG ulSize, // in: size of buffer
1792	ULONG ulIndent) // in: indentation of every line
1793	{
1794	PSZ pszReturn = 0;
1795	XSTRING strReturn;
1796	CHAR szTemp[1000];
1797
1798	PBYTE pbCurrent = pb; // current byte
1799	ULONG ulCount = 0,
1800	ulCharsInLine = 0; // if this grows > 7, a new line is started
1801	CHAR szLine[400] = "",
1802	szAscii[30] = " "; // ASCII representation; filled for every line
1803	PSZ pszLine = szLine,
1804	pszAscii = szAscii;
1805
1806	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1807
1808	for (pbCurrent = pb;
1809	ulCount < ulSize;
1810	pbCurrent++, ulCount++)
1811	{
1812	if (ulCharsInLine == 0)
1813	{
1814	memset(szLine, ' ', ulIndent);
1815	pszLine += ulIndent;
1816	}
1817	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1818
1819	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1820	// printable character:
1821	pszAscii = pbCurrent;
1822	else
1823	*pszAscii = '.';
1824	pszAscii++;
1825
1826	ulCharsInLine++;
1827	if ( (ulCharsInLine > 7) // 8 bytes added?
1828	\|\| (ulCount == ulSize-1) // end of buffer reached?
1829	)
1830	{
1831	// if we haven't had eight bytes yet,
1832	// fill buffer up to eight bytes with spaces
1833	ULONG ul2;
1834	for (ul2 = ulCharsInLine;
1835	ul2 < 8;
1836	ul2++)
1837	pszLine += sprintf(pszLine, " ");
1838
1839	sprintf(szTemp, "%04lX: %s %s\n",
1840	(ulCount & 0xFFFFFFF8), // offset in hex
1841	szLine, // bytes string
1842	szAscii); // ASCII string
1843	xstrcat(&strReturn, szTemp);
1844
1845	// restart line buffer
1846	pszLine = szLine;
1847
1848	// clear ASCII buffer
1849	strcpy(szAscii, " ");
1850	pszAscii = szAscii;
1851
1852	// reset line counter
1853	ulCharsInLine = 0;
1854	}
1855	}
1856
1857	if (strReturn.cbAllocated)
1858	pszReturn = strReturn.psz;
1859
1860	return (pszReturn);
1861	}
1862
1863	/* ******************************************************************
1864	*
1865	* Wildcard matching
1866	*
1867	********************************************************************/
1868
1869	/*
1870	* The following code has been taken from "fnmatch.zip".
1871	*
1872	* (c) 1994-1996 by Eberhard Mattes.
1873	*/
1874
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is either the terminating null character
 * or a separator. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))
1885
/*
 * skip_comp_os2:
 *      returns a pointer to the next component of the path SRC,
 *      for OS/2 and DOS styles: the characters of the current
 *      component and the one separator following it are skipped.
 *      When the end of the string is reached, a pointer to the
 *      terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *pEnd;

    /* Find the separator or null character which ends the
     * current component. */

    for (pEnd = src; !IS_OS2_COMP_END(*pEnd); ++pEnd)
        ;

    /* Step over a separator, but never over the end of the
     * string. */

    return (*pEnd != 0) ? pEnd + 1 : pEnd;
}
1909
/*
 * has_colon:
 *      returns 1 if the null-terminated path P contains at
 *      least one colon, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    for (; *p != 0; ++p)
    {
        if (*p == ':')
            return 1;
    }

    return 0;
}
1926
1927	/*
1928	* match_comp_os2:
1929	* Compare a single component (directory name or file name) of the
1930	* paths, for OS/2 and DOS styles. MASK and NAME point into a
1931	* component of the wildcard and the name to be checked, respectively.
1932	* Comparing stops at the next separator. The FLAGS argument is the
1933	* same as that of fnmatch(). HAS_DOT is true if a dot is in the
1934	* current component of NAME. The number of dots is not restricted,
1935	* even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1936	* Note that this function is recursive.
1937	*
1938	* (c) 1994-1996 by Eberhard Mattes.
1939	*/
1940
1941	static int match_comp_os2(const unsigned char *mask,
1942	const unsigned char *name,
1943	unsigned flags,
1944	int has_dot)
1945	{
1946	int rc;
1947
1948	for (;;)
1949	switch (*mask)
1950	{
1951	case 0:
1952
1953	/* There must be no extra characters at the end of NAME when
1954	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1955	* in that case, NAME may point to a separator. */
1956
1957	if (*name == 0)
1958	return FNM_MATCH;
1959	if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1960	return FNM_MATCH;
1961	return FNM_NOMATCH;
1962
1963	case '/':
1964	case '\\':
1965
1966	/* Separators match separators. */
1967
1968	if (IS_OS2_COMP_SEP(*name))
1969	return FNM_MATCH;
1970
1971	/* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1972	* is ignored at the end of NAME. */
1973
1974	if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1975	return FNM_MATCH;
1976
1977	/* Stop comparing at the separator. */
1978
1979	return FNM_NOMATCH;
1980
1981	case '?':
1982
1983	/* A question mark matches one character. It does not match
1984	* a dot. At the end of the component (and before a dot),
1985	* it also matches zero characters. */
1986
1987	if (name != '.' && !IS_OS2_COMP_END(name))
1988	++name;
1989	++mask;
1990	break;
1991
1992	case '*':
1993
1994	/* An asterisk matches zero or more characters. In DOS
1995	* mode, dots are not matched. */
1996
1997	do
1998	{
1999	++mask;
2000	}
2001	while (mask == '');
2002	for (;;)
2003	{
2004	rc = match_comp_os2(mask, name, flags, has_dot);
2005	if (rc != FNM_NOMATCH)
2006	return rc;
2007	if (IS_OS2_COMP_END(*name))
2008	return FNM_NOMATCH;
2009	if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
2010	return FNM_NOMATCH;
2011	++name;
2012	}
2013
2014	case '.':
2015
2016	/* A dot matches a dot. It also matches the implicit dot at
2017	* the end of a dot-less NAME. */
2018
2019	++mask;
2020	if (*name == '.')
2021	++name;
2022	else if (has_dot \|\| !IS_OS2_COMP_END(*name))
2023	return FNM_NOMATCH;
2024	break;
2025
2026	default:
2027
2028	/* All other characters match themselves. */
2029
2030	if (flags & _FNM_IGNORECASE)
2031	{
2032	if (tolower(mask) != tolower(name))
2033	return FNM_NOMATCH;
2034	}
2035	else
2036	{
2037	if (mask != name)
2038	return FNM_NOMATCH;
2039	}
2040	++mask;
2041	++name;
2042	break;
2043	}
2044	}
2045
2046	/*
2047	* match_comp:
2048	* compare a single component (directory name or file name) of the
2049	* paths, for all styles which need component-by-component matching.
2050	* MASK and NAME point to the start of a component of the wildcard and
2051	* the name to be checked, respectively. Comparing stops at the next
2052	* separator. The FLAGS argument is the same as that of fnmatch().
2053	* Return FNM_MATCH iff MASK and NAME match.
2054	*
2055	* (c) 1994-1996 by Eberhard Mattes.
2056	*/
2057
2058	static int match_comp(const unsigned char *mask,
2059	const unsigned char *name,
2060	unsigned flags)
2061	{
2062	const unsigned char *s;
2063
2064	switch (flags & _FNM_STYLE_MASK)
2065	{
2066	case _FNM_OS2:
2067	case _FNM_DOS:
2068
2069	/* For OS/2 and DOS styles, we add an implicit dot at the end of
2070	* the component if the component doesn't include a dot. */
2071
2072	s = name;
2073	while (!IS_OS2_COMP_END(s) && s != '.')
2074	++s;
2075	return match_comp_os2(mask, name, flags, *s == '.');
2076
2077	default:
2078	return FNM_ERR;
2079	}
2080	}
2081
/* In Unix styles, / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C)  ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is either the terminating null character
 * or a separator. */

#define IS_UNIX_COMP_END(C)  ((C) == 0 || IS_UNIX_COMP_SEP (C))
2092
2093	/*
2094	* match_unix:
2095	* match complete paths for Unix styles. The FLAGS argument is the
2096	* same as that of fnmatch(). COMP points to the start of the current
2097	* component in NAME. Return FNM_MATCH iff MASK and NAME match. The
2098	* backslash character is used for escaping ? and * unless
2099	* FNM_NOESCAPE is set.
2100	*
2101	* (c) 1994-1996 by Eberhard Mattes.
2102	*/
2103
2104	static int match_unix(const unsigned char *mask,
2105	const unsigned char *name,
2106	unsigned flags,
2107	const unsigned char *comp)
2108	{
2109	unsigned char c1, c2;
2110	char invert, matched;
2111	const unsigned char *start;
2112	int rc;
2113
2114	for (;;)
2115	switch (*mask)
2116	{
2117	case 0:
2118
2119	/* There must be no extra characters at the end of NAME when
2120	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
2121	* in that case, NAME may point to a separator. */
2122
2123	if (*name == 0)
2124	return FNM_MATCH;
2125	if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
2126	return FNM_MATCH;
2127	return FNM_NOMATCH;
2128
2129	case '?':
2130
2131	/* A question mark matches one character. It does not match
2132	* the component separator if FNM_PATHNAME is set. It does
2133	* not match a dot at the start of a component if FNM_PERIOD
2134	* is set. */
2135
2136	if (*name == 0)
2137	return FNM_NOMATCH;
2138	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2139	return FNM_NOMATCH;
2140	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2141	return FNM_NOMATCH;
2142	++mask;
2143	++name;
2144	break;
2145
2146	case '*':
2147
2148	/* An asterisk matches zero or more characters. It does not
2149	* match the component separator if FNM_PATHNAME is set. It
2150	* does not match a dot at the start of a component if
2151	* FNM_PERIOD is set. */
2152
2153	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2154	return FNM_NOMATCH;
2155	do
2156	{
2157	++mask;
2158	}
2159	while (mask == '');
2160	for (;;)
2161	{
2162	rc = match_unix(mask, name, flags, comp);
2163	if (rc != FNM_NOMATCH)
2164	return rc;
2165	if (*name == 0)
2166	return FNM_NOMATCH;
2167	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2168	return FNM_NOMATCH;
2169	++name;
2170	}
2171
2172	case '/':
2173
2174	/* Separators match only separators. If _FNM_PATHPREFIX is
2175	* set, a trailing separator in MASK is ignored at the end
2176	* of NAME. */
2177
2178	if (!(IS_UNIX_COMP_SEP(*name)
2179	\|\| ((flags & _FNM_PATHPREFIX) && *name == 0
2180	&& (mask[1] == 0
2181	\|\| (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
2182	&& mask[2] == 0)))))
2183	return FNM_NOMATCH;
2184
2185	++mask;
2186	if (*name != 0)
2187	++name;
2188
2189	/* This is the beginning of a new component if FNM_PATHNAME
2190	* is set. */
2191
2192	if (flags & FNM_PATHNAME)
2193	comp = name;
2194	break;
2195
2196	case '[':
2197
2198	/* A set of characters. Always case-sensitive. */
2199
2200	if (*name == 0)
2201	return FNM_NOMATCH;
2202	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2203	return FNM_NOMATCH;
2204	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2205	return FNM_NOMATCH;
2206
2207	invert = 0;
2208	matched = 0;
2209	++mask;
2210
2211	/* If the first character is a ! or ^, the set matches all
2212	* characters not listed in the set. */
2213
2214	if (mask == '!' \|\| mask == '^')
2215	{
2216	++mask;
2217	invert = 1;
2218	}
2219
2220	/* Loop over all the characters of the set. The loop ends
2221	* if the end of the string is reached or if a ] is
2222	* encountered unless it directly follows the initial [ or
2223	* [-. */
2224
2225	start = mask;
2226	while (!(mask == 0 \|\| (mask == ']' && mask != start)))
2227	{
2228	/* Get the next character which is optionally preceded
2229	* by a backslash. */
2230
2231	c1 = *mask++;
2232	if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2233	{
2234	if (*mask == 0)
2235	break;
2236	c1 = *mask++;
2237	}
2238
2239	/* Ranges of characters are written as a-z. Don't
2240	* forget to check for the end of the string and to
2241	* handle the backslash. If the character after - is a
2242	* ], it isn't a range. */
2243
2244	if (*mask == '-' && mask[1] != ']')
2245	{
2246	++mask; /* Skip the - character */
2247	if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2248	++mask;
2249	if (*mask == 0)
2250	break;
2251	c2 = *mask++;
2252	}
2253	else
2254	c2 = c1;
2255
2256	/* Now check whether this character or range matches NAME. */
2257
2258	if (c1 <= name && name <= c2)
2259	matched = 1;
2260	}
2261
2262	/* If the end of the string is reached before a ] is found,
2263	* back up to the [ and compare it to NAME. */
2264
2265	if (*mask == 0)
2266	{
2267	if (*name != '[')
2268	return FNM_NOMATCH;
2269	++name;
2270	mask = start;
2271	if (invert)
2272	--mask;
2273	}
2274	else
2275	{
2276	if (invert)
2277	matched = !matched;
2278	if (!matched)
2279	return FNM_NOMATCH;
2280	++mask; /* Skip the ] character */
2281	if (*name != 0)
2282	++name;
2283	}
2284	break;
2285
2286	case '\\':
2287	++mask;
2288	if (flags & FNM_NOESCAPE)
2289	{
2290	if (*name != '\\')
2291	return FNM_NOMATCH;
2292	++name;
2293	}
2294	else if (mask == '' \|\| *mask == '?')
2295	{
2296	if (mask != name)
2297	return FNM_NOMATCH;
2298	++mask;
2299	++name;
2300	}
2301	break;
2302
2303	default:
2304
2305	/* All other characters match themselves. */
2306
2307	if (flags & _FNM_IGNORECASE)
2308	{
2309	if (tolower(mask) != tolower(name))
2310	return FNM_NOMATCH;
2311	}
2312	else
2313	{
2314	if (mask != name)
2315	return FNM_NOMATCH;
2316	}
2317	++mask;
2318	++name;
2319	break;
2320	}
2321	}
2322
2323	/*
2324	* _fnmatch_unsigned:
2325	* Check whether the path name NAME matches the wildcard MASK.
2326	*
2327	* Return:
2328	* -- 0 (FNM_MATCH) if it matches,
2329	* -- _FNM_NOMATCH if it doesn't,
2330	* -- FNM_ERR on error.
2331	*
2332	* The operation of this function is controlled by FLAGS.
2333	* This is an internal function, with unsigned arguments.
2334	*
2335	* (c) 1994-1996 by Eberhard Mattes.
2336	*/
2337
2338	static int _fnmatch_unsigned(const unsigned char *mask,
2339	const unsigned char *name,
2340	unsigned flags)
2341	{
2342	int m_drive, n_drive,
2343	rc;
2344
2345	/* Match and skip the drive name if present. */
2346
2347	m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2348	n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2349
2350	if (m_drive != n_drive)
2351	{
2352	if (m_drive == -1 \|\| n_drive == -1)
2353	return FNM_NOMATCH;
2354	if (!(flags & _FNM_IGNORECASE))
2355	return FNM_NOMATCH;
2356	if (tolower(m_drive) != tolower(n_drive))
2357	return FNM_NOMATCH;
2358	}
2359
2360	if (m_drive != -1)
2361	mask += 2;
2362	if (n_drive != -1)
2363	name += 2;
2364
2365	/* Colons are not allowed in path names, except for the drive name,
2366	* which was skipped above. */
2367
2368	if (has_colon(mask) \|\| has_colon(name))
2369	return FNM_ERR;
2370
2371	/* The name "\\server\path" should not be matched by mask
2372	* "\\server\path". Ditto for /. /
2373
2374	switch (flags & _FNM_STYLE_MASK)
2375	{
2376	case _FNM_OS2:
2377	case _FNM_DOS:
2378
2379	if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2380	{
2381	if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2382	return FNM_NOMATCH;
2383	name += 2;
2384	mask += 2;
2385	}
2386	break;
2387
2388	case _FNM_POSIX:
2389
2390	if (name[0] == '/' && name[1] == '/')
2391	{
2392	int i;
2393
2394	name += 2;
2395	for (i = 0; i < 2; ++i)
2396	if (mask[0] == '/')
2397	++mask;
2398	else if (mask[0] == '\\' && mask[1] == '/')
2399	mask += 2;
2400	else
2401	return FNM_NOMATCH;
2402	}
2403
2404	/* In Unix styles, treating ? and * w.r.t. components is simple.
2405	* No need to do matching component by component. */
2406
2407	return match_unix(mask, name, flags, name);
2408	}
2409
2410	/* Now compare all the components of the path name, one by one.
2411	* Note that the path separator must not be enclosed in brackets. */
2412
2413	while (mask != 0 \|\| name != 0)
2414	{
2415
2416	/* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2417	* is reached even if there are components left in NAME. */
2418
2419	if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2420	return FNM_MATCH;
2421
2422	/* Compare a single component of the path name. */
2423
2424	rc = match_comp(mask, name, flags);
2425	if (rc != FNM_MATCH)
2426	return rc;
2427
2428	/* Skip to the next component or to the end of the path name. */
2429
2430	mask = skip_comp_os2(mask);
2431	name = skip_comp_os2(name);
2432	}
2433
2434	/* If we reached the ends of both strings, the names match. */
2435
2436	if (mask == 0 && name == 0)
2437	return FNM_MATCH;
2438
2439	/* The names do not match. */
2440
2441	return FNM_NOMATCH;
2442	}
2443
2444	/*
2445	*@@ strhMatchOS2:
2446	* this matches wildcards, similar to what DosEditName does.
2447	* However, this does not require a file to be present, but
2448	* works on strings only.
2449	*/
2450
2451	BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2452	const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2453	{
2454	return ((BOOL)(_fnmatch_unsigned(pcszMask,
2455	pcszName,
2456	_FNM_OS2 \| _FNM_IGNORECASE)
2457	== FNM_MATCH)
2458	);
2459	}
2460
2461	/* ******************************************************************
2462	*
2463	* Fast string searches
2464	*
2465	********************************************************************/
2466
2467	#define ASSERT(a)
2468
2469	/*
2470	* The following code has been taken from the "Standard
2471	* Function Library", file sflfind.c, and only slightly
2472	* modified to conform to the rest of this file.
2473	*
2474	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2475	* Revised: 98/05/04
2476	*
2477	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2478	*
2479	* The SFL Licence allows incorporating SFL code into other
2480	* programs, as long as the copyright is reprinted and the
2481	* code is marked as modified, so this is what we do.
2482	*/
2483
/*
 *@@ strhmemfind:
 *      searches for a pattern in a block of memory using the
 *      Boyer-Moore-Horspool-Sunday algorithm.
 *
 *      The block and pattern may contain any byte values
 *      (including null bytes); you must explicitly provide
 *      their lengths. If you search for strings, use strlen()
 *      on the buffers.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the block, or NULL if the pattern was not found.
 *      NULL is also returned if in_block, in_pattern or shift is
 *      NULL, or if the pattern is larger than the block. An
 *      empty pattern (pattern_size == 0) matches at the start
 *      of the block.
 *
 *      This algorithm needs a "shift table" to cache data for the
 *      search pattern. This table can be reused when performing
 *      several searches with the same pattern.
 *
 *      "shift" must point to an array big enough to hold 256 (2**8)
 *      "size_t" values, one for each possible byte value.
 *
 *      If (repeat_find == NULL) or (*repeat_find == FALSE), the
 *      shift table is initialized. So on the first search with a
 *      given pattern, *repeat_find should be FALSE. This function
 *      sets it to TRUE after the shift table is initialised,
 *      allowing the initialisation phase to be skipped on
 *      subsequent searches with the same pattern.
 *
 *      This function is most effective when repeated searches are
 *      made for the same pattern in one or more large buffers.
 *
 *      This function never reads outside of the block: the byte
 *      following the current comparison window is only looked at
 *      if it still lies within block_size.
 *
 *      Example:
 *
 +          PSZ     pszHaystack = "This is a sample string.",
 +                  pszNeedle = "string";
 +          size_t  shift[256];
 +          BOOL    fRepeat = FALSE;
 +
 +          PSZ     pFound = strhmemfind(pszHaystack,
 +                                       strlen(pszHaystack),   // block size
 +                                       pszNeedle,
 +                                       strlen(pszNeedle),     // pattern size
 +                                       shift,
 +                                       &fRepeat);
 *
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Modified by umoeller; later modified again to stop the
 *      search from reading one byte beyond the end of the block.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

void* strhmemfind(const void *in_block,     // in: block containing data
                  size_t block_size,        // in: size of block in bytes
                  const void *in_pattern,   // in: pattern to search for
                  size_t pattern_size,      // in: size of pattern block
                  size_t *shift,            // in/out: shift table (search buffer)
                  BOOL *repeat_find)        // in/out: if TRUE, *shift is already initialized
{
    const unsigned char
            *block = (const unsigned char *)in_block,       // Concrete pointer to block data
            *pattern = (const unsigned char *)in_pattern;   // Concrete pointer to search value
    size_t  byte_nbr,                       // Index into shift table / pattern
            pos,                            // Offset of current window in block
            last_start;                     // Last offset where a match can start

    if (    (block == NULL)
         || (pattern == NULL)
         || (shift == NULL)
       )
        return (NULL);

    // Pattern must be smaller or equal in size to block
    if (block_size < pattern_size)
        return (NULL);                      // Otherwise it's not found

    if (pattern_size == 0)                  // Empty patterns match at start
        return ((void *)block);

    // Build the shift table unless we're continuing a previous search

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    if (!repeat_find || !*repeat_find)
    {
        for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
            shift[byte_nbr] = pattern_size + 1;
        for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
            shift[pattern[byte_nbr]] = pattern_size - byte_nbr;

        if (repeat_find)
            *repeat_find = TRUE;
    }

    // Search for the pattern, each time jumping up by the amount
    // computed in the shift table

    last_start = block_size - pattern_size;
    pos = 0;

    while (pos <= last_start)
    {
        // If the whole window equals the pattern, return its address
        if (memcmp(block + pos, pattern, pattern_size) == 0)
            return ((void *)(block + pos));

        // The shift is selected by the byte right behind the window.
        // For the last possible window that byte would be
        // block[block_size], which is outside of the caller's buffer,
        // so stop here instead of looking at it.
        if (pos == last_start)
            break;

        pos += shift[block[pos + pattern_size]];
    }

    return (NULL);                          // Found nothing
}
2613
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string
 *      and pattern are null-terminated strings.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the string, or NULL if the pattern was not found
 *      or if either pointer is NULL. An empty pattern matches
 *      at the start of the string.
 *
 *      Characters are compared after folding both sides with
 *      tolower(), so strings match irrespective of case. Every
 *      character is converted to unsigned char before it is
 *      passed to tolower(), so characters above 0x7F are handled
 *      in a well-defined way even if plain char is signed.
 *      For an exact (case-sensitive, binary-safe) search, use
 *      strhmemfind. Will not work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind(const char *string,            // String containing data
                  const char *pattern)           // Pattern to search for
{
    size_t  shift[256];                     // Shift distance for each value
    size_t  string_size,
            pattern_size,
            last_start,                     // Last offset where a match can start
            pos,                            // Offset of current window in string
            idx;                            // Index into shift table / pattern
    const unsigned char
            *text,                          // string, seen as unsigned bytes
            *pat;                           // pattern, seen as unsigned bytes

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size = strlen(string);
    pattern_size = strlen(pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                      // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return ((char *)string);

    // tolower() is only defined for values representable as
    // unsigned char (or EOF), so never feed it a plain char
    text = (const unsigned char *)string;
    pat = (const unsigned char *)pattern;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (idx = 0; idx < 256; idx++)
        shift[idx] = pattern_size + 1;

    for (idx = 0; idx < pattern_size; idx++)
        shift[(unsigned char)tolower(pat[idx])] = pattern_size - idx;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. The byte behind the window is at most
    // the string's null terminator, so this lookup stays in bounds.

    last_start = string_size - pattern_size;

    for (pos = 0;
         pos <= last_start;
         pos += shift[(unsigned char)tolower(text[pos + pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        for (idx = 0; idx < pattern_size; idx++)
            if (tolower(text[pos + idx]) != tolower(pat[idx]))
                break;

        // If we found a match, return the start address
        if (idx == pattern_size)
            return ((char *)(string + pos));
    }

    return (NULL);                          // Found nothing
}
2708

Note: See TracBrowser for help on using the repository browser.

Download in other formats: