Context Navigation

source: trunk/src/helpers/stringh.c@ 13

Visit:

Last change on this file since 13 was 13, checked in by umoeller, 25 years ago
Updates for V0.9.6.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 84.8 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the XWorkplace source package.
28	* XWorkplace is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#include <os2.h>
45
46	#include <stdlib.h>
47	#include <stdio.h>
48	#include <string.h>
49	#include <ctype.h>
50	#include <math.h>
51
52	#include "setup.h" // code generation and debugging options
53
54	#include "helpers\stringh.h"
55	#include "helpers\xstring.h" // extended string helpers
56
57	#pragma hdrstop
58
59	/*
60	*@@category: Helpers\C helpers\String management
61	*/
62
/*
 *@@ strhdup:
 *      NULL-tolerant variant of strdup: returns a
 *      copy of pszSource made with strdup, or NULL
 *      if pszSource itself is NULL.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhdup(const char *pszSource)
{
    // strdup must not be handed a NULL pointer, so NULL maps to NULL here
    return ((pszSource != NULL) ? strdup(pszSource) : NULL);
}
79
/*
 *@@ strhistr:
 *      like strstr, but case-insensitive.
 *
 *      Returns a pointer to the first occurrence of string2
 *      in string1, or NULL if there is none or if one of the
 *      two pointers is NULL. As with strstr, an empty string2
 *      matches at the very beginning of string1.
 *
 *      Characters are compared after passing them through
 *      toupper. No memory is allocated.
 *
 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rüdiger Ihle
 */

PSZ strhistr(const char *string1, const char *string2)
{
    const char *pStart;

    if ((!string1) || (!string2))
        return (NULL);

    for (pStart = string1; ; pStart++)
    {
        const char  *p1 = pStart,
                    *p2 = string2;

        // compare string2 against the text starting at pStart
        while (    (*p2)
                && (toupper((unsigned char)*p1) == toupper((unsigned char)*p2))
              )
        {
            p1++;
            p2++;
        }

        if (*p2 == 0)
            // all of string2 was matched
            return ((PSZ)pStart);

        if (*p1 == 0)
            // string1 ran out before string2 did; any later
            // start position is even shorter, so give up
            return (NULL);
    }
}
118
/*
 *@@ strhncpy0:
 *      like strncpy, but always appends a 0 character.
 *
 *      Copies at most cbSource characters from pszSource
 *      and then writes the terminator, so pszTarget must
 *      have room for cbSource + 1 bytes.
 *
 *      Returns the number of characters copied, not
 *      counting the terminator.
 */

ULONG strhncpy0(PSZ pszTarget,
                const char *pszSource,
                ULONG cbSource)
{
    ULONG   ul = 0;

    // stop at cbSource characters or at the end of the
    // source string, whichever comes first
    while ((ul < cbSource) && (pszSource[ul] != 0))
    {
        pszTarget[ul] = pszSource[ul];
        ul++;
    }

    // unlike strncpy, the target is terminated in every case
    pszTarget[ul] = 0;

    return (ul);
}
141
/*
 * strhCount:
 *      this counts the occurrences of c in pszSearch.
 *
 *      Returns 0 if pszSearch is NULL or if c is the
 *      null character (the string terminator is not
 *      counted as an occurrence).
 */

ULONG strhCount(const char *pszSearch,
                CHAR c)
{
    ULONG   ulCount = 0;

    // strchr(p, 0) finds the terminator, and stepping past
    // that would leave the string, so c == 0 is excluded
    if ((pszSearch) && (c != 0))
    {
        const char *p = pszSearch;
        while ((p = strchr(p, c)) != NULL)
        {
            ulCount++;
            p++;        // continue behind this occurrence
        }
    }

    return (ulCount);
}
165
/*
 *@@ strhIsDecimal:
 *      returns TRUE if psz consists of decimal digits only.
 *
 *      An empty string yields TRUE (it contains no
 *      non-digit character); a NULL pointer yields FALSE.
 */

BOOL strhIsDecimal(PSZ psz)
{
    const char *p;

    if (!psz)
        return (FALSE);

    for (p = psz; *p != 0; p++)
        // the cast is required: isdigit is only defined for
        // values representable as unsigned char (and EOF)
        if (!isdigit((unsigned char)*p))
            return (FALSE);

    return (TRUE);
}
183
/*
 *@@ strhSubstr:
 *      this creates a new PSZ containing the string
 *      from pBegin to pEnd, excluding the pEnd character.
 *      The new string is null-terminated. The caller
 *      must free() the new string after use.
 *
 *      If a null byte is found between pBegin and pEnd,
 *      the copy ends there.
 *
 *      Returns NULL if one of the pointers is NULL, if
 *      pEnd lies before pBegin, or if memory allocation
 *      fails.
 *
 *      Example:
 +          "1234567890"
 +            ^      ^
 +            p1     p2
 +          strhSubstr(p1, p2)
 *      would return a new string containing "2345678".
 */

PSZ strhSubstr(const char *pBegin, const char *pEnd)
{
    ULONG       cbSubstr;
    const char  *pNul;
    PSZ         pszSubstr;

    // a reversed range would wrap around to a huge unsigned length
    if ((!pBegin) || (!pEnd) || (pEnd < pBegin))
        return (NULL);

    cbSubstr = (ULONG)(pEnd - pBegin);

    // do not copy beyond the end of the source string
    pNul = memchr(pBegin, 0, cbSubstr);
    if (pNul)
        cbSubstr = (ULONG)(pNul - pBegin);

    pszSubstr = (PSZ)malloc(cbSubstr + 1);
    if (pszSubstr)
    {
        memcpy(pszSubstr, pBegin, cbSubstr);
        pszSubstr[cbSubstr] = 0;
    }

    return (pszSubstr);
}
206
207	/*
208	*@@ strhExtract:
209	* searches pszBuf for the cOpen character and returns
210	* the data in between cOpen and cClose, excluding
211	* those two characters, in a newly allocated buffer
212	* which you must free() afterwards.
213	*
214	* Spaces and newlines/linefeeds are skipped.
215	*
216	* If the search was successful, the new buffer
217	* is returned and, if (ppEnd != NULL), *ppEnd points
218	* to the first character after the cClose character
219	* found in the buffer.
220	*
221	* If the search was not successful, NULL is
222	* returned, and *ppEnd is unchanged.
223	*
224	* If another cOpen character is found before
225	* cClose, matching cClose characters will be skipped.
226	* You can therefore nest the cOpen and cClose
227	* characters.
228	*
229	* This function ignores cOpen and cClose characters
230	* in C-style comments and strings surrounded by
231	* double quotes.
232	*
233	* Example:
234	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
235	+ pEnd;
236	+ strhExtract(pszBuf,
237	+ '{', '}',
238	+ &pEnd)
239	* would return a new buffer containing " --blah-- ",
240	* and ppEnd would afterwards point to the space
241	* before "next" in the static buffer.
242	*
243	*@@added V0.9.0 [umoeller]
244	*/
245
246	PSZ strhExtract(PSZ pszBuf, // in: search buffer
247	CHAR cOpen, // in: opening char
248	CHAR cClose, // in: closing char
249	PSZ *ppEnd) // out: if != NULL, receives first character after closing char
250	{
251	PSZ pszReturn = NULL;
252
253	if (pszBuf)
254	{
255	PSZ pOpen = strchr(pszBuf, cOpen);
256	if (pOpen)
257	{
258	// opening char found:
259	// now go thru the whole rest of the buffer
260	PSZ p = pOpen+1;
261	LONG lLevel = 1; // if this goes 0, we're done
262	while (*p)
263	{
264	if (*p == cOpen)
265	lLevel++;
266	else if (*p == cClose)
267	{
268	lLevel--;
269	if (lLevel <= 0)
270	{
271	// matching closing bracket found:
272	// extract string
273	pszReturn = strhSubstr(pOpen+1, // after cOpen
274	p); // excluding cClose
275	if (ppEnd)
276	*ppEnd = p+1;
277	break; // while (*p)
278	}
279	}
280	else if (*p == '\"')
281	{
282	// beginning of string:
283	PSZ p2 = p+1;
284	// find end of string
285	while ((p2) && (p2 != '\"'))
286	p2++;
287
288	if (*p2 == '\"')
289	// closing quote found:
290	// search on after that
291	p = p2; // raised below
292	else
293	break; // while (*p)
294	}
295
296	p++;
297	}
298	}
299	}
300
301	return (pszReturn);
302	}
303
304	/*
305	*@@ strhQuote:
306	* similar to strhExtract, except that
307	* opening and closing chars are the same,
308	* and therefore no nesting is possible.
309	* Useful for extracting stuff between
310	* quotes.
311	*
312	*@@added V0.9.0 [umoeller]
313	*/
314
315	PSZ strhQuote(PSZ pszBuf,
316	CHAR cQuote,
317	PSZ *ppEnd)
318	{
319	PSZ pszReturn = NULL,
320	p1 = NULL;
321	if ((p1 = strchr(pszBuf, cQuote)))
322	{
323	PSZ p2 = strchr(p1+1, cQuote);
324	if (p2)
325	{
326	pszReturn = strhSubstr(p1+1, p2);
327	if (ppEnd)
328	// store closing char
329	*ppEnd = p2 + 1;
330	}
331	}
332
333	return (pszReturn);
334	}
335
/*
 *@@ strhStrip:
 *      removes all double spaces, i.e. every run
 *      of spaces is reduced to a single space.
 *      This copies within the "psz" buffer.
 *      If any double spaces are found, the
 *      string will be shorter than before,
 *      but the buffer is _not_ reallocated,
 *      so there will be unused bytes at the
 *      end.
 *
 *      Returns the number of spaces removed
 *      (0 if psz is NULL).
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhStrip(PSZ psz)         // in/out: string
{
    ULONG   ulrc = 0;

    if (psz)
    {
        PSZ pRead = psz,
            pWrite = psz;

        // single pass: pWrite trails pRead by the number
        // of spaces dropped so far
        while (*pRead)
        {
            if ((*pRead == ' ') && (*(pRead + 1) == ' '))
                // another space follows: drop this one
                ulrc++;
            else
                *pWrite++ = *pRead;
            pRead++;
        }
        *pWrite = 0;
    }

    return (ulrc);
}
374
/*
 *@@ strhins:
 *      this inserts one string into another.
 *
 *      pcszInsert is inserted into pcszBuffer at offset
 *      ulInsertOfs (which counts from 0). An offset equal
 *      to the length of pcszBuffer appends pcszInsert.
 *
 *      A newly allocated string is returned. pcszBuffer is
 *      not changed. The new string should be free()'d after
 *      use.
 *
 *      Upon errors (NULL pointers, ulInsertOfs beyond the
 *      end of pcszBuffer, out of memory), NULL is returned.
 *
 *@@changed V0.9.0 [umoeller]: completely rewritten.
 */

PSZ strhins(const char *pcszBuffer,
            ULONG ulInsertOfs,
            const char *pcszInsert)
{
    PSZ pszNew = NULL;

    if ((pcszBuffer) && (pcszInsert))
    {
        ULONG   cbBuffer = strlen(pcszBuffer),
                cbInsert = strlen(pcszInsert);

        // the largest valid offset is the position of the
        // terminator; anything beyond would read and write
        // outside the buffers
        if (ulInsertOfs <= cbBuffer)
        {
            pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1);  // additional null terminator
            if (pszNew)
            {
                // copy stuff before the insert position
                memcpy(pszNew,
                       pcszBuffer,
                       ulInsertOfs);
                // copy string to be inserted
                memcpy(pszNew + ulInsertOfs,
                       pcszInsert,
                       cbInsert);
                // copy stuff after the insert position,
                // including the terminator
                memcpy(pszNew + ulInsertOfs + cbInsert,
                       pcszBuffer + ulInsertOfs,
                       cbBuffer - ulInsertOfs + 1);
            }
        }
    }

    return (pszNew);
}
426
427	/*
428	*@@ strhrpl:
429	* wrapper around xstrrpl to work with C strings.
430	* Note that *ppszBuf can get reallocated and must
431	* be free()'able.
432	*
433	* Repetitive use of this wrapper is not recommended
434	* because it is considerably slower than xstrrpl.
435	*
436	*@@added V0.9.6 (2000-11-01) [umoeller]
437	*/
438
439	ULONG strhrpl(PSZ *ppszBuf, // in/out: string
440	PULONG pulOfs, // in: where to begin search (0 = start);
441	// out: ofs of first char after replacement string
442	const char *pcszSearch, // in: search string; cannot be NULL
443	const char *pcszReplace) // in: replacement string; cannot be NULL
444	{
445	ULONG ulrc = 0;
446	XSTRING xstrBuf,
447	xstrFind,
448	xstrReplace;
449	size_t ShiftTable[256];
450	BOOL fRepeat = FALSE;
451	xstrInit(&xstrBuf, 0);
452	xstrset(&xstrBuf, *ppszBuf);
453	xstrInit(&xstrFind, 0);
454	xstrset(&xstrFind, (PSZ)pcszSearch);
455	xstrInit(&xstrReplace, 0);
456	xstrset(&xstrReplace, (PSZ)pcszReplace);
457
458	if ((ulrc = xstrrpl(&xstrBuf,
459	pulOfs,
460	&xstrFind,
461	&xstrReplace,
462	ShiftTable,
463	&fRepeat)))
464	// replaced:
465	*ppszBuf = xstrBuf.psz;
466
467	return (ulrc);
468	}
469
/*
 * strhWords:
 *      returns the no. of words in "psz".
 *      A string is considered a "word" if
 *      it is surrounded by spaces only.
 *
 *      The result is the number of space characters
 *      in psz plus one; strings with fewer than two
 *      characters yield 0.
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhWords(PSZ psz)
{
    ULONG   ulWords = 0;

    if (strlen(psz) > 1)
    {
        const char *pSpace;

        // one word to begin with, plus one per space found
        ulWords = 1;
        for (pSpace = strchr(psz, ' ');
             pSpace != NULL;
             pSpace = strchr(pSpace + 1, ' '))
            ulWords++;
    }

    return (ulWords);
}
493
494	/*
495	*@@ strhThousandsULong:
496	* converts a ULONG into a decimal string, while
497	* inserting thousands separators into it. Specify
498	* the separator character in cThousands.
499	*
500	* Returns pszTarget so you can use it directly
501	* with sprintf and the "%s" flag.
502	*
503	* For cThousands, you should use the data in
504	* OS2.INI ("PM_National" application), which is
505	* always set according to the "Country" object.
506	* You can use prfhQueryCountrySettings to
507	* retrieve this setting.
508	*
509	* Use strhThousandsDouble for "double" values.
510	*/
511
PSZ strhThousandsULong(PSZ pszTarget,       // out: decimal as string
                       ULONG ul,            // in: decimal to convert
                       CHAR cThousands)     // in: separator char (e.g. '.')
{
    CHAR    szDigits[40];
    USHORT  cDigits,
            usRead;
    PSZ     pOut = pszTarget;

    // plain decimal representation first
    sprintf(szDigits, "%lu", ul);
    cDigits = strlen(szDigits);

    for (usRead = 0; usRead < cDigits; usRead++)
    {
        // a separator goes in front of every digit which has a
        // multiple of three digits left (itself included),
        // except in front of the very first digit
        if ((usRead != 0) && (((cDigits - usRead) % 3) == 0))
            *pOut++ = cThousands;

        *pOut++ = szDigits[usRead];
    }
    *pOut = '\0';

    return (pszTarget);
}
537
538	/*
539	*@@ strhThousandsDouble:
540	* like strhThousandsULong, but for a "double"
541	* value. Note that after-comma values are truncated.
542	*/
543
544	PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
545	{
546	USHORT ust, uss, usc;
547	CHAR szTemp[40];
548	sprintf(szTemp, "%.0f", floor(dbl));
549
550	ust = 0;
551	usc = strlen(szTemp);
552	for (uss = 0; uss < usc; uss++)
553	{
554	if (uss)
555	if (((usc - uss) % 3) == 0)
556	{
557	pszTarget[ust] = cThousands;
558	ust++;
559	}
560	pszTarget[ust] = szTemp[uss];
561	ust++;
562	}
563	pszTarget[ust] = '\0';
564
565	return (pszTarget);
566	}
567
568	/*
569	*@@ strhVariableDouble:
570	* like strhThousandsULong, but for a "double" value, and
571	* with a variable number of decimal places depending on the
572	* size of the quantity.
573	*
574	*@@added V0.9.6 (2000-11-12) [pr]
575	*/
576
577	PSZ strhVariableDouble(PSZ pszTarget,
578	double dbl,
579	PSZ pszUnits,
580	CHAR cThousands)
581	{
582	if (dbl < 100.0)
583	sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
584	else
585	if (dbl < 1000.0)
586	sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
587	else
588	strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
589	pszUnits);
590
591	return(pszTarget);
592	}
593
594	/*
595	*@@ strhFileDate:
596	* converts file date data to a string (to pszBuf).
597	* You can pass any FDATE structure to this function,
598	* which are returned in those FILEFINDBUF* or
599	* FILESTATUS* structs by the Dos* functions.
600	*
601	* ulDateFormat is the PM setting for the date format,
602	* as set in the "Country" object, and can be queried using
603	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
604	*
605	* meaning:
606	* -- 0 mm.dd.yyyy (English)
607	* -- 1 dd.mm.yyyy (e.g. German)
608	* -- 2 yyyy.mm.dd (Japanese, ISO)
609	* -- 3 yyyy.dd.mm
610	*
611	* cDateSep is used as a date separator (e.g. '.').
612	* This can be queried using:
613	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
614	*
615	* Alternatively, you can query all the country settings
616	* at once using prfhQueryCountrySettings (prfh.c).
617	*
618	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
619	*/
620
621	VOID strhFileDate(PSZ pszBuf, // out: string returned
622	FDATE *pfDate, // in: date information
623	ULONG ulDateFormat, // in: date format (0-3)
624	CHAR cDateSep) // in: date separator (e.g. '.')
625	{
626	DATETIME dt;
627	dt.day = pfDate->day;
628	dt.month = pfDate->month;
629	dt.year = pfDate->year + 1980;
630
631	strhDateTime(pszBuf,
632	NULL, // no time
633	&dt,
634	ulDateFormat,
635	cDateSep,
636	0, 0); // no time
637	}
638
639	/*
640	*@@ strhFileTime:
641	* converts file time data to a string (to pszBuf).
642	* You can pass any FTIME structure to this function,
643	* which are returned in those FILEFINDBUF* or
644	* FILESTATUS* structs by the Dos* functions.
645	*
646	* ulTimeFormat is the PM setting for the time format,
647	* as set in the "Country" object, and can be queried using
648	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
649	* meaning:
650	* -- 0 12-hour clock
651	* -- >0 24-hour clock
652	*
653	* cDateSep is used as a time separator (e.g. ':').
654	* This can be queried using:
655	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
656	*
657	* Alternatively, you can query all the country settings
658	* at once using prfhQueryCountrySettings (prfh.c).
659	*
660	*@@changed 99-03-15 fixed 12-hour crash
661	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
662	*/
663
664	VOID strhFileTime(PSZ pszBuf, // out: string returned
665	FTIME *pfTime, // in: time information
666	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
667	CHAR cTimeSep) // in: time separator (e.g. ':')
668	{
669	DATETIME dt;
670	dt.hours = pfTime->hours;
671	dt.minutes = pfTime->minutes;
672	dt.seconds = pfTime->twosecs * 2;
673
674	strhDateTime(NULL, // no date
675	pszBuf,
676	&dt,
677	0, 0, // no date
678	ulTimeFormat,
679	cTimeSep);
680	}
681
682	/*
683	*@@ strhDateTime:
684	* converts Control Program DATETIME info
685	* into two strings. See strhFileDate and strhFileTime
686	* for more detailed parameter descriptions.
687	*
688	*@@added V0.9.0 (99-11-07) [umoeller]
689	*/
690
691	VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
692	PSZ pszTime, // out: time string returned (can be NULL)
693	DATETIME *pDateTime, // in: date/time information
694	ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
695	CHAR cDateSep, // in: date separator (e.g. '.')
696	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
697	CHAR cTimeSep) // in: time separator (e.g. ':')
698	{
699	if (pszDate)
700	{
701	switch (ulDateFormat)
702	{
703	case 0: // mm.dd.yyyy (English)
704	sprintf(pszDate, "%02d%c%02d%c%04d",
705	pDateTime->month,
706	cDateSep,
707	pDateTime->day,
708	cDateSep,
709	pDateTime->year);
710	break;
711
712	case 1: // dd.mm.yyyy (e.g. German)
713	sprintf(pszDate, "%02d%c%02d%c%04d",
714	pDateTime->day,
715	cDateSep,
716	pDateTime->month,
717	cDateSep,
718	pDateTime->year);
719	break;
720
721	case 2: // yyyy.mm.dd (Japanese)
722	sprintf(pszDate, "%04d%c%02d%c%02d",
723	pDateTime->year,
724	cDateSep,
725	pDateTime->month,
726	cDateSep,
727	pDateTime->day);
728	break;
729
730	default: // yyyy.dd.mm
731	sprintf(pszDate, "%04d%c%02d%c%02d",
732	pDateTime->year,
733	cDateSep,
734	pDateTime->day,
735	cDateSep,
736	pDateTime->month);
737	break;
738	}
739	}
740
741	if (pszTime)
742	{
743	if (ulTimeFormat == 0)
744	{
745	// for 12-hour clock, we need additional INI data
746	CHAR szAMPM[10] = "err";
747
748	if (pDateTime->hours > 12)
749	{
750	// > 12h: PM.
751
752	// Note: 12:xx noon is 12 AM, not PM (even though
753	// AM stands for "ante meridiam", but English is just
754	// not logical), so that's handled below.
755
756	PrfQueryProfileString(HINI_USER,
757	"PM_National",
758	"s2359", // key
759	"PM", // default
760	szAMPM, sizeof(szAMPM)-1);
761	sprintf(pszTime, "%02d%c%02d%c%02d %s",
762	// leave 12 == 12 (not 0)
763	pDateTime->hours % 12,
764	cTimeSep,
765	pDateTime->minutes,
766	cTimeSep,
767	pDateTime->seconds,
768	szAMPM);
769	}
770	else
771	{
772	// <= 12h: AM
773	PrfQueryProfileString(HINI_USER,
774	"PM_National",
775	"s1159", // key
776	"AM", // default
777	szAMPM, sizeof(szAMPM)-1);
778	sprintf(pszTime, "%02d%c%02d%c%02d %s",
779	pDateTime->hours,
780	cTimeSep,
781	pDateTime->minutes,
782	cTimeSep,
783	pDateTime->seconds,
784	szAMPM);
785	}
786	}
787	else
788	// 24-hour clock
789	sprintf(pszTime, "%02d%c%02d%c%02d",
790	pDateTime->hours,
791	cTimeSep,
792	pDateTime->minutes,
793	cTimeSep,
794	pDateTime->seconds);
795	}
796	}
797
/*
 *@@ strhGetWord:
 *      finds word boundaries.
 *
 *      *ppszStart is used as the beginning of the
 *      search.
 *
 *      If a word is found, *ppszStart is set to
 *      the first character of the word which was
 *      found and *ppszEnd receives the address
 *      of the first character _after_ the word,
 *      which is probably a space or a \n or \r char.
 *      We then return TRUE.
 *
 *      The search is stopped if a null character
 *      is found or pLimit is reached. In that case,
 *      FALSE is returned, and *ppszStart and *ppszEnd
 *      are left unchanged. (A null character directly
 *      after a word ends that word, and TRUE is returned.)
 *
 *      Note that the character at pLimit itself is
 *      examined, so it must be readable.
 *
 *@@added V0.9.1 (2000-02-13) [umoeller]
 */

BOOL strhGetWord(PSZ *ppszStart,        // in: start of search,
                                        // out: start of word (if TRUE is returned)
                 const char *pLimit,    // in: ptr to last char after *ppszStart to be
                                        // searched; if the word does not end before
                                        // or with this char, FALSE is returned
                 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
                 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
                 PSZ *ppszEnd)          // out: first char _after_ word
                                        // (if TRUE is returned)
{
    PSZ pStart = *ppszStart;

    // find start of word; the explicit null check is needed
    // because strchr also matches the terminator of
    // pcszBeginChars and would otherwise step over a null byte
    while (    (pStart < (PSZ)pLimit)
            && (*pStart)
            && (strchr(pcszBeginChars, *pStart))
          )
        // if char is a "before word" char: go for next
        pStart++;

    if ((pStart < (PSZ)pLimit) && (*pStart))
    {
        // found a valid "word start" character
        // (which is not in pcszBeginChars):

        // find end of word; a null byte always stops this
        // loop since strchr matches the terminator of pcszEndChars
        PSZ  pEndOfWord = pStart;
        while (    (pEndOfWord <= (PSZ)pLimit)
                && (strchr(pcszEndChars, *pEndOfWord) == 0)
              )
            // if char is not an "end word" char: go for next
            pEndOfWord++;

        if (pEndOfWord <= (PSZ)pLimit)
        {
            // whoa, got a word:
            *ppszStart = pStart;
            *ppszEnd = pEndOfWord;
            return (TRUE);
        }
    }

    return (FALSE);
}
866
/*
 *@@ strhIsWord:
 *      returns TRUE if p points to a "word"
 *      in pcszBuf.
 *
 *      p is considered a word if the character _before_
 *      it is in pcszBeginChars (or p is the start of
 *      pcszBuf) and the char _after_ it (i.e. *(p+cbSearch))
 *      is in pcszEndChars or is the terminating null byte.
 *
 *      No check is made for repeated end characters
 *      following the word.
 *
 *@@added V0.9.6 (2000-11-12) [umoeller]
 */

BOOL strhIsWord(const char *pcszBuf,
                const char *p,                 // in: start of word
                ULONG cbSearch,                // in: length of word
                const char *pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                const char *pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    CHAR    cNextChar;

    // check previous char, unless the word starts the buffer
    if (    (p != pcszBuf)
         && (strchr(pcszBeginChars, *(p-1)) == NULL)
       )
        return (FALSE);

    // OK, valid begin char: check end char.
    // strchr also matches the terminator of pcszEndChars,
    // so a word at the very end of the buffer is accepted too.
    cNextChar = *(p + cbSearch);
    if (strchr(pcszEndChars, cNextChar) != NULL)
        return (TRUE);

    return (FALSE);
}
913
914	/*
915	*@@ strhFindWord:
916	* searches for pszSearch in pszBuf, which is
917	* returned if found (or NULL if not).
918	*
919	* As opposed to strstr, this finds pszSearch
920	* only if it is a "word". A search string is
921	* considered a word if the character _before_
922	* it is in pcszBeginChars and the char _after_
923	* it is in pcszEndChars.
924	*
925	* Example:
926	+ strhFindWord("This is an example.", "is");
927	+ returns ...........^ this, but not the "is" in "This".
928	*
929	* The algorithm here uses strstr to find pszSearch in pszBuf
930	* and performs additional "is-word" checks for each item found
931	* (by calling strhIsWord).
932	*
933	* Note that this function is fairly slow compared to xstrFindWord.
934	*
935	*@@added V0.9.0 (99-11-08) [umoeller]
936	*@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
937	*/
938
939	PSZ strhFindWord(const char *pszBuf,
940	const char *pszSearch,
941	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
942	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
943	{
944	PSZ pszReturn = 0;
945	ULONG cbBuf = strlen(pszBuf),
946	cbSearch = strlen(pszSearch);
947
948	if ((cbBuf) && (cbSearch))
949	{
950	const char *p = pszBuf;
951
952	do // while p
953	{
954	p = strstr(p, pszSearch);
955	if (p)
956	{
957	// string found:
958	// check if that's a word
959
960	if (strhIsWord(pszBuf,
961	p,
962	cbSearch,
963	pcszBeginChars,
964	pcszEndChars))
965	{
966	// valid end char:
967	pszReturn = (PSZ)p;
968	break;
969	}
970
971	p += cbSearch;
972	}
973	} while (p);
974
975	}
976	return (pszReturn);
977	}
978
/*
 *@@ strhFindEOL:
 *      returns a pointer to the next \r, \n or null character
 *      following pszSearchIn. Stores the offset in *pulOffset.
 *
 *      The offset is the distance of the returned pointer
 *      from pszSearchIn, i.e. the length of the line without
 *      its line break.
 *
 *      This never returns NULL because at some point,
 *      there will be a null byte in your string.
 *
 *@@added V0.9.4 (2000-07-01) [umoeller]
 */

PSZ strhFindEOL(PSZ pszSearchIn,        // in: where to search
                PULONG pulOffset)       // out: offset (ptr can be NULL)
{
    // strcspn returns the length of the leading part which
    // contains neither \r nor \n; it stops at the null
    // terminator at the latest
    ULONG   ulOfs = strcspn(pszSearchIn, "\r\n");

    if (pulOffset)
        *pulOffset = ulOfs;

    return (pszSearchIn + ulOfs);
}
1009
1010	/*
1011	*@@ strhFindNextLine:
1012	* like strhFindEOL, but this returns the character
1013	* _after_ \r or \n. Note that this might return
1014	* a pointer to terminating NULL character also.
1015	*/
1016
1017	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1018	{
1019	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1020	// pEOL now points to the \r char or the terminating 0 byte;
1021	// if not null byte, advance pointer
1022	PSZ pNextLine = pEOL;
1023	if (*pNextLine == '\r')
1024	pNextLine++;
1025	if (*pNextLine == '\n')
1026	pNextLine++;
1027	if (pulOffset)
1028	*pulOffset = pNextLine - pszSearchIn;
1029	return (pNextLine);
1030	}
1031
1032	/*
1033	*@@ strhFindKey:
1034	* finds pszKey in pszSearchIn; similar to strhistr,
1035	* but this one makes sure the key is at the beginning
1036	* of a line. Spaces before the key are tolerated.
1037	* Returns NULL if the key was not found.
1038	*
1039	* Used by strhGetParameter/strhSetParameter; useful
1040	* for analyzing CONFIG.SYS settings.
1041	*
1042	*@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1043	*@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1044	*/
1045
1046	PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1047	const char *pcszKey, // in: key to search for
1048	PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1049	// can be NULL if not needed
1050	{
1051	const char *p = NULL;
1052	PSZ pReturn = NULL;
1053	// BOOL fFound = FALSE;
1054
1055	p = pcszSearchIn;
1056	do {
1057	p = strhistr(p, pcszKey);
1058
1059	if ((p) && (p >= pcszSearchIn))
1060	{
1061	// make sure the key is at the beginning of a line
1062	// by going backwards until we find a char != " "
1063	const char *p2 = p;
1064	while ( (*p2 == ' ')
1065	&& (p2 > pcszSearchIn)
1066	)
1067	p2--;
1068
1069	// if previous char is an EOL sign, go on
1070	if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1071	\|\| (*(p2-1) == '\r')
1072	\|\| (*(p2-1) == '\n')
1073	)
1074	{
1075	// now check whether the char after the search
1076	// is a "=" char
1077	// ULONG cbKey = strlen(pszKey);
1078
1079	// tolerate spaces before "="
1080	/* PSZ p3 = p;
1081	while (*(p3+cbKey) == ' ')
1082	p3++;
1083
1084	if ((p3+cbKey) == '=') /
1085	{
1086	// found:
1087	pReturn = (PSZ)p; // go on, p contains found key
1088
1089	// test for all upper case?
1090	if (pfIsAllUpperCase)
1091	{
1092	ULONG cbKey2 = strlen(pcszKey),
1093	ul = 0;
1094	*pfIsAllUpperCase = TRUE;
1095	for (ul = 0; ul < cbKey2; ul++)
1096	if (islower(*(p+ul)))
1097	{
1098	*pfIsAllUpperCase = FALSE;
1099	break; // for
1100	}
1101	}
1102
1103	break; // do
1104	}
1105	} // else search next key
1106
1107	p++; // search on after this key
1108	}
1109	} while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1110
1111	return (pReturn);
1112	}
1113
1114	/*
1115	*@@ strhGetParameter:
1116	* searches pszSearchIn for the key pszKey; if found, it
1117	* returns a pointer to the following characters in pszSearchIn
1118	* and, if pszCopyTo != NULL, copies the rest of the line to
1119	* that buffer, of which cbCopyTo specified the size.
1120	* If the key is not found, NULL is returned.
1121	* String search is done by calling strhFindKey.
1122	* This is useful for querying CONFIG.SYS settings.
1123	*
1124	* <B>Example:</B> this would return "YES" if you searched
1125	* for "PAUSEONERROR=", and "PAUSEONERROR=YES" existed in pszSearchIn.
1126	*/
1127
1128	PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1129	const char *pcszKey, // in: key to search for
1130	PSZ pszCopyTo, // out: key value
1131	ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1132	{
1133	PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1134	prc = NULL;
1135	if (p)
1136	{
1137	prc = p + strlen(pcszKey);
1138	if (pszCopyTo)
1139	// copy to pszCopyTo
1140	{
1141	ULONG cb;
1142	PSZ pEOL = strhFindEOL(prc, &cb);
1143	if (pEOL)
1144	{
1145	if (cb > cbCopyTo)
1146	cb = cbCopyTo-1;
1147	strhncpy0(pszCopyTo, prc, cb);
1148	}
1149	}
1150	}
1151
1152	return (prc);
1153	}
1154
1155	/*
1156	*@@ strhSetParameter:
1157	* searches *ppszBuf for the key pszKey; if found, it
1158	* replaces the characters following this key up to the
1159	* end of the line with pszParam. If pszKey is not found in
1160	* *ppszBuf, it is appended to the file in a new line.
1161	*
1162	* If any changes are made, *ppszBuf is re-allocated.
1163	*
1164	* This function searches w/out case sensitivity.
1165	*
1166	* Returns a pointer to the new parameter inside the buffer.
1167	*
1168	@@changed V0.9.0 [umoeller]: changed function prototype to PSZ ppszSearchIn
1169	*/
1170
1171	PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1172	const char *pcszKey, // in: key to search for
1173	PSZ pszNewParam, // in: new parameter to set for key
1174	BOOL fRespectCase) // in: if TRUE, pszNewParam will
1175	// be converted to upper case if the found key is
1176	// in upper case also. pszNewParam should be in
1177	// lower case if you use this.
1178	{
1179	BOOL fIsAllUpperCase = FALSE;
1180	PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1181	prc = NULL;
1182
1183	if (pKey)
1184	{
1185	// key found in file:
1186	// replace existing parameter
1187	PSZ pOldParam = pKey + strlen(pcszKey);
1188
1189	prc = pOldParam;
1190	// pOldParam now has the old parameter, which we
1191	// will overwrite now
1192
1193	if (pOldParam)
1194	{
1195	ULONG cbOldParam;
1196	PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1197	// pEOL now has first end-of-line after the parameter
1198
1199	if (pEOL)
1200	{
1201	XSTRING strBuf;
1202	ULONG ulOfs = 0;
1203
1204	PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1205	strncpy(pszOldCopy, pOldParam, cbOldParam);
1206	pszOldCopy[cbOldParam] = '\0';
1207
1208	xstrInit(&strBuf, 0);
1209	xstrset(&strBuf, *ppszBuf); // this must not be freed!
1210	/* xstrInit(&strFind, 0);
1211	xstrset(&strFind, pszOldCopy); // this must not be freed!
1212	xstrInit(&strReplace, 0);
1213	xstrset(&strReplace, pszNewParam); // this must not be freed!
1214	*/
1215
1216	// check for upper case desired?
1217	if (fRespectCase)
1218	if (fIsAllUpperCase)
1219	strupr(pszNewParam);
1220
1221	xstrcrpl(&strBuf, &ulOfs, pszOldCopy, pszNewParam);
1222
1223	free(pszOldCopy);
1224
1225	*ppszBuf = strBuf.psz;
1226	}
1227	}
1228	}
1229	else
1230	{
1231	PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1232	+ strlen(pcszKey)
1233	+ strlen(pszNewParam)
1234	+ 5); // 2 * \r\n + null byte
1235	// key not found: append to end of file
1236	sprintf(pszNew, "%s\r\n%s%s\r\n",
1237	*ppszBuf, pcszKey, pszNewParam);
1238	free(*ppszBuf);
1239	*ppszBuf = pszNew;
1240	}
1241
1242	return (prc);
1243	}
1244
1245	/*
1246	*@@ strhDeleteLine:
1247	* this deletes the line in pszSearchIn which starts with
1248	* the key pszKey. Returns TRUE if the line was found and
1249	* deleted.
1250	*
1251	* This copies within pszSearchIn.
1252	*/
1253
1254	BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1255	PSZ pszKey) // in: key to find
1256	{
1257	BOOL fIsAllUpperCase = FALSE;
1258	PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1259	BOOL brc = FALSE;
1260
1261	if (pKey) {
1262	PSZ pEOL = strhFindEOL(pKey, NULL);
1263	// pEOL now has first end-of-line after the key
1264	if (pEOL)
1265	{
1266	// delete line by overwriting it with
1267	// the next line
1268	strcpy(pKey, pEOL+2);
1269	}
1270	else
1271	{
1272	// EOL not found: we must be at the end of the file
1273	*pKey = '\0';
1274	}
1275	brc = TRUE;
1276	}
1277
1278	return (brc);
1279	}
1280
1281	/*
1282	*@@ strhBeautifyTitle:
1283	* replaces all line breaks (0xd, 0xa) with spaces.
1284	*/
1285
1286	BOOL strhBeautifyTitle(PSZ psz)
1287	{
1288	BOOL rc = FALSE;
1289	CHAR *p;
1290	while ((p = strchr(psz, 0xa)))
1291	{
1292	*p = ' ';
1293	rc = TRUE;
1294	}
1295	while ((p = strchr(psz, 0xd)))
1296	{
1297	*p = ' ';
1298	rc = TRUE;
1299	}
1300	return (rc);
1301	}
1302
1303	/*
1304	* strhFindAttribValue:
1305	* searches for pszAttrib in pszSearchIn; if found,
1306	* returns the first character after the "=" char.
1307	* If "=" is not found, a space, \r, and \n are
1308	* also accepted. This function searches without
1309	* respecting case.
1310	*
1311	* <B>Example:</B>
1312	+ strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1313	+
1314	+ returns ....................... ^ this address.
1315	*
1316	*@@added V0.9.0 [umoeller]
1317	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1318	*/
1319
1320	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1321	{
1322	PSZ prc = 0;
1323	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1324	p,
1325	p2;
1326	ULONG cbAttrib = strlen(pszAttrib);
1327
1328	// 1) find space char
1329	while ((p = strchr(pszSearchIn2, ' ')))
1330	{
1331	CHAR c;
1332	p++;
1333	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1334	// now check whether the p+strlen(pszAttrib)
1335	// is a valid end-of-tag character
1336	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1337	&& ( (c == ' ')
1338	\|\| (c == '>')
1339	\|\| (c == '=')
1340	\|\| (c == '\r')
1341	\|\| (c == '\n')
1342	\|\| (c == 0)
1343	)
1344	)
1345	{
1346	// yes:
1347	CHAR c2;
1348	p2 = p + cbAttrib;
1349	c2 = *p2;
1350	while ( ( (c2 == ' ')
1351	\|\| (c2 == '=')
1352	\|\| (c2 == '\n')
1353	\|\| (c2 == '\r')
1354	)
1355	&& (c2 != 0)
1356	)
1357	c2 = *++p2;
1358	prc = p2;
1359	break; // first while
1360	}
1361	pszSearchIn2++;
1362	}
1363	return (prc);
1364	}
1365
1366	/*
1367	* strhGetNumAttribValue:
1368	* stores the numerical parameter value of an HTML-style
1369	* tag in *pl.
1370	*
1371	* Returns the address of the tag parameter in the
1372	* search buffer, if found, or NULL.
1373	*
1374	* <B>Example:</B>
1375	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1376	*
1377	* stores 123 in the "l" variable.
1378	*
1379	*@@added V0.9.0 [umoeller]
1380	*/
1381
1382	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1383	const char *pszTag, // e.g. "INDEX"
1384	PLONG pl) // out: numerical value
1385	{
1386	PSZ pParam;
1387	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1388	sscanf(pParam, "%ld", pl);
1389
1390	return (pParam);
1391	}
1392
1393	/*
1394	* strhGetTextAttr:
1395	* retrieves the attribute value of a textual HTML-style tag
1396	* in a newly allocated buffer, which is returned,
1397	* or NULL if attribute not found.
1398	* If an attribute value is to contain spaces, it
1399	* must be enclosed in quotes.
1400	*
1401	* The offset of the attribute data in pszSearchIn is
1402	* returned in *pulOffset so that you can do multiple
1403	* searches.
1404	*
1405	* This returns a new buffer, which should be free()'d after use.
1406	*
1407	* <B>Example:</B>
1408	+ ULONG ulOfs = 0;
1409	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1410	+ ............^ ulOfs
1411	*
1412	* returns a new string with the value "blublub" (without
1413	* quotes) and sets ulOfs to 12.
1414	*
1415	*@@added V0.9.0 [umoeller]
1416	*/
1417
1418	PSZ strhGetTextAttr(const char *pszSearchIn,
1419	const char *pszTag,
1420	PULONG pulOffset) // out: offset where found
1421	{
1422	PSZ pParam,
1423	pParam2,
1424	prc = NULL;
1425	ULONG ulCount = 0;
1426	LONG lNestingLevel = 0;
1427
1428	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1429	{
1430	// determine end character to search for: a space
1431	CHAR cEnd = ' ';
1432	if (*pParam == '\"')
1433	{
1434	// or, if the data is enclosed in quotes, a quote
1435	cEnd = '\"';
1436	pParam++;
1437	}
1438
1439	if (pulOffset)
1440	// store the offset
1441	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1442
1443	// now find end of attribute
1444	pParam2 = pParam;
1445	while (*pParam)
1446	{
1447	if (*pParam == cEnd)
1448	// end character found
1449	break;
1450	else if (*pParam == '<')
1451	// yet another opening tag found:
1452	// this is probably some "<" in the attributes
1453	lNestingLevel++;
1454	else if (*pParam == '>')
1455	{
1456	lNestingLevel--;
1457	if (lNestingLevel < 0)
1458	// end of tag found:
1459	break;
1460	}
1461	ulCount++;
1462	pParam++;
1463	}
1464
1465	// copy attribute to new buffer
1466	if (ulCount)
1467	{
1468	prc = (PSZ)malloc(ulCount+1);
1469	memcpy(prc, pParam2, ulCount);
1470	*(prc+ulCount) = 0;
1471	}
1472	}
1473	return (prc);
1474	}
1475
1476	/*
1477	* strhFindEndOfTag:
1478	* returns a pointer to the ">" char
1479	* which seems to terminate the tag beginning
1480	* after pszBeginOfTag.
1481	*
1482	* If additional "<" chars are found, we look
1483	* for additional ">" characters too.
1484	*
1485	* Note: You must pass the address of the opening
1486	* '<' character to this function.
1487	*
1488	* Example:
1489	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1490	+ strhFindEndOfTag(pszTest)
1491	+ returns.................................^ this.
1492	*
1493	*@@added V0.9.0 [umoeller]
1494	*/
1495
1496	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1497	{
1498	PSZ p = (PSZ)pszBeginOfTag,
1499	prc = NULL;
1500	LONG lNestingLevel = 0;
1501
1502	while (*p)
1503	{
1504	if (*p == '<')
1505	// another opening tag found:
1506	lNestingLevel++;
1507	else if (*p == '>')
1508	{
1509	// closing tag found:
1510	lNestingLevel--;
1511	if (lNestingLevel < 1)
1512	{
1513	// corresponding: return this
1514	prc = p;
1515	break;
1516	}
1517	}
1518	p++;
1519	}
1520
1521	return (prc);
1522	}
1523
1524	/*
1525	* strhGetBlock:
1526	* this complex function searches the given string
1527	* for a pair of opening/closing HTML-style tags.
1528	*
1529	* If found, this routine returns TRUE and does
1530	* the following:
1531	*
1532	* 1) allocate a new buffer, copy the text
1533	* enclosed by the opening/closing tags
1534	* into it and set *ppszBlock to that
1535	* buffer;
1536	*
1537	* 2) if the opening tag has any attributes,
1538	* allocate another buffer, copy the
1539	* attributes into it and set *ppszAttrs
1540	* to that buffer; if no attributes are
1541	* found, *ppszAttrs will be NULL;
1542	*
1543	* 3) set *pulOffset to the offset from the
1544	* beginning of *ppszSearchIn where the
1545	* opening tag was found;
1546	*
1547	* 4) advance *ppszSearchIn to after the
1548	* closing tag, so that you can do
1549	* multiple searches without finding the
1550	* same tags twice.
1551	*
1552	* All buffers should be freed using free().
1553	*
1554	* This returns the following:
1555	* -- 0: no error
1556	* -- 1: tag not found at all (doesn't have to be an error)
1557	* -- 2: begin tag found, but no corresponding end tag found. This
1558	* is a real error.
1559	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1560	*
1561	* <B>Example:</B>
1562	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1563	+ PSZ pszBlock, pszAttrs;
1564	+ ULONG ulOfs;
1565	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1566	*
1567	* would do the following:
1568	*
1569	* 1) set pszBlock to a new string containing "This is page 1."
1570	* without quotes;
1571	*
1572	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1573	*
1574	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1575	*
1576	* 4) pSearch would be advanced to point to the "More text"
1577	* string in the original buffer.
1578	*
1579	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1580	* for HTML parsing.
1581	*
1582	*@@added V0.9.0 [umoeller]
1583	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1584	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1585	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1586	*/
1587
1588	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1589	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1590	PSZ pszTag,
1591	PSZ *ppszBlock, // out: block enclosed by the tags
1592	PSZ *ppszAttribs, // out: attributes of the opening tag
1593	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1594	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1595	{
1596	ULONG ulrc = 1;
1597	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1598	pszSearch2 = pszBeginTag,
1599	pszClosingTag;
1600	ULONG cbTag = strlen(pszTag);
1601
1602	// go thru the block and check all tags if it's the
1603	// begin tag we're looking for
1604	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1605	{
1606	if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1607	// yes: stop
1608	break;
1609	else
1610	pszBeginTag++;
1611	}
1612
1613	if (pszBeginTag)
1614	{
1615	// we found <TAG>:
1616	ULONG ulNestingLevel = 0;
1617
1618	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1619	// strchr(pszBeginTag, '>');
1620	if (pszEndOfBeginTag)
1621	{
1622	// does the caller want the attributes?
1623	if (ppszAttribs)
1624	{
1625	// yes: then copy them
1626	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1627	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1628	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1629	// add terminating 0
1630	*(pszAttrs + ulAttrLen) = 0;
1631
1632	*ppszAttribs = pszAttrs;
1633	}
1634
1635	// output offset of where we found the begin tag
1636	if (pulOfsBeginTag)
1637	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1638
1639	// now find corresponding closing tag (e.g. "</BODY>"
1640	pszBeginTag = pszEndOfBeginTag+1;
1641	// now we're behind the '>' char of the opening tag
1642	// increase offset of that too
1643	if (pulOfsBeginBlock)
1644	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1645
1646	// find next closing tag;
1647	// for the first run, pszSearch2 points to right
1648	// after the '>' char of the opening tag
1649	pszSearch2 = pszBeginTag;
1650	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1651	&& (pszClosingTag = strstr(pszSearch2, "<"))
1652	)
1653	{
1654	// if we have another opening tag before our closing
1655	// tag, we need to have several closing tags before
1656	// we're done
1657	if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1658	ulNestingLevel++;
1659	else
1660	{
1661	// is this ours?
1662	if ( (*(pszClosingTag+1) == '/')
1663	&& (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1664	)
1665	{
1666	// we've found a matching closing tag; is
1667	// it ours?
1668	if (ulNestingLevel == 0)
1669	{
1670	// our closing tag found:
1671	// allocate mem for a new buffer
1672	// and extract all the text between
1673	// open and closing tags to it
1674	ULONG ulLen = pszClosingTag - pszBeginTag;
1675	if (ppszBlock)
1676	{
1677	PSZ pNew = (PSZ)malloc(ulLen + 1);
1678	strhncpy0(pNew, pszBeginTag, ulLen);
1679	*ppszBlock = pNew;
1680	}
1681
1682	// raise search offset to after the closing tag
1683	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1684
1685	ulrc = 0;
1686
1687	break;
1688	} else
1689	// not our closing tag:
1690	ulNestingLevel--;
1691	}
1692	}
1693	// no matching closing tag: search on after that
1694	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1695	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1696
1697	if (!pszClosingTag)
1698	// no matching closing tag found:
1699	// return 2 (closing tag not found)
1700	ulrc = 2;
1701	} // end if (pszBeginTag)
1702	else
1703	// no matching ">" for opening tag found:
1704	ulrc = 3;
1705	}
1706
1707	return (ulrc);
1708	}
1709
1710	/* ******************************************************************
1711	* *
1712	* Miscellaneous *
1713	* *
1714	********************************************************************/
1715
1716	/*
1717	*@@ strhArrayAppend:
1718	* this appends a string to a "string array".
1719	*
1720	* A string array is considered a sequence of
1721	* zero-terminated strings in memory. That is,
1722	* after each string's null-byte, the next
1723	* string comes up.
1724	*
1725	* This is useful for composing a single block
1726	* of memory from, say, list box entries, which
1727	* can then be written to OS2.INI in one flush.
1728	*
1729	* To append strings to such an array, call this
1730	* function for each string you wish to append.
1731	* This will re-allocate *ppszRoot with each call,
1732	* and update *pcbRoot, which then contains the
1733	* total size of all strings (including all null
1734	* terminators).
1735	*
1736	* Pass *pcbRoot to PrfSaveProfileData to have the
1737	* block saved.
1738	*
1739	* Note: On the first call, ppszRoot and pcbRoot
1740	* _must_ be both NULL, or this crashes.
1741	*/
1742
1743	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1744	PSZ pszNew, // in: string to append
1745	PULONG pcbRoot) // in/out: size of array
1746	{
1747	ULONG cbNew = strlen(pszNew);
1748	PSZ pszTemp = (PSZ)malloc(*pcbRoot
1749	+ cbNew
1750	+ 1); // two null bytes
1751	if (*ppszRoot)
1752	{
1753	// not first loop: copy old stuff
1754	memcpy(pszTemp,
1755	*ppszRoot,
1756	*pcbRoot);
1757	free(*ppszRoot);
1758	}
1759	// append new string
1760	strcpy(pszTemp + *pcbRoot,
1761	pszNew);
1762	// update root
1763	*ppszRoot = pszTemp;
1764	// update length
1765	*pcbRoot += cbNew + 1;
1766	}
1767
1768	/*
1769	*@@ strhCreateDump:
1770	* this dumps a memory block into a string
1771	* and returns that string in a new buffer.
1772	*
1773	* You must free() the returned PSZ after use.
1774	*
1775	* The output looks like the following:
1776	*
1777	+ 0000: FE FF 0E 02 90 00 00 00 ........
1778	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1779	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1780	*
1781	* Each line is terminated with a newline (\n)
1782	* character only.
1783	*
1784	*@@added V0.9.1 (2000-01-22) [umoeller]
1785	*/
1786
1787	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1788	ULONG ulSize, // in: size of buffer
1789	ULONG ulIndent) // in: indentation of every line
1790	{
1791	PSZ pszReturn = 0;
1792	XSTRING strReturn;
1793	CHAR szTemp[1000];
1794
1795	PBYTE pbCurrent = pb; // current byte
1796	ULONG ulCount = 0,
1797	ulCharsInLine = 0; // if this grows > 7, a new line is started
1798	CHAR szLine[400] = "",
1799	szAscii[30] = " "; // ASCII representation; filled for every line
1800	PSZ pszLine = szLine,
1801	pszAscii = szAscii;
1802
1803	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1804
1805	for (pbCurrent = pb;
1806	ulCount < ulSize;
1807	pbCurrent++, ulCount++)
1808	{
1809	if (ulCharsInLine == 0)
1810	{
1811	memset(szLine, ' ', ulIndent);
1812	pszLine += ulIndent;
1813	}
1814	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1815
1816	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1817	// printable character:
1818	pszAscii = pbCurrent;
1819	else
1820	*pszAscii = '.';
1821	pszAscii++;
1822
1823	ulCharsInLine++;
1824	if ( (ulCharsInLine > 7) // 8 bytes added?
1825	\|\| (ulCount == ulSize-1) // end of buffer reached?
1826	)
1827	{
1828	// if we haven't had eight bytes yet,
1829	// fill buffer up to eight bytes with spaces
1830	ULONG ul2;
1831	for (ul2 = ulCharsInLine;
1832	ul2 < 8;
1833	ul2++)
1834	pszLine += sprintf(pszLine, " ");
1835
1836	sprintf(szTemp, "%04lX: %s %s\n",
1837	(ulCount & 0xFFFFFFF8), // offset in hex
1838	szLine, // bytes string
1839	szAscii); // ASCII string
1840	xstrcat(&strReturn, szTemp);
1841
1842	// restart line buffer
1843	pszLine = szLine;
1844
1845	// clear ASCII buffer
1846	strcpy(szAscii, " ");
1847	pszAscii = szAscii;
1848
1849	// reset line counter
1850	ulCharsInLine = 0;
1851	}
1852	}
1853
1854	if (strReturn.cbAllocated)
1855	pszReturn = strReturn.psz;
1856
1857	return (pszReturn);
1858	}
1859
1860	/* ******************************************************************
1861	* *
1862	* Wildcard matching *
1863	* *
1864	********************************************************************/
1865
1866	/*
1867	* The following code has been taken from "fnmatch.zip".
1868	*
1869	* (c) 1994-1996 by Eberhard Mattes.
1870	*/
1871
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the null byte or a separator. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))
1882
/*
 * skip_comp_os2:
 *      Return a pointer to the next component of the path SRC, for OS/2
 *      and DOS styles. When the end of the string is reached, a pointer
 *      to the terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *next;

    /* Walk to the separator or null byte which ends this component. */

    for (next = src; !IS_OS2_COMP_END(*next); next++)
        ;

    /* A separator is stepped over; the null byte is not. */

    return (*next != 0) ? next + 1 : next;
}
1906
/*
 * has_colon:
 *      returns 1 if the path P contains a colon,
 *      0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    /* strchr stops at the null byte, just like a manual scan. */
    return (strchr((const char *)p, ':') != NULL) ? 1 : 0;
}
1923
1924	/*
1925	* match_comp_os2:
1926	* Compare a single component (directory name or file name) of the
1927	* paths, for OS/2 and DOS styles. MASK and NAME point into a
1928	* component of the wildcard and the name to be checked, respectively.
1929	* Comparing stops at the next separator. The FLAGS argument is the
1930	* same as that of fnmatch(). HAS_DOT is true if a dot is in the
1931	* current component of NAME. The number of dots is not restricted,
1932	* even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1933	* Note that this function is recursive.
1934	*
1935	* (c) 1994-1996 by Eberhard Mattes.
1936	*/
1937
1938	static int match_comp_os2(const unsigned char *mask,
1939	const unsigned char *name,
1940	unsigned flags,
1941	int has_dot)
1942	{
1943	int rc;
1944
1945	for (;;)
1946	switch (*mask)
1947	{
1948	case 0:
1949
1950	/* There must be no extra characters at the end of NAME when
1951	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1952	* in that case, NAME may point to a separator. */
1953
1954	if (*name == 0)
1955	return FNM_MATCH;
1956	if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1957	return FNM_MATCH;
1958	return FNM_NOMATCH;
1959
1960	case '/':
1961	case '\\':
1962
1963	/* Separators match separators. */
1964
1965	if (IS_OS2_COMP_SEP(*name))
1966	return FNM_MATCH;
1967
1968	/* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1969	* is ignored at the end of NAME. */
1970
1971	if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1972	return FNM_MATCH;
1973
1974	/* Stop comparing at the separator. */
1975
1976	return FNM_NOMATCH;
1977
1978	case '?':
1979
1980	/* A question mark matches one character. It does not match
1981	* a dot. At the end of the component (and before a dot),
1982	* it also matches zero characters. */
1983
1984	if (name != '.' && !IS_OS2_COMP_END(name))
1985	++name;
1986	++mask;
1987	break;
1988
1989	case '*':
1990
1991	/* An asterisk matches zero or more characters. In DOS
1992	* mode, dots are not matched. */
1993
1994	do
1995	{
1996	++mask;
1997	}
1998	while (mask == '');
1999	for (;;)
2000	{
2001	rc = match_comp_os2(mask, name, flags, has_dot);
2002	if (rc != FNM_NOMATCH)
2003	return rc;
2004	if (IS_OS2_COMP_END(*name))
2005	return FNM_NOMATCH;
2006	if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
2007	return FNM_NOMATCH;
2008	++name;
2009	}
2010
2011	case '.':
2012
2013	/* A dot matches a dot. It also matches the implicit dot at
2014	* the end of a dot-less NAME. */
2015
2016	++mask;
2017	if (*name == '.')
2018	++name;
2019	else if (has_dot \|\| !IS_OS2_COMP_END(*name))
2020	return FNM_NOMATCH;
2021	break;
2022
2023	default:
2024
2025	/* All other characters match themselves. */
2026
2027	if (flags & _FNM_IGNORECASE)
2028	{
2029	if (tolower(mask) != tolower(name))
2030	return FNM_NOMATCH;
2031	}
2032	else
2033	{
2034	if (mask != name)
2035	return FNM_NOMATCH;
2036	}
2037	++mask;
2038	++name;
2039	break;
2040	}
2041	}
2042
2043	/*
2044	* match_comp:
2045	* compare a single component (directory name or file name) of the
2046	* paths, for all styles which need component-by-component matching.
2047	* MASK and NAME point to the start of a component of the wildcard and
2048	* the name to be checked, respectively. Comparing stops at the next
2049	* separator. The FLAGS argument is the same as that of fnmatch().
2050	* Return FNM_MATCH iff MASK and NAME match.
2051	*
2052	* (c) 1994-1996 by Eberhard Mattes.
2053	*/
2054
2055	static int match_comp(const unsigned char *mask,
2056	const unsigned char *name,
2057	unsigned flags)
2058	{
2059	const unsigned char *s;
2060
2061	switch (flags & _FNM_STYLE_MASK)
2062	{
2063	case _FNM_OS2:
2064	case _FNM_DOS:
2065
2066	/* For OS/2 and DOS styles, we add an implicit dot at the end of
2067	* the component if the component doesn't include a dot. */
2068
2069	s = name;
2070	while (!IS_OS2_COMP_END(s) && s != '.')
2071	++s;
2072	return match_comp_os2(mask, name, flags, *s == '.');
2073
2074	default:
2075	return FNM_ERR;
2076	}
2077	}
2078
/* In Unix styles, / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C)  ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the null byte or a separator. */

#define IS_UNIX_COMP_END(C)  ((C) == 0 || IS_UNIX_COMP_SEP (C))
2089
2090	/*
2091	* match_unix:
2092	* match complete paths for Unix styles. The FLAGS argument is the
2093	* same as that of fnmatch(). COMP points to the start of the current
2094	* component in NAME. Return FNM_MATCH iff MASK and NAME match. The
2095	* backslash character is used for escaping ? and * unless
2096	* FNM_NOESCAPE is set.
2097	*
2098	* (c) 1994-1996 by Eberhard Mattes.
2099	*/
2100
2101	static int match_unix(const unsigned char *mask,
2102	const unsigned char *name,
2103	unsigned flags,
2104	const unsigned char *comp)
2105	{
2106	unsigned char c1, c2;
2107	char invert, matched;
2108	const unsigned char *start;
2109	int rc;
2110
2111	for (;;)
2112	switch (*mask)
2113	{
2114	case 0:
2115
2116	/* There must be no extra characters at the end of NAME when
2117	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
2118	* in that case, NAME may point to a separator. */
2119
2120	if (*name == 0)
2121	return FNM_MATCH;
2122	if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
2123	return FNM_MATCH;
2124	return FNM_NOMATCH;
2125
2126	case '?':
2127
2128	/* A question mark matches one character. It does not match
2129	* the component separator if FNM_PATHNAME is set. It does
2130	* not match a dot at the start of a component if FNM_PERIOD
2131	* is set. */
2132
2133	if (*name == 0)
2134	return FNM_NOMATCH;
2135	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2136	return FNM_NOMATCH;
2137	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2138	return FNM_NOMATCH;
2139	++mask;
2140	++name;
2141	break;
2142
2143	case '*':
2144
2145	/* An asterisk matches zero or more characters. It does not
2146	* match the component separator if FNM_PATHNAME is set. It
2147	* does not match a dot at the start of a component if
2148	* FNM_PERIOD is set. */
2149
2150	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2151	return FNM_NOMATCH;
2152	do
2153	{
2154	++mask;
2155	}
2156	while (mask == '');
2157	for (;;)
2158	{
2159	rc = match_unix(mask, name, flags, comp);
2160	if (rc != FNM_NOMATCH)
2161	return rc;
2162	if (*name == 0)
2163	return FNM_NOMATCH;
2164	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2165	return FNM_NOMATCH;
2166	++name;
2167	}
2168
2169	case '/':
2170
2171	/* Separators match only separators. If _FNM_PATHPREFIX is
2172	* set, a trailing separator in MASK is ignored at the end
2173	* of NAME. */
2174
2175	if (!(IS_UNIX_COMP_SEP(*name)
2176	\|\| ((flags & _FNM_PATHPREFIX) && *name == 0
2177	&& (mask[1] == 0
2178	\|\| (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
2179	&& mask[2] == 0)))))
2180	return FNM_NOMATCH;
2181
2182	++mask;
2183	if (*name != 0)
2184	++name;
2185
2186	/* This is the beginning of a new component if FNM_PATHNAME
2187	* is set. */
2188
2189	if (flags & FNM_PATHNAME)
2190	comp = name;
2191	break;
2192
2193	case '[':
2194
2195	/* A set of characters. Always case-sensitive. */
2196
2197	if (*name == 0)
2198	return FNM_NOMATCH;
2199	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2200	return FNM_NOMATCH;
2201	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2202	return FNM_NOMATCH;
2203
2204	invert = 0;
2205	matched = 0;
2206	++mask;
2207
2208	/* If the first character is a ! or ^, the set matches all
2209	* characters not listed in the set. */
2210
2211	if (mask == '!' \|\| mask == '^')
2212	{
2213	++mask;
2214	invert = 1;
2215	}
2216
2217	/* Loop over all the characters of the set. The loop ends
2218	* if the end of the string is reached or if a ] is
2219	* encountered unless it directly follows the initial [ or
2220	* [-. */
2221
2222	start = mask;
2223	while (!(mask == 0 \|\| (mask == ']' && mask != start)))
2224	{
2225	/* Get the next character which is optionally preceded
2226	* by a backslash. */
2227
2228	c1 = *mask++;
2229	if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2230	{
2231	if (*mask == 0)
2232	break;
2233	c1 = *mask++;
2234	}
2235
2236	/* Ranges of characters are written as a-z. Don't
2237	* forget to check for the end of the string and to
2238	* handle the backslash. If the character after - is a
2239	* ], it isn't a range. */
2240
2241	if (*mask == '-' && mask[1] != ']')
2242	{
2243	++mask; /* Skip the - character */
2244	if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2245	++mask;
2246	if (*mask == 0)
2247	break;
2248	c2 = *mask++;
2249	}
2250	else
2251	c2 = c1;
2252
2253	/* Now check whether this character or range matches NAME. */
2254
2255	if (c1 <= name && name <= c2)
2256	matched = 1;
2257	}
2258
2259	/* If the end of the string is reached before a ] is found,
2260	* back up to the [ and compare it to NAME. */
2261
2262	if (*mask == 0)
2263	{
2264	if (*name != '[')
2265	return FNM_NOMATCH;
2266	++name;
2267	mask = start;
2268	if (invert)
2269	--mask;
2270	}
2271	else
2272	{
2273	if (invert)
2274	matched = !matched;
2275	if (!matched)
2276	return FNM_NOMATCH;
2277	++mask; /* Skip the ] character */
2278	if (*name != 0)
2279	++name;
2280	}
2281	break;
2282
2283	case '\\':
2284	++mask;
2285	if (flags & FNM_NOESCAPE)
2286	{
2287	if (*name != '\\')
2288	return FNM_NOMATCH;
2289	++name;
2290	}
2291	else if (mask == '' \|\| *mask == '?')
2292	{
2293	if (mask != name)
2294	return FNM_NOMATCH;
2295	++mask;
2296	++name;
2297	}
2298	break;
2299
2300	default:
2301
2302	/* All other characters match themselves. */
2303
2304	if (flags & _FNM_IGNORECASE)
2305	{
2306	if (tolower(mask) != tolower(name))
2307	return FNM_NOMATCH;
2308	}
2309	else
2310	{
2311	if (mask != name)
2312	return FNM_NOMATCH;
2313	}
2314	++mask;
2315	++name;
2316	break;
2317	}
2318	}
2319
2320	/*
2321	* _fnmatch_unsigned:
2322	* Check whether the path name NAME matches the wildcard MASK.
2323	*
2324	* Return:
2325	* -- 0 (FNM_MATCH) if it matches,
2326	* -- _FNM_NOMATCH if it doesn't,
2327	* -- FNM_ERR on error.
2328	*
2329	* The operation of this function is controlled by FLAGS.
2330	* This is an internal function, with unsigned arguments.
2331	*
2332	* (c) 1994-1996 by Eberhard Mattes.
2333	*/
2334
2335	static int _fnmatch_unsigned(const unsigned char *mask,
2336	const unsigned char *name,
2337	unsigned flags)
2338	{
2339	int m_drive, n_drive,
2340	rc;
2341
2342	/* Match and skip the drive name if present. */
2343
2344	m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2345	n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2346
2347	if (m_drive != n_drive)
2348	{
2349	if (m_drive == -1 \|\| n_drive == -1)
2350	return FNM_NOMATCH;
2351	if (!(flags & _FNM_IGNORECASE))
2352	return FNM_NOMATCH;
2353	if (tolower(m_drive) != tolower(n_drive))
2354	return FNM_NOMATCH;
2355	}
2356
2357	if (m_drive != -1)
2358	mask += 2;
2359	if (n_drive != -1)
2360	name += 2;
2361
2362	/* Colons are not allowed in path names, except for the drive name,
2363	* which was skipped above. */
2364
2365	if (has_colon(mask) \|\| has_colon(name))
2366	return FNM_ERR;
2367
2368	/* The name "\\server\path" should not be matched by mask
2369	* "\\server\path". Ditto for /. /
2370
2371	switch (flags & _FNM_STYLE_MASK)
2372	{
2373	case _FNM_OS2:
2374	case _FNM_DOS:
2375
2376	if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2377	{
2378	if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2379	return FNM_NOMATCH;
2380	name += 2;
2381	mask += 2;
2382	}
2383	break;
2384
2385	case _FNM_POSIX:
2386
2387	if (name[0] == '/' && name[1] == '/')
2388	{
2389	int i;
2390
2391	name += 2;
2392	for (i = 0; i < 2; ++i)
2393	if (mask[0] == '/')
2394	++mask;
2395	else if (mask[0] == '\\' && mask[1] == '/')
2396	mask += 2;
2397	else
2398	return FNM_NOMATCH;
2399	}
2400
2401	/* In Unix styles, treating ? and * w.r.t. components is simple.
2402	* No need to do matching component by component. */
2403
2404	return match_unix(mask, name, flags, name);
2405	}
2406
2407	/* Now compare all the components of the path name, one by one.
2408	* Note that the path separator must not be enclosed in brackets. */
2409
2410	while (mask != 0 \|\| name != 0)
2411	{
2412
2413	/* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2414	* is reached even if there are components left in NAME. */
2415
2416	if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2417	return FNM_MATCH;
2418
2419	/* Compare a single component of the path name. */
2420
2421	rc = match_comp(mask, name, flags);
2422	if (rc != FNM_MATCH)
2423	return rc;
2424
2425	/* Skip to the next component or to the end of the path name. */
2426
2427	mask = skip_comp_os2(mask);
2428	name = skip_comp_os2(name);
2429	}
2430
2431	/* If we reached the ends of both strings, the names match. */
2432
2433	if (mask == 0 && name == 0)
2434	return FNM_MATCH;
2435
2436	/* The names do not match. */
2437
2438	return FNM_NOMATCH;
2439	}
2440
2441	/*
2442	*@@ strhMatchOS2:
2443	* this matches wildcards, similar to what DosEditName does.
2444	* However, this does not require a file to be present, but
2445	* works on strings only.
2446	*/
2447
2448	BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2449	const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2450	{
2451	return ((BOOL)(_fnmatch_unsigned(pcszMask,
2452	pcszName,
2453	_FNM_OS2 \| _FNM_IGNORECASE)
2454	== FNM_MATCH)
2455	);
2456	}
2457
2458	/* ******************************************************************
2459	* *
2460	* Fast string searches *
2461	* *
2462	********************************************************************/
2463
// ASSERT(a) is defined empty here, so every ASSERT(...)
// statement after this point expands to nothing
#define ASSERT(a)
2465
2466	/*
2467	* The following code has been taken from the "Standard
2468	* Function Library", file sflfind.c, and only slightly
2469	* modified to conform to the rest of this file.
2470	*
2471	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2472	* Revised: 98/05/04
2473	*
2474	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2475	*
2476	* The SFL Licence allows incorporating SFL code into other
2477	* programs, as long as the copyright is reprinted and the
2478	* code is marked as modified, so this is what we do.
2479	*/
2480
2481	/*
2482	*@@ strhmemfind:
2483	* searches for a pattern in a block of memory using the
2484	* Boyer-Moore-Horspool-Sunday algorithm.
2485	*
2486	* The block and pattern may contain any values; you must
2487	* explicitly provide their lengths. If you search for strings,
2488	* use strlen() on the buffers.
2489	*
2490	* Returns a pointer to the pattern if found within the block,
2491	* or NULL if the pattern was not found.
2492	*
2493	* This algorithm needs a "shift table" to cache data for the
2494	* search pattern. This table can be reused when performing
2495	* several searches with the same pattern.
2496	*
2497	* "shift" must point to an array big enough to hold 256 (8**2)
2498	* "size_t" values.
2499	*
2500	* If (*repeat_find == FALSE), the shift table is initialized.
2501	* So on the first search with a given pattern, *repeat_find
2502	* should be FALSE. This function sets it to TRUE after the
2503	* shift table is initialised, allowing the initialisation
2504	* phase to be skipped on subsequent searches.
2505	*
2506	* This function is most effective when repeated searches are
2507	* made for the same pattern in one or more large buffers.
2508	*
2509	* Example:
2510	*
2511	+ PSZ pszHaystack = "This is a sample string.",
2512	+ pszNeedle = "string";
2513	+ size_t shift[256];
2514	+ BOOL fRepeat = FALSE;
2515	+
2516	+ PSZ pFound = strhmemfind(pszHaystack,
2517	+ strlen(pszHaystack), // block size
2518	+ pszNeedle,
2519	+ strlen(pszNeedle), // pattern size
2520	+ shift,
2521	+ &fRepeat);
2522	*
2523	* Taken from the "Standard Function Library", file sflfind.c.
2524	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2525	* Slightly modified by umoeller.
2526	*
2527	*@@added V0.9.3 (2000-05-08) [umoeller]
2528	*/
2529
2530	void* strhmemfind(const void *in_block, // in: block containing data
2531	size_t block_size, // in: size of block in bytes
2532	const void *in_pattern, // in: pattern to search for
2533	size_t pattern_size, // in: size of pattern block
2534	size_t *shift, // in/out: shift table (search buffer)
2535	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
2536	{
2537	size_t byte_nbr, // Distance through block
2538	match_size; // Size of matched part
2539	const unsigned char
2540	*match_base = NULL, // Base of match of pattern
2541	*match_ptr = NULL, // Point within current match
2542	*limit = NULL; // Last potiental match point
2543	const unsigned char
2544	block = (unsigned char ) in_block, // Concrete pointer to block data
2545	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
2546
2547	if ( (block == NULL)
2548	\|\| (pattern == NULL)
2549	\|\| (shift == NULL)
2550	)
2551	return (NULL);
2552
2553	// Pattern must be smaller or equal in size to string
2554	if (block_size < pattern_size)
2555	return (NULL); // Otherwise it's not found
2556
2557	if (pattern_size == 0) // Empty patterns match at start
2558	return ((void *)block);
2559
2560	// Build the shift table unless we're continuing a previous search
2561
2562	// The shift table determines how far to shift before trying to match
2563	// again, if a match at this point fails. If the byte after where the
2564	// end of our pattern falls is not in our pattern, then we start to
2565	// match again after that byte; otherwise we line up the last occurence
2566	// of that byte in our pattern under that byte, and try match again.
2567
2568	if (!repeat_find \|\| !*repeat_find)
2569	{
2570	for (byte_nbr = 0;
2571	byte_nbr < 256;
2572	byte_nbr++)
2573	shift[byte_nbr] = pattern_size + 1;
2574	for (byte_nbr = 0;
2575	byte_nbr < pattern_size;
2576	byte_nbr++)
2577	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2578
2579	if (repeat_find)
2580	*repeat_find = TRUE;
2581	}
2582
2583	// Search for the block, each time jumping up by the amount
2584	// computed in the shift table
2585
2586	limit = block + (block_size - pattern_size + 1);
2587	ASSERT (limit > block);
2588
2589	for (match_base = block;
2590	match_base < limit;
2591	match_base += shift[*(match_base + pattern_size)])
2592	{
2593	match_ptr = match_base;
2594	match_size = 0;
2595
2596	// Compare pattern until it all matches, or we find a difference
2597	while (*match_ptr++ == pattern[match_size++])
2598	{
2599	ASSERT (match_size <= pattern_size &&
2600	match_ptr == (match_base + match_size));
2601
2602	// If we found a match, return the start address
2603	if (match_size >= pattern_size)
2604	return ((void*)(match_base));
2605
2606	}
2607	}
2608	return (NULL); // Found nothing
2609	}
2610
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the
 *      first occurrence of the pattern within the string, or NULL if
 *      the pattern was not found or if either argument is NULL. An
 *      empty pattern matches at the start of the string.
 *
 *      Will match strings irrespective of case, as determined by
 *      tolower(). To match exact strings, use strhfind(). Will not
 *      work on multibyte characters.
 *
 *      All characters are converted to unsigned char before being
 *      passed to tolower(), so characters above 0x7F are handled
 *      in a well-defined way even if plain char is signed.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t  shift [256];                // Shift distance for each value
    size_t  string_size,
            pattern_size,
            byte_nbr,                   // Index into byte array
            last_start,                 // Last offset at which the pattern still fits
            pos,                        // Offset of the current candidate window
            match_size;                 // Size of matched part
    const unsigned char
            *text,                      // string, viewed as unsigned bytes
            *pat;                       // pattern, viewed as unsigned bytes

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return (NULL);

    // <ctype.h> functions require an argument that is representable
    // as unsigned char (or EOF); plain char may be negative.
    text = (const unsigned char *)string;
    pat = (const unsigned char *)pattern;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.
    //
    // text [pos + pattern_size] is always readable: for the last
    // window it is the terminating null byte, whose shift value is
    // pattern_size + 1 and therefore ends the loop.

    last_start = string_size - pattern_size;

    for (pos = 0;
         pos <= last_start;
         pos += shift [(unsigned char) tolower (text [pos + pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        match_size = 0;
        while (    (match_size < pattern_size)
                && (tolower (text [pos + match_size]) == tolower (pat [match_size]))
              )
            match_size++;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *)(string + pos));
    }

    return (NULL);                      // Found nothing
}
2705

Note: See TracBrowser for help on using the repository browser.

Download in other formats: