Context Navigation

source: trunk/src/helpers/stringh.c@ 21

Visit:

Last change on this file since 21 was 21, checked in by umoeller, 25 years ago
Final changes for 0.9.7, i hope...
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 84.5 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#include <os2.h>
45
46	#include <stdlib.h>
47	#include <stdio.h>
48	#include <string.h>
49	#include <ctype.h>
50	#include <math.h>
51
52	#include "setup.h" // code generation and debugging options
53
54	#include "helpers\stringh.h"
55	#include "helpers\xstring.h" // extended string helpers
56
57	#pragma hdrstop
58
59	/*
60	*@@category: Helpers\C helpers\String management
61	* See stringh.c and xstring.c.
62	*/
63
64	/*
65	*@@category: Helpers\C helpers\String management\C string helpers
66	* See stringh.c.
67	*/
68
69	/*
70	*@@ strhdup:
71	* like strdup, but this one
72	* doesn't crash if pszSource is NULL,
73	* but returns NULL also.
74	*
75	*@@added V0.9.0 [umoeller]
76	*/
77
78	PSZ strhdup(const char *pszSource)
79	{
80	if (pszSource)
81	return (strdup(pszSource));
82	else
83	return (0);
84	}
85
86	/*
87	*@@ strhistr:
88	* like strstr, but case-insensitive.
89	*
90	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
91	*/
92
93	PSZ strhistr(const char string1, const char string2)
94	{
95	PSZ prc = NULL;
96
97	if ((string1) && (string2))
98	{
99	PSZ pszSrchIn = strdup(string1);
100	PSZ pszSrchFor = strdup(string2);
101
102	if ((pszSrchIn) && (pszSrchFor))
103	{
104	strupr(pszSrchIn);
105	strupr(pszSrchFor);
106
107	prc = strstr(pszSrchIn, pszSrchFor);
108	if (prc)
109	{
110	// prc now has the first occurence of the string,
111	// but in pszSrchIn; we need to map this
112	// return value to the original string
113	prc = (prc-pszSrchIn) // offset in pszSrchIn
114	+ (PSZ)string1;
115	}
116	}
117	if (pszSrchFor)
118	free(pszSrchFor);
119	if (pszSrchIn)
120	free(pszSrchIn);
121	}
122	return (prc);
123	}
124
125	/*
126	*@@ strhncpy0:
127	* like strncpy, but always appends a 0 character.
128	*/
129
130	ULONG strhncpy0(PSZ pszTarget,
131	const char *pszSource,
132	ULONG cbSource)
133	{
134	ULONG ul = 0;
135	PSZ pTarget = pszTarget,
136	pSource = (PSZ)pszSource;
137
138	for (ul = 0; ul < cbSource; ul++)
139	if (*pSource)
140	pTarget++ = pSource++;
141	else
142	break;
143	*pTarget = 0;
144
145	return (ul);
146	}
147
148	/*
149	* strhCount:
150	* this counts the occurences of c in pszSearch.
151	*/
152
153	ULONG strhCount(const char *pszSearch,
154	CHAR c)
155	{
156	PSZ p = (PSZ)pszSearch;
157	ULONG ulCount = 0;
158	while (TRUE)
159	{
160	p = strchr(p, c);
161	if (p)
162	{
163	ulCount++;
164	p++;
165	}
166	else
167	break;
168	}
169	return (ulCount);
170	}
171
172	/*
173	*@@ strhIsDecimal:
174	* returns TRUE if psz consists of decimal digits only.
175	*/
176
177	BOOL strhIsDecimal(PSZ psz)
178	{
179	PSZ p = psz;
180	while (*p != 0)
181	{
182	if (isdigit(*p) == 0)
183	return (FALSE);
184	p++;
185	}
186
187	return (TRUE);
188	}
189
190	/*
191	*@@ strhSubstr:
192	* this creates a new PSZ containing the string
193	* from pBegin to pEnd, excluding the pEnd character.
194	* The new string is null-terminated. The caller
195	* must free() the new string after use.
196	*
197	* Example:
198	+ "1234567890"
199	+ ^ ^
200	+ p1 p2
201	+ strhSubstr(p1, p2)
202	* would return a new string containing "2345678".
203	*/
204
205	PSZ strhSubstr(const char pBegin, const char pEnd)
206	{
207	ULONG cbSubstr = (pEnd - pBegin);
208	PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
209	strhncpy0(pszSubstr, pBegin, cbSubstr);
210	return (pszSubstr);
211	}
212
213	/*
214	*@@ strhExtract:
215	* searches pszBuf for the cOpen character and returns
216	* the data in between cOpen and cClose, excluding
217	* those two characters, in a newly allocated buffer
218	* which you must free() afterwards.
219	*
220	* Spaces and newlines/linefeeds are skipped.
221	*
222	* If the search was successful, the new buffer
223	* is returned and, if (ppEnd != NULL), *ppEnd points
224	* to the first character after the cClose character
225	* found in the buffer.
226	*
227	* If the search was not successful, NULL is
228	* returned, and *ppEnd is unchanged.
229	*
230	* If another cOpen character is found before
231	* cClose, matching cClose characters will be skipped.
232	* You can therefore nest the cOpen and cClose
233	* characters.
234	*
235	* This function ignores cOpen and cClose characters
236	* in C-style comments and strings surrounded by
237	* double quotes.
238	*
239	* Example:
240	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
241	+ pEnd;
242	+ strhExtract(pszBuf,
243	+ '{', '}',
244	+ &pEnd)
245	* would return a new buffer containing " --blah-- ",
246	* and ppEnd would afterwards point to the space
247	* before "next" in the static buffer.
248	*
249	*@@added V0.9.0 [umoeller]
250	*/
251
252	PSZ strhExtract(PSZ pszBuf, // in: search buffer
253	CHAR cOpen, // in: opening char
254	CHAR cClose, // in: closing char
255	PSZ *ppEnd) // out: if != NULL, receives first character after closing char
256	{
257	PSZ pszReturn = NULL;
258
259	if (pszBuf)
260	{
261	PSZ pOpen = strchr(pszBuf, cOpen);
262	if (pOpen)
263	{
264	// opening char found:
265	// now go thru the whole rest of the buffer
266	PSZ p = pOpen+1;
267	LONG lLevel = 1; // if this goes 0, we're done
268	while (*p)
269	{
270	if (*p == cOpen)
271	lLevel++;
272	else if (*p == cClose)
273	{
274	lLevel--;
275	if (lLevel <= 0)
276	{
277	// matching closing bracket found:
278	// extract string
279	pszReturn = strhSubstr(pOpen+1, // after cOpen
280	p); // excluding cClose
281	if (ppEnd)
282	*ppEnd = p+1;
283	break; // while (*p)
284	}
285	}
286	else if (*p == '\"')
287	{
288	// beginning of string:
289	PSZ p2 = p+1;
290	// find end of string
291	while ((p2) && (p2 != '\"'))
292	p2++;
293
294	if (*p2 == '\"')
295	// closing quote found:
296	// search on after that
297	p = p2; // raised below
298	else
299	break; // while (*p)
300	}
301
302	p++;
303	}
304	}
305	}
306
307	return (pszReturn);
308	}
309
310	/*
311	*@@ strhQuote:
312	* similar to strhExtract, except that
313	* opening and closing chars are the same,
314	* and therefore no nesting is possible.
315	* Useful for extracting stuff between
316	* quotes.
317	*
318	*@@added V0.9.0 [umoeller]
319	*/
320
321	PSZ strhQuote(PSZ pszBuf,
322	CHAR cQuote,
323	PSZ *ppEnd)
324	{
325	PSZ pszReturn = NULL,
326	p1 = NULL;
327	if ((p1 = strchr(pszBuf, cQuote)))
328	{
329	PSZ p2 = strchr(p1+1, cQuote);
330	if (p2)
331	{
332	pszReturn = strhSubstr(p1+1, p2);
333	if (ppEnd)
334	// store closing char
335	*ppEnd = p2 + 1;
336	}
337	}
338
339	return (pszReturn);
340	}
341
342	/*
343	*@@ strhStrip:
344	* removes all double spaces.
345	* This copies within the "psz" buffer.
346	* If any double spaces are found, the
347	* string will be shorter than before,
348	* but the buffer is _not_ reallocated,
349	* so there will be unused bytes at the
350	* end.
351	*
352	* Returns the number of spaces removed.
353	*
354	*@@added V0.9.0 [umoeller]
355	*/
356
357	ULONG strhStrip(PSZ psz) // in/out: string
358	{
359	PSZ p;
360	ULONG cb = strlen(psz),
361	ulrc = 0;
362
363	for (p = psz; p < psz+cb; p++)
364	{
365	if ((p == ' ') && ((p+1) == ' '))
366	{
367	PSZ p2 = p;
368	while (*p2)
369	{
370	p2 = (p2+1);
371	p2++;
372	}
373	cb--;
374	p--;
375	ulrc++;
376	}
377	}
378	return (ulrc);
379	}
380
381	/*
382	*@@ strhins:
383	* this inserts one string into another.
384	*
385	* pszInsert is inserted into pszBuffer at offset
386	* ulInsertOfs (which counts from 0).
387	*
388	* A newly allocated string is returned. pszBuffer is
389	* not changed. The new string should be free()'d after
390	* use.
391	*
392	* Upon errors, NULL is returned.
393	*
394	*@@changed V0.9.0 [umoeller]: completely rewritten.
395	*/
396
397	PSZ strhins(const char *pcszBuffer,
398	ULONG ulInsertOfs,
399	const char *pcszInsert)
400	{
401	PSZ pszNew = NULL;
402
403	if ((pcszBuffer) && (pcszInsert))
404	{
405	do {
406	ULONG cbBuffer = strlen(pcszBuffer);
407	ULONG cbInsert = strlen(pcszInsert);
408
409	// check string length
410	if (ulInsertOfs > cbBuffer + 1)
411	break; // do
412
413	// OK, let's go.
414	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
415
416	// copy stuff before pInsertPos
417	memcpy(pszNew,
418	pcszBuffer,
419	ulInsertOfs);
420	// copy string to be inserted
421	memcpy(pszNew + ulInsertOfs,
422	pcszInsert,
423	cbInsert);
424	// copy stuff after pInsertPos
425	strcpy(pszNew + ulInsertOfs + cbInsert,
426	pcszBuffer + ulInsertOfs);
427	} while (FALSE);
428	}
429
430	return (pszNew);
431	}
432
433	/*
434	*@@ strhrpl:
435	* wrapper around xstrrpl to work with C strings.
436	* Note that *ppszBuf can get reallocated and must
437	* be free()'able.
438	*
439	* Repetitive use of this wrapper is not recommended
440	* because it is considerably slower than xstrrpl.
441	*
442	*@@added V0.9.6 (2000-11-01) [umoeller]
443	*/
444
445	ULONG strhrpl(PSZ *ppszBuf, // in/out: string
446	PULONG pulOfs, // in: where to begin search (0 = start);
447	// out: ofs of first char after replacement string
448	const char *pcszSearch, // in: search string; cannot be NULL
449	const char *pcszReplace) // in: replacement string; cannot be NULL
450	{
451	ULONG ulrc = 0;
452	XSTRING xstrBuf,
453	xstrFind,
454	xstrReplace;
455	size_t ShiftTable[256];
456	BOOL fRepeat = FALSE;
457	xstrInit(&xstrBuf, 0);
458	xstrset(&xstrBuf, *ppszBuf);
459	xstrInit(&xstrFind, 0);
460	xstrset(&xstrFind, (PSZ)pcszSearch);
461	xstrInit(&xstrReplace, 0);
462	xstrset(&xstrReplace, (PSZ)pcszReplace);
463
464	if ((ulrc = xstrrpl(&xstrBuf,
465	pulOfs,
466	&xstrFind,
467	&xstrReplace,
468	ShiftTable,
469	&fRepeat)))
470	// replaced:
471	*ppszBuf = xstrBuf.psz;
472
473	return (ulrc);
474	}
475
476	/*
477	* strhWords:
478	* returns the no. of words in "psz".
479	* A string is considered a "word" if
480	* it is surrounded by spaces only.
481	*
482	*@@added V0.9.0 [umoeller]
483	*/
484
485	ULONG strhWords(PSZ psz)
486	{
487	PSZ p;
488	ULONG cb = strlen(psz),
489	ulWords = 0;
490	if (cb > 1)
491	{
492	ulWords = 1;
493	for (p = psz; p < psz+cb; p++)
494	if (*p == ' ')
495	ulWords++;
496	}
497	return (ulWords);
498	}
499
500	/*
501	*@@ strhThousandsULong:
502	* converts a ULONG into a decimal string, while
503	* inserting thousands separators into it. Specify
504	* the separator character in cThousands.
505	*
506	* Returns pszTarget so you can use it directly
507	* with sprintf and the "%s" flag.
508	*
509	* For cThousands, you should use the data in
510	* OS2.INI ("PM_National" application), which is
511	* always set according to the "Country" object.
512	* You can use prfhQueryCountrySettings to
513	* retrieve this setting.
514	*
515	* Use strhThousandsDouble for "double" values.
516	*/
517
518	PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
519	ULONG ul, // in: decimal to convert
520	CHAR cThousands) // in: separator char (e.g. '.')
521	{
522	USHORT ust, uss, usc;
523	CHAR szTemp[40];
524	sprintf(szTemp, "%lu", ul);
525
526	ust = 0;
527	usc = strlen(szTemp);
528	for (uss = 0; uss < usc; uss++)
529	{
530	if (uss)
531	if (((usc - uss) % 3) == 0)
532	{
533	pszTarget[ust] = cThousands;
534	ust++;
535	}
536	pszTarget[ust] = szTemp[uss];
537	ust++;
538	}
539	pszTarget[ust] = '\0';
540
541	return (pszTarget);
542	}
543
544	/*
545	*@@ strhThousandsDouble:
546	* like strhThousandsULong, but for a "double"
547	* value. Note that after-comma values are truncated.
548	*/
549
550	PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
551	{
552	USHORT ust, uss, usc;
553	CHAR szTemp[40];
554	sprintf(szTemp, "%.0f", floor(dbl));
555
556	ust = 0;
557	usc = strlen(szTemp);
558	for (uss = 0; uss < usc; uss++)
559	{
560	if (uss)
561	if (((usc - uss) % 3) == 0)
562	{
563	pszTarget[ust] = cThousands;
564	ust++;
565	}
566	pszTarget[ust] = szTemp[uss];
567	ust++;
568	}
569	pszTarget[ust] = '\0';
570
571	return (pszTarget);
572	}
573
574	/*
575	*@@ strhVariableDouble:
576	* like strhThousandsULong, but for a "double" value, and
577	* with a variable number of decimal places depending on the
578	* size of the quantity.
579	*
580	*@@added V0.9.6 (2000-11-12) [pr]
581	*/
582
583	PSZ strhVariableDouble(PSZ pszTarget,
584	double dbl,
585	PSZ pszUnits,
586	CHAR cThousands)
587	{
588	if (dbl < 100.0)
589	sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
590	else
591	if (dbl < 1000.0)
592	sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
593	else
594	strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
595	pszUnits);
596
597	return(pszTarget);
598	}
599
600	/*
601	*@@ strhFileDate:
602	* converts file date data to a string (to pszBuf).
603	* You can pass any FDATE structure to this function,
604	* which are returned in those FILEFINDBUF* or
605	* FILESTATUS* structs by the Dos* functions.
606	*
607	* ulDateFormat is the PM setting for the date format,
608	* as set in the "Country" object, and can be queried using
609	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
610	*
611	* meaning:
612	* -- 0 mm.dd.yyyy (English)
613	* -- 1 dd.mm.yyyy (e.g. German)
614	* -- 2 yyyy.mm.dd (Japanese, ISO)
615	* -- 3 yyyy.dd.mm
616	*
617	* cDateSep is used as a date separator (e.g. '.').
618	* This can be queried using:
619	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
620	*
621	* Alternatively, you can query all the country settings
622	* at once using prfhQueryCountrySettings (prfh.c).
623	*
624	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
625	*/
626
627	VOID strhFileDate(PSZ pszBuf, // out: string returned
628	FDATE *pfDate, // in: date information
629	ULONG ulDateFormat, // in: date format (0-3)
630	CHAR cDateSep) // in: date separator (e.g. '.')
631	{
632	DATETIME dt;
633	dt.day = pfDate->day;
634	dt.month = pfDate->month;
635	dt.year = pfDate->year + 1980;
636
637	strhDateTime(pszBuf,
638	NULL, // no time
639	&dt,
640	ulDateFormat,
641	cDateSep,
642	0, 0); // no time
643	}
644
645	/*
646	*@@ strhFileTime:
647	* converts file time data to a string (to pszBuf).
648	* You can pass any FTIME structure to this function,
649	* which are returned in those FILEFINDBUF* or
650	* FILESTATUS* structs by the Dos* functions.
651	*
652	* ulTimeFormat is the PM setting for the time format,
653	* as set in the "Country" object, and can be queried using
654	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
655	* meaning:
656	* -- 0 12-hour clock
657	* -- >0 24-hour clock
658	*
659	* cDateSep is used as a time separator (e.g. ':').
660	* This can be queried using:
661	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
662	*
663	* Alternatively, you can query all the country settings
664	* at once using prfhQueryCountrySettings (prfh.c).
665	*
666	*@@changed 99-03-15 fixed 12-hour crash
667	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
668	*/
669
670	VOID strhFileTime(PSZ pszBuf, // out: string returned
671	FTIME *pfTime, // in: time information
672	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
673	CHAR cTimeSep) // in: time separator (e.g. ':')
674	{
675	DATETIME dt;
676	dt.hours = pfTime->hours;
677	dt.minutes = pfTime->minutes;
678	dt.seconds = pfTime->twosecs * 2;
679
680	strhDateTime(NULL, // no date
681	pszBuf,
682	&dt,
683	0, 0, // no date
684	ulTimeFormat,
685	cTimeSep);
686	}
687
688	/*
689	*@@ strhDateTime:
690	* converts Control Program DATETIME info
691	* into two strings. See strhFileDate and strhFileTime
692	* for more detailed parameter descriptions.
693	*
694	*@@added V0.9.0 (99-11-07) [umoeller]
695	*/
696
697	VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
698	PSZ pszTime, // out: time string returned (can be NULL)
699	DATETIME *pDateTime, // in: date/time information
700	ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
701	CHAR cDateSep, // in: date separator (e.g. '.')
702	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
703	CHAR cTimeSep) // in: time separator (e.g. ':')
704	{
705	if (pszDate)
706	{
707	switch (ulDateFormat)
708	{
709	case 0: // mm.dd.yyyy (English)
710	sprintf(pszDate, "%02d%c%02d%c%04d",
711	pDateTime->month,
712	cDateSep,
713	pDateTime->day,
714	cDateSep,
715	pDateTime->year);
716	break;
717
718	case 1: // dd.mm.yyyy (e.g. German)
719	sprintf(pszDate, "%02d%c%02d%c%04d",
720	pDateTime->day,
721	cDateSep,
722	pDateTime->month,
723	cDateSep,
724	pDateTime->year);
725	break;
726
727	case 2: // yyyy.mm.dd (Japanese)
728	sprintf(pszDate, "%04d%c%02d%c%02d",
729	pDateTime->year,
730	cDateSep,
731	pDateTime->month,
732	cDateSep,
733	pDateTime->day);
734	break;
735
736	default: // yyyy.dd.mm
737	sprintf(pszDate, "%04d%c%02d%c%02d",
738	pDateTime->year,
739	cDateSep,
740	pDateTime->day,
741	cDateSep,
742	pDateTime->month);
743	break;
744	}
745	}
746
747	if (pszTime)
748	{
749	if (ulTimeFormat == 0)
750	{
751	// for 12-hour clock, we need additional INI data
752	CHAR szAMPM[10] = "err";
753
754	if (pDateTime->hours > 12)
755	{
756	// > 12h: PM.
757
758	// Note: 12:xx noon is 12 AM, not PM (even though
759	// AM stands for "ante meridiam", but English is just
760	// not logical), so that's handled below.
761
762	PrfQueryProfileString(HINI_USER,
763	"PM_National",
764	"s2359", // key
765	"PM", // default
766	szAMPM, sizeof(szAMPM)-1);
767	sprintf(pszTime, "%02d%c%02d%c%02d %s",
768	// leave 12 == 12 (not 0)
769	pDateTime->hours % 12,
770	cTimeSep,
771	pDateTime->minutes,
772	cTimeSep,
773	pDateTime->seconds,
774	szAMPM);
775	}
776	else
777	{
778	// <= 12h: AM
779	PrfQueryProfileString(HINI_USER,
780	"PM_National",
781	"s1159", // key
782	"AM", // default
783	szAMPM, sizeof(szAMPM)-1);
784	sprintf(pszTime, "%02d%c%02d%c%02d %s",
785	pDateTime->hours,
786	cTimeSep,
787	pDateTime->minutes,
788	cTimeSep,
789	pDateTime->seconds,
790	szAMPM);
791	}
792	}
793	else
794	// 24-hour clock
795	sprintf(pszTime, "%02d%c%02d%c%02d",
796	pDateTime->hours,
797	cTimeSep,
798	pDateTime->minutes,
799	cTimeSep,
800	pDateTime->seconds);
801	}
802	}
803
804	/*
805	*@@ strhGetWord:
806	* finds word boundaries.
807	*
808	* *ppszStart is used as the beginning of the
809	* search.
810	*
811	* If a word is found, *ppszStart is set to
812	* the first character of the word which was
813	* found and *ppszEnd receives the address
814	* of the first character _after_ the word,
815	* which is probably a space or a \n or \r char.
816	* We then return TRUE.
817	*
818	* The search is stopped if a null character
819	* is found or pLimit is reached. In that case,
820	* FALSE is returned.
821	*
822	*@@added V0.9.1 (2000-02-13) [umoeller]
823	*/
824
825	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
826	// out: start of word (if TRUE is returned)
827	const char pLimit, // in: ptr to last char after ppszStart to be
828	// searched; if the word does not end before
829	// or with this char, FALSE is returned
830	const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
831	const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
832	PSZ *ppszEnd) // out: first char _after_ word
833	// (if TRUE is returned)
834	{
835	// characters after which a word can be started
836	// const char *pcszBeginChars = "\x0d\x0a ";
837	// const char *pcszEndChars = "\x0d\x0a /-";
838
839	PSZ pStart = *ppszStart;
840
841	// find start of word
842	while ( (pStart < (PSZ)pLimit)
843	&& (strchr(pcszBeginChars, *pStart))
844	)
845	// if char is a "before word" char: go for next
846	pStart++;
847
848	if (pStart < (PSZ)pLimit)
849	{
850	// found a valid "word start" character
851	// (which is not in pcszBeginChars):
852
853	// find end of word
854	PSZ pEndOfWord = pStart;
855	while ( (pEndOfWord <= (PSZ)pLimit)
856	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
857	)
858	// if char is not an "end word" char: go for next
859	pEndOfWord++;
860
861	if (pEndOfWord <= (PSZ)pLimit)
862	{
863	// whoa, got a word:
864	*ppszStart = pStart;
865	*ppszEnd = pEndOfWord;
866	return (TRUE);
867	}
868	}
869
870	return (FALSE);
871	}
872
873	/*
874	*@@ strhIsWord:
875	* returns TRUE if p points to a "word"
876	* in pcszBuf.
877	*
878	* p is considered a word if the character _before_
879	* it is in pcszBeginChars and the char _after_
880	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
881	*
882	*@@added V0.9.6 (2000-11-12) [umoeller]
883	*/
884
885	BOOL strhIsWord(const char *pcszBuf,
886	const char *p, // in: start of word
887	ULONG cbSearch, // in: length of word
888	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
889	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
890	{
891	BOOL fEndOK = FALSE;
892
893	// check previous char
894	if ( (p == pcszBuf)
895	\|\| (strchr(pcszBeginChars, *(p-1)))
896	)
897	{
898	// OK, valid begin char:
899	// check end char
900	CHAR cNextChar = *(p + cbSearch);
901	if (cNextChar == 0)
902	fEndOK = TRUE;
903	else
904	{
905	char *pc = strchr(pcszEndChars, cNextChar);
906	if (pc)
907	// OK, is end char: avoid doubles of that char,
908	// but allow spaces
909	if ( (cNextChar+1 != *pc)
910	\|\| (cNextChar+1 == ' ')
911	\|\| (cNextChar+1 == 0)
912	)
913	fEndOK = TRUE;
914	}
915	}
916
917	return (fEndOK);
918	}
919
920	/*
921	*@@ strhFindWord:
922	* searches for pszSearch in pszBuf, which is
923	* returned if found (or NULL if not).
924	*
925	* As opposed to strstr, this finds pszSearch
926	* only if it is a "word". A search string is
927	* considered a word if the character _before_
928	* it is in pcszBeginChars and the char _after_
929	* it is in pcszEndChars.
930	*
931	* Example:
932	+ strhFindWord("This is an example.", "is");
933	+ returns ...........^ this, but not the "is" in "This".
934	*
935	* The algorithm here uses strstr to find pszSearch in pszBuf
936	* and performs additional "is-word" checks for each item found
937	* (by calling strhIsWord).
938	*
939	* Note that this function is fairly slow compared to xstrFindWord.
940	*
941	*@@added V0.9.0 (99-11-08) [umoeller]
942	*@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
943	*/
944
945	PSZ strhFindWord(const char *pszBuf,
946	const char *pszSearch,
947	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
948	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
949	{
950	PSZ pszReturn = 0;
951	ULONG cbBuf = strlen(pszBuf),
952	cbSearch = strlen(pszSearch);
953
954	if ((cbBuf) && (cbSearch))
955	{
956	const char *p = pszBuf;
957
958	do // while p
959	{
960	p = strstr(p, pszSearch);
961	if (p)
962	{
963	// string found:
964	// check if that's a word
965
966	if (strhIsWord(pszBuf,
967	p,
968	cbSearch,
969	pcszBeginChars,
970	pcszEndChars))
971	{
972	// valid end char:
973	pszReturn = (PSZ)p;
974	break;
975	}
976
977	p += cbSearch;
978	}
979	} while (p);
980
981	}
982	return (pszReturn);
983	}
984
985	/*
986	*@@ strhFindEOL:
987	* returns a pointer to the next \r, \n or null character
988	* following pszSearchIn. Stores the offset in *pulOffset.
989	*
990	* This should never return NULL because at some point,
991	* there will be a null byte in your string.
992	*
993	*@@added V0.9.4 (2000-07-01) [umoeller]
994	*/
995
996	PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
997	PULONG pulOffset) // out: offset (ptr can be NULL)
998	{
999	const char *p = pcszSearchIn,
1000	*prc = 0;
1001	while (TRUE)
1002	{
1003	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1004	{
1005	prc = p;
1006	break;
1007	}
1008	p++;
1009	}
1010
1011	if (pulOffset)
1012	*pulOffset = prc - pcszSearchIn;
1013
1014	return ((PSZ)prc);
1015	}
1016
1017	/*
1018	*@@ strhFindNextLine:
1019	* like strhFindEOL, but this returns the character
1020	* _after_ \r or \n. Note that this might return
1021	* a pointer to terminating NULL character also.
1022	*/
1023
1024	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1025	{
1026	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1027	// pEOL now points to the \r char or the terminating 0 byte;
1028	// if not null byte, advance pointer
1029	PSZ pNextLine = pEOL;
1030	if (*pNextLine == '\r')
1031	pNextLine++;
1032	if (*pNextLine == '\n')
1033	pNextLine++;
1034	if (pulOffset)
1035	*pulOffset = pNextLine - pszSearchIn;
1036	return (pNextLine);
1037	}
1038
1039	/*
1040	*@@ strhFindKey:
1041	* finds pszKey in pszSearchIn; similar to strhistr,
1042	* but this one makes sure the key is at the beginning
1043	* of a line. Spaces before the key are tolerated.
1044	* Returns NULL if the key was not found.
1045	*
1046	* Used by strhGetParameter/strhSetParameter; useful
1047	* for analyzing CONFIG.SYS settings.
1048	*
1049	*@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1050	*@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1051	*/
1052
1053	PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1054	const char *pcszKey, // in: key to search for
1055	PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1056	// can be NULL if not needed
1057	{
1058	const char *p = NULL;
1059	PSZ pReturn = NULL;
1060	// BOOL fFound = FALSE;
1061
1062	p = pcszSearchIn;
1063	do {
1064	p = strhistr(p, pcszKey);
1065
1066	if ((p) && (p >= pcszSearchIn))
1067	{
1068	// make sure the key is at the beginning of a line
1069	// by going backwards until we find a char != " "
1070	const char *p2 = p;
1071	while ( (*p2 == ' ')
1072	&& (p2 > pcszSearchIn)
1073	)
1074	p2--;
1075
1076	// if previous char is an EOL sign, go on
1077	if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1078	\|\| (*(p2-1) == '\r')
1079	\|\| (*(p2-1) == '\n')
1080	)
1081	{
1082	// now check whether the char after the search
1083	// is a "=" char
1084	// ULONG cbKey = strlen(pszKey);
1085
1086	// tolerate spaces before "="
1087	/* PSZ p3 = p;
1088	while (*(p3+cbKey) == ' ')
1089	p3++;
1090
1091	if ((p3+cbKey) == '=') /
1092	{
1093	// found:
1094	pReturn = (PSZ)p; // go on, p contains found key
1095
1096	// test for all upper case?
1097	if (pfIsAllUpperCase)
1098	{
1099	ULONG cbKey2 = strlen(pcszKey),
1100	ul = 0;
1101	*pfIsAllUpperCase = TRUE;
1102	for (ul = 0; ul < cbKey2; ul++)
1103	if (islower(*(p+ul)))
1104	{
1105	*pfIsAllUpperCase = FALSE;
1106	break; // for
1107	}
1108	}
1109
1110	break; // do
1111	}
1112	} // else search next key
1113
1114	p++; // search on after this key
1115	}
1116	} while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1117
1118	return (pReturn);
1119	}
1120
1121	/*
1122	*@@ strhGetParameter:
1123	* searches pszSearchIn for the key pszKey; if found, it
1124	* returns a pointer to the following characters in pszSearchIn
1125	* and, if pszCopyTo != NULL, copies the rest of the line to
1126	* that buffer, of which cbCopyTo specified the size.
1127	*
1128	* If the key is not found, NULL is returned.
1129	* String search is done by calling strhFindKey.
1130	* This is useful for querying CONFIG.SYS settings.
1131	*
1132	* <B>Example:</B>
1133	*
1134	* this would return "YES" if you searched for "PAUSEONERROR=",
1135	* and "PAUSEONERROR=YES" existed in pszSearchIn.
1136	*/
1137
1138	PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1139	const char *pcszKey, // in: key to search for
1140	PSZ pszCopyTo, // out: key value
1141	ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1142	{
1143	PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1144	prc = NULL;
1145	if (p)
1146	{
1147	prc = p + strlen(pcszKey);
1148	if (pszCopyTo)
1149	// copy to pszCopyTo
1150	{
1151	ULONG cb;
1152	PSZ pEOL = strhFindEOL(prc, &cb);
1153	if (pEOL)
1154	{
1155	if (cb > cbCopyTo)
1156	cb = cbCopyTo-1;
1157	strhncpy0(pszCopyTo, prc, cb);
1158	}
1159	}
1160	}
1161
1162	return (prc);
1163	}
1164
1165	/*
1166	*@@ strhSetParameter:
1167	* searches *ppszBuf for the key pszKey; if found, it
1168	* replaces the characters following this key up to the
1169	* end of the line with pszParam. If pszKey is not found in
1170	* *ppszBuf, it is appended to the file in a new line.
1171	*
1172	* If any changes are made, *ppszBuf is re-allocated.
1173	*
1174	* This function searches w/out case sensitivity.
1175	*
1176	* Returns a pointer to the new parameter inside the buffer.
1177	*
1178	@@changed V0.9.0 [umoeller]: changed function prototype to PSZ ppszSearchIn
1179	*/
1180
1181	PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1182	const char *pcszKey, // in: key to search for
1183	PSZ pszNewParam, // in: new parameter to set for key
1184	BOOL fRespectCase) // in: if TRUE, pszNewParam will
1185	// be converted to upper case if the found key is
1186	// in upper case also. pszNewParam should be in
1187	// lower case if you use this.
1188	{
1189	BOOL fIsAllUpperCase = FALSE;
1190	PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1191	prc = NULL;
1192
1193	if (pKey)
1194	{
1195	// key found in file:
1196	// replace existing parameter
1197	PSZ pOldParam = pKey + strlen(pcszKey);
1198
1199	prc = pOldParam;
1200	// pOldParam now has the old parameter, which we
1201	// will overwrite now
1202
1203	if (pOldParam)
1204	{
1205	ULONG cbOldParam;
1206	PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1207	// pEOL now has first end-of-line after the parameter
1208
1209	if (pEOL)
1210	{
1211	XSTRING strBuf;
1212	ULONG ulOfs = 0;
1213
1214	PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1215	strncpy(pszOldCopy, pOldParam, cbOldParam);
1216	pszOldCopy[cbOldParam] = '\0';
1217
1218	xstrInit(&strBuf, 0);
1219	xstrset(&strBuf, *ppszBuf); // this must not be freed!
1220	/* xstrInit(&strFind, 0);
1221	xstrset(&strFind, pszOldCopy); // this must not be freed!
1222	xstrInit(&strReplace, 0);
1223	xstrset(&strReplace, pszNewParam); // this must not be freed!
1224	*/
1225
1226	// check for upper case desired?
1227	if (fRespectCase)
1228	if (fIsAllUpperCase)
1229	strupr(pszNewParam);
1230
1231	xstrcrpl(&strBuf, &ulOfs, pszOldCopy, pszNewParam);
1232
1233	free(pszOldCopy);
1234
1235	*ppszBuf = strBuf.psz;
1236	}
1237	}
1238	}
1239	else
1240	{
1241	PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1242	+ strlen(pcszKey)
1243	+ strlen(pszNewParam)
1244	+ 5); // 2 * \r\n + null byte
1245	// key not found: append to end of file
1246	sprintf(pszNew, "%s\r\n%s%s\r\n",
1247	*ppszBuf, pcszKey, pszNewParam);
1248	free(*ppszBuf);
1249	*ppszBuf = pszNew;
1250	}
1251
1252	return (prc);
1253	}
1254
1255	/*
1256	*@@ strhDeleteLine:
1257	* this deletes the line in pszSearchIn which starts with
1258	* the key pszKey. Returns TRUE if the line was found and
1259	* deleted.
1260	*
1261	* This copies within pszSearchIn.
1262	*/
1263
1264	BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1265	PSZ pszKey) // in: key to find
1266	{
1267	BOOL fIsAllUpperCase = FALSE;
1268	PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1269	BOOL brc = FALSE;
1270
1271	if (pKey) {
1272	PSZ pEOL = strhFindEOL(pKey, NULL);
1273	// pEOL now has first end-of-line after the key
1274	if (pEOL)
1275	{
1276	// delete line by overwriting it with
1277	// the next line
1278	strcpy(pKey, pEOL+2);
1279	}
1280	else
1281	{
1282	// EOL not found: we must be at the end of the file
1283	*pKey = '\0';
1284	}
1285	brc = TRUE;
1286	}
1287
1288	return (brc);
1289	}
1290
1291	/*
1292	*@@ strhBeautifyTitle:
1293	* replaces all line breaks (0xd, 0xa) with spaces.
1294	*/
1295
1296	BOOL strhBeautifyTitle(PSZ psz)
1297	{
1298	BOOL rc = FALSE;
1299	CHAR *p;
1300	while ((p = strchr(psz, 0xa)))
1301	{
1302	*p = ' ';
1303	rc = TRUE;
1304	}
1305	while ((p = strchr(psz, 0xd)))
1306	{
1307	*p = ' ';
1308	rc = TRUE;
1309	}
1310	return (rc);
1311	}
1312
1313	/*
1314	* strhFindAttribValue:
1315	* searches for pszAttrib in pszSearchIn; if found,
1316	* returns the first character after the "=" char.
1317	* If "=" is not found, a space, \r, and \n are
1318	* also accepted. This function searches without
1319	* respecting case.
1320	*
1321	* <B>Example:</B>
1322	+ strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1323	+
1324	+ returns ....................... ^ this address.
1325	*
1326	*@@added V0.9.0 [umoeller]
1327	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1328	*/
1329
1330	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1331	{
1332	PSZ prc = 0;
1333	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1334	p,
1335	p2;
1336	ULONG cbAttrib = strlen(pszAttrib);
1337
1338	// 1) find space char
1339	while ((p = strchr(pszSearchIn2, ' ')))
1340	{
1341	CHAR c;
1342	p++;
1343	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1344	// now check whether the p+strlen(pszAttrib)
1345	// is a valid end-of-tag character
1346	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1347	&& ( (c == ' ')
1348	\|\| (c == '>')
1349	\|\| (c == '=')
1350	\|\| (c == '\r')
1351	\|\| (c == '\n')
1352	\|\| (c == 0)
1353	)
1354	)
1355	{
1356	// yes:
1357	CHAR c2;
1358	p2 = p + cbAttrib;
1359	c2 = *p2;
1360	while ( ( (c2 == ' ')
1361	\|\| (c2 == '=')
1362	\|\| (c2 == '\n')
1363	\|\| (c2 == '\r')
1364	)
1365	&& (c2 != 0)
1366	)
1367	c2 = *++p2;
1368	prc = p2;
1369	break; // first while
1370	}
1371	pszSearchIn2++;
1372	}
1373	return (prc);
1374	}
1375
1376	/*
1377	* strhGetNumAttribValue:
1378	* stores the numerical parameter value of an HTML-style
1379	* tag in *pl.
1380	*
1381	* Returns the address of the tag parameter in the
1382	* search buffer, if found, or NULL.
1383	*
1384	* <B>Example:</B>
1385	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1386	*
1387	* stores 123 in the "l" variable.
1388	*
1389	*@@added V0.9.0 [umoeller]
1390	*/
1391
1392	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1393	const char *pszTag, // e.g. "INDEX"
1394	PLONG pl) // out: numerical value
1395	{
1396	PSZ pParam;
1397	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1398	sscanf(pParam, "%ld", pl);
1399
1400	return (pParam);
1401	}
1402
1403	/*
1404	* strhGetTextAttr:
1405	* retrieves the attribute value of a textual HTML-style tag
1406	* in a newly allocated buffer, which is returned,
1407	* or NULL if attribute not found.
1408	* If an attribute value is to contain spaces, it
1409	* must be enclosed in quotes.
1410	*
1411	* The offset of the attribute data in pszSearchIn is
1412	* returned in *pulOffset so that you can do multiple
1413	* searches.
1414	*
1415	* This returns a new buffer, which should be free()'d after use.
1416	*
1417	* <B>Example:</B>
1418	+ ULONG ulOfs = 0;
1419	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1420	+ ............^ ulOfs
1421	*
1422	* returns a new string with the value "blublub" (without
1423	* quotes) and sets ulOfs to 12.
1424	*
1425	*@@added V0.9.0 [umoeller]
1426	*/
1427
1428	PSZ strhGetTextAttr(const char *pszSearchIn,
1429	const char *pszTag,
1430	PULONG pulOffset) // out: offset where found
1431	{
1432	PSZ pParam,
1433	pParam2,
1434	prc = NULL;
1435	ULONG ulCount = 0;
1436	LONG lNestingLevel = 0;
1437
1438	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1439	{
1440	// determine end character to search for: a space
1441	CHAR cEnd = ' ';
1442	if (*pParam == '\"')
1443	{
1444	// or, if the data is enclosed in quotes, a quote
1445	cEnd = '\"';
1446	pParam++;
1447	}
1448
1449	if (pulOffset)
1450	// store the offset
1451	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1452
1453	// now find end of attribute
1454	pParam2 = pParam;
1455	while (*pParam)
1456	{
1457	if (*pParam == cEnd)
1458	// end character found
1459	break;
1460	else if (*pParam == '<')
1461	// yet another opening tag found:
1462	// this is probably some "<" in the attributes
1463	lNestingLevel++;
1464	else if (*pParam == '>')
1465	{
1466	lNestingLevel--;
1467	if (lNestingLevel < 0)
1468	// end of tag found:
1469	break;
1470	}
1471	ulCount++;
1472	pParam++;
1473	}
1474
1475	// copy attribute to new buffer
1476	if (ulCount)
1477	{
1478	prc = (PSZ)malloc(ulCount+1);
1479	memcpy(prc, pParam2, ulCount);
1480	*(prc+ulCount) = 0;
1481	}
1482	}
1483	return (prc);
1484	}
1485
1486	/*
1487	* strhFindEndOfTag:
1488	* returns a pointer to the ">" char
1489	* which seems to terminate the tag beginning
1490	* after pszBeginOfTag.
1491	*
1492	* If additional "<" chars are found, we look
1493	* for additional ">" characters too.
1494	*
1495	* Note: You must pass the address of the opening
1496	* '<' character to this function.
1497	*
1498	* Example:
1499	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1500	+ strhFindEndOfTag(pszTest)
1501	+ returns.................................^ this.
1502	*
1503	*@@added V0.9.0 [umoeller]
1504	*/
1505
1506	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1507	{
1508	PSZ p = (PSZ)pszBeginOfTag,
1509	prc = NULL;
1510	LONG lNestingLevel = 0;
1511
1512	while (*p)
1513	{
1514	if (*p == '<')
1515	// another opening tag found:
1516	lNestingLevel++;
1517	else if (*p == '>')
1518	{
1519	// closing tag found:
1520	lNestingLevel--;
1521	if (lNestingLevel < 1)
1522	{
1523	// corresponding: return this
1524	prc = p;
1525	break;
1526	}
1527	}
1528	p++;
1529	}
1530
1531	return (prc);
1532	}
1533
1534	/*
1535	* strhGetBlock:
1536	* this complex function searches the given string
1537	* for a pair of opening/closing HTML-style tags.
1538	*
1539	* If found, this routine returns TRUE and does
1540	* the following:
1541	*
1542	* 1) allocate a new buffer, copy the text
1543	* enclosed by the opening/closing tags
1544	* into it and set *ppszBlock to that
1545	* buffer;
1546	*
1547	* 2) if the opening tag has any attributes,
1548	* allocate another buffer, copy the
1549	* attributes into it and set *ppszAttrs
1550	* to that buffer; if no attributes are
1551	* found, *ppszAttrs will be NULL;
1552	*
1553	* 3) set *pulOffset to the offset from the
1554	* beginning of *ppszSearchIn where the
1555	* opening tag was found;
1556	*
1557	* 4) advance *ppszSearchIn to after the
1558	* closing tag, so that you can do
1559	* multiple searches without finding the
1560	* same tags twice.
1561	*
1562	* All buffers should be freed using free().
1563	*
1564	* This returns the following:
1565	* -- 0: no error
1566	* -- 1: tag not found at all (doesn't have to be an error)
1567	* -- 2: begin tag found, but no corresponding end tag found. This
1568	* is a real error.
1569	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1570	*
1571	* <B>Example:</B>
1572	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1573	+ PSZ pszBlock, pszAttrs;
1574	+ ULONG ulOfs;
1575	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1576	*
1577	* would do the following:
1578	*
1579	* 1) set pszBlock to a new string containing "This is page 1."
1580	* without quotes;
1581	*
1582	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1583	*
1584	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1585	*
1586	* 4) pSearch would be advanced to point to the "More text"
1587	* string in the original buffer.
1588	*
1589	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1590	* for HTML parsing.
1591	*
1592	*@@added V0.9.0 [umoeller]
1593	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1594	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1595	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1596	*/
1597
1598	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1599	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1600	PSZ pszTag,
1601	PSZ *ppszBlock, // out: block enclosed by the tags
1602	PSZ *ppszAttribs, // out: attributes of the opening tag
1603	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1604	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1605	{
1606	ULONG ulrc = 1;
1607	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1608	pszSearch2 = pszBeginTag,
1609	pszClosingTag;
1610	ULONG cbTag = strlen(pszTag);
1611
1612	// go thru the block and check all tags if it's the
1613	// begin tag we're looking for
1614	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1615	{
1616	if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1617	// yes: stop
1618	break;
1619	else
1620	pszBeginTag++;
1621	}
1622
1623	if (pszBeginTag)
1624	{
1625	// we found <TAG>:
1626	ULONG ulNestingLevel = 0;
1627
1628	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1629	// strchr(pszBeginTag, '>');
1630	if (pszEndOfBeginTag)
1631	{
1632	// does the caller want the attributes?
1633	if (ppszAttribs)
1634	{
1635	// yes: then copy them
1636	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1637	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1638	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1639	// add terminating 0
1640	*(pszAttrs + ulAttrLen) = 0;
1641
1642	*ppszAttribs = pszAttrs;
1643	}
1644
1645	// output offset of where we found the begin tag
1646	if (pulOfsBeginTag)
1647	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1648
1649	// now find corresponding closing tag (e.g. "</BODY>"
1650	pszBeginTag = pszEndOfBeginTag+1;
1651	// now we're behind the '>' char of the opening tag
1652	// increase offset of that too
1653	if (pulOfsBeginBlock)
1654	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1655
1656	// find next closing tag;
1657	// for the first run, pszSearch2 points to right
1658	// after the '>' char of the opening tag
1659	pszSearch2 = pszBeginTag;
1660	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1661	&& (pszClosingTag = strstr(pszSearch2, "<"))
1662	)
1663	{
1664	// if we have another opening tag before our closing
1665	// tag, we need to have several closing tags before
1666	// we're done
1667	if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1668	ulNestingLevel++;
1669	else
1670	{
1671	// is this ours?
1672	if ( (*(pszClosingTag+1) == '/')
1673	&& (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1674	)
1675	{
1676	// we've found a matching closing tag; is
1677	// it ours?
1678	if (ulNestingLevel == 0)
1679	{
1680	// our closing tag found:
1681	// allocate mem for a new buffer
1682	// and extract all the text between
1683	// open and closing tags to it
1684	ULONG ulLen = pszClosingTag - pszBeginTag;
1685	if (ppszBlock)
1686	{
1687	PSZ pNew = (PSZ)malloc(ulLen + 1);
1688	strhncpy0(pNew, pszBeginTag, ulLen);
1689	*ppszBlock = pNew;
1690	}
1691
1692	// raise search offset to after the closing tag
1693	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1694
1695	ulrc = 0;
1696
1697	break;
1698	} else
1699	// not our closing tag:
1700	ulNestingLevel--;
1701	}
1702	}
1703	// no matching closing tag: search on after that
1704	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1705	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1706
1707	if (!pszClosingTag)
1708	// no matching closing tag found:
1709	// return 2 (closing tag not found)
1710	ulrc = 2;
1711	} // end if (pszBeginTag)
1712	else
1713	// no matching ">" for opening tag found:
1714	ulrc = 3;
1715	}
1716
1717	return (ulrc);
1718	}
1719
1720	/* ******************************************************************
1721	*
1722	* Miscellaneous
1723	*
1724	********************************************************************/
1725
1726	/*
1727	*@@ strhArrayAppend:
1728	* this appends a string to a "string array".
1729	*
1730	* A string array is considered a sequence of
1731	* zero-terminated strings in memory. That is,
1732	* after each string's null-byte, the next
1733	* string comes up.
1734	*
1735	* This is useful for composing a single block
1736	* of memory from, say, list box entries, which
1737	* can then be written to OS2.INI in one flush.
1738	*
1739	* To append strings to such an array, call this
1740	* function for each string you wish to append.
1741	* This will re-allocate *ppszRoot with each call,
1742	* and update *pcbRoot, which then contains the
1743	* total size of all strings (including all null
1744	* terminators).
1745	*
1746	* Pass *pcbRoot to PrfSaveProfileData to have the
1747	* block saved.
1748	*
1749	* Note: On the first call, ppszRoot and pcbRoot
1750	* _must_ be both NULL, or this crashes.
1751	*/
1752
1753	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1754	const char *pcszNew, // in: string to append
1755	PULONG pcbRoot) // in/out: size of array
1756	{
1757	ULONG cbNew = strlen(pcszNew);
1758	PSZ pszTemp = (PSZ)malloc(*pcbRoot
1759	+ cbNew
1760	+ 1); // two null bytes
1761	if (*ppszRoot)
1762	{
1763	// not first loop: copy old stuff
1764	memcpy(pszTemp,
1765	*ppszRoot,
1766	*pcbRoot);
1767	free(*ppszRoot);
1768	}
1769	// append new string
1770	strcpy(pszTemp + *pcbRoot,
1771	pcszNew);
1772	// update root
1773	*ppszRoot = pszTemp;
1774	// update length
1775	*pcbRoot += cbNew + 1;
1776	}
1777
1778	/*
1779	*@@ strhCreateDump:
1780	* this dumps a memory block into a string
1781	* and returns that string in a new buffer.
1782	*
1783	* You must free() the returned PSZ after use.
1784	*
1785	* The output looks like the following:
1786	*
1787	+ 0000: FE FF 0E 02 90 00 00 00 ........
1788	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1789	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1790	*
1791	* Each line is terminated with a newline (\n)
1792	* character only.
1793	*
1794	*@@added V0.9.1 (2000-01-22) [umoeller]
1795	*/
1796
1797	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1798	ULONG ulSize, // in: size of buffer
1799	ULONG ulIndent) // in: indentation of every line
1800	{
1801	PSZ pszReturn = 0;
1802	XSTRING strReturn;
1803	CHAR szTemp[1000];
1804
1805	PBYTE pbCurrent = pb; // current byte
1806	ULONG ulCount = 0,
1807	ulCharsInLine = 0; // if this grows > 7, a new line is started
1808	CHAR szLine[400] = "",
1809	szAscii[30] = " "; // ASCII representation; filled for every line
1810	PSZ pszLine = szLine,
1811	pszAscii = szAscii;
1812
1813	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1814
1815	for (pbCurrent = pb;
1816	ulCount < ulSize;
1817	pbCurrent++, ulCount++)
1818	{
1819	if (ulCharsInLine == 0)
1820	{
1821	memset(szLine, ' ', ulIndent);
1822	pszLine += ulIndent;
1823	}
1824	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1825
1826	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1827	// printable character:
1828	pszAscii = pbCurrent;
1829	else
1830	*pszAscii = '.';
1831	pszAscii++;
1832
1833	ulCharsInLine++;
1834	if ( (ulCharsInLine > 7) // 8 bytes added?
1835	\|\| (ulCount == ulSize-1) // end of buffer reached?
1836	)
1837	{
1838	// if we haven't had eight bytes yet,
1839	// fill buffer up to eight bytes with spaces
1840	ULONG ul2;
1841	for (ul2 = ulCharsInLine;
1842	ul2 < 8;
1843	ul2++)
1844	pszLine += sprintf(pszLine, " ");
1845
1846	sprintf(szTemp, "%04lX: %s %s\n",
1847	(ulCount & 0xFFFFFFF8), // offset in hex
1848	szLine, // bytes string
1849	szAscii); // ASCII string
1850	xstrcat(&strReturn, szTemp);
1851
1852	// restart line buffer
1853	pszLine = szLine;
1854
1855	// clear ASCII buffer
1856	strcpy(szAscii, " ");
1857	pszAscii = szAscii;
1858
1859	// reset line counter
1860	ulCharsInLine = 0;
1861	}
1862	}
1863
1864	if (strReturn.cbAllocated)
1865	pszReturn = strReturn.psz;
1866
1867	return (pszReturn);
1868	}
1869
1870	/* ******************************************************************
1871	*
1872	* Wildcard matching
1873	*
1874	********************************************************************/
1875
1876	/*
1877	* The following code has been taken from "fnmatch.zip".
1878	*
1879	* (c) 1994-1996 by Eberhard Mattes.
1880	*/
1881
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator.
 * Note that C is evaluated twice, so it must not have side effects. */

#define IS_OS2_COMP_SEP(C) ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the terminating null character or a
 * separator. C is evaluated more than once here as well. */

#define IS_OS2_COMP_END(C) ((C) == 0 || IS_OS2_COMP_SEP (C))
1892
/*
 * skip_comp_os2:
 *      Return a pointer to the next component of the path SRC, for OS/2
 *      and DOS styles: the character following the first separator
 *      found in SRC. If SRC contains no separator, a pointer to the
 *      terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *p;

    /* Walk up to the separator or the end of the string. */

    for (p = src; !IS_OS2_COMP_END(*p); ++p)
        ;

    /* Step over the separator, but never over the null character. */

    return (*p == 0) ? p : p + 1;
}
1916
/*
 * has_colon:
 *      returns 1 if the null-terminated path P contains
 *      a colon anywhere, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    return (strchr((const char *)p, ':') != NULL) ? 1 : 0;
}
1933
1934	/*
1935	* match_comp_os2:
1936	* Compare a single component (directory name or file name) of the
1937	* paths, for OS/2 and DOS styles. MASK and NAME point into a
1938	* component of the wildcard and the name to be checked, respectively.
1939	* Comparing stops at the next separator. The FLAGS argument is the
1940	* same as that of fnmatch(). HAS_DOT is true if a dot is in the
1941	* current component of NAME. The number of dots is not restricted,
1942	* even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1943	* Note that this function is recursive.
1944	*
1945	* (c) 1994-1996 by Eberhard Mattes.
1946	*/
1947
1948	static int match_comp_os2(const unsigned char *mask,
1949	const unsigned char *name,
1950	unsigned flags,
1951	int has_dot)
1952	{
1953	int rc;
1954
1955	for (;;)
1956	switch (*mask)
1957	{
1958	case 0:
1959
1960	/* There must be no extra characters at the end of NAME when
1961	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1962	* in that case, NAME may point to a separator. */
1963
1964	if (*name == 0)
1965	return FNM_MATCH;
1966	if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1967	return FNM_MATCH;
1968	return FNM_NOMATCH;
1969
1970	case '/':
1971	case '\\':
1972
1973	/* Separators match separators. */
1974
1975	if (IS_OS2_COMP_SEP(*name))
1976	return FNM_MATCH;
1977
1978	/* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1979	* is ignored at the end of NAME. */
1980
1981	if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1982	return FNM_MATCH;
1983
1984	/* Stop comparing at the separator. */
1985
1986	return FNM_NOMATCH;
1987
1988	case '?':
1989
1990	/* A question mark matches one character. It does not match
1991	* a dot. At the end of the component (and before a dot),
1992	* it also matches zero characters. */
1993
1994	if (name != '.' && !IS_OS2_COMP_END(name))
1995	++name;
1996	++mask;
1997	break;
1998
1999	case '*':
2000
2001	/* An asterisk matches zero or more characters. In DOS
2002	* mode, dots are not matched. */
2003
2004	do
2005	{
2006	++mask;
2007	}
2008	while (mask == '');
2009	for (;;)
2010	{
2011	rc = match_comp_os2(mask, name, flags, has_dot);
2012	if (rc != FNM_NOMATCH)
2013	return rc;
2014	if (IS_OS2_COMP_END(*name))
2015	return FNM_NOMATCH;
2016	if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
2017	return FNM_NOMATCH;
2018	++name;
2019	}
2020
2021	case '.':
2022
2023	/* A dot matches a dot. It also matches the implicit dot at
2024	* the end of a dot-less NAME. */
2025
2026	++mask;
2027	if (*name == '.')
2028	++name;
2029	else if (has_dot \|\| !IS_OS2_COMP_END(*name))
2030	return FNM_NOMATCH;
2031	break;
2032
2033	default:
2034
2035	/* All other characters match themselves. */
2036
2037	if (flags & _FNM_IGNORECASE)
2038	{
2039	if (tolower(mask) != tolower(name))
2040	return FNM_NOMATCH;
2041	}
2042	else
2043	{
2044	if (mask != name)
2045	return FNM_NOMATCH;
2046	}
2047	++mask;
2048	++name;
2049	break;
2050	}
2051	}
2052
2053	/*
2054	* match_comp:
2055	* compare a single component (directory name or file name) of the
2056	* paths, for all styles which need component-by-component matching.
2057	* MASK and NAME point to the start of a component of the wildcard and
2058	* the name to be checked, respectively. Comparing stops at the next
2059	* separator. The FLAGS argument is the same as that of fnmatch().
2060	* Return FNM_MATCH iff MASK and NAME match.
2061	*
2062	* (c) 1994-1996 by Eberhard Mattes.
2063	*/
2064
2065	static int match_comp(const unsigned char *mask,
2066	const unsigned char *name,
2067	unsigned flags)
2068	{
2069	const unsigned char *s;
2070
2071	switch (flags & _FNM_STYLE_MASK)
2072	{
2073	case _FNM_OS2:
2074	case _FNM_DOS:
2075
2076	/* For OS/2 and DOS styles, we add an implicit dot at the end of
2077	* the component if the component doesn't include a dot. */
2078
2079	s = name;
2080	while (!IS_OS2_COMP_END(s) && s != '.')
2081	++s;
2082	return match_comp_os2(mask, name, flags, *s == '.');
2083
2084	default:
2085	return FNM_ERR;
2086	}
2087	}
2088
/* In Unix styles, / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C) ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the terminating null character or a
 * separator. Note that C is evaluated twice, so it must not have
 * side effects. */

#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP (C))
2099
2100	/*
2101	* match_unix:
2102	* match complete paths for Unix styles. The FLAGS argument is the
2103	* same as that of fnmatch(). COMP points to the start of the current
2104	* component in NAME. Return FNM_MATCH iff MASK and NAME match. The
2105	* backslash character is used for escaping ? and * unless
2106	* FNM_NOESCAPE is set.
2107	*
2108	* (c) 1994-1996 by Eberhard Mattes.
2109	*/
2110
2111	static int match_unix(const unsigned char *mask,
2112	const unsigned char *name,
2113	unsigned flags,
2114	const unsigned char *comp)
2115	{
2116	unsigned char c1, c2;
2117	char invert, matched;
2118	const unsigned char *start;
2119	int rc;
2120
2121	for (;;)
2122	switch (*mask)
2123	{
2124	case 0:
2125
2126	/* There must be no extra characters at the end of NAME when
2127	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
2128	* in that case, NAME may point to a separator. */
2129
2130	if (*name == 0)
2131	return FNM_MATCH;
2132	if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
2133	return FNM_MATCH;
2134	return FNM_NOMATCH;
2135
2136	case '?':
2137
2138	/* A question mark matches one character. It does not match
2139	* the component separator if FNM_PATHNAME is set. It does
2140	* not match a dot at the start of a component if FNM_PERIOD
2141	* is set. */
2142
2143	if (*name == 0)
2144	return FNM_NOMATCH;
2145	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2146	return FNM_NOMATCH;
2147	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2148	return FNM_NOMATCH;
2149	++mask;
2150	++name;
2151	break;
2152
2153	case '*':
2154
2155	/* An asterisk matches zero or more characters. It does not
2156	* match the component separator if FNM_PATHNAME is set. It
2157	* does not match a dot at the start of a component if
2158	* FNM_PERIOD is set. */
2159
2160	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2161	return FNM_NOMATCH;
2162	do
2163	{
2164	++mask;
2165	}
2166	while (mask == '');
2167	for (;;)
2168	{
2169	rc = match_unix(mask, name, flags, comp);
2170	if (rc != FNM_NOMATCH)
2171	return rc;
2172	if (*name == 0)
2173	return FNM_NOMATCH;
2174	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2175	return FNM_NOMATCH;
2176	++name;
2177	}
2178
2179	case '/':
2180
2181	/* Separators match only separators. If _FNM_PATHPREFIX is
2182	* set, a trailing separator in MASK is ignored at the end
2183	* of NAME. */
2184
2185	if (!(IS_UNIX_COMP_SEP(*name)
2186	\|\| ((flags & _FNM_PATHPREFIX) && *name == 0
2187	&& (mask[1] == 0
2188	\|\| (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
2189	&& mask[2] == 0)))))
2190	return FNM_NOMATCH;
2191
2192	++mask;
2193	if (*name != 0)
2194	++name;
2195
2196	/* This is the beginning of a new component if FNM_PATHNAME
2197	* is set. */
2198
2199	if (flags & FNM_PATHNAME)
2200	comp = name;
2201	break;
2202
2203	case '[':
2204
2205	/* A set of characters. Always case-sensitive. */
2206
2207	if (*name == 0)
2208	return FNM_NOMATCH;
2209	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2210	return FNM_NOMATCH;
2211	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2212	return FNM_NOMATCH;
2213
2214	invert = 0;
2215	matched = 0;
2216	++mask;
2217
2218	/* If the first character is a ! or ^, the set matches all
2219	* characters not listed in the set. */
2220
2221	if (mask == '!' \|\| mask == '^')
2222	{
2223	++mask;
2224	invert = 1;
2225	}
2226
2227	/* Loop over all the characters of the set. The loop ends
2228	* if the end of the string is reached or if a ] is
2229	* encountered unless it directly follows the initial [ or
2230	* [-. */
2231
2232	start = mask;
2233	while (!(mask == 0 \|\| (mask == ']' && mask != start)))
2234	{
2235	/* Get the next character which is optionally preceded
2236	* by a backslash. */
2237
2238	c1 = *mask++;
2239	if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2240	{
2241	if (*mask == 0)
2242	break;
2243	c1 = *mask++;
2244	}
2245
2246	/* Ranges of characters are written as a-z. Don't
2247	* forget to check for the end of the string and to
2248	* handle the backslash. If the character after - is a
2249	* ], it isn't a range. */
2250
2251	if (*mask == '-' && mask[1] != ']')
2252	{
2253	++mask; /* Skip the - character */
2254	if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2255	++mask;
2256	if (*mask == 0)
2257	break;
2258	c2 = *mask++;
2259	}
2260	else
2261	c2 = c1;
2262
2263	/* Now check whether this character or range matches NAME. */
2264
2265	if (c1 <= name && name <= c2)
2266	matched = 1;
2267	}
2268
2269	/* If the end of the string is reached before a ] is found,
2270	* back up to the [ and compare it to NAME. */
2271
2272	if (*mask == 0)
2273	{
2274	if (*name != '[')
2275	return FNM_NOMATCH;
2276	++name;
2277	mask = start;
2278	if (invert)
2279	--mask;
2280	}
2281	else
2282	{
2283	if (invert)
2284	matched = !matched;
2285	if (!matched)
2286	return FNM_NOMATCH;
2287	++mask; /* Skip the ] character */
2288	if (*name != 0)
2289	++name;
2290	}
2291	break;
2292
2293	case '\\':
2294	++mask;
2295	if (flags & FNM_NOESCAPE)
2296	{
2297	if (*name != '\\')
2298	return FNM_NOMATCH;
2299	++name;
2300	}
2301	else if (mask == '' \|\| *mask == '?')
2302	{
2303	if (mask != name)
2304	return FNM_NOMATCH;
2305	++mask;
2306	++name;
2307	}
2308	break;
2309
2310	default:
2311
2312	/* All other characters match themselves. */
2313
2314	if (flags & _FNM_IGNORECASE)
2315	{
2316	if (tolower(mask) != tolower(name))
2317	return FNM_NOMATCH;
2318	}
2319	else
2320	{
2321	if (mask != name)
2322	return FNM_NOMATCH;
2323	}
2324	++mask;
2325	++name;
2326	break;
2327	}
2328	}
2329
2330	/*
2331	* _fnmatch_unsigned:
2332	* Check whether the path name NAME matches the wildcard MASK.
2333	*
2334	* Return:
2335	* -- 0 (FNM_MATCH) if it matches,
2336	* -- _FNM_NOMATCH if it doesn't,
2337	* -- FNM_ERR on error.
2338	*
2339	* The operation of this function is controlled by FLAGS.
2340	* This is an internal function, with unsigned arguments.
2341	*
2342	* (c) 1994-1996 by Eberhard Mattes.
2343	*/
2344
2345	static int _fnmatch_unsigned(const unsigned char *mask,
2346	const unsigned char *name,
2347	unsigned flags)
2348	{
2349	int m_drive, n_drive,
2350	rc;
2351
2352	/* Match and skip the drive name if present. */
2353
2354	m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2355	n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2356
2357	if (m_drive != n_drive)
2358	{
2359	if (m_drive == -1 \|\| n_drive == -1)
2360	return FNM_NOMATCH;
2361	if (!(flags & _FNM_IGNORECASE))
2362	return FNM_NOMATCH;
2363	if (tolower(m_drive) != tolower(n_drive))
2364	return FNM_NOMATCH;
2365	}
2366
2367	if (m_drive != -1)
2368	mask += 2;
2369	if (n_drive != -1)
2370	name += 2;
2371
2372	/* Colons are not allowed in path names, except for the drive name,
2373	* which was skipped above. */
2374
2375	if (has_colon(mask) \|\| has_colon(name))
2376	return FNM_ERR;
2377
2378	/* The name "\\server\path" should not be matched by mask
2379	* "\\server\path". Ditto for /. /
2380
2381	switch (flags & _FNM_STYLE_MASK)
2382	{
2383	case _FNM_OS2:
2384	case _FNM_DOS:
2385
2386	if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2387	{
2388	if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2389	return FNM_NOMATCH;
2390	name += 2;
2391	mask += 2;
2392	}
2393	break;
2394
2395	case _FNM_POSIX:
2396
2397	if (name[0] == '/' && name[1] == '/')
2398	{
2399	int i;
2400
2401	name += 2;
2402	for (i = 0; i < 2; ++i)
2403	if (mask[0] == '/')
2404	++mask;
2405	else if (mask[0] == '\\' && mask[1] == '/')
2406	mask += 2;
2407	else
2408	return FNM_NOMATCH;
2409	}
2410
2411	/* In Unix styles, treating ? and * w.r.t. components is simple.
2412	* No need to do matching component by component. */
2413
2414	return match_unix(mask, name, flags, name);
2415	}
2416
2417	/* Now compare all the components of the path name, one by one.
2418	* Note that the path separator must not be enclosed in brackets. */
2419
2420	while (mask != 0 \|\| name != 0)
2421	{
2422
2423	/* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2424	* is reached even if there are components left in NAME. */
2425
2426	if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2427	return FNM_MATCH;
2428
2429	/* Compare a single component of the path name. */
2430
2431	rc = match_comp(mask, name, flags);
2432	if (rc != FNM_MATCH)
2433	return rc;
2434
2435	/* Skip to the next component or to the end of the path name. */
2436
2437	mask = skip_comp_os2(mask);
2438	name = skip_comp_os2(name);
2439	}
2440
2441	/* If we reached the ends of both strings, the names match. */
2442
2443	if (mask == 0 && name == 0)
2444	return FNM_MATCH;
2445
2446	/* The names do not match. */
2447
2448	return FNM_NOMATCH;
2449	}
2450
2451	/*
2452	*@@ strhMatchOS2:
2453	* this matches wildcards, similar to what DosEditName does.
2454	* However, this does not require a file to be present, but
2455	* works on strings only.
2456	*/
2457
2458	BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2459	const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2460	{
2461	return ((BOOL)(_fnmatch_unsigned(pcszMask,
2462	pcszName,
2463	_FNM_OS2 \| _FNM_IGNORECASE)
2464	== FNM_MATCH)
2465	);
2466	}
2467
2468	/* ******************************************************************
2469	*
2470	* Fast string searches
2471	*
2472	********************************************************************/
2473
// Assertions are compiled out in this file: ASSERT neither evaluates
// nor checks its argument. It expands to a void expression instead of
// to nothing, so that it stays valid wherever an expression is expected.
// NOTE(review): presumably used by the SFL-derived search code that
// follows; confirm that every use is a plain "ASSERT(...);" statement.
#define ASSERT(a) ((void)0)
2475
2476	/*
2477	* The following code has been taken from the "Standard
2478	* Function Library", file sflfind.c, and only slightly
2479	* modified to conform to the rest of this file.
2480	*
2481	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2482	* Revised: 98/05/04
2483	*
2484	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2485	*
2486	* The SFL Licence allows incorporating SFL code into other
2487	* programs, as long as the copyright is reprinted and the
2488	* code is marked as modified, so this is what we do.
2489	*/
2490
2491	/*
2492	*@@ strhmemfind:
2493	* searches for a pattern in a block of memory using the
2494	* Boyer-Moore-Horspool-Sunday algorithm.
2495	*
2496	* The block and pattern may contain any values; you must
2497	* explicitly provide their lengths. If you search for strings,
2498	* use strlen() on the buffers.
2499	*
2500	* Returns a pointer to the pattern if found within the block,
2501	* or NULL if the pattern was not found.
2502	*
2503	* This algorithm needs a "shift table" to cache data for the
2504	* search pattern. This table can be reused when performing
2505	* several searches with the same pattern.
2506	*
2507	* "shift" must point to an array big enough to hold 256 (8**2)
2508	* "size_t" values.
2509	*
2510	* If (*repeat_find == FALSE), the shift table is initialized.
2511	* So on the first search with a given pattern, *repeat_find
2512	* should be FALSE. This function sets it to TRUE after the
2513	* shift table is initialised, allowing the initialisation
2514	* phase to be skipped on subsequent searches.
2515	*
2516	* This function is most effective when repeated searches are
2517	* made for the same pattern in one or more large buffers.
2518	*
2519	* Example:
2520	*
2521	+ PSZ pszHaystack = "This is a sample string.",
2522	+ pszNeedle = "string";
2523	+ size_t shift[256];
2524	+ BOOL fRepeat = FALSE;
2525	+
2526	+ PSZ pFound = strhmemfind(pszHaystack,
2527	+ strlen(pszHaystack), // block size
2528	+ pszNeedle,
2529	+ strlen(pszNeedle), // pattern size
2530	+ shift,
2531	+ &fRepeat);
2532	*
2533	* Taken from the "Standard Function Library", file sflfind.c.
2534	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2535	* Slightly modified by umoeller.
2536	*
2537	*@@added V0.9.3 (2000-05-08) [umoeller]
2538	*/
2539
2540	void* strhmemfind(const void *in_block, // in: block containing data
2541	size_t block_size, // in: size of block in bytes
2542	const void *in_pattern, // in: pattern to search for
2543	size_t pattern_size, // in: size of pattern block
2544	size_t *shift, // in/out: shift table (search buffer)
2545	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
2546	{
2547	size_t byte_nbr, // Distance through block
2548	match_size; // Size of matched part
2549	const unsigned char
2550	*match_base = NULL, // Base of match of pattern
2551	*match_ptr = NULL, // Point within current match
2552	*limit = NULL; // Last potiental match point
2553	const unsigned char
2554	block = (unsigned char ) in_block, // Concrete pointer to block data
2555	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
2556
2557	if ( (block == NULL)
2558	\|\| (pattern == NULL)
2559	\|\| (shift == NULL)
2560	)
2561	return (NULL);
2562
2563	// Pattern must be smaller or equal in size to string
2564	if (block_size < pattern_size)
2565	return (NULL); // Otherwise it's not found
2566
2567	if (pattern_size == 0) // Empty patterns match at start
2568	return ((void *)block);
2569
2570	// Build the shift table unless we're continuing a previous search
2571
2572	// The shift table determines how far to shift before trying to match
2573	// again, if a match at this point fails. If the byte after where the
2574	// end of our pattern falls is not in our pattern, then we start to
2575	// match again after that byte; otherwise we line up the last occurence
2576	// of that byte in our pattern under that byte, and try match again.
2577
2578	if (!repeat_find \|\| !*repeat_find)
2579	{
2580	for (byte_nbr = 0;
2581	byte_nbr < 256;
2582	byte_nbr++)
2583	shift[byte_nbr] = pattern_size + 1;
2584	for (byte_nbr = 0;
2585	byte_nbr < pattern_size;
2586	byte_nbr++)
2587	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2588
2589	if (repeat_find)
2590	*repeat_find = TRUE;
2591	}
2592
2593	// Search for the block, each time jumping up by the amount
2594	// computed in the shift table
2595
2596	limit = block + (block_size - pattern_size + 1);
2597	ASSERT (limit > block);
2598
2599	for (match_base = block;
2600	match_base < limit;
2601	match_base += shift[*(match_base + pattern_size)])
2602	{
2603	match_ptr = match_base;
2604	match_size = 0;
2605
2606	// Compare pattern until it all matches, or we find a difference
2607	while (*match_ptr++ == pattern[match_size++])
2608	{
2609	ASSERT (match_size <= pattern_size &&
2610	match_ptr == (match_base + match_size));
2611
2612	// If we found a match, return the start address
2613	if (match_size >= pattern_size)
2614	return ((void*)(match_base));
2615
2616	}
2617	}
2618	return (NULL); // Found nothing
2619	}
2620
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the string, or NULL if the pattern was not found,
 *      if either argument is NULL, or if the pattern is longer
 *      than the string. An empty pattern matches at the start
 *      of the string.
 *
 *      Characters are compared after folding with tolower(), so
 *      the match is irrespective of case as defined by the current
 *      C locale. For an exact (case-sensitive) search, strhmemfind
 *      in this file or the C library's strstr() can be used.
 *      Will not work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const char
        *match_base = NULL,             // Base of match of pattern
        *match_ptr  = NULL,             // Point within current match
        *limit      = NULL;             // One past the last potential match point

    // Fail gracefully on NULL pointers
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try to match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    // Note: the character must be converted to unsigned char BEFORE it is
    // handed to tolower(); passing a negative plain char is undefined.
    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    limit = string + (string_size - pattern_size + 1);

    // The byte inspected for the shift, match_base[pattern_size], is at
    // worst the terminating null byte of the string, so it is always
    // readable here.
    for (match_base = string;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (
                                  (unsigned char) *(match_base + pattern_size))])
    {
        match_ptr  = match_base;
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (    tolower ((unsigned char) *match_ptr++)
                == tolower ((unsigned char) pattern [match_size++]))
        {
            // If we found a match, return the start address
            if (match_size >= pattern_size)
                return ((char *)(match_base));
        }
    }
    return (NULL);                      // Found nothing
}
2715

Note: See TracBrowser for help on using the repository browser.

Download in other formats: