Context Navigation

source: trunk/src/helpers/stringh.c@ 35

Visit:

Last change on this file since 35 was 23, checked in by umoeller, 25 years ago
Fixes for V0.9.7.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 76.5 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Moeller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#include <os2.h>
45
46	#include <stdlib.h>
47	#include <stdio.h>
48	#include <string.h>
49	#include <ctype.h>
50	#include <math.h>
51
52	#include "setup.h" // code generation and debugging options
53
54	#include "helpers\stringh.h"
55	#include "helpers\xstring.h" // extended string helpers
56
57	#pragma hdrstop
58
59	/*
60	*@@category: Helpers\C helpers\String management
61	* See stringh.c and xstring.c.
62	*/
63
64	/*
65	*@@category: Helpers\C helpers\String management\C string helpers
66	* See stringh.c.
67	*/
68
69	/*
70	*@@ strhdup:
71	* like strdup, but this one
72	* doesn't crash if pszSource is NULL,
73	* but returns NULL also.
74	*
75	*@@added V0.9.0 [umoeller]
76	*/
77
78	PSZ strhdup(const char *pszSource)
79	{
80	if (pszSource)
81	return (strdup(pszSource));
82	else
83	return (0);
84	}
85
86	/*
87	*@@ strhistr:
88	* like strstr, but case-insensitive.
89	*
90	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
91	*/
92
93	PSZ strhistr(const char string1, const char string2)
94	{
95	PSZ prc = NULL;
96
97	if ((string1) && (string2))
98	{
99	PSZ pszSrchIn = strdup(string1);
100	PSZ pszSrchFor = strdup(string2);
101
102	if ((pszSrchIn) && (pszSrchFor))
103	{
104	strupr(pszSrchIn);
105	strupr(pszSrchFor);
106
107	prc = strstr(pszSrchIn, pszSrchFor);
108	if (prc)
109	{
110	// prc now has the first occurence of the string,
111	// but in pszSrchIn; we need to map this
112	// return value to the original string
113	prc = (prc-pszSrchIn) // offset in pszSrchIn
114	+ (PSZ)string1;
115	}
116	}
117	if (pszSrchFor)
118	free(pszSrchFor);
119	if (pszSrchIn)
120	free(pszSrchIn);
121	}
122	return (prc);
123	}
124
125	/*
126	*@@ strhncpy0:
127	* like strncpy, but always appends a 0 character.
128	*/
129
130	ULONG strhncpy0(PSZ pszTarget,
131	const char *pszSource,
132	ULONG cbSource)
133	{
134	ULONG ul = 0;
135	PSZ pTarget = pszTarget,
136	pSource = (PSZ)pszSource;
137
138	for (ul = 0; ul < cbSource; ul++)
139	if (*pSource)
140	pTarget++ = pSource++;
141	else
142	break;
143	*pTarget = 0;
144
145	return (ul);
146	}
147
148	/*
149	* strhCount:
150	* this counts the occurences of c in pszSearch.
151	*/
152
153	ULONG strhCount(const char *pszSearch,
154	CHAR c)
155	{
156	PSZ p = (PSZ)pszSearch;
157	ULONG ulCount = 0;
158	while (TRUE)
159	{
160	p = strchr(p, c);
161	if (p)
162	{
163	ulCount++;
164	p++;
165	}
166	else
167	break;
168	}
169	return (ulCount);
170	}
171
172	/*
173	*@@ strhIsDecimal:
174	* returns TRUE if psz consists of decimal digits only.
175	*/
176
177	BOOL strhIsDecimal(PSZ psz)
178	{
179	PSZ p = psz;
180	while (*p != 0)
181	{
182	if (isdigit(*p) == 0)
183	return (FALSE);
184	p++;
185	}
186
187	return (TRUE);
188	}
189
190	/*
191	*@@ strhSubstr:
192	* this creates a new PSZ containing the string
193	* from pBegin to pEnd, excluding the pEnd character.
194	* The new string is null-terminated. The caller
195	* must free() the new string after use.
196	*
197	* Example:
198	+ "1234567890"
199	+ ^ ^
200	+ p1 p2
201	+ strhSubstr(p1, p2)
202	* would return a new string containing "2345678".
203	*/
204
205	PSZ strhSubstr(const char pBegin, const char pEnd)
206	{
207	ULONG cbSubstr = (pEnd - pBegin);
208	PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
209	strhncpy0(pszSubstr, pBegin, cbSubstr);
210	return (pszSubstr);
211	}
212
213	/*
214	*@@ strhExtract:
215	* searches pszBuf for the cOpen character and returns
216	* the data in between cOpen and cClose, excluding
217	* those two characters, in a newly allocated buffer
218	* which you must free() afterwards.
219	*
220	* Spaces and newlines/linefeeds are skipped.
221	*
222	* If the search was successful, the new buffer
223	* is returned and, if (ppEnd != NULL), *ppEnd points
224	* to the first character after the cClose character
225	* found in the buffer.
226	*
227	* If the search was not successful, NULL is
228	* returned, and *ppEnd is unchanged.
229	*
230	* If another cOpen character is found before
231	* cClose, matching cClose characters will be skipped.
232	* You can therefore nest the cOpen and cClose
233	* characters.
234	*
235	* This function ignores cOpen and cClose characters
236	* in C-style comments and strings surrounded by
237	* double quotes.
238	*
239	* Example:
240	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
241	+ pEnd;
242	+ strhExtract(pszBuf,
243	+ '{', '}',
244	+ &pEnd)
245	* would return a new buffer containing " --blah-- ",
246	* and ppEnd would afterwards point to the space
247	* before "next" in the static buffer.
248	*
249	*@@added V0.9.0 [umoeller]
250	*/
251
252	PSZ strhExtract(PSZ pszBuf, // in: search buffer
253	CHAR cOpen, // in: opening char
254	CHAR cClose, // in: closing char
255	PSZ *ppEnd) // out: if != NULL, receives first character after closing char
256	{
257	PSZ pszReturn = NULL;
258
259	if (pszBuf)
260	{
261	PSZ pOpen = strchr(pszBuf, cOpen);
262	if (pOpen)
263	{
264	// opening char found:
265	// now go thru the whole rest of the buffer
266	PSZ p = pOpen+1;
267	LONG lLevel = 1; // if this goes 0, we're done
268	while (*p)
269	{
270	if (*p == cOpen)
271	lLevel++;
272	else if (*p == cClose)
273	{
274	lLevel--;
275	if (lLevel <= 0)
276	{
277	// matching closing bracket found:
278	// extract string
279	pszReturn = strhSubstr(pOpen+1, // after cOpen
280	p); // excluding cClose
281	if (ppEnd)
282	*ppEnd = p+1;
283	break; // while (*p)
284	}
285	}
286	else if (*p == '\"')
287	{
288	// beginning of string:
289	PSZ p2 = p+1;
290	// find end of string
291	while ((p2) && (p2 != '\"'))
292	p2++;
293
294	if (*p2 == '\"')
295	// closing quote found:
296	// search on after that
297	p = p2; // raised below
298	else
299	break; // while (*p)
300	}
301
302	p++;
303	}
304	}
305	}
306
307	return (pszReturn);
308	}
309
310	/*
311	*@@ strhQuote:
312	* similar to strhExtract, except that
313	* opening and closing chars are the same,
314	* and therefore no nesting is possible.
315	* Useful for extracting stuff between
316	* quotes.
317	*
318	*@@added V0.9.0 [umoeller]
319	*/
320
321	PSZ strhQuote(PSZ pszBuf,
322	CHAR cQuote,
323	PSZ *ppEnd)
324	{
325	PSZ pszReturn = NULL,
326	p1 = NULL;
327	if ((p1 = strchr(pszBuf, cQuote)))
328	{
329	PSZ p2 = strchr(p1+1, cQuote);
330	if (p2)
331	{
332	pszReturn = strhSubstr(p1+1, p2);
333	if (ppEnd)
334	// store closing char
335	*ppEnd = p2 + 1;
336	}
337	}
338
339	return (pszReturn);
340	}
341
342	/*
343	*@@ strhStrip:
344	* removes all double spaces.
345	* This copies within the "psz" buffer.
346	* If any double spaces are found, the
347	* string will be shorter than before,
348	* but the buffer is _not_ reallocated,
349	* so there will be unused bytes at the
350	* end.
351	*
352	* Returns the number of spaces removed.
353	*
354	*@@added V0.9.0 [umoeller]
355	*/
356
357	ULONG strhStrip(PSZ psz) // in/out: string
358	{
359	PSZ p;
360	ULONG cb = strlen(psz),
361	ulrc = 0;
362
363	for (p = psz; p < psz+cb; p++)
364	{
365	if ((p == ' ') && ((p+1) == ' '))
366	{
367	PSZ p2 = p;
368	while (*p2)
369	{
370	p2 = (p2+1);
371	p2++;
372	}
373	cb--;
374	p--;
375	ulrc++;
376	}
377	}
378	return (ulrc);
379	}
380
381	/*
382	*@@ strhins:
383	* this inserts one string into another.
384	*
385	* pszInsert is inserted into pszBuffer at offset
386	* ulInsertOfs (which counts from 0).
387	*
388	* A newly allocated string is returned. pszBuffer is
389	* not changed. The new string should be free()'d after
390	* use.
391	*
392	* Upon errors, NULL is returned.
393	*
394	*@@changed V0.9.0 [umoeller]: completely rewritten.
395	*/
396
397	PSZ strhins(const char *pcszBuffer,
398	ULONG ulInsertOfs,
399	const char *pcszInsert)
400	{
401	PSZ pszNew = NULL;
402
403	if ((pcszBuffer) && (pcszInsert))
404	{
405	do {
406	ULONG cbBuffer = strlen(pcszBuffer);
407	ULONG cbInsert = strlen(pcszInsert);
408
409	// check string length
410	if (ulInsertOfs > cbBuffer + 1)
411	break; // do
412
413	// OK, let's go.
414	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
415
416	// copy stuff before pInsertPos
417	memcpy(pszNew,
418	pcszBuffer,
419	ulInsertOfs);
420	// copy string to be inserted
421	memcpy(pszNew + ulInsertOfs,
422	pcszInsert,
423	cbInsert);
424	// copy stuff after pInsertPos
425	strcpy(pszNew + ulInsertOfs + cbInsert,
426	pcszBuffer + ulInsertOfs);
427	} while (FALSE);
428	}
429
430	return (pszNew);
431	}
432
433	/*
434	*@@ strhFindReplace:
435	* wrapper around xstrFindReplace to work with C strings.
436	* Note that *ppszBuf can get reallocated and must
437	* be free()'able.
438	*
439	* Repetitive use of this wrapper is not recommended
440	* because it is considerably slower than xstrFindReplace.
441	*
442	*@@added V0.9.6 (2000-11-01) [umoeller]
443	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
444	*/
445
446	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
447	PULONG pulOfs, // in: where to begin search (0 = start);
448	// out: ofs of first char after replacement string
449	const char *pcszSearch, // in: search string; cannot be NULL
450	const char *pcszReplace) // in: replacement string; cannot be NULL
451	{
452	ULONG ulrc = 0;
453	XSTRING xstrBuf,
454	xstrFind,
455	xstrReplace;
456	size_t ShiftTable[256];
457	BOOL fRepeat = FALSE;
458	xstrInitSet(&xstrBuf, *ppszBuf);
459	// reallocated and returned, so we're safe
460	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
461	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
462	// these two are never freed, so we're safe too
463
464	if ((ulrc = xstrFindReplace(&xstrBuf,
465	pulOfs,
466	&xstrFind,
467	&xstrReplace,
468	ShiftTable,
469	&fRepeat)))
470	// replaced:
471	*ppszBuf = xstrBuf.psz;
472
473	return (ulrc);
474	}
475
476	/*
477	* strhWords:
478	* returns the no. of words in "psz".
479	* A string is considered a "word" if
480	* it is surrounded by spaces only.
481	*
482	*@@added V0.9.0 [umoeller]
483	*/
484
485	ULONG strhWords(PSZ psz)
486	{
487	PSZ p;
488	ULONG cb = strlen(psz),
489	ulWords = 0;
490	if (cb > 1)
491	{
492	ulWords = 1;
493	for (p = psz; p < psz+cb; p++)
494	if (*p == ' ')
495	ulWords++;
496	}
497	return (ulWords);
498	}
499
500	/*
501	*@@ strhThousandsULong:
502	* converts a ULONG into a decimal string, while
503	* inserting thousands separators into it. Specify
504	* the separator character in cThousands.
505	*
506	* Returns pszTarget so you can use it directly
507	* with sprintf and the "%s" flag.
508	*
509	* For cThousands, you should use the data in
510	* OS2.INI ("PM_National" application), which is
511	* always set according to the "Country" object.
512	* You can use prfhQueryCountrySettings to
513	* retrieve this setting.
514	*
515	* Use strhThousandsDouble for "double" values.
516	*/
517
518	PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
519	ULONG ul, // in: decimal to convert
520	CHAR cThousands) // in: separator char (e.g. '.')
521	{
522	USHORT ust, uss, usc;
523	CHAR szTemp[40];
524	sprintf(szTemp, "%lu", ul);
525
526	ust = 0;
527	usc = strlen(szTemp);
528	for (uss = 0; uss < usc; uss++)
529	{
530	if (uss)
531	if (((usc - uss) % 3) == 0)
532	{
533	pszTarget[ust] = cThousands;
534	ust++;
535	}
536	pszTarget[ust] = szTemp[uss];
537	ust++;
538	}
539	pszTarget[ust] = '\0';
540
541	return (pszTarget);
542	}
543
544	/*
545	*@@ strhThousandsDouble:
546	* like strhThousandsULong, but for a "double"
547	* value. Note that after-comma values are truncated.
548	*/
549
550	PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
551	{
552	USHORT ust, uss, usc;
553	CHAR szTemp[40];
554	sprintf(szTemp, "%.0f", floor(dbl));
555
556	ust = 0;
557	usc = strlen(szTemp);
558	for (uss = 0; uss < usc; uss++)
559	{
560	if (uss)
561	if (((usc - uss) % 3) == 0)
562	{
563	pszTarget[ust] = cThousands;
564	ust++;
565	}
566	pszTarget[ust] = szTemp[uss];
567	ust++;
568	}
569	pszTarget[ust] = '\0';
570
571	return (pszTarget);
572	}
573
574	/*
575	*@@ strhVariableDouble:
576	* like strhThousandsULong, but for a "double" value, and
577	* with a variable number of decimal places depending on the
578	* size of the quantity.
579	*
580	*@@added V0.9.6 (2000-11-12) [pr]
581	*/
582
583	PSZ strhVariableDouble(PSZ pszTarget,
584	double dbl,
585	PSZ pszUnits,
586	CHAR cThousands)
587	{
588	if (dbl < 100.0)
589	sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
590	else
591	if (dbl < 1000.0)
592	sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
593	else
594	strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
595	pszUnits);
596
597	return(pszTarget);
598	}
599
600	/*
601	*@@ strhFileDate:
602	* converts file date data to a string (to pszBuf).
603	* You can pass any FDATE structure to this function,
604	* which are returned in those FILEFINDBUF* or
605	* FILESTATUS* structs by the Dos* functions.
606	*
607	* ulDateFormat is the PM setting for the date format,
608	* as set in the "Country" object, and can be queried using
609	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
610	*
611	* meaning:
612	* -- 0 mm.dd.yyyy (English)
613	* -- 1 dd.mm.yyyy (e.g. German)
614	* -- 2 yyyy.mm.dd (Japanese, ISO)
615	* -- 3 yyyy.dd.mm
616	*
617	* cDateSep is used as a date separator (e.g. '.').
618	* This can be queried using:
619	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
620	*
621	* Alternatively, you can query all the country settings
622	* at once using prfhQueryCountrySettings (prfh.c).
623	*
624	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
625	*/
626
627	VOID strhFileDate(PSZ pszBuf, // out: string returned
628	FDATE *pfDate, // in: date information
629	ULONG ulDateFormat, // in: date format (0-3)
630	CHAR cDateSep) // in: date separator (e.g. '.')
631	{
632	DATETIME dt;
633	dt.day = pfDate->day;
634	dt.month = pfDate->month;
635	dt.year = pfDate->year + 1980;
636
637	strhDateTime(pszBuf,
638	NULL, // no time
639	&dt,
640	ulDateFormat,
641	cDateSep,
642	0, 0); // no time
643	}
644
645	/*
646	*@@ strhFileTime:
647	* converts file time data to a string (to pszBuf).
648	* You can pass any FTIME structure to this function,
649	* which are returned in those FILEFINDBUF* or
650	* FILESTATUS* structs by the Dos* functions.
651	*
652	* ulTimeFormat is the PM setting for the time format,
653	* as set in the "Country" object, and can be queried using
654	+ PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
655	* meaning:
656	* -- 0 12-hour clock
657	* -- >0 24-hour clock
658	*
659	* cDateSep is used as a time separator (e.g. ':').
660	* This can be queried using:
661	+ prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
662	*
663	* Alternatively, you can query all the country settings
664	* at once using prfhQueryCountrySettings (prfh.c).
665	*
666	*@@changed 99-03-15 fixed 12-hour crash
667	*@@changed (99-11-07) [umoeller]: now calling strhDateTime
668	*/
669
670	VOID strhFileTime(PSZ pszBuf, // out: string returned
671	FTIME *pfTime, // in: time information
672	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
673	CHAR cTimeSep) // in: time separator (e.g. ':')
674	{
675	DATETIME dt;
676	dt.hours = pfTime->hours;
677	dt.minutes = pfTime->minutes;
678	dt.seconds = pfTime->twosecs * 2;
679
680	strhDateTime(NULL, // no date
681	pszBuf,
682	&dt,
683	0, 0, // no date
684	ulTimeFormat,
685	cTimeSep);
686	}
687
688	/*
689	*@@ strhDateTime:
690	* converts Control Program DATETIME info
691	* into two strings. See strhFileDate and strhFileTime
692	* for more detailed parameter descriptions.
693	*
694	*@@added V0.9.0 (99-11-07) [umoeller]
695	*/
696
697	VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
698	PSZ pszTime, // out: time string returned (can be NULL)
699	DATETIME *pDateTime, // in: date/time information
700	ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
701	CHAR cDateSep, // in: date separator (e.g. '.')
702	ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
703	CHAR cTimeSep) // in: time separator (e.g. ':')
704	{
705	if (pszDate)
706	{
707	switch (ulDateFormat)
708	{
709	case 0: // mm.dd.yyyy (English)
710	sprintf(pszDate, "%02d%c%02d%c%04d",
711	pDateTime->month,
712	cDateSep,
713	pDateTime->day,
714	cDateSep,
715	pDateTime->year);
716	break;
717
718	case 1: // dd.mm.yyyy (e.g. German)
719	sprintf(pszDate, "%02d%c%02d%c%04d",
720	pDateTime->day,
721	cDateSep,
722	pDateTime->month,
723	cDateSep,
724	pDateTime->year);
725	break;
726
727	case 2: // yyyy.mm.dd (Japanese)
728	sprintf(pszDate, "%04d%c%02d%c%02d",
729	pDateTime->year,
730	cDateSep,
731	pDateTime->month,
732	cDateSep,
733	pDateTime->day);
734	break;
735
736	default: // yyyy.dd.mm
737	sprintf(pszDate, "%04d%c%02d%c%02d",
738	pDateTime->year,
739	cDateSep,
740	pDateTime->day,
741	cDateSep,
742	pDateTime->month);
743	break;
744	}
745	}
746
747	if (pszTime)
748	{
749	if (ulTimeFormat == 0)
750	{
751	// for 12-hour clock, we need additional INI data
752	CHAR szAMPM[10] = "err";
753
754	if (pDateTime->hours > 12)
755	{
756	// > 12h: PM.
757
758	// Note: 12:xx noon is 12 AM, not PM (even though
759	// AM stands for "ante meridiam", but English is just
760	// not logical), so that's handled below.
761
762	PrfQueryProfileString(HINI_USER,
763	"PM_National",
764	"s2359", // key
765	"PM", // default
766	szAMPM, sizeof(szAMPM)-1);
767	sprintf(pszTime, "%02d%c%02d%c%02d %s",
768	// leave 12 == 12 (not 0)
769	pDateTime->hours % 12,
770	cTimeSep,
771	pDateTime->minutes,
772	cTimeSep,
773	pDateTime->seconds,
774	szAMPM);
775	}
776	else
777	{
778	// <= 12h: AM
779	PrfQueryProfileString(HINI_USER,
780	"PM_National",
781	"s1159", // key
782	"AM", // default
783	szAMPM, sizeof(szAMPM)-1);
784	sprintf(pszTime, "%02d%c%02d%c%02d %s",
785	pDateTime->hours,
786	cTimeSep,
787	pDateTime->minutes,
788	cTimeSep,
789	pDateTime->seconds,
790	szAMPM);
791	}
792	}
793	else
794	// 24-hour clock
795	sprintf(pszTime, "%02d%c%02d%c%02d",
796	pDateTime->hours,
797	cTimeSep,
798	pDateTime->minutes,
799	cTimeSep,
800	pDateTime->seconds);
801	}
802	}
803
804	/*
805	*@@ strhGetWord:
806	* finds word boundaries.
807	*
808	* *ppszStart is used as the beginning of the
809	* search.
810	*
811	* If a word is found, *ppszStart is set to
812	* the first character of the word which was
813	* found and *ppszEnd receives the address
814	* of the first character _after_ the word,
815	* which is probably a space or a \n or \r char.
816	* We then return TRUE.
817	*
818	* The search is stopped if a null character
819	* is found or pLimit is reached. In that case,
820	* FALSE is returned.
821	*
822	*@@added V0.9.1 (2000-02-13) [umoeller]
823	*/
824
825	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
826	// out: start of word (if TRUE is returned)
827	const char pLimit, // in: ptr to last char after ppszStart to be
828	// searched; if the word does not end before
829	// or with this char, FALSE is returned
830	const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
831	const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
832	PSZ *ppszEnd) // out: first char _after_ word
833	// (if TRUE is returned)
834	{
835	// characters after which a word can be started
836	// const char *pcszBeginChars = "\x0d\x0a ";
837	// const char *pcszEndChars = "\x0d\x0a /-";
838
839	PSZ pStart = *ppszStart;
840
841	// find start of word
842	while ( (pStart < (PSZ)pLimit)
843	&& (strchr(pcszBeginChars, *pStart))
844	)
845	// if char is a "before word" char: go for next
846	pStart++;
847
848	if (pStart < (PSZ)pLimit)
849	{
850	// found a valid "word start" character
851	// (which is not in pcszBeginChars):
852
853	// find end of word
854	PSZ pEndOfWord = pStart;
855	while ( (pEndOfWord <= (PSZ)pLimit)
856	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
857	)
858	// if char is not an "end word" char: go for next
859	pEndOfWord++;
860
861	if (pEndOfWord <= (PSZ)pLimit)
862	{
863	// whoa, got a word:
864	*ppszStart = pStart;
865	*ppszEnd = pEndOfWord;
866	return (TRUE);
867	}
868	}
869
870	return (FALSE);
871	}
872
873	/*
874	*@@ strhIsWord:
875	* returns TRUE if p points to a "word"
876	* in pcszBuf.
877	*
878	* p is considered a word if the character _before_
879	* it is in pcszBeginChars and the char _after_
880	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
881	*
882	*@@added V0.9.6 (2000-11-12) [umoeller]
883	*/
884
885	BOOL strhIsWord(const char *pcszBuf,
886	const char *p, // in: start of word
887	ULONG cbSearch, // in: length of word
888	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
889	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
890	{
891	BOOL fEndOK = FALSE;
892
893	// check previous char
894	if ( (p == pcszBuf)
895	\|\| (strchr(pcszBeginChars, *(p-1)))
896	)
897	{
898	// OK, valid begin char:
899	// check end char
900	CHAR cNextChar = *(p + cbSearch);
901	if (cNextChar == 0)
902	fEndOK = TRUE;
903	else
904	{
905	char *pc = strchr(pcszEndChars, cNextChar);
906	if (pc)
907	// OK, is end char: avoid doubles of that char,
908	// but allow spaces
909	if ( (cNextChar+1 != *pc)
910	\|\| (cNextChar+1 == ' ')
911	\|\| (cNextChar+1 == 0)
912	)
913	fEndOK = TRUE;
914	}
915	}
916
917	return (fEndOK);
918	}
919
920	/*
921	*@@ strhFindWord:
922	* searches for pszSearch in pszBuf, which is
923	* returned if found (or NULL if not).
924	*
925	* As opposed to strstr, this finds pszSearch
926	* only if it is a "word". A search string is
927	* considered a word if the character _before_
928	* it is in pcszBeginChars and the char _after_
929	* it is in pcszEndChars.
930	*
931	* Example:
932	+ strhFindWord("This is an example.", "is");
933	+ returns ...........^ this, but not the "is" in "This".
934	*
935	* The algorithm here uses strstr to find pszSearch in pszBuf
936	* and performs additional "is-word" checks for each item found
937	* (by calling strhIsWord).
938	*
939	* Note that this function is fairly slow compared to xstrFindWord.
940	*
941	*@@added V0.9.0 (99-11-08) [umoeller]
942	*@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
943	*/
944
945	PSZ strhFindWord(const char *pszBuf,
946	const char *pszSearch,
947	const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
948	const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
949	{
950	PSZ pszReturn = 0;
951	ULONG cbBuf = strlen(pszBuf),
952	cbSearch = strlen(pszSearch);
953
954	if ((cbBuf) && (cbSearch))
955	{
956	const char *p = pszBuf;
957
958	do // while p
959	{
960	p = strstr(p, pszSearch);
961	if (p)
962	{
963	// string found:
964	// check if that's a word
965
966	if (strhIsWord(pszBuf,
967	p,
968	cbSearch,
969	pcszBeginChars,
970	pcszEndChars))
971	{
972	// valid end char:
973	pszReturn = (PSZ)p;
974	break;
975	}
976
977	p += cbSearch;
978	}
979	} while (p);
980
981	}
982	return (pszReturn);
983	}
984
985	/*
986	*@@ strhFindEOL:
987	* returns a pointer to the next \r, \n or null character
988	* following pszSearchIn. Stores the offset in *pulOffset.
989	*
990	* This should never return NULL because at some point,
991	* there will be a null byte in your string.
992	*
993	*@@added V0.9.4 (2000-07-01) [umoeller]
994	*/
995
996	PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
997	PULONG pulOffset) // out: offset (ptr can be NULL)
998	{
999	const char *p = pcszSearchIn,
1000	*prc = 0;
1001	while (TRUE)
1002	{
1003	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1004	{
1005	prc = p;
1006	break;
1007	}
1008	p++;
1009	}
1010
1011	if ((pulOffset) && (prc))
1012	*pulOffset = prc - pcszSearchIn;
1013
1014	return ((PSZ)prc);
1015	}
1016
1017	/*
1018	*@@ strhFindNextLine:
1019	* like strhFindEOL, but this returns the character
1020	* _after_ \r or \n. Note that this might return
1021	* a pointer to terminating NULL character also.
1022	*/
1023
1024	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1025	{
1026	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1027	// pEOL now points to the \r char or the terminating 0 byte;
1028	// if not null byte, advance pointer
1029	PSZ pNextLine = pEOL;
1030	if (*pNextLine == '\r')
1031	pNextLine++;
1032	if (*pNextLine == '\n')
1033	pNextLine++;
1034	if (pulOffset)
1035	*pulOffset = pNextLine - pszSearchIn;
1036	return (pNextLine);
1037	}
1038
1039	/*
1040	*@@ strhBeautifyTitle:
1041	* replaces all line breaks (0xd, 0xa) with spaces.
1042	*/
1043
1044	BOOL strhBeautifyTitle(PSZ psz)
1045	{
1046	BOOL rc = FALSE;
1047	CHAR *p;
1048	while ((p = strchr(psz, 0xa)))
1049	{
1050	*p = ' ';
1051	rc = TRUE;
1052	}
1053	while ((p = strchr(psz, 0xd)))
1054	{
1055	*p = ' ';
1056	rc = TRUE;
1057	}
1058	return (rc);
1059	}
1060
1061	/*
1062	* strhFindAttribValue:
1063	* searches for pszAttrib in pszSearchIn; if found,
1064	* returns the first character after the "=" char.
1065	* If "=" is not found, a space, \r, and \n are
1066	* also accepted. This function searches without
1067	* respecting case.
1068	*
1069	* <B>Example:</B>
1070	+ strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1071	+
1072	+ returns ....................... ^ this address.
1073	*
1074	*@@added V0.9.0 [umoeller]
1075	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1076	*/
1077
1078	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1079	{
1080	PSZ prc = 0;
1081	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1082	p,
1083	p2;
1084	ULONG cbAttrib = strlen(pszAttrib);
1085
1086	// 1) find space char
1087	while ((p = strchr(pszSearchIn2, ' ')))
1088	{
1089	CHAR c;
1090	p++;
1091	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1092	// now check whether the p+strlen(pszAttrib)
1093	// is a valid end-of-tag character
1094	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1095	&& ( (c == ' ')
1096	\|\| (c == '>')
1097	\|\| (c == '=')
1098	\|\| (c == '\r')
1099	\|\| (c == '\n')
1100	\|\| (c == 0)
1101	)
1102	)
1103	{
1104	// yes:
1105	CHAR c2;
1106	p2 = p + cbAttrib;
1107	c2 = *p2;
1108	while ( ( (c2 == ' ')
1109	\|\| (c2 == '=')
1110	\|\| (c2 == '\n')
1111	\|\| (c2 == '\r')
1112	)
1113	&& (c2 != 0)
1114	)
1115	c2 = *++p2;
1116	prc = p2;
1117	break; // first while
1118	}
1119	pszSearchIn2++;
1120	}
1121	return (prc);
1122	}
1123
1124	/*
1125	* strhGetNumAttribValue:
1126	* stores the numerical parameter value of an HTML-style
1127	* tag in *pl.
1128	*
1129	* Returns the address of the tag parameter in the
1130	* search buffer, if found, or NULL.
1131	*
1132	* <B>Example:</B>
1133	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1134	*
1135	* stores 123 in the "l" variable.
1136	*
1137	*@@added V0.9.0 [umoeller]
1138	*/
1139
1140	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1141	const char *pszTag, // e.g. "INDEX"
1142	PLONG pl) // out: numerical value
1143	{
1144	PSZ pParam;
1145	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1146	sscanf(pParam, "%ld", pl);
1147
1148	return (pParam);
1149	}
1150
1151	/*
1152	* strhGetTextAttr:
1153	* retrieves the attribute value of a textual HTML-style tag
1154	* in a newly allocated buffer, which is returned,
1155	* or NULL if attribute not found.
1156	* If an attribute value is to contain spaces, it
1157	* must be enclosed in quotes.
1158	*
1159	* The offset of the attribute data in pszSearchIn is
1160	* returned in *pulOffset so that you can do multiple
1161	* searches.
1162	*
1163	* This returns a new buffer, which should be free()'d after use.
1164	*
1165	* <B>Example:</B>
1166	+ ULONG ulOfs = 0;
1167	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1168	+ ............^ ulOfs
1169	*
1170	* returns a new string with the value "blublub" (without
1171	* quotes) and sets ulOfs to 12.
1172	*
1173	*@@added V0.9.0 [umoeller]
1174	*/
1175
1176	PSZ strhGetTextAttr(const char *pszSearchIn,
1177	const char *pszTag,
1178	PULONG pulOffset) // out: offset where found
1179	{
1180	PSZ pParam,
1181	pParam2,
1182	prc = NULL;
1183	ULONG ulCount = 0;
1184	LONG lNestingLevel = 0;
1185
1186	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1187	{
1188	// determine end character to search for: a space
1189	CHAR cEnd = ' ';
1190	if (*pParam == '\"')
1191	{
1192	// or, if the data is enclosed in quotes, a quote
1193	cEnd = '\"';
1194	pParam++;
1195	}
1196
1197	if (pulOffset)
1198	// store the offset
1199	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1200
1201	// now find end of attribute
1202	pParam2 = pParam;
1203	while (*pParam)
1204	{
1205	if (*pParam == cEnd)
1206	// end character found
1207	break;
1208	else if (*pParam == '<')
1209	// yet another opening tag found:
1210	// this is probably some "<" in the attributes
1211	lNestingLevel++;
1212	else if (*pParam == '>')
1213	{
1214	lNestingLevel--;
1215	if (lNestingLevel < 0)
1216	// end of tag found:
1217	break;
1218	}
1219	ulCount++;
1220	pParam++;
1221	}
1222
1223	// copy attribute to new buffer
1224	if (ulCount)
1225	{
1226	prc = (PSZ)malloc(ulCount+1);
1227	memcpy(prc, pParam2, ulCount);
1228	*(prc+ulCount) = 0;
1229	}
1230	}
1231	return (prc);
1232	}
1233
1234	/*
1235	* strhFindEndOfTag:
1236	* returns a pointer to the ">" char
1237	* which seems to terminate the tag beginning
1238	* after pszBeginOfTag.
1239	*
1240	* If additional "<" chars are found, we look
1241	* for additional ">" characters too.
1242	*
1243	* Note: You must pass the address of the opening
1244	* '<' character to this function.
1245	*
1246	* Example:
1247	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1248	+ strhFindEndOfTag(pszTest)
1249	+ returns.................................^ this.
1250	*
1251	*@@added V0.9.0 [umoeller]
1252	*/
1253
1254	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1255	{
1256	PSZ p = (PSZ)pszBeginOfTag,
1257	prc = NULL;
1258	LONG lNestingLevel = 0;
1259
1260	while (*p)
1261	{
1262	if (*p == '<')
1263	// another opening tag found:
1264	lNestingLevel++;
1265	else if (*p == '>')
1266	{
1267	// closing tag found:
1268	lNestingLevel--;
1269	if (lNestingLevel < 1)
1270	{
1271	// corresponding: return this
1272	prc = p;
1273	break;
1274	}
1275	}
1276	p++;
1277	}
1278
1279	return (prc);
1280	}
1281
1282	/*
1283	* strhGetBlock:
1284	* this complex function searches the given string
1285	* for a pair of opening/closing HTML-style tags.
1286	*
1287	* If found, this routine returns TRUE and does
1288	* the following:
1289	*
1290	* 1) allocate a new buffer, copy the text
1291	* enclosed by the opening/closing tags
1292	* into it and set *ppszBlock to that
1293	* buffer;
1294	*
1295	* 2) if the opening tag has any attributes,
1296	* allocate another buffer, copy the
1297	* attributes into it and set *ppszAttrs
1298	* to that buffer; if no attributes are
1299	* found, *ppszAttrs will be NULL;
1300	*
1301	* 3) set *pulOffset to the offset from the
1302	* beginning of *ppszSearchIn where the
1303	* opening tag was found;
1304	*
1305	* 4) advance *ppszSearchIn to after the
1306	* closing tag, so that you can do
1307	* multiple searches without finding the
1308	* same tags twice.
1309	*
1310	* All buffers should be freed using free().
1311	*
1312	* This returns the following:
1313	* -- 0: no error
1314	* -- 1: tag not found at all (doesn't have to be an error)
1315	* -- 2: begin tag found, but no corresponding end tag found. This
1316	* is a real error.
1317	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1318	*
1319	* <B>Example:</B>
1320	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1321	+ PSZ pszBlock, pszAttrs;
1322	+ ULONG ulOfs;
1323	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1324	*
1325	* would do the following:
1326	*
1327	* 1) set pszBlock to a new string containing "This is page 1."
1328	* without quotes;
1329	*
1330	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1331	*
1332	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1333	*
1334	* 4) pSearch would be advanced to point to the "More text"
1335	* string in the original buffer.
1336	*
1337	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1338	* for HTML parsing.
1339	*
1340	*@@added V0.9.0 [umoeller]
1341	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1342	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1343	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1344	*/
1345
1346	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1347	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1348	PSZ pszTag,
1349	PSZ *ppszBlock, // out: block enclosed by the tags
1350	PSZ *ppszAttribs, // out: attributes of the opening tag
1351	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1352	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1353	{
1354	ULONG ulrc = 1;
1355	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1356	pszSearch2 = pszBeginTag,
1357	pszClosingTag;
1358	ULONG cbTag = strlen(pszTag);
1359
1360	// go thru the block and check all tags if it's the
1361	// begin tag we're looking for
1362	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1363	{
1364	if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1365	// yes: stop
1366	break;
1367	else
1368	pszBeginTag++;
1369	}
1370
1371	if (pszBeginTag)
1372	{
1373	// we found <TAG>:
1374	ULONG ulNestingLevel = 0;
1375
1376	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1377	// strchr(pszBeginTag, '>');
1378	if (pszEndOfBeginTag)
1379	{
1380	// does the caller want the attributes?
1381	if (ppszAttribs)
1382	{
1383	// yes: then copy them
1384	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1385	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1386	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1387	// add terminating 0
1388	*(pszAttrs + ulAttrLen) = 0;
1389
1390	*ppszAttribs = pszAttrs;
1391	}
1392
1393	// output offset of where we found the begin tag
1394	if (pulOfsBeginTag)
1395	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1396
1397	// now find corresponding closing tag (e.g. "</BODY>"
1398	pszBeginTag = pszEndOfBeginTag+1;
1399	// now we're behind the '>' char of the opening tag
1400	// increase offset of that too
1401	if (pulOfsBeginBlock)
1402	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1403
1404	// find next closing tag;
1405	// for the first run, pszSearch2 points to right
1406	// after the '>' char of the opening tag
1407	pszSearch2 = pszBeginTag;
1408	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1409	&& (pszClosingTag = strstr(pszSearch2, "<"))
1410	)
1411	{
1412	// if we have another opening tag before our closing
1413	// tag, we need to have several closing tags before
1414	// we're done
1415	if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1416	ulNestingLevel++;
1417	else
1418	{
1419	// is this ours?
1420	if ( (*(pszClosingTag+1) == '/')
1421	&& (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1422	)
1423	{
1424	// we've found a matching closing tag; is
1425	// it ours?
1426	if (ulNestingLevel == 0)
1427	{
1428	// our closing tag found:
1429	// allocate mem for a new buffer
1430	// and extract all the text between
1431	// open and closing tags to it
1432	ULONG ulLen = pszClosingTag - pszBeginTag;
1433	if (ppszBlock)
1434	{
1435	PSZ pNew = (PSZ)malloc(ulLen + 1);
1436	strhncpy0(pNew, pszBeginTag, ulLen);
1437	*ppszBlock = pNew;
1438	}
1439
1440	// raise search offset to after the closing tag
1441	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1442
1443	ulrc = 0;
1444
1445	break;
1446	} else
1447	// not our closing tag:
1448	ulNestingLevel--;
1449	}
1450	}
1451	// no matching closing tag: search on after that
1452	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1453	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1454
1455	if (!pszClosingTag)
1456	// no matching closing tag found:
1457	// return 2 (closing tag not found)
1458	ulrc = 2;
1459	} // end if (pszBeginTag)
1460	else
1461	// no matching ">" for opening tag found:
1462	ulrc = 3;
1463	}
1464
1465	return (ulrc);
1466	}
1467
1468	/* ******************************************************************
1469	*
1470	* Miscellaneous
1471	*
1472	********************************************************************/
1473
1474	/*
1475	*@@ strhArrayAppend:
1476	* this appends a string to a "string array".
1477	*
1478	* A string array is considered a sequence of
1479	* zero-terminated strings in memory. That is,
1480	* after each string's null-byte, the next
1481	* string comes up.
1482	*
1483	* This is useful for composing a single block
1484	* of memory from, say, list box entries, which
1485	* can then be written to OS2.INI in one flush.
1486	*
1487	* To append strings to such an array, call this
1488	* function for each string you wish to append.
1489	* This will re-allocate *ppszRoot with each call,
1490	* and update *pcbRoot, which then contains the
1491	* total size of all strings (including all null
1492	* terminators).
1493	*
1494	* Pass *pcbRoot to PrfSaveProfileData to have the
1495	* block saved.
1496	*
1497	* Note: On the first call, ppszRoot and pcbRoot
1498	* _must_ be both NULL, or this crashes.
1499	*/
1500
1501	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1502	const char *pcszNew, // in: string to append
1503	PULONG pcbRoot) // in/out: size of array
1504	{
1505	ULONG cbNew = strlen(pcszNew);
1506	PSZ pszTemp = (PSZ)malloc(*pcbRoot
1507	+ cbNew
1508	+ 1); // two null bytes
1509	if (*ppszRoot)
1510	{
1511	// not first loop: copy old stuff
1512	memcpy(pszTemp,
1513	*ppszRoot,
1514	*pcbRoot);
1515	free(*ppszRoot);
1516	}
1517	// append new string
1518	strcpy(pszTemp + *pcbRoot,
1519	pcszNew);
1520	// update root
1521	*ppszRoot = pszTemp;
1522	// update length
1523	*pcbRoot += cbNew + 1;
1524	}
1525
1526	/*
1527	*@@ strhCreateDump:
1528	* this dumps a memory block into a string
1529	* and returns that string in a new buffer.
1530	*
1531	* You must free() the returned PSZ after use.
1532	*
1533	* The output looks like the following:
1534	*
1535	+ 0000: FE FF 0E 02 90 00 00 00 ........
1536	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1537	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1538	*
1539	* Each line is terminated with a newline (\n)
1540	* character only.
1541	*
1542	*@@added V0.9.1 (2000-01-22) [umoeller]
1543	*/
1544
1545	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1546	ULONG ulSize, // in: size of buffer
1547	ULONG ulIndent) // in: indentation of every line
1548	{
1549	PSZ pszReturn = 0;
1550	XSTRING strReturn;
1551	CHAR szTemp[1000];
1552
1553	PBYTE pbCurrent = pb; // current byte
1554	ULONG ulCount = 0,
1555	ulCharsInLine = 0; // if this grows > 7, a new line is started
1556	CHAR szLine[400] = "",
1557	szAscii[30] = " "; // ASCII representation; filled for every line
1558	PSZ pszLine = szLine,
1559	pszAscii = szAscii;
1560
1561	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1562
1563	for (pbCurrent = pb;
1564	ulCount < ulSize;
1565	pbCurrent++, ulCount++)
1566	{
1567	if (ulCharsInLine == 0)
1568	{
1569	memset(szLine, ' ', ulIndent);
1570	pszLine += ulIndent;
1571	}
1572	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1573
1574	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1575	// printable character:
1576	pszAscii = pbCurrent;
1577	else
1578	*pszAscii = '.';
1579	pszAscii++;
1580
1581	ulCharsInLine++;
1582	if ( (ulCharsInLine > 7) // 8 bytes added?
1583	\|\| (ulCount == ulSize-1) // end of buffer reached?
1584	)
1585	{
1586	// if we haven't had eight bytes yet,
1587	// fill buffer up to eight bytes with spaces
1588	ULONG ul2;
1589	for (ul2 = ulCharsInLine;
1590	ul2 < 8;
1591	ul2++)
1592	pszLine += sprintf(pszLine, " ");
1593
1594	sprintf(szTemp, "%04lX: %s %s\n",
1595	(ulCount & 0xFFFFFFF8), // offset in hex
1596	szLine, // bytes string
1597	szAscii); // ASCII string
1598	xstrcat(&strReturn, szTemp, 0);
1599
1600	// restart line buffer
1601	pszLine = szLine;
1602
1603	// clear ASCII buffer
1604	strcpy(szAscii, " ");
1605	pszAscii = szAscii;
1606
1607	// reset line counter
1608	ulCharsInLine = 0;
1609	}
1610	}
1611
1612	if (strReturn.cbAllocated)
1613	pszReturn = strReturn.psz;
1614
1615	return (pszReturn);
1616	}
1617
1618	/* ******************************************************************
1619	*
1620	* Wildcard matching
1621	*
1622	********************************************************************/
1623
1624	/*
1625	* The following code has been taken from "fnmatch.zip".
1626	*
1627	* (c) 1994-1996 by Eberhard Mattes.
1628	*/
1629
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. Note that C is
 * evaluated twice. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the terminating null character or a
 * separator. Note that C is evaluated up to three times. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))
1640
/*
 * skip_comp_os2:
 *      steps over the current component of the path SRC (OS/2 and
 *      DOS styles) and returns a pointer to the start of the next
 *      component. If no separator follows, the pointer returned
 *      addresses the terminating null character.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *pNext = src;

    /* Walk forward to the first separator or to the end of the
     * string, whichever comes first. */

    for (; !IS_OS2_COMP_END(*pNext); ++pNext)
        ;

    /* A separator is stepped over, the terminating null is not. */

    return (*pNext != 0) ? pNext + 1 : pNext;
}
1664
/*
 * has_colon:
 *      returns 1 if the null-terminated path P contains
 *      a colon, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    /* strchr() works on plain char strings; a colon has the same
     * value in both representations. */

    return (strchr((const char *)p, ':') != NULL);
}
1681
1682	/*
1683	* match_comp_os2:
1684	* Compare a single component (directory name or file name) of the
1685	* paths, for OS/2 and DOS styles. MASK and NAME point into a
1686	* component of the wildcard and the name to be checked, respectively.
1687	* Comparing stops at the next separator. The FLAGS argument is the
1688	* same as that of fnmatch(). HAS_DOT is true if a dot is in the
1689	* current component of NAME. The number of dots is not restricted,
1690	* even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1691	* Note that this function is recursive.
1692	*
1693	* (c) 1994-1996 by Eberhard Mattes.
1694	*/
1695
1696	static int match_comp_os2(const unsigned char *mask,
1697	const unsigned char *name,
1698	unsigned flags,
1699	int has_dot)
1700	{
1701	int rc;
1702
1703	for (;;)
1704	switch (*mask)
1705	{
1706	case 0:
1707
1708	/* There must be no extra characters at the end of NAME when
1709	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1710	* in that case, NAME may point to a separator. */
1711
1712	if (*name == 0)
1713	return FNM_MATCH;
1714	if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1715	return FNM_MATCH;
1716	return FNM_NOMATCH;
1717
1718	case '/':
1719	case '\\':
1720
1721	/* Separators match separators. */
1722
1723	if (IS_OS2_COMP_SEP(*name))
1724	return FNM_MATCH;
1725
1726	/* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1727	* is ignored at the end of NAME. */
1728
1729	if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1730	return FNM_MATCH;
1731
1732	/* Stop comparing at the separator. */
1733
1734	return FNM_NOMATCH;
1735
1736	case '?':
1737
1738	/* A question mark matches one character. It does not match
1739	* a dot. At the end of the component (and before a dot),
1740	* it also matches zero characters. */
1741
1742	if (name != '.' && !IS_OS2_COMP_END(name))
1743	++name;
1744	++mask;
1745	break;
1746
1747	case '*':
1748
1749	/* An asterisk matches zero or more characters. In DOS
1750	* mode, dots are not matched. */
1751
1752	do
1753	{
1754	++mask;
1755	}
1756	while (mask == '');
1757	for (;;)
1758	{
1759	rc = match_comp_os2(mask, name, flags, has_dot);
1760	if (rc != FNM_NOMATCH)
1761	return rc;
1762	if (IS_OS2_COMP_END(*name))
1763	return FNM_NOMATCH;
1764	if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
1765	return FNM_NOMATCH;
1766	++name;
1767	}
1768
1769	case '.':
1770
1771	/* A dot matches a dot. It also matches the implicit dot at
1772	* the end of a dot-less NAME. */
1773
1774	++mask;
1775	if (*name == '.')
1776	++name;
1777	else if (has_dot \|\| !IS_OS2_COMP_END(*name))
1778	return FNM_NOMATCH;
1779	break;
1780
1781	default:
1782
1783	/* All other characters match themselves. */
1784
1785	if (flags & _FNM_IGNORECASE)
1786	{
1787	if (tolower(mask) != tolower(name))
1788	return FNM_NOMATCH;
1789	}
1790	else
1791	{
1792	if (mask != name)
1793	return FNM_NOMATCH;
1794	}
1795	++mask;
1796	++name;
1797	break;
1798	}
1799	}
1800
1801	/*
1802	* match_comp:
1803	* compare a single component (directory name or file name) of the
1804	* paths, for all styles which need component-by-component matching.
1805	* MASK and NAME point to the start of a component of the wildcard and
1806	* the name to be checked, respectively. Comparing stops at the next
1807	* separator. The FLAGS argument is the same as that of fnmatch().
1808	* Return FNM_MATCH iff MASK and NAME match.
1809	*
1810	* (c) 1994-1996 by Eberhard Mattes.
1811	*/
1812
1813	static int match_comp(const unsigned char *mask,
1814	const unsigned char *name,
1815	unsigned flags)
1816	{
1817	const unsigned char *s;
1818
1819	switch (flags & _FNM_STYLE_MASK)
1820	{
1821	case _FNM_OS2:
1822	case _FNM_DOS:
1823
1824	/* For OS/2 and DOS styles, we add an implicit dot at the end of
1825	* the component if the component doesn't include a dot. */
1826
1827	s = name;
1828	while (!IS_OS2_COMP_END(s) && s != '.')
1829	++s;
1830	return match_comp_os2(mask, name, flags, *s == '.');
1831
1832	default:
1833	return FNM_ERR;
1834	}
1835	}
1836
/* In Unix styles, / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C)  ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is the terminating null character or a
 * separator. Note that C is evaluated twice. */

#define IS_UNIX_COMP_END(C)  ((C) == 0 || IS_UNIX_COMP_SEP (C))
1847
1848	/*
1849	* match_unix:
1850	* match complete paths for Unix styles. The FLAGS argument is the
1851	* same as that of fnmatch(). COMP points to the start of the current
1852	* component in NAME. Return FNM_MATCH iff MASK and NAME match. The
1853	* backslash character is used for escaping ? and * unless
1854	* FNM_NOESCAPE is set.
1855	*
1856	* (c) 1994-1996 by Eberhard Mattes.
1857	*/
1858
1859	static int match_unix(const unsigned char *mask,
1860	const unsigned char *name,
1861	unsigned flags,
1862	const unsigned char *comp)
1863	{
1864	unsigned char c1, c2;
1865	char invert, matched;
1866	const unsigned char *start;
1867	int rc;
1868
1869	for (;;)
1870	switch (*mask)
1871	{
1872	case 0:
1873
1874	/* There must be no extra characters at the end of NAME when
1875	* reaching the end of MASK unless _FNM_PATHPREFIX is set:
1876	* in that case, NAME may point to a separator. */
1877
1878	if (*name == 0)
1879	return FNM_MATCH;
1880	if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1881	return FNM_MATCH;
1882	return FNM_NOMATCH;
1883
1884	case '?':
1885
1886	/* A question mark matches one character. It does not match
1887	* the component separator if FNM_PATHNAME is set. It does
1888	* not match a dot at the start of a component if FNM_PERIOD
1889	* is set. */
1890
1891	if (*name == 0)
1892	return FNM_NOMATCH;
1893	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1894	return FNM_NOMATCH;
1895	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1896	return FNM_NOMATCH;
1897	++mask;
1898	++name;
1899	break;
1900
1901	case '*':
1902
1903	/* An asterisk matches zero or more characters. It does not
1904	* match the component separator if FNM_PATHNAME is set. It
1905	* does not match a dot at the start of a component if
1906	* FNM_PERIOD is set. */
1907
1908	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1909	return FNM_NOMATCH;
1910	do
1911	{
1912	++mask;
1913	}
1914	while (mask == '');
1915	for (;;)
1916	{
1917	rc = match_unix(mask, name, flags, comp);
1918	if (rc != FNM_NOMATCH)
1919	return rc;
1920	if (*name == 0)
1921	return FNM_NOMATCH;
1922	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1923	return FNM_NOMATCH;
1924	++name;
1925	}
1926
1927	case '/':
1928
1929	/* Separators match only separators. If _FNM_PATHPREFIX is
1930	* set, a trailing separator in MASK is ignored at the end
1931	* of NAME. */
1932
1933	if (!(IS_UNIX_COMP_SEP(*name)
1934	\|\| ((flags & _FNM_PATHPREFIX) && *name == 0
1935	&& (mask[1] == 0
1936	\|\| (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1937	&& mask[2] == 0)))))
1938	return FNM_NOMATCH;
1939
1940	++mask;
1941	if (*name != 0)
1942	++name;
1943
1944	/* This is the beginning of a new component if FNM_PATHNAME
1945	* is set. */
1946
1947	if (flags & FNM_PATHNAME)
1948	comp = name;
1949	break;
1950
1951	case '[':
1952
1953	/* A set of characters. Always case-sensitive. */
1954
1955	if (*name == 0)
1956	return FNM_NOMATCH;
1957	if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1958	return FNM_NOMATCH;
1959	if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1960	return FNM_NOMATCH;
1961
1962	invert = 0;
1963	matched = 0;
1964	++mask;
1965
1966	/* If the first character is a ! or ^, the set matches all
1967	* characters not listed in the set. */
1968
1969	if (mask == '!' \|\| mask == '^')
1970	{
1971	++mask;
1972	invert = 1;
1973	}
1974
1975	/* Loop over all the characters of the set. The loop ends
1976	* if the end of the string is reached or if a ] is
1977	* encountered unless it directly follows the initial [ or
1978	* [-. */
1979
1980	start = mask;
1981	while (!(mask == 0 \|\| (mask == ']' && mask != start)))
1982	{
1983	/* Get the next character which is optionally preceded
1984	* by a backslash. */
1985
1986	c1 = *mask++;
1987	if (!(flags & FNM_NOESCAPE) && c1 == '\\')
1988	{
1989	if (*mask == 0)
1990	break;
1991	c1 = *mask++;
1992	}
1993
1994	/* Ranges of characters are written as a-z. Don't
1995	* forget to check for the end of the string and to
1996	* handle the backslash. If the character after - is a
1997	* ], it isn't a range. */
1998
1999	if (*mask == '-' && mask[1] != ']')
2000	{
2001	++mask; /* Skip the - character */
2002	if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2003	++mask;
2004	if (*mask == 0)
2005	break;
2006	c2 = *mask++;
2007	}
2008	else
2009	c2 = c1;
2010
2011	/* Now check whether this character or range matches NAME. */
2012
2013	if (c1 <= name && name <= c2)
2014	matched = 1;
2015	}
2016
2017	/* If the end of the string is reached before a ] is found,
2018	* back up to the [ and compare it to NAME. */
2019
2020	if (*mask == 0)
2021	{
2022	if (*name != '[')
2023	return FNM_NOMATCH;
2024	++name;
2025	mask = start;
2026	if (invert)
2027	--mask;
2028	}
2029	else
2030	{
2031	if (invert)
2032	matched = !matched;
2033	if (!matched)
2034	return FNM_NOMATCH;
2035	++mask; /* Skip the ] character */
2036	if (*name != 0)
2037	++name;
2038	}
2039	break;
2040
2041	case '\\':
2042	++mask;
2043	if (flags & FNM_NOESCAPE)
2044	{
2045	if (*name != '\\')
2046	return FNM_NOMATCH;
2047	++name;
2048	}
2049	else if (mask == '' \|\| *mask == '?')
2050	{
2051	if (mask != name)
2052	return FNM_NOMATCH;
2053	++mask;
2054	++name;
2055	}
2056	break;
2057
2058	default:
2059
2060	/* All other characters match themselves. */
2061
2062	if (flags & _FNM_IGNORECASE)
2063	{
2064	if (tolower(mask) != tolower(name))
2065	return FNM_NOMATCH;
2066	}
2067	else
2068	{
2069	if (mask != name)
2070	return FNM_NOMATCH;
2071	}
2072	++mask;
2073	++name;
2074	break;
2075	}
2076	}
2077
2078	/*
2079	* _fnmatch_unsigned:
2080	* Check whether the path name NAME matches the wildcard MASK.
2081	*
2082	* Return:
2083	* -- 0 (FNM_MATCH) if it matches,
2084	* -- _FNM_NOMATCH if it doesn't,
2085	* -- FNM_ERR on error.
2086	*
2087	* The operation of this function is controlled by FLAGS.
2088	* This is an internal function, with unsigned arguments.
2089	*
2090	* (c) 1994-1996 by Eberhard Mattes.
2091	*/
2092
2093	static int _fnmatch_unsigned(const unsigned char *mask,
2094	const unsigned char *name,
2095	unsigned flags)
2096	{
2097	int m_drive, n_drive,
2098	rc;
2099
2100	/* Match and skip the drive name if present. */
2101
2102	m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2103	n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2104
2105	if (m_drive != n_drive)
2106	{
2107	if (m_drive == -1 \|\| n_drive == -1)
2108	return FNM_NOMATCH;
2109	if (!(flags & _FNM_IGNORECASE))
2110	return FNM_NOMATCH;
2111	if (tolower(m_drive) != tolower(n_drive))
2112	return FNM_NOMATCH;
2113	}
2114
2115	if (m_drive != -1)
2116	mask += 2;
2117	if (n_drive != -1)
2118	name += 2;
2119
2120	/* Colons are not allowed in path names, except for the drive name,
2121	* which was skipped above. */
2122
2123	if (has_colon(mask) \|\| has_colon(name))
2124	return FNM_ERR;
2125
2126	/* The name "\\server\path" should not be matched by mask
2127	* "\\server\path". Ditto for /. /
2128
2129	switch (flags & _FNM_STYLE_MASK)
2130	{
2131	case _FNM_OS2:
2132	case _FNM_DOS:
2133
2134	if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2135	{
2136	if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2137	return FNM_NOMATCH;
2138	name += 2;
2139	mask += 2;
2140	}
2141	break;
2142
2143	case _FNM_POSIX:
2144
2145	if (name[0] == '/' && name[1] == '/')
2146	{
2147	int i;
2148
2149	name += 2;
2150	for (i = 0; i < 2; ++i)
2151	if (mask[0] == '/')
2152	++mask;
2153	else if (mask[0] == '\\' && mask[1] == '/')
2154	mask += 2;
2155	else
2156	return FNM_NOMATCH;
2157	}
2158
2159	/* In Unix styles, treating ? and * w.r.t. components is simple.
2160	* No need to do matching component by component. */
2161
2162	return match_unix(mask, name, flags, name);
2163	}
2164
2165	/* Now compare all the components of the path name, one by one.
2166	* Note that the path separator must not be enclosed in brackets. */
2167
2168	while (mask != 0 \|\| name != 0)
2169	{
2170
2171	/* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2172	* is reached even if there are components left in NAME. */
2173
2174	if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2175	return FNM_MATCH;
2176
2177	/* Compare a single component of the path name. */
2178
2179	rc = match_comp(mask, name, flags);
2180	if (rc != FNM_MATCH)
2181	return rc;
2182
2183	/* Skip to the next component or to the end of the path name. */
2184
2185	mask = skip_comp_os2(mask);
2186	name = skip_comp_os2(name);
2187	}
2188
2189	/* If we reached the ends of both strings, the names match. */
2190
2191	if (mask == 0 && name == 0)
2192	return FNM_MATCH;
2193
2194	/* The names do not match. */
2195
2196	return FNM_NOMATCH;
2197	}
2198
2199	/*
2200	*@@ strhMatchOS2:
2201	* this matches wildcards, similar to what DosEditName does.
2202	* However, this does not require a file to be present, but
2203	* works on strings only.
2204	*/
2205
2206	BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2207	const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2208	{
2209	return ((BOOL)(_fnmatch_unsigned(pcszMask,
2210	pcszName,
2211	_FNM_OS2 \| _FNM_IGNORECASE)
2212	== FNM_MATCH)
2213	);
2214	}
2215
2216	/* ******************************************************************
2217	*
2218	* Fast string searches
2219	*
2220	********************************************************************/
2221
// assertions are compiled out in this file: ASSERT expands to
// a no-op expression and does not evaluate its argument
#define ASSERT(a) ((void)0)
2223
2224	/*
2225	* The following code has been taken from the "Standard
2226	* Function Library", file sflfind.c, and only slightly
2227	* modified to conform to the rest of this file.
2228	*
2229	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2230	* Revised: 98/05/04
2231	*
2232	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2233	*
2234	* The SFL Licence allows incorporating SFL code into other
2235	* programs, as long as the copyright is reprinted and the
2236	* code is marked as modified, so this is what we do.
2237	*/
2238
2239	/*
2240	*@@ strhmemfind:
2241	* searches for a pattern in a block of memory using the
2242	* Boyer-Moore-Horspool-Sunday algorithm.
2243	*
2244	* The block and pattern may contain any values; you must
2245	* explicitly provide their lengths. If you search for strings,
2246	* use strlen() on the buffers.
2247	*
2248	* Returns a pointer to the pattern if found within the block,
2249	* or NULL if the pattern was not found.
2250	*
2251	* This algorithm needs a "shift table" to cache data for the
2252	* search pattern. This table can be reused when performing
2253	* several searches with the same pattern.
2254	*
2255	* "shift" must point to an array big enough to hold 256 (8**2)
2256	* "size_t" values.
2257	*
2258	* If (*repeat_find == FALSE), the shift table is initialized.
2259	* So on the first search with a given pattern, *repeat_find
2260	* should be FALSE. This function sets it to TRUE after the
2261	* shift table is initialised, allowing the initialisation
2262	* phase to be skipped on subsequent searches.
2263	*
2264	* This function is most effective when repeated searches are
2265	* made for the same pattern in one or more large buffers.
2266	*
2267	* Example:
2268	*
2269	+ PSZ pszHaystack = "This is a sample string.",
2270	+ pszNeedle = "string";
2271	+ size_t shift[256];
2272	+ BOOL fRepeat = FALSE;
2273	+
2274	+ PSZ pFound = strhmemfind(pszHaystack,
2275	+ strlen(pszHaystack), // block size
2276	+ pszNeedle,
2277	+ strlen(pszNeedle), // pattern size
2278	+ shift,
2279	+ &fRepeat);
2280	*
2281	* Taken from the "Standard Function Library", file sflfind.c.
2282	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
2283	* Slightly modified by umoeller.
2284	*
2285	*@@added V0.9.3 (2000-05-08) [umoeller]
2286	*/
2287
2288	void* strhmemfind(const void *in_block, // in: block containing data
2289	size_t block_size, // in: size of block in bytes
2290	const void *in_pattern, // in: pattern to search for
2291	size_t pattern_size, // in: size of pattern block
2292	size_t *shift, // in/out: shift table (search buffer)
2293	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
2294	{
2295	size_t byte_nbr, // Distance through block
2296	match_size; // Size of matched part
2297	const unsigned char
2298	*match_base = NULL, // Base of match of pattern
2299	*match_ptr = NULL, // Point within current match
2300	*limit = NULL; // Last potiental match point
2301	const unsigned char
2302	block = (unsigned char ) in_block, // Concrete pointer to block data
2303	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
2304
2305	if ( (block == NULL)
2306	\|\| (pattern == NULL)
2307	\|\| (shift == NULL)
2308	)
2309	return (NULL);
2310
2311	// Pattern must be smaller or equal in size to string
2312	if (block_size < pattern_size)
2313	return (NULL); // Otherwise it's not found
2314
2315	if (pattern_size == 0) // Empty patterns match at start
2316	return ((void *)block);
2317
2318	// Build the shift table unless we're continuing a previous search
2319
2320	// The shift table determines how far to shift before trying to match
2321	// again, if a match at this point fails. If the byte after where the
2322	// end of our pattern falls is not in our pattern, then we start to
2323	// match again after that byte; otherwise we line up the last occurence
2324	// of that byte in our pattern under that byte, and try match again.
2325
2326	if (!repeat_find \|\| !*repeat_find)
2327	{
2328	for (byte_nbr = 0;
2329	byte_nbr < 256;
2330	byte_nbr++)
2331	shift[byte_nbr] = pattern_size + 1;
2332	for (byte_nbr = 0;
2333	byte_nbr < pattern_size;
2334	byte_nbr++)
2335	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2336
2337	if (repeat_find)
2338	*repeat_find = TRUE;
2339	}
2340
2341	// Search for the block, each time jumping up by the amount
2342	// computed in the shift table
2343
2344	limit = block + (block_size - pattern_size + 1);
2345	ASSERT (limit > block);
2346
2347	for (match_base = block;
2348	match_base < limit;
2349	match_base += shift[*(match_base + pattern_size)])
2350	{
2351	match_ptr = match_base;
2352	match_size = 0;
2353
2354	// Compare pattern until it all matches, or we find a difference
2355	while (*match_ptr++ == pattern[match_size++])
2356	{
2357	ASSERT (match_size <= pattern_size &&
2358	match_ptr == (match_base + match_size));
2359
2360	// If we found a match, return the start address
2361	if (match_size >= pattern_size)
2362	return ((void*)(match_base));
2363
2364	}
2365	}
2366	return (NULL); // Found nothing
2367	}
2368
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      NULL is also returned if string or pattern is NULL. An empty
 *      pattern is found at the start of the string.
 *      Will match strings irrespective of case; characters are compared
 *      after converting both with tolower(). To match exact strings, use
 *      strhmemfind. Will not work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        pos,                            // Distance of current match point through string
        last_start,                     // Last potential match point
        step;                           // How far to shift after a mismatch

    // Fail gracefully on NULL pointers
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    // Note: tolower() must not be given a negative value, so every
    // character is converted to unsigned char first.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    last_start = string_size - pattern_size;

    for (pos = 0;;)
    {
        size_t match_size = 0;          // Size of matched part

        // Compare pattern until it all matches, or we find a difference
        while (    (match_size < pattern_size)
                && (    tolower ((unsigned char) string [pos + match_size])
                     == tolower ((unsigned char) pattern [match_size])
                   )
              )
            match_size++;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *)(string + pos));

        if (pos >= last_start)
            // the pattern was tried at the very end of the string
            break;

        // pos < last_start, so string [pos + pattern_size] is a
        // character of the string
        step = shift [(unsigned char) tolower ((unsigned char) string [pos + pattern_size])];
        if (step > last_start - pos)
            // the pattern would not fit into the rest of the string
            break;

        pos += step;
    }

    return (NULL);                      // Found nothing
}
2463

Note: See TracBrowser for help on using the repository browser.

Download in other formats: