Context Navigation

source: trunk/src/helpers/stringh.c@ 176

Visit:

Last change on this file since 176 was 174, checked in by umoeller, 23 years ago
Misc updates.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 58.3 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
73	/*
74	*@@ strhStoreDebug:
75	* memory debug version of strhStore.
76	*
77	*@@added V0.9.16 (2001-12-08) [umoeller]
78	*/
79
80	APIRET (strhStoreDebug)(PSZ *ppszTarget,
81	PCSZ pcszSource,
82	PULONG pulLength, // out: length of new string (ptr can be NULL)
83	PCSZ pcszSourceFile,
84	unsigned long ulLine,
85	PCSZ pcszFunction)
86	{
87	ULONG ulLength = 0;
88
89
90
91	if (ppszTarget)
92	{
93	if (*ppszTarget)
94	free(*ppszTarget);
95
96	if ( (pcszSource)
97	&& (ulLength = strlen(pcszSource))
98	)
99	{
100	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
101	pcszSourceFile,
102	ulLine,
103	pcszFunction))
104	memcpy(*ppszTarget, pcszSource, ulLength + 1);
105	else
106	return ERROR_NOT_ENOUGH_MEMORY;
107	}
108	else
109	*ppszTarget = NULL;
110	}
111
112	if (pulLength)
113	*pulLength = ulLength;
114
115	return NO_ERROR;
116	}
117
118	#endif
119
120	/*
121	*@@ strhStore:
122	* stores a copy of the given string in the specified
123	* buffer. Uses strdup internally.
124	*
125	* If *ppszTarget != NULL, the previous string is freed
126	* and set to NULL.
127	* If pcszSource != NULL, a copy of it is stored in the
128	* buffer.
129	*
130	*@@added V0.9.16 (2001-12-06) [umoeller]
131	*/
132
133	APIRET strhStore(PSZ *ppszTarget,
134	PCSZ pcszSource,
135	PULONG pulLength) // out: length of new string (ptr can be NULL)
136	{
137	ULONG ulLength = 0;
138
139	if (ppszTarget)
140	{
141	if (*ppszTarget)
142	free(*ppszTarget);
143
144	if ( (pcszSource)
145	&& (ulLength = strlen(pcszSource))
146	)
147	{
148	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
149	memcpy(*ppszTarget, pcszSource, ulLength + 1);
150	else
151	return ERROR_NOT_ENOUGH_MEMORY;
152	}
153	else
154	*ppszTarget = NULL;
155	}
156	else
157	return ERROR_INVALID_PARAMETER;
158
159	if (pulLength)
160	*pulLength = ulLength;
161
162	return NO_ERROR;
163	}
164
165	/*
166	*@@ strhcpy:
167	* like strdup, but this one doesn't crash if string2 is NULL,
168	* but sets the first byte in string1 to \0 instead.
169	*
170	*@@added V0.9.14 (2001-08-01) [umoeller]
171	*/
172
173	PSZ strhcpy(PSZ string1, PCSZ string2)
174	{
175	if (string2)
176	return strcpy(string1, string2);
177
178	*string1 = '\0';
179	return string1;
180	}
181
182	#ifdef __DEBUG_MALLOC_ENABLED__
183
184	/*
185	*@@ strhdupDebug:
186	* memory debug version of strhdup.
187	*
188	*@@added V0.9.0 [umoeller]
189	*/
190
191	PSZ strhdupDebug(PCSZ pcszSource,
192	unsigned long *pulLength,
193	PCSZ pcszSourceFile,
194	unsigned long ulLine,
195	PCSZ pcszFunction)
196	{
197	PSZ pszReturn = NULL;
198	ULONG ulLength = 0;
199
200	if ( (pcszSource)
201	&& (ulLength = strlen(pcszSource))
202	)
203	{
204	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
205	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
206	ulLine,
207	pcszFunction))
208	memcpy(pszReturn, pcszSource, ulLength + 1);
209	}
210
211	if (pulLength)
212	*pulLength = ulLength;
213
214	return pszReturn;
215	}
216
217	#endif // __DEBUG_MALLOC_ENABLED__
218
219	/*
220	*@@ strhdup:
221	* like strdup, but this one doesn't crash if pszSource
222	* is NULL, but returns NULL also. In addition, this
223	* can report the length of the string (V0.9.16).
224	*
225	*@@added V0.9.0 [umoeller]
226	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
227	*/
228
229	PSZ strhdup(PCSZ pcszSource,
230	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
231	{
232	PSZ pszReturn = NULL;
233	ULONG ulLength = 0;
234
235	if ( (pcszSource)
236	&& (ulLength = strlen(pcszSource))
237	)
238	{
239	if (pszReturn = (PSZ)malloc(ulLength + 1))
240	memcpy(pszReturn, pcszSource, ulLength + 1);
241	}
242
243	if (pulLength)
244	*pulLength = ulLength;
245
246	return pszReturn;
247	}
248
249	/*
250	*@@ strhcmp:
251	* better strcmp. This doesn't crash if any of the
252	* string pointers are NULL, but returns a proper
253	* value then.
254	*
255	* Besides, this is guaranteed to only return -1, 0,
256	* or +1, while strcmp can return any positive or
257	* negative value. This is useful for tree comparison
258	* funcs.
259	*
260	*@@added V0.9.9 (2001-02-16) [umoeller]
261	*/
262
263	int strhcmp(PCSZ p1, PCSZ p2)
264	{
265	if (p1 && p2)
266	{
267	int i = strcmp(p1, p2);
268	if (i < 0) return -1;
269	if (i > 0) return +1;
270	}
271	else if (p1)
272	// but p2 is NULL: p1 greater than p2 then
273	return +1;
274	else if (p2)
275	// but p1 is NULL: p1 less than p2 then
276	return -1;
277
278	// return 0 if strcmp returned 0 above or both strings are NULL
279	return 0;
280	}
281
282	/*
283	*@@ strhicmp:
284	* like strhcmp, but compares without respect
285	* to case.
286	*
287	*@@added V0.9.9 (2001-04-07) [umoeller]
288	*/
289
290	int strhicmp(PCSZ p1, PCSZ p2)
291	{
292	if (p1 && p2)
293	{
294	int i = stricmp(p1, p2);
295	if (i < 0) return -1;
296	if (i > 0) return +1;
297	}
298	else if (p1)
299	// but p2 is NULL: p1 greater than p2 then
300	return +1;
301	else if (p2)
302	// but p1 is NULL: p1 less than p2 then
303	return -1;
304
305	// return 0 if strcmp returned 0 above or both strings are NULL
306	return 0;
307	}
308
309	/*
310	*@@ strhistr:
311	* like strstr, but case-insensitive.
312	*
313	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
314	*/
315
316	PSZ strhistr(PCSZ string1, PCSZ string2)
317	{
318	PSZ prc = NULL;
319
320	if ((string1) && (string2))
321	{
322	PSZ pszSrchIn = strdup(string1);
323	PSZ pszSrchFor = strdup(string2);
324
325	if ((pszSrchIn) && (pszSrchFor))
326	{
327	strupr(pszSrchIn);
328	strupr(pszSrchFor);
329
330	if (prc = strstr(pszSrchIn, pszSrchFor))
331	{
332	// prc now has the first occurence of the string,
333	// but in pszSrchIn; we need to map this
334	// return value to the original string
335	prc = (prc-pszSrchIn) // offset in pszSrchIn
336	+ (PSZ)string1;
337	}
338	}
339	if (pszSrchFor)
340	free(pszSrchFor);
341	if (pszSrchIn)
342	free(pszSrchIn);
343	}
344
345	return prc;
346	}
347
348	/*
349	*@@ strhncpy0:
350	* like strncpy, but always appends a 0 character.
351	*
352	*@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
353	*/
354
355	ULONG strhncpy0(PSZ pszTarget,
356	PCSZ pszSource,
357	ULONG cbSource)
358	{
359	ULONG ul = 0;
360	PSZ pTarget = pszTarget,
361	pSource;
362
363	if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
364	{
365	for (ul = 0; ul < cbSource; ul++)
366	if (*pSource)
367	pTarget++ = pSource++;
368	else
369	break;
370	}
371
372	*pTarget = 0;
373
374	return ul;
375	}
376
377	/*
378	*@@ strhlen:
379	* like strlen, but doesn't crash on
380	* null strings, but returns 0 also.
381	*
382	*@@added V0.9.19 (2002-04-02) [umoeller]
383	*/
384
385	ULONG strhlen(PCSZ pcsz)
386	{
387	if (pcsz)
388	return strlen(pcsz);
389
390	return 0;
391	}
392
393	/*
394	*@@ strhSize:
395	* returns the size of the given string, which
396	* is the memory required to allocate a copy,
397	* including the null terminator.
398	*
399	* Returns 0 only if pcsz is NULL. If pcsz
400	* points to a null character, this returns 1.
401	*
402	*@@added V0.9.18 (2002-02-13) [umoeller]
403	*@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
404	*/
405
406	ULONG strhSize(PCSZ pcsz)
407	{
408	if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
409	return (strlen(pcsz) + 1);
410
411	return 0;
412	}
413
414	/*
415	* strhCount:
416	* this counts the occurences of c in pszSearch.
417	*/
418
419	ULONG strhCount(PCSZ pszSearch,
420	CHAR c)
421	{
422	PSZ p = (PSZ)pszSearch;
423	ULONG ulCount = 0;
424	while (TRUE)
425	{
426	p = strchr(p, c);
427	if (p)
428	{
429	ulCount++;
430	p++;
431	}
432	else
433	break;
434	}
435	return ulCount;
436	}
437
438	/*
439	*@@ strhIsDecimal:
440	* returns TRUE if psz consists of decimal digits only.
441	*/
442
443	BOOL strhIsDecimal(PSZ psz)
444	{
445	PSZ p = psz;
446	while (*p != 0)
447	{
448	if (isdigit(*p) == 0)
449	return FALSE;
450	p++;
451	}
452
453	return TRUE;
454	}
455
456	#ifdef __DEBUG_MALLOC_ENABLED__
457
458	/*
459	*@@ strhSubstrDebug:
460	* memory debug version of strhSubstr.
461	*
462	*@@added V0.9.14 (2001-08-01) [umoeller]
463	*/
464
465	PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
466	PCSZ pEnd, // in: last char (not included)
467	PCSZ pcszSourceFile,
468	unsigned long ulLine,
469	PCSZ pcszFunction)
470	{
471	PSZ pszSubstr = NULL;
472
473	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
474	{
475	ULONG cbSubstr = (pEnd - pBegin);
476	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
477	pcszSourceFile,
478	ulLine,
479	pcszFunction))
480	{
481	// strhncpy0(pszSubstr, pBegin, cbSubstr);
482	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
483	*(pszSubstr + cbSubstr) = '\0';
484	}
485	}
486
487	return pszSubstr;
488	}
489
490	#endif // __DEBUG_MALLOC_ENABLED__
491
492	/*
493	*@@ strhSubstr:
494	* this creates a new PSZ containing the string
495	* from pBegin to pEnd, excluding the pEnd character.
496	* The new string is null-terminated. The caller
497	* must free() the new string after use.
498	*
499	* Example:
500	+ "1234567890"
501	+ ^ ^
502	+ p1 p2
503	+ strhSubstr(p1, p2)
504	* would return a new string containing "2345678".
505	*
506	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
507	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
508	*/
509
510	PSZ strhSubstr(PCSZ pBegin, // in: first char
511	PCSZ pEnd) // in: last char (not included)
512	{
513	PSZ pszSubstr = NULL;
514
515	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
516	{
517	ULONG cbSubstr = (pEnd - pBegin);
518	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
519	{
520	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
521	*(pszSubstr + cbSubstr) = '\0';
522	}
523	}
524
525	return pszSubstr;
526	}
527
528	/*
529	*@@ strhExtract:
530	* searches pszBuf for the cOpen character and returns
531	* the data in between cOpen and cClose, excluding
532	* those two characters, in a newly allocated buffer
533	* which you must free() afterwards.
534	*
535	* Spaces and newlines/linefeeds are skipped.
536	*
537	* If the search was successful, the new buffer
538	* is returned and, if (ppEnd != NULL), *ppEnd points
539	* to the first character after the cClose character
540	* found in the buffer.
541	*
542	* If the search was not successful, NULL is
543	* returned, and *ppEnd is unchanged.
544	*
545	* If another cOpen character is found before
546	* cClose, matching cClose characters will be skipped.
547	* You can therefore nest the cOpen and cClose
548	* characters.
549	*
550	* This function ignores cOpen and cClose characters
551	* in C-style comments and strings surrounded by
552	* double quotes.
553	*
554	* Example:
555	*
556	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
557	+ pEnd;
558	+ strhExtract(pszBuf,
559	+ '{', '}',
560	+ &pEnd)
561	*
562	* would return a new buffer containing " --blah-- ",
563	* and ppEnd would afterwards point to the space
564	* before "next" in the static buffer.
565	*
566	*@@added V0.9.0 [umoeller]
567	*/
568
569	PSZ strhExtract(PCSZ pszBuf, // in: search buffer
570	CHAR cOpen, // in: opening char
571	CHAR cClose, // in: closing char
572	PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
573	{
574	PSZ pszReturn = NULL;
575	PCSZ pOpen;
576	if ( (pszBuf)
577	&& (pOpen = strchr(pszBuf, cOpen))
578	)
579	{
580	// opening char found:
581	// now go thru the whole rest of the buffer
582	PCSZ p = pOpen + 1;
583	LONG lLevel = 1; // if this goes 0, we're done
584	while (*p)
585	{
586	if (*p == cOpen)
587	lLevel++;
588	else if (*p == cClose)
589	{
590	lLevel--;
591	if (lLevel <= 0)
592	{
593	// matching closing bracket found:
594	// extract string
595	pszReturn = strhSubstr(pOpen + 1, // after cOpen
596	p); // excluding cClose
597	if (ppEnd)
598	*ppEnd = p + 1;
599	break; // while (*p)
600	}
601	}
602	else if (*p == '\"')
603	{
604	// beginning of string:
605	PCSZ p2 = p+1;
606	// find end of string
607	while ((p2) && (p2 != '\"'))
608	p2++;
609
610	if (*p2 == '\"')
611	// closing quote found:
612	// search on after that
613	p = p2; // raised below
614	else
615	break; // while (*p)
616	}
617
618	p++;
619	}
620	}
621
622	return pszReturn;
623	}
624
625	/*
626	*@@ strhQuote:
627	* similar to strhExtract, except that
628	* opening and closing chars are the same,
629	* and therefore no nesting is possible.
630	* Useful for extracting stuff between
631	* quotes.
632	*
633	*@@added V0.9.0 [umoeller]
634	*/
635
636	PSZ strhQuote(PSZ pszBuf,
637	CHAR cQuote,
638	PSZ *ppEnd)
639	{
640	PSZ pszReturn = NULL,
641	p1 = NULL;
642	if ((p1 = strchr(pszBuf, cQuote)))
643	{
644	PSZ p2;
645	if (p2 = strchr(p1+1, cQuote))
646	{
647	pszReturn = strhSubstr(p1+1, p2);
648	if (ppEnd)
649	// store closing char
650	*ppEnd = p2 + 1;
651	}
652	}
653
654	return pszReturn;
655	}
656
657	/*
658	*@@ strhStrip:
659	* removes all double spaces.
660	* This copies within the "psz" buffer.
661	* If any double spaces are found, the
662	* string will be shorter than before,
663	* but the buffer is _not_ reallocated,
664	* so there will be unused bytes at the
665	* end.
666	*
667	* Returns the number of spaces removed.
668	*
669	*@@added V0.9.0 [umoeller]
670	*/
671
672	ULONG strhStrip(PSZ psz) // in/out: string
673	{
674	PSZ p;
675	ULONG cb = strlen(psz),
676	ulrc = 0;
677
678	for (p = psz; p < psz+cb; p++)
679	{
680	if ((p == ' ') && ((p+1) == ' '))
681	{
682	PSZ p2 = p;
683	while (*p2)
684	{
685	p2 = (p2+1);
686	p2++;
687	}
688	cb--;
689	p--;
690	ulrc++;
691	}
692	}
693	return ulrc;
694	}
695
696	/*
697	*@@ strhins:
698	* this inserts one string into another.
699	*
700	* pszInsert is inserted into pszBuffer at offset
701	* ulInsertOfs (which counts from 0).
702	*
703	* A newly allocated string is returned. pszBuffer is
704	* not changed. The new string should be free()'d after
705	* use.
706	*
707	* Upon errors, NULL is returned.
708	*
709	*@@changed V0.9.0 [umoeller]: completely rewritten.
710	*/
711
712	PSZ strhins(PCSZ pcszBuffer,
713	ULONG ulInsertOfs,
714	PCSZ pcszInsert)
715	{
716	PSZ pszNew = NULL;
717
718	if ((pcszBuffer) && (pcszInsert))
719	{
720	do {
721	ULONG cbBuffer = strlen(pcszBuffer);
722	ULONG cbInsert = strlen(pcszInsert);
723
724	// check string length
725	if (ulInsertOfs > cbBuffer + 1)
726	break; // do
727
728	// OK, let's go.
729	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
730
731	// copy stuff before pInsertPos
732	memcpy(pszNew,
733	pcszBuffer,
734	ulInsertOfs);
735	// copy string to be inserted
736	memcpy(pszNew + ulInsertOfs,
737	pcszInsert,
738	cbInsert);
739	// copy stuff after pInsertPos
740	strcpy(pszNew + ulInsertOfs + cbInsert,
741	pcszBuffer + ulInsertOfs);
742	} while (FALSE);
743	}
744
745	return pszNew;
746	}
747
748	/*
749	*@@ strhFindReplace:
750	* wrapper around xstrFindReplace to work with C strings.
751	* Note that *ppszBuf can get reallocated and must
752	* be free()'able.
753	*
754	* Repetitive use of this wrapper is not recommended
755	* because it is considerably slower than xstrFindReplace.
756	*
757	*@@added V0.9.6 (2000-11-01) [umoeller]
758	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
759	*/
760
761	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
762	PULONG pulOfs, // in: where to begin search (0 = start);
763	// out: ofs of first char after replacement string
764	PCSZ pcszSearch, // in: search string; cannot be NULL
765	PCSZ pcszReplace) // in: replacement string; cannot be NULL
766	{
767	ULONG ulrc = 0;
768	XSTRING xstrBuf,
769	xstrFind,
770	xstrReplace;
771	size_t ShiftTable[256];
772	BOOL fRepeat = FALSE;
773	xstrInitSet(&xstrBuf, *ppszBuf);
774	// reallocated and returned, so we're safe
775	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
776	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
777	// these two are never freed, so we're safe too
778
779	if ((ulrc = xstrFindReplace(&xstrBuf,
780	pulOfs,
781	&xstrFind,
782	&xstrReplace,
783	ShiftTable,
784	&fRepeat)))
785	// replaced:
786	*ppszBuf = xstrBuf.psz;
787
788	return ulrc;
789	}
790
791	/*
792	* strhWords:
793	* returns the no. of words in "psz".
794	* A string is considered a "word" if
795	* it is surrounded by spaces only.
796	*
797	*@@added V0.9.0 [umoeller]
798	*/
799
800	ULONG strhWords(PSZ psz)
801	{
802	PSZ p;
803	ULONG cb = strlen(psz),
804	ulWords = 0;
805	if (cb > 1)
806	{
807	ulWords = 1;
808	for (p = psz; p < psz+cb; p++)
809	if (*p == ' ')
810	ulWords++;
811	}
812	return ulWords;
813	}
814
815	/*
816	*@@ strhGetWord:
817	* finds word boundaries.
818	*
819	* *ppszStart is used as the beginning of the
820	* search.
821	*
822	* If a word is found, *ppszStart is set to
823	* the first character of the word which was
824	* found and *ppszEnd receives the address
825	* of the first character _after_ the word,
826	* which is probably a space or a \n or \r char.
827	* We then return TRUE.
828	*
829	* The search is stopped if a null character
830	* is found or pLimit is reached. In that case,
831	* FALSE is returned.
832	*
833	*@@added V0.9.1 (2000-02-13) [umoeller]
834	*/
835
836	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
837	// out: start of word (if TRUE is returned)
838	PCSZ pLimit, // in: ptr to last char after *ppszStart to be
839	// searched; if the word does not end before
840	// or with this char, FALSE is returned
841	PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
842	PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
843	PSZ *ppszEnd) // out: first char _after_ word
844	// (if TRUE is returned)
845	{
846	// characters after which a word can be started
847	// PCSZ pcszBeginChars = "\x0d\x0a ";
848	// PCSZ pcszEndChars = "\x0d\x0a /-";
849
850	PSZ pStart = *ppszStart;
851
852	// find start of word
853	while ( (pStart < (PSZ)pLimit)
854	&& (strchr(pcszBeginChars, *pStart))
855	)
856	// if char is a "before word" char: go for next
857	pStart++;
858
859	if (pStart < (PSZ)pLimit)
860	{
861	// found a valid "word start" character
862	// (which is not in pcszBeginChars):
863
864	// find end of word
865	PSZ pEndOfWord = pStart;
866	while ( (pEndOfWord <= (PSZ)pLimit)
867	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
868	)
869	// if char is not an "end word" char: go for next
870	pEndOfWord++;
871
872	if (pEndOfWord <= (PSZ)pLimit)
873	{
874	// whoa, got a word:
875	*ppszStart = pStart;
876	*ppszEnd = pEndOfWord;
877	return TRUE;
878	}
879	}
880
881	return FALSE;
882	}
883
884	/*
885	*@@ strhIsWord:
886	* returns TRUE if p points to a "word"
887	* in pcszBuf.
888	*
889	* p is considered a word if the character _before_
890	* it is in pcszBeginChars and the char _after_
891	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
892	*
893	*@@added V0.9.6 (2000-11-12) [umoeller]
894	*@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
895	*/
896
897	BOOL strhIsWord(PCSZ pcszBuf,
898	PCSZ p, // in: start of word
899	ULONG cbSearch, // in: length of word
900	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
901	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
902	{
903	// check previous char
904	if ( (p == pcszBuf)
905	\|\| (strchr(pcszBeginChars, *(p-1)))
906	)
907	{
908	// OK, valid begin char:
909	// check end char
910	CHAR cNextChar;
911	if (!(cNextChar = p[cbSearch]))
912	// null terminator:
913	return TRUE;
914	else
915	{
916	// not null terminator: check if char is
917	// in the list of valid end chars
918	if (strchr(pcszEndChars, cNextChar))
919	{
920	// OK, is end char: avoid doubles of that char,
921	// but allow spaces
922	// fixed V0.9.18 (2002-02-23) [umoeller]
923	CHAR cNextNext = p[cbSearch + 1];
924	if ( (cNextNext != cNextChar)
925	\|\| (cNextNext == ' ')
926	\|\| (cNextNext == 0)
927	)
928	return TRUE;
929	}
930	}
931	}
932
933	return FALSE;
934	}
935
936	/*
937	*@@ strhFindWord:
938	* searches for pszSearch in pszBuf, which is
939	* returned if found (or NULL if not).
940	*
941	* As opposed to strstr, this finds pszSearch
942	* only if it is a "word". A search string is
943	* considered a word if the character _before_
944	* it is in pcszBeginChars and the char _after_
945	* it is in pcszEndChars.
946	*
947	* Example:
948	+ strhFindWord("This is an example.", "is");
949	+ returns ...........^ this, but not the "is" in "This".
950	*
951	* The algorithm here uses strstr to find pszSearch in pszBuf
952	* and performs additional "is-word" checks for each item found
953	* (by calling strhIsWord).
954	*
955	* Note that this function is fairly slow compared to xstrFindWord.
956	*
957	*@@added V0.9.0 (99-11-08) [umoeller]
958	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
959	*/
960
961	PSZ strhFindWord(PCSZ pszBuf,
962	PCSZ pszSearch,
963	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
964	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
965	{
966	PSZ pszReturn = 0;
967	ULONG cbBuf = strlen(pszBuf),
968	cbSearch = strlen(pszSearch);
969
970	if ((cbBuf) && (cbSearch))
971	{
972	PCSZ p = pszBuf;
973
974	do // while p
975	{
976	p = strstr(p, pszSearch);
977	if (p)
978	{
979	// string found:
980	// check if that's a word
981
982	if (strhIsWord(pszBuf,
983	p,
984	cbSearch,
985	pcszBeginChars,
986	pcszEndChars))
987	{
988	// valid end char:
989	pszReturn = (PSZ)p;
990	break;
991	}
992
993	p += cbSearch;
994	}
995	} while (p);
996
997	}
998	return pszReturn;
999	}
1000
1001	/*
1002	*@@ strhFindEOL:
1003	* returns a pointer to the next \r, \n or null character
1004	* following pszSearchIn. Stores the offset in *pulOffset.
1005	*
1006	* This should never return NULL because at some point,
1007	* there will be a null byte in your string.
1008	*
1009	*@@added V0.9.4 (2000-07-01) [umoeller]
1010	*/
1011
1012	PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
1013	PULONG pulOffset) // out: offset (ptr can be NULL)
1014	{
1015	PCSZ p = pcszSearchIn,
1016	prc = 0;
1017	while (TRUE)
1018	{
1019	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1020	{
1021	prc = p;
1022	break;
1023	}
1024	p++;
1025	}
1026
1027	if ((pulOffset) && (prc))
1028	*pulOffset = prc - pcszSearchIn;
1029
1030	return ((PSZ)prc);
1031	}
1032
1033	/*
1034	*@@ strhFindNextLine:
1035	* like strhFindEOL, but this returns the character
1036	* _after_ \r or \n. Note that this might return
1037	* a pointer to terminating NULL character also.
1038	*/
1039
1040	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1041	{
1042	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1043	// pEOL now points to the \r char or the terminating 0 byte;
1044	// if not null byte, advance pointer
1045	PSZ pNextLine = pEOL;
1046	if (*pNextLine == '\r')
1047	pNextLine++;
1048	if (*pNextLine == '\n')
1049	pNextLine++;
1050	if (pulOffset)
1051	*pulOffset = pNextLine - pszSearchIn;
1052	return pNextLine;
1053	}
1054
1055	/*
1056	*@@ strhBeautifyTitle:
1057	* replaces all line breaks (0xd, 0xa) with spaces.
1058	*
1059	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1060	*/
1061
1062	BOOL strhBeautifyTitle(PSZ psz)
1063	{
1064	BOOL rc = FALSE;
1065	CHAR *p = psz;
1066
1067	while(*p)
1068	if ( (*p == '\r')
1069	\|\| (*p == '\n')
1070	)
1071	{
1072	rc = TRUE;
1073	if ( (p != psz)
1074	&& (p[-1] == ' ')
1075	)
1076	memmove(p, p + 1, strlen(p));
1077	else
1078	*p++ = ' ';
1079	}
1080	else
1081	p++;
1082
1083	return rc;
1084	}
1085
1086	/*
1087	* strhFindAttribValue:
1088	* searches for pszAttrib in pszSearchIn; if found,
1089	* returns the first character after the "=" char.
1090	* If "=" is not found, a space, \r, and \n are
1091	* also accepted. This function searches without
1092	* respecting case.
1093	*
1094	* <B>Example:</B>
1095	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1096	+
1097	+ returns ....................... ^ this address.
1098	*
1099	*@@added V0.9.0 [umoeller]
1100	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1101	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1102	*/
1103
1104	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1105	{
1106	PSZ prc = 0;
1107	PSZ pszSearchIn2, p;
1108	ULONG cbAttrib = strlen(pszAttrib),
1109	ulLength = strlen(pszSearchIn);
1110
1111	// use alloca(), so memory is freed on function exit
1112	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1113	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1114
1115	// 1) find token, (space char, \n, \r, \t)
1116	p = strtok(pszSearchIn2, " \n\r\t");
1117	while (p)
1118	{
1119	CHAR c2;
1120	PSZ pOrig;
1121
1122	// check tag name
1123	if (!strnicmp(p, pszAttrib, cbAttrib))
1124	{
1125	// position in original string
1126	pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1127
1128	// yes:
1129	prc = pOrig + cbAttrib;
1130	c2 = *prc;
1131	while ( ( (c2 == ' ')
1132	\|\| (c2 == '=')
1133	\|\| (c2 == '\n')
1134	\|\| (c2 == '\r')
1135	)
1136	&& (c2 != 0)
1137	)
1138	c2 = *++prc;
1139
1140	break;
1141	}
1142
1143	p = strtok(NULL, " \n\r\t");
1144	}
1145
1146	return prc;
1147	}
1148
1149	/* PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1150	{
1151	PSZ prc = 0;
1152	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1153	p,
1154	p2;
1155	ULONG cbAttrib = strlen(pszAttrib);
1156
1157	// 1) find space char
1158	while ((p = strchr(pszSearchIn2, ' ')))
1159	{
1160	CHAR c;
1161	p++;
1162	if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1163	{
1164	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1165	// now check whether the p+strlen(pszAttrib)
1166	// is a valid end-of-tag character
1167	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1168	&& ( (c == ' ')
1169	\|\| (c == '>')
1170	\|\| (c == '=')
1171	\|\| (c == '\r')
1172	\|\| (c == '\n')
1173	\|\| (c == 0)
1174	)
1175	)
1176	{
1177	// yes:
1178	CHAR c2;
1179	p2 = p + cbAttrib;
1180	c2 = *p2;
1181	while ( ( (c2 == ' ')
1182	\|\| (c2 == '=')
1183	\|\| (c2 == '\n')
1184	\|\| (c2 == '\r')
1185	)
1186	&& (c2 != 0)
1187	)
1188	c2 = *++p2;
1189
1190	prc = p2;
1191	break; // first while
1192	}
1193	}
1194	else
1195	break;
1196
1197	pszSearchIn2++;
1198	}
1199	return prc;
1200	} */
1201
1202	/*
1203	* strhGetNumAttribValue:
1204	* stores the numerical parameter value of an HTML-style
1205	* tag in *pl.
1206	*
1207	* Returns the address of the tag parameter in the
1208	* search buffer, if found, or NULL.
1209	*
1210	* <B>Example:</B>
1211	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1212	*
1213	* stores 123 in the "l" variable.
1214	*
1215	*@@added V0.9.0 [umoeller]
1216	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1217	*/
1218
1219	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1220	const char *pszTag, // e.g. "INDEX"
1221	PLONG pl) // out: numerical value
1222	{
1223	PSZ pParam;
1224	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1225	{
1226	if ( (*pParam == '\"')
1227	\|\| (*pParam == '\'')
1228	)
1229	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1230
1231	sscanf(pParam, "%ld", pl);
1232	}
1233
1234	return pParam;
1235	}
1236
1237	/*
1238	* strhGetTextAttr:
1239	* retrieves the attribute value of a textual HTML-style tag
1240	* in a newly allocated buffer, which is returned,
1241	* or NULL if attribute not found.
1242	* If an attribute value is to contain spaces, it
1243	* must be enclosed in quotes.
1244	*
1245	* The offset of the attribute data in pszSearchIn is
1246	* returned in *pulOffset so that you can do multiple
1247	* searches.
1248	*
1249	* This returns a new buffer, which should be free()'d after use.
1250	*
1251	* <B>Example:</B>
1252	+ ULONG ulOfs = 0;
1253	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1254	+ ............^ ulOfs
1255	*
1256	* returns a new string with the value "blublub" (without
1257	* quotes) and sets ulOfs to 12.
1258	*
1259	*@@added V0.9.0 [umoeller]
1260	*/
1261
1262	PSZ strhGetTextAttr(const char *pszSearchIn,
1263	const char *pszTag,
1264	PULONG pulOffset) // out: offset where found
1265	{
1266	PSZ pParam,
1267	pParam2,
1268	prc = NULL;
1269	ULONG ulCount = 0;
1270	LONG lNestingLevel = 0;
1271
1272	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1273	{
1274	// determine end character to search for: a space
1275	CHAR cEnd = ' ';
1276	if (*pParam == '\"')
1277	{
1278	// or, if the data is enclosed in quotes, a quote
1279	cEnd = '\"';
1280	pParam++;
1281	}
1282
1283	if (pulOffset)
1284	// store the offset
1285	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1286
1287	// now find end of attribute
1288	pParam2 = pParam;
1289	while (*pParam)
1290	{
1291	if (*pParam == cEnd)
1292	// end character found
1293	break;
1294	else if (*pParam == '<')
1295	// yet another opening tag found:
1296	// this is probably some "<" in the attributes
1297	lNestingLevel++;
1298	else if (*pParam == '>')
1299	{
1300	lNestingLevel--;
1301	if (lNestingLevel < 0)
1302	// end of tag found:
1303	break;
1304	}
1305	ulCount++;
1306	pParam++;
1307	}
1308
1309	// copy attribute to new buffer
1310	if (ulCount)
1311	{
1312	prc = (PSZ)malloc(ulCount+1);
1313	memcpy(prc, pParam2, ulCount);
1314	*(prc+ulCount) = 0;
1315	}
1316	}
1317	return prc;
1318	}
1319
1320	/*
1321	* strhFindEndOfTag:
1322	* returns a pointer to the ">" char
1323	* which seems to terminate the tag beginning
1324	* after pszBeginOfTag.
1325	*
1326	* If additional "<" chars are found, we look
1327	* for additional ">" characters too.
1328	*
1329	* Note: You must pass the address of the opening
1330	* '<' character to this function.
1331	*
1332	* Example:
1333	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1334	+ strhFindEndOfTag(pszTest)
1335	+ returns.................................^ this.
1336	*
1337	*@@added V0.9.0 [umoeller]
1338	*/
1339
1340	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1341	{
1342	PSZ p = (PSZ)pszBeginOfTag,
1343	prc = NULL;
1344	LONG lNestingLevel = 0;
1345
1346	while (*p)
1347	{
1348	if (*p == '<')
1349	// another opening tag found:
1350	lNestingLevel++;
1351	else if (*p == '>')
1352	{
1353	// closing tag found:
1354	lNestingLevel--;
1355	if (lNestingLevel < 1)
1356	{
1357	// corresponding: return this
1358	prc = p;
1359	break;
1360	}
1361	}
1362	p++;
1363	}
1364
1365	return prc;
1366	}
1367
1368	/*
1369	* strhGetBlock:
1370	* this complex function searches the given string
1371	* for a pair of opening/closing HTML-style tags.
1372	*
1373	* If found, this routine returns TRUE and does
1374	* the following:
1375	*
1376	* 1) allocate a new buffer, copy the text
1377	* enclosed by the opening/closing tags
1378	* into it and set *ppszBlock to that
1379	* buffer;
1380	*
1381	* 2) if the opening tag has any attributes,
1382	* allocate another buffer, copy the
1383	* attributes into it and set *ppszAttrs
1384	* to that buffer; if no attributes are
1385	* found, *ppszAttrs will be NULL;
1386	*
1387	* 3) set *pulOffset to the offset from the
1388	* beginning of *ppszSearchIn where the
1389	* opening tag was found;
1390	*
1391	* 4) advance *ppszSearchIn to after the
1392	* closing tag, so that you can do
1393	* multiple searches without finding the
1394	* same tags twice.
1395	*
1396	* All buffers should be freed using free().
1397	*
1398	* This returns the following:
1399	* -- 0: no error
1400	* -- 1: tag not found at all (doesn't have to be an error)
1401	* -- 2: begin tag found, but no corresponding end tag found. This
1402	* is a real error.
1403	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1404	*
1405	* <B>Example:</B>
1406	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1407	+ PSZ pszBlock, pszAttrs;
1408	+ ULONG ulOfs;
1409	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1410	*
1411	* would do the following:
1412	*
1413	* 1) set pszBlock to a new string containing "This is page 1."
1414	* without quotes;
1415	*
1416	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1417	*
1418	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1419	*
1420	* 4) pSearch would be advanced to point to the "More text"
1421	* string in the original buffer.
1422	*
1423	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1424	* for HTML parsing.
1425	*
1426	*@@added V0.9.0 [umoeller]
1427	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1428	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1429	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1430	*/
1431
1432	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1433	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1434	const char *pszTag,
1435	PSZ *ppszBlock, // out: block enclosed by the tags
1436	PSZ *ppszAttribs, // out: attributes of the opening tag
1437	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1438	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1439	{
1440	ULONG ulrc = 1;
1441	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1442	pszSearch2 = pszBeginTag,
1443	pszClosingTag;
1444	ULONG cbTag = strlen(pszTag);
1445
1446	// go thru the block and check all tags if it's the
1447	// begin tag we're looking for
1448	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1449	{
1450	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1451	// yes: stop
1452	break;
1453	else
1454	pszBeginTag++;
1455	}
1456
1457	if (pszBeginTag)
1458	{
1459	// we found <TAG>:
1460	ULONG ulNestingLevel = 0;
1461
1462	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1463	// strchr(pszBeginTag, '>');
1464	if (pszEndOfBeginTag)
1465	{
1466	// does the caller want the attributes?
1467	if (ppszAttribs)
1468	{
1469	// yes: then copy them
1470	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1471	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1472	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1473	// add terminating 0
1474	*(pszAttrs + ulAttrLen) = 0;
1475
1476	*ppszAttribs = pszAttrs;
1477	}
1478
1479	// output offset of where we found the begin tag
1480	if (pulOfsBeginTag)
1481	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1482
1483	// now find corresponding closing tag (e.g. "</BODY>"
1484	pszBeginTag = pszEndOfBeginTag+1;
1485	// now we're behind the '>' char of the opening tag
1486	// increase offset of that too
1487	if (pulOfsBeginBlock)
1488	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1489
1490	// find next closing tag;
1491	// for the first run, pszSearch2 points to right
1492	// after the '>' char of the opening tag
1493	pszSearch2 = pszBeginTag;
1494	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1495	&& (pszClosingTag = strstr(pszSearch2, "<"))
1496	)
1497	{
1498	// if we have another opening tag before our closing
1499	// tag, we need to have several closing tags before
1500	// we're done
1501	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1502	ulNestingLevel++;
1503	else
1504	{
1505	// is this ours?
1506	if ( (*(pszClosingTag+1) == '/')
1507	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1508	)
1509	{
1510	// we've found a matching closing tag; is
1511	// it ours?
1512	if (ulNestingLevel == 0)
1513	{
1514	// our closing tag found:
1515	// allocate mem for a new buffer
1516	// and extract all the text between
1517	// open and closing tags to it
1518	ULONG ulLen = pszClosingTag - pszBeginTag;
1519	if (ppszBlock)
1520	{
1521	PSZ pNew = (PSZ)malloc(ulLen + 1);
1522	strhncpy0(pNew, pszBeginTag, ulLen);
1523	*ppszBlock = pNew;
1524	}
1525
1526	// raise search offset to after the closing tag
1527	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1528
1529	ulrc = 0;
1530
1531	break;
1532	} else
1533	// not our closing tag:
1534	ulNestingLevel--;
1535	}
1536	}
1537	// no matching closing tag: search on after that
1538	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1539	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1540
1541	if (!pszClosingTag)
1542	// no matching closing tag found:
1543	// return 2 (closing tag not found)
1544	ulrc = 2;
1545	} // end if (pszBeginTag)
1546	else
1547	// no matching ">" for opening tag found:
1548	ulrc = 3;
1549	}
1550
1551	return ulrc;
1552	}
1553
1554	/* ******************************************************************
1555	*
1556	* Miscellaneous
1557	*
1558	********************************************************************/
1559
1560	/*
1561	*@@ strhArrayAppend:
1562	* this appends a string to a "string array".
1563	*
1564	* A string array is considered a sequence of
1565	* zero-terminated strings in memory. That is,
1566	* after each string's null-byte, the next
1567	* string comes up.
1568	*
1569	* This is useful for composing a single block
1570	* of memory from, say, list box entries, which
1571	* can then be written to OS2.INI in one flush.
1572	*
1573	* To append strings to such an array, call this
1574	* function for each string you wish to append.
1575	* This will re-allocate *ppszRoot with each call,
1576	* and update *pcbRoot, which then contains the
1577	* total size of all strings (including all null
1578	* terminators).
1579	*
1580	* Pass *pcbRoot to PrfSaveProfileData to have the
1581	* block saved.
1582	*
1583	* Note: On the first call, ppszRoot and pcbRoot
1584	* _must_ be both NULL, or this crashes.
1585	*
1586	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1587	*/
1588
1589	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1590	const char *pcszNew, // in: string to append
1591	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1592	PULONG pcbRoot) // in/out: size of array
1593	{
1594	PSZ pszTemp;
1595
1596	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1597	cbNew = strlen(pcszNew);
1598
1599	pszTemp = (PSZ)malloc(*pcbRoot
1600	+ cbNew
1601	+ 1); // two null bytes
1602	if (*ppszRoot)
1603	{
1604	// not first loop: copy old stuff
1605	memcpy(pszTemp,
1606	*ppszRoot,
1607	*pcbRoot);
1608	free(*ppszRoot);
1609	}
1610	// append new string
1611	strcpy(pszTemp + *pcbRoot,
1612	pcszNew);
1613	// update root
1614	*ppszRoot = pszTemp;
1615	// update length
1616	*pcbRoot += cbNew + 1;
1617	}
1618
1619	/*
1620	*@@ strhCreateDump:
1621	* this dumps a memory block into a string
1622	* and returns that string in a new buffer.
1623	*
1624	* You must free() the returned PSZ after use.
1625	*
1626	* The output looks like the following:
1627	*
1628	+ 0000: FE FF 0E 02 90 00 00 00 ........
1629	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1630	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1631	*
1632	* Each line is terminated with a newline (\n)
1633	* character only.
1634	*
1635	*@@added V0.9.1 (2000-01-22) [umoeller]
1636	*/
1637
1638	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1639	ULONG ulSize, // in: size of buffer
1640	ULONG ulIndent) // in: indentation of every line
1641	{
1642	PSZ pszReturn = 0;
1643	XSTRING strReturn;
1644	CHAR szTemp[1000];
1645
1646	PBYTE pbCurrent = pb; // current byte
1647	ULONG ulCount = 0,
1648	ulCharsInLine = 0; // if this grows > 7, a new line is started
1649	CHAR szLine[400] = "",
1650	szAscii[30] = " "; // ASCII representation; filled for every line
1651	PSZ pszLine = szLine,
1652	pszAscii = szAscii;
1653
1654	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1655
1656	for (pbCurrent = pb;
1657	ulCount < ulSize;
1658	pbCurrent++, ulCount++)
1659	{
1660	if (ulCharsInLine == 0)
1661	{
1662	memset(szLine, ' ', ulIndent);
1663	pszLine += ulIndent;
1664	}
1665	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1666
1667	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1668	// printable character:
1669	pszAscii = pbCurrent;
1670	else
1671	*pszAscii = '.';
1672	pszAscii++;
1673
1674	ulCharsInLine++;
1675	if ( (ulCharsInLine > 7) // 8 bytes added?
1676	\|\| (ulCount == ulSize-1) // end of buffer reached?
1677	)
1678	{
1679	// if we haven't had eight bytes yet,
1680	// fill buffer up to eight bytes with spaces
1681	ULONG ul2;
1682	for (ul2 = ulCharsInLine;
1683	ul2 < 8;
1684	ul2++)
1685	pszLine += sprintf(pszLine, " ");
1686
1687	sprintf(szTemp, "%04lX: %s %s\n",
1688	(ulCount & 0xFFFFFFF8), // offset in hex
1689	szLine, // bytes string
1690	szAscii); // ASCII string
1691	xstrcat(&strReturn, szTemp, 0);
1692
1693	// restart line buffer
1694	pszLine = szLine;
1695
1696	// clear ASCII buffer
1697	strcpy(szAscii, " ");
1698	pszAscii = szAscii;
1699
1700	// reset line counter
1701	ulCharsInLine = 0;
1702	}
1703	}
1704
1705	if (strReturn.cbAllocated)
1706	pszReturn = strReturn.psz;
1707
1708	return pszReturn;
1709	}
1710
1711	/* ******************************************************************
1712	*
1713	* Fast string searches
1714	*
1715	********************************************************************/
1716
1717	#define ASSERT(a)
1718
1719	/*
1720	* The following code has been taken from the "Standard
1721	* Function Library", file sflfind.c, and only slightly
1722	* modified to conform to the rest of this file.
1723	*
1724	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1725	* Revised: 98/05/04
1726	*
1727	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1728	*
1729	* The SFL Licence allows incorporating SFL code into other
1730	* programs, as long as the copyright is reprinted and the
1731	* code is marked as modified, so this is what we do.
1732	*/
1733
1734	/*
1735	*@@ strhmemfind:
1736	* searches for a pattern in a block of memory using the
1737	* Boyer-Moore-Horspool-Sunday algorithm.
1738	*
1739	* The block and pattern may contain any values; you must
1740	* explicitly provide their lengths. If you search for strings,
1741	* use strlen() on the buffers.
1742	*
1743	* Returns a pointer to the pattern if found within the block,
1744	* or NULL if the pattern was not found.
1745	*
1746	* This algorithm needs a "shift table" to cache data for the
1747	* search pattern. This table can be reused when performing
1748	* several searches with the same pattern.
1749	*
1750	* "shift" must point to an array big enough to hold 256 (8**2)
1751	* "size_t" values.
1752	*
1753	* If (*repeat_find == FALSE), the shift table is initialized.
1754	* So on the first search with a given pattern, *repeat_find
1755	* should be FALSE. This function sets it to TRUE after the
1756	* shift table is initialised, allowing the initialisation
1757	* phase to be skipped on subsequent searches.
1758	*
1759	* This function is most effective when repeated searches are
1760	* made for the same pattern in one or more large buffers.
1761	*
1762	* Example:
1763	*
1764	+ PSZ pszHaystack = "This is a sample string.",
1765	+ pszNeedle = "string";
1766	+ size_t shift[256];
1767	+ BOOL fRepeat = FALSE;
1768	+
1769	+ PSZ pFound = strhmemfind(pszHaystack,
1770	+ strlen(pszHaystack), // block size
1771	+ pszNeedle,
1772	+ strlen(pszNeedle), // pattern size
1773	+ shift,
1774	+ &fRepeat);
1775	*
1776	* Taken from the "Standard Function Library", file sflfind.c.
1777	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1778	* Slightly modified by umoeller.
1779	*
1780	*@@added V0.9.3 (2000-05-08) [umoeller]
1781	*/
1782
1783	void* strhmemfind(const void *in_block, // in: block containing data
1784	size_t block_size, // in: size of block in bytes
1785	const void *in_pattern, // in: pattern to search for
1786	size_t pattern_size, // in: size of pattern block
1787	size_t *shift, // in/out: shift table (search buffer)
1788	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
1789	{
1790	size_t byte_nbr, // Distance through block
1791	match_size; // Size of matched part
1792	const unsigned char
1793	*match_base = NULL, // Base of match of pattern
1794	*match_ptr = NULL, // Point within current match
1795	*limit = NULL; // Last potiental match point
1796	const unsigned char
1797	block = (unsigned char ) in_block, // Concrete pointer to block data
1798	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
1799
1800	if ( (block == NULL)
1801	\|\| (pattern == NULL)
1802	\|\| (shift == NULL)
1803	)
1804	return NULL;
1805
1806	// Pattern must be smaller or equal in size to string
1807	if (block_size < pattern_size)
1808	return NULL; // Otherwise it's not found
1809
1810	if (pattern_size == 0) // Empty patterns match at start
1811	return ((void *)block);
1812
1813	// Build the shift table unless we're continuing a previous search
1814
1815	// The shift table determines how far to shift before trying to match
1816	// again, if a match at this point fails. If the byte after where the
1817	// end of our pattern falls is not in our pattern, then we start to
1818	// match again after that byte; otherwise we line up the last occurence
1819	// of that byte in our pattern under that byte, and try match again.
1820
1821	if (!repeat_find \|\| !*repeat_find)
1822	{
1823	for (byte_nbr = 0;
1824	byte_nbr < 256;
1825	byte_nbr++)
1826	shift[byte_nbr] = pattern_size + 1;
1827	for (byte_nbr = 0;
1828	byte_nbr < pattern_size;
1829	byte_nbr++)
1830	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1831
1832	if (repeat_find)
1833	*repeat_find = TRUE;
1834	}
1835
1836	// Search for the block, each time jumping up by the amount
1837	// computed in the shift table
1838
1839	limit = block + (block_size - pattern_size + 1);
1840	ASSERT (limit > block);
1841
1842	for (match_base = block;
1843	match_base < limit;
1844	match_base += shift[*(match_base + pattern_size)])
1845	{
1846	match_ptr = match_base;
1847	match_size = 0;
1848
1849	// Compare pattern until it all matches, or we find a difference
1850	while (*match_ptr++ == pattern[match_size++])
1851	{
1852	ASSERT (match_size <= pattern_size &&
1853	match_ptr == (match_base + match_size));
1854
1855	// If we found a match, return the start address
1856	if (match_size >= pattern_size)
1857	return ((void*)(match_base));
1858
1859	}
1860	}
1861	return NULL; // Found nothing
1862	}
1863
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      NULL is also returned if string or pattern is NULL. An empty
 *      pattern matches at the start of the string.
 *
 *      Will match strings irrespective of case, as determined by
 *      tolower(). For exact matching of memory blocks, see
 *      strhmemfind. Will not work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t
        shift [256];                        // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                           // Index into byte array
        match_size;                         // Size of matched part
    const char
        *match_base = NULL,                 // Base of match of pattern
        *match_ptr = NULL,                  // Point within current match
        *limit = NULL;                      // First address which is no match point any more

    // Fail gracefully on NULL pointers
    if (string == NULL || pattern == NULL)
        return NULL;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return NULL;                        // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    // All characters are converted to unsigned char before they are
    // passed to tolower(), whose behavior is undefined for negative
    // values other than EOF (characters above 127 if char is signed).
    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potiental match.

    limit = string + (string_size - pattern_size + 1);
    // limit > string holds here because string_size >= pattern_size

    // The character behind the current window, which determines the
    // shift, is at worst the null terminator of the string, so that
    // read never leaves the string buffer.
    for (match_base = string;
         match_base < limit;
         match_base += shift [(unsigned char) tolower ((unsigned char) *(match_base + pattern_size))])
    {
        match_ptr = match_base;
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (    tolower ((unsigned char) *match_ptr++)
                == tolower ((unsigned char) pattern [match_size++])
              )
        {
            // If we found a match, return the start address
            if (match_size >= pattern_size)
                return ((char *)(match_base));
        }
    }

    return NULL;                            // Found nothing
}
1958

Note: See TracBrowser for help on using the repository browser.

Download in other formats: