Context Navigation

source: trunk/src/helpers/stringh.c@ 165

Visit:

Last change on this file since 165 was 164, checked in by umoeller, 23 years ago
Massive pager rework.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 58.4 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2000 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
73	/*
74	*@@ strhStoreDebug:
75	* memory debug version of strhStore.
76	*
77	*@@added V0.9.16 (2001-12-08) [umoeller]
78	*/
79
80	APIRET (strhStoreDebug)(PSZ *ppszTarget,
81	PCSZ pcszSource,
82	PULONG pulLength, // out: length of new string (ptr can be NULL)
83	PCSZ pcszSourceFile,
84	unsigned long ulLine,
85	PCSZ pcszFunction)
86	{
87	ULONG ulLength = 0;
88
89
90
91	if (ppszTarget)
92	{
93	if (*ppszTarget)
94	free(*ppszTarget);
95
96	if ( (pcszSource)
97	&& (ulLength = strlen(pcszSource))
98	)
99	{
100	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
101	pcszSourceFile,
102	ulLine,
103	pcszFunction))
104	memcpy(*ppszTarget, pcszSource, ulLength + 1);
105	else
106	return (ERROR_NOT_ENOUGH_MEMORY);
107	}
108	else
109	*ppszTarget = NULL;
110	}
111
112	if (pulLength)
113	*pulLength = ulLength;
114
115	return (NO_ERROR);
116	}
117
118	#endif
119
120	/*
121	*@@ strhStore:
122	* stores a copy of the given string in the specified
123	* buffer. Uses strdup internally.
124	*
125	* If *ppszTarget != NULL, the previous string is freed
126	* and set to NULL.
127	* If pcszSource != NULL, a copy of it is stored in the
128	* buffer.
129	*
130	*@@added V0.9.16 (2001-12-06) [umoeller]
131	*/
132
133	APIRET strhStore(PSZ *ppszTarget,
134	PCSZ pcszSource,
135	PULONG pulLength) // out: length of new string (ptr can be NULL)
136	{
137	ULONG ulLength = 0;
138
139	if (ppszTarget)
140	{
141	if (*ppszTarget)
142	free(*ppszTarget);
143
144	if ( (pcszSource)
145	&& (ulLength = strlen(pcszSource))
146	)
147	{
148	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
149	memcpy(*ppszTarget, pcszSource, ulLength + 1);
150	else
151	return (ERROR_NOT_ENOUGH_MEMORY);
152	}
153	else
154	*ppszTarget = NULL;
155	}
156	else
157	return (ERROR_INVALID_PARAMETER);
158
159	if (pulLength)
160	*pulLength = ulLength;
161
162	return (NO_ERROR);
163	}
164
165	/*
166	*@@ strhcpy:
167	* like strdup, but this one doesn't crash if string2 is NULL,
168	* but sets the first byte in string1 to \0 instead.
169	*
170	*@@added V0.9.14 (2001-08-01) [umoeller]
171	*/
172
173	PSZ strhcpy(PSZ string1, PCSZ string2)
174	{
175	if (string2)
176	return (strcpy(string1, string2));
177
178	*string1 = '\0';
179	return (string1);
180	}
181
182	#ifdef __DEBUG_MALLOC_ENABLED__
183
184	/*
185	*@@ strhdupDebug:
186	* memory debug version of strhdup.
187	*
188	*@@added V0.9.0 [umoeller]
189	*/
190
191	PSZ strhdupDebug(PCSZ pcszSource,
192	unsigned long *pulLength,
193	PCSZ pcszSourceFile,
194	unsigned long ulLine,
195	PCSZ pcszFunction)
196	{
197	PSZ pszReturn = NULL;
198	ULONG ulLength = 0;
199
200	if ( (pcszSource)
201	&& (ulLength = strlen(pcszSource))
202	)
203	{
204	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
205	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
206	ulLine,
207	pcszFunction))
208	memcpy(pszReturn, pcszSource, ulLength + 1);
209	}
210
211	if (pulLength)
212	*pulLength = ulLength;
213
214	return (pszReturn);
215	}
216
217	#endif // __DEBUG_MALLOC_ENABLED__
218
219	/*
220	*@@ strhdup:
221	* like strdup, but this one doesn't crash if pszSource
222	* is NULL, but returns NULL also. In addition, this
223	* can report the length of the string (V0.9.16).
224	*
225	*@@added V0.9.0 [umoeller]
226	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
227	*/
228
229	PSZ strhdup(PCSZ pcszSource,
230	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
231	{
232	PSZ pszReturn = NULL;
233	ULONG ulLength = 0;
234
235	if ( (pcszSource)
236	&& (ulLength = strlen(pcszSource))
237	)
238	{
239	if (pszReturn = (PSZ)malloc(ulLength + 1))
240	memcpy(pszReturn, pcszSource, ulLength + 1);
241	}
242
243	if (pulLength)
244	*pulLength = ulLength;
245
246	return (pszReturn);
247	}
248
249	/*
250	*@@ strhcmp:
251	* better strcmp. This doesn't crash if any of the
252	* string pointers are NULL, but returns a proper
253	* value then.
254	*
255	* Besides, this is guaranteed to only return -1, 0,
256	* or +1, while strcmp can return any positive or
257	* negative value. This is useful for tree comparison
258	* funcs.
259	*
260	*@@added V0.9.9 (2001-02-16) [umoeller]
261	*/
262
263	int strhcmp(PCSZ p1, PCSZ p2)
264	{
265	if (p1 && p2)
266	{
267	int i = strcmp(p1, p2);
268	if (i < 0) return (-1);
269	if (i > 0) return (+1);
270	}
271	else if (p1)
272	// but p2 is NULL: p1 greater than p2 then
273	return (+1);
274	else if (p2)
275	// but p1 is NULL: p1 less than p2 then
276	return (-1);
277
278	// return 0 if strcmp returned 0 above or both strings are NULL
279	return (0);
280	}
281
282	/*
283	*@@ strhicmp:
284	* like strhcmp, but compares without respect
285	* to case.
286	*
287	*@@added V0.9.9 (2001-04-07) [umoeller]
288	*/
289
290	int strhicmp(PCSZ p1, PCSZ p2)
291	{
292	if (p1 && p2)
293	{
294	int i = stricmp(p1, p2);
295	if (i < 0) return (-1);
296	if (i > 0) return (+1);
297	}
298	else if (p1)
299	// but p2 is NULL: p1 greater than p2 then
300	return (+1);
301	else if (p2)
302	// but p1 is NULL: p1 less than p2 then
303	return (-1);
304
305	// return 0 if strcmp returned 0 above or both strings are NULL
306	return (0);
307	}
308
309	/*
310	*@@ strhistr:
311	* like strstr, but case-insensitive.
312	*
313	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
314	*/
315
316	PSZ strhistr(PCSZ string1, PCSZ string2)
317	{
318	PSZ prc = NULL;
319
320	if ((string1) && (string2))
321	{
322	PSZ pszSrchIn = strdup(string1);
323	PSZ pszSrchFor = strdup(string2);
324
325	if ((pszSrchIn) && (pszSrchFor))
326	{
327	strupr(pszSrchIn);
328	strupr(pszSrchFor);
329
330	if (prc = strstr(pszSrchIn, pszSrchFor))
331	{
332	// prc now has the first occurence of the string,
333	// but in pszSrchIn; we need to map this
334	// return value to the original string
335	prc = (prc-pszSrchIn) // offset in pszSrchIn
336	+ (PSZ)string1;
337	}
338	}
339	if (pszSrchFor)
340	free(pszSrchFor);
341	if (pszSrchIn)
342	free(pszSrchIn);
343	}
344	return (prc);
345	}
346
347	/*
348	*@@ strhncpy0:
349	* like strncpy, but always appends a 0 character.
350	*
351	*@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
352	*/
353
354	ULONG strhncpy0(PSZ pszTarget,
355	PCSZ pszSource,
356	ULONG cbSource)
357	{
358	ULONG ul = 0;
359	PSZ pTarget = pszTarget,
360	pSource;
361
362	if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
363	{
364	for (ul = 0; ul < cbSource; ul++)
365	if (*pSource)
366	pTarget++ = pSource++;
367	else
368	break;
369	}
370
371	*pTarget = 0;
372
373	return (ul);
374	}
375
376	/*
377	*@@ strhlen:
378	* like strlen, but doesn't crash on
379	* null strings, but returns 0 also.
380	*
381	*@@added V0.9.19 (2002-04-02) [umoeller]
382	*/
383
384	ULONG strhlen(PCSZ pcsz)
385	{
386	if (pcsz)
387	return (strlen(pcsz));
388
389	return 0;
390	}
391
392	/*
393	*@@ strhSize:
394	* returns the size of the given string, which
395	* is the memory required to allocate a copy,
396	* including the null terminator.
397	*
398	* Returns 0 only if pcsz is NULL. If pcsz
399	* points to a null character, this returns 1.
400	*
401	*@@added V0.9.18 (2002-02-13) [umoeller]
402	*@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
403	*/
404
405	ULONG strhSize(PCSZ pcsz)
406	{
407	if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
408	return (strlen(pcsz) + 1);
409
410	return (0);
411	}
412
413	/*
414	* strhCount:
415	* this counts the occurences of c in pszSearch.
416	*/
417
418	ULONG strhCount(PCSZ pszSearch,
419	CHAR c)
420	{
421	PSZ p = (PSZ)pszSearch;
422	ULONG ulCount = 0;
423	while (TRUE)
424	{
425	p = strchr(p, c);
426	if (p)
427	{
428	ulCount++;
429	p++;
430	}
431	else
432	break;
433	}
434	return (ulCount);
435	}
436
437	/*
438	*@@ strhIsDecimal:
439	* returns TRUE if psz consists of decimal digits only.
440	*/
441
442	BOOL strhIsDecimal(PSZ psz)
443	{
444	PSZ p = psz;
445	while (*p != 0)
446	{
447	if (isdigit(*p) == 0)
448	return (FALSE);
449	p++;
450	}
451
452	return (TRUE);
453	}
454
455	#ifdef __DEBUG_MALLOC_ENABLED__
456
457	/*
458	*@@ strhSubstrDebug:
459	* memory debug version of strhSubstr.
460	*
461	*@@added V0.9.14 (2001-08-01) [umoeller]
462	*/
463
464	PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
465	PCSZ pEnd, // in: last char (not included)
466	PCSZ pcszSourceFile,
467	unsigned long ulLine,
468	PCSZ pcszFunction)
469	{
470	PSZ pszSubstr = NULL;
471
472	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
473	{
474	ULONG cbSubstr = (pEnd - pBegin);
475	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
476	pcszSourceFile,
477	ulLine,
478	pcszFunction))
479	{
480	// strhncpy0(pszSubstr, pBegin, cbSubstr);
481	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
482	*(pszSubstr + cbSubstr) = '\0';
483	}
484	}
485
486	return (pszSubstr);
487	}
488
489	#endif // __DEBUG_MALLOC_ENABLED__
490
491	/*
492	*@@ strhSubstr:
493	* this creates a new PSZ containing the string
494	* from pBegin to pEnd, excluding the pEnd character.
495	* The new string is null-terminated. The caller
496	* must free() the new string after use.
497	*
498	* Example:
499	+ "1234567890"
500	+ ^ ^
501	+ p1 p2
502	+ strhSubstr(p1, p2)
503	* would return a new string containing "2345678".
504	*
505	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
506	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
507	*/
508
509	PSZ strhSubstr(PCSZ pBegin, // in: first char
510	PCSZ pEnd) // in: last char (not included)
511	{
512	PSZ pszSubstr = NULL;
513
514	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
515	{
516	ULONG cbSubstr = (pEnd - pBegin);
517	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
518	{
519	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
520	*(pszSubstr + cbSubstr) = '\0';
521	}
522	}
523
524	return (pszSubstr);
525	}
526
527	/*
528	*@@ strhExtract:
529	* searches pszBuf for the cOpen character and returns
530	* the data in between cOpen and cClose, excluding
531	* those two characters, in a newly allocated buffer
532	* which you must free() afterwards.
533	*
534	* Spaces and newlines/linefeeds are skipped.
535	*
536	* If the search was successful, the new buffer
537	* is returned and, if (ppEnd != NULL), *ppEnd points
538	* to the first character after the cClose character
539	* found in the buffer.
540	*
541	* If the search was not successful, NULL is
542	* returned, and *ppEnd is unchanged.
543	*
544	* If another cOpen character is found before
545	* cClose, matching cClose characters will be skipped.
546	* You can therefore nest the cOpen and cClose
547	* characters.
548	*
549	* This function ignores cOpen and cClose characters
550	* in C-style comments and strings surrounded by
551	* double quotes.
552	*
553	* Example:
554	*
555	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
556	+ pEnd;
557	+ strhExtract(pszBuf,
558	+ '{', '}',
559	+ &pEnd)
560	*
561	* would return a new buffer containing " --blah-- ",
562	* and ppEnd would afterwards point to the space
563	* before "next" in the static buffer.
564	*
565	*@@added V0.9.0 [umoeller]
566	*/
567
568	PSZ strhExtract(PCSZ pszBuf, // in: search buffer
569	CHAR cOpen, // in: opening char
570	CHAR cClose, // in: closing char
571	PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
572	{
573	PSZ pszReturn = NULL;
574	PCSZ pOpen;
575	if ( (pszBuf)
576	&& (pOpen = strchr(pszBuf, cOpen))
577	)
578	{
579	// opening char found:
580	// now go thru the whole rest of the buffer
581	PCSZ p = pOpen + 1;
582	LONG lLevel = 1; // if this goes 0, we're done
583	while (*p)
584	{
585	if (*p == cOpen)
586	lLevel++;
587	else if (*p == cClose)
588	{
589	lLevel--;
590	if (lLevel <= 0)
591	{
592	// matching closing bracket found:
593	// extract string
594	pszReturn = strhSubstr(pOpen + 1, // after cOpen
595	p); // excluding cClose
596	if (ppEnd)
597	*ppEnd = p + 1;
598	break; // while (*p)
599	}
600	}
601	else if (*p == '\"')
602	{
603	// beginning of string:
604	PCSZ p2 = p+1;
605	// find end of string
606	while ((p2) && (p2 != '\"'))
607	p2++;
608
609	if (*p2 == '\"')
610	// closing quote found:
611	// search on after that
612	p = p2; // raised below
613	else
614	break; // while (*p)
615	}
616
617	p++;
618	}
619	}
620
621	return (pszReturn);
622	}
623
624	/*
625	*@@ strhQuote:
626	* similar to strhExtract, except that
627	* opening and closing chars are the same,
628	* and therefore no nesting is possible.
629	* Useful for extracting stuff between
630	* quotes.
631	*
632	*@@added V0.9.0 [umoeller]
633	*/
634
635	PSZ strhQuote(PSZ pszBuf,
636	CHAR cQuote,
637	PSZ *ppEnd)
638	{
639	PSZ pszReturn = NULL,
640	p1 = NULL;
641	if ((p1 = strchr(pszBuf, cQuote)))
642	{
643	PSZ p2;
644	if (p2 = strchr(p1+1, cQuote))
645	{
646	pszReturn = strhSubstr(p1+1, p2);
647	if (ppEnd)
648	// store closing char
649	*ppEnd = p2 + 1;
650	}
651	}
652
653	return (pszReturn);
654	}
655
656	/*
657	*@@ strhStrip:
658	* removes all double spaces.
659	* This copies within the "psz" buffer.
660	* If any double spaces are found, the
661	* string will be shorter than before,
662	* but the buffer is _not_ reallocated,
663	* so there will be unused bytes at the
664	* end.
665	*
666	* Returns the number of spaces removed.
667	*
668	*@@added V0.9.0 [umoeller]
669	*/
670
671	ULONG strhStrip(PSZ psz) // in/out: string
672	{
673	PSZ p;
674	ULONG cb = strlen(psz),
675	ulrc = 0;
676
677	for (p = psz; p < psz+cb; p++)
678	{
679	if ((p == ' ') && ((p+1) == ' '))
680	{
681	PSZ p2 = p;
682	while (*p2)
683	{
684	p2 = (p2+1);
685	p2++;
686	}
687	cb--;
688	p--;
689	ulrc++;
690	}
691	}
692	return (ulrc);
693	}
694
695	/*
696	*@@ strhins:
697	* this inserts one string into another.
698	*
699	* pszInsert is inserted into pszBuffer at offset
700	* ulInsertOfs (which counts from 0).
701	*
702	* A newly allocated string is returned. pszBuffer is
703	* not changed. The new string should be free()'d after
704	* use.
705	*
706	* Upon errors, NULL is returned.
707	*
708	*@@changed V0.9.0 [umoeller]: completely rewritten.
709	*/
710
711	PSZ strhins(PCSZ pcszBuffer,
712	ULONG ulInsertOfs,
713	PCSZ pcszInsert)
714	{
715	PSZ pszNew = NULL;
716
717	if ((pcszBuffer) && (pcszInsert))
718	{
719	do {
720	ULONG cbBuffer = strlen(pcszBuffer);
721	ULONG cbInsert = strlen(pcszInsert);
722
723	// check string length
724	if (ulInsertOfs > cbBuffer + 1)
725	break; // do
726
727	// OK, let's go.
728	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
729
730	// copy stuff before pInsertPos
731	memcpy(pszNew,
732	pcszBuffer,
733	ulInsertOfs);
734	// copy string to be inserted
735	memcpy(pszNew + ulInsertOfs,
736	pcszInsert,
737	cbInsert);
738	// copy stuff after pInsertPos
739	strcpy(pszNew + ulInsertOfs + cbInsert,
740	pcszBuffer + ulInsertOfs);
741	} while (FALSE);
742	}
743
744	return (pszNew);
745	}
746
747	/*
748	*@@ strhFindReplace:
749	* wrapper around xstrFindReplace to work with C strings.
750	* Note that *ppszBuf can get reallocated and must
751	* be free()'able.
752	*
753	* Repetitive use of this wrapper is not recommended
754	* because it is considerably slower than xstrFindReplace.
755	*
756	*@@added V0.9.6 (2000-11-01) [umoeller]
757	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
758	*/
759
760	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
761	PULONG pulOfs, // in: where to begin search (0 = start);
762	// out: ofs of first char after replacement string
763	PCSZ pcszSearch, // in: search string; cannot be NULL
764	PCSZ pcszReplace) // in: replacement string; cannot be NULL
765	{
766	ULONG ulrc = 0;
767	XSTRING xstrBuf,
768	xstrFind,
769	xstrReplace;
770	size_t ShiftTable[256];
771	BOOL fRepeat = FALSE;
772	xstrInitSet(&xstrBuf, *ppszBuf);
773	// reallocated and returned, so we're safe
774	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
775	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
776	// these two are never freed, so we're safe too
777
778	if ((ulrc = xstrFindReplace(&xstrBuf,
779	pulOfs,
780	&xstrFind,
781	&xstrReplace,
782	ShiftTable,
783	&fRepeat)))
784	// replaced:
785	*ppszBuf = xstrBuf.psz;
786
787	return (ulrc);
788	}
789
790	/*
791	* strhWords:
792	* returns the no. of words in "psz".
793	* A string is considered a "word" if
794	* it is surrounded by spaces only.
795	*
796	*@@added V0.9.0 [umoeller]
797	*/
798
799	ULONG strhWords(PSZ psz)
800	{
801	PSZ p;
802	ULONG cb = strlen(psz),
803	ulWords = 0;
804	if (cb > 1)
805	{
806	ulWords = 1;
807	for (p = psz; p < psz+cb; p++)
808	if (*p == ' ')
809	ulWords++;
810	}
811	return (ulWords);
812	}
813
814	/*
815	*@@ strhGetWord:
816	* finds word boundaries.
817	*
818	* *ppszStart is used as the beginning of the
819	* search.
820	*
821	* If a word is found, *ppszStart is set to
822	* the first character of the word which was
823	* found and *ppszEnd receives the address
824	* of the first character _after_ the word,
825	* which is probably a space or a \n or \r char.
826	* We then return TRUE.
827	*
828	* The search is stopped if a null character
829	* is found or pLimit is reached. In that case,
830	* FALSE is returned.
831	*
832	*@@added V0.9.1 (2000-02-13) [umoeller]
833	*/
834
835	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
836	// out: start of word (if TRUE is returned)
837	PCSZ pLimit, // in: ptr to last char after *ppszStart to be
838	// searched; if the word does not end before
839	// or with this char, FALSE is returned
840	PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
841	PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
842	PSZ *ppszEnd) // out: first char _after_ word
843	// (if TRUE is returned)
844	{
845	// characters after which a word can be started
846	// PCSZ pcszBeginChars = "\x0d\x0a ";
847	// PCSZ pcszEndChars = "\x0d\x0a /-";
848
849	PSZ pStart = *ppszStart;
850
851	// find start of word
852	while ( (pStart < (PSZ)pLimit)
853	&& (strchr(pcszBeginChars, *pStart))
854	)
855	// if char is a "before word" char: go for next
856	pStart++;
857
858	if (pStart < (PSZ)pLimit)
859	{
860	// found a valid "word start" character
861	// (which is not in pcszBeginChars):
862
863	// find end of word
864	PSZ pEndOfWord = pStart;
865	while ( (pEndOfWord <= (PSZ)pLimit)
866	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
867	)
868	// if char is not an "end word" char: go for next
869	pEndOfWord++;
870
871	if (pEndOfWord <= (PSZ)pLimit)
872	{
873	// whoa, got a word:
874	*ppszStart = pStart;
875	*ppszEnd = pEndOfWord;
876	return (TRUE);
877	}
878	}
879
880	return (FALSE);
881	}
882
883	/*
884	*@@ strhIsWord:
885	* returns TRUE if p points to a "word"
886	* in pcszBuf.
887	*
888	* p is considered a word if the character _before_
889	* it is in pcszBeginChars and the char _after_
890	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
891	*
892	*@@added V0.9.6 (2000-11-12) [umoeller]
893	*@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
894	*/
895
896	BOOL strhIsWord(PCSZ pcszBuf,
897	PCSZ p, // in: start of word
898	ULONG cbSearch, // in: length of word
899	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
900	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
901	{
902	// check previous char
903	if ( (p == pcszBuf)
904	\|\| (strchr(pcszBeginChars, *(p-1)))
905	)
906	{
907	// OK, valid begin char:
908	// check end char
909	CHAR cNextChar;
910	if (!(cNextChar = p[cbSearch]))
911	// null terminator:
912	return TRUE;
913	else
914	{
915	// not null terminator: check if char is
916	// in the list of valid end chars
917	if (strchr(pcszEndChars, cNextChar))
918	{
919	// OK, is end char: avoid doubles of that char,
920	// but allow spaces
921	// fixed V0.9.18 (2002-02-23) [umoeller]
922	CHAR cNextNext = p[cbSearch + 1];
923	if ( (cNextNext != cNextChar)
924	\|\| (cNextNext == ' ')
925	\|\| (cNextNext == 0)
926	)
927	return TRUE;
928	}
929	}
930	}
931
932	return FALSE;
933	}
934
935	/*
936	*@@ strhFindWord:
937	* searches for pszSearch in pszBuf, which is
938	* returned if found (or NULL if not).
939	*
940	* As opposed to strstr, this finds pszSearch
941	* only if it is a "word". A search string is
942	* considered a word if the character _before_
943	* it is in pcszBeginChars and the char _after_
944	* it is in pcszEndChars.
945	*
946	* Example:
947	+ strhFindWord("This is an example.", "is");
948	+ returns ...........^ this, but not the "is" in "This".
949	*
950	* The algorithm here uses strstr to find pszSearch in pszBuf
951	* and performs additional "is-word" checks for each item found
952	* (by calling strhIsWord).
953	*
954	* Note that this function is fairly slow compared to xstrFindWord.
955	*
956	*@@added V0.9.0 (99-11-08) [umoeller]
957	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
958	*/
959
960	PSZ strhFindWord(PCSZ pszBuf,
961	PCSZ pszSearch,
962	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
963	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
964	{
965	PSZ pszReturn = 0;
966	ULONG cbBuf = strlen(pszBuf),
967	cbSearch = strlen(pszSearch);
968
969	if ((cbBuf) && (cbSearch))
970	{
971	PCSZ p = pszBuf;
972
973	do // while p
974	{
975	p = strstr(p, pszSearch);
976	if (p)
977	{
978	// string found:
979	// check if that's a word
980
981	if (strhIsWord(pszBuf,
982	p,
983	cbSearch,
984	pcszBeginChars,
985	pcszEndChars))
986	{
987	// valid end char:
988	pszReturn = (PSZ)p;
989	break;
990	}
991
992	p += cbSearch;
993	}
994	} while (p);
995
996	}
997	return (pszReturn);
998	}
999
1000	/*
1001	*@@ strhFindEOL:
1002	* returns a pointer to the next \r, \n or null character
1003	* following pszSearchIn. Stores the offset in *pulOffset.
1004	*
1005	* This should never return NULL because at some point,
1006	* there will be a null byte in your string.
1007	*
1008	*@@added V0.9.4 (2000-07-01) [umoeller]
1009	*/
1010
1011	PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
1012	PULONG pulOffset) // out: offset (ptr can be NULL)
1013	{
1014	PCSZ p = pcszSearchIn,
1015	prc = 0;
1016	while (TRUE)
1017	{
1018	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1019	{
1020	prc = p;
1021	break;
1022	}
1023	p++;
1024	}
1025
1026	if ((pulOffset) && (prc))
1027	*pulOffset = prc - pcszSearchIn;
1028
1029	return ((PSZ)prc);
1030	}
1031
1032	/*
1033	*@@ strhFindNextLine:
1034	* like strhFindEOL, but this returns the character
1035	* _after_ \r or \n. Note that this might return
1036	* a pointer to terminating NULL character also.
1037	*/
1038
1039	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1040	{
1041	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1042	// pEOL now points to the \r char or the terminating 0 byte;
1043	// if not null byte, advance pointer
1044	PSZ pNextLine = pEOL;
1045	if (*pNextLine == '\r')
1046	pNextLine++;
1047	if (*pNextLine == '\n')
1048	pNextLine++;
1049	if (pulOffset)
1050	*pulOffset = pNextLine - pszSearchIn;
1051	return (pNextLine);
1052	}
1053
1054	/*
1055	*@@ strhBeautifyTitle:
1056	* replaces all line breaks (0xd, 0xa) with spaces.
1057	*
1058	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1059	*/
1060
1061	BOOL strhBeautifyTitle(PSZ psz)
1062	{
1063	BOOL rc = FALSE;
1064	CHAR *p = psz;
1065
1066	while(*p)
1067	if ( (*p == '\r')
1068	\|\| (*p == '\n')
1069	)
1070	{
1071	rc = TRUE;
1072	if ( (p != psz)
1073	&& (p[-1] == ' ')
1074	)
1075	memmove(p, p + 1, strlen(p));
1076	else
1077	*p++ = ' ';
1078	}
1079	else
1080	p++;
1081
1082	return (rc);
1083	}
1084
1085	/*
1086	* strhFindAttribValue:
1087	* searches for pszAttrib in pszSearchIn; if found,
1088	* returns the first character after the "=" char.
1089	* If "=" is not found, a space, \r, and \n are
1090	* also accepted. This function searches without
1091	* respecting case.
1092	*
1093	* <B>Example:</B>
1094	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1095	+
1096	+ returns ....................... ^ this address.
1097	*
1098	*@@added V0.9.0 [umoeller]
1099	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1100	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1101	*/
1102
1103	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1104	{
1105	PSZ prc = 0;
1106	PSZ pszSearchIn2, p;
1107	ULONG cbAttrib = strlen(pszAttrib),
1108	ulLength = strlen(pszSearchIn);
1109
1110	// use alloca(), so memory is freed on function exit
1111	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1112	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1113
1114	// 1) find token, (space char, \n, \r, \t)
1115	p = strtok(pszSearchIn2, " \n\r\t");
1116	while (p)
1117	{
1118	CHAR c2;
1119	PSZ pOrig;
1120
1121	// check tag name
1122	if (!strnicmp(p, pszAttrib, cbAttrib))
1123	{
1124	// position in original string
1125	pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1126
1127	// yes:
1128	prc = pOrig + cbAttrib;
1129	c2 = *prc;
1130	while ( ( (c2 == ' ')
1131	\|\| (c2 == '=')
1132	\|\| (c2 == '\n')
1133	\|\| (c2 == '\r')
1134	)
1135	&& (c2 != 0)
1136	)
1137	c2 = *++prc;
1138
1139	break;
1140	}
1141
1142	p = strtok(NULL, " \n\r\t");
1143	}
1144
1145	return (prc);
1146	}
1147
1148	/* PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1149	{
1150	PSZ prc = 0;
1151	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1152	p,
1153	p2;
1154	ULONG cbAttrib = strlen(pszAttrib);
1155
1156	// 1) find space char
1157	while ((p = strchr(pszSearchIn2, ' ')))
1158	{
1159	CHAR c;
1160	p++;
1161	if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1162	{
1163	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1164	// now check whether the p+strlen(pszAttrib)
1165	// is a valid end-of-tag character
1166	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1167	&& ( (c == ' ')
1168	\|\| (c == '>')
1169	\|\| (c == '=')
1170	\|\| (c == '\r')
1171	\|\| (c == '\n')
1172	\|\| (c == 0)
1173	)
1174	)
1175	{
1176	// yes:
1177	CHAR c2;
1178	p2 = p + cbAttrib;
1179	c2 = *p2;
1180	while ( ( (c2 == ' ')
1181	\|\| (c2 == '=')
1182	\|\| (c2 == '\n')
1183	\|\| (c2 == '\r')
1184	)
1185	&& (c2 != 0)
1186	)
1187	c2 = *++p2;
1188
1189	prc = p2;
1190	break; // first while
1191	}
1192	}
1193	else
1194	break;
1195
1196	pszSearchIn2++;
1197	}
1198	return (prc);
1199	} */
1200
1201	/*
1202	* strhGetNumAttribValue:
1203	* stores the numerical parameter value of an HTML-style
1204	* tag in *pl.
1205	*
1206	* Returns the address of the tag parameter in the
1207	* search buffer, if found, or NULL.
1208	*
1209	* <B>Example:</B>
1210	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1211	*
1212	* stores 123 in the "l" variable.
1213	*
1214	*@@added V0.9.0 [umoeller]
1215	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1216	*/
1217
1218	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1219	const char *pszTag, // e.g. "INDEX"
1220	PLONG pl) // out: numerical value
1221	{
1222	PSZ pParam;
1223	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1224	{
1225	if ( (*pParam == '\"')
1226	\|\| (*pParam == '\'')
1227	)
1228	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1229
1230	sscanf(pParam, "%ld", pl);
1231	}
1232
1233	return (pParam);
1234	}
1235
1236	/*
1237	* strhGetTextAttr:
1238	* retrieves the attribute value of a textual HTML-style tag
1239	* in a newly allocated buffer, which is returned,
1240	* or NULL if attribute not found.
1241	* If an attribute value is to contain spaces, it
1242	* must be enclosed in quotes.
1243	*
1244	* The offset of the attribute data in pszSearchIn is
1245	* returned in *pulOffset so that you can do multiple
1246	* searches.
1247	*
1248	* This returns a new buffer, which should be free()'d after use.
1249	*
1250	* <B>Example:</B>
1251	+ ULONG ulOfs = 0;
1252	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1253	+ ............^ ulOfs
1254	*
1255	* returns a new string with the value "blublub" (without
1256	* quotes) and sets ulOfs to 12.
1257	*
1258	*@@added V0.9.0 [umoeller]
1259	*/
1260
1261	PSZ strhGetTextAttr(const char *pszSearchIn,
1262	const char *pszTag,
1263	PULONG pulOffset) // out: offset where found
1264	{
1265	PSZ pParam,
1266	pParam2,
1267	prc = NULL;
1268	ULONG ulCount = 0;
1269	LONG lNestingLevel = 0;
1270
1271	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1272	{
1273	// determine end character to search for: a space
1274	CHAR cEnd = ' ';
1275	if (*pParam == '\"')
1276	{
1277	// or, if the data is enclosed in quotes, a quote
1278	cEnd = '\"';
1279	pParam++;
1280	}
1281
1282	if (pulOffset)
1283	// store the offset
1284	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1285
1286	// now find end of attribute
1287	pParam2 = pParam;
1288	while (*pParam)
1289	{
1290	if (*pParam == cEnd)
1291	// end character found
1292	break;
1293	else if (*pParam == '<')
1294	// yet another opening tag found:
1295	// this is probably some "<" in the attributes
1296	lNestingLevel++;
1297	else if (*pParam == '>')
1298	{
1299	lNestingLevel--;
1300	if (lNestingLevel < 0)
1301	// end of tag found:
1302	break;
1303	}
1304	ulCount++;
1305	pParam++;
1306	}
1307
1308	// copy attribute to new buffer
1309	if (ulCount)
1310	{
1311	prc = (PSZ)malloc(ulCount+1);
1312	memcpy(prc, pParam2, ulCount);
1313	*(prc+ulCount) = 0;
1314	}
1315	}
1316	return (prc);
1317	}
1318
1319	/*
1320	* strhFindEndOfTag:
1321	* returns a pointer to the ">" char
1322	* which seems to terminate the tag beginning
1323	* after pszBeginOfTag.
1324	*
1325	* If additional "<" chars are found, we look
1326	* for additional ">" characters too.
1327	*
1328	* Note: You must pass the address of the opening
1329	* '<' character to this function.
1330	*
1331	* Example:
1332	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1333	+ strhFindEndOfTag(pszTest)
1334	+ returns.................................^ this.
1335	*
1336	*@@added V0.9.0 [umoeller]
1337	*/
1338
1339	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1340	{
1341	PSZ p = (PSZ)pszBeginOfTag,
1342	prc = NULL;
1343	LONG lNestingLevel = 0;
1344
1345	while (*p)
1346	{
1347	if (*p == '<')
1348	// another opening tag found:
1349	lNestingLevel++;
1350	else if (*p == '>')
1351	{
1352	// closing tag found:
1353	lNestingLevel--;
1354	if (lNestingLevel < 1)
1355	{
1356	// corresponding: return this
1357	prc = p;
1358	break;
1359	}
1360	}
1361	p++;
1362	}
1363
1364	return (prc);
1365	}
1366
1367	/*
1368	* strhGetBlock:
1369	* this complex function searches the given string
1370	* for a pair of opening/closing HTML-style tags.
1371	*
1372	* If found, this routine returns TRUE and does
1373	* the following:
1374	*
1375	* 1) allocate a new buffer, copy the text
1376	* enclosed by the opening/closing tags
1377	* into it and set *ppszBlock to that
1378	* buffer;
1379	*
1380	* 2) if the opening tag has any attributes,
1381	* allocate another buffer, copy the
1382	* attributes into it and set *ppszAttrs
1383	* to that buffer; if no attributes are
1384	* found, *ppszAttrs will be NULL;
1385	*
1386	* 3) set *pulOffset to the offset from the
1387	* beginning of *ppszSearchIn where the
1388	* opening tag was found;
1389	*
1390	* 4) advance *ppszSearchIn to after the
1391	* closing tag, so that you can do
1392	* multiple searches without finding the
1393	* same tags twice.
1394	*
1395	* All buffers should be freed using free().
1396	*
1397	* This returns the following:
1398	* -- 0: no error
1399	* -- 1: tag not found at all (doesn't have to be an error)
1400	* -- 2: begin tag found, but no corresponding end tag found. This
1401	* is a real error.
1402	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1403	*
1404	* <B>Example:</B>
1405	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1406	+ PSZ pszBlock, pszAttrs;
1407	+ ULONG ulOfs;
1408	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1409	*
1410	* would do the following:
1411	*
1412	* 1) set pszBlock to a new string containing "This is page 1."
1413	* without quotes;
1414	*
1415	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1416	*
1417	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1418	*
1419	* 4) pSearch would be advanced to point to the "More text"
1420	* string in the original buffer.
1421	*
1422	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1423	* for HTML parsing.
1424	*
1425	*@@added V0.9.0 [umoeller]
1426	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1427	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1428	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1429	*/
1430
1431	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1432	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1433	const char *pszTag,
1434	PSZ *ppszBlock, // out: block enclosed by the tags
1435	PSZ *ppszAttribs, // out: attributes of the opening tag
1436	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1437	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1438	{
1439	ULONG ulrc = 1;
1440	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1441	pszSearch2 = pszBeginTag,
1442	pszClosingTag;
1443	ULONG cbTag = strlen(pszTag);
1444
1445	// go thru the block and check all tags if it's the
1446	// begin tag we're looking for
1447	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1448	{
1449	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1450	// yes: stop
1451	break;
1452	else
1453	pszBeginTag++;
1454	}
1455
1456	if (pszBeginTag)
1457	{
1458	// we found <TAG>:
1459	ULONG ulNestingLevel = 0;
1460
1461	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1462	// strchr(pszBeginTag, '>');
1463	if (pszEndOfBeginTag)
1464	{
1465	// does the caller want the attributes?
1466	if (ppszAttribs)
1467	{
1468	// yes: then copy them
1469	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1470	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1471	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1472	// add terminating 0
1473	*(pszAttrs + ulAttrLen) = 0;
1474
1475	*ppszAttribs = pszAttrs;
1476	}
1477
1478	// output offset of where we found the begin tag
1479	if (pulOfsBeginTag)
1480	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1481
1482	// now find corresponding closing tag (e.g. "</BODY>"
1483	pszBeginTag = pszEndOfBeginTag+1;
1484	// now we're behind the '>' char of the opening tag
1485	// increase offset of that too
1486	if (pulOfsBeginBlock)
1487	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1488
1489	// find next closing tag;
1490	// for the first run, pszSearch2 points to right
1491	// after the '>' char of the opening tag
1492	pszSearch2 = pszBeginTag;
1493	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1494	&& (pszClosingTag = strstr(pszSearch2, "<"))
1495	)
1496	{
1497	// if we have another opening tag before our closing
1498	// tag, we need to have several closing tags before
1499	// we're done
1500	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1501	ulNestingLevel++;
1502	else
1503	{
1504	// is this ours?
1505	if ( (*(pszClosingTag+1) == '/')
1506	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1507	)
1508	{
1509	// we've found a matching closing tag; is
1510	// it ours?
1511	if (ulNestingLevel == 0)
1512	{
1513	// our closing tag found:
1514	// allocate mem for a new buffer
1515	// and extract all the text between
1516	// open and closing tags to it
1517	ULONG ulLen = pszClosingTag - pszBeginTag;
1518	if (ppszBlock)
1519	{
1520	PSZ pNew = (PSZ)malloc(ulLen + 1);
1521	strhncpy0(pNew, pszBeginTag, ulLen);
1522	*ppszBlock = pNew;
1523	}
1524
1525	// raise search offset to after the closing tag
1526	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1527
1528	ulrc = 0;
1529
1530	break;
1531	} else
1532	// not our closing tag:
1533	ulNestingLevel--;
1534	}
1535	}
1536	// no matching closing tag: search on after that
1537	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1538	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1539
1540	if (!pszClosingTag)
1541	// no matching closing tag found:
1542	// return 2 (closing tag not found)
1543	ulrc = 2;
1544	} // end if (pszBeginTag)
1545	else
1546	// no matching ">" for opening tag found:
1547	ulrc = 3;
1548	}
1549
1550	return (ulrc);
1551	}
1552
1553	/* ******************************************************************
1554	*
1555	* Miscellaneous
1556	*
1557	********************************************************************/
1558
1559	/*
1560	*@@ strhArrayAppend:
1561	* this appends a string to a "string array".
1562	*
1563	* A string array is considered a sequence of
1564	* zero-terminated strings in memory. That is,
1565	* after each string's null-byte, the next
1566	* string comes up.
1567	*
1568	* This is useful for composing a single block
1569	* of memory from, say, list box entries, which
1570	* can then be written to OS2.INI in one flush.
1571	*
1572	* To append strings to such an array, call this
1573	* function for each string you wish to append.
1574	* This will re-allocate *ppszRoot with each call,
1575	* and update *pcbRoot, which then contains the
1576	* total size of all strings (including all null
1577	* terminators).
1578	*
1579	* Pass *pcbRoot to PrfSaveProfileData to have the
1580	* block saved.
1581	*
1582	* Note: On the first call, ppszRoot and pcbRoot
1583	* _must_ be both NULL, or this crashes.
1584	*
1585	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1586	*/
1587
1588	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1589	const char *pcszNew, // in: string to append
1590	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1591	PULONG pcbRoot) // in/out: size of array
1592	{
1593	PSZ pszTemp;
1594
1595	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1596	cbNew = strlen(pcszNew);
1597
1598	pszTemp = (PSZ)malloc(*pcbRoot
1599	+ cbNew
1600	+ 1); // two null bytes
1601	if (*ppszRoot)
1602	{
1603	// not first loop: copy old stuff
1604	memcpy(pszTemp,
1605	*ppszRoot,
1606	*pcbRoot);
1607	free(*ppszRoot);
1608	}
1609	// append new string
1610	strcpy(pszTemp + *pcbRoot,
1611	pcszNew);
1612	// update root
1613	*ppszRoot = pszTemp;
1614	// update length
1615	*pcbRoot += cbNew + 1;
1616	}
1617
1618	/*
1619	*@@ strhCreateDump:
1620	* this dumps a memory block into a string
1621	* and returns that string in a new buffer.
1622	*
1623	* You must free() the returned PSZ after use.
1624	*
1625	* The output looks like the following:
1626	*
1627	+ 0000: FE FF 0E 02 90 00 00 00 ........
1628	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1629	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1630	*
1631	* Each line is terminated with a newline (\n)
1632	* character only.
1633	*
1634	*@@added V0.9.1 (2000-01-22) [umoeller]
1635	*/
1636
1637	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1638	ULONG ulSize, // in: size of buffer
1639	ULONG ulIndent) // in: indentation of every line
1640	{
1641	PSZ pszReturn = 0;
1642	XSTRING strReturn;
1643	CHAR szTemp[1000];
1644
1645	PBYTE pbCurrent = pb; // current byte
1646	ULONG ulCount = 0,
1647	ulCharsInLine = 0; // if this grows > 7, a new line is started
1648	CHAR szLine[400] = "",
1649	szAscii[30] = " "; // ASCII representation; filled for every line
1650	PSZ pszLine = szLine,
1651	pszAscii = szAscii;
1652
1653	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1654
1655	for (pbCurrent = pb;
1656	ulCount < ulSize;
1657	pbCurrent++, ulCount++)
1658	{
1659	if (ulCharsInLine == 0)
1660	{
1661	memset(szLine, ' ', ulIndent);
1662	pszLine += ulIndent;
1663	}
1664	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1665
1666	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1667	// printable character:
1668	pszAscii = pbCurrent;
1669	else
1670	*pszAscii = '.';
1671	pszAscii++;
1672
1673	ulCharsInLine++;
1674	if ( (ulCharsInLine > 7) // 8 bytes added?
1675	\|\| (ulCount == ulSize-1) // end of buffer reached?
1676	)
1677	{
1678	// if we haven't had eight bytes yet,
1679	// fill buffer up to eight bytes with spaces
1680	ULONG ul2;
1681	for (ul2 = ulCharsInLine;
1682	ul2 < 8;
1683	ul2++)
1684	pszLine += sprintf(pszLine, " ");
1685
1686	sprintf(szTemp, "%04lX: %s %s\n",
1687	(ulCount & 0xFFFFFFF8), // offset in hex
1688	szLine, // bytes string
1689	szAscii); // ASCII string
1690	xstrcat(&strReturn, szTemp, 0);
1691
1692	// restart line buffer
1693	pszLine = szLine;
1694
1695	// clear ASCII buffer
1696	strcpy(szAscii, " ");
1697	pszAscii = szAscii;
1698
1699	// reset line counter
1700	ulCharsInLine = 0;
1701	}
1702	}
1703
1704	if (strReturn.cbAllocated)
1705	pszReturn = strReturn.psz;
1706
1707	return (pszReturn);
1708	}
1709
1710	/* ******************************************************************
1711	*
1712	* Fast string searches
1713	*
1714	********************************************************************/
1715
// Assertions are compiled out in this file: ASSERT discards its
// argument without evaluating it. It expands to a void expression
// rather than to nothing, so that "ASSERT(x);" is always a complete
// statement, also as the only statement of an if or else branch.
#define ASSERT(a) ((void)0)
1717
1718	/*
1719	* The following code has been taken from the "Standard
1720	* Function Library", file sflfind.c, and only slightly
1721	* modified to conform to the rest of this file.
1722	*
1723	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1724	* Revised: 98/05/04
1725	*
1726	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1727	*
1728	* The SFL Licence allows incorporating SFL code into other
1729	* programs, as long as the copyright is reprinted and the
1730	* code is marked as modified, so this is what we do.
1731	*/
1732
1733	/*
1734	*@@ strhmemfind:
1735	* searches for a pattern in a block of memory using the
1736	* Boyer-Moore-Horspool-Sunday algorithm.
1737	*
1738	* The block and pattern may contain any values; you must
1739	* explicitly provide their lengths. If you search for strings,
1740	* use strlen() on the buffers.
1741	*
1742	* Returns a pointer to the pattern if found within the block,
1743	* or NULL if the pattern was not found.
1744	*
1745	* This algorithm needs a "shift table" to cache data for the
1746	* search pattern. This table can be reused when performing
1747	* several searches with the same pattern.
1748	*
1749	* "shift" must point to an array big enough to hold 256 (8**2)
1750	* "size_t" values.
1751	*
1752	* If (*repeat_find == FALSE), the shift table is initialized.
1753	* So on the first search with a given pattern, *repeat_find
1754	* should be FALSE. This function sets it to TRUE after the
1755	* shift table is initialised, allowing the initialisation
1756	* phase to be skipped on subsequent searches.
1757	*
1758	* This function is most effective when repeated searches are
1759	* made for the same pattern in one or more large buffers.
1760	*
1761	* Example:
1762	*
1763	+ PSZ pszHaystack = "This is a sample string.",
1764	+ pszNeedle = "string";
1765	+ size_t shift[256];
1766	+ BOOL fRepeat = FALSE;
1767	+
1768	+ PSZ pFound = strhmemfind(pszHaystack,
1769	+ strlen(pszHaystack), // block size
1770	+ pszNeedle,
1771	+ strlen(pszNeedle), // pattern size
1772	+ shift,
1773	+ &fRepeat);
1774	*
1775	* Taken from the "Standard Function Library", file sflfind.c.
1776	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1777	* Slightly modified by umoeller.
1778	*
1779	*@@added V0.9.3 (2000-05-08) [umoeller]
1780	*/
1781
1782	void* strhmemfind(const void *in_block, // in: block containing data
1783	size_t block_size, // in: size of block in bytes
1784	const void *in_pattern, // in: pattern to search for
1785	size_t pattern_size, // in: size of pattern block
1786	size_t *shift, // in/out: shift table (search buffer)
1787	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
1788	{
1789	size_t byte_nbr, // Distance through block
1790	match_size; // Size of matched part
1791	const unsigned char
1792	*match_base = NULL, // Base of match of pattern
1793	*match_ptr = NULL, // Point within current match
1794	*limit = NULL; // Last potiental match point
1795	const unsigned char
1796	block = (unsigned char ) in_block, // Concrete pointer to block data
1797	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
1798
1799	if ( (block == NULL)
1800	\|\| (pattern == NULL)
1801	\|\| (shift == NULL)
1802	)
1803	return (NULL);
1804
1805	// Pattern must be smaller or equal in size to string
1806	if (block_size < pattern_size)
1807	return (NULL); // Otherwise it's not found
1808
1809	if (pattern_size == 0) // Empty patterns match at start
1810	return ((void *)block);
1811
1812	// Build the shift table unless we're continuing a previous search
1813
1814	// The shift table determines how far to shift before trying to match
1815	// again, if a match at this point fails. If the byte after where the
1816	// end of our pattern falls is not in our pattern, then we start to
1817	// match again after that byte; otherwise we line up the last occurence
1818	// of that byte in our pattern under that byte, and try match again.
1819
1820	if (!repeat_find \|\| !*repeat_find)
1821	{
1822	for (byte_nbr = 0;
1823	byte_nbr < 256;
1824	byte_nbr++)
1825	shift[byte_nbr] = pattern_size + 1;
1826	for (byte_nbr = 0;
1827	byte_nbr < pattern_size;
1828	byte_nbr++)
1829	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1830
1831	if (repeat_find)
1832	*repeat_find = TRUE;
1833	}
1834
1835	// Search for the block, each time jumping up by the amount
1836	// computed in the shift table
1837
1838	limit = block + (block_size - pattern_size + 1);
1839	ASSERT (limit > block);
1840
1841	for (match_base = block;
1842	match_base < limit;
1843	match_base += shift[*(match_base + pattern_size)])
1844	{
1845	match_ptr = match_base;
1846	match_size = 0;
1847
1848	// Compare pattern until it all matches, or we find a difference
1849	while (*match_ptr++ == pattern[match_size++])
1850	{
1851	ASSERT (match_size <= pattern_size &&
1852	match_ptr == (match_base + match_size));
1853
1854	// If we found a match, return the start address
1855	if (match_size >= pattern_size)
1856	return ((void*)(match_base));
1857
1858	}
1859	}
1860	return (NULL); // Found nothing
1861	}
1862
// strhtxtfind uses ASSERT for debug checks. This file defines ASSERT
// as a no-op further up; the fallback below only applies if it is
// not defined at this point.
#ifndef ASSERT
    #define ASSERT(a) ((void)0)
#endif

/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      Will match strings irrespective of case. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      NULL is also returned if string or pattern is NULL. An empty
 *      pattern matches at the start of the string.
 *
 *      Characters are passed to tolower() as unsigned char values,
 *      so bytes above 127 are handled according to the current locale
 *      even where plain "char" is signed.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size,                     // Size of matched part
        pos,                            // Offset of current match attempt
        last_pos;                       // Last offset where the pattern still fits

    ASSERT (string);                    // Expect non-NULL pointers, but
    ASSERT (pattern);                   // fail gracefully if not debugging
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    last_pos = string_size - pattern_size;

    // The shift lookup reads string [pos + pattern_size]; for the last
    // position this is the null terminator, which is never part of the
    // pattern and therefore moves pos beyond last_pos.
    for (pos = 0;
         pos <= last_pos;
         pos += shift [(unsigned char) tolower ((unsigned char) string [pos + pattern_size])])
    {
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (    tolower ((unsigned char) string [pos + match_size])
                == tolower ((unsigned char) pattern [match_size])
              )
        {
            // If we found a match, return the start address
            if (++match_size >= pattern_size)
                return ((char *)(string + pos));
        }
    }
    return (NULL);                      // Found nothing
}
1957

Note: See TracBrowser for help on using the repository browser.

Download in other formats: