Context Navigation

source: trunk/src/helpers/stringh.c@ 238

Visit:

Last change on this file since 238 was 238, checked in by umoeller, 23 years ago
Misc fixes.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 59.4 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2002 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
73	/*
74	*@@ strhStoreDebug:
75	* memory debug version of strhStore.
76	*
77	*@@added V0.9.16 (2001-12-08) [umoeller]
78	*/
79
80	APIRET (strhStoreDebug)(PSZ *ppszTarget,
81	PCSZ pcszSource,
82	PULONG pulLength, // out: length of new string (ptr can be NULL)
83	PCSZ pcszSourceFile,
84	unsigned long ulLine,
85	PCSZ pcszFunction)
86	{
87	ULONG ulLength = 0;
88
89
90
91	if (ppszTarget)
92	{
93	if (*ppszTarget)
94	free(*ppszTarget);
95
96	if ( (pcszSource)
97	&& (ulLength = strlen(pcszSource))
98	)
99	{
100	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
101	pcszSourceFile,
102	ulLine,
103	pcszFunction))
104	memcpy(*ppszTarget, pcszSource, ulLength + 1);
105	else
106	return ERROR_NOT_ENOUGH_MEMORY;
107	}
108	else
109	*ppszTarget = NULL;
110	}
111
112	if (pulLength)
113	*pulLength = ulLength;
114
115	return NO_ERROR;
116	}
117
118	#endif
119
120	/*
121	*@@ strhStore:
122	* stores a copy of the given string in the specified
123	* buffer. Uses strdup internally.
124	*
125	* If *ppszTarget != NULL, the previous string is freed
126	* and set to NULL.
127	* If pcszSource != NULL, a copy of it is stored in the
128	* buffer.
129	*
130	*@@added V0.9.16 (2001-12-06) [umoeller]
131	*/
132
133	APIRET strhStore(PSZ *ppszTarget,
134	PCSZ pcszSource,
135	PULONG pulLength) // out: length of new string (ptr can be NULL)
136	{
137	ULONG ulLength = 0;
138
139	if (ppszTarget)
140	{
141	if (*ppszTarget)
142	free(*ppszTarget);
143
144	if ( (pcszSource)
145	&& (ulLength = strlen(pcszSource))
146	)
147	{
148	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
149	memcpy(*ppszTarget, pcszSource, ulLength + 1);
150	else
151	return ERROR_NOT_ENOUGH_MEMORY;
152	}
153	else
154	*ppszTarget = NULL;
155	}
156	else
157	return ERROR_INVALID_PARAMETER;
158
159	if (pulLength)
160	*pulLength = ulLength;
161
162	return NO_ERROR;
163	}
164
165	/*
166	*@@ strhcpy:
167	* like strdup, but this one doesn't crash if string2 is NULL,
168	* but sets the first byte in string1 to \0 instead.
169	*
170	*@@added V0.9.14 (2001-08-01) [umoeller]
171	*/
172
173	PSZ strhcpy(PSZ string1, PCSZ string2)
174	{
175	if (string2)
176	return strcpy(string1, string2);
177
178	*string1 = '\0';
179	return string1;
180	}
181
182	#ifdef __DEBUG_MALLOC_ENABLED__
183
184	/*
185	*@@ strhdupDebug:
186	* memory debug version of strhdup.
187	*
188	*@@added V0.9.0 [umoeller]
189	*/
190
191	PSZ strhdupDebug(PCSZ pcszSource,
192	unsigned long *pulLength,
193	PCSZ pcszSourceFile,
194	unsigned long ulLine,
195	PCSZ pcszFunction)
196	{
197	PSZ pszReturn = NULL;
198	ULONG ulLength = 0;
199
200	if ( (pcszSource)
201	&& (ulLength = strlen(pcszSource))
202	)
203	{
204	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
205	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
206	ulLine,
207	pcszFunction))
208	memcpy(pszReturn, pcszSource, ulLength + 1);
209	}
210
211	if (pulLength)
212	*pulLength = ulLength;
213
214	return pszReturn;
215	}
216
217	#endif // __DEBUG_MALLOC_ENABLED__
218
219	/*
220	*@@ strhdup:
221	* like strdup, but this one doesn't crash if pszSource
222	* is NULL. Instead, this returns NULL if pcszSource is
223	* NULL or points to a null byte. In addition, this
224	* can report the length of the string (V0.9.16).
225	*
226	*@@added V0.9.0 [umoeller]
227	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
228	*/
229
230	PSZ strhdup(PCSZ pcszSource,
231	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
232	{
233	PSZ pszReturn = NULL;
234	ULONG ulLength = 0;
235
236	if ( (pcszSource)
237	&& (ulLength = strlen(pcszSource))
238	)
239	{
240	if (pszReturn = (PSZ)malloc(ulLength + 1))
241	memcpy(pszReturn, pcszSource, ulLength + 1);
242	}
243
244	if (pulLength)
245	*pulLength = ulLength;
246
247	return pszReturn;
248	}
249
250	/*
251	*@@ strhcmp:
252	* better strcmp. This doesn't crash if any of the
253	* string pointers are NULL, but returns a proper
254	* value then.
255	*
256	* Besides, this is guaranteed to only return -1, 0,
257	* or +1, while strcmp can return any positive or
258	* negative value. This is useful for tree comparison
259	* funcs.
260	*
261	*@@added V0.9.9 (2001-02-16) [umoeller]
262	*/
263
264	int strhcmp(PCSZ p1, PCSZ p2)
265	{
266	if (p1 && p2)
267	{
268	int i = strcmp(p1, p2);
269	if (i < 0) return -1;
270	if (i > 0) return +1;
271	}
272	else if (p1)
273	// but p2 is NULL: p1 greater than p2 then
274	return +1;
275	else if (p2)
276	// but p1 is NULL: p1 less than p2 then
277	return -1;
278
279	// return 0 if strcmp returned 0 above or both strings are NULL
280	return 0;
281	}
282
283	/*
284	*@@ strhicmp:
285	* like strhcmp, but compares without respect
286	* to case.
287	*
288	*@@added V0.9.9 (2001-04-07) [umoeller]
289	*/
290
291	int strhicmp(PCSZ p1, PCSZ p2)
292	{
293	if (p1 && p2)
294	{
295	int i = stricmp(p1, p2);
296	if (i < 0) return -1;
297	if (i > 0) return +1;
298	}
299	else if (p1)
300	// but p2 is NULL: p1 greater than p2 then
301	return +1;
302	else if (p2)
303	// but p1 is NULL: p1 less than p2 then
304	return -1;
305
306	// return 0 if strcmp returned 0 above or both strings are NULL
307	return 0;
308	}
309
310	/*
311	*@@ strhistr:
312	* like strstr, but case-insensitive.
313	*
314	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
315	*/
316
317	PSZ strhistr(PCSZ string1, PCSZ string2)
318	{
319	PSZ prc = NULL;
320
321	if ((string1) && (string2))
322	{
323	PSZ pszSrchIn = strdup(string1);
324	PSZ pszSrchFor = strdup(string2);
325
326	if ((pszSrchIn) && (pszSrchFor))
327	{
328	strupr(pszSrchIn);
329	strupr(pszSrchFor);
330
331	if (prc = strstr(pszSrchIn, pszSrchFor))
332	{
333	// prc now has the first occurence of the string,
334	// but in pszSrchIn; we need to map this
335	// return value to the original string
336	prc = (prc-pszSrchIn) // offset in pszSrchIn
337	+ (PSZ)string1;
338	}
339	}
340	if (pszSrchFor)
341	free(pszSrchFor);
342	if (pszSrchIn)
343	free(pszSrchIn);
344	}
345
346	return prc;
347	}
348
349	/*
350	*@@ strhncpy0:
351	* like strncpy, but always appends a 0 character.
352	*
353	*@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
354	*/
355
356	ULONG strhncpy0(PSZ pszTarget,
357	PCSZ pszSource,
358	ULONG cbSource)
359	{
360	ULONG ul = 0;
361	PSZ pTarget = pszTarget,
362	pSource;
363
364	if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
365	{
366	for (ul = 0; ul < cbSource; ul++)
367	if (*pSource)
368	pTarget++ = pSource++;
369	else
370	break;
371	}
372
373	*pTarget = 0;
374
375	return ul;
376	}
377
378	/*
379	*@@ strhlen:
380	* like strlen, but doesn't crash on
381	* null strings, but returns 0 also.
382	*
383	*@@added V0.9.19 (2002-04-02) [umoeller]
384	*/
385
386	ULONG strhlen(PCSZ pcsz)
387	{
388	if (pcsz)
389	return strlen(pcsz);
390
391	return 0;
392	}
393
394	/*
395	*@@ strhSize:
396	* returns the size of the given string, which
397	* is the memory required to allocate a copy,
398	* including the null terminator.
399	*
400	* Returns 0 only if pcsz is NULL. If pcsz
401	* points to a null character, this returns 1.
402	*
403	*@@added V0.9.18 (2002-02-13) [umoeller]
404	*@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
405	*/
406
407	ULONG strhSize(PCSZ pcsz)
408	{
409	if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
410	return strlen(pcsz) + 1;
411
412	return 0;
413	}
414
415	/*
416	* strhCount:
417	* this counts the occurences of c in pszSearch.
418	*/
419
420	ULONG strhCount(PCSZ pszSearch,
421	CHAR c)
422	{
423	PSZ p = (PSZ)pszSearch;
424	ULONG ulCount = 0;
425	while (TRUE)
426	{
427	p = strchr(p, c);
428	if (p)
429	{
430	ulCount++;
431	p++;
432	}
433	else
434	break;
435	}
436	return ulCount;
437	}
438
439	/*
440	*@@ strhIsDecimal:
441	* returns TRUE if psz consists of decimal digits only.
442	*/
443
444	BOOL strhIsDecimal(PSZ psz)
445	{
446	PSZ p = psz;
447	while (*p != 0)
448	{
449	if (isdigit(*p) == 0)
450	return FALSE;
451	p++;
452	}
453
454	return TRUE;
455	}
456
457	#ifdef __DEBUG_MALLOC_ENABLED__
458
459	/*
460	*@@ strhSubstrDebug:
461	* memory debug version of strhSubstr.
462	*
463	*@@added V0.9.14 (2001-08-01) [umoeller]
464	*/
465
466	PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
467	PCSZ pEnd, // in: last char (not included)
468	PCSZ pcszSourceFile,
469	unsigned long ulLine,
470	PCSZ pcszFunction)
471	{
472	PSZ pszSubstr = NULL;
473
474	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
475	{
476	ULONG cbSubstr = (pEnd - pBegin);
477	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
478	pcszSourceFile,
479	ulLine,
480	pcszFunction))
481	{
482	// strhncpy0(pszSubstr, pBegin, cbSubstr);
483	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
484	*(pszSubstr + cbSubstr) = '\0';
485	}
486	}
487
488	return pszSubstr;
489	}
490
491	#endif // __DEBUG_MALLOC_ENABLED__
492
493	/*
494	*@@ strhSubstr:
495	* this creates a new PSZ containing the string
496	* from pBegin to pEnd, excluding the pEnd character.
497	* The new string is null-terminated. The caller
498	* must free() the new string after use.
499	*
500	* Example:
501	+ "1234567890"
502	+ ^ ^
503	+ p1 p2
504	+ strhSubstr(p1, p2)
505	* would return a new string containing "2345678".
506	*
507	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
508	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
509	*/
510
511	PSZ strhSubstr(PCSZ pBegin, // in: first char
512	PCSZ pEnd) // in: last char (not included)
513	{
514	PSZ pszSubstr = NULL;
515
516	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
517	{
518	ULONG cbSubstr = (pEnd - pBegin);
519	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
520	{
521	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
522	*(pszSubstr + cbSubstr) = '\0';
523	}
524	}
525
526	return pszSubstr;
527	}
528
529	/*
530	*@@ strhExtract:
531	* searches pszBuf for the cOpen character and returns
532	* the data in between cOpen and cClose, excluding
533	* those two characters, in a newly allocated buffer
534	* which you must free() afterwards.
535	*
536	* Spaces and newlines/linefeeds are skipped.
537	*
538	* If the search was successful, the new buffer
539	* is returned and, if (ppEnd != NULL), *ppEnd points
540	* to the first character after the cClose character
541	* found in the buffer.
542	*
543	* If the search was not successful, NULL is
544	* returned, and *ppEnd is unchanged.
545	*
546	* If another cOpen character is found before
547	* cClose, matching cClose characters will be skipped.
548	* You can therefore nest the cOpen and cClose
549	* characters.
550	*
551	* This function ignores cOpen and cClose characters
552	* in C-style comments and strings surrounded by
553	* double quotes.
554	*
555	* Example:
556	*
557	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
558	+ pEnd;
559	+ strhExtract(pszBuf,
560	+ '{', '}',
561	+ &pEnd)
562	*
563	* would return a new buffer containing " --blah-- ",
564	* and ppEnd would afterwards point to the space
565	* before "next" in the static buffer.
566	*
567	*@@added V0.9.0 [umoeller]
568	*/
569
570	PSZ strhExtract(PCSZ pszBuf, // in: search buffer
571	CHAR cOpen, // in: opening char
572	CHAR cClose, // in: closing char
573	PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
574	{
575	PSZ pszReturn = NULL;
576	PCSZ pOpen;
577	if ( (pszBuf)
578	&& (pOpen = strchr(pszBuf, cOpen))
579	)
580	{
581	// opening char found:
582	// now go thru the whole rest of the buffer
583	PCSZ p = pOpen + 1;
584	LONG lLevel = 1; // if this goes 0, we're done
585	while (*p)
586	{
587	if (*p == cOpen)
588	lLevel++;
589	else if (*p == cClose)
590	{
591	lLevel--;
592	if (lLevel <= 0)
593	{
594	// matching closing bracket found:
595	// extract string
596	pszReturn = strhSubstr(pOpen + 1, // after cOpen
597	p); // excluding cClose
598	if (ppEnd)
599	*ppEnd = p + 1;
600	break; // while (*p)
601	}
602	}
603	else if (*p == '\"')
604	{
605	// beginning of string:
606	PCSZ p2 = p+1;
607	// find end of string
608	while ((p2) && (p2 != '\"'))
609	p2++;
610
611	if (*p2 == '\"')
612	// closing quote found:
613	// search on after that
614	p = p2; // raised below
615	else
616	break; // while (*p)
617	}
618
619	p++;
620	}
621	}
622
623	return pszReturn;
624	}
625
626	/*
627	*@@ strhQuote:
628	* similar to strhExtract, except that
629	* opening and closing chars are the same,
630	* and therefore no nesting is possible.
631	* Useful for extracting stuff between
632	* quotes.
633	*
634	*@@added V0.9.0 [umoeller]
635	*/
636
637	PSZ strhQuote(PSZ pszBuf,
638	CHAR cQuote,
639	PSZ *ppEnd)
640	{
641	PSZ pszReturn = NULL,
642	p1 = NULL;
643	if ((p1 = strchr(pszBuf, cQuote)))
644	{
645	PSZ p2;
646	if (p2 = strchr(p1+1, cQuote))
647	{
648	pszReturn = strhSubstr(p1+1, p2);
649	if (ppEnd)
650	// store closing char
651	*ppEnd = p2 + 1;
652	}
653	}
654
655	return pszReturn;
656	}
657
658	/*
659	*@@ strhStrip:
660	* removes all double spaces.
661	* This copies within the "psz" buffer.
662	* If any double spaces are found, the
663	* string will be shorter than before,
664	* but the buffer is _not_ reallocated,
665	* so there will be unused bytes at the
666	* end.
667	*
668	* Returns the number of spaces removed.
669	*
670	*@@added V0.9.0 [umoeller]
671	*/
672
673	ULONG strhStrip(PSZ psz) // in/out: string
674	{
675	PSZ p;
676	ULONG cb = strlen(psz),
677	ulrc = 0;
678
679	for (p = psz; p < psz+cb; p++)
680	{
681	if ((p == ' ') && ((p+1) == ' '))
682	{
683	PSZ p2 = p;
684	while (*p2)
685	{
686	p2 = (p2+1);
687	p2++;
688	}
689	cb--;
690	p--;
691	ulrc++;
692	}
693	}
694	return ulrc;
695	}
696
697	/*
698	*@@ strhins:
699	* this inserts one string into another.
700	*
701	* pszInsert is inserted into pszBuffer at offset
702	* ulInsertOfs (which counts from 0).
703	*
704	* A newly allocated string is returned. pszBuffer is
705	* not changed. The new string should be free()'d after
706	* use.
707	*
708	* Upon errors, NULL is returned.
709	*
710	*@@changed V0.9.0 [umoeller]: completely rewritten.
711	*/
712
713	PSZ strhins(PCSZ pcszBuffer,
714	ULONG ulInsertOfs,
715	PCSZ pcszInsert)
716	{
717	PSZ pszNew = NULL;
718
719	if ((pcszBuffer) && (pcszInsert))
720	{
721	do {
722	ULONG cbBuffer = strlen(pcszBuffer);
723	ULONG cbInsert = strlen(pcszInsert);
724
725	// check string length
726	if (ulInsertOfs > cbBuffer + 1)
727	break; // do
728
729	// OK, let's go.
730	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
731
732	// copy stuff before pInsertPos
733	memcpy(pszNew,
734	pcszBuffer,
735	ulInsertOfs);
736	// copy string to be inserted
737	memcpy(pszNew + ulInsertOfs,
738	pcszInsert,
739	cbInsert);
740	// copy stuff after pInsertPos
741	strcpy(pszNew + ulInsertOfs + cbInsert,
742	pcszBuffer + ulInsertOfs);
743	} while (FALSE);
744	}
745
746	return pszNew;
747	}
748
749	/*
750	*@@ strhFindReplace:
751	* wrapper around xstrFindReplace to work with C strings.
752	* Note that *ppszBuf can get reallocated and must
753	* be free()'able.
754	*
755	* Repetitive use of this wrapper is not recommended
756	* because it is considerably slower than xstrFindReplace.
757	*
758	*@@added V0.9.6 (2000-11-01) [umoeller]
759	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
760	*/
761
762	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
763	PULONG pulOfs, // in: where to begin search (0 = start);
764	// out: ofs of first char after replacement string
765	PCSZ pcszSearch, // in: search string; cannot be NULL
766	PCSZ pcszReplace) // in: replacement string; cannot be NULL
767	{
768	ULONG ulrc = 0;
769	XSTRING xstrBuf,
770	xstrFind,
771	xstrReplace;
772	size_t ShiftTable[256];
773	BOOL fRepeat = FALSE;
774	xstrInitSet(&xstrBuf, *ppszBuf);
775	// reallocated and returned, so we're safe
776	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
777	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
778	// these two are never freed, so we're safe too
779
780	if ((ulrc = xstrFindReplace(&xstrBuf,
781	pulOfs,
782	&xstrFind,
783	&xstrReplace,
784	ShiftTable,
785	&fRepeat)))
786	// replaced:
787	*ppszBuf = xstrBuf.psz;
788
789	return ulrc;
790	}
791
792	/*
793	* strhWords:
794	* returns the no. of words in "psz".
795	* A string is considered a "word" if
796	* it is surrounded by spaces only.
797	*
798	*@@added V0.9.0 [umoeller]
799	*/
800
801	ULONG strhWords(PSZ psz)
802	{
803	PSZ p;
804	ULONG cb = strlen(psz),
805	ulWords = 0;
806	if (cb > 1)
807	{
808	ulWords = 1;
809	for (p = psz; p < psz+cb; p++)
810	if (*p == ' ')
811	ulWords++;
812	}
813	return ulWords;
814	}
815
816	/*
817	*@@ strhGetWord:
818	* finds word boundaries.
819	*
820	* *ppszStart is used as the beginning of the
821	* search.
822	*
823	* If a word is found, *ppszStart is set to
824	* the first character of the word which was
825	* found and *ppszEnd receives the address
826	* of the first character _after_ the word,
827	* which is probably a space or a \n or \r char.
828	* We then return TRUE.
829	*
830	* The search is stopped if a null character
831	* is found or pLimit is reached. In that case,
832	* FALSE is returned.
833	*
834	*@@added V0.9.1 (2000-02-13) [umoeller]
835	*/
836
837	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
838	// out: start of word (if TRUE is returned)
839	PCSZ pLimit, // in: ptr to last char after *ppszStart to be
840	// searched; if the word does not end before
841	// or with this char, FALSE is returned
842	PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
843	PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
844	PSZ *ppszEnd) // out: first char _after_ word
845	// (if TRUE is returned)
846	{
847	// characters after which a word can be started
848	// PCSZ pcszBeginChars = "\x0d\x0a ";
849	// PCSZ pcszEndChars = "\x0d\x0a /-";
850
851	PSZ pStart = *ppszStart;
852
853	// find start of word
854	while ( (pStart < (PSZ)pLimit)
855	&& (strchr(pcszBeginChars, *pStart))
856	)
857	// if char is a "before word" char: go for next
858	pStart++;
859
860	if (pStart < (PSZ)pLimit)
861	{
862	// found a valid "word start" character
863	// (which is not in pcszBeginChars):
864
865	// find end of word
866	PSZ pEndOfWord = pStart;
867	while ( (pEndOfWord <= (PSZ)pLimit)
868	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
869	)
870	// if char is not an "end word" char: go for next
871	pEndOfWord++;
872
873	if (pEndOfWord <= (PSZ)pLimit)
874	{
875	// whoa, got a word:
876	*ppszStart = pStart;
877	*ppszEnd = pEndOfWord;
878	return TRUE;
879	}
880	}
881
882	return FALSE;
883	}
884
885	/*
886	*@@ strhIsWord:
887	* returns TRUE if p points to a "word"
888	* in pcszBuf.
889	*
890	* p is considered a word if the character _before_
891	* it is in pcszBeginChars and the char _after_
892	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
893	*
894	*@@added V0.9.6 (2000-11-12) [umoeller]
895	*@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
896	*/
897
898	BOOL strhIsWord(PCSZ pcszBuf,
899	PCSZ p, // in: start of word
900	ULONG cbSearch, // in: length of word
901	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
902	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
903	{
904	// check previous char
905	if ( (p == pcszBuf)
906	\|\| (strchr(pcszBeginChars, *(p-1)))
907	)
908	{
909	// OK, valid begin char:
910	// check end char
911	CHAR cNextChar;
912	if (!(cNextChar = p[cbSearch]))
913	// null terminator:
914	return TRUE;
915	else
916	{
917	// not null terminator: check if char is
918	// in the list of valid end chars
919	if (strchr(pcszEndChars, cNextChar))
920	{
921	// OK, is end char: avoid doubles of that char,
922	// but allow spaces
923	// fixed V0.9.18 (2002-02-23) [umoeller]
924	CHAR cNextNext = p[cbSearch + 1];
925	if ( (cNextNext != cNextChar)
926	\|\| (cNextNext == ' ')
927	\|\| (cNextNext == 0)
928	)
929	return TRUE;
930	}
931	}
932	}
933
934	return FALSE;
935	}
936
937	/*
938	*@@ strhFindWord:
939	* searches for pszSearch in pszBuf, which is
940	* returned if found (or NULL if not).
941	*
942	* As opposed to strstr, this finds pszSearch
943	* only if it is a "word". A search string is
944	* considered a word if the character _before_
945	* it is in pcszBeginChars and the char _after_
946	* it is in pcszEndChars.
947	*
948	* Example:
949	+ strhFindWord("This is an example.", "is");
950	+ returns ...........^ this, but not the "is" in "This".
951	*
952	* The algorithm here uses strstr to find pszSearch in pszBuf
953	* and performs additional "is-word" checks for each item found
954	* (by calling strhIsWord).
955	*
956	* Note that this function is fairly slow compared to xstrFindWord.
957	*
958	*@@added V0.9.0 (99-11-08) [umoeller]
959	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
960	*/
961
962	PSZ strhFindWord(PCSZ pszBuf,
963	PCSZ pszSearch,
964	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
965	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
966	{
967	PSZ pszReturn = 0;
968	ULONG cbBuf = strlen(pszBuf),
969	cbSearch = strlen(pszSearch);
970
971	if ((cbBuf) && (cbSearch))
972	{
973	PCSZ p = pszBuf;
974
975	do // while p
976	{
977	p = strstr(p, pszSearch);
978	if (p)
979	{
980	// string found:
981	// check if that's a word
982
983	if (strhIsWord(pszBuf,
984	p,
985	cbSearch,
986	pcszBeginChars,
987	pcszEndChars))
988	{
989	// valid end char:
990	pszReturn = (PSZ)p;
991	break;
992	}
993
994	p += cbSearch;
995	}
996	} while (p);
997
998	}
999	return pszReturn;
1000	}
1001
1002	/*
1003	*@@ strhFindEOL:
1004	* returns a pointer to the next \r, \n or null character
1005	* following pszSearchIn. Stores the offset in *pulOffset.
1006	*
1007	* This should never return NULL because at some point,
1008	* there will be a null byte in your string.
1009	*
1010	*@@added V0.9.4 (2000-07-01) [umoeller]
1011	*/
1012
1013	PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
1014	PULONG pulOffset) // out: offset (ptr can be NULL)
1015	{
1016	PCSZ p = pcszSearchIn,
1017	prc = 0;
1018	while (TRUE)
1019	{
1020	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
1021	{
1022	prc = p;
1023	break;
1024	}
1025	p++;
1026	}
1027
1028	if ((pulOffset) && (prc))
1029	*pulOffset = prc - pcszSearchIn;
1030
1031	return (PSZ)prc;
1032	}
1033
1034	/*
1035	*@@ strhFindNextLine:
1036	* like strhFindEOL, but this returns the character
1037	* _after_ \r or \n. Note that this might return
1038	* a pointer to terminating NULL character also.
1039	*/
1040
1041	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1042	{
1043	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1044	// pEOL now points to the \r char or the terminating 0 byte;
1045	// if not null byte, advance pointer
1046	PSZ pNextLine = pEOL;
1047	if (*pNextLine == '\r')
1048	pNextLine++;
1049	if (*pNextLine == '\n')
1050	pNextLine++;
1051	if (pulOffset)
1052	*pulOffset = pNextLine - pszSearchIn;
1053	return pNextLine;
1054	}
1055
1056	/*
1057	*@@ strhBeautifyTitle:
1058	* replaces all line breaks (0xd, 0xa) with spaces.
1059	* Returns the new length of the string or 0 on
1060	* errors.
1061	*
1062	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1063	*@@changed V0.9.19 (2002-06-18) [umoeller]: now returning length
1064	*/
1065
1066	ULONG strhBeautifyTitle(PSZ psz)
1067	{
1068	ULONG ulrc;
1069	PSZ p = psz;
1070
1071	while (*p)
1072	{
1073	if ( (*p == '\r')
1074	\|\| (*p == '\n')
1075	)
1076	{
1077	if ( (p != psz)
1078	&& (p[-1] == ' ')
1079	)
1080	memmove(p, p + 1, strlen(p));
1081	else
1082	*p++ = ' ';
1083	}
1084	else
1085	p++;
1086	}
1087
1088	return (p - psz);
1089	}
1090
1091	/*
1092	*@@ strhBeautifyTitle:
1093	* like strhBeautifyTitle, but copies into
1094	* a new buffer. More efficient.
1095	*
1096	*@@added V0.9.19 (2002-06-18) [umoeller]
1097	*/
1098
1099	ULONG strhBeautifyTitle2(PSZ pszTarget, // out: beautified string
1100	PCSZ pcszSource) // in: string to be beautified (can be NULL)
1101	{
1102	ULONG ulrc;
1103	PCSZ pSource = pcszSource;
1104	PSZ pTarget = pszTarget;
1105	CHAR c;
1106	if (!pcszSource)
1107	{
1108	*pszTarget = '\0';
1109	return 0;
1110	}
1111
1112	while (c = *pSource++)
1113	{
1114	if ( (c == '\r')
1115	\|\| (c == '\n')
1116	)
1117	{
1118	if ( (pTarget == pszTarget)
1119	\|\| (pTarget[-1] != ' ')
1120	)
1121	*pTarget++ = ' ';
1122	}
1123	else
1124	*pTarget++ = c;
1125	}
1126
1127	// null-terminate
1128	*pTarget = '\0';
1129
1130	return (pTarget - pszTarget);
1131	}
1132
1133	/*
1134	* strhFindAttribValue:
1135	* searches for pszAttrib in pszSearchIn; if found,
1136	* returns the first character after the "=" char.
1137	* If "=" is not found, a space, \r, and \n are
1138	* also accepted. This function searches without
1139	* respecting case.
1140	*
1141	* <B>Example:</B>
1142	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1143	+
1144	+ returns ....................... ^ this address.
1145	*
1146	*@@added V0.9.0 [umoeller]
1147	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1148	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1149	*/
1150
1151	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1152	{
1153	PSZ prc = 0;
1154	PSZ pszSearchIn2, p;
1155	ULONG cbAttrib = strlen(pszAttrib),
1156	ulLength = strlen(pszSearchIn);
1157
1158	// use alloca(), so memory is freed on function exit
1159	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1160	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1161
1162	// 1) find token, (space char, \n, \r, \t)
1163	p = strtok(pszSearchIn2, " \n\r\t");
1164	while (p)
1165	{
1166	CHAR c2;
1167	PSZ pOrig;
1168
1169	// check tag name
1170	if (!strnicmp(p, pszAttrib, cbAttrib))
1171	{
1172	// position in original string
1173	pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1174
1175	// yes:
1176	prc = pOrig + cbAttrib;
1177	c2 = *prc;
1178	while ( ( (c2 == ' ')
1179	\|\| (c2 == '=')
1180	\|\| (c2 == '\n')
1181	\|\| (c2 == '\r')
1182	)
1183	&& (c2 != 0)
1184	)
1185	c2 = *++prc;
1186
1187	break;
1188	}
1189
1190	p = strtok(NULL, " \n\r\t");
1191	}
1192
1193	return prc;
1194	}
1195
1196	/* PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
1197	{
1198	PSZ prc = 0;
1199	PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1200	p,
1201	p2;
1202	ULONG cbAttrib = strlen(pszAttrib);
1203
1204	// 1) find space char
1205	while ((p = strchr(pszSearchIn2, ' ')))
1206	{
1207	CHAR c;
1208	p++;
1209	if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1210	{
1211	c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1212	// now check whether the p+strlen(pszAttrib)
1213	// is a valid end-of-tag character
1214	if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1215	&& ( (c == ' ')
1216	\|\| (c == '>')
1217	\|\| (c == '=')
1218	\|\| (c == '\r')
1219	\|\| (c == '\n')
1220	\|\| (c == 0)
1221	)
1222	)
1223	{
1224	// yes:
1225	CHAR c2;
1226	p2 = p + cbAttrib;
1227	c2 = *p2;
1228	while ( ( (c2 == ' ')
1229	\|\| (c2 == '=')
1230	\|\| (c2 == '\n')
1231	\|\| (c2 == '\r')
1232	)
1233	&& (c2 != 0)
1234	)
1235	c2 = *++p2;
1236
1237	prc = p2;
1238	break; // first while
1239	}
1240	}
1241	else
1242	break;
1243
1244	pszSearchIn2++;
1245	}
1246	return prc;
1247	} */
1248
1249	/*
1250	* strhGetNumAttribValue:
1251	* stores the numerical parameter value of an HTML-style
1252	* tag in *pl.
1253	*
1254	* Returns the address of the tag parameter in the
1255	* search buffer, if found, or NULL.
1256	*
1257	* <B>Example:</B>
1258	+ strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1259	*
1260	* stores 123 in the "l" variable.
1261	*
1262	*@@added V0.9.0 [umoeller]
1263	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1264	*/
1265
1266	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1267	const char *pszTag, // e.g. "INDEX"
1268	PLONG pl) // out: numerical value
1269	{
1270	PSZ pParam;
1271	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1272	{
1273	if ( (*pParam == '\"')
1274	\|\| (*pParam == '\'')
1275	)
1276	pParam++; // V0.9.9 (2001-04-04) [umoeller]
1277
1278	sscanf(pParam, "%ld", pl);
1279	}
1280
1281	return pParam;
1282	}
1283
1284	/*
1285	* strhGetTextAttr:
1286	* retrieves the attribute value of a textual HTML-style tag
1287	* in a newly allocated buffer, which is returned,
1288	* or NULL if attribute not found.
1289	* If an attribute value is to contain spaces, it
1290	* must be enclosed in quotes.
1291	*
1292	* The offset of the attribute data in pszSearchIn is
1293	* returned in *pulOffset so that you can do multiple
1294	* searches.
1295	*
1296	* This returns a new buffer, which should be free()'d after use.
1297	*
1298	* <B>Example:</B>
1299	+ ULONG ulOfs = 0;
1300	+ strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1301	+ ............^ ulOfs
1302	*
1303	* returns a new string with the value "blublub" (without
1304	* quotes) and sets ulOfs to 12.
1305	*
1306	*@@added V0.9.0 [umoeller]
1307	*/
1308
1309	PSZ strhGetTextAttr(const char *pszSearchIn,
1310	const char *pszTag,
1311	PULONG pulOffset) // out: offset where found
1312	{
1313	PSZ pParam,
1314	pParam2,
1315	prc = NULL;
1316	ULONG ulCount = 0;
1317	LONG lNestingLevel = 0;
1318
1319	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1320	{
1321	// determine end character to search for: a space
1322	CHAR cEnd = ' ';
1323	if (*pParam == '\"')
1324	{
1325	// or, if the data is enclosed in quotes, a quote
1326	cEnd = '\"';
1327	pParam++;
1328	}
1329
1330	if (pulOffset)
1331	// store the offset
1332	(*pulOffset) = pParam - (PSZ)pszSearchIn;
1333
1334	// now find end of attribute
1335	pParam2 = pParam;
1336	while (*pParam)
1337	{
1338	if (*pParam == cEnd)
1339	// end character found
1340	break;
1341	else if (*pParam == '<')
1342	// yet another opening tag found:
1343	// this is probably some "<" in the attributes
1344	lNestingLevel++;
1345	else if (*pParam == '>')
1346	{
1347	lNestingLevel--;
1348	if (lNestingLevel < 0)
1349	// end of tag found:
1350	break;
1351	}
1352	ulCount++;
1353	pParam++;
1354	}
1355
1356	// copy attribute to new buffer
1357	if (ulCount)
1358	{
1359	prc = (PSZ)malloc(ulCount+1);
1360	memcpy(prc, pParam2, ulCount);
1361	*(prc+ulCount) = 0;
1362	}
1363	}
1364	return prc;
1365	}
1366
1367	/*
1368	* strhFindEndOfTag:
1369	* returns a pointer to the ">" char
1370	* which seems to terminate the tag beginning
1371	* after pszBeginOfTag.
1372	*
1373	* If additional "<" chars are found, we look
1374	* for additional ">" characters too.
1375	*
1376	* Note: You must pass the address of the opening
1377	* '<' character to this function.
1378	*
1379	* Example:
1380	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1381	+ strhFindEndOfTag(pszTest)
1382	+ returns.................................^ this.
1383	*
1384	*@@added V0.9.0 [umoeller]
1385	*/
1386
1387	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1388	{
1389	PSZ p = (PSZ)pszBeginOfTag,
1390	prc = NULL;
1391	LONG lNestingLevel = 0;
1392
1393	while (*p)
1394	{
1395	if (*p == '<')
1396	// another opening tag found:
1397	lNestingLevel++;
1398	else if (*p == '>')
1399	{
1400	// closing tag found:
1401	lNestingLevel--;
1402	if (lNestingLevel < 1)
1403	{
1404	// corresponding: return this
1405	prc = p;
1406	break;
1407	}
1408	}
1409	p++;
1410	}
1411
1412	return prc;
1413	}
1414
1415	/*
1416	* strhGetBlock:
1417	* this complex function searches the given string
1418	* for a pair of opening/closing HTML-style tags.
1419	*
1420	* If found, this routine returns TRUE and does
1421	* the following:
1422	*
1423	* 1) allocate a new buffer, copy the text
1424	* enclosed by the opening/closing tags
1425	* into it and set *ppszBlock to that
1426	* buffer;
1427	*
1428	* 2) if the opening tag has any attributes,
1429	* allocate another buffer, copy the
1430	* attributes into it and set *ppszAttrs
1431	* to that buffer; if no attributes are
1432	* found, *ppszAttrs will be NULL;
1433	*
1434	* 3) set *pulOffset to the offset from the
1435	* beginning of *ppszSearchIn where the
1436	* opening tag was found;
1437	*
1438	* 4) advance *ppszSearchIn to after the
1439	* closing tag, so that you can do
1440	* multiple searches without finding the
1441	* same tags twice.
1442	*
1443	* All buffers should be freed using free().
1444	*
1445	* This returns the following:
1446	* -- 0: no error
1447	* -- 1: tag not found at all (doesn't have to be an error)
1448	* -- 2: begin tag found, but no corresponding end tag found. This
1449	* is a real error.
1450	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1451	*
1452	* <B>Example:</B>
1453	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1454	+ PSZ pszBlock, pszAttrs;
1455	+ ULONG ulOfs;
1456	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1457	*
1458	* would do the following:
1459	*
1460	* 1) set pszBlock to a new string containing "This is page 1."
1461	* without quotes;
1462	*
1463	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1464	*
1465	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1466	*
1467	* 4) pSearch would be advanced to point to the "More text"
1468	* string in the original buffer.
1469	*
1470	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1471	* for HTML parsing.
1472	*
1473	*@@added V0.9.0 [umoeller]
1474	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1475	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1476	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1477	*/
1478
1479	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1480	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1481	const char *pszTag,
1482	PSZ *ppszBlock, // out: block enclosed by the tags
1483	PSZ *ppszAttribs, // out: attributes of the opening tag
1484	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1485	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1486	{
1487	ULONG ulrc = 1;
1488	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1489	pszSearch2 = pszBeginTag,
1490	pszClosingTag;
1491	ULONG cbTag = strlen(pszTag);
1492
1493	// go thru the block and check all tags if it's the
1494	// begin tag we're looking for
1495	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1496	{
1497	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1498	// yes: stop
1499	break;
1500	else
1501	pszBeginTag++;
1502	}
1503
1504	if (pszBeginTag)
1505	{
1506	// we found <TAG>:
1507	ULONG ulNestingLevel = 0;
1508
1509	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1510	// strchr(pszBeginTag, '>');
1511	if (pszEndOfBeginTag)
1512	{
1513	// does the caller want the attributes?
1514	if (ppszAttribs)
1515	{
1516	// yes: then copy them
1517	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1518	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1519	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1520	// add terminating 0
1521	*(pszAttrs + ulAttrLen) = 0;
1522
1523	*ppszAttribs = pszAttrs;
1524	}
1525
1526	// output offset of where we found the begin tag
1527	if (pulOfsBeginTag)
1528	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1529
1530	// now find corresponding closing tag (e.g. "</BODY>"
1531	pszBeginTag = pszEndOfBeginTag+1;
1532	// now we're behind the '>' char of the opening tag
1533	// increase offset of that too
1534	if (pulOfsBeginBlock)
1535	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1536
1537	// find next closing tag;
1538	// for the first run, pszSearch2 points to right
1539	// after the '>' char of the opening tag
1540	pszSearch2 = pszBeginTag;
1541	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1542	&& (pszClosingTag = strstr(pszSearch2, "<"))
1543	)
1544	{
1545	// if we have another opening tag before our closing
1546	// tag, we need to have several closing tags before
1547	// we're done
1548	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1549	ulNestingLevel++;
1550	else
1551	{
1552	// is this ours?
1553	if ( (*(pszClosingTag+1) == '/')
1554	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1555	)
1556	{
1557	// we've found a matching closing tag; is
1558	// it ours?
1559	if (ulNestingLevel == 0)
1560	{
1561	// our closing tag found:
1562	// allocate mem for a new buffer
1563	// and extract all the text between
1564	// open and closing tags to it
1565	ULONG ulLen = pszClosingTag - pszBeginTag;
1566	if (ppszBlock)
1567	{
1568	PSZ pNew = (PSZ)malloc(ulLen + 1);
1569	strhncpy0(pNew, pszBeginTag, ulLen);
1570	*ppszBlock = pNew;
1571	}
1572
1573	// raise search offset to after the closing tag
1574	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1575
1576	ulrc = 0;
1577
1578	break;
1579	} else
1580	// not our closing tag:
1581	ulNestingLevel--;
1582	}
1583	}
1584	// no matching closing tag: search on after that
1585	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1586	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1587
1588	if (!pszClosingTag)
1589	// no matching closing tag found:
1590	// return 2 (closing tag not found)
1591	ulrc = 2;
1592	} // end if (pszBeginTag)
1593	else
1594	// no matching ">" for opening tag found:
1595	ulrc = 3;
1596	}
1597
1598	return ulrc;
1599	}
1600
1601	/* ******************************************************************
1602	*
1603	* Miscellaneous
1604	*
1605	********************************************************************/
1606
1607	/*
1608	*@@ strhArrayAppend:
1609	* this appends a string to a "string array".
1610	*
1611	* A string array is considered a sequence of
1612	* zero-terminated strings in memory. That is,
1613	* after each string's null-byte, the next
1614	* string comes up.
1615	*
1616	* This is useful for composing a single block
1617	* of memory from, say, list box entries, which
1618	* can then be written to OS2.INI in one flush.
1619	*
1620	* To append strings to such an array, call this
1621	* function for each string you wish to append.
1622	* This will re-allocate *ppszRoot with each call,
1623	* and update *pcbRoot, which then contains the
1624	* total size of all strings (including all null
1625	* terminators).
1626	*
1627	* Pass *pcbRoot to PrfSaveProfileData to have the
1628	* block saved.
1629	*
1630	* Note: On the first call, ppszRoot and pcbRoot
1631	* _must_ be both NULL, or this crashes.
1632	*
1633	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1634	*/
1635
1636	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1637	const char *pcszNew, // in: string to append
1638	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1639	PULONG pcbRoot) // in/out: size of array
1640	{
1641	PSZ pszTemp;
1642
1643	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1644	cbNew = strlen(pcszNew);
1645
1646	pszTemp = (PSZ)malloc(*pcbRoot
1647	+ cbNew
1648	+ 1); // two null bytes
1649	if (*ppszRoot)
1650	{
1651	// not first loop: copy old stuff
1652	memcpy(pszTemp,
1653	*ppszRoot,
1654	*pcbRoot);
1655	free(*ppszRoot);
1656	}
1657	// append new string
1658	strcpy(pszTemp + *pcbRoot,
1659	pcszNew);
1660	// update root
1661	*ppszRoot = pszTemp;
1662	// update length
1663	*pcbRoot += cbNew + 1;
1664	}
1665
1666	/*
1667	*@@ strhCreateDump:
1668	* this dumps a memory block into a string
1669	* and returns that string in a new buffer.
1670	*
1671	* You must free() the returned PSZ after use.
1672	*
1673	* The output looks like the following:
1674	*
1675	+ 0000: FE FF 0E 02 90 00 00 00 ........
1676	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1677	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1678	*
1679	* Each line is terminated with a newline (\n)
1680	* character only.
1681	*
1682	*@@added V0.9.1 (2000-01-22) [umoeller]
1683	*/
1684
1685	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1686	ULONG ulSize, // in: size of buffer
1687	ULONG ulIndent) // in: indentation of every line
1688	{
1689	PSZ pszReturn = 0;
1690	XSTRING strReturn;
1691	CHAR szTemp[1000];
1692
1693	PBYTE pbCurrent = pb; // current byte
1694	ULONG ulCount = 0,
1695	ulCharsInLine = 0; // if this grows > 7, a new line is started
1696	CHAR szLine[400] = "",
1697	szAscii[30] = " "; // ASCII representation; filled for every line
1698	PSZ pszLine = szLine,
1699	pszAscii = szAscii;
1700
1701	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1702
1703	for (pbCurrent = pb;
1704	ulCount < ulSize;
1705	pbCurrent++, ulCount++)
1706	{
1707	if (ulCharsInLine == 0)
1708	{
1709	memset(szLine, ' ', ulIndent);
1710	pszLine += ulIndent;
1711	}
1712	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1713
1714	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1715	// printable character:
1716	pszAscii = pbCurrent;
1717	else
1718	*pszAscii = '.';
1719	pszAscii++;
1720
1721	ulCharsInLine++;
1722	if ( (ulCharsInLine > 7) // 8 bytes added?
1723	\|\| (ulCount == ulSize-1) // end of buffer reached?
1724	)
1725	{
1726	// if we haven't had eight bytes yet,
1727	// fill buffer up to eight bytes with spaces
1728	ULONG ul2;
1729	for (ul2 = ulCharsInLine;
1730	ul2 < 8;
1731	ul2++)
1732	pszLine += sprintf(pszLine, " ");
1733
1734	sprintf(szTemp, "%04lX: %s %s\n",
1735	(ulCount & 0xFFFFFFF8), // offset in hex
1736	szLine, // bytes string
1737	szAscii); // ASCII string
1738	xstrcat(&strReturn, szTemp, 0);
1739
1740	// restart line buffer
1741	pszLine = szLine;
1742
1743	// clear ASCII buffer
1744	strcpy(szAscii, " ");
1745	pszAscii = szAscii;
1746
1747	// reset line counter
1748	ulCharsInLine = 0;
1749	}
1750	}
1751
1752	if (strReturn.cbAllocated)
1753	pszReturn = strReturn.psz;
1754
1755	return pszReturn;
1756	}
1757
1758	/* ******************************************************************
1759	*
1760	* Fast string searches
1761	*
1762	********************************************************************/
1763
// assertions are compiled out in this file: the argument is
// not evaluated, and the macro expands to a no-op expression
#define ASSERT(a) ((void)0)
1765
1766	/*
1767	* The following code has been taken from the "Standard
1768	* Function Library", file sflfind.c, and only slightly
1769	* modified to conform to the rest of this file.
1770	*
1771	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1772	* Revised: 98/05/04
1773	*
1774	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1775	*
1776	* The SFL Licence allows incorporating SFL code into other
1777	* programs, as long as the copyright is reprinted and the
1778	* code is marked as modified, so this is what we do.
1779	*/
1780
1781	/*
1782	*@@ strhmemfind:
1783	* searches for a pattern in a block of memory using the
1784	* Boyer-Moore-Horspool-Sunday algorithm.
1785	*
1786	* The block and pattern may contain any values; you must
1787	* explicitly provide their lengths. If you search for strings,
1788	* use strlen() on the buffers.
1789	*
1790	* Returns a pointer to the pattern if found within the block,
1791	* or NULL if the pattern was not found.
1792	*
1793	* This algorithm needs a "shift table" to cache data for the
1794	* search pattern. This table can be reused when performing
1795	* several searches with the same pattern.
1796	*
1797	* "shift" must point to an array big enough to hold 256 (8**2)
1798	* "size_t" values.
1799	*
1800	* If (*repeat_find == FALSE), the shift table is initialized.
1801	* So on the first search with a given pattern, *repeat_find
1802	* should be FALSE. This function sets it to TRUE after the
1803	* shift table is initialised, allowing the initialisation
1804	* phase to be skipped on subsequent searches.
1805	*
1806	* This function is most effective when repeated searches are
1807	* made for the same pattern in one or more large buffers.
1808	*
1809	* Example:
1810	*
1811	+ PSZ pszHaystack = "This is a sample string.",
1812	+ pszNeedle = "string";
1813	+ size_t shift[256];
1814	+ BOOL fRepeat = FALSE;
1815	+
1816	+ PSZ pFound = strhmemfind(pszHaystack,
1817	+ strlen(pszHaystack), // block size
1818	+ pszNeedle,
1819	+ strlen(pszNeedle), // pattern size
1820	+ shift,
1821	+ &fRepeat);
1822	*
1823	* Taken from the "Standard Function Library", file sflfind.c.
1824	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1825	* Slightly modified by umoeller.
1826	*
1827	*@@added V0.9.3 (2000-05-08) [umoeller]
1828	*/
1829
1830	void* strhmemfind(const void *in_block, // in: block containing data
1831	size_t block_size, // in: size of block in bytes
1832	const void *in_pattern, // in: pattern to search for
1833	size_t pattern_size, // in: size of pattern block
1834	size_t *shift, // in/out: shift table (search buffer)
1835	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
1836	{
1837	size_t byte_nbr, // Distance through block
1838	match_size; // Size of matched part
1839	const unsigned char
1840	*match_base = NULL, // Base of match of pattern
1841	*match_ptr = NULL, // Point within current match
1842	*limit = NULL; // Last potiental match point
1843	const unsigned char
1844	block = (unsigned char ) in_block, // Concrete pointer to block data
1845	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
1846
1847	if ( (block == NULL)
1848	\|\| (pattern == NULL)
1849	\|\| (shift == NULL)
1850	)
1851	return NULL;
1852
1853	// Pattern must be smaller or equal in size to string
1854	if (block_size < pattern_size)
1855	return NULL; // Otherwise it's not found
1856
1857	if (pattern_size == 0) // Empty patterns match at start
1858	return (void*)block;
1859
1860	// Build the shift table unless we're continuing a previous search
1861
1862	// The shift table determines how far to shift before trying to match
1863	// again, if a match at this point fails. If the byte after where the
1864	// end of our pattern falls is not in our pattern, then we start to
1865	// match again after that byte; otherwise we line up the last occurence
1866	// of that byte in our pattern under that byte, and try match again.
1867
1868	if (!repeat_find \|\| !*repeat_find)
1869	{
1870	for (byte_nbr = 0;
1871	byte_nbr < 256;
1872	byte_nbr++)
1873	shift[byte_nbr] = pattern_size + 1;
1874	for (byte_nbr = 0;
1875	byte_nbr < pattern_size;
1876	byte_nbr++)
1877	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1878
1879	if (repeat_find)
1880	*repeat_find = TRUE;
1881	}
1882
1883	// Search for the block, each time jumping up by the amount
1884	// computed in the shift table
1885
1886	limit = block + (block_size - pattern_size + 1);
1887	ASSERT (limit > block);
1888
1889	for (match_base = block;
1890	match_base < limit;
1891	match_base += shift[*(match_base + pattern_size)])
1892	{
1893	match_ptr = match_base;
1894	match_size = 0;
1895
1896	// Compare pattern until it all matches, or we find a difference
1897	while (*match_ptr++ == pattern[match_size++])
1898	{
1899	ASSERT (match_size <= pattern_size &&
1900	match_ptr == (match_base + match_size));
1901
1902	// If we found a match, return the start address
1903	if (match_size >= pattern_size)
1904	return (void*)match_base;
1905
1906	}
1907	}
1908	return NULL; // Found nothing
1909	}
1910
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found
 *      or if either pointer is NULL. An empty pattern matches at the
 *      start of the string.
 *
 *      Will match strings irrespective of case, as defined by tolower()
 *      for the current C locale. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      Characters with the high bit set are handled correctly
 *      regardless of whether "char" is signed on this compiler.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const char
        *match_base = NULL,             // Base of match of pattern
        *limit = NULL;                  // First address where no match can start

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return NULL;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return NULL;                    // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char*)string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    // Note: every char is converted to unsigned char before it is
    // passed to tolower(), which is not defined for negative values
    // other than EOF.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
                = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potiental match. The byte behind the current window is
    // at worst the null terminator of the string, so it can always
    // be read.

    limit = string + (string_size - pattern_size + 1);

    for (match_base = string;
         match_base < limit;
         match_base += shift [(unsigned char) tolower ((unsigned char) match_base [pattern_size])])
    {
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (    tolower ((unsigned char) match_base [match_size])
                == tolower ((unsigned char) pattern [match_size])
              )
        {
            // If we found a match, return the start address
            if (++match_size >= pattern_size)
                return (char*)match_base;
        }
    }

    return NULL;                        // Found nothing
}
2005

Note: See TracBrowser for help on using the repository browser.

Download in other formats: