Context Navigation

source: branches/branch-1-0/src/helpers/stringh.c@ 332

Visit:

Last change on this file since 332 was 332, checked in by pr, 19 years ago
Fixes bug 718/836
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 59.2 KB

Line
1
2	/*
3	*@@sourcefile stringh.c:
4	* contains string/text helper functions. These are good for
5	* parsing/splitting strings and other stuff used throughout
6	* XWorkplace.
7	*
8	* Note that these functions are really a bunch of very mixed
9	* up string helpers, which you may or may not find helpful.
10	* If you're looking for string functions with memory
11	* management, look at xstring.c instead.
12	*
13	* Usage: All OS/2 programs.
14	*
15	* Function prefixes (new with V0.81):
16	* -- strh* string helper functions.
17	*
18	* Note: Version numbering in this file relates to XWorkplace version
19	* numbering.
20	*
21	*@@header "helpers\stringh.h"
22	*/
23
24	/*
25	* Copyright (C) 1997-2006 Ulrich Möller.
26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
27	* This file is part of the "XWorkplace helpers" source package.
28	* This is free software; you can redistribute it and/or modify
29	* it under the terms of the GNU General Public License as published
30	* by the Free Software Foundation, in version 2 as it comes in the
31	* "COPYING" file of the XWorkplace main distribution.
32	* This program is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35	* GNU General Public License for more details.
36	*/
37
38	#define OS2EMX_PLAIN_CHAR
39	// this is needed for "os2emx.h"; if this is defined,
40	// emx will define PSZ as _signed_ char, otherwise
41	// as unsigned char
42
43	#define INCL_WINSHELLDATA
44	#define INCL_DOSERRORS
45	#include <os2.h>
46
47	#include <stdlib.h>
48	#include <stdio.h>
49	#include <string.h>
50	#include <ctype.h>
51	#include <math.h>
52
53	#include "setup.h" // code generation and debugging options
54
55	#define DONT_REPLACE_STRINGH_MALLOC
56	#include "helpers\stringh.h"
57	#include "helpers\xstring.h" // extended string helpers
58
59	#pragma hdrstop
60
61	/*
62	*@@category: Helpers\C helpers\String management
63	* See stringh.c and xstring.c.
64	*/
65
66	/*
67	*@@category: Helpers\C helpers\String management\C string helpers
68	* See stringh.c.
69	*/
70
71	#ifdef __DEBUG_MALLOC_ENABLED__
72
/*
 *@@ strhStoreDebug:
 *      memory debug version of strhStore. Works like
 *      strhStore, but allocates the copy with memdMalloc,
 *      passing pcszSourceFile, ulLine and pcszFunction
 *      through to it.
 *
 *      Any string previously stored in *ppszTarget is
 *      freed. If pcszSource is NULL or empty, *ppszTarget
 *      is set to NULL; otherwise it receives a copy.
 *
 *      Returns:
 *
 *      --  NO_ERROR
 *
 *      --  ERROR_INVALID_PARAMETER: ppszTarget is NULL
 *          (same as strhStore).
 *
 *      --  ERROR_NOT_ENOUGH_MEMORY: allocation failed;
 *          *ppszTarget is NULL then.
 *
 *      If pulLength is not NULL, *pulLength is always
 *      written: the length of the stored string, or 0
 *      if nothing was stored.
 *
 *@@added V0.9.16 (2001-12-08) [umoeller]
 */

APIRET (strhStoreDebug)(PSZ *ppszTarget,
                        PCSZ pcszSource,
                        PULONG pulLength,        // out: length of new string (ptr can be NULL)
                        PCSZ pcszSourceFile,
                        unsigned long ulLine,
                        PCSZ pcszFunction)
{
    APIRET  arc = NO_ERROR;
    ULONG   ulLength = 0;

    if (!ppszTarget)
        arc = ERROR_INVALID_PARAMETER;
    else
    {
        if (*ppszTarget)
            free(*ppszTarget);
        *ppszTarget = NULL;

        if (    (pcszSource)
             && (ulLength = strlen(pcszSource))
           )
        {
            if ((*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
                                               pcszSourceFile,
                                               ulLine,
                                               pcszFunction)))
                // copy including the null terminator
                memcpy(*ppszTarget, pcszSource, ulLength + 1);
            else
            {
                // nothing stored, so report length 0
                ulLength = 0;
                arc = ERROR_NOT_ENOUGH_MEMORY;
            }
        }
    }

    if (pulLength)
        *pulLength = ulLength;

    return arc;
}
117
118	#endif
119
/*
 *@@ strhStore:
 *      stores a copy of the given string in the specified
 *      buffer. The copy is allocated with malloc().
 *
 *      If *ppszTarget != NULL, the previous string is freed.
 *      If pcszSource is NULL or an empty string, *ppszTarget
 *      is set to NULL; otherwise a copy of pcszSource is
 *      stored in *ppszTarget.
 *
 *      Returns:
 *
 *      --  NO_ERROR
 *
 *      --  ERROR_INVALID_PARAMETER: ppszTarget is NULL.
 *
 *      --  ERROR_NOT_ENOUGH_MEMORY: malloc failed;
 *          *ppszTarget is NULL then.
 *
 *      If pulLength is not NULL, *pulLength is always
 *      written: the length of the stored string, or 0
 *      if nothing was stored.
 *
 *@@added V0.9.16 (2001-12-06) [umoeller]
 */

APIRET strhStore(PSZ *ppszTarget,
                 PCSZ pcszSource,
                 PULONG pulLength)        // out: length of new string (ptr can be NULL)
{
    APIRET  arc = NO_ERROR;
    ULONG   ulLength = 0;

    if (!ppszTarget)
        arc = ERROR_INVALID_PARAMETER;
    else
    {
        if (*ppszTarget)
            free(*ppszTarget);
        *ppszTarget = NULL;

        if (    (pcszSource)
             && (ulLength = strlen(pcszSource))
           )
        {
            if ((*ppszTarget = (PSZ)malloc(ulLength + 1)))
                // copy including the null terminator
                memcpy(*ppszTarget, pcszSource, ulLength + 1);
            else
            {
                // nothing stored, so report length 0
                ulLength = 0;
                arc = ERROR_NOT_ENOUGH_MEMORY;
            }
        }
    }

    if (pulLength)
        *pulLength = ulLength;

    return arc;
}
164
/*
 *@@ strhcpy:
 *      like strcpy, but this one doesn't crash if string2
 *      is NULL, but sets the first byte in string1 to \0
 *      instead.
 *
 *      Returns string1, or NULL if string1 itself is NULL
 *      (in which case nothing is written).
 *
 *      As with strcpy, the caller must make sure that
 *      string1 is large enough for string2 plus the null
 *      terminator.
 *
 *@@added V0.9.14 (2001-08-01) [umoeller]
 */

PSZ strhcpy(PSZ string1, PCSZ string2)
{
    if (!string1)
        return NULL;

    if (string2)
        return strcpy(string1, string2);

    *string1 = '\0';
    return string1;
}
181
182	#ifdef __DEBUG_MALLOC_ENABLED__
183
/*
 *@@ strhdupDebug:
 *      memory debug version of strhdup. Same contract as
 *      strhdup, except that the copy is allocated with
 *      memdMalloc, which receives pcszSourceFile, ulLine
 *      and pcszFunction.
 *
 *      Returns NULL if pcszSource is NULL or empty, or if
 *      the allocation failed. If pulLength is not NULL, it
 *      receives the length of pcszSource (0 for NULL).
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhdupDebug(PCSZ pcszSource,
                 unsigned long *pulLength,
                 PCSZ pcszSourceFile,
                 unsigned long ulLine,
                 PCSZ pcszFunction)
{
    PSZ     pszCopy = NULL;
    ULONG   cchSource = (pcszSource) ? strlen(pcszSource) : 0;

    if (cchSource)
    {
        pszCopy = (PSZ)memdMalloc(cchSource + 1,
                                  pcszSourceFile,       // fixed V0.9.16 (2001-12-08) [umoeller]
                                  ulLine,
                                  pcszFunction);
        if (pszCopy)
            // copy including the null terminator
            memcpy(pszCopy, pcszSource, cchSource + 1);
    }

    if (pulLength)
        *pulLength = cchSource;

    return pszCopy;
}
216
217	#endif // __DEBUG_MALLOC_ENABLED__
218
/*
 *@@ strhdup:
 *      like strdup, but this one doesn't crash if pcszSource
 *      is NULL, but returns NULL also. In addition, this
 *      can report the length of the string (V0.9.16).
 *
 *      Note that an empty source string is not copied
 *      either: NULL is returned for "" as well. NULL is
 *      also returned if malloc fails.
 *
 *      The returned string must be free()'d by the caller.
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
 */

PSZ strhdup(PCSZ pcszSource,
            unsigned long *pulLength)   // out: length of string excl. null terminator (ptr can be NULL)
{
    PSZ     pszCopy = NULL;
    ULONG   cchSource = (pcszSource) ? strlen(pcszSource) : 0;

    if (cchSource)
    {
        pszCopy = (PSZ)malloc(cchSource + 1);
        if (pszCopy)
            // copy including the null terminator
            memcpy(pszCopy, pcszSource, cchSource + 1);
    }

    if (pulLength)
        *pulLength = cchSource;

    return pszCopy;
}
248
/*
 *@@ strhcmp:
 *      better strcmp. This doesn't crash if any of the
 *      string pointers are NULL: a NULL pointer sorts
 *      before any non-NULL string, and two NULL pointers
 *      compare equal.
 *
 *      Besides, this is guaranteed to only return -1, 0,
 *      or +1, while strcmp can return any positive or
 *      negative value. This is useful for tree comparison
 *      funcs.
 *
 *@@added V0.9.9 (2001-02-16) [umoeller]
 */

int strhcmp(PCSZ p1, PCSZ p2)
{
    int iDiff;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, otherwise p1 is less
        return (p2) ? -1 : 0;

    if (!p2)
        // only p2 is NULL: p1 is greater
        return +1;

    // both valid: normalize the strcmp result to -1, 0, +1
    iDiff = strcmp(p1, p2);
    return (iDiff > 0) - (iDiff < 0);
}
281
/*
 *@@ strhicmp:
 *      like strhcmp, but compares without respect
 *      to case (using stricmp). NULL pointers are
 *      handled the same way: NULL sorts before any
 *      non-NULL string, two NULLs are equal.
 *
 *      Returns -1, 0, or +1 only.
 *
 *@@added V0.9.9 (2001-04-07) [umoeller]
 */

int strhicmp(PCSZ p1, PCSZ p2)
{
    int iDiff;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, otherwise p1 is less
        return (p2) ? -1 : 0;

    if (!p2)
        // only p2 is NULL: p1 is greater
        return +1;

    // both valid: normalize the stricmp result to -1, 0, +1
    iDiff = stricmp(p1, p2);
    return (iDiff > 0) - (iDiff < 0);
}
308
/*
 *@@ strhistr:
 *      like strstr, but case-insensitive.
 *
 *      Returns a pointer to the first occurrence of
 *      string2 in string1 (pointing into string1), or
 *      NULL if string2 was not found or if either
 *      pointer is NULL. As with strstr, an empty
 *      string2 matches at the start of string1.
 *
 *      Characters are compared after toupper(). This
 *      works directly on the input strings and does not
 *      allocate memory, so it cannot fail for lack of
 *      memory.
 *
 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rüdiger Ihle
 */

PSZ strhistr(PCSZ string1, PCSZ string2)
{
    PCSZ    pStart;

    if ((!string1) || (!string2))
        return NULL;

    if (!*string2)
        // empty search string: matches right away, like strstr
        return (PSZ)string1;

    for (pStart = string1; *pStart; pStart++)
    {
        PCSZ    pIn = pStart,
                pFor = string2;

        // the loop stops on the first mismatch, which includes
        // reaching the end of string1 while string2 has chars left
        while (    (*pFor)
                && (    toupper((unsigned char)*pIn)
                     == toupper((unsigned char)*pFor))
              )
        {
            pIn++;
            pFor++;
        }

        if (!*pFor)
            // all of string2 matched
            return (PSZ)pStart;
    }

    return NULL;
}
347
/*
 *@@ strhncpy0:
 *      like strncpy, but always appends a 0 character.
 *
 *      Copies at most cbSource characters from pszSource
 *      to pszTarget, stopping early at the end of
 *      pszSource, and then writes a null terminator.
 *      pszTarget must therefore have room for
 *      cbSource + 1 bytes.
 *
 *      If pszSource is NULL, pszTarget becomes an empty
 *      string. If pszTarget is NULL, nothing is written.
 *
 *      Returns the number of characters copied, not
 *      counting the null terminator.
 *
 *@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
 */

ULONG strhncpy0(PSZ pszTarget,
                PCSZ pszSource,
                ULONG cbSource)
{
    ULONG   ul = 0;

    if (!pszTarget)
        return 0;

    if (pszSource)      // V0.9.16 (2002-01-09) [umoeller]
        while ((ul < cbSource) && (pszSource[ul]))
        {
            pszTarget[ul] = pszSource[ul];
            ul++;
        }

    pszTarget[ul] = '\0';

    return ul;
}
376
/*
 *@@ strhlen:
 *      like strlen, but doesn't crash on
 *      null strings, but returns 0 also.
 *
 *@@added V0.9.19 (2002-04-02) [umoeller]
 */

ULONG strhlen(PCSZ pcsz)
{
    // a NULL pointer counts as a string of length 0
    return (pcsz) ? strlen(pcsz) : 0;
}
392
/*
 *@@ strhSize:
 *      returns the size of the given string, which
 *      is the memory required to allocate a copy,
 *      including the null terminator.
 *
 *      Returns 0 only if pcsz is NULL. If pcsz
 *      points to a null character, this returns 1.
 *
 *@@added V0.9.18 (2002-02-13) [umoeller]
 *@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
 */

ULONG strhSize(PCSZ pcsz)
{
    // only the pointer is tested, so an empty string yields 1
    // V0.9.18 (2002-03-27) [umoeller]
    return (pcsz) ? (strlen(pcsz) + 1) : 0;
}
413
/*
 * strhCount:
 *      this counts the occurrences of c in pszSearch.
 *
 *      Returns 0 if pszSearch is NULL. The null
 *      terminator is not part of the searched text,
 *      so this also returns 0 if c is the null
 *      character.
 */

ULONG strhCount(PCSZ pszSearch,
                CHAR c)
{
    ULONG   ulCount = 0;

    if ((pszSearch) && (c))
    {
        PCSZ    p;
        for (p = pszSearch; *p; p++)
            if (*p == c)
                ulCount++;
    }

    return ulCount;
}
437
/*
 *@@ strhIsDecimal:
 *      returns TRUE if psz consists of decimal digits only.
 *
 *      An empty string contains no non-digit character and
 *      therefore yields TRUE as well. Returns FALSE if psz
 *      is NULL.
 */

BOOL strhIsDecimal(PSZ psz)
{
    PCSZ    p;

    if (!psz)
        return FALSE;

    for (p = psz; *p; p++)
        // isdigit() is only defined for unsigned char values
        // and EOF, so chars above 127 must not be passed
        // as negative ints
        if (!isdigit((unsigned char)*p))
            return FALSE;

    return TRUE;
}
455
456	#ifdef __DEBUG_MALLOC_ENABLED__
457
/*
 *@@ strhSubstrDebug:
 *      memory debug version of strhSubstr. Same contract
 *      as strhSubstr, except that the new string is
 *      allocated with memdMalloc, which receives
 *      pcszSourceFile, ulLine and pcszFunction.
 *
 *      Returns NULL if pBegin is NULL, if pEnd is not
 *      behind pBegin, or if the allocation failed.
 *
 *@@added V0.9.14 (2001-08-01) [umoeller]
 */

PSZ strhSubstrDebug(PCSZ pBegin,      // in: first char
                    PCSZ pEnd,        // in: last char (not included)
                    PCSZ pcszSourceFile,
                    unsigned long ulLine,
                    PCSZ pcszFunction)
{
    PSZ pszSubstr = NULL;

    if (    (pBegin)
         && (pEnd > pBegin)       // V0.9.9 (2001-04-04) [umoeller]
       )
    {
        ULONG cbSubstr = (pEnd - pBegin);
        if ((pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
                                         pcszSourceFile,
                                         ulLine,
                                         pcszFunction)))
        {
            memcpy(pszSubstr, pBegin, cbSubstr);  // V0.9.9 (2001-04-04) [umoeller]
            pszSubstr[cbSubstr] = '\0';
        }
    }

    return pszSubstr;
}
489
490	#endif // __DEBUG_MALLOC_ENABLED__
491
/*
 *@@ strhSubstr:
 *      this creates a new PSZ containing the string
 *      from pBegin to pEnd, excluding the pEnd character.
 *      The new string is null-terminated. The caller
 *      must free() the new string after use.
 *
 *      Returns NULL if pBegin is NULL, if pEnd is not
 *      behind pBegin (so an empty range yields NULL, not
 *      an empty string), or if malloc failed.
 *
 *      Example:
 +              "1234567890"
 +                ^      ^
 +                p1     p2
 +          strhSubstr(p1, p2)
 *      would return a new string containing "2345678".
 *
 *@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
 *@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
 */

PSZ strhSubstr(PCSZ pBegin,      // in: first char
               PCSZ pEnd)        // in: last char (not included)
{
    PSZ pszSubstr = NULL;

    if (    (pBegin)
         && (pEnd > pBegin)       // V0.9.9 (2001-04-04) [umoeller]
       )
    {
        ULONG cbSubstr = (pEnd - pBegin);
        if ((pszSubstr = (PSZ)malloc(cbSubstr + 1)))
        {
            memcpy(pszSubstr, pBegin, cbSubstr);  // V0.9.9 (2001-04-04) [umoeller]
            pszSubstr[cbSubstr] = '\0';
        }
    }

    return pszSubstr;
}
527
/*
 *@@ strhExtract:
 *      searches pszBuf for the cOpen character and returns
 *      the data in between cOpen and cClose, excluding
 *      those two characters, in a newly allocated buffer
 *      which you must free() afterwards.
 *
 *      The extracted data is copied verbatim, including
 *      any spaces and line breaks.
 *
 *      If the search was successful, the new buffer
 *      is returned and, if (ppEnd != NULL), *ppEnd points
 *      to the first character after the cClose character
 *      found in the buffer.
 *
 *      If the search was not successful (pszBuf is NULL,
 *      cOpen is the null character or was not found, or no
 *      matching cClose was found), NULL is returned, and
 *      *ppEnd is unchanged.
 *
 *      Note: the copy is made with strhSubstr, so if there
 *      is nothing between cOpen and cClose, NULL is returned
 *      as well, although *ppEnd has been set.
 *
 *      If another cOpen character is found before
 *      cClose, matching cClose characters will be skipped.
 *      You can therefore nest the cOpen and cClose
 *      characters.
 *
 *      This function ignores cOpen and cClose characters
 *      in strings surrounded by double quotes. If such a
 *      string is not terminated, the search fails.
 *      C-style comments are not recognized.
 *
 *      Example:
 *
 +          PSZ pszBuf = "KEYWORD { --blah-- } next",
 +              pEnd;
 +          strhExtract(pszBuf,
 +                      '{', '}',
 +                      &pEnd)
 *
 *      would return a new buffer containing " --blah-- ",
 *      and pEnd would afterwards point to the space
 *      before "next" in the static buffer.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhExtract(PCSZ pszBuf,     // in: search buffer
                CHAR cOpen,      // in: opening char
                CHAR cClose,     // in: closing char
                PCSZ *ppEnd)     // out: if != NULL, receives first character after closing char
{
    PSZ     pszReturn = NULL;
    PCSZ    pOpen;

    if (    (pszBuf)
         && (cOpen)     // strchr would "find" the terminator for 0
         && (pOpen = strchr(pszBuf, cOpen))
       )
    {
        // opening char found:
        // now go thru the whole rest of the buffer
        PCSZ    p = pOpen + 1;
        LONG    lLevel = 1;        // if this goes 0, we're done

        while (*p)
        {
            if (*p == cOpen)
                lLevel++;
            else if (*p == cClose)
            {
                lLevel--;
                if (lLevel <= 0)
                {
                    // matching closing bracket found:
                    // extract string
                    pszReturn = strhSubstr(pOpen + 1,   // after cOpen
                                           p);          // excluding cClose
                    if (ppEnd)
                        *ppEnd = p + 1;
                    break;      // while (*p)
                }
            }
            else if (*p == '\"')
            {
                // beginning of string:
                PCSZ p2 = p + 1;
                // find end of string, but stop at the
                // end of the buffer
                while ((*p2) && (*p2 != '\"'))
                    p2++;

                if (*p2 == '\"')
                    // closing quote found:
                    // search on after that
                    p = p2;     // raised below
                else
                    // unterminated string: give up
                    break;      // while (*p)
            }

            p++;
        }
    }

    return pszReturn;
}
624
/*
 *@@ strhQuote:
 *      similar to strhExtract, except that
 *      opening and closing chars are the same,
 *      and therefore no nesting is possible.
 *      Useful for extracting stuff between
 *      quotes.
 *
 *      Returns the text between the first two cQuote
 *      characters in pszBuf in a new buffer (created
 *      by strhSubstr), which must be free()'d. If
 *      ppEnd is not NULL, *ppEnd receives the address
 *      of the character after the second cQuote.
 *
 *      Returns NULL if pszBuf is NULL, if cQuote is the
 *      null character, or if there are fewer than two
 *      cQuote characters; *ppEnd is unchanged then.
 *
 *      Note: if the two cQuote characters are adjacent,
 *      strhSubstr gets an empty range, for which it
 *      returns NULL; *ppEnd is still set in that case.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhQuote(PSZ pszBuf,
              CHAR cQuote,
              PSZ *ppEnd)
{
    PSZ     pszReturn = NULL,
            p1,
            p2;

    if (    (pszBuf)
         && (cQuote)    // strchr would "find" the terminator for 0
         && (p1 = strchr(pszBuf, cQuote))
         && (p2 = strchr(p1 + 1, cQuote))
       )
    {
        pszReturn = strhSubstr(p1 + 1, p2);
        if (ppEnd)
            // store first char after the closing char
            *ppEnd = p2 + 1;
    }

    return pszReturn;
}
656
/*
 *@@ strhStrip:
 *      removes all double spaces, so that every run
 *      of spaces is reduced to a single space.
 *      This copies within the "psz" buffer.
 *      If any double spaces are found, the
 *      string will be shorter than before,
 *      but the buffer is _not_ reallocated,
 *      so there will be unused bytes at the
 *      end.
 *
 *      Returns the number of spaces removed
 *      (0 if psz is NULL).
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhStrip(PSZ psz)         // in/out: string
{
    ULONG   ulrc = 0;

    if (psz)
    {
        PSZ     pRead = psz,
                pWrite = psz;

        // single pass: pWrite never gets ahead of pRead
        while (*pRead)
        {
            if ((pRead[0] == ' ') && (pRead[1] == ' '))
                // drop this space; the last one of the
                // run is the one that is kept
                ulrc++;
            else
                *pWrite++ = *pRead;

            pRead++;
        }

        *pWrite = '\0';
    }

    return ulrc;
}
695
/*
 *@@ strhins:
 *      this inserts one string into another.
 *
 *      pcszInsert is inserted into pcszBuffer at offset
 *      ulInsertOfs (which counts from 0). An offset equal
 *      to the length of pcszBuffer appends pcszInsert.
 *
 *      A newly allocated string is returned. pcszBuffer is
 *      not changed. The new string should be free()'d after
 *      use.
 *
 *      Upon errors, NULL is returned. This is the case if
 *      either string is NULL, if ulInsertOfs is larger than
 *      the length of pcszBuffer, or if malloc failed.
 *
 *@@changed V0.9.0 [umoeller]: completely rewritten.
 */

PSZ strhins(PCSZ pcszBuffer,
            ULONG ulInsertOfs,
            PCSZ pcszInsert)
{
    PSZ     pszNew = NULL;

    if ((pcszBuffer) && (pcszInsert))
    {
        ULONG   cbBuffer = strlen(pcszBuffer);
        ULONG   cbInsert = strlen(pcszInsert);

        if (    (ulInsertOfs <= cbBuffer)      // offset must lie within the string
             && (pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1))
                                               // additional null terminator
           )
        {
            // copy stuff before the insert position
            memcpy(pszNew,
                   pcszBuffer,
                   ulInsertOfs);
            // copy string to be inserted
            memcpy(pszNew + ulInsertOfs,
                   pcszInsert,
                   cbInsert);
            // copy stuff after the insert position,
            // including the null terminator
            memcpy(pszNew + ulInsertOfs + cbInsert,
                   pcszBuffer + ulInsertOfs,
                   cbBuffer - ulInsertOfs + 1);
        }
    }

    return pszNew;
}
747
748	/*
749	*@@ strhFindReplace:
750	* wrapper around xstrFindReplace to work with C strings.
751	* Note that *ppszBuf can get reallocated and must
752	* be free()'able.
753	*
754	* Repetitive use of this wrapper is not recommended
755	* because it is considerably slower than xstrFindReplace.
756	*
757	*@@added V0.9.6 (2000-11-01) [umoeller]
758	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
759	*/
760
761	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
762	PULONG pulOfs, // in: where to begin search (0 = start);
763	// out: ofs of first char after replacement string
764	PCSZ pcszSearch, // in: search string; cannot be NULL
765	PCSZ pcszReplace) // in: replacement string; cannot be NULL
766	{
767	ULONG ulrc = 0;
768	XSTRING xstrBuf,
769	xstrFind,
770	xstrReplace;
771	size_t ShiftTable[256];
772	BOOL fRepeat = FALSE;
773	xstrInitSet(&xstrBuf, *ppszBuf);
774	// reallocated and returned, so we're safe
775	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
776	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
777	// these two are never freed, so we're safe too
778
779	if ((ulrc = xstrFindReplace(&xstrBuf,
780	pulOfs,
781	&xstrFind,
782	&xstrReplace,
783	ShiftTable,
784	&fRepeat)))
785	// replaced:
786	*ppszBuf = xstrBuf.psz;
787
788	return ulrc;
789	}
790
/*
 * strhWords:
 *      returns the no. of words in "psz".
 *      A string is considered a "word" if
 *      it is surrounded by spaces only.
 *
 *      This simply returns the number of spaces
 *      in psz plus one, so leading, trailing or
 *      multiple spaces all add to the count.
 *      Strings shorter than two characters yield 0.
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhWords(PSZ psz)
{
    ULONG   cch = strlen(psz),
            cWords,
            ul;

    if (cch <= 1)
        return 0;

    // one word to begin with, plus one per space
    cWords = 1;
    for (ul = 0; ul < cch; ul++)
        if (psz[ul] == ' ')
            cWords++;

    return cWords;
}
814
/*
 *@@ strhGetWord:
 *      finds word boundaries.
 *
 *      *ppszStart is used as the beginning of the
 *      search. Characters from pcszBeginChars are
 *      skipped first; the word then extends up to
 *      the first character from pcszEndChars or
 *      the null terminator.
 *
 *      If a word is found, *ppszStart is set to
 *      the first character of the word which was
 *      found and *ppszEnd receives the address
 *      of the first character _after_ the word,
 *      which is probably a space or a \n or \r char
 *      (or the null terminator).
 *      We then return TRUE.
 *
 *      FALSE is returned, and *ppszStart and *ppszEnd
 *      are left unchanged, if the null terminator or
 *      pLimit is reached before a word starts, or if
 *      the word does not end before or at pLimit.
 *
 *@@added V0.9.1 (2000-02-13) [umoeller]
 */

BOOL strhGetWord(PSZ *ppszStart,            // in: start of search,
                                            // out: start of word (if TRUE is returned)
                 PCSZ pLimit,               // in: ptr to last char after *ppszStart to be
                                            // searched; if the word does not end before
                                            // or with this char, FALSE is returned
                 PCSZ pcszBeginChars,       // stringh.h defines STRH_BEGIN_CHARS
                 PCSZ pcszEndChars,         // stringh.h defines STRH_END_CHARS
                 PSZ *ppszEnd)              // out: first char _after_ word
                                            // (if TRUE is returned)
{
    PSZ pStart = *ppszStart;

    // find start of word; the null terminator must be tested
    // explicitly because strchr() finds the terminator of
    // pcszBeginChars when asked for a 0 character, which would
    // make us run past the end of the string
    while (    (pStart < (PSZ)pLimit)
            && (*pStart)
            && (strchr(pcszBeginChars, *pStart))
          )
        // if char is a "before word" char: go for next
        pStart++;

    if (    (pStart < (PSZ)pLimit)
         && (*pStart)
       )
    {
        // found a valid "word start" character
        // (which is not in pcszBeginChars):

        // find end of word; this stops at the null
        // terminator too, because strchr() reports
        // a 0 character as found
        PSZ pEndOfWord = pStart;
        while (    (pEndOfWord <= (PSZ)pLimit)
                && (strchr(pcszEndChars, *pEndOfWord) == 0)
              )
            // if char is not an "end word" char: go for next
            pEndOfWord++;

        if (pEndOfWord <= (PSZ)pLimit)
        {
            // whoa, got a word:
            *ppszStart = pStart;
            *ppszEnd = pEndOfWord;
            return TRUE;
        }
    }

    return FALSE;
}
883
/*
 *@@ strhIsWord:
 *      returns TRUE if p points to a "word"
 *      in pcszBuf.
 *
 *      p is considered a word if the character _before_
 *      it is in pcszBeginChars (or p is the start of
 *      pcszBuf) and the char _after_ it (i.e.
 *      *(p+cbSearch)) is in pcszEndChars or is the
 *      null terminator.
 *
 *      If the char after the word is an end char which
 *      is immediately repeated (e.g. "--"), the word is
 *      rejected, unless that char is a space.
 *
 *      The caller must make sure that p points into
 *      pcszBuf and that at least cbSearch characters
 *      follow p.
 *
 *@@added V0.9.6 (2000-11-12) [umoeller]
 *@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
 */

BOOL strhIsWord(PCSZ pcszBuf,
                PCSZ p,                 // in: start of word
                ULONG cbSearch,         // in: length of word
                PCSZ pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                PCSZ pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    // check previous char
    if (    (p == pcszBuf)
         || (strchr(pcszBeginChars, *(p-1)))
       )
    {
        // OK, valid begin char:
        // check end char
        CHAR    cNextChar;
        if (!(cNextChar = p[cbSearch]))
            // null terminator:
            return TRUE;
        else
        {
            // not null terminator: check if char is
            // in the list of valid end chars
            if (strchr(pcszEndChars, cNextChar))
            {
                // OK, is end char: avoid doubles of that char,
                // but allow spaces
                // fixed V0.9.18 (2002-02-23) [umoeller]
                CHAR cNextNext = p[cbSearch + 1];
                if (    (cNextNext != cNextChar)
                     || (cNextNext == ' ')
                     || (cNextNext == 0)
                   )
                    return TRUE;
            }
        }
    }

    return FALSE;
}
935
/*
 *@@ strhFindWord:
 *      searches for pszSearch in pszBuf, which is
 *      returned if found (or NULL if not).
 *
 *      As opposed to strstr, this finds pszSearch
 *      only if it is a "word". A search string is
 *      considered a word if the character _before_
 *      it is in pcszBeginChars and the char _after_
 *      it is in pcszEndChars.
 *
 *      Example:
 +          strhFindWord("This is an example.", "is");
 +          returns ...........^ this, but not the "is" in "This".
 *
 *      The algorithm here uses strstr to find pszSearch in pszBuf
 *      and performs additional "is-word" checks for each item found
 *      (by calling strhIsWord). After a hit that is not a word,
 *      the search continues behind that hit.
 *
 *      Returns NULL if either string is empty. Neither pointer
 *      may be NULL.
 *
 *      Note that this function is fairly slow compared to xstrFindWord.
 *
 *@@added V0.9.0 (99-11-08) [umoeller]
 *@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
 */

PSZ strhFindWord(PCSZ pszBuf,
                 PCSZ pszSearch,
                 PCSZ pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                 PCSZ pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    ULONG   cchBuf = strlen(pszBuf),
            cchSearch = strlen(pszSearch);
    PCSZ    pHit;

    if ((!cchBuf) || (!cchSearch))
        return 0;

    for (pHit = strstr(pszBuf, pszSearch);
         pHit;
         pHit = strstr(pHit + cchSearch, pszSearch))
    {
        // string found:
        // check if that's a word
        if (strhIsWord(pszBuf,
                       pHit,
                       cchSearch,
                       pcszBeginChars,
                       pcszEndChars))
            return (PSZ)pHit;
    }

    return 0;
}
1000
/*
 *@@ strhFindEOL:
 *      returns a pointer to the next \r, \n or null character
 *      following pcszSearchIn. Stores the offset of that
 *      character from pcszSearchIn in *pulOffset.
 *
 *      This only returns NULL if pcszSearchIn is NULL (and
 *      *pulOffset is left alone then); otherwise, at some
 *      point, there will be a null byte in your string.
 *
 *@@added V0.9.4 (2000-07-01) [umoeller]
 */

PSZ strhFindEOL(PCSZ pcszSearchIn,        // in: where to search
                PULONG pulOffset)       // out: offset (ptr can be NULL)
{
    PCSZ    prc;

    if (!pcszSearchIn)
        return NULL;

    // strcspn counts the chars before the first \r or \n,
    // or up to the null terminator if there is neither
    prc = pcszSearchIn + strcspn(pcszSearchIn, "\r\n");

    if (pulOffset)
        *pulOffset = prc - pcszSearchIn;

    return ((PSZ)prc);
}
1032
/*
 *@@ strhFindNextLine:
 *      like strhFindEOL, but this returns the character
 *      _after_ \r or \n. A "\r\n" pair is skipped as a
 *      whole. Note that this might return
 *      a pointer to terminating NULL character also.
 *
 *      If pulOffset is not NULL, *pulOffset receives the
 *      offset of the returned pointer from pszSearchIn.
 */

PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
{
    // start at the end of the current line, which is
    // a \r, a \n, or the terminating 0 byte
    PSZ pNext = strhFindEOL(pszSearchIn, NULL);

    // step over one \r and then one \n, if present;
    // a 0 byte is never skipped
    pNext += (*pNext == '\r');
    pNext += (*pNext == '\n');

    if (pulOffset)
        *pulOffset = pNext - pszSearchIn;

    return pNext;
}
1054
/*
 *@@ strhBeautifyTitle:
 *      replaces all line breaks (0xd, 0xa) with spaces.
 *      A line break which directly follows a space
 *      (including one that resulted from a previous
 *      line break) is removed instead, so that "\r\n"
 *      ends up as one space only.
 *
 *      This works within the psz buffer; the string
 *      can only get shorter.
 *
 *      Returns the new length of the string or 0 on
 *      errors (psz is NULL).
 *
 *@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
 *@@changed V0.9.19 (2002-06-18) [umoeller]: now returning length
 */

ULONG strhBeautifyTitle(PSZ psz)
{
    PSZ     pRead,
            pWrite;

    if (!psz)
        return 0;

    // single pass: pWrite never gets ahead of pRead
    pWrite = psz;
    for (pRead = psz; *pRead; pRead++)
    {
        if (    (*pRead == '\r')
             || (*pRead == '\n')
           )
        {
            // line break: becomes a space, unless the
            // output ends with a space already
            if (    (pWrite == psz)
                 || (pWrite[-1] != ' ')
               )
                *pWrite++ = ' ';
        }
        else
            *pWrite++ = *pRead;
    }

    *pWrite = '\0';

    return (pWrite - psz);
}
1089
/*
 *@@ strhBeautifyTitle2:
 *      like strhBeautifyTitle, but copies into
 *      a new buffer. More efficient.
 *
 *      Line breaks (0xd, 0xa) in pcszSource are written
 *      to pszTarget as a space, unless the last character
 *      written was a space already. pszTarget must be
 *      large enough to hold pcszSource including the
 *      null terminator.
 *
 *      If pcszSource is NULL, pszTarget is set to an
 *      empty string.
 *
 *      Returns the length of the string in pszTarget
 *      (0 if pszTarget is NULL, in which case nothing
 *      is written).
 *
 *@@added V0.9.19 (2002-06-18) [umoeller]
 */

ULONG strhBeautifyTitle2(PSZ pszTarget,       // out: beautified string
                         PCSZ pcszSource)     // in: string to be beautified (can be NULL)
{
    PCSZ    pSource = pcszSource;
    PSZ     pTarget = pszTarget;
    CHAR    c;

    if (!pszTarget)
        return 0;

    if (!pcszSource)
    {
        *pszTarget = '\0';
        return 0;
    }

    while ((c = *pSource++))
    {
        if (    (c == '\r')
             || (c == '\n')
           )
        {
            // line break: becomes a space, unless the
            // output ends with a space already
            if (    (pTarget == pszTarget)
                 || (pTarget[-1] != ' ')
               )
                *pTarget++ = ' ';
        }
        else
            *pTarget++ = c;
    }

    // null-terminate
    *pTarget = '\0';

    return (pTarget - pszTarget);
}
1131
/*
 * strhFindAttribValue:
 *      searches for pszAttrib in pszSearchIn; if found,
 *      returns the first character after the "=" char.
 *      If the attribute has no "=value" part, the address
 *      of the first character after the attribute name
 *      is returned. This function searches without
 *      respecting case.
 *
 *      The search works on a copy of pszSearchIn on the
 *      stack. After leading quotes and whitespace have
 *      been skipped, the copy is split into tokens at each
 *      space, \r, \n or tab which is not inside double
 *      quotes. Of each token, the part up to the first "="
 *      or ">" is compared against pszAttrib.
 *
 *      Returns NULL if the attribute was not found or if
 *      one of the two pointers is NULL. The returned
 *      pointer points into pszSearchIn.
 *
 *      Note: this uses strtok and thereby changes
 *      strtok's internal state.
 *
 *      <B>Example:</B>
 +          strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
 +
 +          returns ....................... ^ this address.
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
 *@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
 *@@changed WarpIN V1.0.11 (2006-08-29) [pr]: handle attrib names in quoted strings @@fixes 718
 *@@changed WarpIN V1.0.12 (2006-09-07) [pr]: fix attrib handling again @@fixes 718 @@fixes 836
 */

PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
{
    PSZ     prc = 0;
    PSZ     pszSearchIn2, p, pszStart, pszName, pszValue;
    ULONG   cbAttrib,
            ulLength;
    BOOL    fInQuote = FALSE;

    if ((!pszSearchIn) || (!pszAttrib))
        return NULL;

    cbAttrib = strlen(pszAttrib);
    ulLength = strlen(pszSearchIn);

    // use alloca(), so memory is freed on function exit
    pszSearchIn2 = (PSZ)alloca(ulLength + 1);
    memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);

    // V1.0.12 (2006-09-07) [pr]: filter leading " and ' left over from the previous pass
    for (p = pszSearchIn2;
            *p == '\'' || *p == '"' || *p == ' '
         || *p == '\n' || *p == '\r' || *p == '\t';
         p++)
        ;

    for (pszStart = p; *p; p++)
    {
        if (fInQuote)
        {
            // V1.0.12 (2006-09-07) [pr]: allow end of line to terminate a (broken) quote
            if (*p == '"' || *p == '\n' || *p == '\r')
                fInQuote = FALSE;
        }
        else
        {
            if (*p == '"')
                fInQuote = TRUE;
            else
            {
                if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
                {
                    // unquoted whitespace: the token from pszStart
                    // ends here; terminate it and split off its name
                    *p = '\0';
                    pszName = strtok(pszStart, "=>");
                    pszStart = p + 1;
                    if (pszName && !stricmp(pszName, pszAttrib))
                    {
                        // rest of the token after the "=", if any
                        pszValue = strtok(NULL, "");
                        // map the position in the copy back
                        // to the caller's buffer
                        if (pszValue)
                            prc = (PSZ)pszSearchIn + (pszValue - pszSearchIn2);
                        else
                            prc = (PSZ)pszSearchIn + (pszName - pszSearchIn2) + cbAttrib;

                        return(prc);
                    }
                }
            }
        }
    }

    // check the last token, which is terminated by
    // the end of the string
    if (pszStart != p)
    {
        pszName = strtok(pszStart, "=>");
        if (pszName && !stricmp(pszName, pszAttrib))
        {
            pszValue = strtok(NULL, "");
            if (pszValue)
                prc = (PSZ)pszSearchIn + (pszValue - pszSearchIn2);
            else
                prc = (PSZ)pszSearchIn + (pszName - pszSearchIn2) + cbAttrib;
        }
    }

    return prc;
}
1216
/*
 * strhGetNumAttribValue:
 *      stores the numerical parameter value of an HTML-style
 *      tag in *pl.
 *
 *      Returns the address of the tag parameter in the
 *      search buffer, if found, or NULL. If the value is
 *      enclosed in quotes, the returned address is that
 *      of the first character after the opening quote.
 *
 *      The value is parsed with sscanf("%ld"). If the
 *      attribute is found, but does not start with a
 *      number, *pl is left unchanged, although a non-NULL
 *      pointer is returned.
 *
 *      <B>Example:</B>
 +          strhGetNumAttribValue("<PAGE BLAH=123>", "BLAH", &l);
 *
 *      stores 123 in the "l" variable.
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
 */

PSZ strhGetNumAttribValue(const char *pszSearchIn,       // in: where to search
                          const char *pszTag,            // e.g. "INDEX"
                          PLONG pl)                      // out: numerical value
{
    PSZ pParam;
    if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
    {
        // skip an opening quote
        if (    (*pParam == '\"')
             || (*pParam == '\'')
           )
            pParam++;           // V0.9.9 (2001-04-04) [umoeller]

        sscanf(pParam, "%ld", pl);
    }

    return pParam;
}
1251
/*
 * strhGetTextAttr:
 *      retrieves the attribute value of a textual HTML-style tag
 *      in a newly allocated buffer, which is returned,
 *      or NULL if attribute not found.
 *      If an attribute value is to contain spaces, it
 *      must be enclosed in quotes (single or double).
 *
 *      The value ends at the closing quote, or at the next
 *      space if it is not quoted, or at the ">" which closes
 *      the tag, or at the end of the string.
 *
 *      The offset of the attribute data in pszSearchIn is
 *      returned in *pulOffset so that you can do multiple
 *      searches.
 *
 *      This returns a new buffer, which should be free()'d after use.
 *      NULL is also returned if the attribute value is empty
 *      (*pulOffset has been set then) or if malloc failed.
 *
 *      <B>Example:</B>
 +          ULONG   ulOfs = 0;
 +          strhGetTextAttr("<PAGE BLAH=\"blublub\">", "BLAH", &ulOfs)
 +                           ............^ ulOfs
 *
 *      returns a new string with the value "blublub" (without
 *      quotes) and sets ulOfs to 12.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhGetTextAttr(const char *pszSearchIn,
                    const char *pszTag,
                    PULONG pulOffset)       // out: offset where found
{
    PSZ     pParam,
            pParam2,
            prc = NULL;
    ULONG   ulCount = 0;
    LONG    lNestingLevel = 0;

    if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
    {
        // determine end character to search for: a space
        CHAR cEnd = ' ';
        if (*pParam == '\"')
        {
            // or, if the data is enclosed in quotes, a quote
            cEnd = '\"';
            pParam++;
        }

        // V1.0.3 (2004-11-10) [pr]: @@fixes 461
        if (*pParam == '\'')
        {
            // or, if the data is enclosed in single quotes, a single quote
            cEnd = '\'';
            pParam++;
        }

        if (pulOffset)
            // store the offset
            (*pulOffset) = pParam - (PSZ)pszSearchIn;

        // now find end of attribute
        pParam2 = pParam;
        while (*pParam)
        {
            if (*pParam == cEnd)
                // end character found
                break;
            else if (*pParam == '<')
                // yet another opening tag found:
                // this is probably some "<" in the attributes
                lNestingLevel++;
            else if (*pParam == '>')
            {
                lNestingLevel--;
                if (lNestingLevel < 0)
                    // end of tag found:
                    break;
            }
            ulCount++;
            pParam++;
        }

        // copy attribute to new buffer
        if (    (ulCount)
             && (prc = (PSZ)malloc(ulCount + 1))
           )
        {
            memcpy(prc, pParam2, ulCount);
            *(prc + ulCount) = 0;
        }
    }

    return prc;
}
1342
1343	/*
1344	* strhFindEndOfTag:
1345	* returns a pointer to the ">" char
1346	* which seems to terminate the tag beginning
1347	* after pszBeginOfTag.
1348	*
1349	* If additional "<" chars are found, we look
1350	* for additional ">" characters too.
1351	*
1352	* Note: You must pass the address of the opening
1353	* '<' character to this function.
1354	*
1355	* Example:
1356	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1357	+ strhFindEndOfTag(pszTest)
1358	+ returns.................................^ this.
1359	*
1360	*@@added V0.9.0 [umoeller]
1361	*/
1362
1363	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1364	{
1365	PSZ p = (PSZ)pszBeginOfTag,
1366	prc = NULL;
1367	LONG lNestingLevel = 0;
1368
1369	while (*p)
1370	{
1371	if (*p == '<')
1372	// another opening tag found:
1373	lNestingLevel++;
1374	else if (*p == '>')
1375	{
1376	// closing tag found:
1377	lNestingLevel--;
1378	if (lNestingLevel < 1)
1379	{
1380	// corresponding: return this
1381	prc = p;
1382	break;
1383	}
1384	}
1385	p++;
1386	}
1387
1388	return prc;
1389	}
1390
1391	/*
1392	* strhGetBlock:
1393	* this complex function searches the given string
1394	* for a pair of opening/closing HTML-style tags.
1395	*
1396	* If found, this routine returns TRUE and does
1397	* the following:
1398	*
1399	* 1) allocate a new buffer, copy the text
1400	* enclosed by the opening/closing tags
1401	* into it and set *ppszBlock to that
1402	* buffer;
1403	*
1404	* 2) if the opening tag has any attributes,
1405	* allocate another buffer, copy the
1406	* attributes into it and set *ppszAttrs
1407	* to that buffer; if no attributes are
1408	* found, *ppszAttrs will be NULL;
1409	*
1410	* 3) set *pulOffset to the offset from the
1411	* beginning of *ppszSearchIn where the
1412	* opening tag was found;
1413	*
1414	* 4) advance *ppszSearchIn to after the
1415	* closing tag, so that you can do
1416	* multiple searches without finding the
1417	* same tags twice.
1418	*
1419	* All buffers should be freed using free().
1420	*
1421	* This returns the following:
1422	* -- 0: no error
1423	* -- 1: tag not found at all (doesn't have to be an error)
1424	* -- 2: begin tag found, but no corresponding end tag found. This
1425	* is a real error.
1426	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
1427	*
1428	* <B>Example:</B>
1429	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
1430	+ PSZ pszBlock, pszAttrs;
1431	+ ULONG ulOfs;
1432	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1433	*
1434	* would do the following:
1435	*
1436	* 1) set pszBlock to a new string containing "This is page 1."
1437	* without quotes;
1438	*
1439	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1>";
1440	*
1441	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
1442	*
1443	* 4) pSearch would be advanced to point to the "More text"
1444	* string in the original buffer.
1445	*
1446	* Hey-hey. A one-shot function, fairly complicated, but indispensable
1447	* for HTML parsing.
1448	*
1449	*@@added V0.9.0 [umoeller]
1450	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1451	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1452	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1453	*/
1454
1455	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1456	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1457	const char *pszTag,
1458	PSZ *ppszBlock, // out: block enclosed by the tags
1459	PSZ *ppszAttribs, // out: attributes of the opening tag
1460	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1461	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1462	{
1463	ULONG ulrc = 1;
1464	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1465	pszSearch2 = pszBeginTag,
1466	pszClosingTag;
1467	ULONG cbTag = strlen(pszTag);
1468
1469	// go thru the block and check all tags if it's the
1470	// begin tag we're looking for
1471	while ((pszBeginTag = strchr(pszBeginTag, '<')))
1472	{
1473	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1474	// yes: stop
1475	break;
1476	else
1477	pszBeginTag++;
1478	}
1479
1480	if (pszBeginTag)
1481	{
1482	// we found <TAG>:
1483	ULONG ulNestingLevel = 0;
1484
1485	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1486	// strchr(pszBeginTag, '>');
1487	if (pszEndOfBeginTag)
1488	{
1489	// does the caller want the attributes?
1490	if (ppszAttribs)
1491	{
1492	// yes: then copy them
1493	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1494	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1495	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1496	// add terminating 0
1497	*(pszAttrs + ulAttrLen) = 0;
1498
1499	*ppszAttribs = pszAttrs;
1500	}
1501
1502	// output offset of where we found the begin tag
1503	if (pulOfsBeginTag)
1504	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1505
1506	// now find corresponding closing tag (e.g. "</BODY>"
1507	pszBeginTag = pszEndOfBeginTag+1;
1508	// now we're behind the '>' char of the opening tag
1509	// increase offset of that too
1510	if (pulOfsBeginBlock)
1511	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1512
1513	// find next closing tag;
1514	// for the first run, pszSearch2 points to right
1515	// after the '>' char of the opening tag
1516	pszSearch2 = pszBeginTag;
1517	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1518	&& (pszClosingTag = strstr(pszSearch2, "<"))
1519	)
1520	{
1521	// if we have another opening tag before our closing
1522	// tag, we need to have several closing tags before
1523	// we're done
1524	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1525	ulNestingLevel++;
1526	else
1527	{
1528	// is this ours?
1529	if ( (*(pszClosingTag+1) == '/')
1530	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1531	)
1532	{
1533	// we've found a matching closing tag; is
1534	// it ours?
1535	if (ulNestingLevel == 0)
1536	{
1537	// our closing tag found:
1538	// allocate mem for a new buffer
1539	// and extract all the text between
1540	// open and closing tags to it
1541	ULONG ulLen = pszClosingTag - pszBeginTag;
1542	if (ppszBlock)
1543	{
1544	PSZ pNew = (PSZ)malloc(ulLen + 1);
1545	strhncpy0(pNew, pszBeginTag, ulLen);
1546	*ppszBlock = pNew;
1547	}
1548
1549	// raise search offset to after the closing tag
1550	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1551
1552	ulrc = 0;
1553
1554	break;
1555	} else
1556	// not our closing tag:
1557	ulNestingLevel--;
1558	}
1559	}
1560	// no matching closing tag: search on after that
1561	pszSearch2 = strhFindEndOfTag(pszClosingTag);
1562	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
1563
1564	if (!pszClosingTag)
1565	// no matching closing tag found:
1566	// return 2 (closing tag not found)
1567	ulrc = 2;
1568	} // end if (pszBeginTag)
1569	else
1570	// no matching ">" for opening tag found:
1571	ulrc = 3;
1572	}
1573
1574	return ulrc;
1575	}
1576
1577	/* ******************************************************************
1578	*
1579	* Miscellaneous
1580	*
1581	********************************************************************/
1582
1583	/*
1584	*@@ strhArrayAppend:
1585	* this appends a string to a "string array".
1586	*
1587	* A string array is considered a sequence of
1588	* zero-terminated strings in memory. That is,
1589	* after each string's null-byte, the next
1590	* string comes up.
1591	*
1592	* This is useful for composing a single block
1593	* of memory from, say, list box entries, which
1594	* can then be written to OS2.INI in one flush.
1595	*
1596	* To append strings to such an array, call this
1597	* function for each string you wish to append.
1598	* This will re-allocate *ppszRoot with each call,
1599	* and update *pcbRoot, which then contains the
1600	* total size of all strings (including all null
1601	* terminators).
1602	*
1603	* Pass *pcbRoot to PrfSaveProfileData to have the
1604	* block saved.
1605	*
1606	* Note: On the first call, ppszRoot and pcbRoot
1607	* _must_ be both NULL, or this crashes.
1608	*
1609	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1610	*/
1611
1612	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1613	const char *pcszNew, // in: string to append
1614	ULONG cbNew, // in: size of that string or 0 to run strlen() here
1615	PULONG pcbRoot) // in/out: size of array
1616	{
1617	PSZ pszTemp;
1618
1619	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1620	cbNew = strlen(pcszNew);
1621
1622	pszTemp = (PSZ)malloc(*pcbRoot
1623	+ cbNew
1624	+ 1); // two null bytes
1625	if (*ppszRoot)
1626	{
1627	// not first loop: copy old stuff
1628	memcpy(pszTemp,
1629	*ppszRoot,
1630	*pcbRoot);
1631	free(*ppszRoot);
1632	}
1633	// append new string
1634	strcpy(pszTemp + *pcbRoot,
1635	pcszNew);
1636	// update root
1637	*ppszRoot = pszTemp;
1638	// update length
1639	*pcbRoot += cbNew + 1;
1640	}
1641
1642	/*
1643	*@@ strhCreateDump:
1644	* this dumps a memory block into a string
1645	* and returns that string in a new buffer.
1646	*
1647	* You must free() the returned PSZ after use.
1648	*
1649	* The output looks like the following:
1650	*
1651	+ 0000: FE FF 0E 02 90 00 00 00 ........
1652	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
1653	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1654	*
1655	* Each line is terminated with a newline (\n)
1656	* character only.
1657	*
1658	*@@added V0.9.1 (2000-01-22) [umoeller]
1659	*/
1660
1661	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1662	ULONG ulSize, // in: size of buffer
1663	ULONG ulIndent) // in: indentation of every line
1664	{
1665	PSZ pszReturn = 0;
1666	XSTRING strReturn;
1667	CHAR szTemp[1000];
1668
1669	PBYTE pbCurrent = pb; // current byte
1670	ULONG ulCount = 0,
1671	ulCharsInLine = 0; // if this grows > 7, a new line is started
1672	CHAR szLine[400] = "",
1673	szAscii[30] = " "; // ASCII representation; filled for every line
1674	PSZ pszLine = szLine,
1675	pszAscii = szAscii;
1676
1677	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1678
1679	for (pbCurrent = pb;
1680	ulCount < ulSize;
1681	pbCurrent++, ulCount++)
1682	{
1683	if (ulCharsInLine == 0)
1684	{
1685	memset(szLine, ' ', ulIndent);
1686	pszLine += ulIndent;
1687	}
1688	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1689
1690	if ( (pbCurrent > 31) && (pbCurrent < 127) )
1691	// printable character:
1692	pszAscii = pbCurrent;
1693	else
1694	*pszAscii = '.';
1695	pszAscii++;
1696
1697	ulCharsInLine++;
1698	if ( (ulCharsInLine > 7) // 8 bytes added?
1699	\|\| (ulCount == ulSize-1) // end of buffer reached?
1700	)
1701	{
1702	// if we haven't had eight bytes yet,
1703	// fill buffer up to eight bytes with spaces
1704	ULONG ul2;
1705	for (ul2 = ulCharsInLine;
1706	ul2 < 8;
1707	ul2++)
1708	pszLine += sprintf(pszLine, " ");
1709
1710	sprintf(szTemp, "%04lX: %s %s\n",
1711	(ulCount & 0xFFFFFFF8), // offset in hex
1712	szLine, // bytes string
1713	szAscii); // ASCII string
1714	xstrcat(&strReturn, szTemp, 0);
1715
1716	// restart line buffer
1717	pszLine = szLine;
1718
1719	// clear ASCII buffer
1720	strcpy(szAscii, " ");
1721	pszAscii = szAscii;
1722
1723	// reset line counter
1724	ulCharsInLine = 0;
1725	}
1726	}
1727
1728	if (strReturn.cbAllocated)
1729	pszReturn = strReturn.psz;
1730
1731	return pszReturn;
1732	}
1733
1734	/* ******************************************************************
1735	*
1736	* Fast string searches
1737	*
1738	********************************************************************/
1739
1740	#define ASSERT(a)
1741
1742	/*
1743	* The following code has been taken from the "Standard
1744	* Function Library", file sflfind.c, and only slightly
1745	* modified to conform to the rest of this file.
1746	*
1747	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1748	* Revised: 98/05/04
1749	*
1750	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1751	*
1752	* The SFL Licence allows incorporating SFL code into other
1753	* programs, as long as the copyright is reprinted and the
1754	* code is marked as modified, so this is what we do.
1755	*/
1756
1757	/*
1758	*@@ strhmemfind:
1759	* searches for a pattern in a block of memory using the
1760	* Boyer-Moore-Horspool-Sunday algorithm.
1761	*
1762	* The block and pattern may contain any values; you must
1763	* explicitly provide their lengths. If you search for strings,
1764	* use strlen() on the buffers.
1765	*
1766	* Returns a pointer to the pattern if found within the block,
1767	* or NULL if the pattern was not found.
1768	*
1769	* This algorithm needs a "shift table" to cache data for the
1770	* search pattern. This table can be reused when performing
1771	* several searches with the same pattern.
1772	*
1773	* "shift" must point to an array big enough to hold 256 (8**2)
1774	* "size_t" values.
1775	*
1776	* If (*repeat_find == FALSE), the shift table is initialized.
1777	* So on the first search with a given pattern, *repeat_find
1778	* should be FALSE. This function sets it to TRUE after the
1779	* shift table is initialised, allowing the initialisation
1780	* phase to be skipped on subsequent searches.
1781	*
1782	* This function is most effective when repeated searches are
1783	* made for the same pattern in one or more large buffers.
1784	*
1785	* Example:
1786	*
1787	+ PSZ pszHaystack = "This is a sample string.",
1788	+ pszNeedle = "string";
1789	+ size_t shift[256];
1790	+ BOOL fRepeat = FALSE;
1791	+
1792	+ PSZ pFound = strhmemfind(pszHaystack,
1793	+ strlen(pszHaystack), // block size
1794	+ pszNeedle,
1795	+ strlen(pszNeedle), // pattern size
1796	+ shift,
1797	+ &fRepeat);
1798	*
1799	* Taken from the "Standard Function Library", file sflfind.c.
1800	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
1801	* Slightly modified by umoeller.
1802	*
1803	*@@added V0.9.3 (2000-05-08) [umoeller]
1804	*/
1805
1806	void* strhmemfind(const void *in_block, // in: block containing data
1807	size_t block_size, // in: size of block in bytes
1808	const void *in_pattern, // in: pattern to search for
1809	size_t pattern_size, // in: size of pattern block
1810	size_t *shift, // in/out: shift table (search buffer)
1811	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
1812	{
1813	size_t byte_nbr, // Distance through block
1814	match_size; // Size of matched part
1815	const unsigned char
1816	*match_base = NULL, // Base of match of pattern
1817	*match_ptr = NULL, // Point within current match
1818	*limit = NULL; // Last potiental match point
1819	const unsigned char
1820	block = (unsigned char ) in_block, // Concrete pointer to block data
1821	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
1822
1823	if ( (block == NULL)
1824	\|\| (pattern == NULL)
1825	\|\| (shift == NULL)
1826	)
1827	return NULL;
1828
1829	// Pattern must be smaller or equal in size to string
1830	if (block_size < pattern_size)
1831	return NULL; // Otherwise it's not found
1832
1833	if (pattern_size == 0) // Empty patterns match at start
1834	return ((void *)block);
1835
1836	// Build the shift table unless we're continuing a previous search
1837
1838	// The shift table determines how far to shift before trying to match
1839	// again, if a match at this point fails. If the byte after where the
1840	// end of our pattern falls is not in our pattern, then we start to
1841	// match again after that byte; otherwise we line up the last occurence
1842	// of that byte in our pattern under that byte, and try match again.
1843
1844	if (!repeat_find \|\| !*repeat_find)
1845	{
1846	for (byte_nbr = 0;
1847	byte_nbr < 256;
1848	byte_nbr++)
1849	shift[byte_nbr] = pattern_size + 1;
1850	for (byte_nbr = 0;
1851	byte_nbr < pattern_size;
1852	byte_nbr++)
1853	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1854
1855	if (repeat_find)
1856	*repeat_find = TRUE;
1857	}
1858
1859	// Search for the block, each time jumping up by the amount
1860	// computed in the shift table
1861
1862	limit = block + (block_size - pattern_size + 1);
1863	ASSERT (limit > block);
1864
1865	for (match_base = block;
1866	match_base < limit;
1867	match_base += shift[*(match_base + pattern_size)])
1868	{
1869	match_ptr = match_base;
1870	match_size = 0;
1871
1872	// Compare pattern until it all matches, or we find a difference
1873	while (*match_ptr++ == pattern[match_size++])
1874	{
1875	ASSERT (match_size <= pattern_size &&
1876	match_ptr == (match_base + match_size));
1877
1878	// If we found a match, return the start address
1879	if (match_size >= pattern_size)
1880	return ((void*)(match_base));
1881
1882	}
1883	}
1884	return NULL; // Found nothing
1885	}
1886
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found
 *      or if string or pattern is NULL. An empty pattern matches at
 *      the start of the string.
 *      Will match strings irrespective of case, as determined by
 *      tolower(). To match exact strings, use strhfind(). Will not
 *      work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t
        shift [256];                        // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                           // Index into byte array
        match_size;                         // Size of matched part
    const unsigned char
        *text,                              // string as unsigned bytes
        *pat,                               // pattern as unsigned bytes
        *match_base,                        // Base of match of pattern
        *limit;                             // First address where no match can start

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return NULL;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return NULL;                        // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return (char *) string;

    // tolower() is only defined for values representable as unsigned
    // char (and EOF), so all characters are accessed as unsigned bytes;
    // plain char may be signed and would yield negative values for
    // bytes >= 0x80
    text = (const unsigned char *) string;
    pat = (const unsigned char *) pattern;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails.  If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string.  If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potiental match.

    // limit > text holds here because pattern_size <= string_size
    limit = text + (string_size - pattern_size + 1);

    // The byte used for the shift lookup, match_base [pattern_size], is
    // at worst the null terminator of the string, so it can always be read.
    for (match_base = text;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (match_base [pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        match_size = 0;
        while (tolower (match_base [match_size]) == tolower (pat [match_size]))
        {
            // If we found a match, return the start address
            if (++match_size >= pattern_size)
                return (char *) string + (match_base - text);
        }
    }
    return NULL;                            // Found nothing
}
1981

Note: See TracBrowser for help on using the repository browser.

Download in other formats: