Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

stringh.c

Visit:

Last change on this file was 442, checked in by pr, 7 years ago
Revert previous attempt at fixing the quoting mess.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 63.9 KB

Rev	Line
[8]	1
	2	/*
	3	*@@sourcefile stringh.c:
	4	* contains string/text helper functions. These are good for
[13]	5	* parsing/splitting strings and other stuff used throughout
	6	* XWorkplace.
[8]	7	*
[13]	8	* Note that these functions are really a bunch of very mixed
	9	* up string helpers, which you may or may not find helpful.
	10	* If you're looking for string functions with memory
	11	* management, look at xstring.c instead.
	12	*
[8]	13	* Usage: All OS/2 programs.
	14	*
	15	* Function prefixes (new with V0.81):
	16	* -- strh* string helper functions.
	17	*
	18	* Note: Version numbering in this file relates to XWorkplace version
	19	* numbering.
	20	*
	21	*@@header "helpers\stringh.h"
	22	*/
	23
	24	/*
[442]	25	* Copyright (C) 1997-2006 Ulrich Möller.
[8]	26	* Parts Copyright (C) 1991-1999 iMatix Corporation.
[14]	27	* This file is part of the "XWorkplace helpers" source package.
	28	* This is free software; you can redistribute it and/or modify
[8]	29	* it under the terms of the GNU General Public License as published
	30	* by the Free Software Foundation, in version 2 as it comes in the
	31	* "COPYING" file of the XWorkplace main distribution.
	32	* This program is distributed in the hope that it will be useful,
	33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	35	* GNU General Public License for more details.
	36	*/
	37
	38	#define OS2EMX_PLAIN_CHAR
	39	// this is needed for "os2emx.h"; if this is defined,
	40	// emx will define PSZ as _signed_ char, otherwise
	41	// as unsigned char
	42
	43	#define INCL_WINSHELLDATA
[123]	44	#define INCL_DOSERRORS
[8]	45	#include <os2.h>
	46
	47	#include <stdlib.h>
	48	#include <stdio.h>
	49	#include <string.h>
	50	#include <ctype.h>
	51	#include <math.h>
	52
	53	#include "setup.h" // code generation and debugging options
	54
[91]	55	#define DONT_REPLACE_STRINGH_MALLOC
[8]	56	#include "helpers\stringh.h"
	57	#include "helpers\xstring.h" // extended string helpers
	58
	59	#pragma hdrstop
	60
	61	/*
	62	*@@category: Helpers\C helpers\String management
[21]	63	* See stringh.c and xstring.c.
[8]	64	*/
	65
	66	/*
[21]	67	*@@category: Helpers\C helpers\String management\C string helpers
	68	* See stringh.c.
	69	*/
	70
[123]	71	#ifdef __DEBUG_MALLOC_ENABLED__
	72
[21]	73	/*
[123]	74	*@@ strhStoreDebug:
	75	* memory debug version of strhStore.
	76	*
	77	*@@added V0.9.16 (2001-12-08) [umoeller]
	78	*/
	79
[164]	80	APIRET (strhStoreDebug)(PSZ *ppszTarget,
	81	PCSZ pcszSource,
	82	PULONG pulLength, // out: length of new string (ptr can be NULL)
	83	PCSZ pcszSourceFile,
	84	unsigned long ulLine,
	85	PCSZ pcszFunction)
[123]	86	{
	87	ULONG ulLength = 0;
	88
[164]	89
	90
[123]	91	if (ppszTarget)
	92	{
	93	if (*ppszTarget)
	94	free(*ppszTarget);
	95
	96	if ( (pcszSource)
	97	&& (ulLength = strlen(pcszSource))
	98	)
	99	{
	100	if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
	101	pcszSourceFile,
	102	ulLine,
	103	pcszFunction))
	104	memcpy(*ppszTarget, pcszSource, ulLength + 1);
	105	else
[174]	106	return ERROR_NOT_ENOUGH_MEMORY;
[123]	107	}
	108	else
	109	*ppszTarget = NULL;
	110	}
	111
	112	if (pulLength)
	113	*pulLength = ulLength;
	114
[169]	115	return NO_ERROR;
[123]	116	}
	117
	118	#endif
	119
	120	/*
[122]	121	*@@ strhStore:
	122	* stores a copy of the given string in the specified
	123	* buffer. Uses strdup internally.
	124	*
	125	* If *ppszTarget != NULL, the previous string is freed
	126	* and set to NULL.
	127	* If pcszSource != NULL, a copy of it is stored in the
	128	* buffer.
	129	*
	130	*@@added V0.9.16 (2001-12-06) [umoeller]
	131	*/
	132
[123]	133	APIRET strhStore(PSZ *ppszTarget,
	134	PCSZ pcszSource,
	135	PULONG pulLength) // out: length of new string (ptr can be NULL)
[122]	136	{
	137	ULONG ulLength = 0;
	138
	139	if (ppszTarget)
	140	{
	141	if (*ppszTarget)
	142	free(*ppszTarget);
	143
	144	if ( (pcszSource)
	145	&& (ulLength = strlen(pcszSource))
	146	)
	147	{
	148	if (*ppszTarget = (PSZ)malloc(ulLength + 1))
	149	memcpy(*ppszTarget, pcszSource, ulLength + 1);
[123]	150	else
[174]	151	return ERROR_NOT_ENOUGH_MEMORY;
[122]	152	}
	153	else
	154	*ppszTarget = NULL;
	155	}
[127]	156	else
[174]	157	return ERROR_INVALID_PARAMETER;
[122]	158
	159	if (pulLength)
	160	*pulLength = ulLength;
[123]	161
[169]	162	return NO_ERROR;
[122]	163	}
	164
	165	/*
[94]	166	*@@ strhcpy:
[91]	167	* like strdup, but this one doesn't crash if string2 is NULL,
	168	* but sets the first byte in string1 to \0 instead.
	169	*
	170	*@@added V0.9.14 (2001-08-01) [umoeller]
	171	*/
	172
[140]	173	PSZ strhcpy(PSZ string1, PCSZ string2)
[91]	174	{
	175	if (string2)
[174]	176	return strcpy(string1, string2);
[91]	177
	178	*string1 = '\0';
[174]	179	return string1;
[91]	180	}
	181
[240]	182	/*
	183	*@@ strhCopyBuf:
	184	* copies pcszSource to pszTarget, taking
	185	* its length into account.
	186	*
	187	* Returns:
	188	*
	189	* -- NO_ERROR
	190	*
	191	* -- ERROR_INVALID_PARAMETER: pcszSource is
	192	* null or points to a null byte.
	193	*
	194	* -- ERROR_FILENAME_EXCED_RANGE: pcszSource
	195	* is too long to fit into pszTarget.
	196	*
	197	*@@added V1.0.1 (2003-01-05) [umoeller]
	198	*/
	199
	200	APIRET strhCopyBuf(PSZ pszTarget,
	201	PCSZ pcszSource,
	202	ULONG cbTarget)
	203	{
	204	ULONG cb;
	205	if (!pcszSource \|\| !*pcszSource)
	206	return ERROR_INVALID_PARAMETER;
	207	cb = strlen(pcszSource) + 1;
	208	if (cb > cbTarget)
	209	return ERROR_FILENAME_EXCED_RANGE;
	210
	211	memcpy(pszTarget,
	212	pcszSource,
	213	cb);
	214	return NO_ERROR;
	215	}
	216
[91]	217	#ifdef __DEBUG_MALLOC_ENABLED__
	218
	219	/*
[123]	220	*@@ strhdupDebug:
[91]	221	* memory debug version of strhdup.
	222	*
	223	*@@added V0.9.0 [umoeller]
	224	*/
	225
[140]	226	PSZ strhdupDebug(PCSZ pcszSource,
[116]	227	unsigned long *pulLength,
[140]	228	PCSZ pcszSourceFile,
[91]	229	unsigned long ulLine,
[140]	230	PCSZ pcszFunction)
[91]	231	{
[116]	232	PSZ pszReturn = NULL;
	233	ULONG ulLength = 0;
	234
	235	if ( (pcszSource)
	236	&& (ulLength = strlen(pcszSource))
	237	)
[91]	238	{
[116]	239	if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
[123]	240	pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
[116]	241	ulLine,
	242	pcszFunction))
	243	memcpy(pszReturn, pcszSource, ulLength + 1);
[91]	244	}
[116]	245
	246	if (pulLength)
	247	*pulLength = ulLength;
	248
[174]	249	return pszReturn;
[91]	250	}
	251
	252	#endif // __DEBUG_MALLOC_ENABLED__
	253
	254	/*
	255	*@@ strhdup:
[116]	256	* like strdup, but this one doesn't crash if pszSource
[238]	257	* is NULL. Instead, this returns NULL if pcszSource is
	258	* NULL or points to a null byte. In addition, this
[116]	259	* can report the length of the string (V0.9.16).
[8]	260	*
	261	*@@added V0.9.0 [umoeller]
[116]	262	*@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
[8]	263	*/
	264
[140]	265	PSZ strhdup(PCSZ pcszSource,
[116]	266	unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
[8]	267	{
[116]	268	PSZ pszReturn = NULL;
	269	ULONG ulLength = 0;
	270
	271	if ( (pcszSource)
	272	&& (ulLength = strlen(pcszSource))
	273	)
	274	{
	275	if (pszReturn = (PSZ)malloc(ulLength + 1))
	276	memcpy(pszReturn, pcszSource, ulLength + 1);
	277	}
	278
	279	if (pulLength)
	280	*pulLength = ulLength;
	281
[174]	282	return pszReturn;
[8]	283	}
	284
	285	/*
[38]	286	*@@ strhcmp:
	287	* better strcmp. This doesn't crash if any of the
	288	* string pointers are NULL, but returns a proper
	289	* value then.
	290	*
	291	* Besides, this is guaranteed to only return -1, 0,
	292	* or +1, while strcmp can return any positive or
[56]	293	* negative value. This is useful for tree comparison
	294	* funcs.
[38]	295	*
	296	*@@added V0.9.9 (2001-02-16) [umoeller]
	297	*/
	298
[140]	299	int strhcmp(PCSZ p1, PCSZ p2)
[38]	300	{
	301	if (p1 && p2)
	302	{
	303	int i = strcmp(p1, p2);
[174]	304	if (i < 0) return -1;
	305	if (i > 0) return +1;
[38]	306	}
	307	else if (p1)
	308	// but p2 is NULL: p1 greater than p2 then
[174]	309	return +1;
[38]	310	else if (p2)
	311	// but p1 is NULL: p1 less than p2 then
[174]	312	return -1;
[38]	313
	314	// return 0 if strcmp returned 0 above or both strings are NULL
[174]	315	return 0;
[38]	316	}
	317
	318	/*
[56]	319	*@@ strhicmp:
	320	* like strhcmp, but compares without respect
	321	* to case.
	322	*
	323	*@@added V0.9.9 (2001-04-07) [umoeller]
	324	*/
	325
[140]	326	int strhicmp(PCSZ p1, PCSZ p2)
[56]	327	{
	328	if (p1 && p2)
	329	{
	330	int i = stricmp(p1, p2);
[174]	331	if (i < 0) return -1;
	332	if (i > 0) return +1;
[56]	333	}
	334	else if (p1)
	335	// but p2 is NULL: p1 greater than p2 then
[174]	336	return +1;
[56]	337	else if (p2)
	338	// but p1 is NULL: p1 less than p2 then
[174]	339	return -1;
[56]	340
	341	// return 0 if strcmp returned 0 above or both strings are NULL
[174]	342	return 0;
[56]	343	}
	344
	345	/*
[8]	346	*@@ strhistr:
	347	* like strstr, but case-insensitive.
	348	*
	349	*@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
	350	*/
	351
[140]	352	PSZ strhistr(PCSZ string1, PCSZ string2)
[8]	353	{
	354	PSZ prc = NULL;
	355
	356	if ((string1) && (string2))
	357	{
	358	PSZ pszSrchIn = strdup(string1);
	359	PSZ pszSrchFor = strdup(string2);
	360
	361	if ((pszSrchIn) && (pszSrchFor))
	362	{
	363	strupr(pszSrchIn);
	364	strupr(pszSrchFor);
	365
[153]	366	if (prc = strstr(pszSrchIn, pszSrchFor))
[8]	367	{
	368	// prc now has the first occurence of the string,
	369	// but in pszSrchIn; we need to map this
	370	// return value to the original string
	371	prc = (prc-pszSrchIn) // offset in pszSrchIn
	372	+ (PSZ)string1;
	373	}
	374	}
	375	if (pszSrchFor)
	376	free(pszSrchFor);
	377	if (pszSrchIn)
	378	free(pszSrchIn);
	379	}
[174]	380
	381	return prc;
[8]	382	}
	383
	384	/*
	385	*@@ strhncpy0:
	386	* like strncpy, but always appends a 0 character.
[132]	387	*
	388	*@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
[8]	389	*/
	390
	391	ULONG strhncpy0(PSZ pszTarget,
[140]	392	PCSZ pszSource,
[8]	393	ULONG cbSource)
	394	{
	395	ULONG ul = 0;
[132]	396	PSZ pTarget = pszTarget,
	397	pSource;
[8]	398
[132]	399	if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
	400	{
	401	for (ul = 0; ul < cbSource; ul++)
	402	if (*pSource)
	403	pTarget++ = pSource++;
	404	else
	405	break;
	406	}
	407
[8]	408	*pTarget = 0;
	409
[174]	410	return ul;
[8]	411	}
	412
	413	/*
[245]	414	*@@ strlcpy:
	415	* copies src to string dst of size siz. At most siz-1 characters
	416	* will be copied. Always NUL terminates, unless siz == 0.
	417	*
	418	* Returns strlen(src); if retval >= siz, truncation occurred.
	419	*
	420	* Taken from the OpenBSD sources at
	421	*
	422	+ ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/strlcpy.c
	423	*
	424	* Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
	425	* All rights reserved.
	426	*
	427	* OpenBSD licence applies (see top of that file).
	428	*
	429	*@@added V1.0.1 (2003-01-29) [umoeller]
	430	*/
	431
	432	size_t strlcpy(char *dst,
	433	const char *src,
	434	size_t siz)
	435	{
	436	register char *d = dst;
	437	register const char *s = src;
	438	register size_t n = siz;
	439
	440	/* Copy as many bytes as will fit */
	441	if (n != 0 && --n != 0)
	442	{
	443	do
	444	{
	445	if ((d++ = s++) == 0)
	446	break;
	447	} while (--n != 0);
	448	}
	449
	450	/* Not enough room in dst, add NUL and traverse rest of src */
	451	if (n == 0)
	452	{
	453	if (siz != 0)
	454	d = '\0'; / NUL-terminate dst */
	455	while (*s++)
	456	;
	457	}
	458
	459	return (s - src - 1); /* count does not include NUL */
	460	}
	461
	462	/*
	463	*@@ strlcat:
	464	* appends src to string dst of size siz. Unlike strncat,
	465	* siz is the full size of dst, not space left. At most
	466	* siz-1 characters will be copied. Always NUL terminates,
	467	* unless siz <= strlen(dst).
	468	*
	469	* Returns strlen(src) + MIN(siz, strlen(initial dst)),
	470	* in other words, strlen(dst) after the concatenation.
	471	* If retval >= siz, truncation occurred.
	472	*
	473	* Taken from the OpenBSD sources at
	474	*
	475	+ ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/strlcat.c
	476	*
	477	* Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
	478	* All rights reserved.
	479	*
	480	* OpenBSD licence applies (see top of that file).
	481	*
	482	*@@added V1.0.1 (2003-01-29) [umoeller]
	483	*/
	484
	485	size_t strlcat(char *dst,
	486	const char *src,
	487	size_t siz)
	488	{
	489	register char *d = dst;
	490	register const char *s = src;
	491	register size_t n = siz;
	492	size_t dlen;
	493
	494	/* Find the end of dst and adjust bytes left but don't go past end */
	495	while (n-- != 0 && *d != '\0')
	496	d++;
	497	dlen = d - dst;
	498	n = siz - dlen;
	499
	500	if (n == 0)
	501	return(dlen + strlen(s));
	502	while (*s != '\0')
	503	{
	504	if (n != 1)
	505	{
	506	d++ = s;
	507	n--;
	508	}
	509	s++;
	510	}
	511	*d = '\0';
	512
	513	return (dlen + (s - src)); /* count does not include NUL */
	514	}
	515
	516	/*
[153]	517	*@@ strhlen:
	518	* like strlen, but doesn't crash on
	519	* null strings, but returns 0 also.
	520	*
	521	*@@added V0.9.19 (2002-04-02) [umoeller]
	522	*/
	523
	524	ULONG strhlen(PCSZ pcsz)
	525	{
	526	if (pcsz)
[174]	527	return strlen(pcsz);
[153]	528
	529	return 0;
	530	}
	531
	532	/*
[143]	533	*@@ strhSize:
	534	* returns the size of the given string, which
	535	* is the memory required to allocate a copy,
	536	* including the null terminator.
	537	*
[152]	538	* Returns 0 only if pcsz is NULL. If pcsz
	539	* points to a null character, this returns 1.
[143]	540	*
	541	*@@added V0.9.18 (2002-02-13) [umoeller]
[152]	542	*@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
[143]	543	*/
	544
	545	ULONG strhSize(PCSZ pcsz)
	546	{
[152]	547	if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
[238]	548	return strlen(pcsz) + 1;
[143]	549
[174]	550	return 0;
[143]	551	}
	552
	553	/*
[8]	554	* strhCount:
	555	* this counts the occurences of c in pszSearch.
	556	*/
	557
[140]	558	ULONG strhCount(PCSZ pszSearch,
[8]	559	CHAR c)
	560	{
[242]	561	PCSZ p = pszSearch;
	562	ULONG ulCount = 0;
[8]	563	while (TRUE)
	564	{
[242]	565	if (!(p = strchr(p, c)))
	566	return ulCount;
	567
	568	ulCount++;
	569	p++;
[8]	570	}
	571	}
	572
	573	/*
	574	*@@ strhIsDecimal:
	575	* returns TRUE if psz consists of decimal digits only.
	576	*/
	577
	578	BOOL strhIsDecimal(PSZ psz)
	579	{
	580	PSZ p = psz;
	581	while (*p != 0)
	582	{
	583	if (isdigit(*p) == 0)
[174]	584	return FALSE;
[8]	585	p++;
	586	}
	587
[174]	588	return TRUE;
[8]	589	}
	590
[91]	591	#ifdef __DEBUG_MALLOC_ENABLED__
	592
[8]	593	/*
[91]	594	*@@ strhSubstrDebug:
	595	* memory debug version of strhSubstr.
	596	*
	597	*@@added V0.9.14 (2001-08-01) [umoeller]
	598	*/
	599
[140]	600	PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
	601	PCSZ pEnd, // in: last char (not included)
	602	PCSZ pcszSourceFile,
[91]	603	unsigned long ulLine,
[140]	604	PCSZ pcszFunction)
[91]	605	{
	606	PSZ pszSubstr = NULL;
	607
	608	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
	609	{
	610	ULONG cbSubstr = (pEnd - pBegin);
	611	if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
	612	pcszSourceFile,
	613	ulLine,
	614	pcszFunction))
	615	{
	616	// strhncpy0(pszSubstr, pBegin, cbSubstr);
	617	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
	618	*(pszSubstr + cbSubstr) = '\0';
	619	}
	620	}
	621
[174]	622	return pszSubstr;
[91]	623	}
	624
	625	#endif // __DEBUG_MALLOC_ENABLED__
	626
	627	/*
[8]	628	*@@ strhSubstr:
	629	* this creates a new PSZ containing the string
	630	* from pBegin to pEnd, excluding the pEnd character.
[13]	631	* The new string is null-terminated. The caller
	632	* must free() the new string after use.
[8]	633	*
	634	* Example:
	635	+ "1234567890"
	636	+ ^ ^
	637	+ p1 p2
	638	+ strhSubstr(p1, p2)
	639	* would return a new string containing "2345678".
[54]	640	*
	641	*@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
	642	*@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
[8]	643	*/
	644
[140]	645	PSZ strhSubstr(PCSZ pBegin, // in: first char
	646	PCSZ pEnd) // in: last char (not included)
[8]	647	{
[54]	648	PSZ pszSubstr = NULL;
	649
	650	if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
	651	{
	652	ULONG cbSubstr = (pEnd - pBegin);
[91]	653	if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
[54]	654	{
	655	memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
	656	*(pszSubstr + cbSubstr) = '\0';
	657	}
	658	}
	659
[174]	660	return pszSubstr;
[8]	661	}
	662
	663	/*
	664	*@@ strhExtract:
	665	* searches pszBuf for the cOpen character and returns
	666	* the data in between cOpen and cClose, excluding
	667	* those two characters, in a newly allocated buffer
	668	* which you must free() afterwards.
	669	*
	670	* Spaces and newlines/linefeeds are skipped.
	671	*
	672	* If the search was successful, the new buffer
	673	* is returned and, if (ppEnd != NULL), *ppEnd points
	674	* to the first character after the cClose character
	675	* found in the buffer.
	676	*
	677	* If the search was not successful, NULL is
	678	* returned, and *ppEnd is unchanged.
	679	*
	680	* If another cOpen character is found before
	681	* cClose, matching cClose characters will be skipped.
	682	* You can therefore nest the cOpen and cClose
	683	* characters.
	684	*
	685	* This function ignores cOpen and cClose characters
	686	* in C-style comments and strings surrounded by
	687	* double quotes.
	688	*
	689	* Example:
[161]	690	*
[8]	691	+ PSZ pszBuf = "KEYWORD { --blah-- } next",
	692	+ pEnd;
	693	+ strhExtract(pszBuf,
	694	+ '{', '}',
	695	+ &pEnd)
[161]	696	*
[8]	697	* would return a new buffer containing " --blah-- ",
	698	* and ppEnd would afterwards point to the space
	699	* before "next" in the static buffer.
	700	*
	701	*@@added V0.9.0 [umoeller]
	702	*/
	703
[161]	704	PSZ strhExtract(PCSZ pszBuf, // in: search buffer
[8]	705	CHAR cOpen, // in: opening char
	706	CHAR cClose, // in: closing char
[161]	707	PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
[8]	708	{
	709	PSZ pszReturn = NULL;
[161]	710	PCSZ pOpen;
	711	if ( (pszBuf)
	712	&& (pOpen = strchr(pszBuf, cOpen))
	713	)
[8]	714	{
[161]	715	// opening char found:
	716	// now go thru the whole rest of the buffer
	717	PCSZ p = pOpen + 1;
	718	LONG lLevel = 1; // if this goes 0, we're done
	719	while (*p)
[8]	720	{
[161]	721	if (*p == cOpen)
	722	lLevel++;
	723	else if (*p == cClose)
[8]	724	{
[161]	725	lLevel--;
	726	if (lLevel <= 0)
[8]	727	{
[161]	728	// matching closing bracket found:
	729	// extract string
	730	pszReturn = strhSubstr(pOpen + 1, // after cOpen
	731	p); // excluding cClose
	732	if (ppEnd)
	733	*ppEnd = p + 1;
	734	break; // while (*p)
[8]	735	}
[161]	736	}
	737	else if (*p == '\"')
	738	{
	739	// beginning of string:
	740	PCSZ p2 = p+1;
	741	// find end of string
	742	while ((p2) && (p2 != '\"'))
	743	p2++;
[8]	744
[161]	745	if (*p2 == '\"')
	746	// closing quote found:
	747	// search on after that
	748	p = p2; // raised below
	749	else
	750	break; // while (*p)
	751	}
[8]	752
[161]	753	p++;
[8]	754	}
	755	}
	756
[174]	757	return pszReturn;
[8]	758	}
	759
	760	/*
	761	*@@ strhQuote:
	762	* similar to strhExtract, except that
	763	* opening and closing chars are the same,
	764	* and therefore no nesting is possible.
	765	* Useful for extracting stuff between
	766	* quotes.
	767	*
	768	*@@added V0.9.0 [umoeller]
	769	*/
	770
	771	PSZ strhQuote(PSZ pszBuf,
	772	CHAR cQuote,
	773	PSZ *ppEnd)
	774	{
	775	PSZ pszReturn = NULL,
	776	p1 = NULL;
	777	if ((p1 = strchr(pszBuf, cQuote)))
	778	{
[143]	779	PSZ p2;
	780	if (p2 = strchr(p1+1, cQuote))
[8]	781	{
	782	pszReturn = strhSubstr(p1+1, p2);
	783	if (ppEnd)
	784	// store closing char
	785	*ppEnd = p2 + 1;
	786	}
	787	}
	788
[174]	789	return pszReturn;
[8]	790	}
	791
	792	/*
	793	*@@ strhStrip:
	794	* removes all double spaces.
	795	* This copies within the "psz" buffer.
	796	* If any double spaces are found, the
	797	* string will be shorter than before,
	798	* but the buffer is _not_ reallocated,
	799	* so there will be unused bytes at the
	800	* end.
	801	*
	802	* Returns the number of spaces removed.
	803	*
	804	*@@added V0.9.0 [umoeller]
	805	*/
	806
	807	ULONG strhStrip(PSZ psz) // in/out: string
	808	{
	809	PSZ p;
	810	ULONG cb = strlen(psz),
	811	ulrc = 0;
	812
	813	for (p = psz; p < psz+cb; p++)
	814	{
	815	if ((p == ' ') && ((p+1) == ' '))
	816	{
	817	PSZ p2 = p;
	818	while (*p2)
	819	{
	820	p2 = (p2+1);
	821	p2++;
	822	}
	823	cb--;
	824	p--;
	825	ulrc++;
	826	}
	827	}
[174]	828	return ulrc;
[8]	829	}
	830
	831	/*
[12]	832	*@@ strhins:
	833	* this inserts one string into another.
	834	*
	835	* pszInsert is inserted into pszBuffer at offset
	836	* ulInsertOfs (which counts from 0).
	837	*
	838	* A newly allocated string is returned. pszBuffer is
	839	* not changed. The new string should be free()'d after
	840	* use.
	841	*
	842	* Upon errors, NULL is returned.
	843	*
	844	*@@changed V0.9.0 [umoeller]: completely rewritten.
	845	*/
	846
[140]	847	PSZ strhins(PCSZ pcszBuffer,
[12]	848	ULONG ulInsertOfs,
[140]	849	PCSZ pcszInsert)
[12]	850	{
	851	PSZ pszNew = NULL;
	852
	853	if ((pcszBuffer) && (pcszInsert))
	854	{
	855	do {
	856	ULONG cbBuffer = strlen(pcszBuffer);
	857	ULONG cbInsert = strlen(pcszInsert);
	858
	859	// check string length
	860	if (ulInsertOfs > cbBuffer + 1)
	861	break; // do
	862
	863	// OK, let's go.
	864	pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
	865
	866	// copy stuff before pInsertPos
	867	memcpy(pszNew,
	868	pcszBuffer,
	869	ulInsertOfs);
	870	// copy string to be inserted
	871	memcpy(pszNew + ulInsertOfs,
	872	pcszInsert,
	873	cbInsert);
	874	// copy stuff after pInsertPos
	875	strcpy(pszNew + ulInsertOfs + cbInsert,
	876	pcszBuffer + ulInsertOfs);
	877	} while (FALSE);
	878	}
	879
[174]	880	return pszNew;
[12]	881	}
	882
	883	/*
[23]	884	*@@ strhFindReplace:
	885	* wrapper around xstrFindReplace to work with C strings.
[12]	886	* Note that *ppszBuf can get reallocated and must
	887	* be free()'able.
	888	*
[13]	889	* Repetitive use of this wrapper is not recommended
[23]	890	* because it is considerably slower than xstrFindReplace.
[12]	891	*
	892	*@@added V0.9.6 (2000-11-01) [umoeller]
[23]	893	*@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
[12]	894	*/
	895
[23]	896	ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
	897	PULONG pulOfs, // in: where to begin search (0 = start);
	898	// out: ofs of first char after replacement string
[140]	899	PCSZ pcszSearch, // in: search string; cannot be NULL
	900	PCSZ pcszReplace) // in: replacement string; cannot be NULL
[12]	901	{
	902	ULONG ulrc = 0;
	903	XSTRING xstrBuf,
	904	xstrFind,
	905	xstrReplace;
[13]	906	size_t ShiftTable[256];
	907	BOOL fRepeat = FALSE;
[23]	908	xstrInitSet(&xstrBuf, *ppszBuf);
	909	// reallocated and returned, so we're safe
	910	xstrInitSet(&xstrFind, (PSZ)pcszSearch);
	911	xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
	912	// these two are never freed, so we're safe too
[12]	913
[23]	914	if ((ulrc = xstrFindReplace(&xstrBuf,
	915	pulOfs,
	916	&xstrFind,
	917	&xstrReplace,
	918	ShiftTable,
	919	&fRepeat)))
[12]	920	// replaced:
	921	*ppszBuf = xstrBuf.psz;
	922
[174]	923	return ulrc;
[12]	924	}
	925
	926	/*
[8]	927	* strhWords:
	928	* returns the no. of words in "psz".
	929	* A string is considered a "word" if
	930	* it is surrounded by spaces only.
	931	*
	932	*@@added V0.9.0 [umoeller]
	933	*/
	934
	935	ULONG strhWords(PSZ psz)
	936	{
	937	PSZ p;
	938	ULONG cb = strlen(psz),
	939	ulWords = 0;
	940	if (cb > 1)
	941	{
	942	ulWords = 1;
	943	for (p = psz; p < psz+cb; p++)
	944	if (*p == ' ')
	945	ulWords++;
	946	}
[174]	947	return ulWords;
[8]	948	}
	949
	950	/*
	951	*@@ strhGetWord:
	952	* finds word boundaries.
	953	*
	954	* *ppszStart is used as the beginning of the
	955	* search.
	956	*
	957	* If a word is found, *ppszStart is set to
	958	* the first character of the word which was
	959	* found and *ppszEnd receives the address
	960	* of the first character _after_ the word,
	961	* which is probably a space or a \n or \r char.
	962	* We then return TRUE.
	963	*
	964	* The search is stopped if a null character
	965	* is found or pLimit is reached. In that case,
	966	* FALSE is returned.
	967	*
	968	*@@added V0.9.1 (2000-02-13) [umoeller]
	969	*/
	970
	971	BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
	972	// out: start of word (if TRUE is returned)
[140]	973	PCSZ pLimit, // in: ptr to last char after *ppszStart to be
[8]	974	// searched; if the word does not end before
	975	// or with this char, FALSE is returned
[140]	976	PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
	977	PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
[8]	978	PSZ *ppszEnd) // out: first char _after_ word
	979	// (if TRUE is returned)
	980	{
	981	// characters after which a word can be started
[140]	982	// PCSZ pcszBeginChars = "\x0d\x0a ";
	983	// PCSZ pcszEndChars = "\x0d\x0a /-";
[8]	984
	985	PSZ pStart = *ppszStart;
	986
	987	// find start of word
	988	while ( (pStart < (PSZ)pLimit)
	989	&& (strchr(pcszBeginChars, *pStart))
	990	)
	991	// if char is a "before word" char: go for next
	992	pStart++;
	993
	994	if (pStart < (PSZ)pLimit)
	995	{
	996	// found a valid "word start" character
	997	// (which is not in pcszBeginChars):
	998
	999	// find end of word
	1000	PSZ pEndOfWord = pStart;
	1001	while ( (pEndOfWord <= (PSZ)pLimit)
	1002	&& (strchr(pcszEndChars, *pEndOfWord) == 0)
	1003	)
	1004	// if char is not an "end word" char: go for next
	1005	pEndOfWord++;
	1006
	1007	if (pEndOfWord <= (PSZ)pLimit)
	1008	{
	1009	// whoa, got a word:
	1010	*ppszStart = pStart;
	1011	*ppszEnd = pEndOfWord;
[174]	1012	return TRUE;
[8]	1013	}
	1014	}
	1015
[174]	1016	return FALSE;
[8]	1017	}
	1018
	1019	/*
[13]	1020	*@@ strhIsWord:
	1021	* returns TRUE if p points to a "word"
	1022	* in pcszBuf.
	1023	*
	1024	* p is considered a word if the character _before_
	1025	* it is in pcszBeginChars and the char _after_
	1026	* it (i.e. *(p+cbSearch)) is in pcszEndChars.
	1027	*
	1028	*@@added V0.9.6 (2000-11-12) [umoeller]
[144]	1029	*@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
[13]	1030	*/
	1031
[140]	1032	BOOL strhIsWord(PCSZ pcszBuf,
	1033	PCSZ p, // in: start of word
[144]	1034	ULONG cbSearch, // in: length of word
[140]	1035	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
	1036	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
[13]	1037	{
	1038	// check previous char
	1039	if ( (p == pcszBuf)
	1040	\|\| (strchr(pcszBeginChars, *(p-1)))
	1041	)
	1042	{
	1043	// OK, valid begin char:
	1044	// check end char
[144]	1045	CHAR cNextChar;
	1046	if (!(cNextChar = p[cbSearch]))
	1047	// null terminator:
	1048	return TRUE;
[13]	1049	else
	1050	{
[144]	1051	// not null terminator: check if char is
	1052	// in the list of valid end chars
	1053	if (strchr(pcszEndChars, cNextChar))
	1054	{
[13]	1055	// OK, is end char: avoid doubles of that char,
	1056	// but allow spaces
[144]	1057	// fixed V0.9.18 (2002-02-23) [umoeller]
	1058	CHAR cNextNext = p[cbSearch + 1];
	1059	if ( (cNextNext != cNextChar)
	1060	\|\| (cNextNext == ' ')
	1061	\|\| (cNextNext == 0)
[13]	1062	)
[144]	1063	return TRUE;
	1064	}
[13]	1065	}
	1066	}
	1067
[144]	1068	return FALSE;
[13]	1069	}
	1070
	1071	/*
[8]	1072	*@@ strhFindWord:
	1073	* searches for pszSearch in pszBuf, which is
	1074	* returned if found (or NULL if not).
	1075	*
	1076	* As opposed to strstr, this finds pszSearch
	1077	* only if it is a "word". A search string is
	1078	* considered a word if the character _before_
	1079	* it is in pcszBeginChars and the char _after_
	1080	* it is in pcszEndChars.
	1081	*
	1082	* Example:
	1083	+ strhFindWord("This is an example.", "is");
	1084	+ returns ...........^ this, but not the "is" in "This".
	1085	*
	1086	* The algorithm here uses strstr to find pszSearch in pszBuf
[13]	1087	* and performs additional "is-word" checks for each item found
	1088	* (by calling strhIsWord).
[8]	1089	*
[13]	1090	* Note that this function is fairly slow compared to xstrFindWord.
	1091	*
[8]	1092	*@@added V0.9.0 (99-11-08) [umoeller]
[51]	1093	*@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
[8]	1094	*/
	1095
[140]	1096	PSZ strhFindWord(PCSZ pszBuf,
	1097	PCSZ pszSearch,
	1098	PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
	1099	PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
[8]	1100	{
	1101	PSZ pszReturn = 0;
	1102	ULONG cbBuf = strlen(pszBuf),
	1103	cbSearch = strlen(pszSearch);
	1104
	1105	if ((cbBuf) && (cbSearch))
	1106	{
[140]	1107	PCSZ p = pszBuf;
[8]	1108
	1109	do // while p
	1110	{
	1111	p = strstr(p, pszSearch);
	1112	if (p)
	1113	{
	1114	// string found:
	1115	// check if that's a word
	1116
[13]	1117	if (strhIsWord(pszBuf,
	1118	p,
	1119	cbSearch,
	1120	pcszBeginChars,
	1121	pcszEndChars))
[8]	1122	{
[13]	1123	// valid end char:
	1124	pszReturn = (PSZ)p;
	1125	break;
	1126	}
[8]	1127
	1128	p += cbSearch;
	1129	}
	1130	} while (p);
	1131
	1132	}
[174]	1133	return pszReturn;
[8]	1134	}
	1135
	1136	/*
	1137	*@@ strhFindEOL:
	1138	* returns a pointer to the next \r, \n or null character
	1139	* following pszSearchIn. Stores the offset in *pulOffset.
	1140	*
	1141	* This should never return NULL because at some point,
	1142	* there will be a null byte in your string.
	1143	*
	1144	*@@added V0.9.4 (2000-07-01) [umoeller]
	1145	*/
	1146
[140]	1147	PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
[8]	1148	PULONG pulOffset) // out: offset (ptr can be NULL)
	1149	{
[140]	1150	PCSZ p = pcszSearchIn,
	1151	prc = 0;
[8]	1152	while (TRUE)
	1153	{
	1154	if ( (p == '\r') \|\| (p == '\n') \|\| (*p == 0) )
	1155	{
	1156	prc = p;
	1157	break;
	1158	}
	1159	p++;
	1160	}
	1161
[23]	1162	if ((pulOffset) && (prc))
[21]	1163	*pulOffset = prc - pcszSearchIn;
	1164
[238]	1165	return (PSZ)prc;
[8]	1166	}
	1167
	1168	/*
	1169	*@@ strhFindNextLine:
	1170	* like strhFindEOL, but this returns the character
	1171	* _after_ \r or \n. Note that this might return
	1172	* a pointer to terminating NULL character also.
	1173	*/
	1174
	1175	PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
	1176	{
	1177	PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
	1178	// pEOL now points to the \r char or the terminating 0 byte;
	1179	// if not null byte, advance pointer
	1180	PSZ pNextLine = pEOL;
	1181	if (*pNextLine == '\r')
	1182	pNextLine++;
	1183	if (*pNextLine == '\n')
	1184	pNextLine++;
	1185	if (pulOffset)
	1186	*pulOffset = pNextLine - pszSearchIn;
[174]	1187	return pNextLine;
[8]	1188	}
	1189
	1190	/*
	1191	*@@ strhBeautifyTitle:
	1192	* replaces all line breaks (0xd, 0xa) with spaces.
[178]	1193	* Returns the new length of the string or 0 on
	1194	* errors.
[69]	1195	*
	1196	*@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
[178]	1197	*@@changed V0.9.19 (2002-06-18) [umoeller]: now returning length
[8]	1198	*/
	1199
[178]	1200	ULONG strhBeautifyTitle(PSZ psz)
[8]	1201	{
[178]	1202	ULONG ulrc;
	1203	PSZ p = psz;
[69]	1204
[178]	1205	while (*p)
	1206	{
[159]	1207	if ( (*p == '\r')
	1208	\|\| (*p == '\n')
[69]	1209	)
	1210	{
[178]	1211	if ( (p != psz)
	1212	&& (p[-1] == ' ')
[69]	1213	)
	1214	memmove(p, p + 1, strlen(p));
	1215	else
	1216	*p++ = ' ';
	1217	}
	1218	else
	1219	p++;
[178]	1220	}
[69]	1221
[178]	1222	return (p - psz);
[8]	1223	}
	1224
	1225	/*
[178]	1226	*@@ strhBeautifyTitle:
	1227	* like strhBeautifyTitle, but copies into
	1228	* a new buffer. More efficient.
	1229	*
	1230	*@@added V0.9.19 (2002-06-18) [umoeller]
	1231	*/
	1232
	1233	ULONG strhBeautifyTitle2(PSZ pszTarget, // out: beautified string
	1234	PCSZ pcszSource) // in: string to be beautified (can be NULL)
	1235	{
	1236	ULONG ulrc;
	1237	PCSZ pSource = pcszSource;
	1238	PSZ pTarget = pszTarget;
	1239	CHAR c;
	1240	if (!pcszSource)
	1241	{
	1242	*pszTarget = '\0';
	1243	return 0;
	1244	}
	1245
	1246	while (c = *pSource++)
	1247	{
	1248	if ( (c == '\r')
	1249	\|\| (c == '\n')
	1250	)
	1251	{
	1252	if ( (pTarget == pszTarget)
	1253	\|\| (pTarget[-1] != ' ')
	1254	)
	1255	*pTarget++ = ' ';
	1256	}
	1257	else
	1258	*pTarget++ = c;
	1259	}
	1260
	1261	// null-terminate
	1262	*pTarget = '\0';
	1263
	1264	return (pTarget - pszTarget);
	1265	}
	1266
	1267	/*
[245]	1268	*@@ strhKillChar:
	1269	* removes the first occurrence of c in psz
	1270	* by overwriting it with the following characters.
	1271	*
	1272	* For this to work, you _must_ pass in strlen(psz)
	1273	* in the ULONG pointed to by pulLength.
	1274	*
	1275	* Returns TRUE only if c was actually found. In
	1276	* that case, *pulLength is decremented.
	1277	*
	1278	*@@added V1.0.1 (2003-01-30) [umoeller]
	1279	*/
	1280
	1281	BOOL strhKillChar(PSZ psz,
	1282	CHAR c,
	1283	PULONG pulLength)
	1284	{
	1285	PSZ p;
	1286	if (p = strchr(psz, c))
	1287	{
	1288	// "string~rest"
	1289	// ÀÄÄÄÄÄÙ 6 chars (p - pszBuf)
	1290	// ÀÄÄÄÄÄÄÄÄÄÄÙ 11 chars (ulLen)
	1291	// ^ p (pszBuf + 6)
	1292	// ^ pszBuf
	1293	memmove(p, // pszBuf + 6
	1294	p + 1, // pszBuf + 7
	1295	// include null byte
	1296	*pulLength // 11
	1297	- (p - psz)); // - 6 = 5
	1298	--(*pulLength);
	1299
	1300	return TRUE;
	1301	}
	1302
	1303	return FALSE;
	1304	}
	1305
	1306	/*
[8]	1307	* strhFindAttribValue:
	1308	* searches for pszAttrib in pszSearchIn; if found,
	1309	* returns the first character after the "=" char.
	1310	* If "=" is not found, a space, \r, and \n are
	1311	* also accepted. This function searches without
	1312	* respecting case.
	1313	*
	1314	* <B>Example:</B>
[73]	1315	+ strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
[8]	1316	+
	1317	+ returns ....................... ^ this address.
	1318	*
	1319	*@@added V0.9.0 [umoeller]
	1320	*@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
[73]	1321	*@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
[329]	1322	*@@changed WarpIN V1.0.11 (2006-08-29) [pr]: handle attrib names in quoted strings @@fixes 718
[332]	1323	*@@changed WarpIN V1.0.12 (2006-09-07) [pr]: fix attrib handling again @@fixes 718 @@fixes 836
[8]	1324	*/
	1325
	1326	PSZ strhFindAttribValue(const char pszSearchIn, const char pszAttrib)
	1327	{
[329]	1328	PSZ prc = 0;
	1329	PSZ pszSearchIn2, p, pszStart, pszName, pszValue;
	1330	ULONG cbAttrib = strlen(pszAttrib),
	1331	ulLength = strlen(pszSearchIn);
	1332	BOOL fInQuote = FALSE;
[73]	1333
[329]	1334	// use alloca(), so memory is freed on function exit
	1335	pszSearchIn2 = (PSZ)alloca(ulLength + 1);
	1336	memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
[73]	1337
[332]	1338	// V1.0.12 (2006-09-07) [pr]: filter leading " and ' left over from the previous pass
	1339	for (p = pszSearchIn2; p == '\'' \|\| p == '"' \|\| *p == ' '
	1340	\|\| p == '\n' \|\| p == '\r' \|\| *p == '\t'; p++);
[442]	1341	for (pszStart = p; *p; p++)
[329]	1342	{
	1343	if (fInQuote)
[73]	1344	{
[332]	1345	// V1.0.12 (2006-09-07) [pr]: allow end of line to terminate a (broken) quote
[442]	1346	if (p == '"' \|\| p == '\n' \|\| *p == '\r')
[329]	1347	fInQuote = FALSE;
[73]	1348	}
[329]	1349	else
[8]	1350	{
[442]	1351	if (*p == '"')
[329]	1352	fInQuote = TRUE;
	1353	else
[52]	1354	{
[329]	1355	if (p == ' ' \|\| p == '\n' \|\| p == '\r' \|\| p == '\t')
	1356	{
	1357	*p = '\0';
	1358	pszName = strtok(pszStart, "=>");
	1359	pszStart = p + 1;
	1360	if (pszName && !stricmp(pszName, pszAttrib))
	1361	{
	1362	pszValue = strtok(NULL, "");
	1363	if (pszValue)
	1364	prc = (PSZ)pszSearchIn + (pszValue - pszSearchIn2);
	1365	else
	1366	prc = (PSZ)pszSearchIn + (pszName - pszSearchIn2) + cbAttrib;
[52]	1367
[329]	1368	return(prc);
	1369	}
	1370	}
[52]	1371	}
[8]	1372	}
[329]	1373	}
[52]	1374
[329]	1375	if (pszStart != p)
	1376	{
	1377	pszName = strtok(pszStart, "=>");
	1378	if (pszName && !stricmp(pszName, pszAttrib))
	1379	{
	1380	pszValue = strtok(NULL, "");
	1381	if (pszValue)
	1382	prc = (PSZ)pszSearchIn + (pszValue - pszSearchIn2);
	1383	else
	1384	prc = (PSZ)pszSearchIn + (pszName - pszSearchIn2) + cbAttrib;
	1385	}
[8]	1386	}
[329]	1387
[174]	1388	return prc;
[329]	1389	}
[8]	1390
	1391	/*
	1392	* strhGetNumAttribValue:
	1393	* stores the numerical parameter value of an HTML-style
	1394	* tag in *pl.
	1395	*
	1396	* Returns the address of the tag parameter in the
	1397	* search buffer, if found, or NULL.
	1398	*
	1399	* <B>Example:</B>
	1400	+ strhGetNumAttribValue("<PAGE BLAH=123>", "BLAH", &l);
	1401	*
	1402	* stores 123 in the "l" variable.
	1403	*
	1404	*@@added V0.9.0 [umoeller]
[55]	1405	*@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
[8]	1406	*/
	1407
	1408	PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
	1409	const char *pszTag, // e.g. "INDEX"
	1410	PLONG pl) // out: numerical value
	1411	{
	1412	PSZ pParam;
	1413	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
[55]	1414	{
	1415	if ( (*pParam == '\"')
	1416	\|\| (*pParam == '\'')
	1417	)
	1418	pParam++; // V0.9.9 (2001-04-04) [umoeller]
	1419
[13]	1420	sscanf(pParam, "%ld", pl);
[55]	1421	}
[8]	1422
[174]	1423	return pParam;
[8]	1424	}
	1425
	1426	/*
	1427	* strhGetTextAttr:
	1428	* retrieves the attribute value of a textual HTML-style tag
	1429	* in a newly allocated buffer, which is returned,
	1430	* or NULL if attribute not found.
	1431	* If an attribute value is to contain spaces, it
	1432	* must be enclosed in quotes.
	1433	*
	1434	* The offset of the attribute data in pszSearchIn is
	1435	* returned in *pulOffset so that you can do multiple
	1436	* searches.
	1437	*
	1438	* This returns a new buffer, which should be free()'d after use.
	1439	*
	1440	* <B>Example:</B>
	1441	+ ULONG ulOfs = 0;
	1442	+ strhGetTextAttr("<PAGE BLAH=\"blublub\">", "BLAH", &ulOfs)
	1443	+ ............^ ulOfs
	1444	*
	1445	* returns a new string with the value "blublub" (without
	1446	* quotes) and sets ulOfs to 12.
	1447	*
	1448	*@@added V0.9.0 [umoeller]
[335]	1449	*@@changed V1.0.13 (2006-09-10) [pr]: improved parsing
[8]	1450	*/
	1451
	1452	PSZ strhGetTextAttr(const char *pszSearchIn,
	1453	const char *pszTag,
	1454	PULONG pulOffset) // out: offset where found
	1455	{
	1456	PSZ pParam,
	1457	pParam2,
	1458	prc = NULL;
	1459	ULONG ulCount = 0;
	1460	LONG lNestingLevel = 0;
	1461
	1462	if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
	1463	{
	1464	// determine end character to search for: a space
[442]	1465	CHAR cEnd = ' ';
[263]	1466	// V1.0.3 (2004-11-10) [pr]: @@fixes 461
[335]	1467	// V1.0.13 (2006-09-10) [pr]: optimized
[442]	1468	if ((pParam == '\"') \|\| (pParam == '\''))
[263]	1469	{
[335]	1470	// or, if the data is enclosed in quotes, a quote or single quote
	1471	cEnd = *pParam;
[263]	1472	pParam++;
	1473	}
	1474
[8]	1475	if (pulOffset)
	1476	// store the offset
	1477	(*pulOffset) = pParam - (PSZ)pszSearchIn;
	1478
	1479	// now find end of attribute
	1480	pParam2 = pParam;
	1481	while (*pParam)
	1482	{
[335]	1483	// V1.0.13 (2006-09-10) [pr]: line end terminates non-quoted attribute
	1484	if ( ( (cEnd == ' ')
	1485	&& ((pParam == ' ') \|\| (pParam == '\r') \|\| (*pParam == '\n')))
[442]	1486	\|\| (*pParam == cEnd)
[335]	1487	)
[8]	1488	// end character found
	1489	break;
	1490	else if (*pParam == '<')
	1491	// yet another opening tag found:
	1492	// this is probably some "<" in the attributes
	1493	lNestingLevel++;
	1494	else if (*pParam == '>')
	1495	{
	1496	lNestingLevel--;
	1497	if (lNestingLevel < 0)
	1498	// end of tag found:
	1499	break;
	1500	}
	1501	ulCount++;
[442]	1502	pParam++;
[8]	1503	}
	1504
[442]	1505	// copy attribute to new buffer
	1506	if (ulCount)
[8]	1507	{
[442]	1508	prc = (PSZ)malloc(ulCount+1);
	1509	memcpy(prc, pParam2, ulCount);
	1510	*(prc+ulCount) = 0;
[8]	1511	}
	1512	}
[174]	1513	return prc;
[8]	1514	}
	1515
	1516	/*
	1517	* strhFindEndOfTag:
	1518	* returns a pointer to the ">" char
	1519	* which seems to terminate the tag beginning
	1520	* after pszBeginOfTag.
	1521	*
	1522	* If additional "<" chars are found, we look
	1523	* for additional ">" characters too.
	1524	*
	1525	* Note: You must pass the address of the opening
	1526	* '<' character to this function.
	1527	*
	1528	* Example:
	1529	+ PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
	1530	+ strhFindEndOfTag(pszTest)
	1531	+ returns.................................^ this.
	1532	*
	1533	*@@added V0.9.0 [umoeller]
	1534	*/
	1535
	1536	PSZ strhFindEndOfTag(const char *pszBeginOfTag)
	1537	{
	1538	PSZ p = (PSZ)pszBeginOfTag,
	1539	prc = NULL;
	1540	LONG lNestingLevel = 0;
	1541
	1542	while (*p)
	1543	{
	1544	if (*p == '<')
	1545	// another opening tag found:
	1546	lNestingLevel++;
	1547	else if (*p == '>')
	1548	{
	1549	// closing tag found:
	1550	lNestingLevel--;
	1551	if (lNestingLevel < 1)
	1552	{
	1553	// corresponding: return this
	1554	prc = p;
	1555	break;
	1556	}
	1557	}
	1558	p++;
	1559	}
	1560
[174]	1561	return prc;
[8]	1562	}
	1563
	1564	/*
	1565	* strhGetBlock:
	1566	* this complex function searches the given string
	1567	* for a pair of opening/closing HTML-style tags.
	1568	*
	1569	* If found, this routine returns TRUE and does
	1570	* the following:
	1571	*
	1572	* 1) allocate a new buffer, copy the text
	1573	* enclosed by the opening/closing tags
	1574	* into it and set *ppszBlock to that
	1575	* buffer;
	1576	*
	1577	* 2) if the opening tag has any attributes,
	1578	* allocate another buffer, copy the
	1579	* attributes into it and set *ppszAttrs
	1580	* to that buffer; if no attributes are
	1581	* found, *ppszAttrs will be NULL;
	1582	*
	1583	* 3) set *pulOffset to the offset from the
	1584	* beginning of *ppszSearchIn where the
	1585	* opening tag was found;
	1586	*
	1587	* 4) advance *ppszSearchIn to after the
	1588	* closing tag, so that you can do
	1589	* multiple searches without finding the
	1590	* same tags twice.
	1591	*
	1592	* All buffers should be freed using free().
	1593	*
	1594	* This returns the following:
	1595	* -- 0: no error
	1596	* -- 1: tag not found at all (doesn't have to be an error)
	1597	* -- 2: begin tag found, but no corresponding end tag found. This
	1598	* is a real error.
	1599	* -- 3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever")
	1600	*
	1601	* <B>Example:</B>
	1602	+ PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
	1603	+ PSZ pszBlock, pszAttrs;
	1604	+ ULONG ulOfs;
	1605	+ strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
	1606	*
	1607	* would do the following:
	1608	*
	1609	* 1) set pszBlock to a new string containing "This is page 1."
	1610	* without quotes;
	1611	*
	1612	* 2) set pszAttrs to a new string containing "<PAGE INDEX=1" (without the closing ">");
	1613	*
	1614	* 3) set ulOfs to 0, because "<PAGE" was found at the beginning;
	1615	*
	1616	* 4) pSearch would be advanced to point to the "More text"
	1617	* string in the original buffer.
	1618	*
	1619	* Hey-hey. A one-shot function, fairly complicated, but indispensable
	1620	* for HTML parsing.
	1621	*
	1622	*@@added V0.9.0 [umoeller]
	1623	*@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
	1624	*@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
	1625	*@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
	1626	*/
	1627
	1628	ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
	1629	PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
[147]	1630	const char *pszTag,
[8]	1631	PSZ *ppszBlock, // out: block enclosed by the tags
	1632	PSZ *ppszAttribs, // out: attributes of the opening tag
	1633	PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
	1634	PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
	1635	{
	1636	ULONG ulrc = 1;
	1637	PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
	1638	pszSearch2 = pszBeginTag,
	1639	pszClosingTag;
	1640	ULONG cbTag = strlen(pszTag);
	1641
	1642	// go thru the block and check all tags if it's the
	1643	// begin tag we're looking for
	1644	while ((pszBeginTag = strchr(pszBeginTag, '<')))
	1645	{
[147]	1646	if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
[8]	1647	// yes: stop
	1648	break;
	1649	else
	1650	pszBeginTag++;
	1651	}
	1652
	1653	if (pszBeginTag)
	1654	{
	1655	// we found <TAG>:
	1656	ULONG ulNestingLevel = 0;
	1657
	1658	PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
	1659	// strchr(pszBeginTag, '>');
	1660	if (pszEndOfBeginTag)
	1661	{
	1662	// does the caller want the attributes?
	1663	if (ppszAttribs)
	1664	{
	1665	// yes: then copy them
	1666	ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
	1667	PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
	1668	strncpy(pszAttrs, pszBeginTag, ulAttrLen);
	1669	// add terminating 0
	1670	*(pszAttrs + ulAttrLen) = 0;
	1671
	1672	*ppszAttribs = pszAttrs;
	1673	}
	1674
	1675	// output offset of where we found the begin tag
	1676	if (pulOfsBeginTag)
	1677	*pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
	1678
	1679	// now find corresponding closing tag (e.g. "</BODY>"
	1680	pszBeginTag = pszEndOfBeginTag+1;
	1681	// now we're behind the '>' char of the opening tag
	1682	// increase offset of that too
	1683	if (pulOfsBeginBlock)
	1684	*pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
	1685
	1686	// find next closing tag;
	1687	// for the first run, pszSearch2 points to right
	1688	// after the '>' char of the opening tag
	1689	pszSearch2 = pszBeginTag;
	1690	while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
	1691	&& (pszClosingTag = strstr(pszSearch2, "<"))
	1692	)
	1693	{
	1694	// if we have another opening tag before our closing
	1695	// tag, we need to have several closing tags before
	1696	// we're done
[147]	1697	if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
[8]	1698	ulNestingLevel++;
	1699	else
	1700	{
	1701	// is this ours?
	1702	if ( (*(pszClosingTag+1) == '/')
[147]	1703	&& (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
[8]	1704	)
	1705	{
	1706	// we've found a matching closing tag; is
	1707	// it ours?
	1708	if (ulNestingLevel == 0)
	1709	{
	1710	// our closing tag found:
	1711	// allocate mem for a new buffer
	1712	// and extract all the text between
	1713	// open and closing tags to it
	1714	ULONG ulLen = pszClosingTag - pszBeginTag;
	1715	if (ppszBlock)
	1716	{
	1717	PSZ pNew = (PSZ)malloc(ulLen + 1);
	1718	strhncpy0(pNew, pszBeginTag, ulLen);
	1719	*ppszBlock = pNew;
	1720	}
	1721
	1722	// raise search offset to after the closing tag
	1723	*pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
	1724
	1725	ulrc = 0;
	1726
	1727	break;
	1728	} else
	1729	// not our closing tag:
	1730	ulNestingLevel--;
	1731	}
	1732	}
	1733	// no matching closing tag: search on after that
	1734	pszSearch2 = strhFindEndOfTag(pszClosingTag);
	1735	} // end while (pszClosingTag = strstr(pszSearch2, "<"))
	1736
	1737	if (!pszClosingTag)
	1738	// no matching closing tag found:
	1739	// return 2 (closing tag not found)
	1740	ulrc = 2;
	1741	} // end if (pszBeginTag)
	1742	else
	1743	// no matching ">" for opening tag found:
	1744	ulrc = 3;
	1745	}
	1746
[174]	1747	return ulrc;
[8]	1748	}
	1749
	1750	/* ******************************************************************
[14]	1751	*
	1752	* Miscellaneous
	1753	*
[8]	1754	********************************************************************/
	1755
	1756	/*
	1757	*@@ strhArrayAppend:
	1758	* this appends a string to a "string array".
	1759	*
	1760	* A string array is considered a sequence of
	1761	* zero-terminated strings in memory. That is,
	1762	* after each string's null-byte, the next
	1763	* string comes up.
	1764	*
	1765	* This is useful for composing a single block
	1766	* of memory from, say, list box entries, which
	1767	* can then be written to OS2.INI in one flush.
	1768	*
	1769	* To append strings to such an array, call this
	1770	* function for each string you wish to append.
	1771	* This will re-allocate *ppszRoot with each call,
	1772	* and update *pcbRoot, which then contains the
	1773	* total size of all strings (including all null
	1774	* terminators).
	1775	*
	1776	* Pass *pcbRoot to PrfSaveProfileData to have the
	1777	* block saved.
	1778	*
	1779	* Note: On the first call, *ppszRoot _must_ be NULL and
	1780	* *pcbRoot _must_ be 0, or this crashes.
[81]	1781	*
	1782	*@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
[8]	1783	*/
	1784
	1785	VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
[14]	1786	const char *pcszNew, // in: string to append
[81]	1787	ULONG cbNew, // in: size of that string or 0 to run strlen() here
[8]	1788	PULONG pcbRoot) // in/out: size of array
	1789	{
[81]	1790	PSZ pszTemp;
	1791
	1792	if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
	1793	cbNew = strlen(pcszNew);
	1794
	1795	pszTemp = (PSZ)malloc(*pcbRoot
	1796	+ cbNew
	1797	+ 1); // two null bytes
[8]	1798	if (*ppszRoot)
	1799	{
	1800	// not first loop: copy old stuff
	1801	memcpy(pszTemp,
	1802	*ppszRoot,
	1803	*pcbRoot);
	1804	free(*ppszRoot);
	1805	}
	1806	// append new string
	1807	strcpy(pszTemp + *pcbRoot,
[14]	1808	pcszNew);
[8]	1809	// update root
	1810	*ppszRoot = pszTemp;
	1811	// update length
	1812	*pcbRoot += cbNew + 1;
	1813	}
	1814
	1815	/*
	1816	*@@ strhCreateDump:
	1817	* this dumps a memory block into a string
	1818	* and returns that string in a new buffer.
	1819	*
	1820	* You must free() the returned PSZ after use.
	1821	*
	1822	* The output looks like the following:
	1823	*
	1824	+ 0000: FE FF 0E 02 90 00 00 00 ........
	1825	+ 0008: FD 01 00 00 57 50 46 6F ....WPFo
	1826	+ 0010: 6C 64 65 72 00 78 01 34 lder.x.4
	1827	*
	1828	* Each line is terminated with a newline (\n)
	1829	* character only.
	1830	*
	1831	*@@added V0.9.1 (2000-01-22) [umoeller]
	1832	*/
	1833
	1834	PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
	1835	ULONG ulSize, // in: size of buffer
	1836	ULONG ulIndent) // in: indentation of every line
	1837	{
[12]	1838	PSZ pszReturn = 0;
	1839	XSTRING strReturn;
[8]	1840	CHAR szTemp[1000];
	1841
	1842	PBYTE pbCurrent = pb; // current byte
	1843	ULONG ulCount = 0,
	1844	ulCharsInLine = 0; // if this grows > 7, a new line is started
	1845	CHAR szLine[400] = "",
	1846	szAscii[30] = " "; // ASCII representation; filled for every line
	1847	PSZ pszLine = szLine,
	1848	pszAscii = szAscii;
	1849
[12]	1850	xstrInit(&strReturn, (ulSize * 30) + ulIndent);
	1851
[8]	1852	for (pbCurrent = pb;
	1853	ulCount < ulSize;
	1854	pbCurrent++, ulCount++)
	1855	{
	1856	if (ulCharsInLine == 0)
	1857	{
	1858	memset(szLine, ' ', ulIndent);
	1859	pszLine += ulIndent;
	1860	}
[13]	1861	pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
[8]	1862
	1863	if ( (pbCurrent > 31) && (pbCurrent < 127) )
	1864	// printable character:
	1865	pszAscii = pbCurrent;
	1866	else
	1867	*pszAscii = '.';
	1868	pszAscii++;
	1869
	1870	ulCharsInLine++;
	1871	if ( (ulCharsInLine > 7) // 8 bytes added?
	1872	\|\| (ulCount == ulSize-1) // end of buffer reached?
	1873	)
	1874	{
	1875	// if we haven't had eight bytes yet,
	1876	// fill buffer up to eight bytes with spaces
	1877	ULONG ul2;
	1878	for (ul2 = ulCharsInLine;
	1879	ul2 < 8;
	1880	ul2++)
	1881	pszLine += sprintf(pszLine, " ");
	1882
	1883	sprintf(szTemp, "%04lX: %s %s\n",
	1884	(ulCount & 0xFFFFFFF8), // offset in hex
	1885	szLine, // bytes string
	1886	szAscii); // ASCII string
[23]	1887	xstrcat(&strReturn, szTemp, 0);
[8]	1888
	1889	// restart line buffer
	1890	pszLine = szLine;
	1891
	1892	// clear ASCII buffer
	1893	strcpy(szAscii, " ");
	1894	pszAscii = szAscii;
	1895
	1896	// reset line counter
	1897	ulCharsInLine = 0;
	1898	}
	1899	}
	1900
[12]	1901	if (strReturn.cbAllocated)
	1902	pszReturn = strReturn.psz;
	1903
[174]	1904	return pszReturn;
[8]	1905	}
	1906
	1907	/* ******************************************************************
[14]	1908	*
	1909	* Fast string searches
	1910	*
[8]	1911	********************************************************************/
	1912
	1913	#define ASSERT(a)
	1914
	1915	/*
	1916	* The following code has been taken from the "Standard
	1917	* Function Library", file sflfind.c, and only slightly
	1918	* modified to conform to the rest of this file.
	1919	*
	1920	* Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
	1921	* Revised: 98/05/04
	1922	*
	1923	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
	1924	*
	1925	* The SFL Licence allows incorporating SFL code into other
	1926	* programs, as long as the copyright is reprinted and the
	1927	* code is marked as modified, so this is what we do.
	1928	*/
	1929
	1930	/*
[13]	1931	*@@ strhmemfind:
	1932	* searches for a pattern in a block of memory using the
	1933	* Boyer-Moore-Horspool-Sunday algorithm.
[8]	1934	*
[13]	1935	* The block and pattern may contain any values; you must
	1936	* explicitly provide their lengths. If you search for strings,
	1937	* use strlen() on the buffers.
[8]	1938	*
[13]	1939	* Returns a pointer to the pattern if found within the block,
	1940	* or NULL if the pattern was not found.
[8]	1941	*
[13]	1942	* This algorithm needs a "shift table" to cache data for the
	1943	* search pattern. This table can be reused when performing
	1944	* several searches with the same pattern.
[8]	1945	*
[13]	1946	* "shift" must point to an array big enough to hold 256 (2**8)
	1947	* "size_t" values.
[8]	1948	*
[13]	1949	* If (*repeat_find == FALSE), the shift table is initialized.
	1950	* So on the first search with a given pattern, *repeat_find
	1951	* should be FALSE. This function sets it to TRUE after the
	1952	* shift table is initialised, allowing the initialisation
	1953	* phase to be skipped on subsequent searches.
[8]	1954	*
[13]	1955	* This function is most effective when repeated searches are
	1956	* made for the same pattern in one or more large buffers.
[8]	1957	*
[13]	1958	* Example:
	1959	*
	1960	+ PSZ pszHaystack = "This is a sample string.",
	1961	+ pszNeedle = "string";
	1962	+ size_t shift[256];
	1963	+ BOOL fRepeat = FALSE;
[8]	1964	+
[13]	1965	+ PSZ pFound = strhmemfind(pszHaystack,
	1966	+ strlen(pszHaystack), // block size
	1967	+ pszNeedle,
	1968	+ strlen(pszNeedle), // pattern size
	1969	+ shift,
	1970	+ &fRepeat);
[8]	1971	*
	1972	* Taken from the "Standard Function Library", file sflfind.c.
	1973	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
[13]	1974	* Slightly modified by umoeller.
[8]	1975	*
	1976	*@@added V0.9.3 (2000-05-08) [umoeller]
	1977	*/
	1978
[13]	1979	void* strhmemfind(const void *in_block, // in: block containing data
	1980	size_t block_size, // in: size of block in bytes
	1981	const void *in_pattern, // in: pattern to search for
	1982	size_t pattern_size, // in: size of pattern block
	1983	size_t *shift, // in/out: shift table (search buffer)
	1984	BOOL repeat_find) // in/out: if TRUE, shift is already initialized
[8]	1985	{
[13]	1986	size_t byte_nbr, // Distance through block
	1987	match_size; // Size of matched part
[8]	1988	const unsigned char
[13]	1989	*match_base = NULL, // Base of match of pattern
	1990	*match_ptr = NULL, // Point within current match
	1991	*limit = NULL; // Last potiental match point
[8]	1992	const unsigned char
[13]	1993	block = (unsigned char ) in_block, // Concrete pointer to block data
	1994	pattern = (unsigned char ) in_pattern; // Concrete pointer to search value
[8]	1995
[13]	1996	if ( (block == NULL)
	1997	\|\| (pattern == NULL)
	1998	\|\| (shift == NULL)
	1999	)
[169]	2000	return NULL;
[8]	2001
	2002	// Pattern must be smaller or equal in size to string
	2003	if (block_size < pattern_size)
[169]	2004	return NULL; // Otherwise it's not found
[8]	2005
	2006	if (pattern_size == 0) // Empty patterns match at start
[238]	2007	return (void*)block;
[8]	2008
	2009	// Build the shift table unless we're continuing a previous search
	2010
	2011	// The shift table determines how far to shift before trying to match
	2012	// again, if a match at this point fails. If the byte after where the
	2013	// end of our pattern falls is not in our pattern, then we start to
	2014	// match again after that byte; otherwise we line up the last occurence
	2015	// of that byte in our pattern under that byte, and try match again.
	2016
	2017	if (!repeat_find \|\| !*repeat_find)
[13]	2018	{
	2019	for (byte_nbr = 0;
	2020	byte_nbr < 256;
	2021	byte_nbr++)
	2022	shift[byte_nbr] = pattern_size + 1;
	2023	for (byte_nbr = 0;
	2024	byte_nbr < pattern_size;
	2025	byte_nbr++)
	2026	shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
[8]	2027
	2028	if (repeat_find)
	2029	*repeat_find = TRUE;
[13]	2030	}
[8]	2031
	2032	// Search for the block, each time jumping up by the amount
	2033	// computed in the shift table
	2034
	2035	limit = block + (block_size - pattern_size + 1);
	2036	ASSERT (limit > block);
	2037
	2038	for (match_base = block;
	2039	match_base < limit;
[13]	2040	match_base += shift[*(match_base + pattern_size)])
	2041	{
[8]	2042	match_ptr = match_base;
	2043	match_size = 0;
	2044
	2045	// Compare pattern until it all matches, or we find a difference
[13]	2046	while (*match_ptr++ == pattern[match_size++])
	2047	{
[8]	2048	ASSERT (match_size <= pattern_size &&
	2049	match_ptr == (match_base + match_size));
	2050
[13]	2051	// If we found a match, return the start address
[8]	2052	if (match_size >= pattern_size)
[238]	2053	return (void*)match_base;
[8]	2054
[13]	2055	}
	2056	}
[169]	2057	return NULL; // Found nothing
[8]	2058	}
	2059
	2060	/*
	2061	*@@ strhtxtfind:
	2062	* searches for a case-insensitive text pattern in a string
	2063	* using the Boyer-Moore-Horspool-Sunday algorithm. The string and
	2064	* pattern are null-terminated strings. Returns a pointer to the pattern
	2065	* if found within the string, or NULL if the pattern was not found.
	2066	* Will match strings irrespective of case. To match exact strings, use
	2067	* strhfind(). Will not work on multibyte characters.
	2068	*
	2069	* Examples:
	2070	+ char *result;
	2071	+
	2072	+ result = strhtxtfind ("AbracaDabra", "cad");
	2073	+ if (result)
	2074	+ puts (result);
	2075	+
	2076	* Taken from the "Standard Function Library", file sflfind.c.
	2077	* Copyright: Copyright (c) 1991-99 iMatix Corporation.
	2078	* Slightly modified.
	2079	*
	2080	*@@added V0.9.3 (2000-05-08) [umoeller]
	2081	*/
	2082
	2083	char* strhtxtfind (const char *string, // String containing data
	2084	const char *pattern) // Pattern to search for
	2085	{
	2086	size_t
	2087	shift [256]; // Shift distance for each value
	2088	size_t
	2089	string_size,
	2090	pattern_size,
	2091	byte_nbr, // Index into byte array
	2092	match_size; // Size of matched part
	2093	const char
	2094	*match_base = NULL, // Base of match of pattern
	2095	*match_ptr = NULL, // Point within current match
	2096	*limit = NULL; // Last potiental match point
	2097
	2098	ASSERT (string); // Expect non-NULL pointers, but
	2099	ASSERT (pattern); // fail gracefully if not debugging
	2100	if (string == NULL \|\| pattern == NULL)
[169]	2101	return NULL;
[8]	2102
	2103	string_size = strlen (string);
	2104	pattern_size = strlen (pattern);
	2105
	2106	// Pattern must be smaller or equal in size to string
	2107	if (string_size < pattern_size)
[169]	2108	return NULL; // Otherwise it cannot be found
[8]	2109
	2110	if (pattern_size == 0) // Empty string matches at start
[238]	2111	return (char*)string;
[8]	2112
	2113	// Build the shift table
	2114
	2115	// The shift table determines how far to shift before trying to match
	2116	// again, if a match at this point fails. If the byte after where the
	2117	// end of our pattern falls is not in our pattern, then we start to
	2118	// match again after that byte; otherwise we line up the last occurence
	2119	// of that byte in our pattern under that byte, and try match again.
	2120
	2121	for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
	2122	shift [byte_nbr] = pattern_size + 1;
	2123
	2124	for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
	2125	shift [(unsigned char) tolower (pattern [byte_nbr])] = pattern_size - byte_nbr;
	2126
	2127	// Search for the string. If we don't find a match, move up by the
	2128	// amount we computed in the shift table above, to find location of
	2129	// the next potiental match.
	2130
	2131	limit = string + (string_size - pattern_size + 1);
	2132	ASSERT (limit > string);
	2133
	2134	for (match_base = string;
	2135	match_base < limit;
	2136	match_base += shift [(unsigned char) tolower (*(match_base + pattern_size))])
	2137	{
	2138	match_ptr = match_base;
	2139	match_size = 0;
	2140
	2141	// Compare pattern until it all matches, or we find a difference
	2142	while (tolower (*match_ptr++) == tolower (pattern [match_size++]))
	2143	{
	2144	ASSERT (match_size <= pattern_size &&
	2145	match_ptr == (match_base + match_size));
	2146
	2147	// If we found a match, return the start address
	2148	if (match_size >= pattern_size)
[238]	2149	return (char*)match_base;
[8]	2150	}
	2151	}
[245]	2152
[169]	2153	return NULL; // Found nothing
[8]	2154	}
	2155
[245]	2156

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/helpers/stringh.c

Download in other formats: