source: trunk/src/helpers/stringh.c@ 119

Last change on this file since 119 was 116, checked in by umoeller, 24 years ago

More updates.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 74.4 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#include <os2.h>
45
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <math.h>
51
52#include "setup.h" // code generation and debugging options
53
54#define DONT_REPLACE_STRINGH_MALLOC
55#include "helpers\stringh.h"
56#include "helpers\xstring.h" // extended string helpers
57
58#pragma hdrstop
59
60/*
61 *@@category: Helpers\C helpers\String management
62 * See stringh.c and xstring.c.
63 */
64
65/*
66 *@@category: Helpers\C helpers\String management\C string helpers
67 * See stringh.c.
68 */
69
70/*
71 *@@ strhcpy:
72 * like strdup, but this one doesn't crash if string2 is NULL,
73 * but sets the first byte in string1 to \0 instead.
74 *
75 *@@added V0.9.14 (2001-08-01) [umoeller]
76 */
77
78PSZ strhcpy(PSZ string1, const char *string2)
79{
80 if (string2)
81 return (strcpy(string1, string2));
82
83 *string1 = '\0';
84 return (string1);
85}
86
#ifdef __DEBUG_MALLOC_ENABLED__

/*
 *@@ strhdupDebug:
 *      memory debug version of strhdup. Works like strhdup,
 *      but allocates the copy through memdMalloc, handing on
 *      the source file, line and function of the caller.
 *
 *      Returns NULL if pcszSource is NULL or empty or if the
 *      allocation failed. If pulLength is not NULL, it receives
 *      the length of pcszSource (0 for NULL or empty strings).
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhdupDebug(const char *pcszSource,        // in: string to copy (can be NULL)
                 unsigned long *pulLength,      // out: length of string excl. null terminator (ptr can be NULL)
                 const char *pcszSourceFile,    // in: caller's source file, passed to memdMalloc
                 unsigned long ulLine,          // in: caller's source line, passed to memdMalloc
                 const char *pcszFunction)      // in: caller's function name, passed to memdMalloc
{
    PSZ     pszReturn = NULL;
    ULONG   ulLength = 0;

    // the first parameter must carry the name the body uses
    // (pcszSource); otherwise debug builds do not compile
    if (    (pcszSource)
         && (ulLength = strlen(pcszSource))
       )
    {
        // copy ulLength + 1 bytes to include the null terminator
        if ((pszReturn = (PSZ)memdMalloc(ulLength + 1,
                                         pcszSourceFile,
                                         ulLine,
                                         pcszFunction)))
            memcpy(pszReturn, pcszSource, ulLength + 1);
    }

    if (pulLength)
        *pulLength = ulLength;

    return (pszReturn);
}

#endif // __DEBUG_MALLOC_ENABLED__
123
124/*
125 *@@ strhdup:
126 * like strdup, but this one doesn't crash if pszSource
127 * is NULL, but returns NULL also. In addition, this
128 * can report the length of the string (V0.9.16).
129 *
130 *@@added V0.9.0 [umoeller]
131 *@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
132 */
133
134PSZ strhdup(const char *pcszSource,
135 unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
136{
137 PSZ pszReturn = NULL;
138 ULONG ulLength = 0;
139
140 if ( (pcszSource)
141 && (ulLength = strlen(pcszSource))
142 )
143 {
144 if (pszReturn = (PSZ)malloc(ulLength + 1))
145 memcpy(pszReturn, pcszSource, ulLength + 1);
146 }
147
148 if (pulLength)
149 *pulLength = ulLength;
150
151 return (pszReturn);
152}
153
/*
 *@@ strhcmp:
 *      NULL-tolerant strcmp with a normalized result.
 *
 *      A NULL pointer sorts before any non-NULL string
 *      (including the empty string); two NULL pointers
 *      compare equal.
 *
 *      The return value is always exactly -1, 0, or +1,
 *      whereas strcmp may return any negative or positive
 *      value. This makes the function handy as a tree
 *      comparison callback.
 *
 *@@added V0.9.9 (2001-02-16) [umoeller]
 */

int strhcmp(const char *p1, const char *p2)
{
    int iCmp;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, less otherwise
        return (p2) ? -1 : 0;

    if (!p2)
        // only p2 is NULL: p1 is greater
        return +1;

    iCmp = strcmp(p1, p2);

    // collapse any negative/positive value to -1/+1
    return (iCmp > 0) - (iCmp < 0);
}
186
/*
 *@@ strhicmp:
 *      like strhcmp, but the comparison is done with
 *      stricmp, i.e. without respect to case.
 *
 *      NULL pointers are handled the same way: NULL is
 *      less than any string, and two NULLs are equal.
 *      The result is always -1, 0, or +1.
 *
 *@@added V0.9.9 (2001-04-07) [umoeller]
 */

int strhicmp(const char *p1, const char *p2)
{
    int iCmp;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, less otherwise
        return (p2) ? -1 : 0;

    if (!p2)
        // only p2 is NULL: p1 is greater
        return +1;

    iCmp = stricmp(p1, p2);

    // collapse any negative/positive value to -1/+1
    return (iCmp > 0) - (iCmp < 0);
}
213
214/*
215 *@@ strhistr:
216 * like strstr, but case-insensitive.
217 *
218 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
219 */
220
221PSZ strhistr(const char *string1, const char *string2)
222{
223 PSZ prc = NULL;
224
225 if ((string1) && (string2))
226 {
227 PSZ pszSrchIn = strdup(string1);
228 PSZ pszSrchFor = strdup(string2);
229
230 if ((pszSrchIn) && (pszSrchFor))
231 {
232 strupr(pszSrchIn);
233 strupr(pszSrchFor);
234
235 prc = strstr(pszSrchIn, pszSrchFor);
236 if (prc)
237 {
238 // prc now has the first occurence of the string,
239 // but in pszSrchIn; we need to map this
240 // return value to the original string
241 prc = (prc-pszSrchIn) // offset in pszSrchIn
242 + (PSZ)string1;
243 }
244 }
245 if (pszSrchFor)
246 free(pszSrchFor);
247 if (pszSrchIn)
248 free(pszSrchIn);
249 }
250 return (prc);
251}
252
253/*
254 *@@ strhncpy0:
255 * like strncpy, but always appends a 0 character.
256 */
257
258ULONG strhncpy0(PSZ pszTarget,
259 const char *pszSource,
260 ULONG cbSource)
261{
262 ULONG ul = 0;
263 PSZ pTarget = pszTarget,
264 pSource = (PSZ)pszSource;
265
266 for (ul = 0; ul < cbSource; ul++)
267 if (*pSource)
268 *pTarget++ = *pSource++;
269 else
270 break;
271 *pTarget = 0;
272
273 return (ul);
274}
275
276/*
277 * strhCount:
278 * this counts the occurences of c in pszSearch.
279 */
280
281ULONG strhCount(const char *pszSearch,
282 CHAR c)
283{
284 PSZ p = (PSZ)pszSearch;
285 ULONG ulCount = 0;
286 while (TRUE)
287 {
288 p = strchr(p, c);
289 if (p)
290 {
291 ulCount++;
292 p++;
293 }
294 else
295 break;
296 }
297 return (ulCount);
298}
299
300/*
301 *@@ strhIsDecimal:
302 * returns TRUE if psz consists of decimal digits only.
303 */
304
305BOOL strhIsDecimal(PSZ psz)
306{
307 PSZ p = psz;
308 while (*p != 0)
309 {
310 if (isdigit(*p) == 0)
311 return (FALSE);
312 p++;
313 }
314
315 return (TRUE);
316}
317
#ifdef __DEBUG_MALLOC_ENABLED__

/*
 *@@ strhSubstrDebug:
 *      memory debug version of strhSubstr. Works like
 *      strhSubstr, but allocates the new string through
 *      memdMalloc, handing on the source file, line and
 *      function of the caller.
 *
 *      Returns NULL if pEnd is not behind pBegin or if the
 *      allocation failed.
 *
 *@@added V0.9.14 (2001-08-01) [umoeller]
 */

PSZ strhSubstrDebug(const char *pBegin,      // in: first char
                    const char *pEnd,        // in: last char (not included)
                    const char *pcszSourceFile,
                    unsigned long ulLine,
                    const char *pcszFunction)
{
    ULONG   cchSubstr;
    PSZ     pszCopy;

    // V0.9.9 (2001-04-04) [umoeller]: refuse empty or reversed ranges
    if (pEnd <= pBegin)
        return NULL;

    cchSubstr = (pEnd - pBegin);
    pszCopy = (PSZ)memdMalloc(cchSubstr + 1,
                              pcszSourceFile,
                              ulLine,
                              pcszFunction);
    if (pszCopy)
    {
        // V0.9.9 (2001-04-04) [umoeller]: memcpy instead of strhncpy0
        memcpy(pszCopy, pBegin, cchSubstr);
        pszCopy[cchSubstr] = '\0';
    }

    return pszCopy;
}

#endif // __DEBUG_MALLOC_ENABLED__
353
354/*
355 *@@ strhSubstr:
356 * this creates a new PSZ containing the string
357 * from pBegin to pEnd, excluding the pEnd character.
358 * The new string is null-terminated. The caller
359 * must free() the new string after use.
360 *
361 * Example:
362 + "1234567890"
363 + ^ ^
364 + p1 p2
365 + strhSubstr(p1, p2)
366 * would return a new string containing "2345678".
367 *
368 *@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
369 *@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
370 */
371
372PSZ strhSubstr(const char *pBegin, // in: first char
373 const char *pEnd) // in: last char (not included)
374{
375 PSZ pszSubstr = NULL;
376
377 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
378 {
379 ULONG cbSubstr = (pEnd - pBegin);
380 if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
381 {
382 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
383 *(pszSubstr + cbSubstr) = '\0';
384 }
385 }
386
387 return (pszSubstr);
388}
389
390/*
391 *@@ strhExtract:
392 * searches pszBuf for the cOpen character and returns
393 * the data in between cOpen and cClose, excluding
394 * those two characters, in a newly allocated buffer
395 * which you must free() afterwards.
396 *
397 * Spaces and newlines/linefeeds are skipped.
398 *
399 * If the search was successful, the new buffer
400 * is returned and, if (ppEnd != NULL), *ppEnd points
401 * to the first character after the cClose character
402 * found in the buffer.
403 *
404 * If the search was not successful, NULL is
405 * returned, and *ppEnd is unchanged.
406 *
407 * If another cOpen character is found before
408 * cClose, matching cClose characters will be skipped.
409 * You can therefore nest the cOpen and cClose
410 * characters.
411 *
412 * This function ignores cOpen and cClose characters
413 * in C-style comments and strings surrounded by
414 * double quotes.
415 *
416 * Example:
417 + PSZ pszBuf = "KEYWORD { --blah-- } next",
418 + pEnd;
419 + strhExtract(pszBuf,
420 + '{', '}',
421 + &pEnd)
422 * would return a new buffer containing " --blah-- ",
423 * and ppEnd would afterwards point to the space
424 * before "next" in the static buffer.
425 *
426 *@@added V0.9.0 [umoeller]
427 */
428
429PSZ strhExtract(PSZ pszBuf, // in: search buffer
430 CHAR cOpen, // in: opening char
431 CHAR cClose, // in: closing char
432 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
433{
434 PSZ pszReturn = NULL;
435
436 if (pszBuf)
437 {
438 PSZ pOpen = strchr(pszBuf, cOpen);
439 if (pOpen)
440 {
441 // opening char found:
442 // now go thru the whole rest of the buffer
443 PSZ p = pOpen+1;
444 LONG lLevel = 1; // if this goes 0, we're done
445 while (*p)
446 {
447 if (*p == cOpen)
448 lLevel++;
449 else if (*p == cClose)
450 {
451 lLevel--;
452 if (lLevel <= 0)
453 {
454 // matching closing bracket found:
455 // extract string
456 pszReturn = strhSubstr(pOpen+1, // after cOpen
457 p); // excluding cClose
458 if (ppEnd)
459 *ppEnd = p+1;
460 break; // while (*p)
461 }
462 }
463 else if (*p == '\"')
464 {
465 // beginning of string:
466 PSZ p2 = p+1;
467 // find end of string
468 while ((*p2) && (*p2 != '\"'))
469 p2++;
470
471 if (*p2 == '\"')
472 // closing quote found:
473 // search on after that
474 p = p2; // raised below
475 else
476 break; // while (*p)
477 }
478
479 p++;
480 }
481 }
482 }
483
484 return (pszReturn);
485}
486
487/*
488 *@@ strhQuote:
489 * similar to strhExtract, except that
490 * opening and closing chars are the same,
491 * and therefore no nesting is possible.
492 * Useful for extracting stuff between
493 * quotes.
494 *
495 *@@added V0.9.0 [umoeller]
496 */
497
498PSZ strhQuote(PSZ pszBuf,
499 CHAR cQuote,
500 PSZ *ppEnd)
501{
502 PSZ pszReturn = NULL,
503 p1 = NULL;
504 if ((p1 = strchr(pszBuf, cQuote)))
505 {
506 PSZ p2 = strchr(p1+1, cQuote);
507 if (p2)
508 {
509 pszReturn = strhSubstr(p1+1, p2);
510 if (ppEnd)
511 // store closing char
512 *ppEnd = p2 + 1;
513 }
514 }
515
516 return (pszReturn);
517}
518
519/*
520 *@@ strhStrip:
521 * removes all double spaces.
522 * This copies within the "psz" buffer.
523 * If any double spaces are found, the
524 * string will be shorter than before,
525 * but the buffer is _not_ reallocated,
526 * so there will be unused bytes at the
527 * end.
528 *
529 * Returns the number of spaces removed.
530 *
531 *@@added V0.9.0 [umoeller]
532 */
533
534ULONG strhStrip(PSZ psz) // in/out: string
535{
536 PSZ p;
537 ULONG cb = strlen(psz),
538 ulrc = 0;
539
540 for (p = psz; p < psz+cb; p++)
541 {
542 if ((*p == ' ') && (*(p+1) == ' '))
543 {
544 PSZ p2 = p;
545 while (*p2)
546 {
547 *p2 = *(p2+1);
548 p2++;
549 }
550 cb--;
551 p--;
552 ulrc++;
553 }
554 }
555 return (ulrc);
556}
557
558/*
559 *@@ strhins:
560 * this inserts one string into another.
561 *
562 * pszInsert is inserted into pszBuffer at offset
563 * ulInsertOfs (which counts from 0).
564 *
565 * A newly allocated string is returned. pszBuffer is
566 * not changed. The new string should be free()'d after
567 * use.
568 *
569 * Upon errors, NULL is returned.
570 *
571 *@@changed V0.9.0 [umoeller]: completely rewritten.
572 */
573
574PSZ strhins(const char *pcszBuffer,
575 ULONG ulInsertOfs,
576 const char *pcszInsert)
577{
578 PSZ pszNew = NULL;
579
580 if ((pcszBuffer) && (pcszInsert))
581 {
582 do {
583 ULONG cbBuffer = strlen(pcszBuffer);
584 ULONG cbInsert = strlen(pcszInsert);
585
586 // check string length
587 if (ulInsertOfs > cbBuffer + 1)
588 break; // do
589
590 // OK, let's go.
591 pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
592
593 // copy stuff before pInsertPos
594 memcpy(pszNew,
595 pcszBuffer,
596 ulInsertOfs);
597 // copy string to be inserted
598 memcpy(pszNew + ulInsertOfs,
599 pcszInsert,
600 cbInsert);
601 // copy stuff after pInsertPos
602 strcpy(pszNew + ulInsertOfs + cbInsert,
603 pcszBuffer + ulInsertOfs);
604 } while (FALSE);
605 }
606
607 return (pszNew);
608}
609
610/*
611 *@@ strhFindReplace:
612 * wrapper around xstrFindReplace to work with C strings.
613 * Note that *ppszBuf can get reallocated and must
614 * be free()'able.
615 *
616 * Repetitive use of this wrapper is not recommended
617 * because it is considerably slower than xstrFindReplace.
618 *
619 *@@added V0.9.6 (2000-11-01) [umoeller]
620 *@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
621 */
622
623ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
624 PULONG pulOfs, // in: where to begin search (0 = start);
625 // out: ofs of first char after replacement string
626 const char *pcszSearch, // in: search string; cannot be NULL
627 const char *pcszReplace) // in: replacement string; cannot be NULL
628{
629 ULONG ulrc = 0;
630 XSTRING xstrBuf,
631 xstrFind,
632 xstrReplace;
633 size_t ShiftTable[256];
634 BOOL fRepeat = FALSE;
635 xstrInitSet(&xstrBuf, *ppszBuf);
636 // reallocated and returned, so we're safe
637 xstrInitSet(&xstrFind, (PSZ)pcszSearch);
638 xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
639 // these two are never freed, so we're safe too
640
641 if ((ulrc = xstrFindReplace(&xstrBuf,
642 pulOfs,
643 &xstrFind,
644 &xstrReplace,
645 ShiftTable,
646 &fRepeat)))
647 // replaced:
648 *ppszBuf = xstrBuf.psz;
649
650 return (ulrc);
651}
652
653/*
654 * strhWords:
655 * returns the no. of words in "psz".
656 * A string is considered a "word" if
657 * it is surrounded by spaces only.
658 *
659 *@@added V0.9.0 [umoeller]
660 */
661
662ULONG strhWords(PSZ psz)
663{
664 PSZ p;
665 ULONG cb = strlen(psz),
666 ulWords = 0;
667 if (cb > 1)
668 {
669 ulWords = 1;
670 for (p = psz; p < psz+cb; p++)
671 if (*p == ' ')
672 ulWords++;
673 }
674 return (ulWords);
675}
676
677/*
678 *@@ strhGetWord:
679 * finds word boundaries.
680 *
681 * *ppszStart is used as the beginning of the
682 * search.
683 *
684 * If a word is found, *ppszStart is set to
685 * the first character of the word which was
686 * found and *ppszEnd receives the address
687 * of the first character _after_ the word,
688 * which is probably a space or a \n or \r char.
689 * We then return TRUE.
690 *
691 * The search is stopped if a null character
692 * is found or pLimit is reached. In that case,
693 * FALSE is returned.
694 *
695 *@@added V0.9.1 (2000-02-13) [umoeller]
696 */
697
698BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
699 // out: start of word (if TRUE is returned)
700 const char *pLimit, // in: ptr to last char after *ppszStart to be
701 // searched; if the word does not end before
702 // or with this char, FALSE is returned
703 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
704 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
705 PSZ *ppszEnd) // out: first char _after_ word
706 // (if TRUE is returned)
707{
708 // characters after which a word can be started
709 // const char *pcszBeginChars = "\x0d\x0a ";
710 // const char *pcszEndChars = "\x0d\x0a /-";
711
712 PSZ pStart = *ppszStart;
713
714 // find start of word
715 while ( (pStart < (PSZ)pLimit)
716 && (strchr(pcszBeginChars, *pStart))
717 )
718 // if char is a "before word" char: go for next
719 pStart++;
720
721 if (pStart < (PSZ)pLimit)
722 {
723 // found a valid "word start" character
724 // (which is not in pcszBeginChars):
725
726 // find end of word
727 PSZ pEndOfWord = pStart;
728 while ( (pEndOfWord <= (PSZ)pLimit)
729 && (strchr(pcszEndChars, *pEndOfWord) == 0)
730 )
731 // if char is not an "end word" char: go for next
732 pEndOfWord++;
733
734 if (pEndOfWord <= (PSZ)pLimit)
735 {
736 // whoa, got a word:
737 *ppszStart = pStart;
738 *ppszEnd = pEndOfWord;
739 return (TRUE);
740 }
741 }
742
743 return (FALSE);
744}
745
746/*
747 *@@ strhIsWord:
748 * returns TRUE if p points to a "word"
749 * in pcszBuf.
750 *
751 * p is considered a word if the character _before_
752 * it is in pcszBeginChars and the char _after_
753 * it (i.e. *(p+cbSearch)) is in pcszEndChars.
754 *
755 *@@added V0.9.6 (2000-11-12) [umoeller]
756 */
757
758BOOL strhIsWord(const char *pcszBuf,
759 const char *p, // in: start of word
760 ULONG cbSearch, // in: length of word
761 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
762 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
763{
764 BOOL fEndOK = FALSE;
765
766 // check previous char
767 if ( (p == pcszBuf)
768 || (strchr(pcszBeginChars, *(p-1)))
769 )
770 {
771 // OK, valid begin char:
772 // check end char
773 CHAR cNextChar = *(p + cbSearch);
774 if (cNextChar == 0)
775 fEndOK = TRUE;
776 else
777 {
778 char *pc = strchr(pcszEndChars, cNextChar);
779 if (pc)
780 // OK, is end char: avoid doubles of that char,
781 // but allow spaces
782 if ( (cNextChar+1 != *pc)
783 || (cNextChar+1 == ' ')
784 || (cNextChar+1 == 0)
785 )
786 fEndOK = TRUE;
787 }
788 }
789
790 return (fEndOK);
791}
792
793/*
794 *@@ strhFindWord:
795 * searches for pszSearch in pszBuf, which is
796 * returned if found (or NULL if not).
797 *
798 * As opposed to strstr, this finds pszSearch
799 * only if it is a "word". A search string is
800 * considered a word if the character _before_
801 * it is in pcszBeginChars and the char _after_
802 * it is in pcszEndChars.
803 *
804 * Example:
805 + strhFindWord("This is an example.", "is");
806 + returns ...........^ this, but not the "is" in "This".
807 *
808 * The algorithm here uses strstr to find pszSearch in pszBuf
809 * and performs additional "is-word" checks for each item found
810 * (by calling strhIsWord).
811 *
812 * Note that this function is fairly slow compared to xstrFindWord.
813 *
814 *@@added V0.9.0 (99-11-08) [umoeller]
815 *@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
816 */
817
818PSZ strhFindWord(const char *pszBuf,
819 const char *pszSearch,
820 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
821 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
822{
823 PSZ pszReturn = 0;
824 ULONG cbBuf = strlen(pszBuf),
825 cbSearch = strlen(pszSearch);
826
827 if ((cbBuf) && (cbSearch))
828 {
829 const char *p = pszBuf;
830
831 do // while p
832 {
833 p = strstr(p, pszSearch);
834 if (p)
835 {
836 // string found:
837 // check if that's a word
838
839 if (strhIsWord(pszBuf,
840 p,
841 cbSearch,
842 pcszBeginChars,
843 pcszEndChars))
844 {
845 // valid end char:
846 pszReturn = (PSZ)p;
847 break;
848 }
849
850 p += cbSearch;
851 }
852 } while (p);
853
854 }
855 return (pszReturn);
856}
857
858/*
859 *@@ strhFindEOL:
860 * returns a pointer to the next \r, \n or null character
861 * following pszSearchIn. Stores the offset in *pulOffset.
862 *
863 * This should never return NULL because at some point,
864 * there will be a null byte in your string.
865 *
866 *@@added V0.9.4 (2000-07-01) [umoeller]
867 */
868
869PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
870 PULONG pulOffset) // out: offset (ptr can be NULL)
871{
872 const char *p = pcszSearchIn,
873 *prc = 0;
874 while (TRUE)
875 {
876 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
877 {
878 prc = p;
879 break;
880 }
881 p++;
882 }
883
884 if ((pulOffset) && (prc))
885 *pulOffset = prc - pcszSearchIn;
886
887 return ((PSZ)prc);
888}
889
890/*
891 *@@ strhFindNextLine:
892 * like strhFindEOL, but this returns the character
893 * _after_ \r or \n. Note that this might return
894 * a pointer to terminating NULL character also.
895 */
896
897PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
898{
899 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
900 // pEOL now points to the \r char or the terminating 0 byte;
901 // if not null byte, advance pointer
902 PSZ pNextLine = pEOL;
903 if (*pNextLine == '\r')
904 pNextLine++;
905 if (*pNextLine == '\n')
906 pNextLine++;
907 if (pulOffset)
908 *pulOffset = pNextLine - pszSearchIn;
909 return (pNextLine);
910}
911
912/*
913 *@@ strhBeautifyTitle:
914 * replaces all line breaks (0xd, 0xa) with spaces.
915 *
916 *@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
917 */
918
919BOOL strhBeautifyTitle(PSZ psz)
920{
921 BOOL rc = FALSE;
922 CHAR *p = psz;
923
924 while(*p)
925 if ( (*p == '\r')
926 || (*p == '\n')
927 )
928 {
929 rc = TRUE;
930 if ( (p != psz)
931 && (p[-1] == ' ')
932 )
933 memmove(p, p + 1, strlen(p));
934 else
935 *p++ = ' ';
936 }
937 else
938 p++;
939
940 return (rc);
941}
942
943/*
944 * strhFindAttribValue:
945 * searches for pszAttrib in pszSearchIn; if found,
946 * returns the first character after the "=" char.
947 * If "=" is not found, a space, \r, and \n are
948 * also accepted. This function searches without
949 * respecting case.
950 *
951 * <B>Example:</B>
952 + strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
953 +
954 + returns ....................... ^ this address.
955 *
956 *@@added V0.9.0 [umoeller]
957 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
958 *@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
959 */
960
961PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
962{
963 PSZ prc = 0;
964 PSZ pszSearchIn2, p;
965 ULONG cbAttrib = strlen(pszAttrib),
966 ulLength = strlen(pszSearchIn);
967
968 // use alloca(), so memory is freed on function exit
969 pszSearchIn2 = (PSZ)alloca(ulLength + 1);
970 memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
971
972 // 1) find token, (space char, \n, \r, \t)
973 p = strtok(pszSearchIn2, " \n\r\t");
974 while (p)
975 {
976 CHAR c2;
977 PSZ pOrig;
978
979 // check tag name
980 if (!strnicmp(p, pszAttrib, cbAttrib))
981 {
982 // position in original string
983 pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
984
985 // yes:
986 prc = pOrig + cbAttrib;
987 c2 = *prc;
988 while ( ( (c2 == ' ')
989 || (c2 == '=')
990 || (c2 == '\n')
991 || (c2 == '\r')
992 )
993 && (c2 != 0)
994 )
995 c2 = *++prc;
996
997 break;
998 }
999
1000 p = strtok(NULL, " \n\r\t");
1001 }
1002
1003 return (prc);
1004}
1005
1006/* PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1007{
1008 PSZ prc = 0;
1009 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1010 p,
1011 p2;
1012 ULONG cbAttrib = strlen(pszAttrib);
1013
1014 // 1) find space char
1015 while ((p = strchr(pszSearchIn2, ' ')))
1016 {
1017 CHAR c;
1018 p++;
1019 if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1020 {
1021 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1022 // now check whether the p+strlen(pszAttrib)
1023 // is a valid end-of-tag character
1024 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1025 && ( (c == ' ')
1026 || (c == '>')
1027 || (c == '=')
1028 || (c == '\r')
1029 || (c == '\n')
1030 || (c == 0)
1031 )
1032 )
1033 {
1034 // yes:
1035 CHAR c2;
1036 p2 = p + cbAttrib;
1037 c2 = *p2;
1038 while ( ( (c2 == ' ')
1039 || (c2 == '=')
1040 || (c2 == '\n')
1041 || (c2 == '\r')
1042 )
1043 && (c2 != 0)
1044 )
1045 c2 = *++p2;
1046
1047 prc = p2;
1048 break; // first while
1049 }
1050 }
1051 else
1052 break;
1053
1054 pszSearchIn2++;
1055 }
1056 return (prc);
1057} */
1058
1059/*
1060 * strhGetNumAttribValue:
1061 * stores the numerical parameter value of an HTML-style
1062 * tag in *pl.
1063 *
1064 * Returns the address of the tag parameter in the
1065 * search buffer, if found, or NULL.
1066 *
1067 * <B>Example:</B>
1068 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1069 *
1070 * stores 123 in the "l" variable.
1071 *
1072 *@@added V0.9.0 [umoeller]
1073 *@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1074 */
1075
1076PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1077 const char *pszTag, // e.g. "INDEX"
1078 PLONG pl) // out: numerical value
1079{
1080 PSZ pParam;
1081 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1082 {
1083 if ( (*pParam == '\"')
1084 || (*pParam == '\'')
1085 )
1086 pParam++; // V0.9.9 (2001-04-04) [umoeller]
1087
1088 sscanf(pParam, "%ld", pl);
1089 }
1090
1091 return (pParam);
1092}
1093
1094/*
1095 * strhGetTextAttr:
1096 * retrieves the attribute value of a textual HTML-style tag
1097 * in a newly allocated buffer, which is returned,
1098 * or NULL if attribute not found.
1099 * If an attribute value is to contain spaces, it
1100 * must be enclosed in quotes.
1101 *
1102 * The offset of the attribute data in pszSearchIn is
1103 * returned in *pulOffset so that you can do multiple
1104 * searches.
1105 *
1106 * This returns a new buffer, which should be free()'d after use.
1107 *
1108 * <B>Example:</B>
1109 + ULONG ulOfs = 0;
1110 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1111 + ............^ ulOfs
1112 *
1113 * returns a new string with the value "blublub" (without
1114 * quotes) and sets ulOfs to 12.
1115 *
1116 *@@added V0.9.0 [umoeller]
1117 */
1118
1119PSZ strhGetTextAttr(const char *pszSearchIn,
1120 const char *pszTag,
1121 PULONG pulOffset) // out: offset where found
1122{
1123 PSZ pParam,
1124 pParam2,
1125 prc = NULL;
1126 ULONG ulCount = 0;
1127 LONG lNestingLevel = 0;
1128
1129 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1130 {
1131 // determine end character to search for: a space
1132 CHAR cEnd = ' ';
1133 if (*pParam == '\"')
1134 {
1135 // or, if the data is enclosed in quotes, a quote
1136 cEnd = '\"';
1137 pParam++;
1138 }
1139
1140 if (pulOffset)
1141 // store the offset
1142 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1143
1144 // now find end of attribute
1145 pParam2 = pParam;
1146 while (*pParam)
1147 {
1148 if (*pParam == cEnd)
1149 // end character found
1150 break;
1151 else if (*pParam == '<')
1152 // yet another opening tag found:
1153 // this is probably some "<" in the attributes
1154 lNestingLevel++;
1155 else if (*pParam == '>')
1156 {
1157 lNestingLevel--;
1158 if (lNestingLevel < 0)
1159 // end of tag found:
1160 break;
1161 }
1162 ulCount++;
1163 pParam++;
1164 }
1165
1166 // copy attribute to new buffer
1167 if (ulCount)
1168 {
1169 prc = (PSZ)malloc(ulCount+1);
1170 memcpy(prc, pParam2, ulCount);
1171 *(prc+ulCount) = 0;
1172 }
1173 }
1174 return (prc);
1175}
1176
1177/*
1178 * strhFindEndOfTag:
1179 * returns a pointer to the ">" char
1180 * which seems to terminate the tag beginning
1181 * after pszBeginOfTag.
1182 *
1183 * If additional "<" chars are found, we look
1184 * for additional ">" characters too.
1185 *
1186 * Note: You must pass the address of the opening
1187 * '<' character to this function.
1188 *
1189 * Example:
1190 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1191 + strhFindEndOfTag(pszTest)
1192 + returns.................................^ this.
1193 *
1194 *@@added V0.9.0 [umoeller]
1195 */
1196
1197PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1198{
1199 PSZ p = (PSZ)pszBeginOfTag,
1200 prc = NULL;
1201 LONG lNestingLevel = 0;
1202
1203 while (*p)
1204 {
1205 if (*p == '<')
1206 // another opening tag found:
1207 lNestingLevel++;
1208 else if (*p == '>')
1209 {
1210 // closing tag found:
1211 lNestingLevel--;
1212 if (lNestingLevel < 1)
1213 {
1214 // corresponding: return this
1215 prc = p;
1216 break;
1217 }
1218 }
1219 p++;
1220 }
1221
1222 return (prc);
1223}
1224
1225/*
1226 * strhGetBlock:
1227 * this complex function searches the given string
1228 * for a pair of opening/closing HTML-style tags.
1229 *
1230 * If found, this routine returns TRUE and does
1231 * the following:
1232 *
1233 * 1) allocate a new buffer, copy the text
1234 * enclosed by the opening/closing tags
1235 * into it and set *ppszBlock to that
1236 * buffer;
1237 *
1238 * 2) if the opening tag has any attributes,
1239 * allocate another buffer, copy the
1240 * attributes into it and set *ppszAttrs
1241 * to that buffer; if no attributes are
1242 * found, *ppszAttrs will be NULL;
1243 *
1244 * 3) set *pulOffset to the offset from the
1245 * beginning of *ppszSearchIn where the
1246 * opening tag was found;
1247 *
1248 * 4) advance *ppszSearchIn to after the
1249 * closing tag, so that you can do
1250 * multiple searches without finding the
1251 * same tags twice.
1252 *
1253 * All buffers should be freed using free().
1254 *
1255 * This returns the following:
1256 * -- 0: no error
1257 * -- 1: tag not found at all (doesn't have to be an error)
1258 * -- 2: begin tag found, but no corresponding end tag found. This
1259 * is a real error.
1260 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1261 *
1262 * <B>Example:</B>
1263 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1264 + PSZ pszBlock, pszAttrs;
1265 + ULONG ulOfs;
1266 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1267 *
1268 * would do the following:
1269 *
1270 * 1) set pszBlock to a new string containing "This is page 1."
1271 * without quotes;
1272 *
1273 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1274 *
1275 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1276 *
1277 * 4) pSearch would be advanced to point to the "More text"
1278 * string in the original buffer.
1279 *
1280 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1281 * for HTML parsing.
1282 *
1283 *@@added V0.9.0 [umoeller]
1284 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1285 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1286 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1287 */
1288
1289ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1290 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1291 PSZ pszTag,
1292 PSZ *ppszBlock, // out: block enclosed by the tags
1293 PSZ *ppszAttribs, // out: attributes of the opening tag
1294 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1295 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1296{
1297 ULONG ulrc = 1;
1298 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1299 pszSearch2 = pszBeginTag,
1300 pszClosingTag;
1301 ULONG cbTag = strlen(pszTag);
1302
1303 // go thru the block and check all tags if it's the
1304 // begin tag we're looking for
1305 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1306 {
1307 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1308 // yes: stop
1309 break;
1310 else
1311 pszBeginTag++;
1312 }
1313
1314 if (pszBeginTag)
1315 {
1316 // we found <TAG>:
1317 ULONG ulNestingLevel = 0;
1318
1319 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1320 // strchr(pszBeginTag, '>');
1321 if (pszEndOfBeginTag)
1322 {
1323 // does the caller want the attributes?
1324 if (ppszAttribs)
1325 {
1326 // yes: then copy them
1327 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1328 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1329 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1330 // add terminating 0
1331 *(pszAttrs + ulAttrLen) = 0;
1332
1333 *ppszAttribs = pszAttrs;
1334 }
1335
1336 // output offset of where we found the begin tag
1337 if (pulOfsBeginTag)
1338 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1339
1340 // now find corresponding closing tag (e.g. "</BODY>"
1341 pszBeginTag = pszEndOfBeginTag+1;
1342 // now we're behind the '>' char of the opening tag
1343 // increase offset of that too
1344 if (pulOfsBeginBlock)
1345 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1346
1347 // find next closing tag;
1348 // for the first run, pszSearch2 points to right
1349 // after the '>' char of the opening tag
1350 pszSearch2 = pszBeginTag;
1351 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1352 && (pszClosingTag = strstr(pszSearch2, "<"))
1353 )
1354 {
1355 // if we have another opening tag before our closing
1356 // tag, we need to have several closing tags before
1357 // we're done
1358 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1359 ulNestingLevel++;
1360 else
1361 {
1362 // is this ours?
1363 if ( (*(pszClosingTag+1) == '/')
1364 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1365 )
1366 {
1367 // we've found a matching closing tag; is
1368 // it ours?
1369 if (ulNestingLevel == 0)
1370 {
1371 // our closing tag found:
1372 // allocate mem for a new buffer
1373 // and extract all the text between
1374 // open and closing tags to it
1375 ULONG ulLen = pszClosingTag - pszBeginTag;
1376 if (ppszBlock)
1377 {
1378 PSZ pNew = (PSZ)malloc(ulLen + 1);
1379 strhncpy0(pNew, pszBeginTag, ulLen);
1380 *ppszBlock = pNew;
1381 }
1382
1383 // raise search offset to after the closing tag
1384 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1385
1386 ulrc = 0;
1387
1388 break;
1389 } else
1390 // not our closing tag:
1391 ulNestingLevel--;
1392 }
1393 }
1394 // no matching closing tag: search on after that
1395 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1396 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1397
1398 if (!pszClosingTag)
1399 // no matching closing tag found:
1400 // return 2 (closing tag not found)
1401 ulrc = 2;
1402 } // end if (pszBeginTag)
1403 else
1404 // no matching ">" for opening tag found:
1405 ulrc = 3;
1406 }
1407
1408 return (ulrc);
1409}
1410
1411/* ******************************************************************
1412 *
1413 * Miscellaneous
1414 *
1415 ********************************************************************/
1416
1417/*
1418 *@@ strhArrayAppend:
1419 * this appends a string to a "string array".
1420 *
1421 * A string array is considered a sequence of
1422 * zero-terminated strings in memory. That is,
1423 * after each string's null-byte, the next
1424 * string comes up.
1425 *
1426 * This is useful for composing a single block
1427 * of memory from, say, list box entries, which
1428 * can then be written to OS2.INI in one flush.
1429 *
1430 * To append strings to such an array, call this
1431 * function for each string you wish to append.
1432 * This will re-allocate *ppszRoot with each call,
1433 * and update *pcbRoot, which then contains the
1434 * total size of all strings (including all null
1435 * terminators).
1436 *
1437 * Pass *pcbRoot to PrfSaveProfileData to have the
1438 * block saved.
1439 *
 *      Note: On the first call, *ppszRoot _must_ be NULL and
 *      *pcbRoot _must_ be 0, or this crashes.
1442 *
1443 *@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1444 */
1445
1446VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1447 const char *pcszNew, // in: string to append
1448 ULONG cbNew, // in: size of that string or 0 to run strlen() here
1449 PULONG pcbRoot) // in/out: size of array
1450{
1451 PSZ pszTemp;
1452
1453 if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1454 cbNew = strlen(pcszNew);
1455
1456 pszTemp = (PSZ)malloc(*pcbRoot
1457 + cbNew
1458 + 1); // two null bytes
1459 if (*ppszRoot)
1460 {
1461 // not first loop: copy old stuff
1462 memcpy(pszTemp,
1463 *ppszRoot,
1464 *pcbRoot);
1465 free(*ppszRoot);
1466 }
1467 // append new string
1468 strcpy(pszTemp + *pcbRoot,
1469 pcszNew);
1470 // update root
1471 *ppszRoot = pszTemp;
1472 // update length
1473 *pcbRoot += cbNew + 1;
1474}
1475
1476/*
1477 *@@ strhCreateDump:
1478 * this dumps a memory block into a string
1479 * and returns that string in a new buffer.
1480 *
1481 * You must free() the returned PSZ after use.
1482 *
1483 * The output looks like the following:
1484 *
1485 + 0000: FE FF 0E 02 90 00 00 00 ........
1486 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1487 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1488 *
1489 * Each line is terminated with a newline (\n)
1490 * character only.
1491 *
1492 *@@added V0.9.1 (2000-01-22) [umoeller]
1493 */
1494
1495PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1496 ULONG ulSize, // in: size of buffer
1497 ULONG ulIndent) // in: indentation of every line
1498{
1499 PSZ pszReturn = 0;
1500 XSTRING strReturn;
1501 CHAR szTemp[1000];
1502
1503 PBYTE pbCurrent = pb; // current byte
1504 ULONG ulCount = 0,
1505 ulCharsInLine = 0; // if this grows > 7, a new line is started
1506 CHAR szLine[400] = "",
1507 szAscii[30] = " "; // ASCII representation; filled for every line
1508 PSZ pszLine = szLine,
1509 pszAscii = szAscii;
1510
1511 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1512
1513 for (pbCurrent = pb;
1514 ulCount < ulSize;
1515 pbCurrent++, ulCount++)
1516 {
1517 if (ulCharsInLine == 0)
1518 {
1519 memset(szLine, ' ', ulIndent);
1520 pszLine += ulIndent;
1521 }
1522 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1523
1524 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1525 // printable character:
1526 *pszAscii = *pbCurrent;
1527 else
1528 *pszAscii = '.';
1529 pszAscii++;
1530
1531 ulCharsInLine++;
1532 if ( (ulCharsInLine > 7) // 8 bytes added?
1533 || (ulCount == ulSize-1) // end of buffer reached?
1534 )
1535 {
1536 // if we haven't had eight bytes yet,
1537 // fill buffer up to eight bytes with spaces
1538 ULONG ul2;
1539 for (ul2 = ulCharsInLine;
1540 ul2 < 8;
1541 ul2++)
1542 pszLine += sprintf(pszLine, " ");
1543
1544 sprintf(szTemp, "%04lX: %s %s\n",
1545 (ulCount & 0xFFFFFFF8), // offset in hex
1546 szLine, // bytes string
1547 szAscii); // ASCII string
1548 xstrcat(&strReturn, szTemp, 0);
1549
1550 // restart line buffer
1551 pszLine = szLine;
1552
1553 // clear ASCII buffer
1554 strcpy(szAscii, " ");
1555 pszAscii = szAscii;
1556
1557 // reset line counter
1558 ulCharsInLine = 0;
1559 }
1560 }
1561
1562 if (strReturn.cbAllocated)
1563 pszReturn = strReturn.psz;
1564
1565 return (pszReturn);
1566}
1567
1568/* ******************************************************************
1569 *
1570 * Wildcard matching
1571 *
1572 ********************************************************************/
1573
1574/*
1575 * The following code has been taken from "fnmatch.zip".
1576 *
1577 * (c) 1994-1996 by Eberhard Mattes.
1578 */
1579
/* In OS/2 and DOS styles, a path component can be delimited by a
 * forward slash as well as by a backslash. This macro is true iff
 * C is one of these two separators. Note that C is evaluated twice. */

#define IS_OS2_COMP_SEP(C) ((C) == '/' || (C) == '\\')


/* This macro is true iff C terminates a path component, that is,
 * if C is the null terminator of the string or a separator. */

#define IS_OS2_COMP_END(C) ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 * skip_comp_os2:
 *      advances SRC to the start of the next path component, for
 *      OS/2 and DOS styles, and returns the new pointer. If SRC
 *      holds no further separator, the returned pointer addresses
 *      the terminating null character.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    for (; *src != 0; ++src)
    {
        if (IS_OS2_COMP_SEP(*src))
            /* component ends here: the next one starts
             * right behind the separator */
            return src + 1;
    }

    /* end of string reached without hitting a separator */
    return src;
}
1614
1615/*
1616 * has_colon:
1617 * returns true iff the path P contains a colon.
1618 *
1619 * (c) 1994-1996 by Eberhard Mattes.
1620 */
1621
static int has_colon(const unsigned char *p)
{
    /* strchr stops at the null terminator just like the manual
     * scan would; we only need to know whether it found a ':' */
    return (strchr((const char *)p, ':') != NULL);
}
1631
1632/*
1633 * match_comp_os2:
1634 * compares a single component (directory name or file name)
1635 * of the paths, for OS/2 and DOS styles. MASK and NAME point
1636 * into a component of the wildcard and the name to be checked,
1637 * respectively. Comparing stops at the next separator.
1638 * The FLAGS argument is the same as that of fnmatch().
1639 *
1640 * HAS_DOT is true if a dot is in the current component of NAME.
1641 * The number of dots is not restricted, even in DOS style.
1642 *
1643 * Returns FNM_MATCH iff MASK and NAME match.
1644 *
1645 * Note that this function is recursive.
1646 *
1647 * (c) 1994-1996 by Eberhard Mattes.
1648 */
1649
1650static int match_comp_os2(const unsigned char *mask,
1651 const unsigned char *name,
1652 unsigned flags,
1653 int has_dot)
1654{
1655 int rc;
1656
1657 for (;;)
1658 switch (*mask)
1659 {
1660 case 0:
1661
1662 /* There must be no extra characters at the end of NAME when
1663 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1664 * in that case, NAME may point to a separator. */
1665
1666 if (*name == 0)
1667 return FNM_MATCH;
1668 if ((flags & FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1669 return FNM_MATCH;
1670 return FNM_NOMATCH;
1671
1672 case '/':
1673 case '\\':
1674
1675 /* Separators match separators. */
1676
1677 if (IS_OS2_COMP_SEP(*name))
1678 return FNM_MATCH;
1679
1680 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1681 * is ignored at the end of NAME. */
1682
1683 if ((flags & FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1684 return FNM_MATCH;
1685
1686 /* Stop comparing at the separator. */
1687
1688 return FNM_NOMATCH;
1689
1690 case '?':
1691
1692 /* A question mark matches one character. It does not match
1693 * a dot. At the end of the component (and before a dot),
1694 * it also matches zero characters. */
1695
1696 if (*name != '.' && !IS_OS2_COMP_END(*name))
1697 ++name;
1698 ++mask;
1699 break;
1700
1701 case '*':
1702
1703 /* An asterisk matches zero or more characters. In DOS
1704 * mode, dots are not matched. */
1705
1706 do
1707 {
1708 ++mask;
1709 }
1710 while (*mask == '*');
1711 for (;;)
1712 {
1713 rc = match_comp_os2(mask, name, flags, has_dot);
1714 if (rc != FNM_NOMATCH)
1715 return rc;
1716 if (IS_OS2_COMP_END(*name))
1717 return FNM_NOMATCH;
1718 if (*name == '.' && (flags & FNM_STYLE_MASK) == FNM_DOS)
1719 return FNM_NOMATCH;
1720 ++name;
1721 }
1722
1723 case '.':
1724
1725 /* A dot matches a dot. It also matches the implicit dot at
1726 * the end of a dot-less NAME. */
1727
1728 ++mask;
1729 if (*name == '.')
1730 ++name;
1731 else if (has_dot || !IS_OS2_COMP_END(*name))
1732 return FNM_NOMATCH;
1733 break;
1734
1735 default:
1736
1737 /* All other characters match themselves. */
1738
1739 if (flags & FNM_IGNORECASE)
1740 {
1741 if (tolower(*mask) != tolower(*name))
1742 return FNM_NOMATCH;
1743 }
1744 else
1745 {
1746 if (*mask != *name)
1747 return FNM_NOMATCH;
1748 }
1749 ++mask;
1750 ++name;
1751 break;
1752 }
1753}
1754
1755/*
1756 * match_comp:
1757 * compares a single component (directory name or file
1758 * name) of the paths, for all styles which need
1759 * component-by-component matching. MASK and NAME point
1760 * to the start of a component of the wildcard and the
1761 * name to be checked, respectively. Comparing stops at
1762 * the next separator. The FLAGS argument is the same as
1763 * that of fnmatch().
1764 *
1765 * Return FNM_MATCH iff MASK and NAME match.
1766 *
1767 * (c) 1994-1996 by Eberhard Mattes.
1768 */
1769
1770static int match_comp(const unsigned char *mask,
1771 const unsigned char *name,
1772 unsigned flags)
1773{
1774 const unsigned char *s;
1775
1776 switch (flags & FNM_STYLE_MASK)
1777 {
1778 case FNM_OS2:
1779 case FNM_DOS:
1780
1781 /* For OS/2 and DOS styles, we add an implicit dot at the end of
1782 * the component if the component doesn't include a dot. */
1783
1784 s = name;
1785 while (!IS_OS2_COMP_END(*s) && *s != '.')
1786 ++s;
1787 return match_comp_os2(mask, name, flags, *s == '.');
1788
1789 default:
1790 return FNM_ERR;
1791 }
1792}
1793
/* In Unix styles, the forward slash is the only character which
 * separates the components of a path. This macro is true iff C
 * is that separator. */

#define IS_UNIX_COMP_SEP(C) ((C) == '/')


/* This macro is true iff C terminates a path component, that is,
 * if C is the null terminator of the string or the separator.
 * Note that C is evaluated twice. */

#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP(C))
1804
1805/*
1806 * match_unix:
1807 * matches complete paths for Unix styles.
1808 *
1809 * The FLAGS argument is the same as that of fnmatch().
1810 * COMP points to the start of the current component in
1811 * NAME. Return FNM_MATCH iff MASK and NAME match. The
1812 * backslash character is used for escaping ? and * unless
1813 * FNM_NOESCAPE is set.
1814 *
1815 * (c) 1994-1996 by Eberhard Mattes.
1816 */
1817
1818static int match_unix(const unsigned char *mask,
1819 const unsigned char *name,
1820 unsigned flags,
1821 const unsigned char *comp)
1822{
1823 unsigned char c1, c2;
1824 char invert, matched;
1825 const unsigned char *start;
1826 int rc;
1827
1828 for (;;)
1829 switch (*mask)
1830 {
1831 case 0:
1832
1833 /* There must be no extra characters at the end of NAME when
1834 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1835 * in that case, NAME may point to a separator. */
1836
1837 if (*name == 0)
1838 return FNM_MATCH;
1839 if ((flags & FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1840 return FNM_MATCH;
1841 return FNM_NOMATCH;
1842
1843 case '?':
1844
1845 /* A question mark matches one character. It does not match
1846 * the component separator if FNM_PATHNAME is set. It does
1847 * not match a dot at the start of a component if FNM_PERIOD
1848 * is set. */
1849
1850 if (*name == 0)
1851 return FNM_NOMATCH;
1852 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1853 return FNM_NOMATCH;
1854 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1855 return FNM_NOMATCH;
1856 ++mask;
1857 ++name;
1858 break;
1859
1860 case '*':
1861
1862 /* An asterisk matches zero or more characters. It does not
1863 * match the component separator if FNM_PATHNAME is set. It
1864 * does not match a dot at the start of a component if
1865 * FNM_PERIOD is set. */
1866
1867 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1868 return FNM_NOMATCH;
1869 do
1870 {
1871 ++mask;
1872 }
1873 while (*mask == '*');
1874 for (;;)
1875 {
1876 rc = match_unix(mask, name, flags, comp);
1877 if (rc != FNM_NOMATCH)
1878 return rc;
1879 if (*name == 0)
1880 return FNM_NOMATCH;
1881 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1882 return FNM_NOMATCH;
1883 ++name;
1884 }
1885
1886 case '/':
1887
1888 /* Separators match only separators. If _FNM_PATHPREFIX is
1889 * set, a trailing separator in MASK is ignored at the end
1890 * of NAME. */
1891
1892 if (!(IS_UNIX_COMP_SEP(*name)
1893 || ((flags & FNM_PATHPREFIX) && *name == 0
1894 && (mask[1] == 0
1895 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1896 && mask[2] == 0)))))
1897 return FNM_NOMATCH;
1898
1899 ++mask;
1900 if (*name != 0)
1901 ++name;
1902
1903 /* This is the beginning of a new component if FNM_PATHNAME
1904 * is set. */
1905
1906 if (flags & FNM_PATHNAME)
1907 comp = name;
1908 break;
1909
1910 case '[':
1911
1912 /* A set of characters. Always case-sensitive. */
1913
1914 if (*name == 0)
1915 return FNM_NOMATCH;
1916 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1917 return FNM_NOMATCH;
1918 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1919 return FNM_NOMATCH;
1920
1921 invert = 0;
1922 matched = 0;
1923 ++mask;
1924
1925 /* If the first character is a ! or ^, the set matches all
1926 * characters not listed in the set. */
1927
1928 if (*mask == '!' || *mask == '^')
1929 {
1930 ++mask;
1931 invert = 1;
1932 }
1933
1934 /* Loop over all the characters of the set. The loop ends
1935 * if the end of the string is reached or if a ] is
1936 * encountered unless it directly follows the initial [ or
1937 * [-. */
1938
1939 start = mask;
1940 while (!(*mask == 0 || (*mask == ']' && mask != start)))
1941 {
1942 /* Get the next character which is optionally preceded
1943 * by a backslash. */
1944
1945 c1 = *mask++;
1946 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
1947 {
1948 if (*mask == 0)
1949 break;
1950 c1 = *mask++;
1951 }
1952
1953 /* Ranges of characters are written as a-z. Don't
1954 * forget to check for the end of the string and to
1955 * handle the backslash. If the character after - is a
1956 * ], it isn't a range. */
1957
1958 if (*mask == '-' && mask[1] != ']')
1959 {
1960 ++mask; /* Skip the - character */
1961 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
1962 ++mask;
1963 if (*mask == 0)
1964 break;
1965 c2 = *mask++;
1966 }
1967 else
1968 c2 = c1;
1969
1970 /* Now check whether this character or range matches NAME. */
1971
1972 if (c1 <= *name && *name <= c2)
1973 matched = 1;
1974 }
1975
1976 /* If the end of the string is reached before a ] is found,
1977 * back up to the [ and compare it to NAME. */
1978
1979 if (*mask == 0)
1980 {
1981 if (*name != '[')
1982 return FNM_NOMATCH;
1983 ++name;
1984 mask = start;
1985 if (invert)
1986 --mask;
1987 }
1988 else
1989 {
1990 if (invert)
1991 matched = !matched;
1992 if (!matched)
1993 return FNM_NOMATCH;
1994 ++mask; /* Skip the ] character */
1995 if (*name != 0)
1996 ++name;
1997 }
1998 break;
1999
2000 case '\\':
2001 ++mask;
2002 if (flags & FNM_NOESCAPE)
2003 {
2004 if (*name != '\\')
2005 return FNM_NOMATCH;
2006 ++name;
2007 }
2008 else if (*mask == '*' || *mask == '?')
2009 {
2010 if (*mask != *name)
2011 return FNM_NOMATCH;
2012 ++mask;
2013 ++name;
2014 }
2015 break;
2016
2017 default:
2018
2019 /* All other characters match themselves. */
2020
2021 if (flags & FNM_IGNORECASE)
2022 {
2023 if (tolower(*mask) != tolower(*name))
2024 return FNM_NOMATCH;
2025 }
2026 else
2027 {
2028 if (*mask != *name)
2029 return FNM_NOMATCH;
2030 }
2031 ++mask;
2032 ++name;
2033 break;
2034 }
2035}
2036
2037/*
2038 * _fnmatch_unsigned:
2039 * Check whether the path name NAME matches the wildcard MASK.
2040 *
2041 * Return:
2042 * -- 0 (FNM_MATCH) if it matches,
2043 * -- _FNM_NOMATCH if it doesn't,
2044 * -- FNM_ERR on error.
2045 *
2046 * The operation of this function is controlled by FLAGS.
2047 * This is an internal function, with unsigned arguments.
2048 *
2049 * (c) 1994-1996 by Eberhard Mattes.
2050 */
2051
2052static int _fnmatch_unsigned(const unsigned char *mask,
2053 const unsigned char *name,
2054 unsigned flags)
2055{
2056 int m_drive,
2057 n_drive,
2058 rc;
2059
2060 /* Match and skip the drive name if present. */
2061
2062 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2063 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2064
2065 if (m_drive != n_drive)
2066 {
2067 if (m_drive == -1 || n_drive == -1)
2068 return FNM_NOMATCH;
2069 if (!(flags & FNM_IGNORECASE))
2070 return FNM_NOMATCH;
2071 if (tolower(m_drive) != tolower(n_drive))
2072 return FNM_NOMATCH;
2073 }
2074
2075 if (m_drive != -1)
2076 mask += 2;
2077 if (n_drive != -1)
2078 name += 2;
2079
2080 /* Colons are not allowed in path names, except for the drive name,
2081 * which was skipped above. */
2082
2083 if (has_colon(mask) || has_colon(name))
2084 return FNM_ERR;
2085
2086 /* The name "\\server\path" should not be matched by mask
2087 * "\*\server\path". Ditto for /. */
2088
2089 switch (flags & FNM_STYLE_MASK)
2090 {
2091 case FNM_OS2:
2092 case FNM_DOS:
2093
2094 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2095 {
2096 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2097 return FNM_NOMATCH;
2098 name += 2;
2099 mask += 2;
2100 }
2101 break;
2102
2103 case FNM_POSIX:
2104
2105 if (name[0] == '/' && name[1] == '/')
2106 {
2107 int i;
2108
2109 name += 2;
2110 for (i = 0; i < 2; ++i)
2111 if (mask[0] == '/')
2112 ++mask;
2113 else if (mask[0] == '\\' && mask[1] == '/')
2114 mask += 2;
2115 else
2116 return FNM_NOMATCH;
2117 }
2118
2119 /* In Unix styles, treating ? and * w.r.t. components is simple.
2120 * No need to do matching component by component. */
2121
2122 return match_unix(mask, name, flags, name);
2123 }
2124
2125 /* Now compare all the components of the path name, one by one.
2126 * Note that the path separator must not be enclosed in brackets. */
2127
2128 while (*mask != 0 || *name != 0)
2129 {
2130
2131 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2132 * is reached even if there are components left in NAME. */
2133
2134 if (*mask == 0 && (flags & FNM_PATHPREFIX))
2135 return FNM_MATCH;
2136
2137 /* Compare a single component of the path name. */
2138
2139 rc = match_comp(mask, name, flags);
2140 if (rc != FNM_MATCH)
2141 return rc;
2142
2143 /* Skip to the next component or to the end of the path name. */
2144
2145 mask = skip_comp_os2(mask);
2146 name = skip_comp_os2(name);
2147 }
2148
2149 /* If we reached the ends of both strings, the names match. */
2150
2151 if (*mask == 0 && *name == 0)
2152 return FNM_MATCH;
2153
2154 /* The names do not match. */
2155
2156 return FNM_NOMATCH;
2157}
2158
2159/*
2160 *@@ strhMatchOS2:
2161 * this matches wildcards, similar to what DosEditName does.
2162 * However, this does not require a file to be present, but
2163 * works on strings only.
2164 */
2165
2166BOOL strhMatchOS2(const char *pcszMask, // in: mask (e.g. "*.txt")
2167 const char *pcszName) // in: string to check (e.g. "test.txt")
2168{
2169 return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2170 (const unsigned char *)pcszName,
2171 FNM_OS2 | FNM_IGNORECASE)
2172 == FNM_MATCH)
2173 );
2174}
2175
2176/*
2177 *@@ strhMatchExt:
2178 * like strhMatchOS2, but this takes all the flags
2179 * for input.
2180 *
2181 *@@added V0.9.15 (2001-09-14) [umoeller]
2182 */
2183
2184BOOL strhMatchExt(const char *pcszMask, // in: mask (e.g. "*.txt")
2185 const char *pcszName, // in: string to check (e.g. "test.txt")
2186 unsigned flags) // in: FNM_* flags
2187{
2188 return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2189 (const unsigned char *)pcszName,
2190 flags)
2191 == FNM_MATCH)
2192 );
2193}
2194
2195/* ******************************************************************
2196 *
2197 * Fast string searches
2198 *
2199 ********************************************************************/
2200
// assertions from the imported SFL code are compiled out:
// the macro expands to nothing, so its argument is never evaluated
#define ASSERT(expr)
2202
2203/*
2204 * The following code has been taken from the "Standard
2205 * Function Library", file sflfind.c, and only slightly
2206 * modified to conform to the rest of this file.
2207 *
2208 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2209 * Revised: 98/05/04
2210 *
2211 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2212 *
2213 * The SFL Licence allows incorporating SFL code into other
2214 * programs, as long as the copyright is reprinted and the
2215 * code is marked as modified, so this is what we do.
2216 */
2217
2218/*
2219 *@@ strhmemfind:
2220 * searches for a pattern in a block of memory using the
2221 * Boyer-Moore-Horspool-Sunday algorithm.
2222 *
2223 * The block and pattern may contain any values; you must
2224 * explicitly provide their lengths. If you search for strings,
2225 * use strlen() on the buffers.
2226 *
2227 * Returns a pointer to the pattern if found within the block,
2228 * or NULL if the pattern was not found.
2229 *
2230 * This algorithm needs a "shift table" to cache data for the
2231 * search pattern. This table can be reused when performing
2232 * several searches with the same pattern.
2233 *
 *      "shift" must point to an array big enough to hold 256 (2**8)
 *      "size_t" values, one for each possible byte value.
2236 *
2237 * If (*repeat_find == FALSE), the shift table is initialized.
2238 * So on the first search with a given pattern, *repeat_find
2239 * should be FALSE. This function sets it to TRUE after the
2240 * shift table is initialised, allowing the initialisation
2241 * phase to be skipped on subsequent searches.
2242 *
2243 * This function is most effective when repeated searches are
2244 * made for the same pattern in one or more large buffers.
2245 *
2246 * Example:
2247 *
2248 + PSZ pszHaystack = "This is a sample string.",
2249 + pszNeedle = "string";
2250 + size_t shift[256];
2251 + BOOL fRepeat = FALSE;
2252 +
2253 + PSZ pFound = strhmemfind(pszHaystack,
2254 + strlen(pszHaystack), // block size
2255 + pszNeedle,
2256 + strlen(pszNeedle), // pattern size
2257 + shift,
2258 + &fRepeat);
2259 *
2260 * Taken from the "Standard Function Library", file sflfind.c.
2261 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2262 * Slightly modified by umoeller.
2263 *
2264 *@@added V0.9.3 (2000-05-08) [umoeller]
2265 */
2266
2267void* strhmemfind(const void *in_block, // in: block containing data
2268 size_t block_size, // in: size of block in bytes
2269 const void *in_pattern, // in: pattern to search for
2270 size_t pattern_size, // in: size of pattern block
2271 size_t *shift, // in/out: shift table (search buffer)
2272 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
2273{
2274 size_t byte_nbr, // Distance through block
2275 match_size; // Size of matched part
2276 const unsigned char
2277 *match_base = NULL, // Base of match of pattern
2278 *match_ptr = NULL, // Point within current match
2279 *limit = NULL; // Last potiental match point
2280 const unsigned char
2281 *block = (unsigned char *) in_block, // Concrete pointer to block data
2282 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2283
2284 if ( (block == NULL)
2285 || (pattern == NULL)
2286 || (shift == NULL)
2287 )
2288 return (NULL);
2289
2290 // Pattern must be smaller or equal in size to string
2291 if (block_size < pattern_size)
2292 return (NULL); // Otherwise it's not found
2293
2294 if (pattern_size == 0) // Empty patterns match at start
2295 return ((void *)block);
2296
2297 // Build the shift table unless we're continuing a previous search
2298
2299 // The shift table determines how far to shift before trying to match
2300 // again, if a match at this point fails. If the byte after where the
2301 // end of our pattern falls is not in our pattern, then we start to
2302 // match again after that byte; otherwise we line up the last occurence
2303 // of that byte in our pattern under that byte, and try match again.
2304
2305 if (!repeat_find || !*repeat_find)
2306 {
2307 for (byte_nbr = 0;
2308 byte_nbr < 256;
2309 byte_nbr++)
2310 shift[byte_nbr] = pattern_size + 1;
2311 for (byte_nbr = 0;
2312 byte_nbr < pattern_size;
2313 byte_nbr++)
2314 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2315
2316 if (repeat_find)
2317 *repeat_find = TRUE;
2318 }
2319
2320 // Search for the block, each time jumping up by the amount
2321 // computed in the shift table
2322
2323 limit = block + (block_size - pattern_size + 1);
2324 ASSERT (limit > block);
2325
2326 for (match_base = block;
2327 match_base < limit;
2328 match_base += shift[*(match_base + pattern_size)])
2329 {
2330 match_ptr = match_base;
2331 match_size = 0;
2332
2333 // Compare pattern until it all matches, or we find a difference
2334 while (*match_ptr++ == pattern[match_size++])
2335 {
2336 ASSERT (match_size <= pattern_size &&
2337 match_ptr == (match_base + match_size));
2338
2339 // If we found a match, return the start address
2340 if (match_size >= pattern_size)
2341 return ((void*)(match_base));
2342
2343 }
2344 }
2345 return (NULL); // Found nothing
2346}
2347
2348/*
2349 *@@ strhtxtfind:
2350 * searches for a case-insensitive text pattern in a string
2351 * using the Boyer-Moore-Horspool-Sunday algorithm. The string and
2352 * pattern are null-terminated strings. Returns a pointer to the pattern
2353 * if found within the string, or NULL if the pattern was not found.
2354 * Will match strings irrespective of case. To match exact strings, use
2355 * strhfind(). Will not work on multibyte characters.
2356 *
2357 * Examples:
2358 + char *result;
2359 +
2360 + result = strhtxtfind ("AbracaDabra", "cad");
2361 + if (result)
2362 + puts (result);
2363 +
2364 * Taken from the "Standard Function Library", file sflfind.c.
2365 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2366 * Slightly modified.
2367 *
2368 *@@added V0.9.3 (2000-05-08) [umoeller]
2369 */
2370
char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const unsigned char
        *text = NULL,                   // String as unsigned bytes
        *needle = NULL,                 // Pattern as unsigned bytes
        *match_base = NULL,             // Base of match of pattern
        *limit = NULL;                  // First position where the pattern no longer fits

    // Expect non-NULL pointers, but fail gracefully if we get one
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // tolower() is only defined for values of unsigned char (and EOF).
    // With a signed plain char, characters above 127 would be passed
    // as negative values, so we look at both strings thru unsigned
    // pointers from here on.
    text   = (const unsigned char *) string;
    needle = (const unsigned char *) pattern;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (needle [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. The byte behind the last candidate is
    // the null terminator of the string, so looking at it is safe here.

    limit = text + (string_size - pattern_size + 1);

    for (match_base = text;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (match_base [pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        match_size = 0;
        while (tolower (match_base [match_size]) == tolower (needle [match_size]))
        {
            // If we found a match, return the start address
            if (++match_size >= pattern_size)
                return ((char *) string + (match_base - text));
        }
    }

    return (NULL);                      // Found nothing
}
2442
Note: See TracBrowser for help on using the repository browser.