source: trunk/src/helpers/stringh.c@ 110

Last change on this file since 110 was 108, checked in by umoeller, 24 years ago

Lots of updates from the last week for conditional compiles and other stuff.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 73.7 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#include <os2.h>
45
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <math.h>
51
52#include "setup.h" // code generation and debugging options
53
54#define DONT_REPLACE_STRINGH_MALLOC
55#include "helpers\stringh.h"
56#include "helpers\xstring.h" // extended string helpers
57
58#pragma hdrstop
59
60/*
61 *@@category: Helpers\C helpers\String management
62 * See stringh.c and xstring.c.
63 */
64
65/*
66 *@@category: Helpers\C helpers\String management\C string helpers
67 * See stringh.c.
68 */
69
70/*
71 *@@ strhcpy:
72 * like strdup, but this one doesn't crash if string2 is NULL,
73 * but sets the first byte in string1 to \0 instead.
74 *
75 *@@added V0.9.14 (2001-08-01) [umoeller]
76 */
77
78PSZ strhcpy(PSZ string1, const char *string2)
79{
80 if (string2)
81 return (strcpy(string1, string2));
82
83 *string1 = '\0';
84 return (string1);
85}
86
87#ifdef __DEBUG_MALLOC_ENABLED__
88
89/*
90 *@@ strhdup:
91 * memory debug version of strhdup.
92 *
93 *@@added V0.9.0 [umoeller]
94 */
95
96PSZ strhdupDebug(const char *pszSource,
97 const char *pcszSourceFile,
98 unsigned long ulLine,
99 const char *pcszFunction)
100{
101 if (pszSource)
102 {
103 PSZ p = (PSZ)memdMalloc(strlen(pszSource) + 1,
104 pcszSourceFile,
105 ulLine,
106 pcszFunction);
107 strcpy(p, pszSource);
108 return (p);
109 }
110 else
111 return (0);
112}
113
114#endif // __DEBUG_MALLOC_ENABLED__
115
116/*
117 *@@ strhdup:
118 * like strdup, but this one doesn't crash if pszSource is NULL,
119 * but returns NULL also.
120 *
121 *@@added V0.9.0 [umoeller]
122 */
123
124PSZ strhdup(const char *pszSource)
125{
126 if (pszSource)
127 return (strdup(pszSource));
128 else
129 return (0);
130}
131
/*
 *@@ strhcmp:
 *      NULL-tolerant strcmp. A NULL pointer compares as
 *      "less than" any non-NULL string, and two NULL
 *      pointers compare as equal.
 *
 *      The result is normalized to exactly -1, 0, or +1,
 *      while strcmp may return any negative or positive
 *      value. This is useful for tree comparison funcs.
 *
 *@@added V0.9.9 (2001-02-16) [umoeller]
 */

int strhcmp(const char *p1, const char *p2)
{
    int iCompare;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, otherwise p1 is less
        return ((p2) ? -1 : 0);

    if (!p2)
        // only p2 is NULL: p1 is greater
        return (+1);

    iCompare = strcmp(p1, p2);

    // collapse any negative/positive value to -1/+1
    return ((iCompare > 0) - (iCompare < 0));
}
164
/*
 *@@ strhicmp:
 *      like strhcmp, but compares without respect to case
 *      (using stricmp).
 *
 *      A NULL pointer compares as "less than" any non-NULL
 *      string, two NULL pointers compare as equal, and the
 *      result is always exactly -1, 0, or +1.
 *
 *@@added V0.9.9 (2001-04-07) [umoeller]
 */

int strhicmp(const char *p1, const char *p2)
{
    int iCompare;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, otherwise p1 is less
        return ((p2) ? -1 : 0);

    if (!p2)
        // only p2 is NULL: p1 is greater
        return (+1);

    iCompare = stricmp(p1, p2);

    // collapse any negative/positive value to -1/+1
    return ((iCompare > 0) - (iCompare < 0));
}
191
192/*
193 *@@ strhistr:
194 * like strstr, but case-insensitive.
195 *
196 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
197 */
198
199PSZ strhistr(const char *string1, const char *string2)
200{
201 PSZ prc = NULL;
202
203 if ((string1) && (string2))
204 {
205 PSZ pszSrchIn = strdup(string1);
206 PSZ pszSrchFor = strdup(string2);
207
208 if ((pszSrchIn) && (pszSrchFor))
209 {
210 strupr(pszSrchIn);
211 strupr(pszSrchFor);
212
213 prc = strstr(pszSrchIn, pszSrchFor);
214 if (prc)
215 {
216 // prc now has the first occurence of the string,
217 // but in pszSrchIn; we need to map this
218 // return value to the original string
219 prc = (prc-pszSrchIn) // offset in pszSrchIn
220 + (PSZ)string1;
221 }
222 }
223 if (pszSrchFor)
224 free(pszSrchFor);
225 if (pszSrchIn)
226 free(pszSrchIn);
227 }
228 return (prc);
229}
230
231/*
232 *@@ strhncpy0:
233 * like strncpy, but always appends a 0 character.
234 */
235
236ULONG strhncpy0(PSZ pszTarget,
237 const char *pszSource,
238 ULONG cbSource)
239{
240 ULONG ul = 0;
241 PSZ pTarget = pszTarget,
242 pSource = (PSZ)pszSource;
243
244 for (ul = 0; ul < cbSource; ul++)
245 if (*pSource)
246 *pTarget++ = *pSource++;
247 else
248 break;
249 *pTarget = 0;
250
251 return (ul);
252}
253
254/*
255 * strhCount:
256 * this counts the occurences of c in pszSearch.
257 */
258
259ULONG strhCount(const char *pszSearch,
260 CHAR c)
261{
262 PSZ p = (PSZ)pszSearch;
263 ULONG ulCount = 0;
264 while (TRUE)
265 {
266 p = strchr(p, c);
267 if (p)
268 {
269 ulCount++;
270 p++;
271 }
272 else
273 break;
274 }
275 return (ulCount);
276}
277
278/*
279 *@@ strhIsDecimal:
280 * returns TRUE if psz consists of decimal digits only.
281 */
282
283BOOL strhIsDecimal(PSZ psz)
284{
285 PSZ p = psz;
286 while (*p != 0)
287 {
288 if (isdigit(*p) == 0)
289 return (FALSE);
290 p++;
291 }
292
293 return (TRUE);
294}
295
296#ifdef __DEBUG_MALLOC_ENABLED__
297
298/*
299 *@@ strhSubstrDebug:
300 * memory debug version of strhSubstr.
301 *
302 *@@added V0.9.14 (2001-08-01) [umoeller]
303 */
304
305PSZ strhSubstrDebug(const char *pBegin, // in: first char
306 const char *pEnd, // in: last char (not included)
307 const char *pcszSourceFile,
308 unsigned long ulLine,
309 const char *pcszFunction)
310{
311 PSZ pszSubstr = NULL;
312
313 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
314 {
315 ULONG cbSubstr = (pEnd - pBegin);
316 if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
317 pcszSourceFile,
318 ulLine,
319 pcszFunction))
320 {
321 // strhncpy0(pszSubstr, pBegin, cbSubstr);
322 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
323 *(pszSubstr + cbSubstr) = '\0';
324 }
325 }
326
327 return (pszSubstr);
328}
329
330#endif // __DEBUG_MALLOC_ENABLED__
331
332/*
333 *@@ strhSubstr:
334 * this creates a new PSZ containing the string
335 * from pBegin to pEnd, excluding the pEnd character.
336 * The new string is null-terminated. The caller
337 * must free() the new string after use.
338 *
339 * Example:
340 + "1234567890"
341 + ^ ^
342 + p1 p2
343 + strhSubstr(p1, p2)
344 * would return a new string containing "2345678".
345 *
346 *@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
347 *@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
348 */
349
350PSZ strhSubstr(const char *pBegin, // in: first char
351 const char *pEnd) // in: last char (not included)
352{
353 PSZ pszSubstr = NULL;
354
355 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
356 {
357 ULONG cbSubstr = (pEnd - pBegin);
358 if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
359 {
360 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
361 *(pszSubstr + cbSubstr) = '\0';
362 }
363 }
364
365 return (pszSubstr);
366}
367
368/*
369 *@@ strhExtract:
370 * searches pszBuf for the cOpen character and returns
371 * the data in between cOpen and cClose, excluding
372 * those two characters, in a newly allocated buffer
373 * which you must free() afterwards.
374 *
375 * Spaces and newlines/linefeeds are skipped.
376 *
377 * If the search was successful, the new buffer
378 * is returned and, if (ppEnd != NULL), *ppEnd points
379 * to the first character after the cClose character
380 * found in the buffer.
381 *
382 * If the search was not successful, NULL is
383 * returned, and *ppEnd is unchanged.
384 *
385 * If another cOpen character is found before
386 * cClose, matching cClose characters will be skipped.
387 * You can therefore nest the cOpen and cClose
388 * characters.
389 *
390 * This function ignores cOpen and cClose characters
391 * in C-style comments and strings surrounded by
392 * double quotes.
393 *
394 * Example:
395 + PSZ pszBuf = "KEYWORD { --blah-- } next",
396 + pEnd;
397 + strhExtract(pszBuf,
398 + '{', '}',
399 + &pEnd)
400 * would return a new buffer containing " --blah-- ",
401 * and ppEnd would afterwards point to the space
402 * before "next" in the static buffer.
403 *
404 *@@added V0.9.0 [umoeller]
405 */
406
407PSZ strhExtract(PSZ pszBuf, // in: search buffer
408 CHAR cOpen, // in: opening char
409 CHAR cClose, // in: closing char
410 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
411{
412 PSZ pszReturn = NULL;
413
414 if (pszBuf)
415 {
416 PSZ pOpen = strchr(pszBuf, cOpen);
417 if (pOpen)
418 {
419 // opening char found:
420 // now go thru the whole rest of the buffer
421 PSZ p = pOpen+1;
422 LONG lLevel = 1; // if this goes 0, we're done
423 while (*p)
424 {
425 if (*p == cOpen)
426 lLevel++;
427 else if (*p == cClose)
428 {
429 lLevel--;
430 if (lLevel <= 0)
431 {
432 // matching closing bracket found:
433 // extract string
434 pszReturn = strhSubstr(pOpen+1, // after cOpen
435 p); // excluding cClose
436 if (ppEnd)
437 *ppEnd = p+1;
438 break; // while (*p)
439 }
440 }
441 else if (*p == '\"')
442 {
443 // beginning of string:
444 PSZ p2 = p+1;
445 // find end of string
446 while ((*p2) && (*p2 != '\"'))
447 p2++;
448
449 if (*p2 == '\"')
450 // closing quote found:
451 // search on after that
452 p = p2; // raised below
453 else
454 break; // while (*p)
455 }
456
457 p++;
458 }
459 }
460 }
461
462 return (pszReturn);
463}
464
465/*
466 *@@ strhQuote:
467 * similar to strhExtract, except that
468 * opening and closing chars are the same,
469 * and therefore no nesting is possible.
470 * Useful for extracting stuff between
471 * quotes.
472 *
473 *@@added V0.9.0 [umoeller]
474 */
475
476PSZ strhQuote(PSZ pszBuf,
477 CHAR cQuote,
478 PSZ *ppEnd)
479{
480 PSZ pszReturn = NULL,
481 p1 = NULL;
482 if ((p1 = strchr(pszBuf, cQuote)))
483 {
484 PSZ p2 = strchr(p1+1, cQuote);
485 if (p2)
486 {
487 pszReturn = strhSubstr(p1+1, p2);
488 if (ppEnd)
489 // store closing char
490 *ppEnd = p2 + 1;
491 }
492 }
493
494 return (pszReturn);
495}
496
497/*
498 *@@ strhStrip:
499 * removes all double spaces.
500 * This copies within the "psz" buffer.
501 * If any double spaces are found, the
502 * string will be shorter than before,
503 * but the buffer is _not_ reallocated,
504 * so there will be unused bytes at the
505 * end.
506 *
507 * Returns the number of spaces removed.
508 *
509 *@@added V0.9.0 [umoeller]
510 */
511
512ULONG strhStrip(PSZ psz) // in/out: string
513{
514 PSZ p;
515 ULONG cb = strlen(psz),
516 ulrc = 0;
517
518 for (p = psz; p < psz+cb; p++)
519 {
520 if ((*p == ' ') && (*(p+1) == ' '))
521 {
522 PSZ p2 = p;
523 while (*p2)
524 {
525 *p2 = *(p2+1);
526 p2++;
527 }
528 cb--;
529 p--;
530 ulrc++;
531 }
532 }
533 return (ulrc);
534}
535
536/*
537 *@@ strhins:
538 * this inserts one string into another.
539 *
540 * pszInsert is inserted into pszBuffer at offset
541 * ulInsertOfs (which counts from 0).
542 *
543 * A newly allocated string is returned. pszBuffer is
544 * not changed. The new string should be free()'d after
545 * use.
546 *
547 * Upon errors, NULL is returned.
548 *
549 *@@changed V0.9.0 [umoeller]: completely rewritten.
550 */
551
552PSZ strhins(const char *pcszBuffer,
553 ULONG ulInsertOfs,
554 const char *pcszInsert)
555{
556 PSZ pszNew = NULL;
557
558 if ((pcszBuffer) && (pcszInsert))
559 {
560 do {
561 ULONG cbBuffer = strlen(pcszBuffer);
562 ULONG cbInsert = strlen(pcszInsert);
563
564 // check string length
565 if (ulInsertOfs > cbBuffer + 1)
566 break; // do
567
568 // OK, let's go.
569 pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
570
571 // copy stuff before pInsertPos
572 memcpy(pszNew,
573 pcszBuffer,
574 ulInsertOfs);
575 // copy string to be inserted
576 memcpy(pszNew + ulInsertOfs,
577 pcszInsert,
578 cbInsert);
579 // copy stuff after pInsertPos
580 strcpy(pszNew + ulInsertOfs + cbInsert,
581 pcszBuffer + ulInsertOfs);
582 } while (FALSE);
583 }
584
585 return (pszNew);
586}
587
588/*
589 *@@ strhFindReplace:
590 * wrapper around xstrFindReplace to work with C strings.
591 * Note that *ppszBuf can get reallocated and must
592 * be free()'able.
593 *
594 * Repetitive use of this wrapper is not recommended
595 * because it is considerably slower than xstrFindReplace.
596 *
597 *@@added V0.9.6 (2000-11-01) [umoeller]
598 *@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
599 */
600
601ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
602 PULONG pulOfs, // in: where to begin search (0 = start);
603 // out: ofs of first char after replacement string
604 const char *pcszSearch, // in: search string; cannot be NULL
605 const char *pcszReplace) // in: replacement string; cannot be NULL
606{
607 ULONG ulrc = 0;
608 XSTRING xstrBuf,
609 xstrFind,
610 xstrReplace;
611 size_t ShiftTable[256];
612 BOOL fRepeat = FALSE;
613 xstrInitSet(&xstrBuf, *ppszBuf);
614 // reallocated and returned, so we're safe
615 xstrInitSet(&xstrFind, (PSZ)pcszSearch);
616 xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
617 // these two are never freed, so we're safe too
618
619 if ((ulrc = xstrFindReplace(&xstrBuf,
620 pulOfs,
621 &xstrFind,
622 &xstrReplace,
623 ShiftTable,
624 &fRepeat)))
625 // replaced:
626 *ppszBuf = xstrBuf.psz;
627
628 return (ulrc);
629}
630
631/*
632 * strhWords:
633 * returns the no. of words in "psz".
634 * A string is considered a "word" if
635 * it is surrounded by spaces only.
636 *
637 *@@added V0.9.0 [umoeller]
638 */
639
640ULONG strhWords(PSZ psz)
641{
642 PSZ p;
643 ULONG cb = strlen(psz),
644 ulWords = 0;
645 if (cb > 1)
646 {
647 ulWords = 1;
648 for (p = psz; p < psz+cb; p++)
649 if (*p == ' ')
650 ulWords++;
651 }
652 return (ulWords);
653}
654
655/*
656 *@@ strhGetWord:
657 * finds word boundaries.
658 *
659 * *ppszStart is used as the beginning of the
660 * search.
661 *
662 * If a word is found, *ppszStart is set to
663 * the first character of the word which was
664 * found and *ppszEnd receives the address
665 * of the first character _after_ the word,
666 * which is probably a space or a \n or \r char.
667 * We then return TRUE.
668 *
669 * The search is stopped if a null character
670 * is found or pLimit is reached. In that case,
671 * FALSE is returned.
672 *
673 *@@added V0.9.1 (2000-02-13) [umoeller]
674 */
675
676BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
677 // out: start of word (if TRUE is returned)
678 const char *pLimit, // in: ptr to last char after *ppszStart to be
679 // searched; if the word does not end before
680 // or with this char, FALSE is returned
681 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
682 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
683 PSZ *ppszEnd) // out: first char _after_ word
684 // (if TRUE is returned)
685{
686 // characters after which a word can be started
687 // const char *pcszBeginChars = "\x0d\x0a ";
688 // const char *pcszEndChars = "\x0d\x0a /-";
689
690 PSZ pStart = *ppszStart;
691
692 // find start of word
693 while ( (pStart < (PSZ)pLimit)
694 && (strchr(pcszBeginChars, *pStart))
695 )
696 // if char is a "before word" char: go for next
697 pStart++;
698
699 if (pStart < (PSZ)pLimit)
700 {
701 // found a valid "word start" character
702 // (which is not in pcszBeginChars):
703
704 // find end of word
705 PSZ pEndOfWord = pStart;
706 while ( (pEndOfWord <= (PSZ)pLimit)
707 && (strchr(pcszEndChars, *pEndOfWord) == 0)
708 )
709 // if char is not an "end word" char: go for next
710 pEndOfWord++;
711
712 if (pEndOfWord <= (PSZ)pLimit)
713 {
714 // whoa, got a word:
715 *ppszStart = pStart;
716 *ppszEnd = pEndOfWord;
717 return (TRUE);
718 }
719 }
720
721 return (FALSE);
722}
723
724/*
725 *@@ strhIsWord:
726 * returns TRUE if p points to a "word"
727 * in pcszBuf.
728 *
729 * p is considered a word if the character _before_
730 * it is in pcszBeginChars and the char _after_
731 * it (i.e. *(p+cbSearch)) is in pcszEndChars.
732 *
733 *@@added V0.9.6 (2000-11-12) [umoeller]
734 */
735
736BOOL strhIsWord(const char *pcszBuf,
737 const char *p, // in: start of word
738 ULONG cbSearch, // in: length of word
739 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
740 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
741{
742 BOOL fEndOK = FALSE;
743
744 // check previous char
745 if ( (p == pcszBuf)
746 || (strchr(pcszBeginChars, *(p-1)))
747 )
748 {
749 // OK, valid begin char:
750 // check end char
751 CHAR cNextChar = *(p + cbSearch);
752 if (cNextChar == 0)
753 fEndOK = TRUE;
754 else
755 {
756 char *pc = strchr(pcszEndChars, cNextChar);
757 if (pc)
758 // OK, is end char: avoid doubles of that char,
759 // but allow spaces
760 if ( (cNextChar+1 != *pc)
761 || (cNextChar+1 == ' ')
762 || (cNextChar+1 == 0)
763 )
764 fEndOK = TRUE;
765 }
766 }
767
768 return (fEndOK);
769}
770
771/*
772 *@@ strhFindWord:
773 * searches for pszSearch in pszBuf, which is
774 * returned if found (or NULL if not).
775 *
776 * As opposed to strstr, this finds pszSearch
777 * only if it is a "word". A search string is
778 * considered a word if the character _before_
779 * it is in pcszBeginChars and the char _after_
780 * it is in pcszEndChars.
781 *
782 * Example:
783 + strhFindWord("This is an example.", "is");
784 + returns ...........^ this, but not the "is" in "This".
785 *
786 * The algorithm here uses strstr to find pszSearch in pszBuf
787 * and performs additional "is-word" checks for each item found
788 * (by calling strhIsWord).
789 *
790 * Note that this function is fairly slow compared to xstrFindWord.
791 *
792 *@@added V0.9.0 (99-11-08) [umoeller]
793 *@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
794 */
795
796PSZ strhFindWord(const char *pszBuf,
797 const char *pszSearch,
798 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
799 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
800{
801 PSZ pszReturn = 0;
802 ULONG cbBuf = strlen(pszBuf),
803 cbSearch = strlen(pszSearch);
804
805 if ((cbBuf) && (cbSearch))
806 {
807 const char *p = pszBuf;
808
809 do // while p
810 {
811 p = strstr(p, pszSearch);
812 if (p)
813 {
814 // string found:
815 // check if that's a word
816
817 if (strhIsWord(pszBuf,
818 p,
819 cbSearch,
820 pcszBeginChars,
821 pcszEndChars))
822 {
823 // valid end char:
824 pszReturn = (PSZ)p;
825 break;
826 }
827
828 p += cbSearch;
829 }
830 } while (p);
831
832 }
833 return (pszReturn);
834}
835
836/*
837 *@@ strhFindEOL:
838 * returns a pointer to the next \r, \n or null character
839 * following pszSearchIn. Stores the offset in *pulOffset.
840 *
841 * This should never return NULL because at some point,
842 * there will be a null byte in your string.
843 *
844 *@@added V0.9.4 (2000-07-01) [umoeller]
845 */
846
847PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
848 PULONG pulOffset) // out: offset (ptr can be NULL)
849{
850 const char *p = pcszSearchIn,
851 *prc = 0;
852 while (TRUE)
853 {
854 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
855 {
856 prc = p;
857 break;
858 }
859 p++;
860 }
861
862 if ((pulOffset) && (prc))
863 *pulOffset = prc - pcszSearchIn;
864
865 return ((PSZ)prc);
866}
867
868/*
869 *@@ strhFindNextLine:
870 * like strhFindEOL, but this returns the character
871 * _after_ \r or \n. Note that this might return
872 * a pointer to terminating NULL character also.
873 */
874
875PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
876{
877 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
878 // pEOL now points to the \r char or the terminating 0 byte;
879 // if not null byte, advance pointer
880 PSZ pNextLine = pEOL;
881 if (*pNextLine == '\r')
882 pNextLine++;
883 if (*pNextLine == '\n')
884 pNextLine++;
885 if (pulOffset)
886 *pulOffset = pNextLine - pszSearchIn;
887 return (pNextLine);
888}
889
890/*
891 *@@ strhBeautifyTitle:
892 * replaces all line breaks (0xd, 0xa) with spaces.
893 *
894 *@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
895 */
896
897BOOL strhBeautifyTitle(PSZ psz)
898{
899 BOOL rc = FALSE;
900 CHAR *p = psz;
901
902 while(*p)
903 if ( (*p == '\r')
904 || (*p == '\n')
905 )
906 {
907 rc = TRUE;
908 if ( (p != psz)
909 && (p[-1] == ' ')
910 )
911 memmove(p, p + 1, strlen(p));
912 else
913 *p++ = ' ';
914 }
915 else
916 p++;
917
918 return (rc);
919}
920
921/*
922 * strhFindAttribValue:
923 * searches for pszAttrib in pszSearchIn; if found,
924 * returns the first character after the "=" char.
925 * If "=" is not found, a space, \r, and \n are
926 * also accepted. This function searches without
927 * respecting case.
928 *
929 * <B>Example:</B>
930 + strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
931 +
932 + returns ....................... ^ this address.
933 *
934 *@@added V0.9.0 [umoeller]
935 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
936 *@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
937 */
938
939PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
940{
941 PSZ prc = 0;
942 PSZ pszSearchIn2, p;
943 ULONG cbAttrib = strlen(pszAttrib),
944 ulLength = strlen(pszSearchIn);
945
946 // use alloca(), so memory is freed on function exit
947 pszSearchIn2 = (PSZ)alloca(ulLength + 1);
948 memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
949
950 // 1) find token, (space char, \n, \r, \t)
951 p = strtok(pszSearchIn2, " \n\r\t");
952 while (p)
953 {
954 CHAR c2;
955 PSZ pOrig;
956
957 // check tag name
958 if (!strnicmp(p, pszAttrib, cbAttrib))
959 {
960 // position in original string
961 pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
962
963 // yes:
964 prc = pOrig + cbAttrib;
965 c2 = *prc;
966 while ( ( (c2 == ' ')
967 || (c2 == '=')
968 || (c2 == '\n')
969 || (c2 == '\r')
970 )
971 && (c2 != 0)
972 )
973 c2 = *++prc;
974
975 break;
976 }
977
978 p = strtok(NULL, " \n\r\t");
979 }
980
981 return (prc);
982}
983
984/* PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
985{
986 PSZ prc = 0;
987 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
988 p,
989 p2;
990 ULONG cbAttrib = strlen(pszAttrib);
991
992 // 1) find space char
993 while ((p = strchr(pszSearchIn2, ' ')))
994 {
995 CHAR c;
996 p++;
997 if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
998 {
999 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1000 // now check whether the p+strlen(pszAttrib)
1001 // is a valid end-of-tag character
1002 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1003 && ( (c == ' ')
1004 || (c == '>')
1005 || (c == '=')
1006 || (c == '\r')
1007 || (c == '\n')
1008 || (c == 0)
1009 )
1010 )
1011 {
1012 // yes:
1013 CHAR c2;
1014 p2 = p + cbAttrib;
1015 c2 = *p2;
1016 while ( ( (c2 == ' ')
1017 || (c2 == '=')
1018 || (c2 == '\n')
1019 || (c2 == '\r')
1020 )
1021 && (c2 != 0)
1022 )
1023 c2 = *++p2;
1024
1025 prc = p2;
1026 break; // first while
1027 }
1028 }
1029 else
1030 break;
1031
1032 pszSearchIn2++;
1033 }
1034 return (prc);
1035} */
1036
1037/*
1038 * strhGetNumAttribValue:
1039 * stores the numerical parameter value of an HTML-style
1040 * tag in *pl.
1041 *
1042 * Returns the address of the tag parameter in the
1043 * search buffer, if found, or NULL.
1044 *
1045 * <B>Example:</B>
1046 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1047 *
1048 * stores 123 in the "l" variable.
1049 *
1050 *@@added V0.9.0 [umoeller]
1051 *@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1052 */
1053
1054PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1055 const char *pszTag, // e.g. "INDEX"
1056 PLONG pl) // out: numerical value
1057{
1058 PSZ pParam;
1059 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1060 {
1061 if ( (*pParam == '\"')
1062 || (*pParam == '\'')
1063 )
1064 pParam++; // V0.9.9 (2001-04-04) [umoeller]
1065
1066 sscanf(pParam, "%ld", pl);
1067 }
1068
1069 return (pParam);
1070}
1071
1072/*
1073 * strhGetTextAttr:
1074 * retrieves the attribute value of a textual HTML-style tag
1075 * in a newly allocated buffer, which is returned,
1076 * or NULL if attribute not found.
1077 * If an attribute value is to contain spaces, it
1078 * must be enclosed in quotes.
1079 *
1080 * The offset of the attribute data in pszSearchIn is
1081 * returned in *pulOffset so that you can do multiple
1082 * searches.
1083 *
1084 * This returns a new buffer, which should be free()'d after use.
1085 *
1086 * <B>Example:</B>
1087 + ULONG ulOfs = 0;
1088 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1089 + ............^ ulOfs
1090 *
1091 * returns a new string with the value "blublub" (without
1092 * quotes) and sets ulOfs to 12.
1093 *
1094 *@@added V0.9.0 [umoeller]
1095 */
1096
1097PSZ strhGetTextAttr(const char *pszSearchIn,
1098 const char *pszTag,
1099 PULONG pulOffset) // out: offset where found
1100{
1101 PSZ pParam,
1102 pParam2,
1103 prc = NULL;
1104 ULONG ulCount = 0;
1105 LONG lNestingLevel = 0;
1106
1107 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1108 {
1109 // determine end character to search for: a space
1110 CHAR cEnd = ' ';
1111 if (*pParam == '\"')
1112 {
1113 // or, if the data is enclosed in quotes, a quote
1114 cEnd = '\"';
1115 pParam++;
1116 }
1117
1118 if (pulOffset)
1119 // store the offset
1120 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1121
1122 // now find end of attribute
1123 pParam2 = pParam;
1124 while (*pParam)
1125 {
1126 if (*pParam == cEnd)
1127 // end character found
1128 break;
1129 else if (*pParam == '<')
1130 // yet another opening tag found:
1131 // this is probably some "<" in the attributes
1132 lNestingLevel++;
1133 else if (*pParam == '>')
1134 {
1135 lNestingLevel--;
1136 if (lNestingLevel < 0)
1137 // end of tag found:
1138 break;
1139 }
1140 ulCount++;
1141 pParam++;
1142 }
1143
1144 // copy attribute to new buffer
1145 if (ulCount)
1146 {
1147 prc = (PSZ)malloc(ulCount+1);
1148 memcpy(prc, pParam2, ulCount);
1149 *(prc+ulCount) = 0;
1150 }
1151 }
1152 return (prc);
1153}
1154
1155/*
1156 * strhFindEndOfTag:
1157 * returns a pointer to the ">" char
1158 * which seems to terminate the tag beginning
1159 * after pszBeginOfTag.
1160 *
1161 * If additional "<" chars are found, we look
1162 * for additional ">" characters too.
1163 *
1164 * Note: You must pass the address of the opening
1165 * '<' character to this function.
1166 *
1167 * Example:
1168 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1169 + strhFindEndOfTag(pszTest)
1170 + returns.................................^ this.
1171 *
1172 *@@added V0.9.0 [umoeller]
1173 */
1174
1175PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1176{
1177 PSZ p = (PSZ)pszBeginOfTag,
1178 prc = NULL;
1179 LONG lNestingLevel = 0;
1180
1181 while (*p)
1182 {
1183 if (*p == '<')
1184 // another opening tag found:
1185 lNestingLevel++;
1186 else if (*p == '>')
1187 {
1188 // closing tag found:
1189 lNestingLevel--;
1190 if (lNestingLevel < 1)
1191 {
1192 // corresponding: return this
1193 prc = p;
1194 break;
1195 }
1196 }
1197 p++;
1198 }
1199
1200 return (prc);
1201}
1202
1203/*
1204 * strhGetBlock:
1205 * this complex function searches the given string
1206 * for a pair of opening/closing HTML-style tags.
1207 *
1208 * If found, this routine returns TRUE and does
1209 * the following:
1210 *
1211 * 1) allocate a new buffer, copy the text
1212 * enclosed by the opening/closing tags
1213 * into it and set *ppszBlock to that
1214 * buffer;
1215 *
1216 * 2) if the opening tag has any attributes,
1217 * allocate another buffer, copy the
1218 * attributes into it and set *ppszAttrs
1219 * to that buffer; if no attributes are
1220 * found, *ppszAttrs will be NULL;
1221 *
1222 * 3) set *pulOffset to the offset from the
1223 * beginning of *ppszSearchIn where the
1224 * opening tag was found;
1225 *
1226 * 4) advance *ppszSearchIn to after the
1227 * closing tag, so that you can do
1228 * multiple searches without finding the
1229 * same tags twice.
1230 *
1231 * All buffers should be freed using free().
1232 *
1233 * This returns the following:
1234 * -- 0: no error
1235 * -- 1: tag not found at all (doesn't have to be an error)
1236 * -- 2: begin tag found, but no corresponding end tag found. This
1237 * is a real error.
1238 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1239 *
1240 * <B>Example:</B>
1241 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1242 + PSZ pszBlock, pszAttrs;
1243 + ULONG ulOfs;
1244 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1245 *
1246 * would do the following:
1247 *
1248 * 1) set pszBlock to a new string containing "This is page 1."
1249 * without quotes;
1250 *
1251 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1252 *
1253 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1254 *
1255 * 4) pSearch would be advanced to point to the "More text"
1256 * string in the original buffer.
1257 *
1258 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1259 * for HTML parsing.
1260 *
1261 *@@added V0.9.0 [umoeller]
1262 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1263 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1264 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1265 */
1266
1267ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1268 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1269 PSZ pszTag,
1270 PSZ *ppszBlock, // out: block enclosed by the tags
1271 PSZ *ppszAttribs, // out: attributes of the opening tag
1272 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1273 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1274{
1275 ULONG ulrc = 1;
1276 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1277 pszSearch2 = pszBeginTag,
1278 pszClosingTag;
1279 ULONG cbTag = strlen(pszTag);
1280
1281 // go thru the block and check all tags if it's the
1282 // begin tag we're looking for
1283 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1284 {
1285 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1286 // yes: stop
1287 break;
1288 else
1289 pszBeginTag++;
1290 }
1291
1292 if (pszBeginTag)
1293 {
1294 // we found <TAG>:
1295 ULONG ulNestingLevel = 0;
1296
1297 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1298 // strchr(pszBeginTag, '>');
1299 if (pszEndOfBeginTag)
1300 {
1301 // does the caller want the attributes?
1302 if (ppszAttribs)
1303 {
1304 // yes: then copy them
1305 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1306 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1307 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1308 // add terminating 0
1309 *(pszAttrs + ulAttrLen) = 0;
1310
1311 *ppszAttribs = pszAttrs;
1312 }
1313
1314 // output offset of where we found the begin tag
1315 if (pulOfsBeginTag)
1316 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1317
1318 // now find corresponding closing tag (e.g. "</BODY>"
1319 pszBeginTag = pszEndOfBeginTag+1;
1320 // now we're behind the '>' char of the opening tag
1321 // increase offset of that too
1322 if (pulOfsBeginBlock)
1323 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1324
1325 // find next closing tag;
1326 // for the first run, pszSearch2 points to right
1327 // after the '>' char of the opening tag
1328 pszSearch2 = pszBeginTag;
1329 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1330 && (pszClosingTag = strstr(pszSearch2, "<"))
1331 )
1332 {
1333 // if we have another opening tag before our closing
1334 // tag, we need to have several closing tags before
1335 // we're done
1336 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1337 ulNestingLevel++;
1338 else
1339 {
1340 // is this ours?
1341 if ( (*(pszClosingTag+1) == '/')
1342 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1343 )
1344 {
1345 // we've found a matching closing tag; is
1346 // it ours?
1347 if (ulNestingLevel == 0)
1348 {
1349 // our closing tag found:
1350 // allocate mem for a new buffer
1351 // and extract all the text between
1352 // open and closing tags to it
1353 ULONG ulLen = pszClosingTag - pszBeginTag;
1354 if (ppszBlock)
1355 {
1356 PSZ pNew = (PSZ)malloc(ulLen + 1);
1357 strhncpy0(pNew, pszBeginTag, ulLen);
1358 *ppszBlock = pNew;
1359 }
1360
1361 // raise search offset to after the closing tag
1362 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1363
1364 ulrc = 0;
1365
1366 break;
1367 } else
1368 // not our closing tag:
1369 ulNestingLevel--;
1370 }
1371 }
1372 // no matching closing tag: search on after that
1373 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1374 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1375
1376 if (!pszClosingTag)
1377 // no matching closing tag found:
1378 // return 2 (closing tag not found)
1379 ulrc = 2;
1380 } // end if (pszBeginTag)
1381 else
1382 // no matching ">" for opening tag found:
1383 ulrc = 3;
1384 }
1385
1386 return (ulrc);
1387}
1388
1389/* ******************************************************************
1390 *
1391 * Miscellaneous
1392 *
1393 ********************************************************************/
1394
1395/*
1396 *@@ strhArrayAppend:
1397 * this appends a string to a "string array".
1398 *
1399 * A string array is considered a sequence of
1400 * zero-terminated strings in memory. That is,
1401 * after each string's null-byte, the next
1402 * string comes up.
1403 *
1404 * This is useful for composing a single block
1405 * of memory from, say, list box entries, which
1406 * can then be written to OS2.INI in one flush.
1407 *
1408 * To append strings to such an array, call this
1409 * function for each string you wish to append.
1410 * This will re-allocate *ppszRoot with each call,
1411 * and update *pcbRoot, which then contains the
1412 * total size of all strings (including all null
1413 * terminators).
1414 *
1415 * Pass *pcbRoot to PrfSaveProfileData to have the
1416 * block saved.
1417 *
1418 * Note: On the first call, *ppszRoot and *pcbRoot
1419 * _must_ be both NULL, or this crashes.
1420 *
1421 *@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1422 */
1423
1424VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1425 const char *pcszNew, // in: string to append
1426 ULONG cbNew, // in: size of that string or 0 to run strlen() here
1427 PULONG pcbRoot) // in/out: size of array
1428{
1429 PSZ pszTemp;
1430
1431 if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1432 cbNew = strlen(pcszNew);
1433
1434 pszTemp = (PSZ)malloc(*pcbRoot
1435 + cbNew
1436 + 1); // two null bytes
1437 if (*ppszRoot)
1438 {
1439 // not first loop: copy old stuff
1440 memcpy(pszTemp,
1441 *ppszRoot,
1442 *pcbRoot);
1443 free(*ppszRoot);
1444 }
1445 // append new string
1446 strcpy(pszTemp + *pcbRoot,
1447 pcszNew);
1448 // update root
1449 *ppszRoot = pszTemp;
1450 // update length
1451 *pcbRoot += cbNew + 1;
1452}
1453
1454/*
1455 *@@ strhCreateDump:
1456 * this dumps a memory block into a string
1457 * and returns that string in a new buffer.
1458 *
1459 * You must free() the returned PSZ after use.
1460 *
1461 * The output looks like the following:
1462 *
1463 + 0000: FE FF 0E 02 90 00 00 00 ........
1464 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1465 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1466 *
1467 * Each line is terminated with a newline (\n)
1468 * character only.
1469 *
1470 *@@added V0.9.1 (2000-01-22) [umoeller]
1471 */
1472
1473PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1474 ULONG ulSize, // in: size of buffer
1475 ULONG ulIndent) // in: indentation of every line
1476{
1477 PSZ pszReturn = 0;
1478 XSTRING strReturn;
1479 CHAR szTemp[1000];
1480
1481 PBYTE pbCurrent = pb; // current byte
1482 ULONG ulCount = 0,
1483 ulCharsInLine = 0; // if this grows > 7, a new line is started
1484 CHAR szLine[400] = "",
1485 szAscii[30] = " "; // ASCII representation; filled for every line
1486 PSZ pszLine = szLine,
1487 pszAscii = szAscii;
1488
1489 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1490
1491 for (pbCurrent = pb;
1492 ulCount < ulSize;
1493 pbCurrent++, ulCount++)
1494 {
1495 if (ulCharsInLine == 0)
1496 {
1497 memset(szLine, ' ', ulIndent);
1498 pszLine += ulIndent;
1499 }
1500 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1501
1502 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1503 // printable character:
1504 *pszAscii = *pbCurrent;
1505 else
1506 *pszAscii = '.';
1507 pszAscii++;
1508
1509 ulCharsInLine++;
1510 if ( (ulCharsInLine > 7) // 8 bytes added?
1511 || (ulCount == ulSize-1) // end of buffer reached?
1512 )
1513 {
1514 // if we haven't had eight bytes yet,
1515 // fill buffer up to eight bytes with spaces
1516 ULONG ul2;
1517 for (ul2 = ulCharsInLine;
1518 ul2 < 8;
1519 ul2++)
1520 pszLine += sprintf(pszLine, " ");
1521
1522 sprintf(szTemp, "%04lX: %s %s\n",
1523 (ulCount & 0xFFFFFFF8), // offset in hex
1524 szLine, // bytes string
1525 szAscii); // ASCII string
1526 xstrcat(&strReturn, szTemp, 0);
1527
1528 // restart line buffer
1529 pszLine = szLine;
1530
1531 // clear ASCII buffer
1532 strcpy(szAscii, " ");
1533 pszAscii = szAscii;
1534
1535 // reset line counter
1536 ulCharsInLine = 0;
1537 }
1538 }
1539
1540 if (strReturn.cbAllocated)
1541 pszReturn = strReturn.psz;
1542
1543 return (pszReturn);
1544}
1545
1546/* ******************************************************************
1547 *
1548 * Wildcard matching
1549 *
1550 ********************************************************************/
1551
1552/*
1553 * The following code has been taken from "fnmatch.zip".
1554 *
1555 * (c) 1994-1996 by Eberhard Mattes.
1556 */
1557
/* Path separators for the OS/2 and DOS styles: both the slash and
 * the backslash separate the components of a path.
 * IS_OS2_COMP_SEP is true iff C is one of these separators. */

#define IS_OS2_COMP_SEP(C) ((C) == '/' || (C) == '\\')


/* IS_OS2_COMP_END is true iff C ends a path component, that is,
 * if C is the null terminator or a separator. */

#define IS_OS2_COMP_END(C) ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 * skip_comp_os2:
 *      for OS/2 and DOS styles, returns a pointer to the
 *      component which follows the current one in the path
 *      SRC, i.e. to the character after the next separator.
 *      If there is no further separator, a pointer to the
 *      terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *p;

    /* run up to the end of the current component */
    for (p = src; !IS_OS2_COMP_END(*p); ++p)
        ;

    /* step over the separator, but never over the terminator */
    return (*p != 0) ? p + 1 : p;
}
1592
/*
 * has_colon:
 *      returns 1 if the null-terminated path P contains
 *      at least one colon, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    const unsigned char *scan;

    for (scan = p; *scan != 0; ++scan)
    {
        if (*scan == ':')
            return 1;
    }

    return 0;
}
1609
1610/*
1611 * match_comp_os2:
1612 * compares a single component (directory name or file name)
1613 * of the paths, for OS/2 and DOS styles. MASK and NAME point
1614 * into a component of the wildcard and the name to be checked,
1615 * respectively. Comparing stops at the next separator.
1616 * The FLAGS argument is the same as that of fnmatch().
1617 *
1618 * HAS_DOT is true if a dot is in the current component of NAME.
1619 * The number of dots is not restricted, even in DOS style.
1620 *
1621 * Returns FNM_MATCH iff MASK and NAME match.
1622 *
1623 * Note that this function is recursive.
1624 *
1625 * (c) 1994-1996 by Eberhard Mattes.
1626 */
1627
1628static int match_comp_os2(const unsigned char *mask,
1629 const unsigned char *name,
1630 unsigned flags,
1631 int has_dot)
1632{
1633 int rc;
1634
1635 for (;;)
1636 switch (*mask)
1637 {
1638 case 0:
1639
1640 /* There must be no extra characters at the end of NAME when
1641 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1642 * in that case, NAME may point to a separator. */
1643
1644 if (*name == 0)
1645 return FNM_MATCH;
1646 if ((flags & FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1647 return FNM_MATCH;
1648 return FNM_NOMATCH;
1649
1650 case '/':
1651 case '\\':
1652
1653 /* Separators match separators. */
1654
1655 if (IS_OS2_COMP_SEP(*name))
1656 return FNM_MATCH;
1657
1658 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1659 * is ignored at the end of NAME. */
1660
1661 if ((flags & FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1662 return FNM_MATCH;
1663
1664 /* Stop comparing at the separator. */
1665
1666 return FNM_NOMATCH;
1667
1668 case '?':
1669
1670 /* A question mark matches one character. It does not match
1671 * a dot. At the end of the component (and before a dot),
1672 * it also matches zero characters. */
1673
1674 if (*name != '.' && !IS_OS2_COMP_END(*name))
1675 ++name;
1676 ++mask;
1677 break;
1678
1679 case '*':
1680
1681 /* An asterisk matches zero or more characters. In DOS
1682 * mode, dots are not matched. */
1683
1684 do
1685 {
1686 ++mask;
1687 }
1688 while (*mask == '*');
1689 for (;;)
1690 {
1691 rc = match_comp_os2(mask, name, flags, has_dot);
1692 if (rc != FNM_NOMATCH)
1693 return rc;
1694 if (IS_OS2_COMP_END(*name))
1695 return FNM_NOMATCH;
1696 if (*name == '.' && (flags & FNM_STYLE_MASK) == FNM_DOS)
1697 return FNM_NOMATCH;
1698 ++name;
1699 }
1700
1701 case '.':
1702
1703 /* A dot matches a dot. It also matches the implicit dot at
1704 * the end of a dot-less NAME. */
1705
1706 ++mask;
1707 if (*name == '.')
1708 ++name;
1709 else if (has_dot || !IS_OS2_COMP_END(*name))
1710 return FNM_NOMATCH;
1711 break;
1712
1713 default:
1714
1715 /* All other characters match themselves. */
1716
1717 if (flags & FNM_IGNORECASE)
1718 {
1719 if (tolower(*mask) != tolower(*name))
1720 return FNM_NOMATCH;
1721 }
1722 else
1723 {
1724 if (*mask != *name)
1725 return FNM_NOMATCH;
1726 }
1727 ++mask;
1728 ++name;
1729 break;
1730 }
1731}
1732
1733/*
1734 * match_comp:
1735 * compares a single component (directory name or file
1736 * name) of the paths, for all styles which need
1737 * component-by-component matching. MASK and NAME point
1738 * to the start of a component of the wildcard and the
1739 * name to be checked, respectively. Comparing stops at
1740 * the next separator. The FLAGS argument is the same as
1741 * that of fnmatch().
1742 *
1743 * Return FNM_MATCH iff MASK and NAME match.
1744 *
1745 * (c) 1994-1996 by Eberhard Mattes.
1746 */
1747
1748static int match_comp(const unsigned char *mask,
1749 const unsigned char *name,
1750 unsigned flags)
1751{
1752 const unsigned char *s;
1753
1754 switch (flags & FNM_STYLE_MASK)
1755 {
1756 case FNM_OS2:
1757 case FNM_DOS:
1758
1759 /* For OS/2 and DOS styles, we add an implicit dot at the end of
1760 * the component if the component doesn't include a dot. */
1761
1762 s = name;
1763 while (!IS_OS2_COMP_END(*s) && *s != '.')
1764 ++s;
1765 return match_comp_os2(mask, name, flags, *s == '.');
1766
1767 default:
1768 return FNM_ERR;
1769 }
1770}
1771
1772/* In Unix styles, / separates components of a path. This macro
1773 * returns true iff C is a separator. */
1774
1775#define IS_UNIX_COMP_SEP(C) ((C) == '/')
1776
1777
1778/* This macro returns true if C is at the end of a component of a
1779 * path. */
1780
1781#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP (C))
1782
1783/*
1784 * match_unix:
1785 * matches complete paths for Unix styles.
1786 *
1787 * The FLAGS argument is the same as that of fnmatch().
1788 * COMP points to the start of the current component in
1789 * NAME. Return FNM_MATCH iff MASK and NAME match. The
1790 * backslash character is used for escaping ? and * unless
1791 * FNM_NOESCAPE is set.
1792 *
1793 * (c) 1994-1996 by Eberhard Mattes.
1794 */
1795
1796static int match_unix(const unsigned char *mask,
1797 const unsigned char *name,
1798 unsigned flags,
1799 const unsigned char *comp)
1800{
1801 unsigned char c1, c2;
1802 char invert, matched;
1803 const unsigned char *start;
1804 int rc;
1805
1806 for (;;)
1807 switch (*mask)
1808 {
1809 case 0:
1810
1811 /* There must be no extra characters at the end of NAME when
1812 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1813 * in that case, NAME may point to a separator. */
1814
1815 if (*name == 0)
1816 return FNM_MATCH;
1817 if ((flags & FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1818 return FNM_MATCH;
1819 return FNM_NOMATCH;
1820
1821 case '?':
1822
1823 /* A question mark matches one character. It does not match
1824 * the component separator if FNM_PATHNAME is set. It does
1825 * not match a dot at the start of a component if FNM_PERIOD
1826 * is set. */
1827
1828 if (*name == 0)
1829 return FNM_NOMATCH;
1830 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1831 return FNM_NOMATCH;
1832 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1833 return FNM_NOMATCH;
1834 ++mask;
1835 ++name;
1836 break;
1837
1838 case '*':
1839
1840 /* An asterisk matches zero or more characters. It does not
1841 * match the component separator if FNM_PATHNAME is set. It
1842 * does not match a dot at the start of a component if
1843 * FNM_PERIOD is set. */
1844
1845 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1846 return FNM_NOMATCH;
1847 do
1848 {
1849 ++mask;
1850 }
1851 while (*mask == '*');
1852 for (;;)
1853 {
1854 rc = match_unix(mask, name, flags, comp);
1855 if (rc != FNM_NOMATCH)
1856 return rc;
1857 if (*name == 0)
1858 return FNM_NOMATCH;
1859 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1860 return FNM_NOMATCH;
1861 ++name;
1862 }
1863
1864 case '/':
1865
1866 /* Separators match only separators. If _FNM_PATHPREFIX is
1867 * set, a trailing separator in MASK is ignored at the end
1868 * of NAME. */
1869
1870 if (!(IS_UNIX_COMP_SEP(*name)
1871 || ((flags & FNM_PATHPREFIX) && *name == 0
1872 && (mask[1] == 0
1873 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1874 && mask[2] == 0)))))
1875 return FNM_NOMATCH;
1876
1877 ++mask;
1878 if (*name != 0)
1879 ++name;
1880
1881 /* This is the beginning of a new component if FNM_PATHNAME
1882 * is set. */
1883
1884 if (flags & FNM_PATHNAME)
1885 comp = name;
1886 break;
1887
1888 case '[':
1889
1890 /* A set of characters. Always case-sensitive. */
1891
1892 if (*name == 0)
1893 return FNM_NOMATCH;
1894 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1895 return FNM_NOMATCH;
1896 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1897 return FNM_NOMATCH;
1898
1899 invert = 0;
1900 matched = 0;
1901 ++mask;
1902
1903 /* If the first character is a ! or ^, the set matches all
1904 * characters not listed in the set. */
1905
1906 if (*mask == '!' || *mask == '^')
1907 {
1908 ++mask;
1909 invert = 1;
1910 }
1911
1912 /* Loop over all the characters of the set. The loop ends
1913 * if the end of the string is reached or if a ] is
1914 * encountered unless it directly follows the initial [ or
1915 * [-. */
1916
1917 start = mask;
1918 while (!(*mask == 0 || (*mask == ']' && mask != start)))
1919 {
1920 /* Get the next character which is optionally preceded
1921 * by a backslash. */
1922
1923 c1 = *mask++;
1924 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
1925 {
1926 if (*mask == 0)
1927 break;
1928 c1 = *mask++;
1929 }
1930
1931 /* Ranges of characters are written as a-z. Don't
1932 * forget to check for the end of the string and to
1933 * handle the backslash. If the character after - is a
1934 * ], it isn't a range. */
1935
1936 if (*mask == '-' && mask[1] != ']')
1937 {
1938 ++mask; /* Skip the - character */
1939 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
1940 ++mask;
1941 if (*mask == 0)
1942 break;
1943 c2 = *mask++;
1944 }
1945 else
1946 c2 = c1;
1947
1948 /* Now check whether this character or range matches NAME. */
1949
1950 if (c1 <= *name && *name <= c2)
1951 matched = 1;
1952 }
1953
1954 /* If the end of the string is reached before a ] is found,
1955 * back up to the [ and compare it to NAME. */
1956
1957 if (*mask == 0)
1958 {
1959 if (*name != '[')
1960 return FNM_NOMATCH;
1961 ++name;
1962 mask = start;
1963 if (invert)
1964 --mask;
1965 }
1966 else
1967 {
1968 if (invert)
1969 matched = !matched;
1970 if (!matched)
1971 return FNM_NOMATCH;
1972 ++mask; /* Skip the ] character */
1973 if (*name != 0)
1974 ++name;
1975 }
1976 break;
1977
1978 case '\\':
1979 ++mask;
1980 if (flags & FNM_NOESCAPE)
1981 {
1982 if (*name != '\\')
1983 return FNM_NOMATCH;
1984 ++name;
1985 }
1986 else if (*mask == '*' || *mask == '?')
1987 {
1988 if (*mask != *name)
1989 return FNM_NOMATCH;
1990 ++mask;
1991 ++name;
1992 }
1993 break;
1994
1995 default:
1996
1997 /* All other characters match themselves. */
1998
1999 if (flags & FNM_IGNORECASE)
2000 {
2001 if (tolower(*mask) != tolower(*name))
2002 return FNM_NOMATCH;
2003 }
2004 else
2005 {
2006 if (*mask != *name)
2007 return FNM_NOMATCH;
2008 }
2009 ++mask;
2010 ++name;
2011 break;
2012 }
2013}
2014
2015/*
2016 * _fnmatch_unsigned:
2017 * Check whether the path name NAME matches the wildcard MASK.
2018 *
2019 * Return:
2020 * -- 0 (FNM_MATCH) if it matches,
2021 * -- _FNM_NOMATCH if it doesn't,
2022 * -- FNM_ERR on error.
2023 *
2024 * The operation of this function is controlled by FLAGS.
2025 * This is an internal function, with unsigned arguments.
2026 *
2027 * (c) 1994-1996 by Eberhard Mattes.
2028 */
2029
2030static int _fnmatch_unsigned(const unsigned char *mask,
2031 const unsigned char *name,
2032 unsigned flags)
2033{
2034 int m_drive,
2035 n_drive,
2036 rc;
2037
2038 /* Match and skip the drive name if present. */
2039
2040 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2041 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2042
2043 if (m_drive != n_drive)
2044 {
2045 if (m_drive == -1 || n_drive == -1)
2046 return FNM_NOMATCH;
2047 if (!(flags & FNM_IGNORECASE))
2048 return FNM_NOMATCH;
2049 if (tolower(m_drive) != tolower(n_drive))
2050 return FNM_NOMATCH;
2051 }
2052
2053 if (m_drive != -1)
2054 mask += 2;
2055 if (n_drive != -1)
2056 name += 2;
2057
2058 /* Colons are not allowed in path names, except for the drive name,
2059 * which was skipped above. */
2060
2061 if (has_colon(mask) || has_colon(name))
2062 return FNM_ERR;
2063
2064 /* The name "\\server\path" should not be matched by mask
2065 * "\*\server\path". Ditto for /. */
2066
2067 switch (flags & FNM_STYLE_MASK)
2068 {
2069 case FNM_OS2:
2070 case FNM_DOS:
2071
2072 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2073 {
2074 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2075 return FNM_NOMATCH;
2076 name += 2;
2077 mask += 2;
2078 }
2079 break;
2080
2081 case FNM_POSIX:
2082
2083 if (name[0] == '/' && name[1] == '/')
2084 {
2085 int i;
2086
2087 name += 2;
2088 for (i = 0; i < 2; ++i)
2089 if (mask[0] == '/')
2090 ++mask;
2091 else if (mask[0] == '\\' && mask[1] == '/')
2092 mask += 2;
2093 else
2094 return FNM_NOMATCH;
2095 }
2096
2097 /* In Unix styles, treating ? and * w.r.t. components is simple.
2098 * No need to do matching component by component. */
2099
2100 return match_unix(mask, name, flags, name);
2101 }
2102
2103 /* Now compare all the components of the path name, one by one.
2104 * Note that the path separator must not be enclosed in brackets. */
2105
2106 while (*mask != 0 || *name != 0)
2107 {
2108
2109 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2110 * is reached even if there are components left in NAME. */
2111
2112 if (*mask == 0 && (flags & FNM_PATHPREFIX))
2113 return FNM_MATCH;
2114
2115 /* Compare a single component of the path name. */
2116
2117 rc = match_comp(mask, name, flags);
2118 if (rc != FNM_MATCH)
2119 return rc;
2120
2121 /* Skip to the next component or to the end of the path name. */
2122
2123 mask = skip_comp_os2(mask);
2124 name = skip_comp_os2(name);
2125 }
2126
2127 /* If we reached the ends of both strings, the names match. */
2128
2129 if (*mask == 0 && *name == 0)
2130 return FNM_MATCH;
2131
2132 /* The names do not match. */
2133
2134 return FNM_NOMATCH;
2135}
2136
2137/*
2138 *@@ strhMatchOS2:
2139 * this matches wildcards, similar to what DosEditName does.
2140 * However, this does not require a file to be present, but
2141 * works on strings only.
2142 */
2143
2144BOOL strhMatchOS2(const char *pcszMask, // in: mask (e.g. "*.txt")
2145 const char *pcszName) // in: string to check (e.g. "test.txt")
2146{
2147 return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2148 (const unsigned char *)pcszName,
2149 FNM_OS2 | FNM_IGNORECASE)
2150 == FNM_MATCH)
2151 );
2152}
2153
2154/*
2155 *@@ strhMatchExt:
2156 * like strhMatchOS2, but this takes all the flags
2157 * for input.
2158 *
2159 *@@added V0.9.15 (2001-09-14) [umoeller]
2160 */
2161
2162BOOL strhMatchExt(const char *pcszMask, // in: mask (e.g. "*.txt")
2163 const char *pcszName, // in: string to check (e.g. "test.txt")
2164 unsigned flags) // in: FNM_* flags
2165{
2166 return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2167 (const unsigned char *)pcszName,
2168 flags)
2169 == FNM_MATCH)
2170 );
2171}
2172
2173/* ******************************************************************
2174 *
2175 * Fast string searches
2176 *
2177 ********************************************************************/
2178
2179#define ASSERT(a)
2180
2181/*
2182 * The following code has been taken from the "Standard
2183 * Function Library", file sflfind.c, and only slightly
2184 * modified to conform to the rest of this file.
2185 *
2186 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2187 * Revised: 98/05/04
2188 *
2189 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2190 *
2191 * The SFL Licence allows incorporating SFL code into other
2192 * programs, as long as the copyright is reprinted and the
2193 * code is marked as modified, so this is what we do.
2194 */
2195
2196/*
2197 *@@ strhmemfind:
2198 * searches for a pattern in a block of memory using the
2199 * Boyer-Moore-Horspool-Sunday algorithm.
2200 *
2201 * The block and pattern may contain any values; you must
2202 * explicitly provide their lengths. If you search for strings,
2203 * use strlen() on the buffers.
2204 *
2205 * Returns a pointer to the pattern if found within the block,
2206 * or NULL if the pattern was not found.
2207 *
2208 * This algorithm needs a "shift table" to cache data for the
2209 * search pattern. This table can be reused when performing
2210 * several searches with the same pattern.
2211 *
2212 * "shift" must point to an array big enough to hold 256 (8**2)
2213 * "size_t" values.
2214 *
2215 * If (*repeat_find == FALSE), the shift table is initialized.
2216 * So on the first search with a given pattern, *repeat_find
2217 * should be FALSE. This function sets it to TRUE after the
2218 * shift table is initialised, allowing the initialisation
2219 * phase to be skipped on subsequent searches.
2220 *
2221 * This function is most effective when repeated searches are
2222 * made for the same pattern in one or more large buffers.
2223 *
2224 * Example:
2225 *
2226 + PSZ pszHaystack = "This is a sample string.",
2227 + pszNeedle = "string";
2228 + size_t shift[256];
2229 + BOOL fRepeat = FALSE;
2230 +
2231 + PSZ pFound = strhmemfind(pszHaystack,
2232 + strlen(pszHaystack), // block size
2233 + pszNeedle,
2234 + strlen(pszNeedle), // pattern size
2235 + shift,
2236 + &fRepeat);
2237 *
2238 * Taken from the "Standard Function Library", file sflfind.c.
2239 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2240 * Slightly modified by umoeller.
2241 *
2242 *@@added V0.9.3 (2000-05-08) [umoeller]
2243 */
2244
2245void* strhmemfind(const void *in_block, // in: block containing data
2246 size_t block_size, // in: size of block in bytes
2247 const void *in_pattern, // in: pattern to search for
2248 size_t pattern_size, // in: size of pattern block
2249 size_t *shift, // in/out: shift table (search buffer)
2250 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
2251{
2252 size_t byte_nbr, // Distance through block
2253 match_size; // Size of matched part
2254 const unsigned char
2255 *match_base = NULL, // Base of match of pattern
2256 *match_ptr = NULL, // Point within current match
2257 *limit = NULL; // Last potiental match point
2258 const unsigned char
2259 *block = (unsigned char *) in_block, // Concrete pointer to block data
2260 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2261
2262 if ( (block == NULL)
2263 || (pattern == NULL)
2264 || (shift == NULL)
2265 )
2266 return (NULL);
2267
2268 // Pattern must be smaller or equal in size to string
2269 if (block_size < pattern_size)
2270 return (NULL); // Otherwise it's not found
2271
2272 if (pattern_size == 0) // Empty patterns match at start
2273 return ((void *)block);
2274
2275 // Build the shift table unless we're continuing a previous search
2276
2277 // The shift table determines how far to shift before trying to match
2278 // again, if a match at this point fails. If the byte after where the
2279 // end of our pattern falls is not in our pattern, then we start to
2280 // match again after that byte; otherwise we line up the last occurence
2281 // of that byte in our pattern under that byte, and try match again.
2282
2283 if (!repeat_find || !*repeat_find)
2284 {
2285 for (byte_nbr = 0;
2286 byte_nbr < 256;
2287 byte_nbr++)
2288 shift[byte_nbr] = pattern_size + 1;
2289 for (byte_nbr = 0;
2290 byte_nbr < pattern_size;
2291 byte_nbr++)
2292 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2293
2294 if (repeat_find)
2295 *repeat_find = TRUE;
2296 }
2297
2298 // Search for the block, each time jumping up by the amount
2299 // computed in the shift table
2300
2301 limit = block + (block_size - pattern_size + 1);
2302 ASSERT (limit > block);
2303
2304 for (match_base = block;
2305 match_base < limit;
2306 match_base += shift[*(match_base + pattern_size)])
2307 {
2308 match_ptr = match_base;
2309 match_size = 0;
2310
2311 // Compare pattern until it all matches, or we find a difference
2312 while (*match_ptr++ == pattern[match_size++])
2313 {
2314 ASSERT (match_size <= pattern_size &&
2315 match_ptr == (match_base + match_size));
2316
2317 // If we found a match, return the start address
2318 if (match_size >= pattern_size)
2319 return ((void*)(match_base));
2320
2321 }
2322 }
2323 return (NULL); // Found nothing
2324}
2325
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      NULL is also returned if string or pattern is NULL. An empty
 *      pattern matches at the start of the string.
 *
 *      Will match strings irrespective of case. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      Case is folded with tolower() on the characters taken as
 *      unsigned char values, so characters above 127 are safe to pass
 *      even if "char" is a signed type.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const unsigned char
        *text,                          // string, seen as unsigned bytes
        *pat,                           // pattern, seen as unsigned bytes
        *match_base,                    // Base of match of pattern
        *limit;                         // First address where pattern no longer fits

    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // tolower() is only defined for unsigned char values (and EOF),
    // so never hand it a plain, possibly negative, char
    text = (const unsigned char *) string;
    pat  = (const unsigned char *) pattern;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. The byte used for the lookup is at most
    // the string's null terminator, so it is always inside the string.

    limit = text + (string_size - pattern_size + 1);

    for (match_base = text;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (match_base [pattern_size])])
    {
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (tolower (match_base [match_size]) == tolower (pat [match_size]))
        {
            // If we found a match, return the start address
            if (++match_size >= pattern_size)
                return ((char *) string + (match_base - text));
        }
    }
    return (NULL);                      // Found nothing
}
2420
Note: See TracBrowser for help on using the repository browser.