source: trunk/src/helpers/stringh.c@ 124

Last change on this file since 124 was 123, checked in by umoeller, 24 years ago

Lots of changes for icons and refresh.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 76.7 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#define INCL_DOSERRORS
45#include <os2.h>
46
47#include <stdlib.h>
48#include <stdio.h>
49#include <string.h>
50#include <ctype.h>
51#include <math.h>
52
53#include "setup.h" // code generation and debugging options
54
55#define DONT_REPLACE_STRINGH_MALLOC
56#include "helpers\stringh.h"
57#include "helpers\xstring.h" // extended string helpers
58
59#pragma hdrstop
60
61/*
62 *@@category: Helpers\C helpers\String management
63 * See stringh.c and xstring.c.
64 */
65
66/*
67 *@@category: Helpers\C helpers\String management\C string helpers
68 * See stringh.c.
69 */
70
71#ifdef __DEBUG_MALLOC_ENABLED__
72
73/*
74 *@@ strhStoreDebug:
75 * memory debug version of strhStore.
76 *
77 *@@added V0.9.16 (2001-12-08) [umoeller]
78 */
79
80APIRET strhStoreDebug(PSZ *ppszTarget,
81 PCSZ pcszSource,
82 PULONG pulLength, // out: length of new string (ptr can be NULL)
83 const char *pcszSourceFile,
84 unsigned long ulLine,
85 const char *pcszFunction)
86{
87 ULONG ulLength = 0;
88
89 if (ppszTarget)
90 {
91 if (*ppszTarget)
92 free(*ppszTarget);
93
94 if ( (pcszSource)
95 && (ulLength = strlen(pcszSource))
96 )
97 {
98 if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
99 pcszSourceFile,
100 ulLine,
101 pcszFunction))
102 memcpy(*ppszTarget, pcszSource, ulLength + 1);
103 else
104 return (ERROR_NOT_ENOUGH_MEMORY);
105 }
106 else
107 *ppszTarget = NULL;
108 }
109
110 if (pulLength)
111 *pulLength = ulLength;
112
113 return (NO_ERROR);
114}
115
116#endif
117
118/*
119 *@@ strhStore:
120 * stores a copy of the given string in the specified
121 * buffer. Uses strdup internally.
122 *
123 * If *ppszTarget != NULL, the previous string is freed
124 * and set to NULL.
125 * If pcszSource != NULL, a copy of it is stored in the
126 * buffer.
127 *
128 *@@added V0.9.16 (2001-12-06) [umoeller]
129 */
130
131APIRET strhStore(PSZ *ppszTarget,
132 PCSZ pcszSource,
133 PULONG pulLength) // out: length of new string (ptr can be NULL)
134{
135 ULONG ulLength = 0;
136
137 if (ppszTarget)
138 {
139 if (*ppszTarget)
140 free(*ppszTarget);
141
142 if ( (pcszSource)
143 && (ulLength = strlen(pcszSource))
144 )
145 {
146 if (*ppszTarget = (PSZ)malloc(ulLength + 1))
147 memcpy(*ppszTarget, pcszSource, ulLength + 1);
148 else
149 return (ERROR_NOT_ENOUGH_MEMORY);
150 }
151 else
152 *ppszTarget = NULL;
153 }
154
155 if (pulLength)
156 *pulLength = ulLength;
157
158 return (NO_ERROR);
159}
160
161/*
162 *@@ strhcpy:
163 * like strdup, but this one doesn't crash if string2 is NULL,
164 * but sets the first byte in string1 to \0 instead.
165 *
166 *@@added V0.9.14 (2001-08-01) [umoeller]
167 */
168
169PSZ strhcpy(PSZ string1, const char *string2)
170{
171 if (string2)
172 return (strcpy(string1, string2));
173
174 *string1 = '\0';
175 return (string1);
176}
177
178#ifdef __DEBUG_MALLOC_ENABLED__
179
180/*
181 *@@ strhdupDebug:
182 * memory debug version of strhdup.
183 *
184 *@@added V0.9.0 [umoeller]
185 */
186
187PSZ strhdupDebug(const char *pcszSource,
188 unsigned long *pulLength,
189 const char *pcszSourceFile,
190 unsigned long ulLine,
191 const char *pcszFunction)
192{
193 PSZ pszReturn = NULL;
194 ULONG ulLength = 0;
195
196 if ( (pcszSource)
197 && (ulLength = strlen(pcszSource))
198 )
199 {
200 if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
201 pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
202 ulLine,
203 pcszFunction))
204 memcpy(pszReturn, pcszSource, ulLength + 1);
205 }
206
207 if (pulLength)
208 *pulLength = ulLength;
209
210 return (pszReturn);
211}
212
213#endif // __DEBUG_MALLOC_ENABLED__
214
215/*
216 *@@ strhdup:
217 * like strdup, but this one doesn't crash if pszSource
218 * is NULL, but returns NULL also. In addition, this
219 * can report the length of the string (V0.9.16).
220 *
221 *@@added V0.9.0 [umoeller]
222 *@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
223 */
224
225PSZ strhdup(const char *pcszSource,
226 unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
227{
228 PSZ pszReturn = NULL;
229 ULONG ulLength = 0;
230
231 if ( (pcszSource)
232 && (ulLength = strlen(pcszSource))
233 )
234 {
235 if (pszReturn = (PSZ)malloc(ulLength + 1))
236 memcpy(pszReturn, pcszSource, ulLength + 1);
237 }
238
239 if (pulLength)
240 *pulLength = ulLength;
241
242 return (pszReturn);
243}
244
/*
 *@@ strhcmp:
 *      better strcmp. This doesn't crash if any of the
 *      string pointers are NULL: a NULL string sorts before
 *      any non-NULL string, and two NULL strings compare
 *      equal.
 *
 *      Besides, this is guaranteed to only return -1, 0,
 *      or +1, while strcmp can return any positive or
 *      negative value. This is useful for tree comparison
 *      funcs.
 *
 *@@added V0.9.9 (2001-02-16) [umoeller]
 */

int strhcmp(const char *p1, const char *p2)
{
    int iCmp;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, otherwise p1 is less
        return p2 ? -1 : 0;

    if (!p2)
        // only p2 is NULL: p1 is greater
        return +1;

    // both valid: reduce the strcmp result to its sign
    iCmp = strcmp(p1, p2);
    return (iCmp > 0) - (iCmp < 0);
}
277
/*
 *@@ strhicmp:
 *      like strhcmp, but compares without respect
 *      to case (using stricmp). NULL pointers are
 *      handled the same way: a NULL string sorts before
 *      any non-NULL string, two NULL strings are equal.
 *
 *      Returns -1, 0, or +1 only.
 *
 *@@added V0.9.9 (2001-04-07) [umoeller]
 */

int strhicmp(const char *p1, const char *p2)
{
    int iCmp;

    if (!p1)
        // p1 is NULL: equal if p2 is NULL too, otherwise p1 is less
        return p2 ? -1 : 0;

    if (!p2)
        // only p2 is NULL: p1 is greater
        return +1;

    // both valid: reduce the stricmp result to its sign
    iCmp = stricmp(p1, p2);
    return (iCmp > 0) - (iCmp < 0);
}
304
305/*
306 *@@ strhistr:
307 * like strstr, but case-insensitive.
308 *
309 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
310 */
311
312PSZ strhistr(const char *string1, const char *string2)
313{
314 PSZ prc = NULL;
315
316 if ((string1) && (string2))
317 {
318 PSZ pszSrchIn = strdup(string1);
319 PSZ pszSrchFor = strdup(string2);
320
321 if ((pszSrchIn) && (pszSrchFor))
322 {
323 strupr(pszSrchIn);
324 strupr(pszSrchFor);
325
326 prc = strstr(pszSrchIn, pszSrchFor);
327 if (prc)
328 {
329 // prc now has the first occurence of the string,
330 // but in pszSrchIn; we need to map this
331 // return value to the original string
332 prc = (prc-pszSrchIn) // offset in pszSrchIn
333 + (PSZ)string1;
334 }
335 }
336 if (pszSrchFor)
337 free(pszSrchFor);
338 if (pszSrchIn)
339 free(pszSrchIn);
340 }
341 return (prc);
342}
343
344/*
345 *@@ strhncpy0:
346 * like strncpy, but always appends a 0 character.
347 */
348
349ULONG strhncpy0(PSZ pszTarget,
350 const char *pszSource,
351 ULONG cbSource)
352{
353 ULONG ul = 0;
354 PSZ pTarget = pszTarget,
355 pSource = (PSZ)pszSource;
356
357 for (ul = 0; ul < cbSource; ul++)
358 if (*pSource)
359 *pTarget++ = *pSource++;
360 else
361 break;
362 *pTarget = 0;
363
364 return (ul);
365}
366
367/*
368 * strhCount:
369 * this counts the occurences of c in pszSearch.
370 */
371
372ULONG strhCount(const char *pszSearch,
373 CHAR c)
374{
375 PSZ p = (PSZ)pszSearch;
376 ULONG ulCount = 0;
377 while (TRUE)
378 {
379 p = strchr(p, c);
380 if (p)
381 {
382 ulCount++;
383 p++;
384 }
385 else
386 break;
387 }
388 return (ulCount);
389}
390
391/*
392 *@@ strhIsDecimal:
393 * returns TRUE if psz consists of decimal digits only.
394 */
395
396BOOL strhIsDecimal(PSZ psz)
397{
398 PSZ p = psz;
399 while (*p != 0)
400 {
401 if (isdigit(*p) == 0)
402 return (FALSE);
403 p++;
404 }
405
406 return (TRUE);
407}
408
409#ifdef __DEBUG_MALLOC_ENABLED__
410
411/*
412 *@@ strhSubstrDebug:
413 * memory debug version of strhSubstr.
414 *
415 *@@added V0.9.14 (2001-08-01) [umoeller]
416 */
417
418PSZ strhSubstrDebug(const char *pBegin, // in: first char
419 const char *pEnd, // in: last char (not included)
420 const char *pcszSourceFile,
421 unsigned long ulLine,
422 const char *pcszFunction)
423{
424 PSZ pszSubstr = NULL;
425
426 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
427 {
428 ULONG cbSubstr = (pEnd - pBegin);
429 if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
430 pcszSourceFile,
431 ulLine,
432 pcszFunction))
433 {
434 // strhncpy0(pszSubstr, pBegin, cbSubstr);
435 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
436 *(pszSubstr + cbSubstr) = '\0';
437 }
438 }
439
440 return (pszSubstr);
441}
442
443#endif // __DEBUG_MALLOC_ENABLED__
444
445/*
446 *@@ strhSubstr:
447 * this creates a new PSZ containing the string
448 * from pBegin to pEnd, excluding the pEnd character.
449 * The new string is null-terminated. The caller
450 * must free() the new string after use.
451 *
452 * Example:
453 + "1234567890"
454 + ^ ^
455 + p1 p2
456 + strhSubstr(p1, p2)
457 * would return a new string containing "2345678".
458 *
459 *@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
460 *@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
461 */
462
463PSZ strhSubstr(const char *pBegin, // in: first char
464 const char *pEnd) // in: last char (not included)
465{
466 PSZ pszSubstr = NULL;
467
468 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
469 {
470 ULONG cbSubstr = (pEnd - pBegin);
471 if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
472 {
473 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
474 *(pszSubstr + cbSubstr) = '\0';
475 }
476 }
477
478 return (pszSubstr);
479}
480
481/*
482 *@@ strhExtract:
483 * searches pszBuf for the cOpen character and returns
484 * the data in between cOpen and cClose, excluding
485 * those two characters, in a newly allocated buffer
486 * which you must free() afterwards.
487 *
488 * Spaces and newlines/linefeeds are skipped.
489 *
490 * If the search was successful, the new buffer
491 * is returned and, if (ppEnd != NULL), *ppEnd points
492 * to the first character after the cClose character
493 * found in the buffer.
494 *
495 * If the search was not successful, NULL is
496 * returned, and *ppEnd is unchanged.
497 *
498 * If another cOpen character is found before
499 * cClose, matching cClose characters will be skipped.
500 * You can therefore nest the cOpen and cClose
501 * characters.
502 *
503 * This function ignores cOpen and cClose characters
504 * in C-style comments and strings surrounded by
505 * double quotes.
506 *
507 * Example:
508 + PSZ pszBuf = "KEYWORD { --blah-- } next",
509 + pEnd;
510 + strhExtract(pszBuf,
511 + '{', '}',
512 + &pEnd)
513 * would return a new buffer containing " --blah-- ",
514 * and ppEnd would afterwards point to the space
515 * before "next" in the static buffer.
516 *
517 *@@added V0.9.0 [umoeller]
518 */
519
520PSZ strhExtract(PSZ pszBuf, // in: search buffer
521 CHAR cOpen, // in: opening char
522 CHAR cClose, // in: closing char
523 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
524{
525 PSZ pszReturn = NULL;
526
527 if (pszBuf)
528 {
529 PSZ pOpen = strchr(pszBuf, cOpen);
530 if (pOpen)
531 {
532 // opening char found:
533 // now go thru the whole rest of the buffer
534 PSZ p = pOpen+1;
535 LONG lLevel = 1; // if this goes 0, we're done
536 while (*p)
537 {
538 if (*p == cOpen)
539 lLevel++;
540 else if (*p == cClose)
541 {
542 lLevel--;
543 if (lLevel <= 0)
544 {
545 // matching closing bracket found:
546 // extract string
547 pszReturn = strhSubstr(pOpen+1, // after cOpen
548 p); // excluding cClose
549 if (ppEnd)
550 *ppEnd = p+1;
551 break; // while (*p)
552 }
553 }
554 else if (*p == '\"')
555 {
556 // beginning of string:
557 PSZ p2 = p+1;
558 // find end of string
559 while ((*p2) && (*p2 != '\"'))
560 p2++;
561
562 if (*p2 == '\"')
563 // closing quote found:
564 // search on after that
565 p = p2; // raised below
566 else
567 break; // while (*p)
568 }
569
570 p++;
571 }
572 }
573 }
574
575 return (pszReturn);
576}
577
578/*
579 *@@ strhQuote:
580 * similar to strhExtract, except that
581 * opening and closing chars are the same,
582 * and therefore no nesting is possible.
583 * Useful for extracting stuff between
584 * quotes.
585 *
586 *@@added V0.9.0 [umoeller]
587 */
588
589PSZ strhQuote(PSZ pszBuf,
590 CHAR cQuote,
591 PSZ *ppEnd)
592{
593 PSZ pszReturn = NULL,
594 p1 = NULL;
595 if ((p1 = strchr(pszBuf, cQuote)))
596 {
597 PSZ p2 = strchr(p1+1, cQuote);
598 if (p2)
599 {
600 pszReturn = strhSubstr(p1+1, p2);
601 if (ppEnd)
602 // store closing char
603 *ppEnd = p2 + 1;
604 }
605 }
606
607 return (pszReturn);
608}
609
610/*
611 *@@ strhStrip:
612 * removes all double spaces.
613 * This copies within the "psz" buffer.
614 * If any double spaces are found, the
615 * string will be shorter than before,
616 * but the buffer is _not_ reallocated,
617 * so there will be unused bytes at the
618 * end.
619 *
620 * Returns the number of spaces removed.
621 *
622 *@@added V0.9.0 [umoeller]
623 */
624
625ULONG strhStrip(PSZ psz) // in/out: string
626{
627 PSZ p;
628 ULONG cb = strlen(psz),
629 ulrc = 0;
630
631 for (p = psz; p < psz+cb; p++)
632 {
633 if ((*p == ' ') && (*(p+1) == ' '))
634 {
635 PSZ p2 = p;
636 while (*p2)
637 {
638 *p2 = *(p2+1);
639 p2++;
640 }
641 cb--;
642 p--;
643 ulrc++;
644 }
645 }
646 return (ulrc);
647}
648
649/*
650 *@@ strhins:
651 * this inserts one string into another.
652 *
653 * pszInsert is inserted into pszBuffer at offset
654 * ulInsertOfs (which counts from 0).
655 *
656 * A newly allocated string is returned. pszBuffer is
657 * not changed. The new string should be free()'d after
658 * use.
659 *
660 * Upon errors, NULL is returned.
661 *
662 *@@changed V0.9.0 [umoeller]: completely rewritten.
663 */
664
665PSZ strhins(const char *pcszBuffer,
666 ULONG ulInsertOfs,
667 const char *pcszInsert)
668{
669 PSZ pszNew = NULL;
670
671 if ((pcszBuffer) && (pcszInsert))
672 {
673 do {
674 ULONG cbBuffer = strlen(pcszBuffer);
675 ULONG cbInsert = strlen(pcszInsert);
676
677 // check string length
678 if (ulInsertOfs > cbBuffer + 1)
679 break; // do
680
681 // OK, let's go.
682 pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
683
684 // copy stuff before pInsertPos
685 memcpy(pszNew,
686 pcszBuffer,
687 ulInsertOfs);
688 // copy string to be inserted
689 memcpy(pszNew + ulInsertOfs,
690 pcszInsert,
691 cbInsert);
692 // copy stuff after pInsertPos
693 strcpy(pszNew + ulInsertOfs + cbInsert,
694 pcszBuffer + ulInsertOfs);
695 } while (FALSE);
696 }
697
698 return (pszNew);
699}
700
701/*
702 *@@ strhFindReplace:
703 * wrapper around xstrFindReplace to work with C strings.
704 * Note that *ppszBuf can get reallocated and must
705 * be free()'able.
706 *
707 * Repetitive use of this wrapper is not recommended
708 * because it is considerably slower than xstrFindReplace.
709 *
710 *@@added V0.9.6 (2000-11-01) [umoeller]
711 *@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
712 */
713
714ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
715 PULONG pulOfs, // in: where to begin search (0 = start);
716 // out: ofs of first char after replacement string
717 const char *pcszSearch, // in: search string; cannot be NULL
718 const char *pcszReplace) // in: replacement string; cannot be NULL
719{
720 ULONG ulrc = 0;
721 XSTRING xstrBuf,
722 xstrFind,
723 xstrReplace;
724 size_t ShiftTable[256];
725 BOOL fRepeat = FALSE;
726 xstrInitSet(&xstrBuf, *ppszBuf);
727 // reallocated and returned, so we're safe
728 xstrInitSet(&xstrFind, (PSZ)pcszSearch);
729 xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
730 // these two are never freed, so we're safe too
731
732 if ((ulrc = xstrFindReplace(&xstrBuf,
733 pulOfs,
734 &xstrFind,
735 &xstrReplace,
736 ShiftTable,
737 &fRepeat)))
738 // replaced:
739 *ppszBuf = xstrBuf.psz;
740
741 return (ulrc);
742}
743
744/*
745 * strhWords:
746 * returns the no. of words in "psz".
747 * A string is considered a "word" if
748 * it is surrounded by spaces only.
749 *
750 *@@added V0.9.0 [umoeller]
751 */
752
753ULONG strhWords(PSZ psz)
754{
755 PSZ p;
756 ULONG cb = strlen(psz),
757 ulWords = 0;
758 if (cb > 1)
759 {
760 ulWords = 1;
761 for (p = psz; p < psz+cb; p++)
762 if (*p == ' ')
763 ulWords++;
764 }
765 return (ulWords);
766}
767
768/*
769 *@@ strhGetWord:
770 * finds word boundaries.
771 *
772 * *ppszStart is used as the beginning of the
773 * search.
774 *
775 * If a word is found, *ppszStart is set to
776 * the first character of the word which was
777 * found and *ppszEnd receives the address
778 * of the first character _after_ the word,
779 * which is probably a space or a \n or \r char.
780 * We then return TRUE.
781 *
782 * The search is stopped if a null character
783 * is found or pLimit is reached. In that case,
784 * FALSE is returned.
785 *
786 *@@added V0.9.1 (2000-02-13) [umoeller]
787 */
788
789BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
790 // out: start of word (if TRUE is returned)
791 const char *pLimit, // in: ptr to last char after *ppszStart to be
792 // searched; if the word does not end before
793 // or with this char, FALSE is returned
794 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
795 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
796 PSZ *ppszEnd) // out: first char _after_ word
797 // (if TRUE is returned)
798{
799 // characters after which a word can be started
800 // const char *pcszBeginChars = "\x0d\x0a ";
801 // const char *pcszEndChars = "\x0d\x0a /-";
802
803 PSZ pStart = *ppszStart;
804
805 // find start of word
806 while ( (pStart < (PSZ)pLimit)
807 && (strchr(pcszBeginChars, *pStart))
808 )
809 // if char is a "before word" char: go for next
810 pStart++;
811
812 if (pStart < (PSZ)pLimit)
813 {
814 // found a valid "word start" character
815 // (which is not in pcszBeginChars):
816
817 // find end of word
818 PSZ pEndOfWord = pStart;
819 while ( (pEndOfWord <= (PSZ)pLimit)
820 && (strchr(pcszEndChars, *pEndOfWord) == 0)
821 )
822 // if char is not an "end word" char: go for next
823 pEndOfWord++;
824
825 if (pEndOfWord <= (PSZ)pLimit)
826 {
827 // whoa, got a word:
828 *ppszStart = pStart;
829 *ppszEnd = pEndOfWord;
830 return (TRUE);
831 }
832 }
833
834 return (FALSE);
835}
836
837/*
838 *@@ strhIsWord:
839 * returns TRUE if p points to a "word"
840 * in pcszBuf.
841 *
842 * p is considered a word if the character _before_
843 * it is in pcszBeginChars and the char _after_
844 * it (i.e. *(p+cbSearch)) is in pcszEndChars.
845 *
846 *@@added V0.9.6 (2000-11-12) [umoeller]
847 */
848
849BOOL strhIsWord(const char *pcszBuf,
850 const char *p, // in: start of word
851 ULONG cbSearch, // in: length of word
852 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
853 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
854{
855 BOOL fEndOK = FALSE;
856
857 // check previous char
858 if ( (p == pcszBuf)
859 || (strchr(pcszBeginChars, *(p-1)))
860 )
861 {
862 // OK, valid begin char:
863 // check end char
864 CHAR cNextChar = *(p + cbSearch);
865 if (cNextChar == 0)
866 fEndOK = TRUE;
867 else
868 {
869 char *pc = strchr(pcszEndChars, cNextChar);
870 if (pc)
871 // OK, is end char: avoid doubles of that char,
872 // but allow spaces
873 if ( (cNextChar+1 != *pc)
874 || (cNextChar+1 == ' ')
875 || (cNextChar+1 == 0)
876 )
877 fEndOK = TRUE;
878 }
879 }
880
881 return (fEndOK);
882}
883
884/*
885 *@@ strhFindWord:
886 * searches for pszSearch in pszBuf, which is
887 * returned if found (or NULL if not).
888 *
889 * As opposed to strstr, this finds pszSearch
890 * only if it is a "word". A search string is
891 * considered a word if the character _before_
892 * it is in pcszBeginChars and the char _after_
893 * it is in pcszEndChars.
894 *
895 * Example:
896 + strhFindWord("This is an example.", "is");
897 + returns ...........^ this, but not the "is" in "This".
898 *
899 * The algorithm here uses strstr to find pszSearch in pszBuf
900 * and performs additional "is-word" checks for each item found
901 * (by calling strhIsWord).
902 *
903 * Note that this function is fairly slow compared to xstrFindWord.
904 *
905 *@@added V0.9.0 (99-11-08) [umoeller]
906 *@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
907 */
908
909PSZ strhFindWord(const char *pszBuf,
910 const char *pszSearch,
911 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
912 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
913{
914 PSZ pszReturn = 0;
915 ULONG cbBuf = strlen(pszBuf),
916 cbSearch = strlen(pszSearch);
917
918 if ((cbBuf) && (cbSearch))
919 {
920 const char *p = pszBuf;
921
922 do // while p
923 {
924 p = strstr(p, pszSearch);
925 if (p)
926 {
927 // string found:
928 // check if that's a word
929
930 if (strhIsWord(pszBuf,
931 p,
932 cbSearch,
933 pcszBeginChars,
934 pcszEndChars))
935 {
936 // valid end char:
937 pszReturn = (PSZ)p;
938 break;
939 }
940
941 p += cbSearch;
942 }
943 } while (p);
944
945 }
946 return (pszReturn);
947}
948
949/*
950 *@@ strhFindEOL:
951 * returns a pointer to the next \r, \n or null character
952 * following pszSearchIn. Stores the offset in *pulOffset.
953 *
954 * This should never return NULL because at some point,
955 * there will be a null byte in your string.
956 *
957 *@@added V0.9.4 (2000-07-01) [umoeller]
958 */
959
960PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
961 PULONG pulOffset) // out: offset (ptr can be NULL)
962{
963 const char *p = pcszSearchIn,
964 *prc = 0;
965 while (TRUE)
966 {
967 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
968 {
969 prc = p;
970 break;
971 }
972 p++;
973 }
974
975 if ((pulOffset) && (prc))
976 *pulOffset = prc - pcszSearchIn;
977
978 return ((PSZ)prc);
979}
980
981/*
982 *@@ strhFindNextLine:
983 * like strhFindEOL, but this returns the character
984 * _after_ \r or \n. Note that this might return
985 * a pointer to terminating NULL character also.
986 */
987
988PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
989{
990 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
991 // pEOL now points to the \r char or the terminating 0 byte;
992 // if not null byte, advance pointer
993 PSZ pNextLine = pEOL;
994 if (*pNextLine == '\r')
995 pNextLine++;
996 if (*pNextLine == '\n')
997 pNextLine++;
998 if (pulOffset)
999 *pulOffset = pNextLine - pszSearchIn;
1000 return (pNextLine);
1001}
1002
1003/*
1004 *@@ strhBeautifyTitle:
1005 * replaces all line breaks (0xd, 0xa) with spaces.
1006 *
1007 *@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1008 */
1009
1010BOOL strhBeautifyTitle(PSZ psz)
1011{
1012 BOOL rc = FALSE;
1013 CHAR *p = psz;
1014
1015 while(*p)
1016 if ( (*p == '\r')
1017 || (*p == '\n')
1018 )
1019 {
1020 rc = TRUE;
1021 if ( (p != psz)
1022 && (p[-1] == ' ')
1023 )
1024 memmove(p, p + 1, strlen(p));
1025 else
1026 *p++ = ' ';
1027 }
1028 else
1029 p++;
1030
1031 return (rc);
1032}
1033
1034/*
1035 * strhFindAttribValue:
1036 * searches for pszAttrib in pszSearchIn; if found,
1037 * returns the first character after the "=" char.
1038 * If "=" is not found, a space, \r, and \n are
1039 * also accepted. This function searches without
1040 * respecting case.
1041 *
1042 * <B>Example:</B>
1043 + strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1044 +
1045 + returns ....................... ^ this address.
1046 *
1047 *@@added V0.9.0 [umoeller]
1048 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1049 *@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1050 */
1051
1052PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1053{
1054 PSZ prc = 0;
1055 PSZ pszSearchIn2, p;
1056 ULONG cbAttrib = strlen(pszAttrib),
1057 ulLength = strlen(pszSearchIn);
1058
1059 // use alloca(), so memory is freed on function exit
1060 pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1061 memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1062
1063 // 1) find token, (space char, \n, \r, \t)
1064 p = strtok(pszSearchIn2, " \n\r\t");
1065 while (p)
1066 {
1067 CHAR c2;
1068 PSZ pOrig;
1069
1070 // check tag name
1071 if (!strnicmp(p, pszAttrib, cbAttrib))
1072 {
1073 // position in original string
1074 pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1075
1076 // yes:
1077 prc = pOrig + cbAttrib;
1078 c2 = *prc;
1079 while ( ( (c2 == ' ')
1080 || (c2 == '=')
1081 || (c2 == '\n')
1082 || (c2 == '\r')
1083 )
1084 && (c2 != 0)
1085 )
1086 c2 = *++prc;
1087
1088 break;
1089 }
1090
1091 p = strtok(NULL, " \n\r\t");
1092 }
1093
1094 return (prc);
1095}
1096
1097/* PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1098{
1099 PSZ prc = 0;
1100 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1101 p,
1102 p2;
1103 ULONG cbAttrib = strlen(pszAttrib);
1104
1105 // 1) find space char
1106 while ((p = strchr(pszSearchIn2, ' ')))
1107 {
1108 CHAR c;
1109 p++;
1110 if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1111 {
1112 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1113 // now check whether the p+strlen(pszAttrib)
1114 // is a valid end-of-tag character
1115 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1116 && ( (c == ' ')
1117 || (c == '>')
1118 || (c == '=')
1119 || (c == '\r')
1120 || (c == '\n')
1121 || (c == 0)
1122 )
1123 )
1124 {
1125 // yes:
1126 CHAR c2;
1127 p2 = p + cbAttrib;
1128 c2 = *p2;
1129 while ( ( (c2 == ' ')
1130 || (c2 == '=')
1131 || (c2 == '\n')
1132 || (c2 == '\r')
1133 )
1134 && (c2 != 0)
1135 )
1136 c2 = *++p2;
1137
1138 prc = p2;
1139 break; // first while
1140 }
1141 }
1142 else
1143 break;
1144
1145 pszSearchIn2++;
1146 }
1147 return (prc);
1148} */
1149
1150/*
1151 * strhGetNumAttribValue:
1152 * stores the numerical parameter value of an HTML-style
1153 * tag in *pl.
1154 *
1155 * Returns the address of the tag parameter in the
1156 * search buffer, if found, or NULL.
1157 *
1158 * <B>Example:</B>
1159 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1160 *
1161 * stores 123 in the "l" variable.
1162 *
1163 *@@added V0.9.0 [umoeller]
1164 *@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1165 */
1166
1167PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1168 const char *pszTag, // e.g. "INDEX"
1169 PLONG pl) // out: numerical value
1170{
1171 PSZ pParam;
1172 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1173 {
1174 if ( (*pParam == '\"')
1175 || (*pParam == '\'')
1176 )
1177 pParam++; // V0.9.9 (2001-04-04) [umoeller]
1178
1179 sscanf(pParam, "%ld", pl);
1180 }
1181
1182 return (pParam);
1183}
1184
1185/*
1186 * strhGetTextAttr:
1187 * retrieves the attribute value of a textual HTML-style tag
1188 * in a newly allocated buffer, which is returned,
1189 * or NULL if attribute not found.
1190 * If an attribute value is to contain spaces, it
1191 * must be enclosed in quotes.
1192 *
1193 * The offset of the attribute data in pszSearchIn is
1194 * returned in *pulOffset so that you can do multiple
1195 * searches.
1196 *
1197 * This returns a new buffer, which should be free()'d after use.
1198 *
1199 * <B>Example:</B>
1200 + ULONG ulOfs = 0;
1201 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1202 + ............^ ulOfs
1203 *
1204 * returns a new string with the value "blublub" (without
1205 * quotes) and sets ulOfs to 12.
1206 *
1207 *@@added V0.9.0 [umoeller]
1208 */
1209
1210PSZ strhGetTextAttr(const char *pszSearchIn,
1211 const char *pszTag,
1212 PULONG pulOffset) // out: offset where found
1213{
1214 PSZ pParam,
1215 pParam2,
1216 prc = NULL;
1217 ULONG ulCount = 0;
1218 LONG lNestingLevel = 0;
1219
1220 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1221 {
1222 // determine end character to search for: a space
1223 CHAR cEnd = ' ';
1224 if (*pParam == '\"')
1225 {
1226 // or, if the data is enclosed in quotes, a quote
1227 cEnd = '\"';
1228 pParam++;
1229 }
1230
1231 if (pulOffset)
1232 // store the offset
1233 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1234
1235 // now find end of attribute
1236 pParam2 = pParam;
1237 while (*pParam)
1238 {
1239 if (*pParam == cEnd)
1240 // end character found
1241 break;
1242 else if (*pParam == '<')
1243 // yet another opening tag found:
1244 // this is probably some "<" in the attributes
1245 lNestingLevel++;
1246 else if (*pParam == '>')
1247 {
1248 lNestingLevel--;
1249 if (lNestingLevel < 0)
1250 // end of tag found:
1251 break;
1252 }
1253 ulCount++;
1254 pParam++;
1255 }
1256
1257 // copy attribute to new buffer
1258 if (ulCount)
1259 {
1260 prc = (PSZ)malloc(ulCount+1);
1261 memcpy(prc, pParam2, ulCount);
1262 *(prc+ulCount) = 0;
1263 }
1264 }
1265 return (prc);
1266}
1267
1268/*
1269 * strhFindEndOfTag:
1270 * returns a pointer to the ">" char
1271 * which seems to terminate the tag beginning
1272 * after pszBeginOfTag.
1273 *
1274 * If additional "<" chars are found, we look
1275 * for additional ">" characters too.
1276 *
1277 * Note: You must pass the address of the opening
1278 * '<' character to this function.
1279 *
1280 * Example:
1281 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1282 + strhFindEndOfTag(pszTest)
1283 + returns.................................^ this.
1284 *
1285 *@@added V0.9.0 [umoeller]
1286 */
1287
1288PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1289{
1290 PSZ p = (PSZ)pszBeginOfTag,
1291 prc = NULL;
1292 LONG lNestingLevel = 0;
1293
1294 while (*p)
1295 {
1296 if (*p == '<')
1297 // another opening tag found:
1298 lNestingLevel++;
1299 else if (*p == '>')
1300 {
1301 // closing tag found:
1302 lNestingLevel--;
1303 if (lNestingLevel < 1)
1304 {
1305 // corresponding: return this
1306 prc = p;
1307 break;
1308 }
1309 }
1310 p++;
1311 }
1312
1313 return (prc);
1314}
1315
1316/*
1317 * strhGetBlock:
1318 * this complex function searches the given string
1319 * for a pair of opening/closing HTML-style tags.
1320 *
1321 * If found, this routine returns TRUE and does
1322 * the following:
1323 *
1324 * 1) allocate a new buffer, copy the text
1325 * enclosed by the opening/closing tags
1326 * into it and set *ppszBlock to that
1327 * buffer;
1328 *
1329 * 2) if the opening tag has any attributes,
1330 * allocate another buffer, copy the
1331 * attributes into it and set *ppszAttrs
1332 * to that buffer; if no attributes are
1333 * found, *ppszAttrs will be NULL;
1334 *
1335 * 3) set *pulOffset to the offset from the
1336 * beginning of *ppszSearchIn where the
1337 * opening tag was found;
1338 *
1339 * 4) advance *ppszSearchIn to after the
1340 * closing tag, so that you can do
1341 * multiple searches without finding the
1342 * same tags twice.
1343 *
1344 * All buffers should be freed using free().
1345 *
1346 * This returns the following:
1347 * -- 0: no error
1348 * -- 1: tag not found at all (doesn't have to be an error)
1349 * -- 2: begin tag found, but no corresponding end tag found. This
1350 * is a real error.
1351 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1352 *
1353 * <B>Example:</B>
1354 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1355 + PSZ pszBlock, pszAttrs;
1356 + ULONG ulOfs;
1357 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1358 *
1359 * would do the following:
1360 *
1361 * 1) set pszBlock to a new string containing "This is page 1."
1362 * without quotes;
1363 *
1364 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1365 *
1366 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1367 *
1368 * 4) pSearch would be advanced to point to the "More text"
1369 * string in the original buffer.
1370 *
1371 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1372 * for HTML parsing.
1373 *
1374 *@@added V0.9.0 [umoeller]
1375 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1376 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1377 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1378 */
1379
1380ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1381 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1382 PSZ pszTag,
1383 PSZ *ppszBlock, // out: block enclosed by the tags
1384 PSZ *ppszAttribs, // out: attributes of the opening tag
1385 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1386 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1387{
1388 ULONG ulrc = 1;
1389 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1390 pszSearch2 = pszBeginTag,
1391 pszClosingTag;
1392 ULONG cbTag = strlen(pszTag);
1393
1394 // go thru the block and check all tags if it's the
1395 // begin tag we're looking for
1396 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1397 {
1398 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1399 // yes: stop
1400 break;
1401 else
1402 pszBeginTag++;
1403 }
1404
1405 if (pszBeginTag)
1406 {
1407 // we found <TAG>:
1408 ULONG ulNestingLevel = 0;
1409
1410 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1411 // strchr(pszBeginTag, '>');
1412 if (pszEndOfBeginTag)
1413 {
1414 // does the caller want the attributes?
1415 if (ppszAttribs)
1416 {
1417 // yes: then copy them
1418 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1419 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1420 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1421 // add terminating 0
1422 *(pszAttrs + ulAttrLen) = 0;
1423
1424 *ppszAttribs = pszAttrs;
1425 }
1426
1427 // output offset of where we found the begin tag
1428 if (pulOfsBeginTag)
1429 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1430
1431 // now find corresponding closing tag (e.g. "</BODY>"
1432 pszBeginTag = pszEndOfBeginTag+1;
1433 // now we're behind the '>' char of the opening tag
1434 // increase offset of that too
1435 if (pulOfsBeginBlock)
1436 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1437
1438 // find next closing tag;
1439 // for the first run, pszSearch2 points to right
1440 // after the '>' char of the opening tag
1441 pszSearch2 = pszBeginTag;
1442 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1443 && (pszClosingTag = strstr(pszSearch2, "<"))
1444 )
1445 {
1446 // if we have another opening tag before our closing
1447 // tag, we need to have several closing tags before
1448 // we're done
1449 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1450 ulNestingLevel++;
1451 else
1452 {
1453 // is this ours?
1454 if ( (*(pszClosingTag+1) == '/')
1455 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1456 )
1457 {
1458 // we've found a matching closing tag; is
1459 // it ours?
1460 if (ulNestingLevel == 0)
1461 {
1462 // our closing tag found:
1463 // allocate mem for a new buffer
1464 // and extract all the text between
1465 // open and closing tags to it
1466 ULONG ulLen = pszClosingTag - pszBeginTag;
1467 if (ppszBlock)
1468 {
1469 PSZ pNew = (PSZ)malloc(ulLen + 1);
1470 strhncpy0(pNew, pszBeginTag, ulLen);
1471 *ppszBlock = pNew;
1472 }
1473
1474 // raise search offset to after the closing tag
1475 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1476
1477 ulrc = 0;
1478
1479 break;
1480 } else
1481 // not our closing tag:
1482 ulNestingLevel--;
1483 }
1484 }
1485 // no matching closing tag: search on after that
1486 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1487 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1488
1489 if (!pszClosingTag)
1490 // no matching closing tag found:
1491 // return 2 (closing tag not found)
1492 ulrc = 2;
1493 } // end if (pszBeginTag)
1494 else
1495 // no matching ">" for opening tag found:
1496 ulrc = 3;
1497 }
1498
1499 return (ulrc);
1500}
1501
1502/* ******************************************************************
1503 *
1504 * Miscellaneous
1505 *
1506 ********************************************************************/
1507
1508/*
1509 *@@ strhArrayAppend:
1510 * this appends a string to a "string array".
1511 *
1512 * A string array is considered a sequence of
1513 * zero-terminated strings in memory. That is,
1514 * after each string's null-byte, the next
1515 * string comes up.
1516 *
1517 * This is useful for composing a single block
1518 * of memory from, say, list box entries, which
1519 * can then be written to OS2.INI in one flush.
1520 *
1521 * To append strings to such an array, call this
1522 * function for each string you wish to append.
1523 * This will re-allocate *ppszRoot with each call,
1524 * and update *pcbRoot, which then contains the
1525 * total size of all strings (including all null
1526 * terminators).
1527 *
1528 * Pass *pcbRoot to PrfSaveProfileData to have the
1529 * block saved.
1530 *
1531 * Note: On the first call, *ppszRoot and *pcbRoot
1532 * _must_ be both NULL, or this crashes.
1533 *
1534 *@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1535 */
1536
1537VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1538 const char *pcszNew, // in: string to append
1539 ULONG cbNew, // in: size of that string or 0 to run strlen() here
1540 PULONG pcbRoot) // in/out: size of array
1541{
1542 PSZ pszTemp;
1543
1544 if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1545 cbNew = strlen(pcszNew);
1546
1547 pszTemp = (PSZ)malloc(*pcbRoot
1548 + cbNew
1549 + 1); // two null bytes
1550 if (*ppszRoot)
1551 {
1552 // not first loop: copy old stuff
1553 memcpy(pszTemp,
1554 *ppszRoot,
1555 *pcbRoot);
1556 free(*ppszRoot);
1557 }
1558 // append new string
1559 strcpy(pszTemp + *pcbRoot,
1560 pcszNew);
1561 // update root
1562 *ppszRoot = pszTemp;
1563 // update length
1564 *pcbRoot += cbNew + 1;
1565}
1566
1567/*
1568 *@@ strhCreateDump:
1569 * this dumps a memory block into a string
1570 * and returns that string in a new buffer.
1571 *
1572 * You must free() the returned PSZ after use.
1573 *
1574 * The output looks like the following:
1575 *
1576 + 0000: FE FF 0E 02 90 00 00 00 ........
1577 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1578 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1579 *
1580 * Each line is terminated with a newline (\n)
1581 * character only.
1582 *
1583 *@@added V0.9.1 (2000-01-22) [umoeller]
1584 */
1585
1586PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1587 ULONG ulSize, // in: size of buffer
1588 ULONG ulIndent) // in: indentation of every line
1589{
1590 PSZ pszReturn = 0;
1591 XSTRING strReturn;
1592 CHAR szTemp[1000];
1593
1594 PBYTE pbCurrent = pb; // current byte
1595 ULONG ulCount = 0,
1596 ulCharsInLine = 0; // if this grows > 7, a new line is started
1597 CHAR szLine[400] = "",
1598 szAscii[30] = " "; // ASCII representation; filled for every line
1599 PSZ pszLine = szLine,
1600 pszAscii = szAscii;
1601
1602 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1603
1604 for (pbCurrent = pb;
1605 ulCount < ulSize;
1606 pbCurrent++, ulCount++)
1607 {
1608 if (ulCharsInLine == 0)
1609 {
1610 memset(szLine, ' ', ulIndent);
1611 pszLine += ulIndent;
1612 }
1613 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1614
1615 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1616 // printable character:
1617 *pszAscii = *pbCurrent;
1618 else
1619 *pszAscii = '.';
1620 pszAscii++;
1621
1622 ulCharsInLine++;
1623 if ( (ulCharsInLine > 7) // 8 bytes added?
1624 || (ulCount == ulSize-1) // end of buffer reached?
1625 )
1626 {
1627 // if we haven't had eight bytes yet,
1628 // fill buffer up to eight bytes with spaces
1629 ULONG ul2;
1630 for (ul2 = ulCharsInLine;
1631 ul2 < 8;
1632 ul2++)
1633 pszLine += sprintf(pszLine, " ");
1634
1635 sprintf(szTemp, "%04lX: %s %s\n",
1636 (ulCount & 0xFFFFFFF8), // offset in hex
1637 szLine, // bytes string
1638 szAscii); // ASCII string
1639 xstrcat(&strReturn, szTemp, 0);
1640
1641 // restart line buffer
1642 pszLine = szLine;
1643
1644 // clear ASCII buffer
1645 strcpy(szAscii, " ");
1646 pszAscii = szAscii;
1647
1648 // reset line counter
1649 ulCharsInLine = 0;
1650 }
1651 }
1652
1653 if (strReturn.cbAllocated)
1654 pszReturn = strReturn.psz;
1655
1656 return (pszReturn);
1657}
1658
1659/* ******************************************************************
1660 *
1661 * Wildcard matching
1662 *
1663 ********************************************************************/
1664
1665/*
1666 * The following code has been taken from "fnmatch.zip".
1667 *
1668 * (c) 1994-1996 by Eberhard Mattes.
1669 */
1670
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 * skip_comp_os2:
 *      for OS/2 and DOS styles, returns a pointer to the
 *      path component which follows the component SRC
 *      points into, that is, to the character behind the
 *      next separator. If SRC has no more separators, a
 *      pointer to its terminating null character is
 *      returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *pNext;

    for (pNext = src;
         *pNext != 0;
         ++pNext)
    {
        if (IS_OS2_COMP_SEP(*pNext))
            /* The next component starts behind the separator. */
            return (pNext + 1);
    }

    /* No separator left: we are at the end of the string. */
    return (pNext);
}
1705
/*
 * has_colon:
 *      returns 1 if the null-terminated path P contains
 *      at least one colon, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    const unsigned char *pCur;

    for (pCur = p;
         *pCur != 0;
         ++pCur)
    {
        if (*pCur == ':')
            return 1;
    }

    return 0;
}
1722
1723/*
1724 * match_comp_os2:
1725 * compares a single component (directory name or file name)
1726 * of the paths, for OS/2 and DOS styles. MASK and NAME point
1727 * into a component of the wildcard and the name to be checked,
1728 * respectively. Comparing stops at the next separator.
1729 * The FLAGS argument is the same as that of fnmatch().
1730 *
1731 * HAS_DOT is true if a dot is in the current component of NAME.
1732 * The number of dots is not restricted, even in DOS style.
1733 *
1734 * Returns FNM_MATCH iff MASK and NAME match.
1735 *
1736 * Note that this function is recursive.
1737 *
1738 * (c) 1994-1996 by Eberhard Mattes.
1739 */
1740
1741static int match_comp_os2(const unsigned char *mask,
1742 const unsigned char *name,
1743 unsigned flags,
1744 int has_dot)
1745{
1746 int rc;
1747
1748 for (;;)
1749 switch (*mask)
1750 {
1751 case 0:
1752
1753 /* There must be no extra characters at the end of NAME when
1754 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1755 * in that case, NAME may point to a separator. */
1756
1757 if (*name == 0)
1758 return FNM_MATCH;
1759 if ((flags & FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1760 return FNM_MATCH;
1761 return FNM_NOMATCH;
1762
1763 case '/':
1764 case '\\':
1765
1766 /* Separators match separators. */
1767
1768 if (IS_OS2_COMP_SEP(*name))
1769 return FNM_MATCH;
1770
1771 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1772 * is ignored at the end of NAME. */
1773
1774 if ((flags & FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1775 return FNM_MATCH;
1776
1777 /* Stop comparing at the separator. */
1778
1779 return FNM_NOMATCH;
1780
1781 case '?':
1782
1783 /* A question mark matches one character. It does not match
1784 * a dot. At the end of the component (and before a dot),
1785 * it also matches zero characters. */
1786
1787 if (*name != '.' && !IS_OS2_COMP_END(*name))
1788 ++name;
1789 ++mask;
1790 break;
1791
1792 case '*':
1793
1794 /* An asterisk matches zero or more characters. In DOS
1795 * mode, dots are not matched. */
1796
1797 do
1798 {
1799 ++mask;
1800 }
1801 while (*mask == '*');
1802 for (;;)
1803 {
1804 rc = match_comp_os2(mask, name, flags, has_dot);
1805 if (rc != FNM_NOMATCH)
1806 return rc;
1807 if (IS_OS2_COMP_END(*name))
1808 return FNM_NOMATCH;
1809 if (*name == '.' && (flags & FNM_STYLE_MASK) == FNM_DOS)
1810 return FNM_NOMATCH;
1811 ++name;
1812 }
1813
1814 case '.':
1815
1816 /* A dot matches a dot. It also matches the implicit dot at
1817 * the end of a dot-less NAME. */
1818
1819 ++mask;
1820 if (*name == '.')
1821 ++name;
1822 else if (has_dot || !IS_OS2_COMP_END(*name))
1823 return FNM_NOMATCH;
1824 break;
1825
1826 default:
1827
1828 /* All other characters match themselves. */
1829
1830 if (flags & FNM_IGNORECASE)
1831 {
1832 if (tolower(*mask) != tolower(*name))
1833 return FNM_NOMATCH;
1834 }
1835 else
1836 {
1837 if (*mask != *name)
1838 return FNM_NOMATCH;
1839 }
1840 ++mask;
1841 ++name;
1842 break;
1843 }
1844}
1845
1846/*
1847 * match_comp:
1848 * compares a single component (directory name or file
1849 * name) of the paths, for all styles which need
1850 * component-by-component matching. MASK and NAME point
1851 * to the start of a component of the wildcard and the
1852 * name to be checked, respectively. Comparing stops at
1853 * the next separator. The FLAGS argument is the same as
1854 * that of fnmatch().
1855 *
1856 * Return FNM_MATCH iff MASK and NAME match.
1857 *
1858 * (c) 1994-1996 by Eberhard Mattes.
1859 */
1860
1861static int match_comp(const unsigned char *mask,
1862 const unsigned char *name,
1863 unsigned flags)
1864{
1865 const unsigned char *s;
1866
1867 switch (flags & FNM_STYLE_MASK)
1868 {
1869 case FNM_OS2:
1870 case FNM_DOS:
1871
1872 /* For OS/2 and DOS styles, we add an implicit dot at the end of
1873 * the component if the component doesn't include a dot. */
1874
1875 s = name;
1876 while (!IS_OS2_COMP_END(*s) && *s != '.')
1877 ++s;
1878 return match_comp_os2(mask, name, flags, *s == '.');
1879
1880 default:
1881 return FNM_ERR;
1882 }
1883}
1884
1885/* In Unix styles, / separates components of a path. This macro
1886 * returns true iff C is a separator. */
1887
1888#define IS_UNIX_COMP_SEP(C) ((C) == '/')
1889
1890
1891/* This macro returns true if C is at the end of a component of a
1892 * path. */
1893
1894#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP (C))
1895
1896/*
1897 * match_unix:
1898 * matches complete paths for Unix styles.
1899 *
1900 * The FLAGS argument is the same as that of fnmatch().
1901 * COMP points to the start of the current component in
1902 * NAME. Return FNM_MATCH iff MASK and NAME match. The
1903 * backslash character is used for escaping ? and * unless
1904 * FNM_NOESCAPE is set.
 *
 * MASK is worked off element by element in a loop. The
 * flags which are tested here are FNM_PATHPREFIX,
 * FNM_PATHNAME, FNM_PERIOD, FNM_NOESCAPE and
 * FNM_IGNORECASE. COMP is moved to behind a separator
 * in NAME only if FNM_PATHNAME is set.
 *
 * For an asterisk in MASK, this function calls itself
 * with the rest of MASK; any result of that call other
 * than FNM_NOMATCH is returned as it is.
1905 *
1906 * (c) 1994-1996 by Eberhard Mattes.
1907 */
1908
1909static int match_unix(const unsigned char *mask,
1910 const unsigned char *name,
1911 unsigned flags,
1912 const unsigned char *comp)
1913{
1914 unsigned char c1, c2;
1915 char invert, matched;
1916 const unsigned char *start;
1917 int rc;
1918
/* One element of MASK is handled per iteration; every "break"
 * below only leaves the switch, so that the loop goes on with
 * the next element of MASK. */
1919 for (;;)
1920 switch (*mask)
1921 {
1922 case 0:
1923
1924 /* There must be no extra characters at the end of NAME when
1925 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1926 * in that case, NAME may point to a separator. */
1927
1928 if (*name == 0)
1929 return FNM_MATCH;
1930 if ((flags & FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1931 return FNM_MATCH;
1932 return FNM_NOMATCH;
1933
1934 case '?':
1935
1936 /* A question mark matches one character. It does not match
1937 * the component separator if FNM_PATHNAME is set. It does
1938 * not match a dot at the start of a component if FNM_PERIOD
1939 * is set. */
1940
1941 if (*name == 0)
1942 return FNM_NOMATCH;
1943 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1944 return FNM_NOMATCH;
1945 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1946 return FNM_NOMATCH;
1947 ++mask;
1948 ++name;
1949 break;
1950
1951 case '*':
1952
1953 /* An asterisk matches zero or more characters. It does not
1954 * match the component separator if FNM_PATHNAME is set. It
1955 * does not match a dot at the start of a component if
1956 * FNM_PERIOD is set. */
1957
1958 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1959 return FNM_NOMATCH;
/* Several asterisks in a row are treated like a single one. */
1960 do
1961 {
1962 ++mask;
1963 }
1964 while (*mask == '*');
/* Try to match the rest of MASK at the current position of
 * NAME; if that fails, let the asterisk take one more
 * character of NAME and try again. */
1965 for (;;)
1966 {
1967 rc = match_unix(mask, name, flags, comp);
1968 if (rc != FNM_NOMATCH)
1969 return rc;
1970 if (*name == 0)
1971 return FNM_NOMATCH;
1972 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1973 return FNM_NOMATCH;
1974 ++name;
1975 }
1976
1977 case '/':
1978
1979 /* Separators match only separators. If _FNM_PATHPREFIX is
1980 * set, a trailing separator in MASK is ignored at the end
1981 * of NAME. */
1982
1983 if (!(IS_UNIX_COMP_SEP(*name)
1984 || ((flags & FNM_PATHPREFIX) && *name == 0
1985 && (mask[1] == 0
1986 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1987 && mask[2] == 0)))))
1988 return FNM_NOMATCH;
1989
1990 ++mask;
1991 if (*name != 0)
1992 ++name;
1993
1994 /* This is the beginning of a new component if FNM_PATHNAME
1995 * is set. */
1996
1997 if (flags & FNM_PATHNAME)
1998 comp = name;
1999 break;
2000
2001 case '[':
2002
2003 /* A set of characters. Always case-sensitive. */
2004
2005 if (*name == 0)
2006 return FNM_NOMATCH;
2007 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2008 return FNM_NOMATCH;
2009 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2010 return FNM_NOMATCH;
2011
2012 invert = 0;
2013 matched = 0;
2014 ++mask;
2015
2016 /* If the first character is a ! or ^, the set matches all
2017 * characters not listed in the set. */
2018
2019 if (*mask == '!' || *mask == '^')
2020 {
2021 ++mask;
2022 invert = 1;
2023 }
2024
2025 /* Loop over all the characters of the set. The loop ends
2026 * if the end of the string is reached or if a ] is
2027 * encountered unless it directly follows the initial [ or
2028 * [-. */
2029
2030 start = mask;
2031 while (!(*mask == 0 || (*mask == ']' && mask != start)))
2032 {
2033 /* Get the next character which is optionally preceded
2034 * by a backslash. */
2035
2036 c1 = *mask++;
2037 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2038 {
2039 if (*mask == 0)
2040 break;
2041 c1 = *mask++;
2042 }
2043
2044 /* Ranges of characters are written as a-z. Don't
2045 * forget to check for the end of the string and to
2046 * handle the backslash. If the character after - is a
2047 * ], it isn't a range. */
2048
2049 if (*mask == '-' && mask[1] != ']')
2050 {
2051 ++mask; /* Skip the - character */
2052 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2053 ++mask;
2054 if (*mask == 0)
2055 break;
2056 c2 = *mask++;
2057 }
2058 else
2059 c2 = c1;
2060
2061 /* Now check whether this character or range matches NAME. */
/* A single character has been turned into the range c1..c1
 * above. The whole set is scanned even after a hit, because
 * MASK must end up at the closing ]. */
2062
2063 if (c1 <= *name && *name <= c2)
2064 matched = 1;
2065 }
2066
2067 /* If the end of the string is reached before a ] is found,
2068 * back up to the [ and compare it to NAME. */
2069
2070 if (*mask == 0)
2071 {
2072 if (*name != '[')
2073 return FNM_NOMATCH;
2074 ++name;
/* Go on right behind the [ (or at the ! or ^ if one has been
 * skipped), so that the rest of the unterminated set is
 * handled like any other part of MASK. */
2075 mask = start;
2076 if (invert)
2077 --mask;
2078 }
2079 else
2080 {
2081 if (invert)
2082 matched = !matched;
2083 if (!matched)
2084 return FNM_NOMATCH;
2085 ++mask; /* Skip the ] character */
2086 if (*name != 0)
2087 ++name;
2088 }
2089 break;
2090
2091 case '\\':
/* With FNM_NOESCAPE, a backslash is an ordinary character
 * which must be found in NAME. Otherwise it escapes a
 * following * or ?, which must then be found in NAME
 * literally. In front of any other character, the backslash
 * is just dropped and that character is handled by the next
 * iteration of the loop. */
2092 ++mask;
2093 if (flags & FNM_NOESCAPE)
2094 {
2095 if (*name != '\\')
2096 return FNM_NOMATCH;
2097 ++name;
2098 }
2099 else if (*mask == '*' || *mask == '?')
2100 {
2101 if (*mask != *name)
2102 return FNM_NOMATCH;
2103 ++mask;
2104 ++name;
2105 }
2106 break;
2107
2108 default:
2109
2110 /* All other characters match themselves. */
2111
2112 if (flags & FNM_IGNORECASE)
2113 {
2114 if (tolower(*mask) != tolower(*name))
2115 return FNM_NOMATCH;
2116 }
2117 else
2118 {
2119 if (*mask != *name)
2120 return FNM_NOMATCH;
2121 }
2122 ++mask;
2123 ++name;
2124 break;
2125 }
2126}
2127
2128/*
2129 * _fnmatch_unsigned:
2130 * Check whether the path name NAME matches the wildcard MASK.
2131 *
2132 * Return:
2133 * -- 0 (FNM_MATCH) if it matches,
2134 * -- _FNM_NOMATCH if it doesn't,
2135 * -- FNM_ERR on error.
2136 *
2137 * The operation of this function is controlled by FLAGS.
2138 * This is an internal function, with unsigned arguments.
2139 *
2140 * (c) 1994-1996 by Eberhard Mattes.
2141 */
2142
2143static int _fnmatch_unsigned(const unsigned char *mask,
2144 const unsigned char *name,
2145 unsigned flags)
2146{
2147 int m_drive,
2148 n_drive,
2149 rc;
2150
2151 /* Match and skip the drive name if present. */
2152
2153 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2154 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2155
2156 if (m_drive != n_drive)
2157 {
2158 if (m_drive == -1 || n_drive == -1)
2159 return FNM_NOMATCH;
2160 if (!(flags & FNM_IGNORECASE))
2161 return FNM_NOMATCH;
2162 if (tolower(m_drive) != tolower(n_drive))
2163 return FNM_NOMATCH;
2164 }
2165
2166 if (m_drive != -1)
2167 mask += 2;
2168 if (n_drive != -1)
2169 name += 2;
2170
2171 /* Colons are not allowed in path names, except for the drive name,
2172 * which was skipped above. */
2173
2174 if (has_colon(mask) || has_colon(name))
2175 return FNM_ERR;
2176
2177 /* The name "\\server\path" should not be matched by mask
2178 * "\*\server\path". Ditto for /. */
2179
2180 switch (flags & FNM_STYLE_MASK)
2181 {
2182 case FNM_OS2:
2183 case FNM_DOS:
2184
2185 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2186 {
2187 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2188 return FNM_NOMATCH;
2189 name += 2;
2190 mask += 2;
2191 }
2192 break;
2193
2194 case FNM_POSIX:
2195
2196 if (name[0] == '/' && name[1] == '/')
2197 {
2198 int i;
2199
2200 name += 2;
2201 for (i = 0; i < 2; ++i)
2202 if (mask[0] == '/')
2203 ++mask;
2204 else if (mask[0] == '\\' && mask[1] == '/')
2205 mask += 2;
2206 else
2207 return FNM_NOMATCH;
2208 }
2209
2210 /* In Unix styles, treating ? and * w.r.t. components is simple.
2211 * No need to do matching component by component. */
2212
2213 return match_unix(mask, name, flags, name);
2214 }
2215
2216 /* Now compare all the components of the path name, one by one.
2217 * Note that the path separator must not be enclosed in brackets. */
2218
2219 while (*mask != 0 || *name != 0)
2220 {
2221
2222 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2223 * is reached even if there are components left in NAME. */
2224
2225 if (*mask == 0 && (flags & FNM_PATHPREFIX))
2226 return FNM_MATCH;
2227
2228 /* Compare a single component of the path name. */
2229
2230 rc = match_comp(mask, name, flags);
2231 if (rc != FNM_MATCH)
2232 return rc;
2233
2234 /* Skip to the next component or to the end of the path name. */
2235
2236 mask = skip_comp_os2(mask);
2237 name = skip_comp_os2(name);
2238 }
2239
2240 /* If we reached the ends of both strings, the names match. */
2241
2242 if (*mask == 0 && *name == 0)
2243 return FNM_MATCH;
2244
2245 /* The names do not match. */
2246
2247 return FNM_NOMATCH;
2248}
2249
2250/*
2251 *@@ strhMatchOS2:
2252 * this matches wildcards, similar to what DosEditName does.
2253 * However, this does not require a file to be present, but
2254 * works on strings only.
2255 */
2256
2257BOOL strhMatchOS2(const char *pcszMask, // in: mask (e.g. "*.txt")
2258 const char *pcszName) // in: string to check (e.g. "test.txt")
2259{
2260 return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2261 (const unsigned char *)pcszName,
2262 FNM_OS2 | FNM_IGNORECASE)
2263 == FNM_MATCH)
2264 );
2265}
2266
2267/*
2268 *@@ strhMatchExt:
2269 * like strhMatchOS2, but this takes all the flags
2270 * for input.
2271 *
2272 *@@added V0.9.15 (2001-09-14) [umoeller]
2273 */
2274
2275BOOL strhMatchExt(const char *pcszMask, // in: mask (e.g. "*.txt")
2276 const char *pcszName, // in: string to check (e.g. "test.txt")
2277 unsigned flags) // in: FNM_* flags
2278{
2279 return ((BOOL)(_fnmatch_unsigned((const unsigned char *)pcszMask,
2280 (const unsigned char *)pcszName,
2281 flags)
2282 == FNM_MATCH)
2283 );
2284}
2285
2286/* ******************************************************************
2287 *
2288 * Fast string searches
2289 *
2290 ********************************************************************/
2291
2292#define ASSERT(a)
2293
2294/*
2295 * The following code has been taken from the "Standard
2296 * Function Library", file sflfind.c, and only slightly
2297 * modified to conform to the rest of this file.
2298 *
2299 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2300 * Revised: 98/05/04
2301 *
2302 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2303 *
2304 * The SFL Licence allows incorporating SFL code into other
2305 * programs, as long as the copyright is reprinted and the
2306 * code is marked as modified, so this is what we do.
2307 */
2308
2309/*
2310 *@@ strhmemfind:
2311 * searches for a pattern in a block of memory using the
2312 * Boyer-Moore-Horspool-Sunday algorithm.
2313 *
2314 * The block and pattern may contain any values; you must
2315 * explicitly provide their lengths. If you search for strings,
2316 * use strlen() on the buffers.
2317 *
2318 * Returns a pointer to the pattern if found within the block,
2319 * or NULL if the pattern was not found.
2320 *
2321 * This algorithm needs a "shift table" to cache data for the
2322 * search pattern. This table can be reused when performing
2323 * several searches with the same pattern.
2324 *
2325 * "shift" must point to an array big enough to hold 256 (8**2)
2326 * "size_t" values.
2327 *
2328 * If (*repeat_find == FALSE), the shift table is initialized.
2329 * So on the first search with a given pattern, *repeat_find
2330 * should be FALSE. This function sets it to TRUE after the
2331 * shift table is initialised, allowing the initialisation
2332 * phase to be skipped on subsequent searches.
2333 *
2334 * This function is most effective when repeated searches are
2335 * made for the same pattern in one or more large buffers.
2336 *
2337 * Example:
2338 *
2339 + PSZ pszHaystack = "This is a sample string.",
2340 + pszNeedle = "string";
2341 + size_t shift[256];
2342 + BOOL fRepeat = FALSE;
2343 +
2344 + PSZ pFound = strhmemfind(pszHaystack,
2345 + strlen(pszHaystack), // block size
2346 + pszNeedle,
2347 + strlen(pszNeedle), // pattern size
2348 + shift,
2349 + &fRepeat);
2350 *
2351 * Taken from the "Standard Function Library", file sflfind.c.
2352 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2353 * Slightly modified by umoeller.
2354 *
2355 *@@added V0.9.3 (2000-05-08) [umoeller]
2356 */
2357
2358void* strhmemfind(const void *in_block, // in: block containing data
2359 size_t block_size, // in: size of block in bytes
2360 const void *in_pattern, // in: pattern to search for
2361 size_t pattern_size, // in: size of pattern block
2362 size_t *shift, // in/out: shift table (search buffer)
2363 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
2364{
2365 size_t byte_nbr, // Distance through block
2366 match_size; // Size of matched part
2367 const unsigned char
2368 *match_base = NULL, // Base of match of pattern
2369 *match_ptr = NULL, // Point within current match
2370 *limit = NULL; // Last potiental match point
2371 const unsigned char
2372 *block = (unsigned char *) in_block, // Concrete pointer to block data
2373 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2374
2375 if ( (block == NULL)
2376 || (pattern == NULL)
2377 || (shift == NULL)
2378 )
2379 return (NULL);
2380
2381 // Pattern must be smaller or equal in size to string
2382 if (block_size < pattern_size)
2383 return (NULL); // Otherwise it's not found
2384
2385 if (pattern_size == 0) // Empty patterns match at start
2386 return ((void *)block);
2387
2388 // Build the shift table unless we're continuing a previous search
2389
2390 // The shift table determines how far to shift before trying to match
2391 // again, if a match at this point fails. If the byte after where the
2392 // end of our pattern falls is not in our pattern, then we start to
2393 // match again after that byte; otherwise we line up the last occurence
2394 // of that byte in our pattern under that byte, and try match again.
2395
2396 if (!repeat_find || !*repeat_find)
2397 {
2398 for (byte_nbr = 0;
2399 byte_nbr < 256;
2400 byte_nbr++)
2401 shift[byte_nbr] = pattern_size + 1;
2402 for (byte_nbr = 0;
2403 byte_nbr < pattern_size;
2404 byte_nbr++)
2405 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2406
2407 if (repeat_find)
2408 *repeat_find = TRUE;
2409 }
2410
2411 // Search for the block, each time jumping up by the amount
2412 // computed in the shift table
2413
2414 limit = block + (block_size - pattern_size + 1);
2415 ASSERT (limit > block);
2416
2417 for (match_base = block;
2418 match_base < limit;
2419 match_base += shift[*(match_base + pattern_size)])
2420 {
2421 match_ptr = match_base;
2422 match_size = 0;
2423
2424 // Compare pattern until it all matches, or we find a difference
2425 while (*match_ptr++ == pattern[match_size++])
2426 {
2427 ASSERT (match_size <= pattern_size &&
2428 match_ptr == (match_base + match_size));
2429
2430 // If we found a match, return the start address
2431 if (match_size >= pattern_size)
2432 return ((void*)(match_base));
2433
2434 }
2435 }
2436 return (NULL); // Found nothing
2437}
2438
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. Both the
 *      string and the pattern are null-terminated strings.
 *
 *      Returns a pointer into "string" where the pattern starts,
 *      or NULL if the pattern was not found or if either argument
 *      is NULL. An empty pattern matches at the start of the string.
 *
 *      Characters are compared after folding them with tolower(),
 *      so the match is irrespective of case. To match exact strings,
 *      use strhfind(). Will not work on multibyte characters.
 *
 *      Every byte is converted to unsigned char before it is passed
 *      to tolower(), because passing a negative value (a byte >= 0x80
 *      where plain char is signed) to a <ctype.h> function is
 *      undefined behavior.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const char
        *match_base = NULL,             // Base of match of pattern
        *match_ptr  = NULL,             // Point within current match
        *limit      = NULL;             // Last potential match point

    ASSERT (string);                    // Expect non-NULL pointers, but
    ASSERT (pattern);                   // fail gracefully if not debugging
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    // The table is indexed by the case-folded byte value; the inner cast
    // keeps the tolower() argument in the range 0..UCHAR_MAX.
    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    limit = string + (string_size - pattern_size + 1);
    ASSERT (limit > string);

    // The byte inspected for the shift is the one just past the current
    // window; while match_base < limit that is at most the terminating
    // null byte of the string, so the read stays inside the string.
    for (match_base = string;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (
                                  (unsigned char) match_base [pattern_size])])
    {
        match_ptr  = match_base;
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (tolower ((unsigned char) *match_ptr++)
               == tolower ((unsigned char) pattern [match_size++]))
        {
            ASSERT (match_size <= pattern_size &&
                    match_ptr == (match_base + match_size));

            // If we found a match, return the start address
            if (match_size >= pattern_size)
                return ((char *)(match_base));
        }
    }
    return (NULL);                      // Found nothing
}
2533
Note: See TracBrowser for help on using the repository browser.