source: trunk/src/helpers/stringh.c@ 35

Last change on this file since 35 was 23, checked in by umoeller, 25 years ago

Fixes for V0.9.7.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 76.5 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#include <os2.h>
45
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <math.h>
51
52#include "setup.h" // code generation and debugging options
53
54#include "helpers\stringh.h"
55#include "helpers\xstring.h" // extended string helpers
56
57#pragma hdrstop
58
59/*
60 *@@category: Helpers\C helpers\String management
61 * See stringh.c and xstring.c.
62 */
63
64/*
65 *@@category: Helpers\C helpers\String management\C string helpers
66 * See stringh.c.
67 */
68
/*
 *@@ strhdup:
 *      NULL-tolerant variant of strdup. If pszSource
 *      is not NULL, it is duplicated with strdup and
 *      the copy is returned (free() it after use).
 *      If pszSource is NULL, NULL is returned instead
 *      of crashing.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhdup(const char *pszSource)
{
    return ((pszSource != NULL) ? strdup(pszSource) : NULL);
}
85
/*
 *@@ strhistr:
 *      like strstr, but case-insensitive: returns a
 *      pointer to the first occurrence of string2 in
 *      string1, or NULL if there is none or if either
 *      pointer is NULL. As with strstr, an empty
 *      string2 matches at the start of string1.
 *
 *      Characters are compared after toupper(), so the
 *      case folding follows the current C locale. The
 *      comparison is done in place; no memory is
 *      allocated, so a NULL return always means
 *      "not found" (or NULL input).
 *
 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rüdiger Ihle
 */

PSZ strhistr(const char *string1, const char *string2)
{
    const char *pStart;

    if ((!string1) || (!string2))
        return (NULL);

    for (pStart = string1; ; pStart++)
    {
        const char  *p1 = pStart,
                    *p2 = string2;

        // compare string2 against the text beginning at pStart;
        // the cast keeps toupper() defined for characters >= 0x80
        while (    (*p2)
                && (toupper((unsigned char)*p1) == toupper((unsigned char)*p2))
              )
        {
            p1++;
            p2++;
        }

        if (*p2 == 0)
            // all of string2 matched
            return ((PSZ)pStart);

        if (*p1 == 0)
            // string1 ran out before string2 did; every later
            // start position is shorter still, so give up
            return (NULL);
    }
}
124
/*
 *@@ strhncpy0:
 *      like strncpy, but always appends a 0 character.
 *
 *      Copies at most cbSource characters from pszSource
 *      to pszTarget, stopping early at the end of
 *      pszSource, and then terminates pszTarget. The
 *      target must therefore have room for cbSource + 1
 *      bytes. Unlike strncpy, the rest of the target is
 *      not padded with zeroes.
 *
 *      Returns the number of characters copied, not
 *      counting the terminator.
 */

ULONG strhncpy0(PSZ pszTarget,
                const char *pszSource,
                ULONG cbSource)
{
    ULONG cCopied = 0;

    while (    (cCopied < cbSource)
            && (pszSource[cCopied] != 0)
          )
    {
        pszTarget[cCopied] = pszSource[cCopied];
        cCopied++;
    }

    pszTarget[cCopied] = 0;

    return (cCopied);
}
147
/*
 * strhCount:
 *      this counts the occurrences of c in pszSearch.
 *
 *      Returns 0 if pszSearch is NULL. The terminating
 *      null byte is never counted, so 0 is also returned
 *      if c is the null character.
 */

ULONG strhCount(const char *pszSearch,
                CHAR c)
{
    ULONG ulCount = 0;

    if (pszSearch)
    {
        const char *p;
        // a plain scan instead of repeated strchr: strchr would
        // match the terminator for c == 0, and the search would
        // then continue beyond the end of the string
        for (p = pszSearch; *p; p++)
            if (*p == c)
                ulCount++;
    }

    return (ulCount);
}
171
/*
 *@@ strhIsDecimal:
 *      returns TRUE if psz consists of decimal digits only.
 *
 *      An empty string contains no non-digit and therefore
 *      yields TRUE as well. FALSE is returned if psz is NULL.
 */

BOOL strhIsDecimal(PSZ psz)
{
    PSZ p;

    if (!psz)
        return (FALSE);

    for (p = psz; *p != 0; p++)
    {
        // isdigit() is only defined for unsigned char values
        // and EOF; plain char may be negative for chars >= 0x80
        if (isdigit((unsigned char)*p) == 0)
            return (FALSE);
    }

    return (TRUE);
}
189
190/*
191 *@@ strhSubstr:
192 * this creates a new PSZ containing the string
193 * from pBegin to pEnd, excluding the pEnd character.
194 * The new string is null-terminated. The caller
195 * must free() the new string after use.
196 *
197 * Example:
198 + "1234567890"
199 + ^ ^
200 + p1 p2
201 + strhSubstr(p1, p2)
202 * would return a new string containing "2345678".
203 */
204
205PSZ strhSubstr(const char *pBegin, const char *pEnd)
206{
207 ULONG cbSubstr = (pEnd - pBegin);
208 PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
209 strhncpy0(pszSubstr, pBegin, cbSubstr);
210 return (pszSubstr);
211}
212
213/*
214 *@@ strhExtract:
215 * searches pszBuf for the cOpen character and returns
216 * the data in between cOpen and cClose, excluding
217 * those two characters, in a newly allocated buffer
218 * which you must free() afterwards.
219 *
220 * Spaces and newlines/linefeeds are skipped.
221 *
222 * If the search was successful, the new buffer
223 * is returned and, if (ppEnd != NULL), *ppEnd points
224 * to the first character after the cClose character
225 * found in the buffer.
226 *
227 * If the search was not successful, NULL is
228 * returned, and *ppEnd is unchanged.
229 *
230 * If another cOpen character is found before
231 * cClose, matching cClose characters will be skipped.
232 * You can therefore nest the cOpen and cClose
233 * characters.
234 *
235 * This function ignores cOpen and cClose characters
236 * in C-style comments and strings surrounded by
237 * double quotes.
238 *
239 * Example:
240 + PSZ pszBuf = "KEYWORD { --blah-- } next",
241 + pEnd;
242 + strhExtract(pszBuf,
243 + '{', '}',
244 + &pEnd)
245 * would return a new buffer containing " --blah-- ",
246 * and ppEnd would afterwards point to the space
247 * before "next" in the static buffer.
248 *
249 *@@added V0.9.0 [umoeller]
250 */
251
252PSZ strhExtract(PSZ pszBuf, // in: search buffer
253 CHAR cOpen, // in: opening char
254 CHAR cClose, // in: closing char
255 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
256{
257 PSZ pszReturn = NULL;
258
259 if (pszBuf)
260 {
261 PSZ pOpen = strchr(pszBuf, cOpen);
262 if (pOpen)
263 {
264 // opening char found:
265 // now go thru the whole rest of the buffer
266 PSZ p = pOpen+1;
267 LONG lLevel = 1; // if this goes 0, we're done
268 while (*p)
269 {
270 if (*p == cOpen)
271 lLevel++;
272 else if (*p == cClose)
273 {
274 lLevel--;
275 if (lLevel <= 0)
276 {
277 // matching closing bracket found:
278 // extract string
279 pszReturn = strhSubstr(pOpen+1, // after cOpen
280 p); // excluding cClose
281 if (ppEnd)
282 *ppEnd = p+1;
283 break; // while (*p)
284 }
285 }
286 else if (*p == '\"')
287 {
288 // beginning of string:
289 PSZ p2 = p+1;
290 // find end of string
291 while ((*p2) && (*p2 != '\"'))
292 p2++;
293
294 if (*p2 == '\"')
295 // closing quote found:
296 // search on after that
297 p = p2; // raised below
298 else
299 break; // while (*p)
300 }
301
302 p++;
303 }
304 }
305 }
306
307 return (pszReturn);
308}
309
310/*
311 *@@ strhQuote:
312 * similar to strhExtract, except that
313 * opening and closing chars are the same,
314 * and therefore no nesting is possible.
315 * Useful for extracting stuff between
316 * quotes.
317 *
318 *@@added V0.9.0 [umoeller]
319 */
320
321PSZ strhQuote(PSZ pszBuf,
322 CHAR cQuote,
323 PSZ *ppEnd)
324{
325 PSZ pszReturn = NULL,
326 p1 = NULL;
327 if ((p1 = strchr(pszBuf, cQuote)))
328 {
329 PSZ p2 = strchr(p1+1, cQuote);
330 if (p2)
331 {
332 pszReturn = strhSubstr(p1+1, p2);
333 if (ppEnd)
334 // store closing char
335 *ppEnd = p2 + 1;
336 }
337 }
338
339 return (pszReturn);
340}
341
/*
 *@@ strhStrip:
 *      removes all double spaces, i.e. every
 *      run of spaces is reduced to a single space.
 *      This copies within the "psz" buffer.
 *      If any double spaces are found, the
 *      string will be shorter than before,
 *      but the buffer is _not_ reallocated,
 *      so there will be unused bytes at the
 *      end.
 *
 *      Returns the number of spaces removed
 *      (0 if psz is NULL).
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhStrip(PSZ psz)         // in/out: string
{
    ULONG ulrc = 0;

    if (psz)
    {
        // compact the string in a single pass: pRead walks the
        // original text, pWrite trails behind with what is kept
        PSZ pRead = psz,
            pWrite = psz;

        while (*pRead)
        {
            if ((*pRead == ' ') && (*(pRead + 1) == ' '))
                // space followed by another space: drop this one
                ulrc++;
            else
                *pWrite++ = *pRead;

            pRead++;
        }

        *pWrite = 0;
    }

    return (ulrc);
}
380
/*
 *@@ strhins:
 *      this inserts one string into another.
 *
 *      pcszInsert is inserted into pcszBuffer at offset
 *      ulInsertOfs (which counts from 0). The largest
 *      valid offset is strlen(pcszBuffer), which appends
 *      pcszInsert to the end.
 *
 *      A newly allocated string is returned. pcszBuffer is
 *      not changed. The new string should be free()'d after
 *      use.
 *
 *      Upon errors, NULL is returned. That is the case if
 *      one of the strings is NULL, if ulInsertOfs is
 *      behind the end of pcszBuffer, or if memory
 *      allocation fails.
 *
 *@@changed V0.9.0 [umoeller]: completely rewritten.
 */

PSZ strhins(const char *pcszBuffer,
            ULONG ulInsertOfs,
            const char *pcszInsert)
{
    PSZ pszNew = NULL;

    if ((pcszBuffer) && (pcszInsert))
    {
        ULONG   cbBuffer = strlen(pcszBuffer);
        ULONG   cbInsert = strlen(pcszInsert);

        // an offset beyond the terminator would make the copies
        // below read and write outside of their buffers
        if (    (ulInsertOfs <= cbBuffer)
             && ((pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1)))
                                        // additional null terminator
           )
        {
            // copy stuff before insert position
            memcpy(pszNew,
                   pcszBuffer,
                   ulInsertOfs);
            // copy string to be inserted
            memcpy(pszNew + ulInsertOfs,
                   pcszInsert,
                   cbInsert);
            // copy stuff after insert position, including terminator
            strcpy(pszNew + ulInsertOfs + cbInsert,
                   pcszBuffer + ulInsertOfs);
        }
    }

    return (pszNew);
}
432
433/*
434 *@@ strhFindReplace:
435 * wrapper around xstrFindReplace to work with C strings.
436 * Note that *ppszBuf can get reallocated and must
437 * be free()'able.
438 *
439 * Repetitive use of this wrapper is not recommended
440 * because it is considerably slower than xstrFindReplace.
441 *
442 *@@added V0.9.6 (2000-11-01) [umoeller]
443 *@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
444 */
445
446ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
447 PULONG pulOfs, // in: where to begin search (0 = start);
448 // out: ofs of first char after replacement string
449 const char *pcszSearch, // in: search string; cannot be NULL
450 const char *pcszReplace) // in: replacement string; cannot be NULL
451{
452 ULONG ulrc = 0;
453 XSTRING xstrBuf,
454 xstrFind,
455 xstrReplace;
456 size_t ShiftTable[256];
457 BOOL fRepeat = FALSE;
458 xstrInitSet(&xstrBuf, *ppszBuf);
459 // reallocated and returned, so we're safe
460 xstrInitSet(&xstrFind, (PSZ)pcszSearch);
461 xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
462 // these two are never freed, so we're safe too
463
464 if ((ulrc = xstrFindReplace(&xstrBuf,
465 pulOfs,
466 &xstrFind,
467 &xstrReplace,
468 ShiftTable,
469 &fRepeat)))
470 // replaced:
471 *ppszBuf = xstrBuf.psz;
472
473 return (ulrc);
474}
475
/*
 * strhWords:
 *      returns the no. of words in "psz".
 *      A "word" is a run of characters which
 *      are not spaces, so words are separated
 *      by one or more space characters. Leading
 *      and trailing spaces do not count.
 *
 *      Returns 0 for an empty string, a string
 *      of spaces only, or if psz is NULL.
 *
 *      Note: earlier versions returned the number
 *      of spaces plus one (and 0 for strings shorter
 *      than two characters). The results are the same
 *      for strings of at least two characters whose
 *      words are separated by single spaces.
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhWords(PSZ psz)
{
    ULONG ulWords = 0;

    if (psz)
    {
        BOOL fInWord = FALSE;
        PSZ  p;

        for (p = psz; *p; p++)
        {
            if (*p == ' ')
                fInWord = FALSE;
            else if (!fInWord)
            {
                // first character after a space or
                // at the start: a new word begins
                fInWord = TRUE;
                ulWords++;
            }
        }
    }

    return (ulWords);
}
499
/*
 *@@ strhThousandsULong:
 *      converts a ULONG into a decimal string, while
 *      inserting thousands separators into it. Specify
 *      the separator character in cThousands.
 *
 *      pszTarget must be large enough for all digits,
 *      the separators and the null terminator.
 *
 *      Returns pszTarget so you can use it directly
 *      with sprintf and the "%s" flag.
 *
 *      For cThousands, you should use the data in
 *      OS2.INI ("PM_National" application), which is
 *      always set according to the "Country" object.
 *      You can use prfhQueryCountrySettings to
 *      retrieve this setting.
 *
 *      Use strhThousandsDouble for "double" values.
 */

PSZ strhThousandsULong(PSZ pszTarget,       // out: decimal as string
                       ULONG ul,            // in: decimal to convert
                       CHAR cThousands)     // in: separator char (e.g. '.')
{
    CHAR    szDigits[40];
    size_t  cDigits,
            ulIn,
            ulOut = 0;

    sprintf(szDigits, "%lu", ul);
    cDigits = strlen(szDigits);

    for (ulIn = 0; ulIn < cDigits; ulIn++)
    {
        // a separator goes before every digit which has a
        // multiple of three digits left (itself included),
        // except before the very first digit
        if ((ulIn > 0) && (((cDigits - ulIn) % 3) == 0))
            pszTarget[ulOut++] = cThousands;

        pszTarget[ulOut++] = szDigits[ulIn];
    }
    pszTarget[ulOut] = '\0';

    return (pszTarget);
}
543
544/*
545 *@@ strhThousandsDouble:
546 * like strhThousandsULong, but for a "double"
547 * value. Note that after-comma values are truncated.
548 */
549
550PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
551{
552 USHORT ust, uss, usc;
553 CHAR szTemp[40];
554 sprintf(szTemp, "%.0f", floor(dbl));
555
556 ust = 0;
557 usc = strlen(szTemp);
558 for (uss = 0; uss < usc; uss++)
559 {
560 if (uss)
561 if (((usc - uss) % 3) == 0)
562 {
563 pszTarget[ust] = cThousands;
564 ust++;
565 }
566 pszTarget[ust] = szTemp[uss];
567 ust++;
568 }
569 pszTarget[ust] = '\0';
570
571 return (pszTarget);
572}
573
574/*
575 *@@ strhVariableDouble:
576 * like strhThousandsULong, but for a "double" value, and
577 * with a variable number of decimal places depending on the
578 * size of the quantity.
579 *
580 *@@added V0.9.6 (2000-11-12) [pr]
581 */
582
583PSZ strhVariableDouble(PSZ pszTarget,
584 double dbl,
585 PSZ pszUnits,
586 CHAR cThousands)
587{
588 if (dbl < 100.0)
589 sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
590 else
591 if (dbl < 1000.0)
592 sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
593 else
594 strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
595 pszUnits);
596
597 return(pszTarget);
598}
599
600/*
601 *@@ strhFileDate:
602 * converts file date data to a string (to pszBuf).
603 * You can pass any FDATE structure to this function,
604 * which are returned in those FILEFINDBUF* or
605 * FILESTATUS* structs by the Dos* functions.
606 *
607 * ulDateFormat is the PM setting for the date format,
608 * as set in the "Country" object, and can be queried using
609 + PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
610 *
611 * meaning:
612 * -- 0 mm.dd.yyyy (English)
613 * -- 1 dd.mm.yyyy (e.g. German)
614 * -- 2 yyyy.mm.dd (Japanese, ISO)
615 * -- 3 yyyy.dd.mm
616 *
617 * cDateSep is used as a date separator (e.g. '.').
618 * This can be queried using:
619 + prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
620 *
621 * Alternatively, you can query all the country settings
622 * at once using prfhQueryCountrySettings (prfh.c).
623 *
624 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
625 */
626
627VOID strhFileDate(PSZ pszBuf, // out: string returned
628 FDATE *pfDate, // in: date information
629 ULONG ulDateFormat, // in: date format (0-3)
630 CHAR cDateSep) // in: date separator (e.g. '.')
631{
632 DATETIME dt;
633 dt.day = pfDate->day;
634 dt.month = pfDate->month;
635 dt.year = pfDate->year + 1980;
636
637 strhDateTime(pszBuf,
638 NULL, // no time
639 &dt,
640 ulDateFormat,
641 cDateSep,
642 0, 0); // no time
643}
644
645/*
646 *@@ strhFileTime:
647 * converts file time data to a string (to pszBuf).
648 * You can pass any FTIME structure to this function,
649 * which are returned in those FILEFINDBUF* or
650 * FILESTATUS* structs by the Dos* functions.
651 *
652 * ulTimeFormat is the PM setting for the time format,
653 * as set in the "Country" object, and can be queried using
654 + PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
655 * meaning:
656 * -- 0 12-hour clock
657 * -- >0 24-hour clock
658 *
659 * cDateSep is used as a time separator (e.g. ':').
660 * This can be queried using:
661 + prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
662 *
663 * Alternatively, you can query all the country settings
664 * at once using prfhQueryCountrySettings (prfh.c).
665 *
666 *@@changed 99-03-15 fixed 12-hour crash
667 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
668 */
669
670VOID strhFileTime(PSZ pszBuf, // out: string returned
671 FTIME *pfTime, // in: time information
672 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
673 CHAR cTimeSep) // in: time separator (e.g. ':')
674{
675 DATETIME dt;
676 dt.hours = pfTime->hours;
677 dt.minutes = pfTime->minutes;
678 dt.seconds = pfTime->twosecs * 2;
679
680 strhDateTime(NULL, // no date
681 pszBuf,
682 &dt,
683 0, 0, // no date
684 ulTimeFormat,
685 cTimeSep);
686}
687
688/*
689 *@@ strhDateTime:
690 * converts Control Program DATETIME info
691 * into two strings. See strhFileDate and strhFileTime
692 * for more detailed parameter descriptions.
693 *
694 *@@added V0.9.0 (99-11-07) [umoeller]
695 */
696
697VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
698 PSZ pszTime, // out: time string returned (can be NULL)
699 DATETIME *pDateTime, // in: date/time information
700 ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
701 CHAR cDateSep, // in: date separator (e.g. '.')
702 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
703 CHAR cTimeSep) // in: time separator (e.g. ':')
704{
705 if (pszDate)
706 {
707 switch (ulDateFormat)
708 {
709 case 0: // mm.dd.yyyy (English)
710 sprintf(pszDate, "%02d%c%02d%c%04d",
711 pDateTime->month,
712 cDateSep,
713 pDateTime->day,
714 cDateSep,
715 pDateTime->year);
716 break;
717
718 case 1: // dd.mm.yyyy (e.g. German)
719 sprintf(pszDate, "%02d%c%02d%c%04d",
720 pDateTime->day,
721 cDateSep,
722 pDateTime->month,
723 cDateSep,
724 pDateTime->year);
725 break;
726
727 case 2: // yyyy.mm.dd (Japanese)
728 sprintf(pszDate, "%04d%c%02d%c%02d",
729 pDateTime->year,
730 cDateSep,
731 pDateTime->month,
732 cDateSep,
733 pDateTime->day);
734 break;
735
736 default: // yyyy.dd.mm
737 sprintf(pszDate, "%04d%c%02d%c%02d",
738 pDateTime->year,
739 cDateSep,
740 pDateTime->day,
741 cDateSep,
742 pDateTime->month);
743 break;
744 }
745 }
746
747 if (pszTime)
748 {
749 if (ulTimeFormat == 0)
750 {
751 // for 12-hour clock, we need additional INI data
752 CHAR szAMPM[10] = "err";
753
754 if (pDateTime->hours > 12)
755 {
756 // > 12h: PM.
757
758 // Note: 12:xx noon is 12 AM, not PM (even though
759 // AM stands for "ante meridiam", but English is just
760 // not logical), so that's handled below.
761
762 PrfQueryProfileString(HINI_USER,
763 "PM_National",
764 "s2359", // key
765 "PM", // default
766 szAMPM, sizeof(szAMPM)-1);
767 sprintf(pszTime, "%02d%c%02d%c%02d %s",
768 // leave 12 == 12 (not 0)
769 pDateTime->hours % 12,
770 cTimeSep,
771 pDateTime->minutes,
772 cTimeSep,
773 pDateTime->seconds,
774 szAMPM);
775 }
776 else
777 {
778 // <= 12h: AM
779 PrfQueryProfileString(HINI_USER,
780 "PM_National",
781 "s1159", // key
782 "AM", // default
783 szAMPM, sizeof(szAMPM)-1);
784 sprintf(pszTime, "%02d%c%02d%c%02d %s",
785 pDateTime->hours,
786 cTimeSep,
787 pDateTime->minutes,
788 cTimeSep,
789 pDateTime->seconds,
790 szAMPM);
791 }
792 }
793 else
794 // 24-hour clock
795 sprintf(pszTime, "%02d%c%02d%c%02d",
796 pDateTime->hours,
797 cTimeSep,
798 pDateTime->minutes,
799 cTimeSep,
800 pDateTime->seconds);
801 }
802}
803
/*
 *@@ strhGetWord:
 *      finds word boundaries.
 *
 *      *ppszStart is used as the beginning of the
 *      search. All characters which are in pcszBeginChars
 *      are skipped first; the word then extends up to
 *      the first character which is in pcszEndChars
 *      or a null byte.
 *
 *      If a word is found, *ppszStart is set to
 *      the first character of the word which was
 *      found and *ppszEnd receives the address
 *      of the first character _after_ the word,
 *      which is probably a space or a \n or \r char.
 *      We then return TRUE.
 *
 *      The search is stopped if a null character
 *      is found or pLimit is reached before a word
 *      was started, or if the word does not end
 *      before or with the pLimit character. In that
 *      case, FALSE is returned, and *ppszStart and
 *      *ppszEnd are not changed.
 *
 *@@added V0.9.1 (2000-02-13) [umoeller]
 */

BOOL strhGetWord(PSZ *ppszStart,        // in: start of search,
                                        // out: start of word (if TRUE is returned)
                 const char *pLimit,    // in: ptr to last char after *ppszStart to be
                                        // searched; if the word does not end before
                                        // or with this char, FALSE is returned
                 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
                 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
                 PSZ *ppszEnd)          // out: first char _after_ word
                                        // (if TRUE is returned)
{
    PSZ pStart = *ppszStart;

    // find start of word; the null byte must be tested explicitly
    // because strchr() finds it in every string and would
    // otherwise treat it as a "before word" char
    while (    (pStart < (PSZ)pLimit)
            && (*pStart)
            && (strchr(pcszBeginChars, *pStart))
          )
        // if char is a "before word" char: go for next
        pStart++;

    if ((pStart < (PSZ)pLimit) && (*pStart))
    {
        // found a valid "word start" character
        // (which is not in pcszBeginChars):

        // find end of word; here the null byte ends
        // the word since strchr() reports it as found
        PSZ  pEndOfWord = pStart;
        while (    (pEndOfWord <= (PSZ)pLimit)
                && (strchr(pcszEndChars, *pEndOfWord) == 0)
              )
            // if char is not an "end word" char: go for next
            pEndOfWord++;

        if (pEndOfWord <= (PSZ)pLimit)
        {
            // whoa, got a word:
            *ppszStart = pStart;
            *ppszEnd = pEndOfWord;
            return (TRUE);
        }
    }

    return (FALSE);
}
872
/*
 *@@ strhIsWord:
 *      returns TRUE if p points to a "word"
 *      in pcszBuf.
 *
 *      p is considered a word if the character _before_
 *      it is in pcszBeginChars (or if p is the start of
 *      pcszBuf) and the char _after_ it (i.e. *(p+cbSearch))
 *      is in pcszEndChars or is the terminating null byte.
 *
 *      Note: earlier versions had an additional test here
 *      which was meant to reject doubled end characters.
 *      That test compared the end character's code plus one
 *      with the end character itself and thus never rejected
 *      anything; it has been removed without changing the
 *      results.
 *
 *@@added V0.9.6 (2000-11-12) [umoeller]
 */

BOOL strhIsWord(const char *pcszBuf,
                const char *p,                 // in: start of word
                ULONG cbSearch,                // in: length of word
                const char *pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                const char *pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    BOOL    fEndOK = FALSE;

    // check previous char; there is none at the start of the buffer
    if (    (p == pcszBuf)
         || (strchr(pcszBeginChars, *(p-1)))
       )
    {
        // OK, valid begin char:
        // check end char
        CHAR    cNextChar = *(p + cbSearch);

        if (    (cNextChar == 0)
             || (strchr(pcszEndChars, cNextChar))
           )
            fEndOK = TRUE;
    }

    return (fEndOK);
}
919
920/*
921 *@@ strhFindWord:
922 * searches for pszSearch in pszBuf, which is
923 * returned if found (or NULL if not).
924 *
925 * As opposed to strstr, this finds pszSearch
926 * only if it is a "word". A search string is
927 * considered a word if the character _before_
928 * it is in pcszBeginChars and the char _after_
929 * it is in pcszEndChars.
930 *
931 * Example:
932 + strhFindWord("This is an example.", "is");
933 + returns ...........^ this, but not the "is" in "This".
934 *
935 * The algorithm here uses strstr to find pszSearch in pszBuf
936 * and performs additional "is-word" checks for each item found
937 * (by calling strhIsWord).
938 *
939 * Note that this function is fairly slow compared to xstrFindWord.
940 *
941 *@@added V0.9.0 (99-11-08) [umoeller]
942 *@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
943 */
944
945PSZ strhFindWord(const char *pszBuf,
946 const char *pszSearch,
947 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
948 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
949{
950 PSZ pszReturn = 0;
951 ULONG cbBuf = strlen(pszBuf),
952 cbSearch = strlen(pszSearch);
953
954 if ((cbBuf) && (cbSearch))
955 {
956 const char *p = pszBuf;
957
958 do // while p
959 {
960 p = strstr(p, pszSearch);
961 if (p)
962 {
963 // string found:
964 // check if that's a word
965
966 if (strhIsWord(pszBuf,
967 p,
968 cbSearch,
969 pcszBeginChars,
970 pcszEndChars))
971 {
972 // valid end char:
973 pszReturn = (PSZ)p;
974 break;
975 }
976
977 p += cbSearch;
978 }
979 } while (p);
980
981 }
982 return (pszReturn);
983}
984
/*
 *@@ strhFindEOL:
 *      returns a pointer to the next \r, \n or null character
 *      following pcszSearchIn. If pulOffset is not NULL, the
 *      offset of that character from pcszSearchIn is stored
 *      in *pulOffset.
 *
 *      This should never return NULL because at some point,
 *      there will be a null byte in your string.
 *
 *@@added V0.9.4 (2000-07-01) [umoeller]
 */

PSZ strhFindEOL(const char *pcszSearchIn,        // in: where to search
                PULONG pulOffset)       // out: offset (ptr can be NULL)
{
    // strcspn counts the leading characters which are neither
    // \r nor \n, so it stops at the first of those two or,
    // if there is none, at the terminating null byte
    ULONG ulOfs = strcspn(pcszSearchIn, "\r\n");

    if (pulOffset)
        *pulOffset = ulOfs;

    return ((PSZ)(pcszSearchIn + ulOfs));
}
1016
1017/*
1018 *@@ strhFindNextLine:
1019 * like strhFindEOL, but this returns the character
1020 * _after_ \r or \n. Note that this might return
1021 * a pointer to terminating NULL character also.
1022 */
1023
1024PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1025{
1026 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1027 // pEOL now points to the \r char or the terminating 0 byte;
1028 // if not null byte, advance pointer
1029 PSZ pNextLine = pEOL;
1030 if (*pNextLine == '\r')
1031 pNextLine++;
1032 if (*pNextLine == '\n')
1033 pNextLine++;
1034 if (pulOffset)
1035 *pulOffset = pNextLine - pszSearchIn;
1036 return (pNextLine);
1037}
1038
/*
 *@@ strhBeautifyTitle:
 *      replaces all line breaks (0xd, 0xa) with spaces.
 *      The string is changed in place; its length
 *      remains the same.
 *
 *      Returns TRUE if at least one character was
 *      replaced, FALSE otherwise (or if psz is NULL).
 */

BOOL strhBeautifyTitle(PSZ psz)
{
    BOOL    rc = FALSE;

    if (psz)
    {
        PSZ p;
        // one pass over the string instead of
        // searching from the start after each replacement
        for (p = psz; *p; p++)
        {
            if ((*p == '\n') || (*p == '\r'))
            {
                *p = ' ';
                rc = TRUE;
            }
        }
    }

    return (rc);
}
1060
/*
 * strhFindAttribValue:
 *      searches for pszAttrib in pszSearchIn; if found,
 *      returns the first character of the attribute value.
 *      This function searches without respecting case.
 *
 *      pszAttrib is only found if it follows a space
 *      character directly and is itself followed by a
 *      space, "=", "&gt;", \r, \n, or the end of the string.
 *      All spaces, "=", \r, and \n characters after the
 *      attribute name are then skipped, and the address
 *      of the next character is returned (which can be
 *      the terminating null byte).
 *
 *      Returns NULL if the attribute was not found or
 *      if one of the parameters is NULL.
 *
 *      <B>Example:</B>
 +          strhFindAttribValue("&lt;PAGE BLAH=\"data\"&gt;", "BLAH")
 +
 +          returns the address of the opening quote before "data".
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
 */

PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
{
    PSZ prc = NULL;

    if ((pszSearchIn) && (pszAttrib))
    {
        ULONG       cbAttrib = strlen(pszAttrib);
        const char  *pSpace = pszSearchIn;

        // 1) find space char
        while ((pSpace = strchr(pSpace, ' ')))
        {
            const char  *p = pSpace + 1;
            ULONG       ul = 0;

            // 2) compare the attribute name without respecting case;
            // a null byte in the buffer never equals a character of
            // pszAttrib, so we cannot run beyond the end of the buffer
            while (    (ul < cbAttrib)
                    && (    tolower((unsigned char)p[ul])
                         == tolower((unsigned char)pszAttrib[ul])
                       )
                  )
                ul++;

            if (ul == cbAttrib)
            {
                // 3) now check whether the character after the
                // name is a valid end-of-attribute-name character
                CHAR c = p[cbAttrib];
                if (    (c == ' ')
                     || (c == '>')
                     || (c == '=')
                     || (c == '\r')
                     || (c == '\n')
                     || (c == 0)
                   )
                {
                    // yes: skip everything up to the value
                    const char *p2 = p + cbAttrib;
                    while (    (*p2 == ' ')
                            || (*p2 == '=')
                            || (*p2 == '\n')
                            || (*p2 == '\r')
                          )
                        p2++;

                    prc = (PSZ)p2;
                    break;
                }
            }

            // no match here: go on after this space
            pSpace = p;
        }
    }

    return (prc);
}
1123
1124/*
1125 * strhGetNumAttribValue:
1126 * stores the numerical parameter value of an HTML-style
1127 * tag in *pl.
1128 *
1129 * Returns the address of the tag parameter in the
1130 * search buffer, if found, or NULL.
1131 *
1132 * <B>Example:</B>
1133 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1134 *
1135 * stores 123 in the "l" variable.
1136 *
1137 *@@added V0.9.0 [umoeller]
1138 */
1139
1140PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1141 const char *pszTag, // e.g. "INDEX"
1142 PLONG pl) // out: numerical value
1143{
1144 PSZ pParam;
1145 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1146 sscanf(pParam, "%ld", pl);
1147
1148 return (pParam);
1149}
1150
1151/*
1152 * strhGetTextAttr:
1153 * retrieves the attribute value of a textual HTML-style tag
1154 * in a newly allocated buffer, which is returned,
1155 * or NULL if attribute not found.
1156 * If an attribute value is to contain spaces, it
1157 * must be enclosed in quotes.
1158 *
1159 * The offset of the attribute data in pszSearchIn is
1160 * returned in *pulOffset so that you can do multiple
1161 * searches.
1162 *
1163 * This returns a new buffer, which should be free()'d after use.
1164 *
1165 * <B>Example:</B>
1166 + ULONG ulOfs = 0;
1167 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1168 + ............^ ulOfs
1169 *
1170 * returns a new string with the value "blublub" (without
1171 * quotes) and sets ulOfs to 12.
1172 *
1173 *@@added V0.9.0 [umoeller]
1174 */
1175
1176PSZ strhGetTextAttr(const char *pszSearchIn,
1177 const char *pszTag,
1178 PULONG pulOffset) // out: offset where found
1179{
1180 PSZ pParam,
1181 pParam2,
1182 prc = NULL;
1183 ULONG ulCount = 0;
1184 LONG lNestingLevel = 0;
1185
1186 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1187 {
1188 // determine end character to search for: a space
1189 CHAR cEnd = ' ';
1190 if (*pParam == '\"')
1191 {
1192 // or, if the data is enclosed in quotes, a quote
1193 cEnd = '\"';
1194 pParam++;
1195 }
1196
1197 if (pulOffset)
1198 // store the offset
1199 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1200
1201 // now find end of attribute
1202 pParam2 = pParam;
1203 while (*pParam)
1204 {
1205 if (*pParam == cEnd)
1206 // end character found
1207 break;
1208 else if (*pParam == '<')
1209 // yet another opening tag found:
1210 // this is probably some "<" in the attributes
1211 lNestingLevel++;
1212 else if (*pParam == '>')
1213 {
1214 lNestingLevel--;
1215 if (lNestingLevel < 0)
1216 // end of tag found:
1217 break;
1218 }
1219 ulCount++;
1220 pParam++;
1221 }
1222
1223 // copy attribute to new buffer
1224 if (ulCount)
1225 {
1226 prc = (PSZ)malloc(ulCount+1);
1227 memcpy(prc, pParam2, ulCount);
1228 *(prc+ulCount) = 0;
1229 }
1230 }
1231 return (prc);
1232}
1233
/*
 * strhFindEndOfTag:
 *      returns a pointer to the "&gt;" char
 *      which seems to terminate the tag beginning
 *      after pszBeginOfTag, or NULL if there is none.
 *
 *      If additional "&lt;" chars are found, we look
 *      for additional "&gt;" characters too.
 *
 *      Note: You must pass the address of the opening
 *      '&lt;' character to this function.
 *
 *      Example:
 +          PSZ pszTest = "&lt;BODY ATTR=\"&lt;BODY&gt;\"&gt;";
 +          strhFindEndOfTag(pszTest)
 +          returns the address of the last "&gt;" character.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhFindEndOfTag(const char *pszBeginOfTag)
{
    const char  *pcsz;
    LONG        lOpenTags = 0;      // opening brackets not closed yet

    for (pcsz = pszBeginOfTag; *pcsz != 0; pcsz++)
    {
        switch (*pcsz)
        {
            case '<':
                // another opening tag found:
                lOpenTags++;
            break;

            case '>':
                // closing tag found; if it balances the
                // first opening bracket, this is the one
                if (--lOpenTags < 1)
                    return ((PSZ)pcsz);
            break;

            default:
            break;
        }
    }

    // end of string reached without a matching bracket
    return (NULL);
}
1281
1282/*
1283 * strhGetBlock:
1284 * this complex function searches the given string
1285 * for a pair of opening/closing HTML-style tags.
1286 *
1287 * If found, this routine returns TRUE and does
1288 * the following:
1289 *
1290 * 1) allocate a new buffer, copy the text
1291 * enclosed by the opening/closing tags
1292 * into it and set *ppszBlock to that
1293 * buffer;
1294 *
1295 * 2) if the opening tag has any attributes,
1296 * allocate another buffer, copy the
1297 * attributes into it and set *ppszAttrs
1298 * to that buffer; if no attributes are
1299 * found, *ppszAttrs will be NULL;
1300 *
1301 * 3) set *pulOffset to the offset from the
1302 * beginning of *ppszSearchIn where the
1303 * opening tag was found;
1304 *
1305 * 4) advance *ppszSearchIn to after the
1306 * closing tag, so that you can do
1307 * multiple searches without finding the
1308 * same tags twice.
1309 *
1310 * All buffers should be freed using free().
1311 *
1312 * This returns the following:
1313 * -- 0: no error
1314 * -- 1: tag not found at all (doesn't have to be an error)
1315 * -- 2: begin tag found, but no corresponding end tag found. This
1316 * is a real error.
1317 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1318 *
1319 * <B>Example:</B>
1320 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1321 + PSZ pszBlock, pszAttrs;
1322 + ULONG ulOfs;
1323 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1324 *
1325 * would do the following:
1326 *
1327 * 1) set pszBlock to a new string containing "This is page 1."
1328 * without quotes;
1329 *
1330 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1331 *
1332 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1333 *
1334 * 4) pSearch would be advanced to point to the "More text"
1335 * string in the original buffer.
1336 *
1337 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1338 * for HTML parsing.
1339 *
1340 *@@added V0.9.0 [umoeller]
1341 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1342 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1343 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1344 */
1345
1346ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1347 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1348 PSZ pszTag,
1349 PSZ *ppszBlock, // out: block enclosed by the tags
1350 PSZ *ppszAttribs, // out: attributes of the opening tag
1351 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1352 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1353{
1354 ULONG ulrc = 1;
1355 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1356 pszSearch2 = pszBeginTag,
1357 pszClosingTag;
1358 ULONG cbTag = strlen(pszTag);
1359
1360 // go thru the block and check all tags if it's the
1361 // begin tag we're looking for
1362 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1363 {
1364 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1365 // yes: stop
1366 break;
1367 else
1368 pszBeginTag++;
1369 }
1370
1371 if (pszBeginTag)
1372 {
1373 // we found <TAG>:
1374 ULONG ulNestingLevel = 0;
1375
1376 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1377 // strchr(pszBeginTag, '>');
1378 if (pszEndOfBeginTag)
1379 {
1380 // does the caller want the attributes?
1381 if (ppszAttribs)
1382 {
1383 // yes: then copy them
1384 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1385 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1386 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1387 // add terminating 0
1388 *(pszAttrs + ulAttrLen) = 0;
1389
1390 *ppszAttribs = pszAttrs;
1391 }
1392
1393 // output offset of where we found the begin tag
1394 if (pulOfsBeginTag)
1395 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1396
1397 // now find corresponding closing tag (e.g. "</BODY>"
1398 pszBeginTag = pszEndOfBeginTag+1;
1399 // now we're behind the '>' char of the opening tag
1400 // increase offset of that too
1401 if (pulOfsBeginBlock)
1402 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1403
1404 // find next closing tag;
1405 // for the first run, pszSearch2 points to right
1406 // after the '>' char of the opening tag
1407 pszSearch2 = pszBeginTag;
1408 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1409 && (pszClosingTag = strstr(pszSearch2, "<"))
1410 )
1411 {
1412 // if we have another opening tag before our closing
1413 // tag, we need to have several closing tags before
1414 // we're done
1415 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1416 ulNestingLevel++;
1417 else
1418 {
1419 // is this ours?
1420 if ( (*(pszClosingTag+1) == '/')
1421 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1422 )
1423 {
1424 // we've found a matching closing tag; is
1425 // it ours?
1426 if (ulNestingLevel == 0)
1427 {
1428 // our closing tag found:
1429 // allocate mem for a new buffer
1430 // and extract all the text between
1431 // open and closing tags to it
1432 ULONG ulLen = pszClosingTag - pszBeginTag;
1433 if (ppszBlock)
1434 {
1435 PSZ pNew = (PSZ)malloc(ulLen + 1);
1436 strhncpy0(pNew, pszBeginTag, ulLen);
1437 *ppszBlock = pNew;
1438 }
1439
1440 // raise search offset to after the closing tag
1441 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1442
1443 ulrc = 0;
1444
1445 break;
1446 } else
1447 // not our closing tag:
1448 ulNestingLevel--;
1449 }
1450 }
1451 // no matching closing tag: search on after that
1452 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1453 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1454
1455 if (!pszClosingTag)
1456 // no matching closing tag found:
1457 // return 2 (closing tag not found)
1458 ulrc = 2;
1459 } // end if (pszBeginTag)
1460 else
1461 // no matching ">" for opening tag found:
1462 ulrc = 3;
1463 }
1464
1465 return (ulrc);
1466}
1467
1468/* ******************************************************************
1469 *
1470 * Miscellaneous
1471 *
1472 ********************************************************************/
1473
1474/*
1475 *@@ strhArrayAppend:
1476 * this appends a string to a "string array".
1477 *
1478 * A string array is considered a sequence of
1479 * zero-terminated strings in memory. That is,
1480 * after each string's null-byte, the next
1481 * string comes up.
1482 *
1483 * This is useful for composing a single block
1484 * of memory from, say, list box entries, which
1485 * can then be written to OS2.INI in one flush.
1486 *
1487 * To append strings to such an array, call this
1488 * function for each string you wish to append.
1489 * This will re-allocate *ppszRoot with each call,
1490 * and update *pcbRoot, which then contains the
1491 * total size of all strings (including all null
1492 * terminators).
1493 *
1494 * Pass *pcbRoot to PrfSaveProfileData to have the
1495 * block saved.
1496 *
1497 * Note: On the first call, *ppszRoot and *pcbRoot
1498 * _must_ be both NULL, or this crashes.
1499 */
1500
1501VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1502 const char *pcszNew, // in: string to append
1503 PULONG pcbRoot) // in/out: size of array
1504{
1505 ULONG cbNew = strlen(pcszNew);
1506 PSZ pszTemp = (PSZ)malloc(*pcbRoot
1507 + cbNew
1508 + 1); // two null bytes
1509 if (*ppszRoot)
1510 {
1511 // not first loop: copy old stuff
1512 memcpy(pszTemp,
1513 *ppszRoot,
1514 *pcbRoot);
1515 free(*ppszRoot);
1516 }
1517 // append new string
1518 strcpy(pszTemp + *pcbRoot,
1519 pcszNew);
1520 // update root
1521 *ppszRoot = pszTemp;
1522 // update length
1523 *pcbRoot += cbNew + 1;
1524}
1525
1526/*
1527 *@@ strhCreateDump:
1528 * this dumps a memory block into a string
1529 * and returns that string in a new buffer.
1530 *
1531 * You must free() the returned PSZ after use.
1532 *
1533 * The output looks like the following:
1534 *
1535 + 0000: FE FF 0E 02 90 00 00 00 ........
1536 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1537 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1538 *
1539 * Each line is terminated with a newline (\n)
1540 * character only.
1541 *
1542 *@@added V0.9.1 (2000-01-22) [umoeller]
1543 */
1544
1545PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1546 ULONG ulSize, // in: size of buffer
1547 ULONG ulIndent) // in: indentation of every line
1548{
1549 PSZ pszReturn = 0;
1550 XSTRING strReturn;
1551 CHAR szTemp[1000];
1552
1553 PBYTE pbCurrent = pb; // current byte
1554 ULONG ulCount = 0,
1555 ulCharsInLine = 0; // if this grows > 7, a new line is started
1556 CHAR szLine[400] = "",
1557 szAscii[30] = " "; // ASCII representation; filled for every line
1558 PSZ pszLine = szLine,
1559 pszAscii = szAscii;
1560
1561 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1562
1563 for (pbCurrent = pb;
1564 ulCount < ulSize;
1565 pbCurrent++, ulCount++)
1566 {
1567 if (ulCharsInLine == 0)
1568 {
1569 memset(szLine, ' ', ulIndent);
1570 pszLine += ulIndent;
1571 }
1572 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1573
1574 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1575 // printable character:
1576 *pszAscii = *pbCurrent;
1577 else
1578 *pszAscii = '.';
1579 pszAscii++;
1580
1581 ulCharsInLine++;
1582 if ( (ulCharsInLine > 7) // 8 bytes added?
1583 || (ulCount == ulSize-1) // end of buffer reached?
1584 )
1585 {
1586 // if we haven't had eight bytes yet,
1587 // fill buffer up to eight bytes with spaces
1588 ULONG ul2;
1589 for (ul2 = ulCharsInLine;
1590 ul2 < 8;
1591 ul2++)
1592 pszLine += sprintf(pszLine, " ");
1593
1594 sprintf(szTemp, "%04lX: %s %s\n",
1595 (ulCount & 0xFFFFFFF8), // offset in hex
1596 szLine, // bytes string
1597 szAscii); // ASCII string
1598 xstrcat(&strReturn, szTemp, 0);
1599
1600 // restart line buffer
1601 pszLine = szLine;
1602
1603 // clear ASCII buffer
1604 strcpy(szAscii, " ");
1605 pszAscii = szAscii;
1606
1607 // reset line counter
1608 ulCharsInLine = 0;
1609 }
1610 }
1611
1612 if (strReturn.cbAllocated)
1613 pszReturn = strReturn.psz;
1614
1615 return (pszReturn);
1616}
1617
1618/* ******************************************************************
1619 *
1620 * Wildcard matching
1621 *
1622 ********************************************************************/
1623
1624/*
1625 * The following code has been taken from "fnmatch.zip".
1626 *
1627 * (c) 1994-1996 by Eberhard Mattes.
1628 */
1629
/* OS/2 and DOS style paths accept the backslash as well as the
 * forward slash between two components. This macro yields true
 * iff C is one of these two separators. */

#define IS_OS2_COMP_SEP(C) (((C) == '\\') || ((C) == '/'))


/* This macro yields true iff a path component ends at C, that is,
 * if C is either a separator or the terminating null character. */

#define IS_OS2_COMP_END(C) (IS_OS2_COMP_SEP (C) || ((C) == 0))
1640
/*
 * skip_comp_os2:
 *      advances over the current component of the path SRC (OS/2
 *      and DOS styles) and returns a pointer to the component
 *      which follows it. If SRC has no further component, the
 *      address of its terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *p = src;

    /* Run up to the separator or null character which ends
     * the component. */

    for (; !IS_OS2_COMP_END(*p); ++p)
        ;

    /* A separator is stepped over, the null character is not. */

    return (*p != 0) ? (p + 1) : p;
}
1664
/*
 * has_colon:
 *      returns 1 if there is a colon anywhere in the
 *      null-terminated path P, or 0 if there is none.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    /* strchr stops at the null terminator just like a manual scan */
    return (strchr((const char *)p, ':') != NULL);
}
1681
1682/*
1683 * match_comp_os2:
1684 * Compare a single component (directory name or file name) of the
1685 * paths, for OS/2 and DOS styles. MASK and NAME point into a
1686 * component of the wildcard and the name to be checked, respectively.
1687 * Comparing stops at the next separator. The FLAGS argument is the
1688 * same as that of fnmatch(). HAS_DOT is true if a dot is in the
1689 * current component of NAME. The number of dots is not restricted,
1690 * even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1691 * Note that this function is recursive.
1692 *
1693 * (c) 1994-1996 by Eberhard Mattes.
1694 */
1695
1696static int match_comp_os2(const unsigned char *mask,
1697 const unsigned char *name,
1698 unsigned flags,
1699 int has_dot)
1700{
1701 int rc;
1702
1703 for (;;)
1704 switch (*mask)
1705 {
1706 case 0:
1707
1708 /* There must be no extra characters at the end of NAME when
1709 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1710 * in that case, NAME may point to a separator. */
1711
1712 if (*name == 0)
1713 return FNM_MATCH;
1714 if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1715 return FNM_MATCH;
1716 return FNM_NOMATCH;
1717
1718 case '/':
1719 case '\\':
1720
1721 /* Separators match separators. */
1722
1723 if (IS_OS2_COMP_SEP(*name))
1724 return FNM_MATCH;
1725
1726 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1727 * is ignored at the end of NAME. */
1728
1729 if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1730 return FNM_MATCH;
1731
1732 /* Stop comparing at the separator. */
1733
1734 return FNM_NOMATCH;
1735
1736 case '?':
1737
1738 /* A question mark matches one character. It does not match
1739 * a dot. At the end of the component (and before a dot),
1740 * it also matches zero characters. */
1741
1742 if (*name != '.' && !IS_OS2_COMP_END(*name))
1743 ++name;
1744 ++mask;
1745 break;
1746
1747 case '*':
1748
1749 /* An asterisk matches zero or more characters. In DOS
1750 * mode, dots are not matched. */
1751
1752 do
1753 {
1754 ++mask;
1755 }
1756 while (*mask == '*');
1757 for (;;)
1758 {
1759 rc = match_comp_os2(mask, name, flags, has_dot);
1760 if (rc != FNM_NOMATCH)
1761 return rc;
1762 if (IS_OS2_COMP_END(*name))
1763 return FNM_NOMATCH;
1764 if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
1765 return FNM_NOMATCH;
1766 ++name;
1767 }
1768
1769 case '.':
1770
1771 /* A dot matches a dot. It also matches the implicit dot at
1772 * the end of a dot-less NAME. */
1773
1774 ++mask;
1775 if (*name == '.')
1776 ++name;
1777 else if (has_dot || !IS_OS2_COMP_END(*name))
1778 return FNM_NOMATCH;
1779 break;
1780
1781 default:
1782
1783 /* All other characters match themselves. */
1784
1785 if (flags & _FNM_IGNORECASE)
1786 {
1787 if (tolower(*mask) != tolower(*name))
1788 return FNM_NOMATCH;
1789 }
1790 else
1791 {
1792 if (*mask != *name)
1793 return FNM_NOMATCH;
1794 }
1795 ++mask;
1796 ++name;
1797 break;
1798 }
1799}
1800
1801/*
1802 * match_comp:
1803 * compare a single component (directory name or file name) of the
1804 * paths, for all styles which need component-by-component matching.
1805 * MASK and NAME point to the start of a component of the wildcard and
1806 * the name to be checked, respectively. Comparing stops at the next
1807 * separator. The FLAGS argument is the same as that of fnmatch().
1808 * Return FNM_MATCH iff MASK and NAME match.
1809 *
1810 * (c) 1994-1996 by Eberhard Mattes.
1811 */
1812
1813static int match_comp(const unsigned char *mask,
1814 const unsigned char *name,
1815 unsigned flags)
1816{
1817 const unsigned char *s;
1818
1819 switch (flags & _FNM_STYLE_MASK)
1820 {
1821 case _FNM_OS2:
1822 case _FNM_DOS:
1823
1824 /* For OS/2 and DOS styles, we add an implicit dot at the end of
1825 * the component if the component doesn't include a dot. */
1826
1827 s = name;
1828 while (!IS_OS2_COMP_END(*s) && *s != '.')
1829 ++s;
1830 return match_comp_os2(mask, name, flags, *s == '.');
1831
1832 default:
1833 return FNM_ERR;
1834 }
1835}
1836
/* Unix style paths have the forward slash as their only component
 * separator. This macro yields true iff C is that separator. */

#define IS_UNIX_COMP_SEP(C) ('/' == (C))


/* This macro yields true iff a path component ends at C, that is,
 * if C is either the separator or the terminating null character. */

#define IS_UNIX_COMP_END(C) (IS_UNIX_COMP_SEP (C) || ((C) == 0))
1847
1848/*
1849 * match_unix:
1850 * match complete paths for Unix styles. The FLAGS argument is the
1851 * same as that of fnmatch(). COMP points to the start of the current
1852 * component in NAME. Return FNM_MATCH iff MASK and NAME match. The
1853 * backslash character is used for escaping ? and * unless
1854 * FNM_NOESCAPE is set.
1855 *
1856 * (c) 1994-1996 by Eberhard Mattes.
1857 */
1858
1859static int match_unix(const unsigned char *mask,
1860 const unsigned char *name,
1861 unsigned flags,
1862 const unsigned char *comp)
1863{
1864 unsigned char c1, c2;
1865 char invert, matched;
1866 const unsigned char *start;
1867 int rc;
1868
1869 for (;;)
1870 switch (*mask)
1871 {
1872 case 0:
1873
1874 /* There must be no extra characters at the end of NAME when
1875 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1876 * in that case, NAME may point to a separator. */
1877
1878 if (*name == 0)
1879 return FNM_MATCH;
1880 if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1881 return FNM_MATCH;
1882 return FNM_NOMATCH;
1883
1884 case '?':
1885
1886 /* A question mark matches one character. It does not match
1887 * the component separator if FNM_PATHNAME is set. It does
1888 * not match a dot at the start of a component if FNM_PERIOD
1889 * is set. */
1890
1891 if (*name == 0)
1892 return FNM_NOMATCH;
1893 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1894 return FNM_NOMATCH;
1895 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1896 return FNM_NOMATCH;
1897 ++mask;
1898 ++name;
1899 break;
1900
1901 case '*':
1902
1903 /* An asterisk matches zero or more characters. It does not
1904 * match the component separator if FNM_PATHNAME is set. It
1905 * does not match a dot at the start of a component if
1906 * FNM_PERIOD is set. */
1907
1908 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1909 return FNM_NOMATCH;
1910 do
1911 {
1912 ++mask;
1913 }
1914 while (*mask == '*');
1915 for (;;)
1916 {
1917 rc = match_unix(mask, name, flags, comp);
1918 if (rc != FNM_NOMATCH)
1919 return rc;
1920 if (*name == 0)
1921 return FNM_NOMATCH;
1922 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1923 return FNM_NOMATCH;
1924 ++name;
1925 }
1926
1927 case '/':
1928
1929 /* Separators match only separators. If _FNM_PATHPREFIX is
1930 * set, a trailing separator in MASK is ignored at the end
1931 * of NAME. */
1932
1933 if (!(IS_UNIX_COMP_SEP(*name)
1934 || ((flags & _FNM_PATHPREFIX) && *name == 0
1935 && (mask[1] == 0
1936 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1937 && mask[2] == 0)))))
1938 return FNM_NOMATCH;
1939
1940 ++mask;
1941 if (*name != 0)
1942 ++name;
1943
1944 /* This is the beginning of a new component if FNM_PATHNAME
1945 * is set. */
1946
1947 if (flags & FNM_PATHNAME)
1948 comp = name;
1949 break;
1950
1951 case '[':
1952
1953 /* A set of characters. Always case-sensitive. */
1954
1955 if (*name == 0)
1956 return FNM_NOMATCH;
1957 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1958 return FNM_NOMATCH;
1959 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1960 return FNM_NOMATCH;
1961
1962 invert = 0;
1963 matched = 0;
1964 ++mask;
1965
1966 /* If the first character is a ! or ^, the set matches all
1967 * characters not listed in the set. */
1968
1969 if (*mask == '!' || *mask == '^')
1970 {
1971 ++mask;
1972 invert = 1;
1973 }
1974
1975 /* Loop over all the characters of the set. The loop ends
1976 * if the end of the string is reached or if a ] is
1977 * encountered unless it directly follows the initial [ or
1978 * [-. */
1979
1980 start = mask;
1981 while (!(*mask == 0 || (*mask == ']' && mask != start)))
1982 {
1983 /* Get the next character which is optionally preceded
1984 * by a backslash. */
1985
1986 c1 = *mask++;
1987 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
1988 {
1989 if (*mask == 0)
1990 break;
1991 c1 = *mask++;
1992 }
1993
1994 /* Ranges of characters are written as a-z. Don't
1995 * forget to check for the end of the string and to
1996 * handle the backslash. If the character after - is a
1997 * ], it isn't a range. */
1998
1999 if (*mask == '-' && mask[1] != ']')
2000 {
2001 ++mask; /* Skip the - character */
2002 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2003 ++mask;
2004 if (*mask == 0)
2005 break;
2006 c2 = *mask++;
2007 }
2008 else
2009 c2 = c1;
2010
2011 /* Now check whether this character or range matches NAME. */
2012
2013 if (c1 <= *name && *name <= c2)
2014 matched = 1;
2015 }
2016
2017 /* If the end of the string is reached before a ] is found,
2018 * back up to the [ and compare it to NAME. */
2019
2020 if (*mask == 0)
2021 {
2022 if (*name != '[')
2023 return FNM_NOMATCH;
2024 ++name;
2025 mask = start;
2026 if (invert)
2027 --mask;
2028 }
2029 else
2030 {
2031 if (invert)
2032 matched = !matched;
2033 if (!matched)
2034 return FNM_NOMATCH;
2035 ++mask; /* Skip the ] character */
2036 if (*name != 0)
2037 ++name;
2038 }
2039 break;
2040
2041 case '\\':
2042 ++mask;
2043 if (flags & FNM_NOESCAPE)
2044 {
2045 if (*name != '\\')
2046 return FNM_NOMATCH;
2047 ++name;
2048 }
2049 else if (*mask == '*' || *mask == '?')
2050 {
2051 if (*mask != *name)
2052 return FNM_NOMATCH;
2053 ++mask;
2054 ++name;
2055 }
2056 break;
2057
2058 default:
2059
2060 /* All other characters match themselves. */
2061
2062 if (flags & _FNM_IGNORECASE)
2063 {
2064 if (tolower(*mask) != tolower(*name))
2065 return FNM_NOMATCH;
2066 }
2067 else
2068 {
2069 if (*mask != *name)
2070 return FNM_NOMATCH;
2071 }
2072 ++mask;
2073 ++name;
2074 break;
2075 }
2076}
2077
2078/*
2079 * _fnmatch_unsigned:
2080 * Check whether the path name NAME matches the wildcard MASK.
2081 *
2082 * Return:
2083 * -- 0 (FNM_MATCH) if it matches,
2084 * -- _FNM_NOMATCH if it doesn't,
2085 * -- FNM_ERR on error.
2086 *
2087 * The operation of this function is controlled by FLAGS.
2088 * This is an internal function, with unsigned arguments.
2089 *
2090 * (c) 1994-1996 by Eberhard Mattes.
2091 */
2092
2093static int _fnmatch_unsigned(const unsigned char *mask,
2094 const unsigned char *name,
2095 unsigned flags)
2096{
2097 int m_drive, n_drive,
2098 rc;
2099
2100 /* Match and skip the drive name if present. */
2101
2102 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2103 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2104
2105 if (m_drive != n_drive)
2106 {
2107 if (m_drive == -1 || n_drive == -1)
2108 return FNM_NOMATCH;
2109 if (!(flags & _FNM_IGNORECASE))
2110 return FNM_NOMATCH;
2111 if (tolower(m_drive) != tolower(n_drive))
2112 return FNM_NOMATCH;
2113 }
2114
2115 if (m_drive != -1)
2116 mask += 2;
2117 if (n_drive != -1)
2118 name += 2;
2119
2120 /* Colons are not allowed in path names, except for the drive name,
2121 * which was skipped above. */
2122
2123 if (has_colon(mask) || has_colon(name))
2124 return FNM_ERR;
2125
2126 /* The name "\\server\path" should not be matched by mask
2127 * "\*\server\path". Ditto for /. */
2128
2129 switch (flags & _FNM_STYLE_MASK)
2130 {
2131 case _FNM_OS2:
2132 case _FNM_DOS:
2133
2134 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2135 {
2136 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2137 return FNM_NOMATCH;
2138 name += 2;
2139 mask += 2;
2140 }
2141 break;
2142
2143 case _FNM_POSIX:
2144
2145 if (name[0] == '/' && name[1] == '/')
2146 {
2147 int i;
2148
2149 name += 2;
2150 for (i = 0; i < 2; ++i)
2151 if (mask[0] == '/')
2152 ++mask;
2153 else if (mask[0] == '\\' && mask[1] == '/')
2154 mask += 2;
2155 else
2156 return FNM_NOMATCH;
2157 }
2158
2159 /* In Unix styles, treating ? and * w.r.t. components is simple.
2160 * No need to do matching component by component. */
2161
2162 return match_unix(mask, name, flags, name);
2163 }
2164
2165 /* Now compare all the components of the path name, one by one.
2166 * Note that the path separator must not be enclosed in brackets. */
2167
2168 while (*mask != 0 || *name != 0)
2169 {
2170
2171 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2172 * is reached even if there are components left in NAME. */
2173
2174 if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2175 return FNM_MATCH;
2176
2177 /* Compare a single component of the path name. */
2178
2179 rc = match_comp(mask, name, flags);
2180 if (rc != FNM_MATCH)
2181 return rc;
2182
2183 /* Skip to the next component or to the end of the path name. */
2184
2185 mask = skip_comp_os2(mask);
2186 name = skip_comp_os2(name);
2187 }
2188
2189 /* If we reached the ends of both strings, the names match. */
2190
2191 if (*mask == 0 && *name == 0)
2192 return FNM_MATCH;
2193
2194 /* The names do not match. */
2195
2196 return FNM_NOMATCH;
2197}
2198
2199/*
2200 *@@ strhMatchOS2:
2201 * this matches wildcards, similar to what DosEditName does.
2202 * However, this does not require a file to be present, but
2203 * works on strings only.
2204 */
2205
2206BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2207 const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2208{
2209 return ((BOOL)(_fnmatch_unsigned(pcszMask,
2210 pcszName,
2211 _FNM_OS2 | _FNM_IGNORECASE)
2212 == FNM_MATCH)
2213 );
2214}
2215
2216/* ******************************************************************
2217 *
2218 * Fast string searches
2219 *
2220 ********************************************************************/
2221
// assertions are compiled out; expanding to a void expression
// keeps ASSERT usable wherever an expression or statement is
// expected, while its argument is not evaluated
#define ASSERT(a) ((void)0)
2223
2224/*
2225 * The following code has been taken from the "Standard
2226 * Function Library", file sflfind.c, and only slightly
2227 * modified to conform to the rest of this file.
2228 *
2229 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2230 * Revised: 98/05/04
2231 *
2232 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2233 *
2234 * The SFL Licence allows incorporating SFL code into other
2235 * programs, as long as the copyright is reprinted and the
2236 * code is marked as modified, so this is what we do.
2237 */
2238
2239/*
2240 *@@ strhmemfind:
2241 * searches for a pattern in a block of memory using the
2242 * Boyer-Moore-Horspool-Sunday algorithm.
2243 *
2244 * The block and pattern may contain any values; you must
2245 * explicitly provide their lengths. If you search for strings,
2246 * use strlen() on the buffers.
2247 *
2248 * Returns a pointer to the pattern if found within the block,
2249 * or NULL if the pattern was not found.
2250 *
2251 * This algorithm needs a "shift table" to cache data for the
2252 * search pattern. This table can be reused when performing
2253 * several searches with the same pattern.
2254 *
2255 * "shift" must point to an array big enough to hold 256 (8**2)
2256 * "size_t" values.
2257 *
2258 * If (*repeat_find == FALSE), the shift table is initialized.
2259 * So on the first search with a given pattern, *repeat_find
2260 * should be FALSE. This function sets it to TRUE after the
2261 * shift table is initialised, allowing the initialisation
2262 * phase to be skipped on subsequent searches.
2263 *
2264 * This function is most effective when repeated searches are
2265 * made for the same pattern in one or more large buffers.
2266 *
2267 * Example:
2268 *
2269 + PSZ pszHaystack = "This is a sample string.",
2270 + pszNeedle = "string";
2271 + size_t shift[256];
2272 + BOOL fRepeat = FALSE;
2273 +
2274 + PSZ pFound = strhmemfind(pszHaystack,
2275 + strlen(pszHaystack), // block size
2276 + pszNeedle,
2277 + strlen(pszNeedle), // pattern size
2278 + shift,
2279 + &fRepeat);
2280 *
2281 * Taken from the "Standard Function Library", file sflfind.c.
2282 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2283 * Slightly modified by umoeller.
2284 *
2285 *@@added V0.9.3 (2000-05-08) [umoeller]
2286 */
2287
2288void* strhmemfind(const void *in_block, // in: block containing data
2289 size_t block_size, // in: size of block in bytes
2290 const void *in_pattern, // in: pattern to search for
2291 size_t pattern_size, // in: size of pattern block
2292 size_t *shift, // in/out: shift table (search buffer)
2293 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
2294{
2295 size_t byte_nbr, // Distance through block
2296 match_size; // Size of matched part
2297 const unsigned char
2298 *match_base = NULL, // Base of match of pattern
2299 *match_ptr = NULL, // Point within current match
2300 *limit = NULL; // Last potiental match point
2301 const unsigned char
2302 *block = (unsigned char *) in_block, // Concrete pointer to block data
2303 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2304
2305 if ( (block == NULL)
2306 || (pattern == NULL)
2307 || (shift == NULL)
2308 )
2309 return (NULL);
2310
2311 // Pattern must be smaller or equal in size to string
2312 if (block_size < pattern_size)
2313 return (NULL); // Otherwise it's not found
2314
2315 if (pattern_size == 0) // Empty patterns match at start
2316 return ((void *)block);
2317
2318 // Build the shift table unless we're continuing a previous search
2319
2320 // The shift table determines how far to shift before trying to match
2321 // again, if a match at this point fails. If the byte after where the
2322 // end of our pattern falls is not in our pattern, then we start to
2323 // match again after that byte; otherwise we line up the last occurence
2324 // of that byte in our pattern under that byte, and try match again.
2325
2326 if (!repeat_find || !*repeat_find)
2327 {
2328 for (byte_nbr = 0;
2329 byte_nbr < 256;
2330 byte_nbr++)
2331 shift[byte_nbr] = pattern_size + 1;
2332 for (byte_nbr = 0;
2333 byte_nbr < pattern_size;
2334 byte_nbr++)
2335 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2336
2337 if (repeat_find)
2338 *repeat_find = TRUE;
2339 }
2340
2341 // Search for the block, each time jumping up by the amount
2342 // computed in the shift table
2343
2344 limit = block + (block_size - pattern_size + 1);
2345 ASSERT (limit > block);
2346
2347 for (match_base = block;
2348 match_base < limit;
2349 match_base += shift[*(match_base + pattern_size)])
2350 {
2351 match_ptr = match_base;
2352 match_size = 0;
2353
2354 // Compare pattern until it all matches, or we find a difference
2355 while (*match_ptr++ == pattern[match_size++])
2356 {
2357 ASSERT (match_size <= pattern_size &&
2358 match_ptr == (match_base + match_size));
2359
2360 // If we found a match, return the start address
2361 if (match_size >= pattern_size)
2362 return ((void*)(match_base));
2363
2364 }
2365 }
2366 return (NULL); // Found nothing
2367}
2368
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the
 *      first occurence of the pattern within the string, or NULL if
 *      the pattern was not found or if one of the two pointers is NULL.
 *      An empty pattern matches at the start of the string.
 *
 *      Will match strings irrespective of case, as far as tolower()
 *      knows about the characters. To match exact byte sequences, use
 *      strhmemfind. Will not work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t
        shift [256];                        // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                           // Index into byte array
        match_size,                         // Size of matched part
        pos,                                // Offset of current match window
        last;                               // Last offset where a match can start

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                      // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurence
    // of that byte in our pattern under that byte, and try match again.

    // Note: every character goes through (unsigned char) before it is
    // given to tolower(), whose behavior is undefined for negative values

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower ((unsigned char) pattern [byte_nbr])]
            = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    last = string_size - pattern_size;

    for (pos = 0; pos <= last; )
    {
        // Compare pattern until it all matches, or we find a difference
        match_size = 0;
        while (    (match_size < pattern_size)
                && (    tolower ((unsigned char) string [pos + match_size])
                     == tolower ((unsigned char) pattern [match_size])
                   )
              )
            match_size++;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *) (string + pos));

        // The character behind the window decides how far we can shift;
        // at the end of the string, this is the null terminator
        pos += shift [(unsigned char) tolower ((unsigned char) string [pos + pattern_size])];
    }

    return (NULL);                          // Found nothing
}
2463
Note: See TracBrowser for help on using the repository browser.