source: trunk/src/helpers/stringh.c@ 9

Last change on this file since 9 was 8, checked in by umoeller, 25 years ago

Initial checkin of helpers code which used to be in WarpIN.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 90.3 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout XWorkplace.
6 *
7 * Usage: All OS/2 programs.
8 *
9 * Function prefixes (new with V0.81):
10 * -- strh* string helper functions.
11 *
12 * Note: Version numbering in this file relates to XWorkplace version
13 * numbering.
14 *
15 *@@header "helpers\stringh.h"
16 */
17
18/*
19 * Copyright (C) 1997-2000 Ulrich Möller.
20 * Parts Copyright (C) 1991-1999 iMatix Corporation.
21 * This file is part of the XWorkplace source package.
22 * XWorkplace is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published
24 * by the Free Software Foundation, in version 2 as it comes in the
25 * "COPYING" file of the XWorkplace main distribution.
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 */
31
32#define OS2EMX_PLAIN_CHAR
33 // this is needed for "os2emx.h"; if this is defined,
34 // emx will define PSZ as _signed_ char, otherwise
35 // as unsigned char
36
37#define INCL_WINSHELLDATA
38#include <os2.h>
39
40#include <stdlib.h>
41#include <stdio.h>
42#include <string.h>
43#include <ctype.h>
44#include <math.h>
45
46#include "setup.h" // code generation and debugging options
47
48#include "helpers\stringh.h"
49#include "helpers\xstring.h" // extended string helpers
50
51#pragma hdrstop
52
53/*
54 *@@category: Helpers\C helpers\String management
55 */
56
57/*
58 *@@ strhdup:
59 * like strdup, but this one
60 * doesn't crash if pszSource is NULL,
61 * but returns NULL also.
62 *
63 *@@added V0.9.0 [umoeller]
64 */
65
66PSZ strhdup(const char *pszSource)
67{
68 if (pszSource)
69 return (strdup(pszSource));
70 else
71 return (0);
72}
73
74/*
75 *@@ strhistr:
76 * like strstr, but case-insensitive.
77 *
78 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
79 */
80
81PSZ strhistr(const char *string1, const char *string2)
82{
83 PSZ prc = NULL;
84
85 if ((string1) && (string2))
86 {
87 PSZ pszSrchIn = strdup(string1);
88 PSZ pszSrchFor = strdup(string2);
89
90 if ((pszSrchIn) && (pszSrchFor))
91 {
92 strupr(pszSrchIn);
93 strupr(pszSrchFor);
94
95 prc = strstr(pszSrchIn, pszSrchFor);
96 if (prc)
97 {
98 // prc now has the first occurence of the string,
99 // but in pszSrchIn; we need to map this
100 // return value to the original string
101 prc = (prc-pszSrchIn) // offset in pszSrchIn
102 + (PSZ)string1;
103 }
104 }
105 if (pszSrchFor)
106 free(pszSrchFor);
107 if (pszSrchIn)
108 free(pszSrchIn);
109 }
110 return (prc);
111}
112
113/*
114 *@@ strhncpy0:
115 * like strncpy, but always appends a 0 character.
116 */
117
118ULONG strhncpy0(PSZ pszTarget,
119 const char *pszSource,
120 ULONG cbSource)
121{
122 ULONG ul = 0;
123 PSZ pTarget = pszTarget,
124 pSource = (PSZ)pszSource;
125
126 for (ul = 0; ul < cbSource; ul++)
127 if (*pSource)
128 *pTarget++ = *pSource++;
129 else
130 break;
131 *pTarget = 0;
132
133 return (ul);
134}
135
136/*
137 * strhCount:
138 * this counts the occurences of c in pszSearch.
139 */
140
141ULONG strhCount(const char *pszSearch,
142 CHAR c)
143{
144 PSZ p = (PSZ)pszSearch;
145 ULONG ulCount = 0;
146 while (TRUE)
147 {
148 p = strchr(p, c);
149 if (p)
150 {
151 ulCount++;
152 p++;
153 }
154 else
155 break;
156 }
157 return (ulCount);
158}
159
160/*
161 *@@ strhIsDecimal:
162 * returns TRUE if psz consists of decimal digits only.
163 */
164
165BOOL strhIsDecimal(PSZ psz)
166{
167 PSZ p = psz;
168 while (*p != 0)
169 {
170 if (isdigit(*p) == 0)
171 return (FALSE);
172 p++;
173 }
174
175 return (TRUE);
176}
177
178/*
179 *@@ strhSubstr:
180 * this creates a new PSZ containing the string
181 * from pBegin to pEnd, excluding the pEnd character.
182 * The new string is null-terminated.
183 *
184 * Example:
185 + "1234567890"
186 + ^ ^
187 + p1 p2
188 + strhSubstr(p1, p2)
189 * would return a new string containing "2345678".
190 */
191
192PSZ strhSubstr(const char *pBegin, const char *pEnd)
193{
194 ULONG cbSubstr = (pEnd - pBegin);
195 PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
196 strhncpy0(pszSubstr, pBegin, cbSubstr);
197 return (pszSubstr);
198}
199
200/*
201 *@@ strhExtract:
202 * searches pszBuf for the cOpen character and returns
203 * the data in between cOpen and cClose, excluding
204 * those two characters, in a newly allocated buffer
205 * which you must free() afterwards.
206 *
207 * Spaces and newlines/linefeeds are skipped.
208 *
209 * If the search was successful, the new buffer
210 * is returned and, if (ppEnd != NULL), *ppEnd points
211 * to the first character after the cClose character
212 * found in the buffer.
213 *
214 * If the search was not successful, NULL is
215 * returned, and *ppEnd is unchanged.
216 *
217 * If another cOpen character is found before
218 * cClose, matching cClose characters will be skipped.
219 * You can therefore nest the cOpen and cClose
220 * characters.
221 *
222 * This function ignores cOpen and cClose characters
223 * in C-style comments and strings surrounded by
224 * double quotes.
225 *
226 * Example:
227 + PSZ pszBuf = "KEYWORD { --blah-- } next",
228 + pEnd;
229 + strhExtract(pszBuf,
230 + '{', '}',
231 + &pEnd)
232 * would return a new buffer containing " --blah-- ",
233 * and ppEnd would afterwards point to the space
234 * before "next" in the static buffer.
235 *
236 *@@added V0.9.0 [umoeller]
237 */
238
239PSZ strhExtract(PSZ pszBuf, // in: search buffer
240 CHAR cOpen, // in: opening char
241 CHAR cClose, // in: closing char
242 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
243{
244 PSZ pszReturn = NULL;
245
246 if (pszBuf)
247 {
248 PSZ pOpen = strchr(pszBuf, cOpen);
249 if (pOpen)
250 {
251 // opening char found:
252 // now go thru the whole rest of the buffer
253 PSZ p = pOpen+1;
254 LONG lLevel = 1; // if this goes 0, we're done
255 while (*p)
256 {
257 if (*p == cOpen)
258 lLevel++;
259 else if (*p == cClose)
260 {
261 lLevel--;
262 if (lLevel <= 0)
263 {
264 // matching closing bracket found:
265 // extract string
266 pszReturn = strhSubstr(pOpen+1, // after cOpen
267 p); // excluding cClose
268 if (ppEnd)
269 *ppEnd = p+1;
270 break; // while (*p)
271 }
272 }
273 else if (*p == '\"')
274 {
275 // beginning of string:
276 PSZ p2 = p+1;
277 // find end of string
278 while ((*p2) && (*p2 != '\"'))
279 p2++;
280
281 if (*p2 == '\"')
282 // closing quote found:
283 // search on after that
284 p = p2; // raised below
285 else
286 break; // while (*p)
287 }
288
289 p++;
290 }
291 }
292 }
293
294 return (pszReturn);
295}
296
297/*
298 *@@ strhQuote:
299 * similar to strhExtract, except that
300 * opening and closing chars are the same,
301 * and therefore no nesting is possible.
302 * Useful for extracting stuff between
303 * quotes.
304 *
305 *@@added V0.9.0 [umoeller]
306 */
307
308PSZ strhQuote(PSZ pszBuf,
309 CHAR cQuote,
310 PSZ *ppEnd)
311{
312 PSZ pszReturn = NULL,
313 p1 = NULL;
314 if ((p1 = strchr(pszBuf, cQuote)))
315 {
316 PSZ p2 = strchr(p1+1, cQuote);
317 if (p2)
318 {
319 pszReturn = strhSubstr(p1+1, p2);
320 if (ppEnd)
321 // store closing char
322 *ppEnd = p2 + 1;
323 }
324 }
325
326 return (pszReturn);
327}
328
329/*
330 *@@ strhStrip:
331 * removes all double spaces.
332 * This copies within the "psz" buffer.
333 * If any double spaces are found, the
334 * string will be shorter than before,
335 * but the buffer is _not_ reallocated,
336 * so there will be unused bytes at the
337 * end.
338 *
339 * Returns the number of spaces removed.
340 *
341 *@@added V0.9.0 [umoeller]
342 */
343
344ULONG strhStrip(PSZ psz) // in/out: string
345{
346 PSZ p;
347 ULONG cb = strlen(psz),
348 ulrc = 0;
349
350 for (p = psz; p < psz+cb; p++)
351 {
352 if ((*p == ' ') && (*(p+1) == ' '))
353 {
354 PSZ p2 = p;
355 while (*p2)
356 {
357 *p2 = *(p2+1);
358 p2++;
359 }
360 cb--;
361 p--;
362 ulrc++;
363 }
364 }
365 return (ulrc);
366}
367
368/*
369 * strhWords:
370 * returns the no. of words in "psz".
371 * A string is considered a "word" if
372 * it is surrounded by spaces only.
373 *
374 *@@added V0.9.0 [umoeller]
375 */
376
377ULONG strhWords(PSZ psz)
378{
379 PSZ p;
380 ULONG cb = strlen(psz),
381 ulWords = 0;
382 if (cb > 1)
383 {
384 ulWords = 1;
385 for (p = psz; p < psz+cb; p++)
386 if (*p == ' ')
387 ulWords++;
388 }
389 return (ulWords);
390}
391
392/*
393 *@@ strhThousandsULong:
394 * converts a ULONG into a decimal string, while
395 * inserting thousands separators into it. Specify
396 * the separator char in cThousands.
397 * Returns pszTarget so you can use it directly
398 * with sprintf and the "%s" flag.
399 * For cThousands, you should use the data in
400 * OS2.INI ("PM_National" application), which is
401 * always set according to the "Country" object.
402 * Use strhThousandsDouble for "double" values.
403 */
404
405PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
406 ULONG ul, // in: decimal to convert
407 CHAR cThousands) // in: separator char (e.g. '.')
408{
409 USHORT ust, uss, usc;
410 CHAR szTemp[40];
411 sprintf(szTemp, "%d", ul);
412
413 ust = 0;
414 usc = strlen(szTemp);
415 for (uss = 0; uss < usc; uss++)
416 {
417 if (uss)
418 if (((usc - uss) % 3) == 0)
419 {
420 pszTarget[ust] = cThousands;
421 ust++;
422 }
423 pszTarget[ust] = szTemp[uss];
424 ust++;
425 }
426 pszTarget[ust] = '\0';
427
428 return (pszTarget);
429}
430
431/*
432 *@@ strhThousandsDouble:
433 * like strhThousandsULong, but for a "double"
434 * value. Note that after-comma values are truncated.
435 */
436
437PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
438{
439 USHORT ust, uss, usc;
440 CHAR szTemp[40];
441 sprintf(szTemp, "%.0f", floor(dbl));
442
443 ust = 0;
444 usc = strlen(szTemp);
445 for (uss = 0; uss < usc; uss++)
446 {
447 if (uss)
448 if (((usc - uss) % 3) == 0)
449 {
450 pszTarget[ust] = cThousands;
451 ust++;
452 }
453 pszTarget[ust] = szTemp[uss];
454 ust++;
455 }
456 pszTarget[ust] = '\0';
457
458 return (pszTarget);
459}
460
461/*
462 *@@ strhFileDate:
463 * converts file date data to a string (to pszBuf).
464 * You can pass any FDATE structure to this function,
465 * which are returned in those FILEFINDBUF* or
466 * FILESTATUS* structs by the Dos* functions.
467 *
468 * ulDateFormat is the PM setting for the date format,
469 * as set in the "Country" object, and can be queried using
470 + PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
471 *
472 * meaning:
473 * -- 0 mm.dd.yyyy (English)
474 * -- 1 dd.mm.yyyy (e.g. German)
475 * -- 2 yyyy.mm.dd (Japanese, ISO)
476 * -- 3 yyyy.dd.mm
477 *
478 * cDateSep is used as a date separator (e.g. '.').
479 * This can be queried using:
480 + prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
481 *
482 * Alternatively, you can query all the country settings
483 * at once using prfhQueryCountrySettings (prfh.c, new with V0.9.0).
484 *
485 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
486 */
487
488VOID strhFileDate(PSZ pszBuf, // out: string returned
489 FDATE *pfDate, // in: date information
490 ULONG ulDateFormat, // in: date format (0-3)
491 CHAR cDateSep) // in: date separator (e.g. '.')
492{
493 DATETIME dt;
494 dt.day = pfDate->day;
495 dt.month = pfDate->month;
496 dt.year = pfDate->year + 1980;
497
498 strhDateTime(pszBuf,
499 NULL, // no time
500 &dt,
501 ulDateFormat,
502 cDateSep,
503 0, 0); // no time
504}
505
506/*
507 *@@ strhFileTime:
508 * converts file time data to a string (to pszBuf).
509 * You can pass any FTIME structure to this function,
510 * which are returned in those FILEFINDBUF* or
511 * FILESTATUS* structs by the Dos* functions.
512 *
513 * ulTimeFormat is the PM setting for the time format,
514 * as set in the "Country" object, and can be queried using
515 + PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
516 * meaning:
517 * -- 0 12-hour clock
518 * -- >0 24-hour clock
519 *
520 * cDateSep is used as a time separator (e.g. ':').
521 * This can be queried using:
522 + prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
523 *
524 * Alternatively, you can query all the country settings
525 * at once using prfhQueryCountrySettings (prfh.c, new with V0.9.0).
526 *
527 *@@changed 99-03-15 fixed 12-hour crash
528 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
529 */
530
531VOID strhFileTime(PSZ pszBuf, // out: string returned
532 FTIME *pfTime, // in: time information
533 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
534 CHAR cTimeSep) // in: time separator (e.g. ':')
535{
536 DATETIME dt;
537 dt.hours = pfTime->hours;
538 dt.minutes = pfTime->minutes;
539 dt.seconds = pfTime->twosecs * 2;
540
541 strhDateTime(NULL, // no date
542 pszBuf,
543 &dt,
544 0, 0, // no date
545 ulTimeFormat,
546 cTimeSep);
547}
548
549/*
550 *@@ strhDateTime:
551 * converts Control Programe DATETIME info
552 * to two strings. See strhFileDate and strhFileTime
553 * for more detailed parameter descriptions.
554 *
555 *@@added V0.9.0 (99-11-07) [umoeller]
556 */
557
558VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
559 PSZ pszTime, // out: time string returned (can be NULL)
560 DATETIME *pDateTime, // in: date/time information
561 ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
562 CHAR cDateSep, // in: date separator (e.g. '.')
563 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
564 CHAR cTimeSep) // in: time separator (e.g. ':')
565{
566 if (pszDate)
567 {
568 switch (ulDateFormat)
569 {
570 case 0: // mm.dd.yyyy (English)
571 sprintf(pszDate, "%02d%c%02d%c%04d",
572 pDateTime->month,
573 cDateSep,
574 pDateTime->day,
575 cDateSep,
576 pDateTime->year);
577 break;
578
579 case 1: // dd.mm.yyyy (e.g. German)
580 sprintf(pszDate, "%02d%c%02d%c%04d",
581 pDateTime->day,
582 cDateSep,
583 pDateTime->month,
584 cDateSep,
585 pDateTime->year);
586 break;
587
588 case 2: // yyyy.mm.dd (Japanese)
589 sprintf(pszDate, "%04d%c%02d%c%02d",
590 pDateTime->year,
591 cDateSep,
592 pDateTime->month,
593 cDateSep,
594 pDateTime->day);
595 break;
596
597 default: // yyyy.dd.mm
598 sprintf(pszDate, "%04d%c%02d%c%02d",
599 pDateTime->year,
600 cDateSep,
601 pDateTime->day,
602 cDateSep,
603 pDateTime->month);
604 break;
605 }
606 }
607
608 if (pszTime)
609 {
610 if (ulTimeFormat == 0)
611 {
612 // for 12-hour clock, we need additional INI data
613 CHAR szAMPM[10] = "err";
614
615 if (pDateTime->hours > 12)
616 {
617 // > 12h: PM.
618
619 // Note: 12:xx noon is 12 AM, not PM (even though
620 // AM stands for "ante meridiam", but English is just
621 // not logical), so that's handled below.
622
623 PrfQueryProfileString(HINI_USER,
624 "PM_National",
625 "s2359", // key
626 "PM", // default
627 szAMPM, sizeof(szAMPM)-1);
628 sprintf(pszTime, "%02d%c%02d%c%02d %s",
629 // leave 12 == 12 (not 0)
630 pDateTime->hours % 12,
631 cTimeSep,
632 pDateTime->minutes,
633 cTimeSep,
634 pDateTime->seconds,
635 szAMPM);
636 }
637 else
638 {
639 // <= 12h: AM
640 PrfQueryProfileString(HINI_USER,
641 "PM_National",
642 "s1159", // key
643 "AM", // default
644 szAMPM, sizeof(szAMPM)-1);
645 sprintf(pszTime, "%02d%c%02d%c%02d %s",
646 pDateTime->hours,
647 cTimeSep,
648 pDateTime->minutes,
649 cTimeSep,
650 pDateTime->seconds,
651 szAMPM);
652 }
653 }
654 else
655 // 24-hour clock
656 sprintf(pszTime, "%02d%c%02d%c%02d",
657 pDateTime->hours,
658 cTimeSep,
659 pDateTime->minutes,
660 cTimeSep,
661 pDateTime->seconds);
662 }
663}
664
665/*
666 *@@ strhGetWord:
667 * finds word boundaries.
668 *
669 * *ppszStart is used as the beginning of the
670 * search.
671 *
672 * If a word is found, *ppszStart is set to
673 * the first character of the word which was
674 * found and *ppszEnd receives the address
675 * of the first character _after_ the word,
676 * which is probably a space or a \n or \r char.
677 * We then return TRUE.
678 *
679 * The search is stopped if a null character
680 * is found or pLimit is reached. In that case,
681 * FALSE is returned.
682 *
683 *@@added V0.9.1 (2000-02-13) [umoeller]
684 */
685
686BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
687 // out: start of word (if TRUE is returned)
688 const char *pLimit, // in: ptr to last char after *ppszStart to be
689 // searched; if the word does not end before
690 // or with this char, FALSE is returned
691 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
692 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
693 PSZ *ppszEnd) // out: first char _after_ word
694 // (if TRUE is returned)
695{
696 // characters after which a word can be started
697 // const char *pcszBeginChars = "\x0d\x0a ";
698 // const char *pcszEndChars = "\x0d\x0a /-";
699
700 PSZ pStart = *ppszStart;
701
702 // find start of word
703 while ( (pStart < (PSZ)pLimit)
704 && (strchr(pcszBeginChars, *pStart))
705 )
706 // if char is a "before word" char: go for next
707 pStart++;
708
709 if (pStart < (PSZ)pLimit)
710 {
711 // found a valid "word start" character
712 // (which is not in pcszBeginChars):
713
714 // find end of word
715 PSZ pEndOfWord = pStart;
716 while ( (pEndOfWord <= (PSZ)pLimit)
717 && (strchr(pcszEndChars, *pEndOfWord) == 0)
718 )
719 // if char is not an "end word" char: go for next
720 pEndOfWord++;
721
722 if (pEndOfWord <= (PSZ)pLimit)
723 {
724 // whoa, got a word:
725 *ppszStart = pStart;
726 *ppszEnd = pEndOfWord;
727 return (TRUE);
728 }
729 }
730
731 return (FALSE);
732}
733
734/*
735 *@@ strhFindWord:
736 * searches for pszSearch in pszBuf, which is
737 * returned if found (or NULL if not).
738 *
739 * As opposed to strstr, this finds pszSearch
740 * only if it is a "word". A search string is
741 * considered a word if the character _before_
742 * it is in pcszBeginChars and the char _after_
743 * it is in pcszEndChars.
744 *
745 * Example:
746 + strhFindWord("This is an example.", "is");
747 + returns ...........^ this, but not the "is" in "This".
748 *
749 * The algorithm here uses strstr to find pszSearch in pszBuf
750 * and performs additional "is-word" checks for each item found.
751 * With VAC++ 3.0, this is still much faster than searching
752 * words first and then comparing each word with pszSearch.
753 * I've tried it that way too, and that took nearly double as
754 * long. Apparently, the VAC++ runtime library uses some
755 * optimized search algorithm here, so we better use that one.
756 *
757 *@@added V0.9.0 (99-11-08) [umoeller]
758 *@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
759 */
760
761PSZ strhFindWord(const char *pszBuf,
762 const char *pszSearch,
763 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
764 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
765{
766 PSZ pszReturn = 0;
767 ULONG cbBuf = strlen(pszBuf),
768 cbSearch = strlen(pszSearch);
769
770 if ((cbBuf) && (cbSearch))
771 {
772 const char *p = pszBuf;
773
774 /* // go thru all characters
775 while (*p)
776 {
777 // check if current character is either the
778 // very first or a "begin word" character
779 if ( (p == pszBuf)
780 || (strchr(pcszBeginChars, *p) == 0)
781 )
782 {
783 // yes: go for next
784 if (*(++p))
785 {
786 // compare with search string
787 if (strcmp(p, pszSearch) == 0)
788 {
789 // is the same:
790 // check if still in buffer
791 if (p < pszBuf + cbBuf)
792 {
793 CHAR cAfterEndOfWord = *(p + cbSearch);
794 if (cAfterEndOfWord == 0)
795 {
796 // end of string:
797 // that's ok
798 pszReturn = (PSZ)p;
799 break;
800 }
801 else
802 {
803 // check if in "end of word" list
804 char *pc2 = strchr(pcszEndChars, cAfterEndOfWord);
805 if (pc2)
806 // OK, is end char: avoid doubles of that char,
807 // but allow spaces
808 if ( (cAfterEndOfWord+1 != *pc2)
809 || (cAfterEndOfWord+1 == ' ')
810 || (cAfterEndOfWord+1 == 0)
811 )
812 {
813 // end of string:
814 // that's ok
815 pszReturn = (PSZ)p;
816 break;
817 }
818 }
819 }
820 }
821 }
822 else
823 // end of string:
824 break;
825 }
826
827 ++p;
828 } // end while
829 */
830
831 do // while p
832 {
833 p = strstr(p, pszSearch);
834 if (p)
835 {
836 // string found:
837 // check if that's a word
838
839 // check previous char
840 if ( (p == pszBuf)
841 || (strchr(pcszBeginChars, *(p-1)))
842 )
843 {
844 // OK, valid begin char:
845 BOOL fEndOK = FALSE;
846 // check end char
847 CHAR cNextChar = *(p + cbSearch);
848 if (cNextChar == 0)
849 fEndOK = TRUE;
850 else
851 {
852 char *pc = strchr(pcszEndChars, cNextChar);
853 if (pc)
854 // OK, is end char: avoid doubles of that char,
855 // but allow spaces
856 if ( (cNextChar+1 != *pc)
857 || (cNextChar+1 == ' ')
858 || (cNextChar+1 == 0)
859 )
860 fEndOK = TRUE;
861 }
862
863 if (fEndOK)
864 {
865 // valid end char:
866 pszReturn = (PSZ)p;
867 break;
868 }
869 }
870 p += cbSearch;
871 }
872 } while (p);
873
874 }
875 return (pszReturn);
876}
877
878/*
879 *@@ strhFindEOL:
880 * returns a pointer to the next \r, \n or null character
881 * following pszSearchIn. Stores the offset in *pulOffset.
882 *
883 * This should never return NULL because at some point,
884 * there will be a null byte in your string.
885 *
886 *@@added V0.9.4 (2000-07-01) [umoeller]
887 */
888
889PSZ strhFindEOL(PSZ pszSearchIn, // in: where to search
890 PULONG pulOffset) // out: offset (ptr can be NULL)
891{
892 PSZ p = pszSearchIn,
893 prc = NULL;
894 while (TRUE)
895 {
896 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
897 {
898 prc = p;
899 break;
900 }
901 p++;
902 }
903
904 if (pulOffset)
905 *pulOffset = prc - pszSearchIn;
906 return (prc);
907}
908
909/*
910 *@@ strhFindNextLine:
911 * like strhFindEOL, but this returns the character
912 * _after_ \r or \n. Note that this might return
913 * a pointer to terminating NULL character also.
914 */
915
916PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
917{
918 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
919 // pEOL now points to the \r char or the terminating 0 byte;
920 // if not null byte, advance pointer
921 PSZ pNextLine = pEOL;
922 if (*pNextLine == '\r')
923 pNextLine++;
924 if (*pNextLine == '\n')
925 pNextLine++;
926 if (pulOffset)
927 *pulOffset = pNextLine - pszSearchIn;
928 return (pNextLine);
929}
930
931/*
932 *@@ strhFindKey:
933 * finds pszKey in pszSearchIn; similar to strhistr,
934 * but this one makes sure the key is at the beginning
935 * of a line. Spaces before the key are tolerated.
936 * Returns NULL if the key was not found.
937 * Used by strhGetParameter/strhSetParameter; useful
938 * for analyzing CONFIG.SYS settings.
939 *
940 *@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
941 *@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
942 */
943
944PSZ strhFindKey(PSZ pszSearchIn, // in: text buffer to search
945 PSZ pszKey, // in: key to search for
946 PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
947 // can be NULL if not needed
948{
949 PSZ p = NULL,
950 pReturn = NULL;
951 // BOOL fFound = FALSE;
952
953 p = pszSearchIn;
954 do {
955 p = strhistr(p, pszKey);
956
957 if ((p) && (p >= pszSearchIn))
958 {
959 // make sure the key is at the beginning of a line
960 // by going backwards until we find a char != " "
961 PSZ p2 = p;
962 while ( (*p2 == ' ')
963 && (p2 > pszSearchIn)
964 )
965 p2--;
966
967 // if previous char is an EOL sign, go on
968 if ( (p2 == pszSearchIn) // order fixed V0.9.0, Rdiger Ihle
969 || (*(p2-1) == '\r')
970 || (*(p2-1) == '\n')
971 )
972 {
973 // now check whether the char after the search
974 // is a "=" char
975 // ULONG cbKey = strlen(pszKey);
976
977 // tolerate spaces before "="
978 /* PSZ p3 = p;
979 while (*(p3+cbKey) == ' ')
980 p3++;
981
982 if (*(p3+cbKey) == '=') */
983 {
984 // found:
985 pReturn = p; // go on, p contains found key
986
987 // test for all upper case?
988 if (pfIsAllUpperCase)
989 {
990 ULONG cbKey2 = strlen(pszKey),
991 ul = 0;
992 *pfIsAllUpperCase = TRUE;
993 for (ul = 0; ul < cbKey2; ul++)
994 if (islower(*(p+ul)))
995 {
996 *pfIsAllUpperCase = FALSE;
997 break; // for
998 }
999 }
1000
1001 break; // do
1002 }
1003 } // else search next key
1004
1005 p++; // search on after this key
1006 }
1007 } while ((!pReturn) && (p != NULL) && (p != pszSearchIn));
1008
1009 return (pReturn);
1010}
1011
1012/*
1013 *@@ strhGetParameter:
1014 * searches pszSearchIn for the key pszKey; if found, it
1015 * returns a pointer to the following characters in pszSearchIn
1016 * and, if pszCopyTo != NULL, copies the rest of the line to
1017 * that buffer, of which cbCopyTo specified the size.
1018 * If the key is not found, NULL is returned.
1019 * String search is done by calling strhFindKey.
1020 * This is useful for querying CONFIG.SYS settings.
1021 *
1022 * <B>Example:</B> this would return "YES" if you searched
1023 * for "PAUSEONERROR=", and "PAUSEONERROR=YES" existed in pszSearchIn.
1024 */
1025
1026PSZ strhGetParameter(PSZ pszSearchIn, // in: text buffer to search
1027 PSZ pszKey, // in: key to search for
1028 PSZ pszCopyTo, // out: key value
1029 ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1030{
1031 PSZ p = strhFindKey(pszSearchIn, pszKey, NULL),
1032 prc = NULL;
1033 if (p)
1034 {
1035 prc = p + strlen(pszKey);
1036 if (pszCopyTo)
1037 // copy to pszCopyTo
1038 {
1039 ULONG cb;
1040 PSZ pEOL = strhFindEOL(prc, &cb);
1041 if (pEOL)
1042 {
1043 if (cb > cbCopyTo)
1044 cb = cbCopyTo-1;
1045 strhncpy0(pszCopyTo, prc, cb);
1046 }
1047 }
1048 }
1049
1050 return (prc);
1051}
1052
1053/*
1054 *@@ strhSetParameter:
1055 * searches *ppszSearchIn for the key pszKey; if found, it
1056 * replaces the characters following this key up to the
1057 * end of the line with pszParam. If pszKey is not found in
1058 * pszSearchIn, it is appended to the file in a new line.
1059 *
1060 * If any changes are made, *ppszSearchIn is re-allocated.
1061 *
1062 * This function searches w/out case sensitivity.
1063 *
1064 * Returns a pointer to the new parameter inside the buffer.
1065 *
1066 *@@changed V0.9.0 [umoeller]: changed function prototype to PSZ* ppszSearchIn
1067 */
1068
1069PSZ strhSetParameter(PSZ* ppszSearchIn, // in: text buffer to search
1070 PSZ pszKey, // in: key to search for
1071 PSZ pszNewParam, // in: new parameter to set for key
1072 BOOL fRespectCase) // in: if TRUE, pszNewParam will
1073 // be converted to upper case if the found key is
1074 // in upper case also. pszNewParam should be in
1075 // lower case if you use this.
1076{
1077 BOOL fIsAllUpperCase = FALSE;
1078 PSZ pKey = strhFindKey(*ppszSearchIn, pszKey, &fIsAllUpperCase),
1079 prc = NULL;
1080
1081 if (pKey)
1082 {
1083 // key found in file:
1084 // replace existing parameter
1085 PSZ pOldParam = pKey + strlen(pszKey);
1086 prc = pOldParam;
1087 // pOldParam now has the old parameter, which we
1088 // will overwrite now
1089
1090 if (pOldParam)
1091 {
1092 ULONG cbOldParam;
1093 PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1094 // pEOL now has first end-of-line after the parameter
1095
1096 if (pEOL)
1097 {
1098 PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1099 strncpy(pszOldCopy, pOldParam, cbOldParam);
1100 pszOldCopy[cbOldParam] = '\0';
1101
1102 // check for upper case desired?
1103 if (fRespectCase)
1104 if (fIsAllUpperCase)
1105 strupr(pszNewParam);
1106
1107 xstrrpl(ppszSearchIn, 0, pszOldCopy, pszNewParam, NULL);
1108
1109 free(pszOldCopy);
1110 }
1111 }
1112 }
1113 else
1114 {
1115 PSZ pszNew = (PSZ)malloc(strlen(*ppszSearchIn)
1116 + strlen(pszKey)
1117 + strlen(pszNewParam)
1118 + 5); // 2 * \r\n + null byte
1119 // key not found: append to end of file
1120 sprintf(pszNew, "%s\r\n%s%s\r\n",
1121 *ppszSearchIn, pszKey, pszNewParam);
1122 free(*ppszSearchIn);
1123 *ppszSearchIn = pszNew;
1124 }
1125
1126 return (prc);
1127}
1128
1129/*
1130 *@@ strhDeleteLine:
1131 * this deletes the line in pszSearchIn which starts with
1132 * the key pszKey. Returns TRUE if the line was found and
1133 * deleted.
1134 *
1135 * This copies within pszSearchIn.
1136 */
1137
1138BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1139 PSZ pszKey) // in: key to find
1140{
1141 BOOL fIsAllUpperCase = FALSE;
1142 PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1143 BOOL brc = FALSE;
1144
1145 if (pKey) {
1146 PSZ pEOL = strhFindEOL(pKey, NULL);
1147 // pEOL now has first end-of-line after the key
1148 if (pEOL)
1149 {
1150 // delete line by overwriting it with
1151 // the next line
1152 strcpy(pKey, pEOL+2);
1153 }
1154 else
1155 {
1156 // EOL not found: we must be at the end of the file
1157 *pKey = '\0';
1158 }
1159 brc = TRUE;
1160 }
1161
1162 return (brc);
1163}
1164
1165/*
1166 *@@ strhBeautifyTitle:
1167 * replaces all line breaks (0xd, 0xa) with spaces.
1168 */
1169
1170BOOL strhBeautifyTitle(PSZ psz)
1171{
1172 BOOL rc = FALSE;
1173 CHAR *p;
1174 while ((p = strchr(psz, 0xa)))
1175 {
1176 *p = ' ';
1177 rc = TRUE;
1178 }
1179 while ((p = strchr(psz, 0xd)))
1180 {
1181 *p = ' ';
1182 rc = TRUE;
1183 }
1184 return (rc);
1185}
1186
1187/*
1188 * strhFindAttribValue:
1189 * searches for pszAttrib in pszSearchIn; if found,
1190 * returns the first character after the "=" char.
1191 * If "=" is not found, a space, \r, and \n are
1192 * also accepted. This function searches without
1193 * respecting case.
1194 *
1195 * <B>Example:</B>
1196 + strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1197 +
1198 + returns ....................... ^ this address.
1199 *
1200 *@@added V0.9.0 [umoeller]
1201 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1202 */
1203
1204PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1205{
1206 PSZ prc = 0;
1207 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1208 p,
1209 p2;
1210 ULONG cbAttrib = strlen(pszAttrib);
1211
1212 // 1) find space char
1213 while ((p = strchr(pszSearchIn2, ' ')))
1214 {
1215 CHAR c;
1216 p++;
1217 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1218 // now check whether the p+strlen(pszAttrib)
1219 // is a valid end-of-tag character
1220 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1221 && ( (c == ' ')
1222 || (c == '>')
1223 || (c == '=')
1224 || (c == '\r')
1225 || (c == '\n')
1226 || (c == 0)
1227 )
1228 )
1229 {
1230 // yes:
1231 CHAR c2;
1232 p2 = p + cbAttrib;
1233 c2 = *p2;
1234 while ( ( (c2 == ' ')
1235 || (c2 == '=')
1236 || (c2 == '\n')
1237 || (c2 == '\r')
1238 )
1239 && (c2 != 0)
1240 )
1241 c2 = *++p2;
1242 prc = p2;
1243 break; // first while
1244 }
1245 pszSearchIn2++;
1246 }
1247 return (prc);
1248}
1249
1250/*
1251 * strhGetNumAttribValue:
1252 * stores the numerical parameter value of an HTML-style
1253 * tag in *pl.
1254 *
1255 * Returns the address of the tag parameter in the
1256 * search buffer, if found, or NULL.
1257 *
1258 * <B>Example:</B>
1259 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1260 *
1261 * stores 123 in the "l" variable.
1262 *
1263 *@@added V0.9.0 [umoeller]
1264 */
1265
1266PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1267 const char *pszTag, // e.g. "INDEX"
1268 PLONG pl) // out: numerical value
1269{
1270 PSZ pParam;
1271 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1272 sscanf(pParam, "%d", pl);
1273
1274 return (pParam);
1275}
1276
1277/*
1278 * strhGetTextAttr:
1279 * retrieves the attribute value of a textual HTML-style tag
1280 * in a newly allocated buffer, which is returned,
1281 * or NULL if attribute not found.
1282 * If an attribute value is to contain spaces, it
1283 * must be enclosed in quotes.
1284 *
1285 * The offset of the attribute data in pszSearchIn is
1286 * returned in *pulOffset so that you can do multiple
1287 * searches.
1288 *
1289 * This returns a new buffer, which should be free()'d after use.
1290 *
1291 * <B>Example:</B>
1292 + ULONG ulOfs = 0;
1293 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1294 + ............^ ulOfs
1295 *
1296 * returns a new string with the value "blublub" (without
1297 * quotes) and sets ulOfs to 12.
1298 *
1299 *@@added V0.9.0 [umoeller]
1300 */
1301
1302PSZ strhGetTextAttr(const char *pszSearchIn,
1303 const char *pszTag,
1304 PULONG pulOffset) // out: offset where found
1305{
1306 PSZ pParam,
1307 pParam2,
1308 prc = NULL;
1309 ULONG ulCount = 0;
1310 LONG lNestingLevel = 0;
1311
1312 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1313 {
1314 // determine end character to search for: a space
1315 CHAR cEnd = ' ';
1316 if (*pParam == '\"')
1317 {
1318 // or, if the data is enclosed in quotes, a quote
1319 cEnd = '\"';
1320 pParam++;
1321 }
1322
1323 if (pulOffset)
1324 // store the offset
1325 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1326
1327 // now find end of attribute
1328 pParam2 = pParam;
1329 while (*pParam)
1330 {
1331 if (*pParam == cEnd)
1332 // end character found
1333 break;
1334 else if (*pParam == '<')
1335 // yet another opening tag found:
1336 // this is probably some "<" in the attributes
1337 lNestingLevel++;
1338 else if (*pParam == '>')
1339 {
1340 lNestingLevel--;
1341 if (lNestingLevel < 0)
1342 // end of tag found:
1343 break;
1344 }
1345 ulCount++;
1346 pParam++;
1347 }
1348
1349 // copy attribute to new buffer
1350 if (ulCount)
1351 {
1352 prc = (PSZ)malloc(ulCount+1);
1353 memcpy(prc, pParam2, ulCount);
1354 *(prc+ulCount) = 0;
1355 }
1356 }
1357 return (prc);
1358}
1359
1360/*
1361 * strhFindEndOfTag:
1362 * returns a pointer to the ">" char
1363 * which seems to terminate the tag beginning
1364 * after pszBeginOfTag.
1365 *
1366 * If additional "<" chars are found, we look
1367 * for additional ">" characters too.
1368 *
1369 * Note: You must pass the address of the opening
1370 * '<' character to this function.
1371 *
1372 * Example:
1373 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1374 + strhFindEndOfTag(pszTest)
1375 + returns.................................^ this.
1376 *
1377 *@@added V0.9.0 [umoeller]
1378 */
1379
1380PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1381{
1382 PSZ p = (PSZ)pszBeginOfTag,
1383 prc = NULL;
1384 LONG lNestingLevel = 0;
1385
1386 while (*p)
1387 {
1388 if (*p == '<')
1389 // another opening tag found:
1390 lNestingLevel++;
1391 else if (*p == '>')
1392 {
1393 // closing tag found:
1394 lNestingLevel--;
1395 if (lNestingLevel < 1)
1396 {
1397 // corresponding: return this
1398 prc = p;
1399 break;
1400 }
1401 }
1402 p++;
1403 }
1404
1405 return (prc);
1406}
1407
1408/*
1409 * strhGetBlock:
1410 * this complex function searches the given string
1411 * for a pair of opening/closing HTML-style tags.
1412 *
1413 * If found, this routine returns TRUE and does
1414 * the following:
1415 *
1416 * 1) allocate a new buffer, copy the text
1417 * enclosed by the opening/closing tags
1418 * into it and set *ppszBlock to that
1419 * buffer;
1420 *
1421 * 2) if the opening tag has any attributes,
1422 * allocate another buffer, copy the
1423 * attributes into it and set *ppszAttrs
1424 * to that buffer; if no attributes are
1425 * found, *ppszAttrs will be NULL;
1426 *
1427 * 3) set *pulOffset to the offset from the
1428 * beginning of *ppszSearchIn where the
1429 * opening tag was found;
1430 *
1431 * 4) advance *ppszSearchIn to after the
1432 * closing tag, so that you can do
1433 * multiple searches without finding the
1434 * same tags twice.
1435 *
1436 * All buffers should be freed using free().
1437 *
1438 * This returns the following:
1439 * -- 0: no error
1440 * -- 1: tag not found at all (doesn't have to be an error)
1441 * -- 2: begin tag found, but no corresponding end tag found. This
1442 * is a real error.
1443 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1444 *
1445 * <B>Example:</B>
1446 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1447 + PSZ pszBlock, pszAttrs;
1448 + ULONG ulOfs;
1449 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1450 *
1451 * would do the following:
1452 *
1453 * 1) set pszBlock to a new string containing "This is page 1."
1454 * without quotes;
1455 *
1456 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1457 *
1458 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1459 *
1460 * 4) pSearch would be advanced to point to the "More text"
1461 * string in the original buffer.
1462 *
1463 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1464 * for HTML parsing.
1465 *
1466 *@@added V0.9.0 [umoeller]
1467 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1468 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1469 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1470 */
1471
1472ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1473 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1474 PSZ pszTag,
1475 PSZ *ppszBlock, // out: block enclosed by the tags
1476 PSZ *ppszAttribs, // out: attributes of the opening tag
1477 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1478 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1479{
1480 ULONG ulrc = 1;
1481 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1482 pszSearch2 = pszBeginTag,
1483 pszClosingTag;
1484 ULONG cbTag = strlen(pszTag);
1485
1486 // go thru the block and check all tags if it's the
1487 // begin tag we're looking for
1488 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1489 {
1490 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1491 // yes: stop
1492 break;
1493 else
1494 pszBeginTag++;
1495 }
1496
1497 if (pszBeginTag)
1498 {
1499 // we found <TAG>:
1500 ULONG ulNestingLevel = 0;
1501
1502 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1503 // strchr(pszBeginTag, '>');
1504 if (pszEndOfBeginTag)
1505 {
1506 // does the caller want the attributes?
1507 if (ppszAttribs)
1508 {
1509 // yes: then copy them
1510 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1511 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1512 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1513 // add terminating 0
1514 *(pszAttrs + ulAttrLen) = 0;
1515
1516 *ppszAttribs = pszAttrs;
1517 }
1518
1519 // output offset of where we found the begin tag
1520 if (pulOfsBeginTag)
1521 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1522
1523 // now find corresponding closing tag (e.g. "</BODY>"
1524 pszBeginTag = pszEndOfBeginTag+1;
1525 // now we're behind the '>' char of the opening tag
1526 // increase offset of that too
1527 if (pulOfsBeginBlock)
1528 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1529
1530 // find next closing tag;
1531 // for the first run, pszSearch2 points to right
1532 // after the '>' char of the opening tag
1533 pszSearch2 = pszBeginTag;
1534 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1535 && (pszClosingTag = strstr(pszSearch2, "<"))
1536 )
1537 {
1538 // if we have another opening tag before our closing
1539 // tag, we need to have several closing tags before
1540 // we're done
1541 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1542 ulNestingLevel++;
1543 else
1544 {
1545 // is this ours?
1546 if ( (*(pszClosingTag+1) == '/')
1547 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1548 )
1549 {
1550 // we've found a matching closing tag; is
1551 // it ours?
1552 if (ulNestingLevel == 0)
1553 {
1554 // our closing tag found:
1555 // allocate mem for a new buffer
1556 // and extract all the text between
1557 // open and closing tags to it
1558 ULONG ulLen = pszClosingTag - pszBeginTag;
1559 if (ppszBlock)
1560 {
1561 PSZ pNew = (PSZ)malloc(ulLen + 1);
1562 strhncpy0(pNew, pszBeginTag, ulLen);
1563 *ppszBlock = pNew;
1564 }
1565
1566 // raise search offset to after the closing tag
1567 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1568
1569 ulrc = 0;
1570
1571 break;
1572 } else
1573 // not our closing tag:
1574 ulNestingLevel--;
1575 }
1576 }
1577 // no matching closing tag: search on after that
1578 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1579 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1580
1581 if (!pszClosingTag)
1582 // no matching closing tag found:
1583 // return 2 (closing tag not found)
1584 ulrc = 2;
1585 } // end if (pszBeginTag)
1586 else
1587 // no matching ">" for opening tag found:
1588 ulrc = 3;
1589 }
1590
1591 return (ulrc);
1592}
1593
1594/* ******************************************************************
1595 * *
1596 * Miscellaneous *
1597 * *
1598 ********************************************************************/
1599
1600/*
1601 *@@ strhArrayAppend:
1602 * this appends a string to a "string array".
1603 *
1604 * A string array is considered a sequence of
1605 * zero-terminated strings in memory. That is,
1606 * after each string's null-byte, the next
1607 * string comes up.
1608 *
1609 * This is useful for composing a single block
1610 * of memory from, say, list box entries, which
1611 * can then be written to OS2.INI in one flush.
1612 *
1613 * To append strings to such an array, call this
1614 * function for each string you wish to append.
1615 * This will re-allocate *ppszRoot with each call,
1616 * and update *pcbRoot, which then contains the
1617 * total size of all strings (including all null
1618 * terminators).
1619 *
1620 * Pass *pcbRoot to PrfSaveProfileData to have the
1621 * block saved.
1622 *
1623 * Note: On the first call, *ppszRoot and *pcbRoot
1624 * _must_ be both NULL, or this crashes.
1625 */
1626
1627VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1628 PSZ pszNew, // in: string to append
1629 PULONG pcbRoot) // in/out: size of array
1630{
1631 ULONG cbNew = strlen(pszNew);
1632 PSZ pszTemp = (PSZ)malloc(*pcbRoot
1633 + cbNew
1634 + 1); // two null bytes
1635 if (*ppszRoot)
1636 {
1637 // not first loop: copy old stuff
1638 memcpy(pszTemp,
1639 *ppszRoot,
1640 *pcbRoot);
1641 free(*ppszRoot);
1642 }
1643 // append new string
1644 strcpy(pszTemp + *pcbRoot,
1645 pszNew);
1646 // update root
1647 *ppszRoot = pszTemp;
1648 // update length
1649 *pcbRoot += cbNew + 1;
1650}
1651
1652/*
1653 *@@ strhCreateDump:
1654 * this dumps a memory block into a string
1655 * and returns that string in a new buffer.
1656 *
1657 * You must free() the returned PSZ after use.
1658 *
1659 * The output looks like the following:
1660 *
1661 + 0000: FE FF 0E 02 90 00 00 00 ........
1662 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1663 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1664 *
1665 * Each line is terminated with a newline (\n)
1666 * character only.
1667 *
1668 *@@added V0.9.1 (2000-01-22) [umoeller]
1669 */
1670
1671PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1672 ULONG ulSize, // in: size of buffer
1673 ULONG ulIndent) // in: indentation of every line
1674{
1675 PSZ pszReturn = NULL;
1676 CHAR szTemp[1000];
1677
1678 PBYTE pbCurrent = pb; // current byte
1679 ULONG ulCount = 0,
1680 ulCharsInLine = 0; // if this grows > 7, a new line is started
1681 CHAR szLine[400] = "",
1682 szAscii[30] = " "; // ASCII representation; filled for every line
1683 PSZ pszLine = szLine,
1684 pszAscii = szAscii;
1685
1686 for (pbCurrent = pb;
1687 ulCount < ulSize;
1688 pbCurrent++, ulCount++)
1689 {
1690 if (ulCharsInLine == 0)
1691 {
1692 memset(szLine, ' ', ulIndent);
1693 pszLine += ulIndent;
1694 }
1695 pszLine += sprintf(pszLine, "%02lX ", *pbCurrent);
1696
1697 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1698 // printable character:
1699 *pszAscii = *pbCurrent;
1700 else
1701 *pszAscii = '.';
1702 pszAscii++;
1703
1704 ulCharsInLine++;
1705 if ( (ulCharsInLine > 7) // 8 bytes added?
1706 || (ulCount == ulSize-1) // end of buffer reached?
1707 )
1708 {
1709 // if we haven't had eight bytes yet,
1710 // fill buffer up to eight bytes with spaces
1711 ULONG ul2;
1712 for (ul2 = ulCharsInLine;
1713 ul2 < 8;
1714 ul2++)
1715 pszLine += sprintf(pszLine, " ");
1716
1717 sprintf(szTemp, "%04lX: %s %s\n",
1718 (ulCount & 0xFFFFFFF8), // offset in hex
1719 szLine, // bytes string
1720 szAscii); // ASCII string
1721 xstrcat(&pszReturn, szTemp);
1722
1723 // restart line buffer
1724 pszLine = szLine;
1725
1726 // clear ASCII buffer
1727 strcpy(szAscii, " ");
1728 pszAscii = szAscii;
1729
1730 // reset line counter
1731 ulCharsInLine = 0;
1732 }
1733 }
1734
1735 return (pszReturn);
1736}
1737
1738/* ******************************************************************
1739 * *
1740 * Wildcard matching *
1741 * *
1742 ********************************************************************/
1743
1744/*
1745 * The following code has been taken from "fnmatch.zip".
1746 *
1747 * (c) 1994-1996 by Eberhard Mattes.
1748 */
1749
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 *@@ skip_comp_os2:
 *      returns a pointer to the character behind the next
 *      separator (/ or \) in the path SRC, for OS/2 and DOS
 *      styles, which is where the next component starts.
 *      If SRC has no more separators, a pointer to the
 *      terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *p = src;

    for (;;)
    {
        if (*p == 0)
            /* End of the string: stay on the terminator. */
            return p;

        if (IS_OS2_COMP_SEP(*p))
            /* Separator: the next component starts behind it. */
            return p + 1;

        ++p;
    }
}
1784
/*
 * has_colon:
 *      returns 1 if the null-terminated path P contains
 *      at least one colon, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    /* strchr stops at the first colon or at the terminator. */
    return (strchr((const char *)p, ':') != NULL) ? 1 : 0;
}
1801
1802/*
1803 * match_comp_os2:
1804 * Compare a single component (directory name or file name) of the
1805 * paths, for OS/2 and DOS styles. MASK and NAME point into a
1806 * component of the wildcard and the name to be checked, respectively.
1807 * Comparing stops at the next separator. The FLAGS argument is the
1808 * same as that of fnmatch(). HAS_DOT is true if a dot is in the
1809 * current component of NAME. The number of dots is not restricted,
1810 * even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1811 * Note that this function is recursive.
1812 *
1813 * (c) 1994-1996 by Eberhard Mattes.
1814 */
1815
1816static int match_comp_os2(const unsigned char *mask,
1817 const unsigned char *name,
1818 unsigned flags,
1819 int has_dot)
1820{
1821 int rc;
1822
1823 for (;;)
1824 switch (*mask)
1825 {
1826 case 0:
1827
1828 /* There must be no extra characters at the end of NAME when
1829 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1830 * in that case, NAME may point to a separator. */
1831
1832 if (*name == 0)
1833 return FNM_MATCH;
1834 if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1835 return FNM_MATCH;
1836 return FNM_NOMATCH;
1837
1838 case '/':
1839 case '\\':
1840
1841 /* Separators match separators. */
1842
1843 if (IS_OS2_COMP_SEP(*name))
1844 return FNM_MATCH;
1845
1846 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1847 * is ignored at the end of NAME. */
1848
1849 if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1850 return FNM_MATCH;
1851
1852 /* Stop comparing at the separator. */
1853
1854 return FNM_NOMATCH;
1855
1856 case '?':
1857
1858 /* A question mark matches one character. It does not match
1859 * a dot. At the end of the component (and before a dot),
1860 * it also matches zero characters. */
1861
1862 if (*name != '.' && !IS_OS2_COMP_END(*name))
1863 ++name;
1864 ++mask;
1865 break;
1866
1867 case '*':
1868
1869 /* An asterisk matches zero or more characters. In DOS
1870 * mode, dots are not matched. */
1871
1872 do
1873 {
1874 ++mask;
1875 }
1876 while (*mask == '*');
1877 for (;;)
1878 {
1879 rc = match_comp_os2(mask, name, flags, has_dot);
1880 if (rc != FNM_NOMATCH)
1881 return rc;
1882 if (IS_OS2_COMP_END(*name))
1883 return FNM_NOMATCH;
1884 if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
1885 return FNM_NOMATCH;
1886 ++name;
1887 }
1888
1889 case '.':
1890
1891 /* A dot matches a dot. It also matches the implicit dot at
1892 * the end of a dot-less NAME. */
1893
1894 ++mask;
1895 if (*name == '.')
1896 ++name;
1897 else if (has_dot || !IS_OS2_COMP_END(*name))
1898 return FNM_NOMATCH;
1899 break;
1900
1901 default:
1902
1903 /* All other characters match themselves. */
1904
1905 if (flags & _FNM_IGNORECASE)
1906 {
1907 if (tolower(*mask) != tolower(*name))
1908 return FNM_NOMATCH;
1909 }
1910 else
1911 {
1912 if (*mask != *name)
1913 return FNM_NOMATCH;
1914 }
1915 ++mask;
1916 ++name;
1917 break;
1918 }
1919}
1920
1921/*
1922 * match_comp:
1923 * compare a single component (directory name or file name) of the
1924 * paths, for all styles which need component-by-component matching.
1925 * MASK and NAME point to the start of a component of the wildcard and
1926 * the name to be checked, respectively. Comparing stops at the next
1927 * separator. The FLAGS argument is the same as that of fnmatch().
1928 * Return FNM_MATCH iff MASK and NAME match.
1929 *
1930 * (c) 1994-1996 by Eberhard Mattes.
1931 */
1932
1933static int match_comp(const unsigned char *mask,
1934 const unsigned char *name,
1935 unsigned flags)
1936{
1937 const unsigned char *s;
1938
1939 switch (flags & _FNM_STYLE_MASK)
1940 {
1941 case _FNM_OS2:
1942 case _FNM_DOS:
1943
1944 /* For OS/2 and DOS styles, we add an implicit dot at the end of
1945 * the component if the component doesn't include a dot. */
1946
1947 s = name;
1948 while (!IS_OS2_COMP_END(*s) && *s != '.')
1949 ++s;
1950 return match_comp_os2(mask, name, flags, *s == '.');
1951
1952 default:
1953 return FNM_ERR;
1954 }
1955}
1956
/* In Unix styles, / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C)  ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is a separator or the null terminator. */

#define IS_UNIX_COMP_END(C)  ((C) == 0 || IS_UNIX_COMP_SEP (C))

/*
 * match_unix:
 *      matches complete paths for Unix styles. The FLAGS argument is
 *      the same as that of fnmatch(). COMP points to the start of the
 *      current component in NAME; it is only used for the FNM_PERIOD
 *      checks and is advanced behind every separator if FNM_PATHNAME
 *      is set.
 *
 *      Returns FNM_MATCH iff MASK and NAME match, FNM_NOMATCH otherwise.
 *      The backslash character is used for escaping ? and * unless
 *      FNM_NOESCAPE is set.
 *
 *      Note that this function calls itself for every '*' in MASK.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int match_unix(const unsigned char *mask,
                      const unsigned char *name,
                      unsigned flags,
                      const unsigned char *comp)
{
    unsigned char c1, c2;           /* lower and upper bound of a set entry */
    char invert, matched;           /* state while parsing a [...] set */
    const unsigned char *start;     /* first character of a set, behind [ or [! */
    int rc;

    /* Every pass of this loop consumes one element of MASK;
     * "break" leaves the switch only and continues with the
     * next element. */

    for (;;)
        switch (*mask)
        {
            case 0:

                /* There must be no extra characters at the end of NAME when
                 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
                 * in that case, NAME may point to a separator. */

                if (*name == 0)
                    return FNM_MATCH;
                if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
                    return FNM_MATCH;
                return FNM_NOMATCH;

            case '?':

                /* A question mark matches one character. It does not match
                 * the component separator if FNM_PATHNAME is set. It does
                 * not match a dot at the start of a component if FNM_PERIOD
                 * is set. */

                if (*name == 0)
                    return FNM_NOMATCH;
                if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
                    return FNM_NOMATCH;
                if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
                    return FNM_NOMATCH;
                ++mask;
                ++name;
                break;

            case '*':

                /* An asterisk matches zero or more characters. It does not
                 * match the component separator if FNM_PATHNAME is set. It
                 * does not match a dot at the start of a component if
                 * FNM_PERIOD is set. */

                if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
                    return FNM_NOMATCH;

                /* Adjacent asterisks are treated as one. */

                do
                {
                    ++mask;
                }
                while (*mask == '*');

                /* Try to match the rest of MASK at every position of NAME
                 * which the asterisk is allowed to cover. */

                for (;;)
                {
                    rc = match_unix(mask, name, flags, comp);
                    if (rc != FNM_NOMATCH)
                        return rc;
                    if (*name == 0)
                        return FNM_NOMATCH;
                    if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
                        return FNM_NOMATCH;
                    ++name;
                }

            case '/':

                /* Separators match only separators. If _FNM_PATHPREFIX is
                 * set, a trailing separator in MASK is ignored at the end
                 * of NAME. A separator in MASK which is followed by a
                 * single backslash only counts as trailing as well, unless
                 * FNM_NOESCAPE is set. */

                if (!(IS_UNIX_COMP_SEP(*name)
                      || ((flags & _FNM_PATHPREFIX) && *name == 0
                          && (mask[1] == 0
                              || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
                                  && mask[2] == 0)))))
                    return FNM_NOMATCH;

                ++mask;
                if (*name != 0)
                    ++name;

                /* This is the beginning of a new component if FNM_PATHNAME
                 * is set. */

                if (flags & FNM_PATHNAME)
                    comp = name;
                break;

            case '[':

                /* A set of characters. Always case-sensitive. The same
                 * restrictions as for the question mark apply. */

                if (*name == 0)
                    return FNM_NOMATCH;
                if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
                    return FNM_NOMATCH;
                if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
                    return FNM_NOMATCH;

                invert = 0;
                matched = 0;
                ++mask;

                /* If the first character is a ! or ^, the set matches all
                 * characters not listed in the set. */

                if (*mask == '!' || *mask == '^')
                {
                    ++mask;
                    invert = 1;
                }

                /* Loop over all the characters of the set. The loop ends
                 * if the end of the string is reached or if a ] is
                 * encountered unless it directly follows the initial [ or
                 * [!. */

                start = mask;
                while (!(*mask == 0 || (*mask == ']' && mask != start)))
                {
                    /* Get the next character which is optionally preceded
                     * by a backslash. */

                    c1 = *mask++;
                    if (!(flags & FNM_NOESCAPE) && c1 == '\\')
                    {
                        if (*mask == 0)
                            break;
                        c1 = *mask++;
                    }

                    /* Ranges of characters are written as a-z. Don't
                     * forget to check for the end of the string and to
                     * handle the backslash. If the character after - is a
                     * ], it isn't a range. */

                    if (*mask == '-' && mask[1] != ']')
                    {
                        ++mask;     /* Skip the - character */
                        if (!(flags & FNM_NOESCAPE) && *mask == '\\')
                            ++mask;
                        if (*mask == 0)
                            break;
                        c2 = *mask++;
                    }
                    else
                        /* Single character: a range of one. */
                        c2 = c1;

                    /* Now check whether this character or range matches NAME. */

                    if (c1 <= *name && *name <= c2)
                        matched = 1;
                }

                /* If the end of the string is reached before a ] is found,
                 * the [ is an ordinary character: compare it to NAME and
                 * go back to the first character behind the [ (which is
                 * the ! or ^ if there was one), so that the rest of MASK
                 * is matched by the following passes of the loop. */

                if (*mask == 0)
                {
                    if (*name != '[')
                        return FNM_NOMATCH;
                    ++name;
                    mask = start;
                    if (invert)
                        --mask;
                }
                else
                {
                    if (invert)
                        matched = !matched;
                    if (!matched)
                        return FNM_NOMATCH;
                    ++mask;         /* Skip the ] character */
                    if (*name != 0)
                        ++name;
                }
                break;

            case '\\':

                /* If FNM_NOESCAPE is set, a backslash is an ordinary
                 * character which must match a backslash in NAME.
                 * Otherwise, an escaped * or ? must match the same
                 * character in NAME. For any other escaped character,
                 * only the backslash is skipped here; the character
                 * itself is handled by the next pass of the loop. */

                ++mask;
                if (flags & FNM_NOESCAPE)
                {
                    if (*name != '\\')
                        return FNM_NOMATCH;
                    ++name;
                }
                else if (*mask == '*' || *mask == '?')
                {
                    if (*mask != *name)
                        return FNM_NOMATCH;
                    ++mask;
                    ++name;
                }
                break;

            default:

                /* All other characters match themselves, ignoring
                 * case if _FNM_IGNORECASE is set. */

                if (flags & _FNM_IGNORECASE)
                {
                    if (tolower(*mask) != tolower(*name))
                        return FNM_NOMATCH;
                }
                else
                {
                    if (*mask != *name)
                        return FNM_NOMATCH;
                }
                ++mask;
                ++name;
                break;
        }
}
2197
2198/*
2199 * _fnmatch_unsigned:
2200 * Check whether the path name NAME matches the wildcard MASK.
2201 *
2202 * Return:
2203 * -- 0 (FNM_MATCH) if it matches,
2204 * -- _FNM_NOMATCH if it doesn't,
2205 * -- FNM_ERR on error.
2206 *
2207 * The operation of this function is controlled by FLAGS.
2208 * This is an internal function, with unsigned arguments.
2209 *
2210 * (c) 1994-1996 by Eberhard Mattes.
2211 */
2212
2213static int _fnmatch_unsigned(const unsigned char *mask,
2214 const unsigned char *name,
2215 unsigned flags)
2216{
2217 int m_drive, n_drive,
2218 rc;
2219
2220 /* Match and skip the drive name if present. */
2221
2222 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2223 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2224
2225 if (m_drive != n_drive)
2226 {
2227 if (m_drive == -1 || n_drive == -1)
2228 return FNM_NOMATCH;
2229 if (!(flags & _FNM_IGNORECASE))
2230 return FNM_NOMATCH;
2231 if (tolower(m_drive) != tolower(n_drive))
2232 return FNM_NOMATCH;
2233 }
2234
2235 if (m_drive != -1)
2236 mask += 2;
2237 if (n_drive != -1)
2238 name += 2;
2239
2240 /* Colons are not allowed in path names, except for the drive name,
2241 * which was skipped above. */
2242
2243 if (has_colon(mask) || has_colon(name))
2244 return FNM_ERR;
2245
2246 /* The name "\\server\path" should not be matched by mask
2247 * "\*\server\path". Ditto for /. */
2248
2249 switch (flags & _FNM_STYLE_MASK)
2250 {
2251 case _FNM_OS2:
2252 case _FNM_DOS:
2253
2254 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2255 {
2256 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2257 return FNM_NOMATCH;
2258 name += 2;
2259 mask += 2;
2260 }
2261 break;
2262
2263 case _FNM_POSIX:
2264
2265 if (name[0] == '/' && name[1] == '/')
2266 {
2267 int i;
2268
2269 name += 2;
2270 for (i = 0; i < 2; ++i)
2271 if (mask[0] == '/')
2272 ++mask;
2273 else if (mask[0] == '\\' && mask[1] == '/')
2274 mask += 2;
2275 else
2276 return FNM_NOMATCH;
2277 }
2278
2279 /* In Unix styles, treating ? and * w.r.t. components is simple.
2280 * No need to do matching component by component. */
2281
2282 return match_unix(mask, name, flags, name);
2283 }
2284
2285 /* Now compare all the components of the path name, one by one.
2286 * Note that the path separator must not be enclosed in brackets. */
2287
2288 while (*mask != 0 || *name != 0)
2289 {
2290
2291 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2292 * is reached even if there are components left in NAME. */
2293
2294 if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2295 return FNM_MATCH;
2296
2297 /* Compare a single component of the path name. */
2298
2299 rc = match_comp(mask, name, flags);
2300 if (rc != FNM_MATCH)
2301 return rc;
2302
2303 /* Skip to the next component or to the end of the path name. */
2304
2305 mask = skip_comp_os2(mask);
2306 name = skip_comp_os2(name);
2307 }
2308
2309 /* If we reached the ends of both strings, the names match. */
2310
2311 if (*mask == 0 && *name == 0)
2312 return FNM_MATCH;
2313
2314 /* The names do not match. */
2315
2316 return FNM_NOMATCH;
2317}
2318
2319/*
2320 *@@ strhMatchOS2:
2321 * this matches wildcards, similar to what DosEditName does.
2322 * However, this does not require a file to be present, but
2323 * works on strings only.
2324 */
2325
2326BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2327 const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2328{
2329 return ((BOOL)(_fnmatch_unsigned(pcszMask,
2330 pcszName,
2331 _FNM_OS2 | _FNM_IGNORECASE)
2332 == FNM_MATCH)
2333 );
2334}
2335
2336/* ******************************************************************
2337 * *
2338 * Fast string searches *
2339 * *
2340 ********************************************************************/
2341
// ASSERT expands to nothing here, so the ASSERT(...) lines
// in the search functions below have no effect
#define ASSERT(a)
2343
2344/*
2345 * The following code has been taken from the "Standard
2346 * Function Library", file sflfind.c, and only slightly
2347 * modified to conform to the rest of this file.
2348 *
2349 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2350 * Revised: 98/05/04
2351 *
2352 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2353 *
2354 * The SFL Licence allows incorporating SFL code into other
2355 * programs, as long as the copyright is reprinted and the
2356 * code is marked as modified, so this is what we do.
2357 */
2358
2359/*
2360 *@@ strhfind:
2361 * searches for a pattern in a string using the Boyer-Moore-
2362 * Horspool-Sunday algorithm. The string and pattern are null-terminated
2363 * strings. Returns a pointer to the pattern if found within the string,
2364 * or NULL if the pattern was not found. If you repeatedly scan for the
2365 * same pattern, use the repeat_find argument. If this is TRUE, the
2366 * function does not re-parse the pattern. You must of course call the
2367 * function with repeat_find equal to FALSE the first time. This function
2368 * is meant to handle character data, and is most effective when you work
2369 * with large strings. To search binary data use strhmemfind(). Will not work
2370 * on multibyte characters.
2371 *
2372 * Examples:
2373 + char *result;
2374 +
2375 + result = strhfind ("abracadabra", "cad", FALSE);
2376 + if (result)
2377 + puts (result);
2378 +
2379 * Taken from the "Standard Function Library", file sflfind.c.
2380 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2381 * Slightly modified.
2382 *
2383 *@@added V0.9.3 (2000-05-08) [umoeller]
2384 */
2385
2386char* strhfind (const char *string, // String containing data
2387 const char *pattern, // Pattern to search for
2388 BOOL repeat_find) // Same pattern as last time
2389{
2390 static size_t
2391 searchbuf [256]; // Fixed search buffer
2392
2393 ASSERT (string); // Expect non-NULL pointers, but
2394 ASSERT (pattern); // fall through if not debugging
2395
2396 return (char *) strhmemfind_rb (string, strlen (string),
2397 pattern, strlen (pattern),
2398 searchbuf, &repeat_find);
2399}
2400
2401/*
2402 *@@ strhfind_r:
2403 * searches for a pattern in a string using the Boyer-Moore-
2404 * Horspool-Sunday algorithm. The string and pattern are null-terminated
2405 * strings. Returns a pointer to the pattern if found within the string,
2406 * or NULL if the pattern was not found. This function is meant to handle
2407 * character data, and is most effective when you work with large strings.
2408 * To search binary data use strhmemfind(). Will not work on multibyte
2409 * characters. Reentrant.
2410 *
2411 * Examples:
2412 + char *result;
2413 +
2414 + result = strhfind_r ("abracadabra", "cad");
2415 + if (result)
2416 + puts (result);
2417 *
2418 * Taken from the "Standard Function Library", file sflfind.c.
2419 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2420 * Slightly modified.
2421 *
2422 *@@added V0.9.3 (2000-05-08) [umoeller]
2423 */
2424
2425char* strhfind_r (const char *string, // String containing data
2426 const char *pattern) // Pattern to search for
2427{
2428 size_t
2429 searchbuf [256]; // One-time search buffer
2430 BOOL
2431 secondtime = FALSE; // Search buffer init needed
2432
2433 ASSERT (string); // Expect non-NULL pointers, but
2434 ASSERT (pattern); // fall through if not debugging
2435
2436 return (char *) strhmemfind_rb (string, strlen (string),
2437 pattern, strlen (pattern),
2438 searchbuf, &secondtime);
2439}
2440
2441/*
2442 *@@ strhfind_rb:
2443 * searches for a pattern in a string using the Boyer-Moore-
2444 * Horspool-Sunday algorithm. The string and pattern are null-terminated
2445 * strings. Returns a pointer to the pattern if found within the string,
2446 * or NULL if the pattern was not found. Supports more efficient repeat
2447 * searches (for the same pattern), through a supplied search buffer. The
2448 * search buffer must be long enough to contain 256 (2**8) size_t entries.
2449 * On the first call repeat_find must be set to FALSE. After the search
2450 * buffer has been initialised, repeat_find will be set to TRUE by the
2451 * function, avoiding the search buffer initialisation on later calls.
2452 *
2453 * This function is most effective when repeated searches are made for
2454 * the same pattern in one or more strings. This function is meant to
2455 * handle character data, and is most effective when you work with
2456 * large strings. To search binary data use strhmemfind(). Will not work
2457 * on multibyte characters. Reentrant.
2458 *
2459 * Examples:
2460 + char *result;
2461 + BOOL repeat_search = FALSE;
2462 + size_t searchbuf[256];
2463 +
2464 + result = strhfind_rb ("abracadabra", "cad", searchbuf, &repeat_search);
2465 + if (result)
2466 + {
2467 + puts (result);
2468 + result = strhfind_rb ("cad/cam", "cad", searchbuf, &repeat_search);
2469 + if (result)
2470 + puts (result);
2471 + }
2472 *
2473 * Taken from the "Standard Function Library", file sflfind.c.
2474 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2475 * Slightly modified.
2476 *
2477 *@@added V0.9.3 (2000-05-08) [umoeller]
2478 */
2479
2480char* strhfind_rb (const char *string, // String containing data
2481 const char *pattern, // Pattern to search for
2482 size_t *shift, // Working buffer between searches
2483 BOOL *repeat_find) // Flag for first/later search
2484{
2485 ASSERT (string); // Expect non-NULL pointers, but
2486 ASSERT (pattern); // fall through if not debugging
2487 ASSERT (shift);
2488 ASSERT (repeat_find);
2489
2490 return (char *) strhmemfind_rb (string, strlen (string),
2491 pattern, strlen (pattern),
2492 shift, repeat_find);
2493}
2494
2495/*
2496 *@@ strhmemfind:
2497 * searches for a pattern in a block of memory using the Boyer-
2498 * Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2499 * values; you must explicitly provide their lengths. Returns a pointer to
2500 * the pattern if found within the block, or NULL if the pattern was not
2501 * found. If you repeatedly scan for the same pattern, use the repeat_find
2502 * argument. If this is TRUE, the function does not re-parse the pattern.
2503 * This function is meant to handle binary data. If you need to search
2504 * strings, use the strhfind_r or strhfind_rb() functions. Non-Reentrant.
2505 *
2506 * Taken from the "Standard Function Library", file sflfind.c.
2507 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2508 * Slightly modified.
2509 *
2510 *@@added V0.9.3 (2000-05-08) [umoeller]
2511 */
2512
2513void* strhmemfind (const void *block, // Block containing data
2514 size_t block_size, // Size of block in bytes
2515 const void *pattern, // Pattern to search for
2516 size_t pattern_size, // Size of pattern block
2517 BOOL repeat_find) // Same pattern as last time
2518{
2519 static size_t
2520 searchbuf [256]; // Static shared search buffer
2521
2522 ASSERT (block); // Expect non-NULL pointers, but
2523 ASSERT (pattern); // full through if not debugging
2524
2525 return strhmemfind_rb (block, block_size, pattern, pattern_size,
2526 searchbuf, &repeat_find);
2527}
2528
2529/*
2530 *@@ strhmemfind_r:
2531 * searches for a pattern in a block of memory using the Boyer-
2532 * Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2533 * values; you must explicitly provide their lengths. Returns a pointer to
2534 * the pattern if found within the block, or NULL if the pattern was not
2535 * found.
2536 *
2537 * This function is meant to handle binary data, for a single search for
2538 * a given pattern. If you need to search strings, use the strhfind_r()
2539 * or strhfind_rb() functions. If you want to do efficient repeated searches
2540 * for one pattern, use strhmemfind_rb(). Reentrant.
2541 *
2542 * Taken from the "Standard Function Library", file sflfind.c.
2543 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2544 * Slightly modified.
2545 *
2546 *@@added V0.9.3 (2000-05-08) [umoeller]
2547 */
2548
2549void* strhmemfind_r (const void *block, // Block containing data
2550 size_t block_size, // Size of block in bytes
2551 const void *pattern, // Pattern to search for
2552 size_t pattern_size) // Size of pattern block
2553{
2554 size_t
2555 searchbuf [256]; // One-time search buffer
2556 BOOL
2557 secondtime = FALSE;
2558
2559 ASSERT (block); // Expect non-NULL pointers, but
2560 ASSERT (pattern); // full through if not debugging
2561
2562 return strhmemfind_rb (block, block_size, pattern, pattern_size,
2563 searchbuf, &secondtime);
2564}
2565
2566/*
2567 *@@ strhmemfind_rb:
2568 * searches for a pattern in a block of memory using the Boyer-
2569 * Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2570 * values; you must explicitly provide their lengths. Returns a pointer to
2571 * the pattern if found within the block, or NULL if the pattern was not
2572 * found. On the first search with a given pattern, *repeat_find should
2573 * be FALSE. It will be set to TRUE after the shift table is initialised,
2574 * allowing the initialisation phase to be skipped on subsequent searches.
2575 * shift must point to an array big enough to hold 256 (8**2) size_t values.
2576 *
2577 * This function is meant to handle binary data, for repeated searches
2578 * for the same pattern. If you need to search strings, use the
2579 * strhfind_r() or strhfind_rb() functions. If you wish to search for a
2580 * pattern only once consider using strhmemfind_r(). Reentrant.
2581 *
2582 * Taken from the "Standard Function Library", file sflfind.c.
2583 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2584 * Slightly modified.
2585 *
2586 *@@added V0.9.3 (2000-05-08) [umoeller]
2587 */
2588
2589void* strhmemfind_rb (const void *in_block, // Block containing data
2590 size_t block_size, // Size of block in bytes
2591 const void *in_pattern, // Pattern to search for
2592 size_t pattern_size, // Size of pattern block
2593 size_t *shift, // Shift table (search buffer)
2594 BOOL *repeat_find) // TRUE: search buffer already init
2595{
2596 size_t
2597 byte_nbr, // Distance through block
2598 match_size; // Size of matched part
2599 const unsigned char
2600 *match_base = NULL, // Base of match of pattern
2601 *match_ptr = NULL, // Point within current match
2602 *limit = NULL; // Last potiental match point
2603 const unsigned char
2604 *block = (unsigned char *) in_block, // Concrete pointer to block data
2605 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2606
2607 ASSERT (block); // Expect non-NULL pointers, but
2608 ASSERT (pattern); // fail gracefully if not debugging
2609 ASSERT (shift); // NULL repeat_find => is false
2610 if (block == NULL || pattern == NULL || shift == NULL)
2611 return (NULL);
2612
2613 // Pattern must be smaller or equal in size to string
2614 if (block_size < pattern_size)
2615 return (NULL); // Otherwise it's not found
2616
2617 if (pattern_size == 0) // Empty patterns match at start
2618 return ((void *)block);
2619
2620 // Build the shift table unless we're continuing a previous search
2621
2622 // The shift table determines how far to shift before trying to match
2623 // again, if a match at this point fails. If the byte after where the
2624 // end of our pattern falls is not in our pattern, then we start to
2625 // match again after that byte; otherwise we line up the last occurence
2626 // of that byte in our pattern under that byte, and try match again.
2627
2628 if (!repeat_find || !*repeat_find)
2629 {
2630 for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
2631 shift [byte_nbr] = pattern_size + 1;
2632 for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
2633 shift [(unsigned char) pattern [byte_nbr]] = pattern_size - byte_nbr;
2634
2635 if (repeat_find)
2636 *repeat_find = TRUE;
2637 }
2638
2639 // Search for the block, each time jumping up by the amount
2640 // computed in the shift table
2641
2642 limit = block + (block_size - pattern_size + 1);
2643 ASSERT (limit > block);
2644
2645 for (match_base = block;
2646 match_base < limit;
2647 match_base += shift [*(match_base + pattern_size)])
2648 {
2649 match_ptr = match_base;
2650 match_size = 0;
2651
2652 // Compare pattern until it all matches, or we find a difference
2653 while (*match_ptr++ == pattern [match_size++])
2654 {
2655 ASSERT (match_size <= pattern_size &&
2656 match_ptr == (match_base + match_size));
2657
2658 // If we found a match, return the start address
2659 if (match_size >= pattern_size)
2660 return ((void*)(match_base));
2661
2662 }
2663 }
2664 return (NULL); // Found nothing
2665}
2666
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string
 *      and pattern are null-terminated strings. Returns a pointer to
 *      the pattern if found within the string, or NULL if the pattern
 *      was not found or if either argument is NULL. An empty pattern
 *      matches at the start of the string.
 *
 *      Will match strings irrespective of case; case folding is done
 *      with tolower(), so it follows the current C locale. All
 *      characters are converted to unsigned char before being passed
 *      to tolower(), so characters above 0x7F are handled safely even
 *      if plain char is signed. To match exact strings, use
 *      strhfind(). Will not work on multibyte characters.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,          // String containing data
                   const char *pattern)         // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    const unsigned char
        *text       = NULL,             // string, viewed as unsigned bytes
        *pat        = NULL,             // pattern, viewed as unsigned bytes
        *match_base = NULL,             // Base of match of pattern
        *limit      = NULL;             // One past last potential match point

    ASSERT (string);                    // Expect non-NULL pointers, but
    ASSERT (pattern);                   // fail gracefully if not debugging
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // tolower() is only defined for EOF and values representable as
    // unsigned char, so all character accesses go through these
    // unsigned views instead of plain (possibly signed) char.
    text = (const unsigned char *) string;
    pat  = (const unsigned char *) pattern;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try to match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. The byte behind the window is at worst
    // the terminating null byte of string, so it is always readable.

    limit = text + (string_size - pattern_size + 1);
    ASSERT (limit > text);

    for (match_base = text;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (match_base [pattern_size])])
    {
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (tolower (match_base [match_size]) == tolower (pat [match_size]))
        {
            match_size++;
            ASSERT (match_size <= pattern_size);

            // If we found a match, return the start address
            if (match_size >= pattern_size)
                return ((char *) match_base);
        }
    }
    return (NULL);                      // Found nothing
}
2761
Note: See TracBrowser for help on using the repository browser.