source: trunk/src/helpers/stringh.c@ 12

Last change on this file since 12 was 12, checked in by umoeller, 25 years ago

Updated string helpers.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 93.6 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout XWorkplace.
6 *
7 * Usage: All OS/2 programs.
8 *
9 * Function prefixes (new with V0.81):
10 * -- strh* string helper functions.
11 *
12 * Note: Version numbering in this file relates to XWorkplace version
13 * numbering.
14 *
15 *@@header "helpers\stringh.h"
16 */
17
18/*
19 * Copyright (C) 1997-2000 Ulrich Möller.
20 * Parts Copyright (C) 1991-1999 iMatix Corporation.
21 * This file is part of the XWorkplace source package.
22 * XWorkplace is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published
24 * by the Free Software Foundation, in version 2 as it comes in the
25 * "COPYING" file of the XWorkplace main distribution.
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 */
31
32#define OS2EMX_PLAIN_CHAR
33 // this is needed for "os2emx.h"; if this is defined,
34 // emx will define PSZ as _signed_ char, otherwise
35 // as unsigned char
36
37#define INCL_WINSHELLDATA
38#include <os2.h>
39
40#include <stdlib.h>
41#include <stdio.h>
42#include <string.h>
43#include <ctype.h>
44#include <math.h>
45
46#include "setup.h" // code generation and debugging options
47
48#include "helpers\stringh.h"
49#include "helpers\xstring.h" // extended string helpers
50
51#pragma hdrstop
52
53/*
54 *@@category: Helpers\C helpers\String management
55 */
56
/*
 *@@ strhdup:
 *      NULL-tolerant variant of strdup. Returns a newly
 *      malloc()'d copy of pszSource, which the caller must
 *      free(). If pszSource is NULL (or no memory is
 *      available), NULL is returned instead of crashing.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhdup(const char *pszSource)
{
    PSZ pszCopy = NULL;

    if (pszSource)
    {
        size_t cbCopy = strlen(pszSource) + 1;      // include null terminator

        pszCopy = (PSZ)malloc(cbCopy);
        if (pszCopy)
            memcpy(pszCopy, pszSource, cbCopy);
    }

    return (pszCopy);
}
73
/*
 *@@ strhistr:
 *      like strstr, but case-insensitive.
 *
 *      Returns a pointer to the first occurence of string2
 *      in string1 (pointing into string1), or NULL if there
 *      is none or if either pointer is NULL. As with strstr,
 *      an empty string2 matches at the start of string1.
 *
 *      Characters are compared after toupper(), so case
 *      folding follows the current C locale.
 *
 *      This works directly on the two input strings; no
 *      temporary copies are allocated, so the function
 *      cannot fail for lack of memory.
 *
 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rüdiger Ihle
 */

PSZ strhistr(const char *string1, const char *string2)
{
    if ((string1) && (string2))
    {
        const char *pStart;

        for (pStart = string1; ; pStart++)
        {
            const char  *p1 = pStart,
                        *p2 = string2;

            // compare as far as both strings agree; the cast keeps
            // toupper() defined for characters above 127
            while (    (*p2)
                    && (toupper((unsigned char)*p1) == toupper((unsigned char)*p2))
                  )
            {
                p1++;
                p2++;
            }

            if (*p2 == 0)
                // whole search string matched at pStart
                return ((PSZ)pStart);

            if (*p1 == 0)
                // string1 ran out before string2 did:
                // no later start position can match either
                break;
        }
    }

    return (NULL);
}
112
/*
 *@@ strhncpy0:
 *      like strncpy, but always appends a 0 character.
 *
 *      Copies at most cbSource characters from pszSource
 *      to pszTarget, stopping early at the end of pszSource,
 *      and then writes a null terminator. pszTarget must
 *      therefore have room for cbSource + 1 bytes.
 *
 *      Returns the number of characters copied, not
 *      counting the terminator.
 */

ULONG strhncpy0(PSZ pszTarget,
                const char *pszSource,
                ULONG cbSource)
{
    ULONG cCopied = 0;

    while (    (cCopied < cbSource)
            && (pszSource[cCopied] != 0)
          )
    {
        pszTarget[cCopied] = pszSource[cCopied];
        cCopied++;
    }

    pszTarget[cCopied] = 0;

    return (cCopied);
}
135
/*
 * strhCount:
 *      this counts the occurences of c in pszSearch.
 *
 *      The null terminator is never counted, so passing
 *      c == 0 returns 0. A NULL pszSearch returns 0 as well.
 */

ULONG strhCount(const char *pszSearch,
                CHAR c)
{
    ULONG ulCount = 0;

    if (pszSearch)
    {
        const char *p;

        // walk the string once; stopping at the terminator ourselves
        // keeps us inside the string even if c is the null character
        for (p = pszSearch; *p; p++)
            if (*p == c)
                ulCount++;
    }

    return (ulCount);
}
159
/*
 *@@ strhIsDecimal:
 *      returns TRUE if psz consists of decimal digits only.
 *
 *      An empty string contains no non-digit and therefore
 *      yields TRUE. A NULL pointer yields FALSE.
 */

BOOL strhIsDecimal(PSZ psz)
{
    PSZ p;

    if (!psz)
        return (FALSE);

    for (p = psz; *p != 0; p++)
    {
        // isdigit() is only defined for unsigned char values (and EOF),
        // so characters above 127 must not be passed as negative numbers
        if (isdigit((unsigned char)*p) == 0)
            return (FALSE);
    }

    return (TRUE);
}
177
178/*
179 *@@ strhSubstr:
180 * this creates a new PSZ containing the string
181 * from pBegin to pEnd, excluding the pEnd character.
182 * The new string is null-terminated.
183 *
184 * Example:
185 + "1234567890"
186 + ^ ^
187 + p1 p2
188 + strhSubstr(p1, p2)
189 * would return a new string containing "2345678".
190 */
191
192PSZ strhSubstr(const char *pBegin, const char *pEnd)
193{
194 ULONG cbSubstr = (pEnd - pBegin);
195 PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
196 strhncpy0(pszSubstr, pBegin, cbSubstr);
197 return (pszSubstr);
198}
199
200/*
201 *@@ strhExtract:
202 * searches pszBuf for the cOpen character and returns
203 * the data in between cOpen and cClose, excluding
204 * those two characters, in a newly allocated buffer
205 * which you must free() afterwards.
206 *
207 * Spaces and newlines/linefeeds are skipped.
208 *
209 * If the search was successful, the new buffer
210 * is returned and, if (ppEnd != NULL), *ppEnd points
211 * to the first character after the cClose character
212 * found in the buffer.
213 *
214 * If the search was not successful, NULL is
215 * returned, and *ppEnd is unchanged.
216 *
217 * If another cOpen character is found before
218 * cClose, matching cClose characters will be skipped.
219 * You can therefore nest the cOpen and cClose
220 * characters.
221 *
222 * This function ignores cOpen and cClose characters
223 * in C-style comments and strings surrounded by
224 * double quotes.
225 *
226 * Example:
227 + PSZ pszBuf = "KEYWORD { --blah-- } next",
228 + pEnd;
229 + strhExtract(pszBuf,
230 + '{', '}',
231 + &pEnd)
232 * would return a new buffer containing " --blah-- ",
233 * and ppEnd would afterwards point to the space
234 * before "next" in the static buffer.
235 *
236 *@@added V0.9.0 [umoeller]
237 */
238
239PSZ strhExtract(PSZ pszBuf, // in: search buffer
240 CHAR cOpen, // in: opening char
241 CHAR cClose, // in: closing char
242 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
243{
244 PSZ pszReturn = NULL;
245
246 if (pszBuf)
247 {
248 PSZ pOpen = strchr(pszBuf, cOpen);
249 if (pOpen)
250 {
251 // opening char found:
252 // now go thru the whole rest of the buffer
253 PSZ p = pOpen+1;
254 LONG lLevel = 1; // if this goes 0, we're done
255 while (*p)
256 {
257 if (*p == cOpen)
258 lLevel++;
259 else if (*p == cClose)
260 {
261 lLevel--;
262 if (lLevel <= 0)
263 {
264 // matching closing bracket found:
265 // extract string
266 pszReturn = strhSubstr(pOpen+1, // after cOpen
267 p); // excluding cClose
268 if (ppEnd)
269 *ppEnd = p+1;
270 break; // while (*p)
271 }
272 }
273 else if (*p == '\"')
274 {
275 // beginning of string:
276 PSZ p2 = p+1;
277 // find end of string
278 while ((*p2) && (*p2 != '\"'))
279 p2++;
280
281 if (*p2 == '\"')
282 // closing quote found:
283 // search on after that
284 p = p2; // raised below
285 else
286 break; // while (*p)
287 }
288
289 p++;
290 }
291 }
292 }
293
294 return (pszReturn);
295}
296
297/*
298 *@@ strhQuote:
299 * similar to strhExtract, except that
300 * opening and closing chars are the same,
301 * and therefore no nesting is possible.
302 * Useful for extracting stuff between
303 * quotes.
304 *
305 *@@added V0.9.0 [umoeller]
306 */
307
308PSZ strhQuote(PSZ pszBuf,
309 CHAR cQuote,
310 PSZ *ppEnd)
311{
312 PSZ pszReturn = NULL,
313 p1 = NULL;
314 if ((p1 = strchr(pszBuf, cQuote)))
315 {
316 PSZ p2 = strchr(p1+1, cQuote);
317 if (p2)
318 {
319 pszReturn = strhSubstr(p1+1, p2);
320 if (ppEnd)
321 // store closing char
322 *ppEnd = p2 + 1;
323 }
324 }
325
326 return (pszReturn);
327}
328
/*
 *@@ strhStrip:
 *      removes all double spaces, i.e. every run
 *      of several spaces is reduced to one space.
 *      This copies within the "psz" buffer.
 *      If any double spaces are found, the
 *      string will be shorter than before,
 *      but the buffer is _not_ reallocated,
 *      so there will be unused bytes at the
 *      end.
 *
 *      Returns the number of spaces removed
 *      (0 if psz is NULL).
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhStrip(PSZ psz)         // in/out: string
{
    ULONG ulrc = 0;

    if (psz)
    {
        // single pass: pRead runs over the original text while
        // pWrite trails behind and receives the characters we keep
        PSZ pRead = psz,
            pWrite = psz;

        while (*pRead)
        {
            if ((*pRead == ' ') && (*(pRead + 1) == ' '))
                // another space follows: drop this one,
                // the last space of the run survives
                ulrc++;
            else
                *pWrite++ = *pRead;

            pRead++;
        }

        *pWrite = 0;
    }

    return (ulrc);
}
367
/*
 *@@ strhins:
 *      this inserts one string into another.
 *
 *      pcszInsert is inserted into pcszBuffer at offset
 *      ulInsertOfs (which counts from 0). An offset equal
 *      to the length of pcszBuffer appends pcszInsert.
 *
 *      A newly allocated string is returned. pcszBuffer is
 *      not changed. The new string should be free()'d after
 *      use.
 *
 *      Upon errors, NULL is returned. This happens if either
 *      string is NULL, if ulInsertOfs is larger than the
 *      length of pcszBuffer, or if no memory is available.
 *
 *@@changed V0.9.0 [umoeller]: completely rewritten.
 */

PSZ strhins(const char *pcszBuffer,
            ULONG ulInsertOfs,
            const char *pcszInsert)
{
    PSZ pszNew = NULL;

    if ((pcszBuffer) && (pcszInsert))
    {
        ULONG   cbBuffer = strlen(pcszBuffer),
                cbInsert = strlen(pcszInsert);

        // the last valid insert position is the null terminator itself
        if (ulInsertOfs <= cbBuffer)
        {
            pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1);  // additional null terminator
            if (pszNew)
            {
                // copy stuff before insert position
                memcpy(pszNew,
                       pcszBuffer,
                       ulInsertOfs);
                // copy string to be inserted
                memcpy(pszNew + ulInsertOfs,
                       pcszInsert,
                       cbInsert);
                // copy stuff after insert position, including null terminator
                memcpy(pszNew + ulInsertOfs + cbInsert,
                       pcszBuffer + ulInsertOfs,
                       cbBuffer - ulInsertOfs + 1);
            }
        }
    }

    return (pszNew);
}
419
420/*
421 *@@ strhrpl:
422 * wrapper around xstrrpl to work with C strings.
423 * Note that *ppszBuf can get reallocated and must
424 * be free()'able.
425 *
426 * Use of this wrapper is not recommended because
427 * it is considerably slower than xstrrpl.
428 *
429 *@@added V0.9.6 (2000-11-01) [umoeller]
430 */
431
432ULONG strhrpl(PSZ *ppszBuf, // in/out: string
433 ULONG ulOfs, // in: where to begin search (0 = start)
434 const char *pcszSearch, // in: search string; cannot be NULL
435 const char *pcszReplace, // in: replacement string; cannot be NULL
436 PULONG pulAfterOfs) // out: offset where found (ptr can be NULL)
437{
438 ULONG ulrc = 0;
439 XSTRING xstrBuf,
440 xstrFind,
441 xstrReplace;
442 xstrInit(&xstrBuf, 0);
443 xstrset(&xstrBuf, *ppszBuf);
444 xstrInit(&xstrFind, 0);
445 xstrset(&xstrFind, (PSZ)pcszSearch);
446 xstrInit(&xstrReplace, 0);
447 xstrset(&xstrReplace, (PSZ)pcszReplace);
448
449 if (ulrc = xstrrpl(&xstrBuf, ulOfs, &xstrFind, &xstrReplace, pulAfterOfs))
450 // replaced:
451 *ppszBuf = xstrBuf.psz;
452
453 return (ulrc);
454}
455
/*
 * strhWords:
 *      returns the no. of words in "psz".
 *      A string is considered a "word" if
 *      it is surrounded by spaces only, i.e.
 *      every run of non-space characters counts
 *      as one word.
 *
 *      Several spaces in a row as well as leading and
 *      trailing spaces do not produce additional words,
 *      and a single character is a word too.
 *      Returns 0 for an empty string, a string of spaces
 *      only, or a NULL pointer.
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhWords(PSZ psz)
{
    ULONG ulWords = 0;

    if (psz)
    {
        BOOL    fInWord = FALSE;
        PSZ     p;

        for (p = psz; *p; p++)
        {
            if (*p == ' ')
                fInWord = FALSE;
            else if (!fInWord)
            {
                // first character after a space (or at the start):
                // a new word begins here
                fInWord = TRUE;
                ulWords++;
            }
        }
    }

    return (ulWords);
}
479
/*
 *@@ strhThousandsULong:
 *      converts a ULONG into a decimal string, while
 *      inserting thousands separators into it. Specify
 *      the separator char in cThousands.
 *      Returns pszTarget so you can use it directly
 *      with sprintf and the "%s" flag.
 *      pszTarget must be large enough for all digits,
 *      the separators, and the null terminator.
 *      For cThousands, you should use the data in
 *      OS2.INI ("PM_National" application), which is
 *      always set according to the "Country" object.
 *      Use strhThousandsDouble for "double" values.
 */

PSZ strhThousandsULong(PSZ pszTarget,       // out: decimal as string
                       ULONG ul,            // in: decimal to convert
                       CHAR cThousands)     // in: separator char (e.g. '.')
{
    CHAR    szTemp[40];
    ULONG   cDigits,
            ulSrc,
            ulTarget = 0;

    // ul is unsigned: with "%d", values above LONG_MAX would
    // come out negative and the '-' would be grouped like a digit
    sprintf(szTemp, "%lu", ul);

    cDigits = strlen(szTemp);
    for (ulSrc = 0; ulSrc < cDigits; ulSrc++)
    {
        // a separator goes before every digit which has a multiple
        // of three digits to its right, except before the first one
        if ((ulSrc) && (((cDigits - ulSrc) % 3) == 0))
            pszTarget[ulTarget++] = cThousands;

        pszTarget[ulTarget++] = szTemp[ulSrc];
    }
    pszTarget[ulTarget] = '\0';

    return (pszTarget);
}
518
519/*
520 *@@ strhThousandsDouble:
521 * like strhThousandsULong, but for a "double"
522 * value. Note that after-comma values are truncated.
523 */
524
525PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
526{
527 USHORT ust, uss, usc;
528 CHAR szTemp[40];
529 sprintf(szTemp, "%.0f", floor(dbl));
530
531 ust = 0;
532 usc = strlen(szTemp);
533 for (uss = 0; uss < usc; uss++)
534 {
535 if (uss)
536 if (((usc - uss) % 3) == 0)
537 {
538 pszTarget[ust] = cThousands;
539 ust++;
540 }
541 pszTarget[ust] = szTemp[uss];
542 ust++;
543 }
544 pszTarget[ust] = '\0';
545
546 return (pszTarget);
547}
548
549/*
550 *@@ strhFileDate:
551 * converts file date data to a string (to pszBuf).
552 * You can pass any FDATE structure to this function,
553 * which are returned in those FILEFINDBUF* or
554 * FILESTATUS* structs by the Dos* functions.
555 *
556 * ulDateFormat is the PM setting for the date format,
557 * as set in the "Country" object, and can be queried using
558 + PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
559 *
560 * meaning:
561 * -- 0 mm.dd.yyyy (English)
562 * -- 1 dd.mm.yyyy (e.g. German)
563 * -- 2 yyyy.mm.dd (Japanese, ISO)
564 * -- 3 yyyy.dd.mm
565 *
566 * cDateSep is used as a date separator (e.g. '.').
567 * This can be queried using:
568 + prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
569 *
570 * Alternatively, you can query all the country settings
571 * at once using prfhQueryCountrySettings (prfh.c, new with V0.9.0).
572 *
573 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
574 */
575
576VOID strhFileDate(PSZ pszBuf, // out: string returned
577 FDATE *pfDate, // in: date information
578 ULONG ulDateFormat, // in: date format (0-3)
579 CHAR cDateSep) // in: date separator (e.g. '.')
580{
581 DATETIME dt;
582 dt.day = pfDate->day;
583 dt.month = pfDate->month;
584 dt.year = pfDate->year + 1980;
585
586 strhDateTime(pszBuf,
587 NULL, // no time
588 &dt,
589 ulDateFormat,
590 cDateSep,
591 0, 0); // no time
592}
593
594/*
595 *@@ strhFileTime:
596 * converts file time data to a string (to pszBuf).
597 * You can pass any FTIME structure to this function,
598 * which are returned in those FILEFINDBUF* or
599 * FILESTATUS* structs by the Dos* functions.
600 *
601 * ulTimeFormat is the PM setting for the time format,
602 * as set in the "Country" object, and can be queried using
603 + PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
604 * meaning:
605 * -- 0 12-hour clock
606 * -- >0 24-hour clock
607 *
608 * cDateSep is used as a time separator (e.g. ':').
609 * This can be queried using:
610 + prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
611 *
612 * Alternatively, you can query all the country settings
613 * at once using prfhQueryCountrySettings (prfh.c, new with V0.9.0).
614 *
615 *@@changed 99-03-15 fixed 12-hour crash
616 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
617 */
618
619VOID strhFileTime(PSZ pszBuf, // out: string returned
620 FTIME *pfTime, // in: time information
621 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
622 CHAR cTimeSep) // in: time separator (e.g. ':')
623{
624 DATETIME dt;
625 dt.hours = pfTime->hours;
626 dt.minutes = pfTime->minutes;
627 dt.seconds = pfTime->twosecs * 2;
628
629 strhDateTime(NULL, // no date
630 pszBuf,
631 &dt,
632 0, 0, // no date
633 ulTimeFormat,
634 cTimeSep);
635}
636
637/*
638 *@@ strhDateTime:
639 * converts Control Programe DATETIME info
640 * to two strings. See strhFileDate and strhFileTime
641 * for more detailed parameter descriptions.
642 *
643 *@@added V0.9.0 (99-11-07) [umoeller]
644 */
645
646VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
647 PSZ pszTime, // out: time string returned (can be NULL)
648 DATETIME *pDateTime, // in: date/time information
649 ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
650 CHAR cDateSep, // in: date separator (e.g. '.')
651 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
652 CHAR cTimeSep) // in: time separator (e.g. ':')
653{
654 if (pszDate)
655 {
656 switch (ulDateFormat)
657 {
658 case 0: // mm.dd.yyyy (English)
659 sprintf(pszDate, "%02d%c%02d%c%04d",
660 pDateTime->month,
661 cDateSep,
662 pDateTime->day,
663 cDateSep,
664 pDateTime->year);
665 break;
666
667 case 1: // dd.mm.yyyy (e.g. German)
668 sprintf(pszDate, "%02d%c%02d%c%04d",
669 pDateTime->day,
670 cDateSep,
671 pDateTime->month,
672 cDateSep,
673 pDateTime->year);
674 break;
675
676 case 2: // yyyy.mm.dd (Japanese)
677 sprintf(pszDate, "%04d%c%02d%c%02d",
678 pDateTime->year,
679 cDateSep,
680 pDateTime->month,
681 cDateSep,
682 pDateTime->day);
683 break;
684
685 default: // yyyy.dd.mm
686 sprintf(pszDate, "%04d%c%02d%c%02d",
687 pDateTime->year,
688 cDateSep,
689 pDateTime->day,
690 cDateSep,
691 pDateTime->month);
692 break;
693 }
694 }
695
696 if (pszTime)
697 {
698 if (ulTimeFormat == 0)
699 {
700 // for 12-hour clock, we need additional INI data
701 CHAR szAMPM[10] = "err";
702
703 if (pDateTime->hours > 12)
704 {
705 // > 12h: PM.
706
707 // Note: 12:xx noon is 12 AM, not PM (even though
708 // AM stands for "ante meridiam", but English is just
709 // not logical), so that's handled below.
710
711 PrfQueryProfileString(HINI_USER,
712 "PM_National",
713 "s2359", // key
714 "PM", // default
715 szAMPM, sizeof(szAMPM)-1);
716 sprintf(pszTime, "%02d%c%02d%c%02d %s",
717 // leave 12 == 12 (not 0)
718 pDateTime->hours % 12,
719 cTimeSep,
720 pDateTime->minutes,
721 cTimeSep,
722 pDateTime->seconds,
723 szAMPM);
724 }
725 else
726 {
727 // <= 12h: AM
728 PrfQueryProfileString(HINI_USER,
729 "PM_National",
730 "s1159", // key
731 "AM", // default
732 szAMPM, sizeof(szAMPM)-1);
733 sprintf(pszTime, "%02d%c%02d%c%02d %s",
734 pDateTime->hours,
735 cTimeSep,
736 pDateTime->minutes,
737 cTimeSep,
738 pDateTime->seconds,
739 szAMPM);
740 }
741 }
742 else
743 // 24-hour clock
744 sprintf(pszTime, "%02d%c%02d%c%02d",
745 pDateTime->hours,
746 cTimeSep,
747 pDateTime->minutes,
748 cTimeSep,
749 pDateTime->seconds);
750 }
751}
752
/*
 *@@ strhGetWord:
 *      finds word boundaries.
 *
 *      *ppszStart is used as the beginning of the
 *      search. Characters listed in pcszBeginChars
 *      are skipped first.
 *
 *      If a word is found, *ppszStart is set to
 *      the first character of the word which was
 *      found and *ppszEnd receives the address
 *      of the first character _after_ the word,
 *      which is either a character from pcszEndChars
 *      (probably a space or a \n or \r char) or the
 *      null terminator.
 *      We then return TRUE.
 *
 *      The search for the start of a word is stopped if
 *      a null character is found or pLimit is reached.
 *      In that case, and also if the word does not end
 *      before or with the character at pLimit, FALSE is
 *      returned and *ppszStart and *ppszEnd are unchanged.
 *
 *@@added V0.9.1 (2000-02-13) [umoeller]
 */

BOOL strhGetWord(PSZ *ppszStart,        // in: start of search,
                                        // out: start of word (if TRUE is returned)
                 const char *pLimit,    // in: ptr to last char after *ppszStart to be
                                        // searched; if the word does not end before
                                        // or with this char, FALSE is returned
                 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
                 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
                 PSZ *ppszEnd)          // out: first char _after_ word
                                        // (if TRUE is returned)
{
    PSZ pStart = *ppszStart;

    // find start of word; the null terminator must be tested
    // explicitly because strchr finds it in every string, which
    // would make us skip it and run on behind the text
    while (    (pStart < (PSZ)pLimit)
            && (*pStart)
            && (strchr(pcszBeginChars, *pStart))
          )
        // if char is a "before word" char: go for next
        pStart++;

    if ((pStart < (PSZ)pLimit) && (*pStart))
    {
        // found a valid "word start" character
        // (which is not in pcszBeginChars):

        // find end of word; this stops at the null terminator
        // too since strchr finds it in pcszEndChars
        PSZ  pEndOfWord = pStart;
        while (    (pEndOfWord <= (PSZ)pLimit)
                && (strchr(pcszEndChars, *pEndOfWord) == 0)
              )
            // if char is not an "end word" char: go for next
            pEndOfWord++;

        if (pEndOfWord <= (PSZ)pLimit)
        {
            // whoa, got a word:
            *ppszStart = pStart;
            *ppszEnd = pEndOfWord;
            return (TRUE);
        }
    }

    return (FALSE);
}
821
/*
 *@@ strhFindWord:
 *      searches for pszSearch in pszBuf, which is
 *      returned if found (or NULL if not).
 *
 *      As opposed to strstr, this finds pszSearch
 *      only if it is a "word". A search string is
 *      considered a word if the character _before_
 *      it is in pcszBeginChars (or it starts at the
 *      very beginning of pszBuf) and the char _after_
 *      it is in pcszEndChars (or it ends at the very
 *      end of pszBuf).
 *
 *      NULL is also returned if any of the pointers is
 *      NULL or if pszBuf or pszSearch is empty.
 *
 *      Example:
 +          strhFindWord("This is an example.", "is", " ", " .");
 +          returns ...........^ this, but not the "is" in "This".
 *
 *      The algorithm here uses strstr to find pszSearch in pszBuf
 *      and performs additional "is-word" checks for each item found.
 *      With VAC++ 3.0, this is still much faster than searching
 *      words first and then comparing each word with pszSearch.
 *      I've tried it that way too, and that took nearly double as
 *      long. Apparently, the VAC++ runtime library uses some
 *      optimized search algorithm here, so we better use that one.
 *
 *@@added V0.9.0 (99-11-08) [umoeller]
 *@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
 */

PSZ strhFindWord(const char *pszBuf,
                 const char *pszSearch,
                 const char *pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                 const char *pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    PSZ pszReturn = NULL;

    if ((pszBuf) && (pszSearch) && (pcszBeginChars) && (pcszEndChars))
    {
        ULONG cbSearch = strlen(pszSearch);

        if ((*pszBuf) && (cbSearch))
        {
            const char *p = pszBuf;

            while ((p = strstr(p, pszSearch)))
            {
                // string found:
                // check if that's a word

                // check previous char
                if (    (p == pszBuf)
                     || (strchr(pcszBeginChars, *(p - 1)))
                   )
                {
                    // OK, valid begin char:
                    // check end char
                    CHAR cNextChar = *(p + cbSearch);

                    if (    (cNextChar == 0)
                         || (strchr(pcszEndChars, cNextChar))
                       )
                    {
                        // valid end char:
                        pszReturn = (PSZ)p;
                        break;
                    }
                }

                // not a word: search on with the next character;
                // skipping the whole match could jump over a
                // candidate which overlaps with this one
                p++;
            }
        }
    }

    return (pszReturn);
}
965
/*
 *@@ strhFindEOL:
 *      returns a pointer to the next \r, \n or null character
 *      following pszSearchIn. If (pulOffset != NULL), the
 *      offset of that character from pszSearchIn is stored
 *      in *pulOffset.
 *
 *      This should never return NULL because at some point,
 *      there will be a null byte in your string.
 *
 *@@added V0.9.4 (2000-07-01) [umoeller]
 */

PSZ strhFindEOL(PSZ pszSearchIn,        // in: where to search
                PULONG pulOffset)       // out: offset (ptr can be NULL)
{
    // strcspn gives the no. of characters before the first \r or \n,
    // or the length of the string if there is neither
    ULONG ulOfs = strcspn(pszSearchIn, "\r\n");

    if (pulOffset)
        *pulOffset = ulOfs;

    return (pszSearchIn + ulOfs);
}
996
/*
 *@@ strhFindNextLine:
 *      like strhFindEOL, but this returns the character
 *      _after_ \r or \n. A "\r\n" pair is skipped as a
 *      whole. Note that this might return
 *      a pointer to terminating NULL character also.
 *
 *      If (pulOffset != NULL), *pulOffset receives the
 *      offset of the returned pointer from pszSearchIn.
 */

PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
{
    // go to the first \r or \n, or to the terminating 0 byte
    PSZ pNext = pszSearchIn + strcspn(pszSearchIn, "\r\n");

    // step over one \r and then one \n, whichever are there;
    // on the null byte, neither test matches and we stay put
    if (*pNext == '\r')
        pNext++;
    if (*pNext == '\n')
        pNext++;

    if (pulOffset)
        *pulOffset = pNext - pszSearchIn;

    return (pNext);
}
1018
1019/*
1020 *@@ strhFindKey:
1021 * finds pszKey in pszSearchIn; similar to strhistr,
1022 * but this one makes sure the key is at the beginning
1023 * of a line. Spaces before the key are tolerated.
1024 * Returns NULL if the key was not found.
1025 *
1026 * Used by strhGetParameter/strhSetParameter; useful
1027 * for analyzing CONFIG.SYS settings.
1028 *
1029 *@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1030 *@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1031 */
1032
1033PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1034 const char *pcszKey, // in: key to search for
1035 PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1036 // can be NULL if not needed
1037{
1038 const char *p = NULL;
1039 PSZ pReturn = NULL;
1040 // BOOL fFound = FALSE;
1041
1042 p = pcszSearchIn;
1043 do {
1044 p = strhistr(p, pcszKey);
1045
1046 if ((p) && (p >= pcszSearchIn))
1047 {
1048 // make sure the key is at the beginning of a line
1049 // by going backwards until we find a char != " "
1050 const char *p2 = p;
1051 while ( (*p2 == ' ')
1052 && (p2 > pcszSearchIn)
1053 )
1054 p2--;
1055
1056 // if previous char is an EOL sign, go on
1057 if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1058 || (*(p2-1) == '\r')
1059 || (*(p2-1) == '\n')
1060 )
1061 {
1062 // now check whether the char after the search
1063 // is a "=" char
1064 // ULONG cbKey = strlen(pszKey);
1065
1066 // tolerate spaces before "="
1067 /* PSZ p3 = p;
1068 while (*(p3+cbKey) == ' ')
1069 p3++;
1070
1071 if (*(p3+cbKey) == '=') */
1072 {
1073 // found:
1074 pReturn = (PSZ)p; // go on, p contains found key
1075
1076 // test for all upper case?
1077 if (pfIsAllUpperCase)
1078 {
1079 ULONG cbKey2 = strlen(pcszKey),
1080 ul = 0;
1081 *pfIsAllUpperCase = TRUE;
1082 for (ul = 0; ul < cbKey2; ul++)
1083 if (islower(*(p+ul)))
1084 {
1085 *pfIsAllUpperCase = FALSE;
1086 break; // for
1087 }
1088 }
1089
1090 break; // do
1091 }
1092 } // else search next key
1093
1094 p++; // search on after this key
1095 }
1096 } while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1097
1098 return (pReturn);
1099}
1100
1101/*
1102 *@@ strhGetParameter:
1103 * searches pszSearchIn for the key pszKey; if found, it
1104 * returns a pointer to the following characters in pszSearchIn
1105 * and, if pszCopyTo != NULL, copies the rest of the line to
1106 * that buffer, of which cbCopyTo specified the size.
1107 * If the key is not found, NULL is returned.
1108 * String search is done by calling strhFindKey.
1109 * This is useful for querying CONFIG.SYS settings.
1110 *
1111 * <B>Example:</B> this would return "YES" if you searched
1112 * for "PAUSEONERROR=", and "PAUSEONERROR=YES" existed in pszSearchIn.
1113 */
1114
1115PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1116 const char *pcszKey, // in: key to search for
1117 PSZ pszCopyTo, // out: key value
1118 ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1119{
1120 PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1121 prc = NULL;
1122 if (p)
1123 {
1124 prc = p + strlen(pcszKey);
1125 if (pszCopyTo)
1126 // copy to pszCopyTo
1127 {
1128 ULONG cb;
1129 PSZ pEOL = strhFindEOL(prc, &cb);
1130 if (pEOL)
1131 {
1132 if (cb > cbCopyTo)
1133 cb = cbCopyTo-1;
1134 strhncpy0(pszCopyTo, prc, cb);
1135 }
1136 }
1137 }
1138
1139 return (prc);
1140}
1141
1142/*
1143 *@@ strhSetParameter:
1144 * searches *ppszBuf for the key pszKey; if found, it
1145 * replaces the characters following this key up to the
1146 * end of the line with pszParam. If pszKey is not found in
1147 * *ppszBuf, it is appended to the file in a new line.
1148 *
1149 * If any changes are made, *ppszBuf is re-allocated.
1150 *
1151 * This function searches w/out case sensitivity.
1152 *
1153 * Returns a pointer to the new parameter inside the buffer.
1154 *
1155 *@@changed V0.9.0 [umoeller]: changed function prototype to PSZ* ppszSearchIn
1156 */
1157
1158PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1159 const char *pcszKey, // in: key to search for
1160 PSZ pszNewParam, // in: new parameter to set for key
1161 BOOL fRespectCase) // in: if TRUE, pszNewParam will
1162 // be converted to upper case if the found key is
1163 // in upper case also. pszNewParam should be in
1164 // lower case if you use this.
1165{
1166 BOOL fIsAllUpperCase = FALSE;
1167 PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1168 prc = NULL;
1169
1170 if (pKey)
1171 {
1172 // key found in file:
1173 // replace existing parameter
1174 PSZ pOldParam = pKey + strlen(pcszKey);
1175
1176 prc = pOldParam;
1177 // pOldParam now has the old parameter, which we
1178 // will overwrite now
1179
1180 if (pOldParam)
1181 {
1182 ULONG cbOldParam;
1183 PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1184 // pEOL now has first end-of-line after the parameter
1185
1186 if (pEOL)
1187 {
1188 XSTRING strBuf,
1189 strFind,
1190 strReplace;
1191
1192 PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1193 strncpy(pszOldCopy, pOldParam, cbOldParam);
1194 pszOldCopy[cbOldParam] = '\0';
1195
1196 xstrInit(&strBuf, 0);
1197 xstrset(&strBuf, *ppszBuf); // this must not be freed!
1198 xstrInit(&strFind, 0);
1199 xstrset(&strFind, pszOldCopy); // this must not be freed!
1200 xstrInit(&strReplace, 0);
1201 xstrset(&strReplace, pszNewParam); // this must not be freed!
1202
1203 // check for upper case desired?
1204 if (fRespectCase)
1205 if (fIsAllUpperCase)
1206 strupr(pszNewParam);
1207
1208 xstrrpl(&strBuf, 0, &strFind, &strReplace, NULL);
1209
1210 free(pszOldCopy);
1211
1212 *ppszBuf = strBuf.psz;
1213 }
1214 }
1215 }
1216 else
1217 {
1218 PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1219 + strlen(pcszKey)
1220 + strlen(pszNewParam)
1221 + 5); // 2 * \r\n + null byte
1222 // key not found: append to end of file
1223 sprintf(pszNew, "%s\r\n%s%s\r\n",
1224 *ppszBuf, pcszKey, pszNewParam);
1225 free(*ppszBuf);
1226 *ppszBuf = pszNew;
1227 }
1228
1229 return (prc);
1230}
1231
1232/*
1233 *@@ strhDeleteLine:
1234 * this deletes the line in pszSearchIn which starts with
1235 * the key pszKey. Returns TRUE if the line was found and
1236 * deleted.
1237 *
1238 * This copies within pszSearchIn.
1239 */
1240
1241BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1242 PSZ pszKey) // in: key to find
1243{
1244 BOOL fIsAllUpperCase = FALSE;
1245 PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1246 BOOL brc = FALSE;
1247
1248 if (pKey) {
1249 PSZ pEOL = strhFindEOL(pKey, NULL);
1250 // pEOL now has first end-of-line after the key
1251 if (pEOL)
1252 {
1253 // delete line by overwriting it with
1254 // the next line
1255 strcpy(pKey, pEOL+2);
1256 }
1257 else
1258 {
1259 // EOL not found: we must be at the end of the file
1260 *pKey = '\0';
1261 }
1262 brc = TRUE;
1263 }
1264
1265 return (brc);
1266}
1267
/*
 *@@ strhBeautifyTitle:
 *      replaces all line breaks (0xd, 0xa) with spaces.
 *      The string is changed in place and keeps its length.
 *
 *      Returns TRUE if at least one character was replaced.
 */

BOOL strhBeautifyTitle(PSZ psz)
{
    BOOL    fChanged = FALSE;
    PSZ     p;

    // one pass over the string handles both characters
    for (p = psz; *p; p++)
    {
        if ((*p == '\n') || (*p == '\r'))
        {
            *p = ' ';
            fChanged = TRUE;
        }
    }

    return (fChanged);
}
1289
1290/*
1291 * strhFindAttribValue:
1292 * searches for pszAttrib in pszSearchIn; if found,
1293 * returns the first character after the "=" char.
1294 * If "=" is not found, a space, \r, and \n are
1295 * also accepted. This function searches without
1296 * respecting case.
1297 *
1298 * <B>Example:</B>
1299 + strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1300 +
1301 + returns ....................... ^ this address.
1302 *
1303 *@@added V0.9.0 [umoeller]
1304 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1305 */
1306
1307PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1308{
1309 PSZ prc = 0;
1310 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1311 p,
1312 p2;
1313 ULONG cbAttrib = strlen(pszAttrib);
1314
1315 // 1) find space char
1316 while ((p = strchr(pszSearchIn2, ' ')))
1317 {
1318 CHAR c;
1319 p++;
1320 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1321 // now check whether the p+strlen(pszAttrib)
1322 // is a valid end-of-tag character
1323 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1324 && ( (c == ' ')
1325 || (c == '>')
1326 || (c == '=')
1327 || (c == '\r')
1328 || (c == '\n')
1329 || (c == 0)
1330 )
1331 )
1332 {
1333 // yes:
1334 CHAR c2;
1335 p2 = p + cbAttrib;
1336 c2 = *p2;
1337 while ( ( (c2 == ' ')
1338 || (c2 == '=')
1339 || (c2 == '\n')
1340 || (c2 == '\r')
1341 )
1342 && (c2 != 0)
1343 )
1344 c2 = *++p2;
1345 prc = p2;
1346 break; // first while
1347 }
1348 pszSearchIn2++;
1349 }
1350 return (prc);
1351}
1352
1353/*
1354 * strhGetNumAttribValue:
1355 * stores the numerical parameter value of an HTML-style
1356 * tag in *pl.
1357 *
1358 * Returns the address of the tag parameter in the
1359 * search buffer, if found, or NULL.
1360 *
1361 * <B>Example:</B>
1362 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1363 *
1364 * stores 123 in the "l" variable.
1365 *
1366 *@@added V0.9.0 [umoeller]
1367 */
1368
1369PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1370 const char *pszTag, // e.g. "INDEX"
1371 PLONG pl) // out: numerical value
1372{
1373 PSZ pParam;
1374 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1375 sscanf(pParam, "%d", pl);
1376
1377 return (pParam);
1378}
1379
1380/*
1381 * strhGetTextAttr:
1382 * retrieves the attribute value of a textual HTML-style tag
1383 * in a newly allocated buffer, which is returned,
1384 * or NULL if attribute not found.
1385 * If an attribute value is to contain spaces, it
1386 * must be enclosed in quotes.
1387 *
1388 * The offset of the attribute data in pszSearchIn is
1389 * returned in *pulOffset so that you can do multiple
1390 * searches.
1391 *
1392 * This returns a new buffer, which should be free()'d after use.
1393 *
1394 * <B>Example:</B>
1395 + ULONG ulOfs = 0;
1396 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1397 + ............^ ulOfs
1398 *
1399 * returns a new string with the value "blublub" (without
1400 * quotes) and sets ulOfs to 12.
1401 *
1402 *@@added V0.9.0 [umoeller]
1403 */
1404
1405PSZ strhGetTextAttr(const char *pszSearchIn,
1406 const char *pszTag,
1407 PULONG pulOffset) // out: offset where found
1408{
1409 PSZ pParam,
1410 pParam2,
1411 prc = NULL;
1412 ULONG ulCount = 0;
1413 LONG lNestingLevel = 0;
1414
1415 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1416 {
1417 // determine end character to search for: a space
1418 CHAR cEnd = ' ';
1419 if (*pParam == '\"')
1420 {
1421 // or, if the data is enclosed in quotes, a quote
1422 cEnd = '\"';
1423 pParam++;
1424 }
1425
1426 if (pulOffset)
1427 // store the offset
1428 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1429
1430 // now find end of attribute
1431 pParam2 = pParam;
1432 while (*pParam)
1433 {
1434 if (*pParam == cEnd)
1435 // end character found
1436 break;
1437 else if (*pParam == '<')
1438 // yet another opening tag found:
1439 // this is probably some "<" in the attributes
1440 lNestingLevel++;
1441 else if (*pParam == '>')
1442 {
1443 lNestingLevel--;
1444 if (lNestingLevel < 0)
1445 // end of tag found:
1446 break;
1447 }
1448 ulCount++;
1449 pParam++;
1450 }
1451
1452 // copy attribute to new buffer
1453 if (ulCount)
1454 {
1455 prc = (PSZ)malloc(ulCount+1);
1456 memcpy(prc, pParam2, ulCount);
1457 *(prc+ulCount) = 0;
1458 }
1459 }
1460 return (prc);
1461}
1462
1463/*
1464 * strhFindEndOfTag:
1465 * returns a pointer to the ">" char
1466 * which seems to terminate the tag beginning
1467 * after pszBeginOfTag.
1468 *
1469 * If additional "<" chars are found, we look
1470 * for additional ">" characters too.
1471 *
1472 * Note: You must pass the address of the opening
1473 * '<' character to this function.
1474 *
1475 * Example:
1476 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1477 + strhFindEndOfTag(pszTest)
1478 + returns.................................^ this.
1479 *
1480 *@@added V0.9.0 [umoeller]
1481 */
1482
1483PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1484{
1485 PSZ p = (PSZ)pszBeginOfTag,
1486 prc = NULL;
1487 LONG lNestingLevel = 0;
1488
1489 while (*p)
1490 {
1491 if (*p == '<')
1492 // another opening tag found:
1493 lNestingLevel++;
1494 else if (*p == '>')
1495 {
1496 // closing tag found:
1497 lNestingLevel--;
1498 if (lNestingLevel < 1)
1499 {
1500 // corresponding: return this
1501 prc = p;
1502 break;
1503 }
1504 }
1505 p++;
1506 }
1507
1508 return (prc);
1509}
1510
1511/*
1512 * strhGetBlock:
1513 * this complex function searches the given string
1514 * for a pair of opening/closing HTML-style tags.
1515 *
1516 * If found, this routine returns TRUE and does
1517 * the following:
1518 *
1519 * 1) allocate a new buffer, copy the text
1520 * enclosed by the opening/closing tags
1521 * into it and set *ppszBlock to that
1522 * buffer;
1523 *
1524 * 2) if the opening tag has any attributes,
1525 * allocate another buffer, copy the
1526 * attributes into it and set *ppszAttrs
1527 * to that buffer; if no attributes are
1528 * found, *ppszAttrs will be NULL;
1529 *
1530 * 3) set *pulOffset to the offset from the
1531 * beginning of *ppszSearchIn where the
1532 * opening tag was found;
1533 *
1534 * 4) advance *ppszSearchIn to after the
1535 * closing tag, so that you can do
1536 * multiple searches without finding the
1537 * same tags twice.
1538 *
1539 * All buffers should be freed using free().
1540 *
1541 * This returns the following:
1542 * -- 0: no error
1543 * -- 1: tag not found at all (doesn't have to be an error)
1544 * -- 2: begin tag found, but no corresponding end tag found. This
1545 * is a real error.
1546 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1547 *
1548 * <B>Example:</B>
1549 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1550 + PSZ pszBlock, pszAttrs;
1551 + ULONG ulOfs;
1552 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1553 *
1554 * would do the following:
1555 *
1556 * 1) set pszBlock to a new string containing "This is page 1."
1557 * without quotes;
1558 *
1559 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1560 *
1561 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1562 *
1563 * 4) pSearch would be advanced to point to the "More text"
1564 * string in the original buffer.
1565 *
1566 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1567 * for HTML parsing.
1568 *
1569 *@@added V0.9.0 [umoeller]
1570 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1571 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1572 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1573 */
1574
1575ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1576 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1577 PSZ pszTag,
1578 PSZ *ppszBlock, // out: block enclosed by the tags
1579 PSZ *ppszAttribs, // out: attributes of the opening tag
1580 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1581 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1582{
1583 ULONG ulrc = 1;
1584 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1585 pszSearch2 = pszBeginTag,
1586 pszClosingTag;
1587 ULONG cbTag = strlen(pszTag);
1588
1589 // go thru the block and check all tags if it's the
1590 // begin tag we're looking for
1591 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1592 {
1593 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1594 // yes: stop
1595 break;
1596 else
1597 pszBeginTag++;
1598 }
1599
1600 if (pszBeginTag)
1601 {
1602 // we found <TAG>:
1603 ULONG ulNestingLevel = 0;
1604
1605 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1606 // strchr(pszBeginTag, '>');
1607 if (pszEndOfBeginTag)
1608 {
1609 // does the caller want the attributes?
1610 if (ppszAttribs)
1611 {
1612 // yes: then copy them
1613 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1614 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1615 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1616 // add terminating 0
1617 *(pszAttrs + ulAttrLen) = 0;
1618
1619 *ppszAttribs = pszAttrs;
1620 }
1621
1622 // output offset of where we found the begin tag
1623 if (pulOfsBeginTag)
1624 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1625
1626 // now find corresponding closing tag (e.g. "</BODY>"
1627 pszBeginTag = pszEndOfBeginTag+1;
1628 // now we're behind the '>' char of the opening tag
1629 // increase offset of that too
1630 if (pulOfsBeginBlock)
1631 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1632
1633 // find next closing tag;
1634 // for the first run, pszSearch2 points to right
1635 // after the '>' char of the opening tag
1636 pszSearch2 = pszBeginTag;
1637 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1638 && (pszClosingTag = strstr(pszSearch2, "<"))
1639 )
1640 {
1641 // if we have another opening tag before our closing
1642 // tag, we need to have several closing tags before
1643 // we're done
1644 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1645 ulNestingLevel++;
1646 else
1647 {
1648 // is this ours?
1649 if ( (*(pszClosingTag+1) == '/')
1650 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1651 )
1652 {
1653 // we've found a matching closing tag; is
1654 // it ours?
1655 if (ulNestingLevel == 0)
1656 {
1657 // our closing tag found:
1658 // allocate mem for a new buffer
1659 // and extract all the text between
1660 // open and closing tags to it
1661 ULONG ulLen = pszClosingTag - pszBeginTag;
1662 if (ppszBlock)
1663 {
1664 PSZ pNew = (PSZ)malloc(ulLen + 1);
1665 strhncpy0(pNew, pszBeginTag, ulLen);
1666 *ppszBlock = pNew;
1667 }
1668
1669 // raise search offset to after the closing tag
1670 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1671
1672 ulrc = 0;
1673
1674 break;
1675 } else
1676 // not our closing tag:
1677 ulNestingLevel--;
1678 }
1679 }
1680 // no matching closing tag: search on after that
1681 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1682 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1683
1684 if (!pszClosingTag)
1685 // no matching closing tag found:
1686 // return 2 (closing tag not found)
1687 ulrc = 2;
1688 } // end if (pszBeginTag)
1689 else
1690 // no matching ">" for opening tag found:
1691 ulrc = 3;
1692 }
1693
1694 return (ulrc);
1695}
1696
1697/* ******************************************************************
1698 * *
1699 * Miscellaneous *
1700 * *
1701 ********************************************************************/
1702
1703/*
1704 *@@ strhArrayAppend:
1705 * this appends a string to a "string array".
1706 *
1707 * A string array is considered a sequence of
1708 * zero-terminated strings in memory. That is,
1709 * after each string's null-byte, the next
1710 * string comes up.
1711 *
1712 * This is useful for composing a single block
1713 * of memory from, say, list box entries, which
1714 * can then be written to OS2.INI in one flush.
1715 *
1716 * To append strings to such an array, call this
1717 * function for each string you wish to append.
1718 * This will re-allocate *ppszRoot with each call,
1719 * and update *pcbRoot, which then contains the
1720 * total size of all strings (including all null
1721 * terminators).
1722 *
1723 * Pass *pcbRoot to PrfSaveProfileData to have the
1724 * block saved.
1725 *
1726 * Note: On the first call, *ppszRoot and *pcbRoot
1727 * _must_ be both NULL, or this crashes.
1728 */
1729
1730VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1731 PSZ pszNew, // in: string to append
1732 PULONG pcbRoot) // in/out: size of array
1733{
1734 ULONG cbNew = strlen(pszNew);
1735 PSZ pszTemp = (PSZ)malloc(*pcbRoot
1736 + cbNew
1737 + 1); // two null bytes
1738 if (*ppszRoot)
1739 {
1740 // not first loop: copy old stuff
1741 memcpy(pszTemp,
1742 *ppszRoot,
1743 *pcbRoot);
1744 free(*ppszRoot);
1745 }
1746 // append new string
1747 strcpy(pszTemp + *pcbRoot,
1748 pszNew);
1749 // update root
1750 *ppszRoot = pszTemp;
1751 // update length
1752 *pcbRoot += cbNew + 1;
1753}
1754
1755/*
1756 *@@ strhCreateDump:
1757 * this dumps a memory block into a string
1758 * and returns that string in a new buffer.
1759 *
1760 * You must free() the returned PSZ after use.
1761 *
1762 * The output looks like the following:
1763 *
1764 + 0000: FE FF 0E 02 90 00 00 00 ........
1765 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1766 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1767 *
1768 * Each line is terminated with a newline (\n)
1769 * character only.
1770 *
1771 *@@added V0.9.1 (2000-01-22) [umoeller]
1772 */
1773
1774PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1775 ULONG ulSize, // in: size of buffer
1776 ULONG ulIndent) // in: indentation of every line
1777{
1778 PSZ pszReturn = 0;
1779 XSTRING strReturn;
1780 CHAR szTemp[1000];
1781
1782 PBYTE pbCurrent = pb; // current byte
1783 ULONG ulCount = 0,
1784 ulCharsInLine = 0; // if this grows > 7, a new line is started
1785 CHAR szLine[400] = "",
1786 szAscii[30] = " "; // ASCII representation; filled for every line
1787 PSZ pszLine = szLine,
1788 pszAscii = szAscii;
1789
1790 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1791
1792 for (pbCurrent = pb;
1793 ulCount < ulSize;
1794 pbCurrent++, ulCount++)
1795 {
1796 if (ulCharsInLine == 0)
1797 {
1798 memset(szLine, ' ', ulIndent);
1799 pszLine += ulIndent;
1800 }
1801 pszLine += sprintf(pszLine, "%02lX ", *pbCurrent);
1802
1803 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1804 // printable character:
1805 *pszAscii = *pbCurrent;
1806 else
1807 *pszAscii = '.';
1808 pszAscii++;
1809
1810 ulCharsInLine++;
1811 if ( (ulCharsInLine > 7) // 8 bytes added?
1812 || (ulCount == ulSize-1) // end of buffer reached?
1813 )
1814 {
1815 // if we haven't had eight bytes yet,
1816 // fill buffer up to eight bytes with spaces
1817 ULONG ul2;
1818 for (ul2 = ulCharsInLine;
1819 ul2 < 8;
1820 ul2++)
1821 pszLine += sprintf(pszLine, " ");
1822
1823 sprintf(szTemp, "%04lX: %s %s\n",
1824 (ulCount & 0xFFFFFFF8), // offset in hex
1825 szLine, // bytes string
1826 szAscii); // ASCII string
1827 xstrcat(&strReturn, szTemp);
1828
1829 // restart line buffer
1830 pszLine = szLine;
1831
1832 // clear ASCII buffer
1833 strcpy(szAscii, " ");
1834 pszAscii = szAscii;
1835
1836 // reset line counter
1837 ulCharsInLine = 0;
1838 }
1839 }
1840
1841 if (strReturn.cbAllocated)
1842 pszReturn = strReturn.psz;
1843
1844 return (pszReturn);
1845}
1846
1847/* ******************************************************************
1848 * *
1849 * Wildcard matching *
1850 * *
1851 ********************************************************************/
1852
1853/*
1854 * The following code has been taken from "fnmatch.zip".
1855 *
1856 * (c) 1994-1996 by Eberhard Mattes.
1857 */
1858
/* OS/2 and DOS style paths use both the forward slash and the
 * backslash to separate path components. This macro is true
 * iff C is one of these two separators. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* True iff C terminates a path component, i.e. C is either a
 * separator or the null byte at the end of the string. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 *@@ skip_comp_os2:
 *      advances from SRC to the start of the next path
 *      component, for OS/2 and DOS styles: everything up to
 *      the next separator is skipped, and so is that one
 *      separator. If there is no further separator, the
 *      address of the terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *next = src;

    /* Walk to the end of the current component. */

    for (;;)
    {
        if (IS_OS2_COMP_END(*next))
            break;
        ++next;
    }

    /* We stopped either at the null byte, which is returned as
     * it is, or at a separator, which is stepped over. */

    return (*next == 0) ? next : next + 1;
}
1893
/*
 * has_colon:
 *      scans the null-terminated path P and returns 1 as
 *      soon as a colon is seen, or 0 if the end of the
 *      string is reached without finding one.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    const unsigned char *scan;

    for (scan = p; *scan != 0; ++scan)
    {
        if (*scan == ':')
            return 1;
    }

    return 0;
}
1910
1911/*
1912 * match_comp_os2:
1913 * Compare a single component (directory name or file name) of the
1914 * paths, for OS/2 and DOS styles. MASK and NAME point into a
1915 * component of the wildcard and the name to be checked, respectively.
1916 * Comparing stops at the next separator. The FLAGS argument is the
1917 * same as that of fnmatch(). HAS_DOT is true if a dot is in the
1918 * current component of NAME. The number of dots is not restricted,
1919 * even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1920 * Note that this function is recursive.
1921 *
1922 * (c) 1994-1996 by Eberhard Mattes.
1923 */
1924
1925static int match_comp_os2(const unsigned char *mask,
1926 const unsigned char *name,
1927 unsigned flags,
1928 int has_dot)
1929{
1930 int rc;
1931
1932 for (;;)
1933 switch (*mask)
1934 {
1935 case 0:
1936
1937 /* There must be no extra characters at the end of NAME when
1938 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1939 * in that case, NAME may point to a separator. */
1940
1941 if (*name == 0)
1942 return FNM_MATCH;
1943 if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1944 return FNM_MATCH;
1945 return FNM_NOMATCH;
1946
1947 case '/':
1948 case '\\':
1949
1950 /* Separators match separators. */
1951
1952 if (IS_OS2_COMP_SEP(*name))
1953 return FNM_MATCH;
1954
1955 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1956 * is ignored at the end of NAME. */
1957
1958 if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1959 return FNM_MATCH;
1960
1961 /* Stop comparing at the separator. */
1962
1963 return FNM_NOMATCH;
1964
1965 case '?':
1966
1967 /* A question mark matches one character. It does not match
1968 * a dot. At the end of the component (and before a dot),
1969 * it also matches zero characters. */
1970
1971 if (*name != '.' && !IS_OS2_COMP_END(*name))
1972 ++name;
1973 ++mask;
1974 break;
1975
1976 case '*':
1977
1978 /* An asterisk matches zero or more characters. In DOS
1979 * mode, dots are not matched. */
1980
1981 do
1982 {
1983 ++mask;
1984 }
1985 while (*mask == '*');
1986 for (;;)
1987 {
1988 rc = match_comp_os2(mask, name, flags, has_dot);
1989 if (rc != FNM_NOMATCH)
1990 return rc;
1991 if (IS_OS2_COMP_END(*name))
1992 return FNM_NOMATCH;
1993 if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
1994 return FNM_NOMATCH;
1995 ++name;
1996 }
1997
1998 case '.':
1999
2000 /* A dot matches a dot. It also matches the implicit dot at
2001 * the end of a dot-less NAME. */
2002
2003 ++mask;
2004 if (*name == '.')
2005 ++name;
2006 else if (has_dot || !IS_OS2_COMP_END(*name))
2007 return FNM_NOMATCH;
2008 break;
2009
2010 default:
2011
2012 /* All other characters match themselves. */
2013
2014 if (flags & _FNM_IGNORECASE)
2015 {
2016 if (tolower(*mask) != tolower(*name))
2017 return FNM_NOMATCH;
2018 }
2019 else
2020 {
2021 if (*mask != *name)
2022 return FNM_NOMATCH;
2023 }
2024 ++mask;
2025 ++name;
2026 break;
2027 }
2028}
2029
2030/*
2031 * match_comp:
2032 * compare a single component (directory name or file name) of the
2033 * paths, for all styles which need component-by-component matching.
2034 * MASK and NAME point to the start of a component of the wildcard and
2035 * the name to be checked, respectively. Comparing stops at the next
2036 * separator. The FLAGS argument is the same as that of fnmatch().
2037 * Return FNM_MATCH iff MASK and NAME match.
2038 *
2039 * (c) 1994-1996 by Eberhard Mattes.
2040 */
2041
2042static int match_comp(const unsigned char *mask,
2043 const unsigned char *name,
2044 unsigned flags)
2045{
2046 const unsigned char *s;
2047
2048 switch (flags & _FNM_STYLE_MASK)
2049 {
2050 case _FNM_OS2:
2051 case _FNM_DOS:
2052
2053 /* For OS/2 and DOS styles, we add an implicit dot at the end of
2054 * the component if the component doesn't include a dot. */
2055
2056 s = name;
2057 while (!IS_OS2_COMP_END(*s) && *s != '.')
2058 ++s;
2059 return match_comp_os2(mask, name, flags, *s == '.');
2060
2061 default:
2062 return FNM_ERR;
2063 }
2064}
2065
2066/* In Unix styles, / separates components of a path. This macro
2067 * returns true iff C is a separator. */
2068
2069#define IS_UNIX_COMP_SEP(C) ((C) == '/')
2070
2071
2072/* This macro returns true if C is at the end of a component of a
2073 * path. */
2074
2075#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP (C))
2076
2077/*
2078 * match_unix:
2079 * match complete paths for Unix styles. The FLAGS argument is the
2080 * same as that of fnmatch(). COMP points to the start of the current
2081 * component in NAME. Return FNM_MATCH iff MASK and NAME match. The
2082 * backslash character is used for escaping ? and * unless
2083 * FNM_NOESCAPE is set.
2084 *
2085 * (c) 1994-1996 by Eberhard Mattes.
2086 */
2087
2088static int match_unix(const unsigned char *mask,
2089 const unsigned char *name,
2090 unsigned flags,
2091 const unsigned char *comp)
2092{
2093 unsigned char c1, c2;
2094 char invert, matched;
2095 const unsigned char *start;
2096 int rc;
2097
2098 for (;;)
2099 switch (*mask)
2100 {
2101 case 0:
2102
2103 /* There must be no extra characters at the end of NAME when
2104 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
2105 * in that case, NAME may point to a separator. */
2106
2107 if (*name == 0)
2108 return FNM_MATCH;
2109 if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
2110 return FNM_MATCH;
2111 return FNM_NOMATCH;
2112
2113 case '?':
2114
2115 /* A question mark matches one character. It does not match
2116 * the component separator if FNM_PATHNAME is set. It does
2117 * not match a dot at the start of a component if FNM_PERIOD
2118 * is set. */
2119
2120 if (*name == 0)
2121 return FNM_NOMATCH;
2122 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2123 return FNM_NOMATCH;
2124 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2125 return FNM_NOMATCH;
2126 ++mask;
2127 ++name;
2128 break;
2129
2130 case '*':
2131
2132 /* An asterisk matches zero or more characters. It does not
2133 * match the component separator if FNM_PATHNAME is set. It
2134 * does not match a dot at the start of a component if
2135 * FNM_PERIOD is set. */
2136
2137 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2138 return FNM_NOMATCH;
2139 do
2140 {
2141 ++mask;
2142 }
2143 while (*mask == '*');
2144 for (;;)
2145 {
2146 rc = match_unix(mask, name, flags, comp);
2147 if (rc != FNM_NOMATCH)
2148 return rc;
2149 if (*name == 0)
2150 return FNM_NOMATCH;
2151 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2152 return FNM_NOMATCH;
2153 ++name;
2154 }
2155
2156 case '/':
2157
2158 /* Separators match only separators. If _FNM_PATHPREFIX is
2159 * set, a trailing separator in MASK is ignored at the end
2160 * of NAME. */
2161
2162 if (!(IS_UNIX_COMP_SEP(*name)
2163 || ((flags & _FNM_PATHPREFIX) && *name == 0
2164 && (mask[1] == 0
2165 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
2166 && mask[2] == 0)))))
2167 return FNM_NOMATCH;
2168
2169 ++mask;
2170 if (*name != 0)
2171 ++name;
2172
2173 /* This is the beginning of a new component if FNM_PATHNAME
2174 * is set. */
2175
2176 if (flags & FNM_PATHNAME)
2177 comp = name;
2178 break;
2179
2180 case '[':
2181
2182 /* A set of characters. Always case-sensitive. */
2183
2184 if (*name == 0)
2185 return FNM_NOMATCH;
2186 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2187 return FNM_NOMATCH;
2188 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2189 return FNM_NOMATCH;
2190
2191 invert = 0;
2192 matched = 0;
2193 ++mask;
2194
2195 /* If the first character is a ! or ^, the set matches all
2196 * characters not listed in the set. */
2197
2198 if (*mask == '!' || *mask == '^')
2199 {
2200 ++mask;
2201 invert = 1;
2202 }
2203
2204 /* Loop over all the characters of the set. The loop ends
2205 * if the end of the string is reached or if a ] is
2206 * encountered unless it directly follows the initial [ or
2207 * [-. */
2208
2209 start = mask;
2210 while (!(*mask == 0 || (*mask == ']' && mask != start)))
2211 {
2212 /* Get the next character which is optionally preceded
2213 * by a backslash. */
2214
2215 c1 = *mask++;
2216 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2217 {
2218 if (*mask == 0)
2219 break;
2220 c1 = *mask++;
2221 }
2222
2223 /* Ranges of characters are written as a-z. Don't
2224 * forget to check for the end of the string and to
2225 * handle the backslash. If the character after - is a
2226 * ], it isn't a range. */
2227
2228 if (*mask == '-' && mask[1] != ']')
2229 {
2230 ++mask; /* Skip the - character */
2231 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2232 ++mask;
2233 if (*mask == 0)
2234 break;
2235 c2 = *mask++;
2236 }
2237 else
2238 c2 = c1;
2239
2240 /* Now check whether this character or range matches NAME. */
2241
2242 if (c1 <= *name && *name <= c2)
2243 matched = 1;
2244 }
2245
2246 /* If the end of the string is reached before a ] is found,
2247 * back up to the [ and compare it to NAME. */
2248
2249 if (*mask == 0)
2250 {
2251 if (*name != '[')
2252 return FNM_NOMATCH;
2253 ++name;
2254 mask = start;
2255 if (invert)
2256 --mask;
2257 }
2258 else
2259 {
2260 if (invert)
2261 matched = !matched;
2262 if (!matched)
2263 return FNM_NOMATCH;
2264 ++mask; /* Skip the ] character */
2265 if (*name != 0)
2266 ++name;
2267 }
2268 break;
2269
2270 case '\\':
2271 ++mask;
2272 if (flags & FNM_NOESCAPE)
2273 {
2274 if (*name != '\\')
2275 return FNM_NOMATCH;
2276 ++name;
2277 }
2278 else if (*mask == '*' || *mask == '?')
2279 {
2280 if (*mask != *name)
2281 return FNM_NOMATCH;
2282 ++mask;
2283 ++name;
2284 }
2285 break;
2286
2287 default:
2288
2289 /* All other characters match themselves. */
2290
2291 if (flags & _FNM_IGNORECASE)
2292 {
2293 if (tolower(*mask) != tolower(*name))
2294 return FNM_NOMATCH;
2295 }
2296 else
2297 {
2298 if (*mask != *name)
2299 return FNM_NOMATCH;
2300 }
2301 ++mask;
2302 ++name;
2303 break;
2304 }
2305}
2306
2307/*
2308 * _fnmatch_unsigned:
2309 * Check whether the path name NAME matches the wildcard MASK.
2310 *
2311 * Return:
2312 * -- 0 (FNM_MATCH) if it matches,
2313 * -- _FNM_NOMATCH if it doesn't,
2314 * -- FNM_ERR on error.
2315 *
2316 * The operation of this function is controlled by FLAGS.
2317 * This is an internal function, with unsigned arguments.
2318 *
2319 * (c) 1994-1996 by Eberhard Mattes.
2320 */
2321
2322static int _fnmatch_unsigned(const unsigned char *mask,
2323 const unsigned char *name,
2324 unsigned flags)
2325{
2326 int m_drive, n_drive,
2327 rc;
2328
2329 /* Match and skip the drive name if present. */
2330
2331 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2332 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2333
2334 if (m_drive != n_drive)
2335 {
2336 if (m_drive == -1 || n_drive == -1)
2337 return FNM_NOMATCH;
2338 if (!(flags & _FNM_IGNORECASE))
2339 return FNM_NOMATCH;
2340 if (tolower(m_drive) != tolower(n_drive))
2341 return FNM_NOMATCH;
2342 }
2343
2344 if (m_drive != -1)
2345 mask += 2;
2346 if (n_drive != -1)
2347 name += 2;
2348
2349 /* Colons are not allowed in path names, except for the drive name,
2350 * which was skipped above. */
2351
2352 if (has_colon(mask) || has_colon(name))
2353 return FNM_ERR;
2354
2355 /* The name "\\server\path" should not be matched by mask
2356 * "\*\server\path". Ditto for /. */
2357
2358 switch (flags & _FNM_STYLE_MASK)
2359 {
2360 case _FNM_OS2:
2361 case _FNM_DOS:
2362
2363 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2364 {
2365 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2366 return FNM_NOMATCH;
2367 name += 2;
2368 mask += 2;
2369 }
2370 break;
2371
2372 case _FNM_POSIX:
2373
2374 if (name[0] == '/' && name[1] == '/')
2375 {
2376 int i;
2377
2378 name += 2;
2379 for (i = 0; i < 2; ++i)
2380 if (mask[0] == '/')
2381 ++mask;
2382 else if (mask[0] == '\\' && mask[1] == '/')
2383 mask += 2;
2384 else
2385 return FNM_NOMATCH;
2386 }
2387
2388 /* In Unix styles, treating ? and * w.r.t. components is simple.
2389 * No need to do matching component by component. */
2390
2391 return match_unix(mask, name, flags, name);
2392 }
2393
2394 /* Now compare all the components of the path name, one by one.
2395 * Note that the path separator must not be enclosed in brackets. */
2396
2397 while (*mask != 0 || *name != 0)
2398 {
2399
2400 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2401 * is reached even if there are components left in NAME. */
2402
2403 if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2404 return FNM_MATCH;
2405
2406 /* Compare a single component of the path name. */
2407
2408 rc = match_comp(mask, name, flags);
2409 if (rc != FNM_MATCH)
2410 return rc;
2411
2412 /* Skip to the next component or to the end of the path name. */
2413
2414 mask = skip_comp_os2(mask);
2415 name = skip_comp_os2(name);
2416 }
2417
2418 /* If we reached the ends of both strings, the names match. */
2419
2420 if (*mask == 0 && *name == 0)
2421 return FNM_MATCH;
2422
2423 /* The names do not match. */
2424
2425 return FNM_NOMATCH;
2426}
2427
2428/*
2429 *@@ strhMatchOS2:
2430 * this matches wildcards, similar to what DosEditName does.
2431 * However, this does not require a file to be present, but
2432 * works on strings only.
2433 */
2434
2435BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2436 const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2437{
2438 return ((BOOL)(_fnmatch_unsigned(pcszMask,
2439 pcszName,
2440 _FNM_OS2 | _FNM_IGNORECASE)
2441 == FNM_MATCH)
2442 );
2443}
2444
2445/* ******************************************************************
2446 * *
2447 * Fast string searches *
2448 * *
2449 ********************************************************************/
2450
// ASSERT is defined to expand to nothing here, so the ASSERT (x)
// lines in the search functions below compile to empty statements
// and their argument is never evaluated.
#define ASSERT(a)
2452
2453/*
2454 * The following code has been taken from the "Standard
2455 * Function Library", file sflfind.c, and only slightly
2456 * modified to conform to the rest of this file.
2457 *
2458 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2459 * Revised: 98/05/04
2460 *
2461 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2462 *
2463 * The SFL Licence allows incorporating SFL code into other
2464 * programs, as long as the copyright is reprinted and the
2465 * code is marked as modified, so this is what we do.
2466 */
2467
2468/*
2469 *@@ strhfind:
2470 * searches for a pattern in a string using the Boyer-Moore-
2471 * Horspool-Sunday algorithm. The string and pattern are null-terminated
2472 * strings. Returns a pointer to the pattern if found within the string,
2473 * or NULL if the pattern was not found. If you repeatedly scan for the
2474 * same pattern, use the repeat_find argument. If this is TRUE, the
2475 * function does not re-parse the pattern. You must of course call the
2476 * function with repeat_find equal to FALSE the first time. This function
2477 * is meant to handle character data, and is most effective when you work
2478 * with large strings. To search binary data use strhmemfind(). Will not work
2479 * on multibyte characters. Non-reentrant (static search buffer); use strhfind_r() or strhfind_rb() instead.
2480 *
2481 * Examples:
2482 + char *result;
2483 +
2484 + result = strhfind ("abracadabra", "cad", FALSE);
2485 + if (result)
2486 + puts (result);
2487 +
2488 * Taken from the "Standard Function Library", file sflfind.c.
2489 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2490 * Slightly modified.
2491 *
2492 *@@added V0.9.3 (2000-05-08) [umoeller]
2493 */
2494
2495char* strhfind (const char *string, // String containing data
2496 const char *pattern, // Pattern to search for
2497 BOOL repeat_find) // Same pattern as last time
2498{
2499 static size_t
2500 searchbuf [256]; // Fixed search buffer
2501
2502 ASSERT (string); // Expect non-NULL pointers, but
2503 ASSERT (pattern); // fall through if not debugging
2504
2505 return (char *) strhmemfind_rb (string, strlen (string),
2506 pattern, strlen (pattern),
2507 searchbuf, &repeat_find);
2508}
2509
2510/*
2511 *@@ strhfind_r:
2512 * searches for a pattern in a string using the Boyer-Moore-
2513 * Horspool-Sunday algorithm. The string and pattern are null-terminated
2514 * strings. Returns a pointer to the pattern if found within the string,
2515 * or NULL if the pattern was not found. This function is meant to handle
2516 * character data, and is most effective when you work with large strings.
2517 * To search binary data use strhmemfind(). Will not work on multibyte
2518 * characters. Reentrant.
2519 *
2520 * Examples:
2521 + char *result;
2522 +
2523 + result = strhfind_r ("abracadabra", "cad");
2524 + if (result)
2525 + puts (result);
2526 *
2527 * Taken from the "Standard Function Library", file sflfind.c.
2528 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2529 * Slightly modified.
2530 *
2531 *@@added V0.9.3 (2000-05-08) [umoeller]
2532 */
2533
2534char* strhfind_r (const char *string, // String containing data
2535 const char *pattern) // Pattern to search for
2536{
2537 size_t
2538 searchbuf [256]; // One-time search buffer
2539 BOOL
2540 secondtime = FALSE; // Search buffer init needed
2541
2542 ASSERT (string); // Expect non-NULL pointers, but
2543 ASSERT (pattern); // fall through if not debugging
2544
2545 return (char *) strhmemfind_rb (string, strlen (string),
2546 pattern, strlen (pattern),
2547 searchbuf, &secondtime);
2548}
2549
2550/*
2551 *@@ strhfind_rb:
2552 * searches for a pattern in a string using the Boyer-Moore-
2553 * Horspool-Sunday algorithm. The string and pattern are null-terminated
2554 * strings. Returns a pointer to the pattern if found within the string,
2555 * or NULL if the pattern was not found. Supports more efficient repeat
2556 * searches (for the same pattern), through a supplied search buffer. The
2557 * search buffer must be long enough to contain 256 (2**8) size_t entries.
2558 * On the first call repeat_find must be set to FALSE. After the search
2559 * buffer has been initialised, repeat_find will be set to TRUE by the
2560 * function, avoiding the search buffer initialisation on later calls.
2561 *
2562 * This function is most effective when repeated searches are made for
2563 * the same pattern in one or more strings. This function is meant to
2564 * handle character data, and is most effective when you work with
2565 * large strings. To search binary data use strhmemfind(). Will not work
2566 * on multibyte characters. Reentrant.
2567 *
2568 * Examples:
2569 + char *result;
2570 + BOOL repeat_search = FALSE;
2571 + size_t searchbuf[256];
2572 +
2573 + result = strhfind_rb ("abracadabra", "cad", searchbuf, &repeat_search);
2574 + if (result)
2575 + {
2576 + puts (result);
2577 + result = strhfind_rb ("cad/cam", "cad", searchbuf, &repeat_search);
2578 + if (result)
2579 + puts (result);
2580 + }
2581 *
2582 * Taken from the "Standard Function Library", file sflfind.c.
2583 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2584 * Slightly modified.
2585 *
2586 *@@added V0.9.3 (2000-05-08) [umoeller]
2587 */
2588
2589char* strhfind_rb (const char *string, // String containing data
2590 const char *pattern, // Pattern to search for
2591 size_t *shift, // Working buffer between searches
2592 BOOL *repeat_find) // Flag for first/later search
2593{
2594 ASSERT (string); // Expect non-NULL pointers, but
2595 ASSERT (pattern); // fall through if not debugging
2596 ASSERT (shift);
2597 ASSERT (repeat_find);
2598
2599 return (char *) strhmemfind_rb (string, strlen (string),
2600 pattern, strlen (pattern),
2601 shift, repeat_find);
2602}
2603
2604/*
2605 *@@ strhmemfind:
2606 * searches for a pattern in a block of memory using the Boyer-
2607 * Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2608 * values; you must explicitly provide their lengths. Returns a pointer to
2609 * the pattern if found within the block, or NULL if the pattern was not
2610 * found. If you repeatedly scan for the same pattern, use the repeat_find
2611 * argument. If this is TRUE, the function does not re-parse the pattern.
2612 * This function is meant to handle binary data. If you need to search
2613 * strings, use the strhfind_r or strhfind_rb() functions. Non-Reentrant.
2614 *
2615 * Taken from the "Standard Function Library", file sflfind.c.
2616 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2617 * Slightly modified.
2618 *
2619 *@@added V0.9.3 (2000-05-08) [umoeller]
2620 */
2621
2622void* strhmemfind (const void *block, // Block containing data
2623 size_t block_size, // Size of block in bytes
2624 const void *pattern, // Pattern to search for
2625 size_t pattern_size, // Size of pattern block
2626 BOOL repeat_find) // Same pattern as last time
2627{
2628 static size_t
2629 searchbuf [256]; // Static shared search buffer
2630
2631 ASSERT (block); // Expect non-NULL pointers, but
2632 ASSERT (pattern); // full through if not debugging
2633
2634 return strhmemfind_rb (block, block_size, pattern, pattern_size,
2635 searchbuf, &repeat_find);
2636}
2637
2638/*
2639 *@@ strhmemfind_r:
2640 * searches for a pattern in a block of memory using the Boyer-
2641 * Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2642 * values; you must explicitly provide their lengths. Returns a pointer to
2643 * the pattern if found within the block, or NULL if the pattern was not
2644 * found.
2645 *
2646 * This function is meant to handle binary data, for a single search for
2647 * a given pattern. If you need to search strings, use the strhfind_r()
2648 * or strhfind_rb() functions. If you want to do efficient repeated searches
2649 * for one pattern, use strhmemfind_rb(). Reentrant.
2650 *
2651 * Taken from the "Standard Function Library", file sflfind.c.
2652 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2653 * Slightly modified.
2654 *
2655 *@@added V0.9.3 (2000-05-08) [umoeller]
2656 */
2657
2658void* strhmemfind_r (const void *block, // Block containing data
2659 size_t block_size, // Size of block in bytes
2660 const void *pattern, // Pattern to search for
2661 size_t pattern_size) // Size of pattern block
2662{
2663 size_t
2664 searchbuf [256]; // One-time search buffer
2665 BOOL
2666 secondtime = FALSE;
2667
2668 ASSERT (block); // Expect non-NULL pointers, but
2669 ASSERT (pattern); // full through if not debugging
2670
2671 return strhmemfind_rb (block, block_size, pattern, pattern_size,
2672 searchbuf, &secondtime);
2673}
2674
2675/*
2676 *@@ strhmemfind_rb:
2677 * searches for a pattern in a block of memory using the Boyer-
2678 * Moore-Horspool-Sunday algorithm. The block and pattern may contain any
2679 * values; you must explicitly provide their lengths. Returns a pointer to
2680 * the pattern if found within the block, or NULL if the pattern was not
2681 * found. On the first search with a given pattern, *repeat_find should
2682 * be FALSE. It will be set to TRUE after the shift table is initialised,
2683 * allowing the initialisation phase to be skipped on subsequent searches.
2684 * shift must point to an array big enough to hold 256 (2**8) size_t values.
2685 *
2686 * This function is meant to handle binary data, for repeated searches
2687 * for the same pattern. If you need to search strings, use the
2688 * strhfind_r() or strhfind_rb() functions. If you wish to search for a
2689 * pattern only once consider using strhmemfind_r(). Reentrant.
2690 *
2691 * Taken from the "Standard Function Library", file sflfind.c.
2692 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2693 * Slightly modified.
2694 *
2695 *@@added V0.9.3 (2000-05-08) [umoeller]
2696 */
2697
2698void* strhmemfind_rb (const void *in_block, // Block containing data
2699 size_t block_size, // Size of block in bytes
2700 const void *in_pattern, // Pattern to search for
2701 size_t pattern_size, // Size of pattern block
2702 size_t *shift, // Shift table (search buffer)
2703 BOOL *repeat_find) // TRUE: search buffer already init
2704{
2705 size_t
2706 byte_nbr, // Distance through block
2707 match_size; // Size of matched part
2708 const unsigned char
2709 *match_base = NULL, // Base of match of pattern
2710 *match_ptr = NULL, // Point within current match
2711 *limit = NULL; // Last potiental match point
2712 const unsigned char
2713 *block = (unsigned char *) in_block, // Concrete pointer to block data
2714 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2715
2716 ASSERT (block); // Expect non-NULL pointers, but
2717 ASSERT (pattern); // fail gracefully if not debugging
2718 ASSERT (shift); // NULL repeat_find => is false
2719 if (block == NULL || pattern == NULL || shift == NULL)
2720 return (NULL);
2721
2722 // Pattern must be smaller or equal in size to string
2723 if (block_size < pattern_size)
2724 return (NULL); // Otherwise it's not found
2725
2726 if (pattern_size == 0) // Empty patterns match at start
2727 return ((void *)block);
2728
2729 // Build the shift table unless we're continuing a previous search
2730
2731 // The shift table determines how far to shift before trying to match
2732 // again, if a match at this point fails. If the byte after where the
2733 // end of our pattern falls is not in our pattern, then we start to
2734 // match again after that byte; otherwise we line up the last occurence
2735 // of that byte in our pattern under that byte, and try match again.
2736
2737 if (!repeat_find || !*repeat_find)
2738 {
2739 for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
2740 shift [byte_nbr] = pattern_size + 1;
2741 for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
2742 shift [(unsigned char) pattern [byte_nbr]] = pattern_size - byte_nbr;
2743
2744 if (repeat_find)
2745 *repeat_find = TRUE;
2746 }
2747
2748 // Search for the block, each time jumping up by the amount
2749 // computed in the shift table
2750
2751 limit = block + (block_size - pattern_size + 1);
2752 ASSERT (limit > block);
2753
2754 for (match_base = block;
2755 match_base < limit;
2756 match_base += shift [*(match_base + pattern_size)])
2757 {
2758 match_ptr = match_base;
2759 match_size = 0;
2760
2761 // Compare pattern until it all matches, or we find a difference
2762 while (*match_ptr++ == pattern [match_size++])
2763 {
2764 ASSERT (match_size <= pattern_size &&
2765 match_ptr == (match_base + match_size));
2766
2767 // If we found a match, return the start address
2768 if (match_size >= pattern_size)
2769 return ((void*)(match_base));
2770
2771 }
2772 }
2773 return (NULL); // Found nothing
2774}
2775
2776/*
2777 *@@ strhtxtfind:
2778 * searches for a case-insensitive text pattern in a string
2779 * using the Boyer-Moore-Horspool-Sunday algorithm. The string and
2780 * pattern are null-terminated strings. Returns a pointer to the pattern
2781 * if found within the string, or NULL if the pattern was not found.
2782 * Will match strings irrespective of case. To match exact strings, use
2783 * strhfind(). Will not work on multibyte characters.
2784 *
2785 * Examples:
2786 + char *result;
2787 +
2788 + result = strhtxtfind ("AbracaDabra", "cad");
2789 + if (result)
2790 + puts (result);
2791 +
2792 * Taken from the "Standard Function Library", file sflfind.c.
2793 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2794 * Slightly modified.
2795 *
2796 *@@added V0.9.3 (2000-05-08) [umoeller]
2797 */
2798
char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size,                     // Size of matched part
        offset,                         // Start of the current candidate match
        last_start;                     // Last offset where the pattern still fits
    const unsigned char
        *text,                          // string, viewed as unsigned bytes
        *pat;                           // pattern, viewed as unsigned bytes

    // Fail gracefully on NULL pointers
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // tolower() is only defined for values representable as unsigned char
    // (or EOF). Plain char may be signed, so all character accesses go
    // through unsigned char pointers.
    text = (const unsigned char *) string;
    pat  = (const unsigned char *) pattern;

    // Build the shift table.
    //
    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try to match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find the location
    // of the next potential match. Offsets are used instead of a moving
    // pointer so that we never form a pointer beyond the terminator.

    last_start = string_size - pattern_size;

    for (offset = 0;
         offset <= last_start;
         // text [offset + pattern_size] is at worst the terminating null
         // byte, so this read always stays inside the string.
         offset += shift [(unsigned char) tolower (text [offset + pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        for (match_size = 0; match_size < pattern_size; match_size++)
        {
            if (tolower (text [offset + match_size]) != tolower (pat [match_size]))
                break;
        }

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *) (string + offset));
    }
    return (NULL);                      // Found nothing
}
2870
Note: See TracBrowser for help on using the repository browser.