source: trunk/src/helpers/stringh.c@ 21

Last change on this file since 21 was 21, checked in by umoeller, 25 years ago

Final changes for 0.9.7, i hope...

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 84.5 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#include <os2.h>
45
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <math.h>
51
52#include "setup.h" // code generation and debugging options
53
54#include "helpers\stringh.h"
55#include "helpers\xstring.h" // extended string helpers
56
57#pragma hdrstop
58
59/*
60 *@@category: Helpers\C helpers\String management
61 * See stringh.c and xstring.c.
62 */
63
64/*
65 *@@category: Helpers\C helpers\String management\C string helpers
66 * See stringh.c.
67 */
68
69/*
70 *@@ strhdup:
71 * like strdup, but this one
72 * doesn't crash if pszSource is NULL,
73 * but returns NULL also.
74 *
75 *@@added V0.9.0 [umoeller]
76 */
77
78PSZ strhdup(const char *pszSource)
79{
80 if (pszSource)
81 return (strdup(pszSource));
82 else
83 return (0);
84}
85
86/*
87 *@@ strhistr:
88 * like strstr, but case-insensitive.
89 *
90 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
91 */
92
93PSZ strhistr(const char *string1, const char *string2)
94{
95 PSZ prc = NULL;
96
97 if ((string1) && (string2))
98 {
99 PSZ pszSrchIn = strdup(string1);
100 PSZ pszSrchFor = strdup(string2);
101
102 if ((pszSrchIn) && (pszSrchFor))
103 {
104 strupr(pszSrchIn);
105 strupr(pszSrchFor);
106
107 prc = strstr(pszSrchIn, pszSrchFor);
108 if (prc)
109 {
110 // prc now has the first occurence of the string,
111 // but in pszSrchIn; we need to map this
112 // return value to the original string
113 prc = (prc-pszSrchIn) // offset in pszSrchIn
114 + (PSZ)string1;
115 }
116 }
117 if (pszSrchFor)
118 free(pszSrchFor);
119 if (pszSrchIn)
120 free(pszSrchIn);
121 }
122 return (prc);
123}
124
125/*
126 *@@ strhncpy0:
127 * like strncpy, but always appends a 0 character.
128 */
129
130ULONG strhncpy0(PSZ pszTarget,
131 const char *pszSource,
132 ULONG cbSource)
133{
134 ULONG ul = 0;
135 PSZ pTarget = pszTarget,
136 pSource = (PSZ)pszSource;
137
138 for (ul = 0; ul < cbSource; ul++)
139 if (*pSource)
140 *pTarget++ = *pSource++;
141 else
142 break;
143 *pTarget = 0;
144
145 return (ul);
146}
147
148/*
149 * strhCount:
150 * this counts the occurences of c in pszSearch.
151 */
152
153ULONG strhCount(const char *pszSearch,
154 CHAR c)
155{
156 PSZ p = (PSZ)pszSearch;
157 ULONG ulCount = 0;
158 while (TRUE)
159 {
160 p = strchr(p, c);
161 if (p)
162 {
163 ulCount++;
164 p++;
165 }
166 else
167 break;
168 }
169 return (ulCount);
170}
171
172/*
173 *@@ strhIsDecimal:
174 * returns TRUE if psz consists of decimal digits only.
175 */
176
177BOOL strhIsDecimal(PSZ psz)
178{
179 PSZ p = psz;
180 while (*p != 0)
181 {
182 if (isdigit(*p) == 0)
183 return (FALSE);
184 p++;
185 }
186
187 return (TRUE);
188}
189
190/*
191 *@@ strhSubstr:
192 * this creates a new PSZ containing the string
193 * from pBegin to pEnd, excluding the pEnd character.
194 * The new string is null-terminated. The caller
195 * must free() the new string after use.
196 *
197 * Example:
198 + "1234567890"
199 + ^ ^
200 + p1 p2
201 + strhSubstr(p1, p2)
202 * would return a new string containing "2345678".
203 */
204
205PSZ strhSubstr(const char *pBegin, const char *pEnd)
206{
207 ULONG cbSubstr = (pEnd - pBegin);
208 PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
209 strhncpy0(pszSubstr, pBegin, cbSubstr);
210 return (pszSubstr);
211}
212
213/*
214 *@@ strhExtract:
215 * searches pszBuf for the cOpen character and returns
216 * the data in between cOpen and cClose, excluding
217 * those two characters, in a newly allocated buffer
218 * which you must free() afterwards.
219 *
220 * Spaces and newlines/linefeeds are skipped.
221 *
222 * If the search was successful, the new buffer
223 * is returned and, if (ppEnd != NULL), *ppEnd points
224 * to the first character after the cClose character
225 * found in the buffer.
226 *
227 * If the search was not successful, NULL is
228 * returned, and *ppEnd is unchanged.
229 *
230 * If another cOpen character is found before
231 * cClose, matching cClose characters will be skipped.
232 * You can therefore nest the cOpen and cClose
233 * characters.
234 *
235 * This function ignores cOpen and cClose characters
236 * in C-style comments and strings surrounded by
237 * double quotes.
238 *
239 * Example:
240 + PSZ pszBuf = "KEYWORD { --blah-- } next",
241 + pEnd;
242 + strhExtract(pszBuf,
243 + '{', '}',
244 + &pEnd)
245 * would return a new buffer containing " --blah-- ",
246 * and ppEnd would afterwards point to the space
247 * before "next" in the static buffer.
248 *
249 *@@added V0.9.0 [umoeller]
250 */
251
252PSZ strhExtract(PSZ pszBuf, // in: search buffer
253 CHAR cOpen, // in: opening char
254 CHAR cClose, // in: closing char
255 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
256{
257 PSZ pszReturn = NULL;
258
259 if (pszBuf)
260 {
261 PSZ pOpen = strchr(pszBuf, cOpen);
262 if (pOpen)
263 {
264 // opening char found:
265 // now go thru the whole rest of the buffer
266 PSZ p = pOpen+1;
267 LONG lLevel = 1; // if this goes 0, we're done
268 while (*p)
269 {
270 if (*p == cOpen)
271 lLevel++;
272 else if (*p == cClose)
273 {
274 lLevel--;
275 if (lLevel <= 0)
276 {
277 // matching closing bracket found:
278 // extract string
279 pszReturn = strhSubstr(pOpen+1, // after cOpen
280 p); // excluding cClose
281 if (ppEnd)
282 *ppEnd = p+1;
283 break; // while (*p)
284 }
285 }
286 else if (*p == '\"')
287 {
288 // beginning of string:
289 PSZ p2 = p+1;
290 // find end of string
291 while ((*p2) && (*p2 != '\"'))
292 p2++;
293
294 if (*p2 == '\"')
295 // closing quote found:
296 // search on after that
297 p = p2; // raised below
298 else
299 break; // while (*p)
300 }
301
302 p++;
303 }
304 }
305 }
306
307 return (pszReturn);
308}
309
310/*
311 *@@ strhQuote:
312 * similar to strhExtract, except that
313 * opening and closing chars are the same,
314 * and therefore no nesting is possible.
315 * Useful for extracting stuff between
316 * quotes.
317 *
318 *@@added V0.9.0 [umoeller]
319 */
320
321PSZ strhQuote(PSZ pszBuf,
322 CHAR cQuote,
323 PSZ *ppEnd)
324{
325 PSZ pszReturn = NULL,
326 p1 = NULL;
327 if ((p1 = strchr(pszBuf, cQuote)))
328 {
329 PSZ p2 = strchr(p1+1, cQuote);
330 if (p2)
331 {
332 pszReturn = strhSubstr(p1+1, p2);
333 if (ppEnd)
334 // store closing char
335 *ppEnd = p2 + 1;
336 }
337 }
338
339 return (pszReturn);
340}
341
342/*
343 *@@ strhStrip:
344 * removes all double spaces.
345 * This copies within the "psz" buffer.
346 * If any double spaces are found, the
347 * string will be shorter than before,
348 * but the buffer is _not_ reallocated,
349 * so there will be unused bytes at the
350 * end.
351 *
352 * Returns the number of spaces removed.
353 *
354 *@@added V0.9.0 [umoeller]
355 */
356
357ULONG strhStrip(PSZ psz) // in/out: string
358{
359 PSZ p;
360 ULONG cb = strlen(psz),
361 ulrc = 0;
362
363 for (p = psz; p < psz+cb; p++)
364 {
365 if ((*p == ' ') && (*(p+1) == ' '))
366 {
367 PSZ p2 = p;
368 while (*p2)
369 {
370 *p2 = *(p2+1);
371 p2++;
372 }
373 cb--;
374 p--;
375 ulrc++;
376 }
377 }
378 return (ulrc);
379}
380
381/*
382 *@@ strhins:
383 * this inserts one string into another.
384 *
385 * pszInsert is inserted into pszBuffer at offset
386 * ulInsertOfs (which counts from 0).
387 *
388 * A newly allocated string is returned. pszBuffer is
389 * not changed. The new string should be free()'d after
390 * use.
391 *
392 * Upon errors, NULL is returned.
393 *
394 *@@changed V0.9.0 [umoeller]: completely rewritten.
395 */
396
397PSZ strhins(const char *pcszBuffer,
398 ULONG ulInsertOfs,
399 const char *pcszInsert)
400{
401 PSZ pszNew = NULL;
402
403 if ((pcszBuffer) && (pcszInsert))
404 {
405 do {
406 ULONG cbBuffer = strlen(pcszBuffer);
407 ULONG cbInsert = strlen(pcszInsert);
408
409 // check string length
410 if (ulInsertOfs > cbBuffer + 1)
411 break; // do
412
413 // OK, let's go.
414 pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
415
416 // copy stuff before pInsertPos
417 memcpy(pszNew,
418 pcszBuffer,
419 ulInsertOfs);
420 // copy string to be inserted
421 memcpy(pszNew + ulInsertOfs,
422 pcszInsert,
423 cbInsert);
424 // copy stuff after pInsertPos
425 strcpy(pszNew + ulInsertOfs + cbInsert,
426 pcszBuffer + ulInsertOfs);
427 } while (FALSE);
428 }
429
430 return (pszNew);
431}
432
433/*
434 *@@ strhrpl:
435 * wrapper around xstrrpl to work with C strings.
436 * Note that *ppszBuf can get reallocated and must
437 * be free()'able.
438 *
439 * Repetitive use of this wrapper is not recommended
440 * because it is considerably slower than xstrrpl.
441 *
442 *@@added V0.9.6 (2000-11-01) [umoeller]
443 */
444
445ULONG strhrpl(PSZ *ppszBuf, // in/out: string
446 PULONG pulOfs, // in: where to begin search (0 = start);
447 // out: ofs of first char after replacement string
448 const char *pcszSearch, // in: search string; cannot be NULL
449 const char *pcszReplace) // in: replacement string; cannot be NULL
450{
451 ULONG ulrc = 0;
452 XSTRING xstrBuf,
453 xstrFind,
454 xstrReplace;
455 size_t ShiftTable[256];
456 BOOL fRepeat = FALSE;
457 xstrInit(&xstrBuf, 0);
458 xstrset(&xstrBuf, *ppszBuf);
459 xstrInit(&xstrFind, 0);
460 xstrset(&xstrFind, (PSZ)pcszSearch);
461 xstrInit(&xstrReplace, 0);
462 xstrset(&xstrReplace, (PSZ)pcszReplace);
463
464 if ((ulrc = xstrrpl(&xstrBuf,
465 pulOfs,
466 &xstrFind,
467 &xstrReplace,
468 ShiftTable,
469 &fRepeat)))
470 // replaced:
471 *ppszBuf = xstrBuf.psz;
472
473 return (ulrc);
474}
475
476/*
477 * strhWords:
478 * returns the no. of words in "psz".
479 * A string is considered a "word" if
480 * it is surrounded by spaces only.
481 *
482 *@@added V0.9.0 [umoeller]
483 */
484
485ULONG strhWords(PSZ psz)
486{
487 PSZ p;
488 ULONG cb = strlen(psz),
489 ulWords = 0;
490 if (cb > 1)
491 {
492 ulWords = 1;
493 for (p = psz; p < psz+cb; p++)
494 if (*p == ' ')
495 ulWords++;
496 }
497 return (ulWords);
498}
499
500/*
501 *@@ strhThousandsULong:
502 * converts a ULONG into a decimal string, while
503 * inserting thousands separators into it. Specify
504 * the separator character in cThousands.
505 *
506 * Returns pszTarget so you can use it directly
507 * with sprintf and the "%s" flag.
508 *
509 * For cThousands, you should use the data in
510 * OS2.INI ("PM_National" application), which is
511 * always set according to the "Country" object.
512 * You can use prfhQueryCountrySettings to
513 * retrieve this setting.
514 *
515 * Use strhThousandsDouble for "double" values.
516 */
517
518PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
519 ULONG ul, // in: decimal to convert
520 CHAR cThousands) // in: separator char (e.g. '.')
521{
522 USHORT ust, uss, usc;
523 CHAR szTemp[40];
524 sprintf(szTemp, "%lu", ul);
525
526 ust = 0;
527 usc = strlen(szTemp);
528 for (uss = 0; uss < usc; uss++)
529 {
530 if (uss)
531 if (((usc - uss) % 3) == 0)
532 {
533 pszTarget[ust] = cThousands;
534 ust++;
535 }
536 pszTarget[ust] = szTemp[uss];
537 ust++;
538 }
539 pszTarget[ust] = '\0';
540
541 return (pszTarget);
542}
543
544/*
545 *@@ strhThousandsDouble:
546 * like strhThousandsULong, but for a "double"
547 * value. Note that after-comma values are truncated.
548 */
549
550PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
551{
552 USHORT ust, uss, usc;
553 CHAR szTemp[40];
554 sprintf(szTemp, "%.0f", floor(dbl));
555
556 ust = 0;
557 usc = strlen(szTemp);
558 for (uss = 0; uss < usc; uss++)
559 {
560 if (uss)
561 if (((usc - uss) % 3) == 0)
562 {
563 pszTarget[ust] = cThousands;
564 ust++;
565 }
566 pszTarget[ust] = szTemp[uss];
567 ust++;
568 }
569 pszTarget[ust] = '\0';
570
571 return (pszTarget);
572}
573
574/*
575 *@@ strhVariableDouble:
576 * like strhThousandsULong, but for a "double" value, and
577 * with a variable number of decimal places depending on the
578 * size of the quantity.
579 *
580 *@@added V0.9.6 (2000-11-12) [pr]
581 */
582
583PSZ strhVariableDouble(PSZ pszTarget,
584 double dbl,
585 PSZ pszUnits,
586 CHAR cThousands)
587{
588 if (dbl < 100.0)
589 sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
590 else
591 if (dbl < 1000.0)
592 sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
593 else
594 strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
595 pszUnits);
596
597 return(pszTarget);
598}
599
600/*
601 *@@ strhFileDate:
602 * converts file date data to a string (to pszBuf).
603 * You can pass any FDATE structure to this function,
604 * which are returned in those FILEFINDBUF* or
605 * FILESTATUS* structs by the Dos* functions.
606 *
607 * ulDateFormat is the PM setting for the date format,
608 * as set in the "Country" object, and can be queried using
609 + PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
610 *
611 * meaning:
612 * -- 0 mm.dd.yyyy (English)
613 * -- 1 dd.mm.yyyy (e.g. German)
614 * -- 2 yyyy.mm.dd (Japanese, ISO)
615 * -- 3 yyyy.dd.mm
616 *
617 * cDateSep is used as a date separator (e.g. '.').
618 * This can be queried using:
619 + prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
620 *
621 * Alternatively, you can query all the country settings
622 * at once using prfhQueryCountrySettings (prfh.c).
623 *
624 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
625 */
626
627VOID strhFileDate(PSZ pszBuf, // out: string returned
628 FDATE *pfDate, // in: date information
629 ULONG ulDateFormat, // in: date format (0-3)
630 CHAR cDateSep) // in: date separator (e.g. '.')
631{
632 DATETIME dt;
633 dt.day = pfDate->day;
634 dt.month = pfDate->month;
635 dt.year = pfDate->year + 1980;
636
637 strhDateTime(pszBuf,
638 NULL, // no time
639 &dt,
640 ulDateFormat,
641 cDateSep,
642 0, 0); // no time
643}
644
645/*
646 *@@ strhFileTime:
647 * converts file time data to a string (to pszBuf).
648 * You can pass any FTIME structure to this function,
649 * which are returned in those FILEFINDBUF* or
650 * FILESTATUS* structs by the Dos* functions.
651 *
652 * ulTimeFormat is the PM setting for the time format,
653 * as set in the "Country" object, and can be queried using
654 + PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
655 * meaning:
656 * -- 0 12-hour clock
657 * -- >0 24-hour clock
658 *
659 * cDateSep is used as a time separator (e.g. ':').
660 * This can be queried using:
661 + prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
662 *
663 * Alternatively, you can query all the country settings
664 * at once using prfhQueryCountrySettings (prfh.c).
665 *
666 *@@changed 99-03-15 fixed 12-hour crash
667 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
668 */
669
670VOID strhFileTime(PSZ pszBuf, // out: string returned
671 FTIME *pfTime, // in: time information
672 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
673 CHAR cTimeSep) // in: time separator (e.g. ':')
674{
675 DATETIME dt;
676 dt.hours = pfTime->hours;
677 dt.minutes = pfTime->minutes;
678 dt.seconds = pfTime->twosecs * 2;
679
680 strhDateTime(NULL, // no date
681 pszBuf,
682 &dt,
683 0, 0, // no date
684 ulTimeFormat,
685 cTimeSep);
686}
687
688/*
689 *@@ strhDateTime:
690 * converts Control Program DATETIME info
691 * into two strings. See strhFileDate and strhFileTime
692 * for more detailed parameter descriptions.
693 *
694 *@@added V0.9.0 (99-11-07) [umoeller]
695 */
696
697VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
698 PSZ pszTime, // out: time string returned (can be NULL)
699 DATETIME *pDateTime, // in: date/time information
700 ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
701 CHAR cDateSep, // in: date separator (e.g. '.')
702 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
703 CHAR cTimeSep) // in: time separator (e.g. ':')
704{
705 if (pszDate)
706 {
707 switch (ulDateFormat)
708 {
709 case 0: // mm.dd.yyyy (English)
710 sprintf(pszDate, "%02d%c%02d%c%04d",
711 pDateTime->month,
712 cDateSep,
713 pDateTime->day,
714 cDateSep,
715 pDateTime->year);
716 break;
717
718 case 1: // dd.mm.yyyy (e.g. German)
719 sprintf(pszDate, "%02d%c%02d%c%04d",
720 pDateTime->day,
721 cDateSep,
722 pDateTime->month,
723 cDateSep,
724 pDateTime->year);
725 break;
726
727 case 2: // yyyy.mm.dd (Japanese)
728 sprintf(pszDate, "%04d%c%02d%c%02d",
729 pDateTime->year,
730 cDateSep,
731 pDateTime->month,
732 cDateSep,
733 pDateTime->day);
734 break;
735
736 default: // yyyy.dd.mm
737 sprintf(pszDate, "%04d%c%02d%c%02d",
738 pDateTime->year,
739 cDateSep,
740 pDateTime->day,
741 cDateSep,
742 pDateTime->month);
743 break;
744 }
745 }
746
747 if (pszTime)
748 {
749 if (ulTimeFormat == 0)
750 {
751 // for 12-hour clock, we need additional INI data
752 CHAR szAMPM[10] = "err";
753
754 if (pDateTime->hours > 12)
755 {
756 // > 12h: PM.
757
758 // Note: 12:xx noon is 12 AM, not PM (even though
759 // AM stands for "ante meridiam", but English is just
760 // not logical), so that's handled below.
761
762 PrfQueryProfileString(HINI_USER,
763 "PM_National",
764 "s2359", // key
765 "PM", // default
766 szAMPM, sizeof(szAMPM)-1);
767 sprintf(pszTime, "%02d%c%02d%c%02d %s",
768 // leave 12 == 12 (not 0)
769 pDateTime->hours % 12,
770 cTimeSep,
771 pDateTime->minutes,
772 cTimeSep,
773 pDateTime->seconds,
774 szAMPM);
775 }
776 else
777 {
778 // <= 12h: AM
779 PrfQueryProfileString(HINI_USER,
780 "PM_National",
781 "s1159", // key
782 "AM", // default
783 szAMPM, sizeof(szAMPM)-1);
784 sprintf(pszTime, "%02d%c%02d%c%02d %s",
785 pDateTime->hours,
786 cTimeSep,
787 pDateTime->minutes,
788 cTimeSep,
789 pDateTime->seconds,
790 szAMPM);
791 }
792 }
793 else
794 // 24-hour clock
795 sprintf(pszTime, "%02d%c%02d%c%02d",
796 pDateTime->hours,
797 cTimeSep,
798 pDateTime->minutes,
799 cTimeSep,
800 pDateTime->seconds);
801 }
802}
803
804/*
805 *@@ strhGetWord:
806 * finds word boundaries.
807 *
808 * *ppszStart is used as the beginning of the
809 * search.
810 *
811 * If a word is found, *ppszStart is set to
812 * the first character of the word which was
813 * found and *ppszEnd receives the address
814 * of the first character _after_ the word,
815 * which is probably a space or a \n or \r char.
816 * We then return TRUE.
817 *
818 * The search is stopped if a null character
819 * is found or pLimit is reached. In that case,
820 * FALSE is returned.
821 *
822 *@@added V0.9.1 (2000-02-13) [umoeller]
823 */
824
825BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
826 // out: start of word (if TRUE is returned)
827 const char *pLimit, // in: ptr to last char after *ppszStart to be
828 // searched; if the word does not end before
829 // or with this char, FALSE is returned
830 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
831 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
832 PSZ *ppszEnd) // out: first char _after_ word
833 // (if TRUE is returned)
834{
835 // characters after which a word can be started
836 // const char *pcszBeginChars = "\x0d\x0a ";
837 // const char *pcszEndChars = "\x0d\x0a /-";
838
839 PSZ pStart = *ppszStart;
840
841 // find start of word
842 while ( (pStart < (PSZ)pLimit)
843 && (strchr(pcszBeginChars, *pStart))
844 )
845 // if char is a "before word" char: go for next
846 pStart++;
847
848 if (pStart < (PSZ)pLimit)
849 {
850 // found a valid "word start" character
851 // (which is not in pcszBeginChars):
852
853 // find end of word
854 PSZ pEndOfWord = pStart;
855 while ( (pEndOfWord <= (PSZ)pLimit)
856 && (strchr(pcszEndChars, *pEndOfWord) == 0)
857 )
858 // if char is not an "end word" char: go for next
859 pEndOfWord++;
860
861 if (pEndOfWord <= (PSZ)pLimit)
862 {
863 // whoa, got a word:
864 *ppszStart = pStart;
865 *ppszEnd = pEndOfWord;
866 return (TRUE);
867 }
868 }
869
870 return (FALSE);
871}
872
873/*
874 *@@ strhIsWord:
875 * returns TRUE if p points to a "word"
876 * in pcszBuf.
877 *
878 * p is considered a word if the character _before_
879 * it is in pcszBeginChars and the char _after_
880 * it (i.e. *(p+cbSearch)) is in pcszEndChars.
881 *
882 *@@added V0.9.6 (2000-11-12) [umoeller]
883 */
884
885BOOL strhIsWord(const char *pcszBuf,
886 const char *p, // in: start of word
887 ULONG cbSearch, // in: length of word
888 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
889 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
890{
891 BOOL fEndOK = FALSE;
892
893 // check previous char
894 if ( (p == pcszBuf)
895 || (strchr(pcszBeginChars, *(p-1)))
896 )
897 {
898 // OK, valid begin char:
899 // check end char
900 CHAR cNextChar = *(p + cbSearch);
901 if (cNextChar == 0)
902 fEndOK = TRUE;
903 else
904 {
905 char *pc = strchr(pcszEndChars, cNextChar);
906 if (pc)
907 // OK, is end char: avoid doubles of that char,
908 // but allow spaces
909 if ( (cNextChar+1 != *pc)
910 || (cNextChar+1 == ' ')
911 || (cNextChar+1 == 0)
912 )
913 fEndOK = TRUE;
914 }
915 }
916
917 return (fEndOK);
918}
919
920/*
921 *@@ strhFindWord:
922 * searches for pszSearch in pszBuf, which is
923 * returned if found (or NULL if not).
924 *
925 * As opposed to strstr, this finds pszSearch
926 * only if it is a "word". A search string is
927 * considered a word if the character _before_
928 * it is in pcszBeginChars and the char _after_
929 * it is in pcszEndChars.
930 *
931 * Example:
932 + strhFindWord("This is an example.", "is");
933 + returns ...........^ this, but not the "is" in "This".
934 *
935 * The algorithm here uses strstr to find pszSearch in pszBuf
936 * and performs additional "is-word" checks for each item found
937 * (by calling strhIsWord).
938 *
939 * Note that this function is fairly slow compared to xstrFindWord.
940 *
941 *@@added V0.9.0 (99-11-08) [umoeller]
942 *@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
943 */
944
945PSZ strhFindWord(const char *pszBuf,
946 const char *pszSearch,
947 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
948 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
949{
950 PSZ pszReturn = 0;
951 ULONG cbBuf = strlen(pszBuf),
952 cbSearch = strlen(pszSearch);
953
954 if ((cbBuf) && (cbSearch))
955 {
956 const char *p = pszBuf;
957
958 do // while p
959 {
960 p = strstr(p, pszSearch);
961 if (p)
962 {
963 // string found:
964 // check if that's a word
965
966 if (strhIsWord(pszBuf,
967 p,
968 cbSearch,
969 pcszBeginChars,
970 pcszEndChars))
971 {
972 // valid end char:
973 pszReturn = (PSZ)p;
974 break;
975 }
976
977 p += cbSearch;
978 }
979 } while (p);
980
981 }
982 return (pszReturn);
983}
984
985/*
986 *@@ strhFindEOL:
987 * returns a pointer to the next \r, \n or null character
988 * following pszSearchIn. Stores the offset in *pulOffset.
989 *
990 * This should never return NULL because at some point,
991 * there will be a null byte in your string.
992 *
993 *@@added V0.9.4 (2000-07-01) [umoeller]
994 */
995
996PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
997 PULONG pulOffset) // out: offset (ptr can be NULL)
998{
999 const char *p = pcszSearchIn,
1000 *prc = 0;
1001 while (TRUE)
1002 {
1003 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
1004 {
1005 prc = p;
1006 break;
1007 }
1008 p++;
1009 }
1010
1011 if (pulOffset)
1012 *pulOffset = prc - pcszSearchIn;
1013
1014 return ((PSZ)prc);
1015}
1016
1017/*
1018 *@@ strhFindNextLine:
1019 * like strhFindEOL, but this returns the character
1020 * _after_ \r or \n. Note that this might return
1021 * a pointer to terminating NULL character also.
1022 */
1023
1024PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1025{
1026 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1027 // pEOL now points to the \r char or the terminating 0 byte;
1028 // if not null byte, advance pointer
1029 PSZ pNextLine = pEOL;
1030 if (*pNextLine == '\r')
1031 pNextLine++;
1032 if (*pNextLine == '\n')
1033 pNextLine++;
1034 if (pulOffset)
1035 *pulOffset = pNextLine - pszSearchIn;
1036 return (pNextLine);
1037}
1038
1039/*
1040 *@@ strhFindKey:
1041 * finds pszKey in pszSearchIn; similar to strhistr,
1042 * but this one makes sure the key is at the beginning
1043 * of a line. Spaces before the key are tolerated.
1044 * Returns NULL if the key was not found.
1045 *
1046 * Used by strhGetParameter/strhSetParameter; useful
1047 * for analyzing CONFIG.SYS settings.
1048 *
1049 *@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1050 *@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1051 */
1052
1053PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1054 const char *pcszKey, // in: key to search for
1055 PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1056 // can be NULL if not needed
1057{
1058 const char *p = NULL;
1059 PSZ pReturn = NULL;
1060 // BOOL fFound = FALSE;
1061
1062 p = pcszSearchIn;
1063 do {
1064 p = strhistr(p, pcszKey);
1065
1066 if ((p) && (p >= pcszSearchIn))
1067 {
1068 // make sure the key is at the beginning of a line
1069 // by going backwards until we find a char != " "
1070 const char *p2 = p;
1071 while ( (*p2 == ' ')
1072 && (p2 > pcszSearchIn)
1073 )
1074 p2--;
1075
1076 // if previous char is an EOL sign, go on
1077 if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1078 || (*(p2-1) == '\r')
1079 || (*(p2-1) == '\n')
1080 )
1081 {
1082 // now check whether the char after the search
1083 // is a "=" char
1084 // ULONG cbKey = strlen(pszKey);
1085
1086 // tolerate spaces before "="
1087 /* PSZ p3 = p;
1088 while (*(p3+cbKey) == ' ')
1089 p3++;
1090
1091 if (*(p3+cbKey) == '=') */
1092 {
1093 // found:
1094 pReturn = (PSZ)p; // go on, p contains found key
1095
1096 // test for all upper case?
1097 if (pfIsAllUpperCase)
1098 {
1099 ULONG cbKey2 = strlen(pcszKey),
1100 ul = 0;
1101 *pfIsAllUpperCase = TRUE;
1102 for (ul = 0; ul < cbKey2; ul++)
1103 if (islower(*(p+ul)))
1104 {
1105 *pfIsAllUpperCase = FALSE;
1106 break; // for
1107 }
1108 }
1109
1110 break; // do
1111 }
1112 } // else search next key
1113
1114 p++; // search on after this key
1115 }
1116 } while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1117
1118 return (pReturn);
1119}
1120
1121/*
1122 *@@ strhGetParameter:
1123 * searches pszSearchIn for the key pszKey; if found, it
1124 * returns a pointer to the following characters in pszSearchIn
1125 * and, if pszCopyTo != NULL, copies the rest of the line to
1126 * that buffer, of which cbCopyTo specified the size.
1127 *
1128 * If the key is not found, NULL is returned.
1129 * String search is done by calling strhFindKey.
1130 * This is useful for querying CONFIG.SYS settings.
1131 *
1132 * <B>Example:</B>
1133 *
1134 * this would return "YES" if you searched for "PAUSEONERROR=",
1135 * and "PAUSEONERROR=YES" existed in pszSearchIn.
1136 */
1137
1138PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1139 const char *pcszKey, // in: key to search for
1140 PSZ pszCopyTo, // out: key value
1141 ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1142{
1143 PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1144 prc = NULL;
1145 if (p)
1146 {
1147 prc = p + strlen(pcszKey);
1148 if (pszCopyTo)
1149 // copy to pszCopyTo
1150 {
1151 ULONG cb;
1152 PSZ pEOL = strhFindEOL(prc, &cb);
1153 if (pEOL)
1154 {
1155 if (cb > cbCopyTo)
1156 cb = cbCopyTo-1;
1157 strhncpy0(pszCopyTo, prc, cb);
1158 }
1159 }
1160 }
1161
1162 return (prc);
1163}
1164
1165/*
1166 *@@ strhSetParameter:
1167 * searches *ppszBuf for the key pszKey; if found, it
1168 * replaces the characters following this key up to the
1169 * end of the line with pszParam. If pszKey is not found in
1170 * *ppszBuf, it is appended to the file in a new line.
1171 *
1172 * If any changes are made, *ppszBuf is re-allocated.
1173 *
1174 * This function searches w/out case sensitivity.
1175 *
1176 * Returns a pointer to the new parameter inside the buffer.
1177 *
1178 *@@changed V0.9.0 [umoeller]: changed function prototype to PSZ* ppszSearchIn
1179 */
1180
1181PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1182 const char *pcszKey, // in: key to search for
1183 PSZ pszNewParam, // in: new parameter to set for key
1184 BOOL fRespectCase) // in: if TRUE, pszNewParam will
1185 // be converted to upper case if the found key is
1186 // in upper case also. pszNewParam should be in
1187 // lower case if you use this.
1188{
1189 BOOL fIsAllUpperCase = FALSE;
1190 PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1191 prc = NULL;
1192
1193 if (pKey)
1194 {
1195 // key found in file:
1196 // replace existing parameter
1197 PSZ pOldParam = pKey + strlen(pcszKey);
1198
1199 prc = pOldParam;
1200 // pOldParam now has the old parameter, which we
1201 // will overwrite now
1202
1203 if (pOldParam)
1204 {
1205 ULONG cbOldParam;
1206 PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1207 // pEOL now has first end-of-line after the parameter
1208
1209 if (pEOL)
1210 {
1211 XSTRING strBuf;
1212 ULONG ulOfs = 0;
1213
1214 PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1215 strncpy(pszOldCopy, pOldParam, cbOldParam);
1216 pszOldCopy[cbOldParam] = '\0';
1217
1218 xstrInit(&strBuf, 0);
1219 xstrset(&strBuf, *ppszBuf); // this must not be freed!
1220 /* xstrInit(&strFind, 0);
1221 xstrset(&strFind, pszOldCopy); // this must not be freed!
1222 xstrInit(&strReplace, 0);
1223 xstrset(&strReplace, pszNewParam); // this must not be freed!
1224 */
1225
1226 // check for upper case desired?
1227 if (fRespectCase)
1228 if (fIsAllUpperCase)
1229 strupr(pszNewParam);
1230
1231 xstrcrpl(&strBuf, &ulOfs, pszOldCopy, pszNewParam);
1232
1233 free(pszOldCopy);
1234
1235 *ppszBuf = strBuf.psz;
1236 }
1237 }
1238 }
1239 else
1240 {
1241 PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1242 + strlen(pcszKey)
1243 + strlen(pszNewParam)
1244 + 5); // 2 * \r\n + null byte
1245 // key not found: append to end of file
1246 sprintf(pszNew, "%s\r\n%s%s\r\n",
1247 *ppszBuf, pcszKey, pszNewParam);
1248 free(*ppszBuf);
1249 *ppszBuf = pszNew;
1250 }
1251
1252 return (prc);
1253}
1254
1255/*
1256 *@@ strhDeleteLine:
1257 * this deletes the line in pszSearchIn which starts with
1258 * the key pszKey. Returns TRUE if the line was found and
1259 * deleted.
1260 *
1261 * This copies within pszSearchIn.
1262 */
1263
1264BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1265 PSZ pszKey) // in: key to find
1266{
1267 BOOL fIsAllUpperCase = FALSE;
1268 PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1269 BOOL brc = FALSE;
1270
1271 if (pKey) {
1272 PSZ pEOL = strhFindEOL(pKey, NULL);
1273 // pEOL now has first end-of-line after the key
1274 if (pEOL)
1275 {
1276 // delete line by overwriting it with
1277 // the next line
1278 strcpy(pKey, pEOL+2);
1279 }
1280 else
1281 {
1282 // EOL not found: we must be at the end of the file
1283 *pKey = '\0';
1284 }
1285 brc = TRUE;
1286 }
1287
1288 return (brc);
1289}
1290
1291/*
1292 *@@ strhBeautifyTitle:
1293 * replaces all line breaks (0xd, 0xa) with spaces.
1294 */
1295
1296BOOL strhBeautifyTitle(PSZ psz)
1297{
1298 BOOL rc = FALSE;
1299 CHAR *p;
1300 while ((p = strchr(psz, 0xa)))
1301 {
1302 *p = ' ';
1303 rc = TRUE;
1304 }
1305 while ((p = strchr(psz, 0xd)))
1306 {
1307 *p = ' ';
1308 rc = TRUE;
1309 }
1310 return (rc);
1311}
1312
1313/*
1314 * strhFindAttribValue:
1315 * searches for pszAttrib in pszSearchIn; if found,
1316 * returns the first character after the "=" char.
1317 * If "=" is not found, a space, \r, and \n are
1318 * also accepted. This function searches without
1319 * respecting case.
1320 *
1321 * <B>Example:</B>
1322 + strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1323 +
1324 + returns ....................... ^ this address.
1325 *
1326 *@@added V0.9.0 [umoeller]
1327 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1328 */
1329
1330PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1331{
1332 PSZ prc = 0;
1333 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1334 p,
1335 p2;
1336 ULONG cbAttrib = strlen(pszAttrib);
1337
1338 // 1) find space char
1339 while ((p = strchr(pszSearchIn2, ' ')))
1340 {
1341 CHAR c;
1342 p++;
1343 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1344 // now check whether the p+strlen(pszAttrib)
1345 // is a valid end-of-tag character
1346 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1347 && ( (c == ' ')
1348 || (c == '>')
1349 || (c == '=')
1350 || (c == '\r')
1351 || (c == '\n')
1352 || (c == 0)
1353 )
1354 )
1355 {
1356 // yes:
1357 CHAR c2;
1358 p2 = p + cbAttrib;
1359 c2 = *p2;
1360 while ( ( (c2 == ' ')
1361 || (c2 == '=')
1362 || (c2 == '\n')
1363 || (c2 == '\r')
1364 )
1365 && (c2 != 0)
1366 )
1367 c2 = *++p2;
1368 prc = p2;
1369 break; // first while
1370 }
1371 pszSearchIn2++;
1372 }
1373 return (prc);
1374}
1375
1376/*
1377 * strhGetNumAttribValue:
1378 * stores the numerical parameter value of an HTML-style
1379 * tag in *pl.
1380 *
1381 * Returns the address of the tag parameter in the
1382 * search buffer, if found, or NULL.
1383 *
1384 * <B>Example:</B>
1385 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1386 *
1387 * stores 123 in the "l" variable.
1388 *
1389 *@@added V0.9.0 [umoeller]
1390 */
1391
1392PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1393 const char *pszTag, // e.g. "INDEX"
1394 PLONG pl) // out: numerical value
1395{
1396 PSZ pParam;
1397 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1398 sscanf(pParam, "%ld", pl);
1399
1400 return (pParam);
1401}
1402
1403/*
1404 * strhGetTextAttr:
1405 * retrieves the attribute value of a textual HTML-style tag
1406 * in a newly allocated buffer, which is returned,
1407 * or NULL if attribute not found.
1408 * If an attribute value is to contain spaces, it
1409 * must be enclosed in quotes.
1410 *
1411 * The offset of the attribute data in pszSearchIn is
1412 * returned in *pulOffset so that you can do multiple
1413 * searches.
1414 *
1415 * This returns a new buffer, which should be free()'d after use.
1416 *
1417 * <B>Example:</B>
1418 + ULONG ulOfs = 0;
1419 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1420 + ............^ ulOfs
1421 *
1422 * returns a new string with the value "blublub" (without
1423 * quotes) and sets ulOfs to 12.
1424 *
1425 *@@added V0.9.0 [umoeller]
1426 */
1427
1428PSZ strhGetTextAttr(const char *pszSearchIn,
1429 const char *pszTag,
1430 PULONG pulOffset) // out: offset where found
1431{
1432 PSZ pParam,
1433 pParam2,
1434 prc = NULL;
1435 ULONG ulCount = 0;
1436 LONG lNestingLevel = 0;
1437
1438 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1439 {
1440 // determine end character to search for: a space
1441 CHAR cEnd = ' ';
1442 if (*pParam == '\"')
1443 {
1444 // or, if the data is enclosed in quotes, a quote
1445 cEnd = '\"';
1446 pParam++;
1447 }
1448
1449 if (pulOffset)
1450 // store the offset
1451 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1452
1453 // now find end of attribute
1454 pParam2 = pParam;
1455 while (*pParam)
1456 {
1457 if (*pParam == cEnd)
1458 // end character found
1459 break;
1460 else if (*pParam == '<')
1461 // yet another opening tag found:
1462 // this is probably some "<" in the attributes
1463 lNestingLevel++;
1464 else if (*pParam == '>')
1465 {
1466 lNestingLevel--;
1467 if (lNestingLevel < 0)
1468 // end of tag found:
1469 break;
1470 }
1471 ulCount++;
1472 pParam++;
1473 }
1474
1475 // copy attribute to new buffer
1476 if (ulCount)
1477 {
1478 prc = (PSZ)malloc(ulCount+1);
1479 memcpy(prc, pParam2, ulCount);
1480 *(prc+ulCount) = 0;
1481 }
1482 }
1483 return (prc);
1484}
1485
1486/*
1487 * strhFindEndOfTag:
1488 * returns a pointer to the ">" char
1489 * which seems to terminate the tag beginning
1490 * after pszBeginOfTag.
1491 *
1492 * If additional "<" chars are found, we look
1493 * for additional ">" characters too.
1494 *
1495 * Note: You must pass the address of the opening
1496 * '<' character to this function.
1497 *
1498 * Example:
1499 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1500 + strhFindEndOfTag(pszTest)
1501 + returns.................................^ this.
1502 *
1503 *@@added V0.9.0 [umoeller]
1504 */
1505
1506PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1507{
1508 PSZ p = (PSZ)pszBeginOfTag,
1509 prc = NULL;
1510 LONG lNestingLevel = 0;
1511
1512 while (*p)
1513 {
1514 if (*p == '<')
1515 // another opening tag found:
1516 lNestingLevel++;
1517 else if (*p == '>')
1518 {
1519 // closing tag found:
1520 lNestingLevel--;
1521 if (lNestingLevel < 1)
1522 {
1523 // corresponding: return this
1524 prc = p;
1525 break;
1526 }
1527 }
1528 p++;
1529 }
1530
1531 return (prc);
1532}
1533
1534/*
1535 * strhGetBlock:
1536 * this complex function searches the given string
1537 * for a pair of opening/closing HTML-style tags.
1538 *
1539 * If found, this routine returns TRUE and does
1540 * the following:
1541 *
1542 * 1) allocate a new buffer, copy the text
1543 * enclosed by the opening/closing tags
1544 * into it and set *ppszBlock to that
1545 * buffer;
1546 *
1547 * 2) if the opening tag has any attributes,
1548 * allocate another buffer, copy the
1549 * attributes into it and set *ppszAttrs
1550 * to that buffer; if no attributes are
1551 * found, *ppszAttrs will be NULL;
1552 *
1553 * 3) set *pulOffset to the offset from the
1554 * beginning of *ppszSearchIn where the
1555 * opening tag was found;
1556 *
1557 * 4) advance *ppszSearchIn to after the
1558 * closing tag, so that you can do
1559 * multiple searches without finding the
1560 * same tags twice.
1561 *
1562 * All buffers should be freed using free().
1563 *
1564 * This returns the following:
1565 * -- 0: no error
1566 * -- 1: tag not found at all (doesn't have to be an error)
1567 * -- 2: begin tag found, but no corresponding end tag found. This
1568 * is a real error.
1569 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1570 *
1571 * <B>Example:</B>
1572 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1573 + PSZ pszBlock, pszAttrs;
1574 + ULONG ulOfs;
1575 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1576 *
1577 * would do the following:
1578 *
1579 * 1) set pszBlock to a new string containing "This is page 1."
1580 * without quotes;
1581 *
1582 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1583 *
1584 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1585 *
1586 * 4) pSearch would be advanced to point to the "More text"
1587 * string in the original buffer.
1588 *
1589 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1590 * for HTML parsing.
1591 *
1592 *@@added V0.9.0 [umoeller]
1593 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1594 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1595 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1596 */
1597
1598ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1599 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1600 PSZ pszTag,
1601 PSZ *ppszBlock, // out: block enclosed by the tags
1602 PSZ *ppszAttribs, // out: attributes of the opening tag
1603 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1604 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1605{
1606 ULONG ulrc = 1;
1607 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1608 pszSearch2 = pszBeginTag,
1609 pszClosingTag;
1610 ULONG cbTag = strlen(pszTag);
1611
1612 // go thru the block and check all tags if it's the
1613 // begin tag we're looking for
1614 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1615 {
1616 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1617 // yes: stop
1618 break;
1619 else
1620 pszBeginTag++;
1621 }
1622
1623 if (pszBeginTag)
1624 {
1625 // we found <TAG>:
1626 ULONG ulNestingLevel = 0;
1627
1628 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1629 // strchr(pszBeginTag, '>');
1630 if (pszEndOfBeginTag)
1631 {
1632 // does the caller want the attributes?
1633 if (ppszAttribs)
1634 {
1635 // yes: then copy them
1636 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1637 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1638 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1639 // add terminating 0
1640 *(pszAttrs + ulAttrLen) = 0;
1641
1642 *ppszAttribs = pszAttrs;
1643 }
1644
1645 // output offset of where we found the begin tag
1646 if (pulOfsBeginTag)
1647 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1648
1649 // now find corresponding closing tag (e.g. "</BODY>"
1650 pszBeginTag = pszEndOfBeginTag+1;
1651 // now we're behind the '>' char of the opening tag
1652 // increase offset of that too
1653 if (pulOfsBeginBlock)
1654 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1655
1656 // find next closing tag;
1657 // for the first run, pszSearch2 points to right
1658 // after the '>' char of the opening tag
1659 pszSearch2 = pszBeginTag;
1660 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1661 && (pszClosingTag = strstr(pszSearch2, "<"))
1662 )
1663 {
1664 // if we have another opening tag before our closing
1665 // tag, we need to have several closing tags before
1666 // we're done
1667 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1668 ulNestingLevel++;
1669 else
1670 {
1671 // is this ours?
1672 if ( (*(pszClosingTag+1) == '/')
1673 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1674 )
1675 {
1676 // we've found a matching closing tag; is
1677 // it ours?
1678 if (ulNestingLevel == 0)
1679 {
1680 // our closing tag found:
1681 // allocate mem for a new buffer
1682 // and extract all the text between
1683 // open and closing tags to it
1684 ULONG ulLen = pszClosingTag - pszBeginTag;
1685 if (ppszBlock)
1686 {
1687 PSZ pNew = (PSZ)malloc(ulLen + 1);
1688 strhncpy0(pNew, pszBeginTag, ulLen);
1689 *ppszBlock = pNew;
1690 }
1691
1692 // raise search offset to after the closing tag
1693 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1694
1695 ulrc = 0;
1696
1697 break;
1698 } else
1699 // not our closing tag:
1700 ulNestingLevel--;
1701 }
1702 }
1703 // no matching closing tag: search on after that
1704 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1705 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1706
1707 if (!pszClosingTag)
1708 // no matching closing tag found:
1709 // return 2 (closing tag not found)
1710 ulrc = 2;
1711 } // end if (pszBeginTag)
1712 else
1713 // no matching ">" for opening tag found:
1714 ulrc = 3;
1715 }
1716
1717 return (ulrc);
1718}
1719
1720/* ******************************************************************
1721 *
1722 * Miscellaneous
1723 *
1724 ********************************************************************/
1725
1726/*
1727 *@@ strhArrayAppend:
1728 * this appends a string to a "string array".
1729 *
1730 * A string array is considered a sequence of
1731 * zero-terminated strings in memory. That is,
1732 * after each string's null-byte, the next
1733 * string comes up.
1734 *
1735 * This is useful for composing a single block
1736 * of memory from, say, list box entries, which
1737 * can then be written to OS2.INI in one flush.
1738 *
1739 * To append strings to such an array, call this
1740 * function for each string you wish to append.
1741 * This will re-allocate *ppszRoot with each call,
1742 * and update *pcbRoot, which then contains the
1743 * total size of all strings (including all null
1744 * terminators).
1745 *
1746 * Pass *pcbRoot to PrfSaveProfileData to have the
1747 * block saved.
1748 *
1749 * Note: On the first call, *ppszRoot and *pcbRoot
1750 * _must_ be both NULL, or this crashes.
1751 */
1752
1753VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1754 const char *pcszNew, // in: string to append
1755 PULONG pcbRoot) // in/out: size of array
1756{
1757 ULONG cbNew = strlen(pcszNew);
1758 PSZ pszTemp = (PSZ)malloc(*pcbRoot
1759 + cbNew
1760 + 1); // two null bytes
1761 if (*ppszRoot)
1762 {
1763 // not first loop: copy old stuff
1764 memcpy(pszTemp,
1765 *ppszRoot,
1766 *pcbRoot);
1767 free(*ppszRoot);
1768 }
1769 // append new string
1770 strcpy(pszTemp + *pcbRoot,
1771 pcszNew);
1772 // update root
1773 *ppszRoot = pszTemp;
1774 // update length
1775 *pcbRoot += cbNew + 1;
1776}
1777
1778/*
1779 *@@ strhCreateDump:
1780 * this dumps a memory block into a string
1781 * and returns that string in a new buffer.
1782 *
1783 * You must free() the returned PSZ after use.
1784 *
1785 * The output looks like the following:
1786 *
1787 + 0000: FE FF 0E 02 90 00 00 00 ........
1788 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1789 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1790 *
1791 * Each line is terminated with a newline (\n)
1792 * character only.
1793 *
1794 *@@added V0.9.1 (2000-01-22) [umoeller]
1795 */
1796
1797PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1798 ULONG ulSize, // in: size of buffer
1799 ULONG ulIndent) // in: indentation of every line
1800{
 // return value; stays 0 unless strReturn has a buffer at the end
1801 PSZ pszReturn = 0;
 // collects all the finished dump lines
1802 XSTRING strReturn;
 // one complete output line (offset, hex bytes, ASCII) before
 // it is appended to strReturn
1803 CHAR szTemp[1000];
1804
1805 PBYTE pbCurrent = pb; // current byte
1806 ULONG ulCount = 0,
1807 ulCharsInLine = 0; // if this grows > 7, a new line is started
 // szLine receives the indentation plus the hex representation
 // of the bytes of the current line
1808 CHAR szLine[400] = "",
1809 szAscii[30] = " "; // ASCII representation; filled for every line
 // write positions within szLine and szAscii
1810 PSZ pszLine = szLine,
1811 pszAscii = szAscii;
1812
 // second argument presumably is the initial allocation size
 // of the string -- TODO confirm against xstrInit
1813 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1814
 // go thru all bytes of the buffer; ulCount is the index
 // of the byte which pbCurrent points to
1815 for (pbCurrent = pb;
1816 ulCount < ulSize;
1817 pbCurrent++, ulCount++)
1818 {
1819 if (ulCharsInLine == 0)
1820 {
 // first byte of a line: put ulIndent spaces in front
 // of the hex bytes
 // NOTE(review): ulIndent is not checked against the
 // size of szLine (400 bytes); a large indentation
 // would write beyond that array
1821 memset(szLine, ' ', ulIndent);
1822 pszLine += ulIndent;
1823 }
 // append this byte as two hex digits plus separator;
 // sprintf also writes the null terminator of szLine
1824 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1825
 // ASCII column: the byte itself if it is in the range
 // 32 to 126, a dot otherwise
1826 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1827 // printable character:
1828 *pszAscii = *pbCurrent;
1829 else
1830 *pszAscii = '.';
1831 pszAscii++;
1832
1833 ulCharsInLine++;
1834 if ( (ulCharsInLine > 7) // 8 bytes added?
1835 || (ulCount == ulSize-1) // end of buffer reached?
1836 )
1837 {
1838 // if we haven't had eight bytes yet,
1839 // fill buffer up to eight bytes with spaces
 // NOTE(review): every byte takes the width of
 // "%02lX " above; confirm that the padding literal
 // below has the same width per missing byte
1840 ULONG ul2;
1841 for (ul2 = ulCharsInLine;
1842 ul2 < 8;
1843 ul2++)
1844 pszLine += sprintf(pszLine, " ");
1845
 // compose the complete line; clearing the lowest three
 // bits of ulCount gives the index of the first byte
 // of this line
1846 sprintf(szTemp, "%04lX: %s %s\n",
1847 (ulCount & 0xFFFFFFF8), // offset in hex
1848 szLine, // bytes string
1849 szAscii); // ASCII string
1850 xstrcat(&strReturn, szTemp);
1851
1852 // restart line buffer
1853 pszLine = szLine;
1854
1855 // clear ASCII buffer
 // NOTE(review): this only overwrites as many bytes of
 // szAscii as the literal is long; characters which a
 // previous full line has stored behind that remain in
 // the buffer and would show up again after a shorter
 // last line -- confirm the length of this literal
1856 strcpy(szAscii, " ");
1857 pszAscii = szAscii;
1858
1859 // reset line counter
1860 ulCharsInLine = 0;
1861 }
1862 }
1863
 // hand the string's buffer over to the caller, but only
 // if the string has allocated memory at all; otherwise
 // 0 is returned
1864 if (strReturn.cbAllocated)
1865 pszReturn = strReturn.psz;
1866
1867 return (pszReturn);
1868}
1869
1870/* ******************************************************************
1871 *
1872 * Wildcard matching
1873 *
1874 ********************************************************************/
1875
1876/*
1877 * The following code has been taken from "fnmatch.zip".
1878 *
1879 * (c) 1994-1996 by Eberhard Mattes.
1880 */
1881
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. */

#define IS_OS2_COMP_SEP(C)  ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, that is, if C is either the terminating null character or
 * a separator. */

#define IS_OS2_COMP_END(C)  ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 * skip_comp_os2:
 *      returns a pointer to the next component of the path SRC,
 *      for OS/2 and DOS styles: the first character behind the
 *      next separator. If there is no more separator, a pointer
 *      to the terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *p = src;

    /* Run to the end of the current component. */

    for (; !IS_OS2_COMP_END(*p); ++p)
        ;

    /* If the component ended with a separator (and not with the
     * end of the string), step over that separator. */

    return (*p != 0) ? p + 1 : p;
}
1916
/*
 * has_colon:
 *      returns 1 if the path P contains a colon anywhere,
 *      or 0 if it does not.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    return (strchr((const char *)p, ':') != NULL);
}
1933
1934/*
1935 * match_comp_os2:
1936 * Compare a single component (directory name or file name) of the
1937 * paths, for OS/2 and DOS styles. MASK and NAME point into a
1938 * component of the wildcard and the name to be checked, respectively.
1939 * Comparing stops at the next separator. The FLAGS argument is the
1940 * same as that of fnmatch(). HAS_DOT is true if a dot is in the
1941 * current component of NAME. The number of dots is not restricted,
1942 * even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1943 * Note that this function is recursive.
1944 *
1945 * (c) 1994-1996 by Eberhard Mattes.
1946 */
1947
1948static int match_comp_os2(const unsigned char *mask,
1949 const unsigned char *name,
1950 unsigned flags,
1951 int has_dot)
1952{
1953 int rc;
1954
1955 for (;;)
1956 switch (*mask)
1957 {
1958 case 0:
1959
1960 /* There must be no extra characters at the end of NAME when
1961 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1962 * in that case, NAME may point to a separator. */
1963
1964 if (*name == 0)
1965 return FNM_MATCH;
1966 if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1967 return FNM_MATCH;
1968 return FNM_NOMATCH;
1969
1970 case '/':
1971 case '\\':
1972
1973 /* Separators match separators. */
1974
1975 if (IS_OS2_COMP_SEP(*name))
1976 return FNM_MATCH;
1977
1978 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1979 * is ignored at the end of NAME. */
1980
1981 if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1982 return FNM_MATCH;
1983
1984 /* Stop comparing at the separator. */
1985
1986 return FNM_NOMATCH;
1987
1988 case '?':
1989
1990 /* A question mark matches one character. It does not match
1991 * a dot. At the end of the component (and before a dot),
1992 * it also matches zero characters. */
1993
1994 if (*name != '.' && !IS_OS2_COMP_END(*name))
1995 ++name;
1996 ++mask;
1997 break;
1998
1999 case '*':
2000
2001 /* An asterisk matches zero or more characters. In DOS
2002 * mode, dots are not matched. */
2003
2004 do
2005 {
2006 ++mask;
2007 }
2008 while (*mask == '*');
2009 for (;;)
2010 {
2011 rc = match_comp_os2(mask, name, flags, has_dot);
2012 if (rc != FNM_NOMATCH)
2013 return rc;
2014 if (IS_OS2_COMP_END(*name))
2015 return FNM_NOMATCH;
2016 if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
2017 return FNM_NOMATCH;
2018 ++name;
2019 }
2020
2021 case '.':
2022
2023 /* A dot matches a dot. It also matches the implicit dot at
2024 * the end of a dot-less NAME. */
2025
2026 ++mask;
2027 if (*name == '.')
2028 ++name;
2029 else if (has_dot || !IS_OS2_COMP_END(*name))
2030 return FNM_NOMATCH;
2031 break;
2032
2033 default:
2034
2035 /* All other characters match themselves. */
2036
2037 if (flags & _FNM_IGNORECASE)
2038 {
2039 if (tolower(*mask) != tolower(*name))
2040 return FNM_NOMATCH;
2041 }
2042 else
2043 {
2044 if (*mask != *name)
2045 return FNM_NOMATCH;
2046 }
2047 ++mask;
2048 ++name;
2049 break;
2050 }
2051}
2052
2053/*
2054 * match_comp:
2055 * compare a single component (directory name or file name) of the
2056 * paths, for all styles which need component-by-component matching.
2057 * MASK and NAME point to the start of a component of the wildcard and
2058 * the name to be checked, respectively. Comparing stops at the next
2059 * separator. The FLAGS argument is the same as that of fnmatch().
2060 * Return FNM_MATCH iff MASK and NAME match.
2061 *
2062 * (c) 1994-1996 by Eberhard Mattes.
2063 */
2064
2065static int match_comp(const unsigned char *mask,
2066 const unsigned char *name,
2067 unsigned flags)
2068{
2069 const unsigned char *s;
2070
2071 switch (flags & _FNM_STYLE_MASK)
2072 {
2073 case _FNM_OS2:
2074 case _FNM_DOS:
2075
2076 /* For OS/2 and DOS styles, we add an implicit dot at the end of
2077 * the component if the component doesn't include a dot. */
2078
2079 s = name;
2080 while (!IS_OS2_COMP_END(*s) && *s != '.')
2081 ++s;
2082 return match_comp_os2(mask, name, flags, *s == '.');
2083
2084 default:
2085 return FNM_ERR;
2086 }
2087}
2088
2089/* In Unix styles, / separates components of a path. This macro
2090 * returns true iff C is a separator. */
2091
2092#define IS_UNIX_COMP_SEP(C) ((C) == '/')
2093
2094
2095/* This macro returns true if C is at the end of a component of a
2096 * path. */
2097
2098#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP (C))
2099
2100/*
2101 * match_unix:
2102 * match complete paths for Unix styles. The FLAGS argument is the
2103 * same as that of fnmatch(). COMP points to the start of the current
2104 * component in NAME. Return FNM_MATCH iff MASK and NAME match. The
2105 * backslash character is used for escaping ? and * unless
2106 * FNM_NOESCAPE is set.
2107 *
2108 * (c) 1994-1996 by Eberhard Mattes.
2109 */
2110
2111static int match_unix(const unsigned char *mask,
2112 const unsigned char *name,
2113 unsigned flags,
2114 const unsigned char *comp)
2115{
2116 unsigned char c1, c2;
2117 char invert, matched;
2118 const unsigned char *start;
2119 int rc;
2120
2121 for (;;)
2122 switch (*mask)
2123 {
2124 case 0:
2125
2126 /* There must be no extra characters at the end of NAME when
2127 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
2128 * in that case, NAME may point to a separator. */
2129
2130 if (*name == 0)
2131 return FNM_MATCH;
2132 if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
2133 return FNM_MATCH;
2134 return FNM_NOMATCH;
2135
2136 case '?':
2137
2138 /* A question mark matches one character. It does not match
2139 * the component separator if FNM_PATHNAME is set. It does
2140 * not match a dot at the start of a component if FNM_PERIOD
2141 * is set. */
2142
2143 if (*name == 0)
2144 return FNM_NOMATCH;
2145 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2146 return FNM_NOMATCH;
2147 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2148 return FNM_NOMATCH;
2149 ++mask;
2150 ++name;
2151 break;
2152
2153 case '*':
2154
2155 /* An asterisk matches zero or more characters. It does not
2156 * match the component separator if FNM_PATHNAME is set. It
2157 * does not match a dot at the start of a component if
2158 * FNM_PERIOD is set. */
2159
2160 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2161 return FNM_NOMATCH;
2162 do
2163 {
2164 ++mask;
2165 }
2166 while (*mask == '*');
2167 for (;;)
2168 {
2169 rc = match_unix(mask, name, flags, comp);
2170 if (rc != FNM_NOMATCH)
2171 return rc;
2172 if (*name == 0)
2173 return FNM_NOMATCH;
2174 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2175 return FNM_NOMATCH;
2176 ++name;
2177 }
2178
2179 case '/':
2180
2181 /* Separators match only separators. If _FNM_PATHPREFIX is
2182 * set, a trailing separator in MASK is ignored at the end
2183 * of NAME. */
2184
2185 if (!(IS_UNIX_COMP_SEP(*name)
2186 || ((flags & _FNM_PATHPREFIX) && *name == 0
2187 && (mask[1] == 0
2188 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
2189 && mask[2] == 0)))))
2190 return FNM_NOMATCH;
2191
2192 ++mask;
2193 if (*name != 0)
2194 ++name;
2195
2196 /* This is the beginning of a new component if FNM_PATHNAME
2197 * is set. */
2198
2199 if (flags & FNM_PATHNAME)
2200 comp = name;
2201 break;
2202
2203 case '[':
2204
2205 /* A set of characters. Always case-sensitive. */
2206
2207 if (*name == 0)
2208 return FNM_NOMATCH;
2209 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2210 return FNM_NOMATCH;
2211 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2212 return FNM_NOMATCH;
2213
2214 invert = 0;
2215 matched = 0;
2216 ++mask;
2217
2218 /* If the first character is a ! or ^, the set matches all
2219 * characters not listed in the set. */
2220
2221 if (*mask == '!' || *mask == '^')
2222 {
2223 ++mask;
2224 invert = 1;
2225 }
2226
2227 /* Loop over all the characters of the set. The loop ends
2228 * if the end of the string is reached or if a ] is
2229 * encountered unless it directly follows the initial [ or
2230 * [-. */
2231
2232 start = mask;
2233 while (!(*mask == 0 || (*mask == ']' && mask != start)))
2234 {
2235 /* Get the next character which is optionally preceded
2236 * by a backslash. */
2237
2238 c1 = *mask++;
2239 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2240 {
2241 if (*mask == 0)
2242 break;
2243 c1 = *mask++;
2244 }
2245
2246 /* Ranges of characters are written as a-z. Don't
2247 * forget to check for the end of the string and to
2248 * handle the backslash. If the character after - is a
2249 * ], it isn't a range. */
2250
2251 if (*mask == '-' && mask[1] != ']')
2252 {
2253 ++mask; /* Skip the - character */
2254 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2255 ++mask;
2256 if (*mask == 0)
2257 break;
2258 c2 = *mask++;
2259 }
2260 else
2261 c2 = c1;
2262
2263 /* Now check whether this character or range matches NAME. */
2264
2265 if (c1 <= *name && *name <= c2)
2266 matched = 1;
2267 }
2268
2269 /* If the end of the string is reached before a ] is found,
2270 * back up to the [ and compare it to NAME. */
2271
2272 if (*mask == 0)
2273 {
2274 if (*name != '[')
2275 return FNM_NOMATCH;
2276 ++name;
2277 mask = start;
2278 if (invert)
2279 --mask;
2280 }
2281 else
2282 {
2283 if (invert)
2284 matched = !matched;
2285 if (!matched)
2286 return FNM_NOMATCH;
2287 ++mask; /* Skip the ] character */
2288 if (*name != 0)
2289 ++name;
2290 }
2291 break;
2292
2293 case '\\':
2294 ++mask;
2295 if (flags & FNM_NOESCAPE)
2296 {
2297 if (*name != '\\')
2298 return FNM_NOMATCH;
2299 ++name;
2300 }
2301 else if (*mask == '*' || *mask == '?')
2302 {
2303 if (*mask != *name)
2304 return FNM_NOMATCH;
2305 ++mask;
2306 ++name;
2307 }
2308 break;
2309
2310 default:
2311
2312 /* All other characters match themselves. */
2313
2314 if (flags & _FNM_IGNORECASE)
2315 {
2316 if (tolower(*mask) != tolower(*name))
2317 return FNM_NOMATCH;
2318 }
2319 else
2320 {
2321 if (*mask != *name)
2322 return FNM_NOMATCH;
2323 }
2324 ++mask;
2325 ++name;
2326 break;
2327 }
2328}
2329
2330/*
2331 * _fnmatch_unsigned:
2332 * Check whether the path name NAME matches the wildcard MASK.
2333 *
2334 * Return:
2335 * -- 0 (FNM_MATCH) if it matches,
2336 * -- _FNM_NOMATCH if it doesn't,
2337 * -- FNM_ERR on error.
2338 *
2339 * The operation of this function is controlled by FLAGS.
2340 * This is an internal function, with unsigned arguments.
2341 *
2342 * (c) 1994-1996 by Eberhard Mattes.
2343 */
2344
2345static int _fnmatch_unsigned(const unsigned char *mask,
2346 const unsigned char *name,
2347 unsigned flags)
2348{
2349 int m_drive, n_drive,
2350 rc;
2351
2352 /* Match and skip the drive name if present. */
2353
2354 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2355 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2356
2357 if (m_drive != n_drive)
2358 {
2359 if (m_drive == -1 || n_drive == -1)
2360 return FNM_NOMATCH;
2361 if (!(flags & _FNM_IGNORECASE))
2362 return FNM_NOMATCH;
2363 if (tolower(m_drive) != tolower(n_drive))
2364 return FNM_NOMATCH;
2365 }
2366
2367 if (m_drive != -1)
2368 mask += 2;
2369 if (n_drive != -1)
2370 name += 2;
2371
2372 /* Colons are not allowed in path names, except for the drive name,
2373 * which was skipped above. */
2374
2375 if (has_colon(mask) || has_colon(name))
2376 return FNM_ERR;
2377
2378 /* The name "\\server\path" should not be matched by mask
2379 * "\*\server\path". Ditto for /. */
2380
2381 switch (flags & _FNM_STYLE_MASK)
2382 {
2383 case _FNM_OS2:
2384 case _FNM_DOS:
2385
2386 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2387 {
2388 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2389 return FNM_NOMATCH;
2390 name += 2;
2391 mask += 2;
2392 }
2393 break;
2394
2395 case _FNM_POSIX:
2396
2397 if (name[0] == '/' && name[1] == '/')
2398 {
2399 int i;
2400
2401 name += 2;
2402 for (i = 0; i < 2; ++i)
2403 if (mask[0] == '/')
2404 ++mask;
2405 else if (mask[0] == '\\' && mask[1] == '/')
2406 mask += 2;
2407 else
2408 return FNM_NOMATCH;
2409 }
2410
2411 /* In Unix styles, treating ? and * w.r.t. components is simple.
2412 * No need to do matching component by component. */
2413
2414 return match_unix(mask, name, flags, name);
2415 }
2416
2417 /* Now compare all the components of the path name, one by one.
2418 * Note that the path separator must not be enclosed in brackets. */
2419
2420 while (*mask != 0 || *name != 0)
2421 {
2422
2423 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2424 * is reached even if there are components left in NAME. */
2425
2426 if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2427 return FNM_MATCH;
2428
2429 /* Compare a single component of the path name. */
2430
2431 rc = match_comp(mask, name, flags);
2432 if (rc != FNM_MATCH)
2433 return rc;
2434
2435 /* Skip to the next component or to the end of the path name. */
2436
2437 mask = skip_comp_os2(mask);
2438 name = skip_comp_os2(name);
2439 }
2440
2441 /* If we reached the ends of both strings, the names match. */
2442
2443 if (*mask == 0 && *name == 0)
2444 return FNM_MATCH;
2445
2446 /* The names do not match. */
2447
2448 return FNM_NOMATCH;
2449}
2450
2451/*
2452 *@@ strhMatchOS2:
2453 * this matches wildcards, similar to what DosEditName does.
2454 * However, this does not require a file to be present, but
2455 * works on strings only.
2456 */
2457
2458BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2459 const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2460{
2461 return ((BOOL)(_fnmatch_unsigned(pcszMask,
2462 pcszName,
2463 _FNM_OS2 | _FNM_IGNORECASE)
2464 == FNM_MATCH)
2465 );
2466}
2467
2468/* ******************************************************************
2469 *
2470 * Fast string searches
2471 *
2472 ********************************************************************/
2473
// ASSERT is defined to expand to nothing here, so every
// ASSERT(...) statement after this line is removed by the
// preprocessor and its argument is never evaluated.
// NOTE(review): presumably this is for the SFL code below,
// which is not changed more than necessary -- confirm
2474#define ASSERT(a)
2475
2476/*
2477 * The following code has been taken from the "Standard
2478 * Function Library", file sflfind.c, and only slightly
2479 * modified to conform to the rest of this file.
2480 *
2481 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2482 * Revised: 98/05/04
2483 *
2484 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2485 *
2486 * The SFL Licence allows incorporating SFL code into other
2487 * programs, as long as the copyright is reprinted and the
2488 * code is marked as modified, so this is what we do.
2489 */
2490
2491/*
2492 *@@ strhmemfind:
2493 * searches for a pattern in a block of memory using the
2494 * Boyer-Moore-Horspool-Sunday algorithm.
2495 *
2496 * The block and pattern may contain any values; you must
2497 * explicitly provide their lengths. If you search for strings,
2498 * use strlen() on the buffers.
2499 *
2500 * Returns a pointer to the pattern if found within the block,
2501 * or NULL if the pattern was not found.
2502 *
2503 * This algorithm needs a "shift table" to cache data for the
2504 * search pattern. This table can be reused when performing
2505 * several searches with the same pattern.
2506 *
2507 * "shift" must point to an array big enough to hold 256 (8**2)
2508 * "size_t" values.
2509 *
2510 * If (*repeat_find == FALSE), the shift table is initialized.
2511 * So on the first search with a given pattern, *repeat_find
2512 * should be FALSE. This function sets it to TRUE after the
2513 * shift table is initialised, allowing the initialisation
2514 * phase to be skipped on subsequent searches.
2515 *
2516 * This function is most effective when repeated searches are
2517 * made for the same pattern in one or more large buffers.
2518 *
2519 * Example:
2520 *
2521 + PSZ pszHaystack = "This is a sample string.",
2522 + pszNeedle = "string";
2523 + size_t shift[256];
2524 + BOOL fRepeat = FALSE;
2525 +
2526 + PSZ pFound = strhmemfind(pszHaystack,
2527 + strlen(pszHaystack), // block size
2528 + pszNeedle,
2529 + strlen(pszNeedle), // pattern size
2530 + shift,
2531 + &fRepeat);
2532 *
2533 * Taken from the "Standard Function Library", file sflfind.c.
2534 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2535 * Slightly modified by umoeller.
2536 *
2537 *@@added V0.9.3 (2000-05-08) [umoeller]
2538 */
2539
2540void* strhmemfind(const void *in_block, // in: block containing data
2541 size_t block_size, // in: size of block in bytes
2542 const void *in_pattern, // in: pattern to search for
2543 size_t pattern_size, // in: size of pattern block
2544 size_t *shift, // in/out: shift table (search buffer)
2545 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
2546{
2547 size_t byte_nbr, // Distance through block
2548 match_size; // Size of matched part
2549 const unsigned char
2550 *match_base = NULL, // Base of match of pattern
2551 *match_ptr = NULL, // Point within current match
2552 *limit = NULL; // Last potiental match point
2553 const unsigned char
2554 *block = (unsigned char *) in_block, // Concrete pointer to block data
2555 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2556
2557 if ( (block == NULL)
2558 || (pattern == NULL)
2559 || (shift == NULL)
2560 )
2561 return (NULL);
2562
2563 // Pattern must be smaller or equal in size to string
2564 if (block_size < pattern_size)
2565 return (NULL); // Otherwise it's not found
2566
2567 if (pattern_size == 0) // Empty patterns match at start
2568 return ((void *)block);
2569
2570 // Build the shift table unless we're continuing a previous search
2571
2572 // The shift table determines how far to shift before trying to match
2573 // again, if a match at this point fails. If the byte after where the
2574 // end of our pattern falls is not in our pattern, then we start to
2575 // match again after that byte; otherwise we line up the last occurence
2576 // of that byte in our pattern under that byte, and try match again.
2577
2578 if (!repeat_find || !*repeat_find)
2579 {
2580 for (byte_nbr = 0;
2581 byte_nbr < 256;
2582 byte_nbr++)
2583 shift[byte_nbr] = pattern_size + 1;
2584 for (byte_nbr = 0;
2585 byte_nbr < pattern_size;
2586 byte_nbr++)
2587 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2588
2589 if (repeat_find)
2590 *repeat_find = TRUE;
2591 }
2592
2593 // Search for the block, each time jumping up by the amount
2594 // computed in the shift table
2595
2596 limit = block + (block_size - pattern_size + 1);
2597 ASSERT (limit > block);
2598
2599 for (match_base = block;
2600 match_base < limit;
2601 match_base += shift[*(match_base + pattern_size)])
2602 {
2603 match_ptr = match_base;
2604 match_size = 0;
2605
2606 // Compare pattern until it all matches, or we find a difference
2607 while (*match_ptr++ == pattern[match_size++])
2608 {
2609 ASSERT (match_size <= pattern_size &&
2610 match_ptr == (match_base + match_size));
2611
2612 // If we found a match, return the start address
2613 if (match_size >= pattern_size)
2614 return ((void*)(match_base));
2615
2616 }
2617 }
2618 return (NULL); // Found nothing
2619}
2620
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string
 *      and pattern are null-terminated strings.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the string, or NULL if the pattern was not found
 *      or if either argument is NULL. An empty pattern matches
 *      at the start of the string.
 *
 *      Characters are compared after folding them with tolower(),
 *      so the result depends on the current C locale. For an
 *      exact (case-sensitive) search, see strhmemfind. Will not
 *      work on multibyte characters.
 *
 *      All characters are handled as unsigned char, so bytes
 *      with the high bit set (e.g. umlauts in a codepage) are
 *      safe to pass.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        offset,                         // Start of the current candidate window
        last_offset,                    // Last offset where the pattern still fits
        match_size,                     // Size of matched part
        step;                           // Distance to the next candidate window
    const unsigned char
        *text,                          // string, viewed as unsigned bytes
        *pat;                           // pattern, viewed as unsigned bytes

    if (string == NULL || pattern == NULL)
        return (NULL);

    // tolower() is only defined for values representable as unsigned
    // char (or EOF), so never hand it a plain, possibly negative, char
    text = (const unsigned char *) string;
    pat  = (const unsigned char *) pattern;

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    last_offset = string_size - pattern_size;
    offset = 0;

    for (;;)
    {
        // Compare pattern until it all matches, or we find a difference
        match_size = 0;
        while (    (match_size < pattern_size)
                && (tolower (text [offset + match_size]) == tolower (pat [match_size]))
              )
            match_size++;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *) (string + offset));

        // The shift is selected by the character just behind the window;
        // since offset <= last_offset, this is at worst the terminating
        // null byte of the string.
        step = shift [(unsigned char) tolower (text [offset + pattern_size])];

        // Stop if the next window would no longer fit into the string
        if (step > last_offset - offset)
            break;

        offset += step;
    }

    return (NULL);                      // Found nothing
}
2715
Note: See TracBrowser for help on using the repository browser.