source: trunk/src/helpers/stringh.c@ 13

Last change on this file since 13 was 13, checked in by umoeller, 25 years ago

Updates for V0.9.6.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 84.8 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the XWorkplace source package.
28 * XWorkplace is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#include <os2.h>
45
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <math.h>
51
52#include "setup.h" // code generation and debugging options
53
54#include "helpers\stringh.h"
55#include "helpers\xstring.h" // extended string helpers
56
57#pragma hdrstop
58
59/*
60 *@@category: Helpers\C helpers\String management
61 */
62
/*
 *@@ strhdup:
 *      NULL-tolerant front end to strdup.
 *
 *      Returns a newly allocated copy of pszSource, which
 *      the caller must free(), or NULL if pszSource is NULL.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhdup(const char *pszSource)
{
    // only hand non-NULL pointers to strdup
    return ((pszSource != NULL) ? strdup(pszSource) : NULL);
}
79
/*
 *@@ strhistr:
 *      like strstr, but case-insensitive.
 *
 *      Returns a pointer to the first occurence of string2
 *      in string1 (ignoring case, as decided by toupper),
 *      or NULL if there is none or if either pointer is NULL.
 *      As with strstr, an empty string2 matches at string1.
 *
 *      The search is done in place: no temporary copies of
 *      the strings are allocated, so the function cannot
 *      fail because of low memory.
 *
 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rüdiger Ihle
 */

PSZ strhistr(const char *string1, const char *string2)
{
    const char *pStart;

    if ((!string1) || (!string2))
        return (NULL);

    if (*string2 == 0)
        // empty search string: matches right away, like strstr
        return ((PSZ)string1);

    for (pStart = string1; *pStart; pStart++)
    {
        const char *p1 = pStart,
                   *p2 = string2;

        // compare as unsigned char: toupper is undefined for
        // negative values other than EOF.
        // If p1 hits the null terminator first, the comparison
        // fails (0 != non-null char), so we never read past it.
        while (    (*p2)
                && (toupper((unsigned char)*p1) == toupper((unsigned char)*p2))
              )
        {
            p1++;
            p2++;
        }

        if (*p2 == 0)
            // all of string2 matched
            return ((PSZ)pStart);
    }

    return (NULL);
}
118
/*
 *@@ strhncpy0:
 *      like strncpy, but always appends a 0 character.
 *
 *      Copies at most cbSource characters from pszSource,
 *      stopping early at a null byte in the source, and then
 *      terminates pszTarget. pszTarget must therefore have
 *      room for cbSource + 1 bytes.
 *
 *      Returns the number of characters copied, not counting
 *      the terminator.
 */

ULONG strhncpy0(PSZ pszTarget,
                const char *pszSource,
                ULONG cbSource)
{
    ULONG cCopied = 0;

    // the length check comes first so that the source is
    // not read at all if cbSource is 0
    while (    (cCopied < cbSource)
            && (pszSource[cCopied] != 0)
          )
    {
        pszTarget[cCopied] = pszSource[cCopied];
        cCopied++;
    }

    pszTarget[cCopied] = 0;

    return (cCopied);
}
141
/*
 * strhCount:
 *      this counts the occurences of c in pszSearch.
 *
 *      Returns 0 if pszSearch is NULL or if c is the null
 *      character. (strchr would "find" the terminator for
 *      c == 0, and scanning on from there would leave the
 *      string.)
 */

ULONG strhCount(const char *pszSearch,
                CHAR c)
{
    ULONG ulCount = 0;

    if ((pszSearch) && (c != 0))
    {
        const char *p;
        // each hit restarts the search one character later
        for (p = strchr(pszSearch, c);
             p;
             p = strchr(p + 1, c))
            ulCount++;
    }

    return (ulCount);
}
165
/*
 *@@ strhIsDecimal:
 *      returns TRUE if psz consists of decimal digits only.
 *
 *      An empty string has no non-digit characters and
 *      therefore yields TRUE also. A NULL pointer yields FALSE.
 */

BOOL strhIsDecimal(PSZ psz)
{
    const char *p;

    if (!psz)
        return (FALSE);

    for (p = psz; *p != 0; p++)
        // cast needed: isdigit is undefined for negative
        // values, which plain char can have for chars > 127
        if (isdigit((unsigned char)*p) == 0)
            return (FALSE);

    return (TRUE);
}
183
184/*
185 *@@ strhSubstr:
186 * this creates a new PSZ containing the string
187 * from pBegin to pEnd, excluding the pEnd character.
188 * The new string is null-terminated. The caller
189 * must free() the new string after use.
190 *
191 * Example:
192 + "1234567890"
193 + ^ ^
194 + p1 p2
195 + strhSubstr(p1, p2)
196 * would return a new string containing "2345678".
197 */
198
199PSZ strhSubstr(const char *pBegin, const char *pEnd)
200{
201 ULONG cbSubstr = (pEnd - pBegin);
202 PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
203 strhncpy0(pszSubstr, pBegin, cbSubstr);
204 return (pszSubstr);
205}
206
207/*
208 *@@ strhExtract:
209 * searches pszBuf for the cOpen character and returns
210 * the data in between cOpen and cClose, excluding
211 * those two characters, in a newly allocated buffer
212 * which you must free() afterwards.
213 *
214 * Spaces and newlines/linefeeds are skipped.
215 *
216 * If the search was successful, the new buffer
217 * is returned and, if (ppEnd != NULL), *ppEnd points
218 * to the first character after the cClose character
219 * found in the buffer.
220 *
221 * If the search was not successful, NULL is
222 * returned, and *ppEnd is unchanged.
223 *
224 * If another cOpen character is found before
225 * cClose, matching cClose characters will be skipped.
226 * You can therefore nest the cOpen and cClose
227 * characters.
228 *
229 * This function ignores cOpen and cClose characters
230 * in C-style comments and strings surrounded by
231 * double quotes.
232 *
233 * Example:
234 + PSZ pszBuf = "KEYWORD { --blah-- } next",
235 + pEnd;
236 + strhExtract(pszBuf,
237 + '{', '}',
238 + &pEnd)
239 * would return a new buffer containing " --blah-- ",
240 * and ppEnd would afterwards point to the space
241 * before "next" in the static buffer.
242 *
243 *@@added V0.9.0 [umoeller]
244 */
245
246PSZ strhExtract(PSZ pszBuf, // in: search buffer
247 CHAR cOpen, // in: opening char
248 CHAR cClose, // in: closing char
249 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
250{
251 PSZ pszReturn = NULL;
252
253 if (pszBuf)
254 {
255 PSZ pOpen = strchr(pszBuf, cOpen);
256 if (pOpen)
257 {
258 // opening char found:
259 // now go thru the whole rest of the buffer
260 PSZ p = pOpen+1;
261 LONG lLevel = 1; // if this goes 0, we're done
262 while (*p)
263 {
264 if (*p == cOpen)
265 lLevel++;
266 else if (*p == cClose)
267 {
268 lLevel--;
269 if (lLevel <= 0)
270 {
271 // matching closing bracket found:
272 // extract string
273 pszReturn = strhSubstr(pOpen+1, // after cOpen
274 p); // excluding cClose
275 if (ppEnd)
276 *ppEnd = p+1;
277 break; // while (*p)
278 }
279 }
280 else if (*p == '\"')
281 {
282 // beginning of string:
283 PSZ p2 = p+1;
284 // find end of string
285 while ((*p2) && (*p2 != '\"'))
286 p2++;
287
288 if (*p2 == '\"')
289 // closing quote found:
290 // search on after that
291 p = p2; // raised below
292 else
293 break; // while (*p)
294 }
295
296 p++;
297 }
298 }
299 }
300
301 return (pszReturn);
302}
303
304/*
305 *@@ strhQuote:
306 * similar to strhExtract, except that
307 * opening and closing chars are the same,
308 * and therefore no nesting is possible.
309 * Useful for extracting stuff between
310 * quotes.
311 *
312 *@@added V0.9.0 [umoeller]
313 */
314
315PSZ strhQuote(PSZ pszBuf,
316 CHAR cQuote,
317 PSZ *ppEnd)
318{
319 PSZ pszReturn = NULL,
320 p1 = NULL;
321 if ((p1 = strchr(pszBuf, cQuote)))
322 {
323 PSZ p2 = strchr(p1+1, cQuote);
324 if (p2)
325 {
326 pszReturn = strhSubstr(p1+1, p2);
327 if (ppEnd)
328 // store closing char
329 *ppEnd = p2 + 1;
330 }
331 }
332
333 return (pszReturn);
334}
335
/*
 *@@ strhStrip:
 *      removes all double spaces, i.e. every run of
 *      several spaces is reduced to a single space.
 *      This copies within the "psz" buffer.
 *      If any double spaces are found, the
 *      string will be shorter than before,
 *      but the buffer is _not_ reallocated,
 *      so there will be unused bytes at the
 *      end.
 *
 *      Returns the number of spaces removed
 *      (0 if psz is NULL).
 *
 *      This runs in a single pass over the string.
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhStrip(PSZ psz)         // in/out: string
{
    ULONG ulrc = 0;

    if (psz)
    {
        PSZ pRead = psz,
            pWrite = psz;

        while (*pRead)
        {
            if ((*pRead == ' ') && (*(pRead + 1) == ' '))
                // space followed by another space: drop this
                // one, so only the last space of a run is kept
                ulrc++;
            else
                *pWrite++ = *pRead;
            pRead++;
        }
        *pWrite = 0;
    }

    return (ulrc);
}
374
/*
 *@@ strhins:
 *      this inserts one string into another.
 *
 *      pcszInsert is inserted into pcszBuffer at offset
 *      ulInsertOfs (which counts from 0). An offset equal
 *      to the length of pcszBuffer appends pcszInsert.
 *
 *      A newly allocated string is returned. pcszBuffer is
 *      not changed. The new string should be free()'d after
 *      use.
 *
 *      Upon errors, NULL is returned. This happens if either
 *      string is NULL, if ulInsertOfs is larger than the
 *      length of pcszBuffer, or if memory allocation fails.
 *
 *@@changed V0.9.0 [umoeller]: completely rewritten.
 */

PSZ strhins(const char *pcszBuffer,
            ULONG ulInsertOfs,
            const char *pcszInsert)
{
    PSZ     pszNew = NULL;

    if ((pcszBuffer) && (pcszInsert))
    {
        ULONG   cbBuffer = strlen(pcszBuffer);
        ULONG   cbInsert = strlen(pcszInsert);

        // the largest valid offset is the position of the null
        // terminator; anything beyond that would make the tail
        // copy below start outside of pcszBuffer
        if (ulInsertOfs <= cbBuffer)
        {
            pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1);  // additional null terminator
            if (pszNew)
            {
                // copy stuff before the insert position
                memcpy(pszNew,
                       pcszBuffer,
                       ulInsertOfs);
                // copy string to be inserted
                memcpy(pszNew + ulInsertOfs,
                       pcszInsert,
                       cbInsert);
                // copy stuff after the insert position,
                // including the null terminator
                strcpy(pszNew + ulInsertOfs + cbInsert,
                       pcszBuffer + ulInsertOfs);
            }
        }
    }

    return (pszNew);
}
426
427/*
428 *@@ strhrpl:
429 * wrapper around xstrrpl to work with C strings.
430 * Note that *ppszBuf can get reallocated and must
431 * be free()'able.
432 *
433 * Repetitive use of this wrapper is not recommended
434 * because it is considerably slower than xstrrpl.
435 *
436 *@@added V0.9.6 (2000-11-01) [umoeller]
437 */
438
439ULONG strhrpl(PSZ *ppszBuf, // in/out: string
440 PULONG pulOfs, // in: where to begin search (0 = start);
441 // out: ofs of first char after replacement string
442 const char *pcszSearch, // in: search string; cannot be NULL
443 const char *pcszReplace) // in: replacement string; cannot be NULL
444{
445 ULONG ulrc = 0;
446 XSTRING xstrBuf,
447 xstrFind,
448 xstrReplace;
449 size_t ShiftTable[256];
450 BOOL fRepeat = FALSE;
451 xstrInit(&xstrBuf, 0);
452 xstrset(&xstrBuf, *ppszBuf);
453 xstrInit(&xstrFind, 0);
454 xstrset(&xstrFind, (PSZ)pcszSearch);
455 xstrInit(&xstrReplace, 0);
456 xstrset(&xstrReplace, (PSZ)pcszReplace);
457
458 if ((ulrc = xstrrpl(&xstrBuf,
459 pulOfs,
460 &xstrFind,
461 &xstrReplace,
462 ShiftTable,
463 &fRepeat)))
464 // replaced:
465 *ppszBuf = xstrBuf.psz;
466
467 return (ulrc);
468}
469
/*
 * strhWords:
 *      returns the no. of words in "psz".
 *      A string is considered a "word" if
 *      it is surrounded by spaces only, i.e. a word
 *      is a run of characters other than ' '.
 *
 *      Several spaces in a row, leading and trailing
 *      spaces do not count as extra words, and a single
 *      character is a word too. Returns 0 for an empty
 *      string, a string of spaces only, or NULL.
 *
 *@@added V0.9.0 [umoeller]
 */

ULONG strhWords(PSZ psz)
{
    ULONG   ulWords = 0;

    if (psz)
    {
        BOOL        fInWord = FALSE;
        const char  *p;

        for (p = psz; *p; p++)
        {
            if (*p == ' ')
                fInWord = FALSE;
            else if (!fInWord)
            {
                // first character after a space (or at the
                // start of the string): a new word begins
                fInWord = TRUE;
                ulWords++;
            }
        }
    }

    return (ulWords);
}
493
/*
 *@@ strhThousandsULong:
 *      converts a ULONG into a decimal string, while
 *      inserting thousands separators into it. Specify
 *      the separator character in cThousands.
 *
 *      The result is written to pszTarget, which must be
 *      large enough for the digits, the separators and
 *      the null terminator.
 *
 *      Returns pszTarget so you can use it directly
 *      with sprintf and the "%s" flag.
 *
 *      For cThousands, you should use the data in
 *      OS2.INI ("PM_National" application), which is
 *      always set according to the "Country" object.
 *      You can use prfhQueryCountrySettings to
 *      retrieve this setting.
 *
 *      Use strhThousandsDouble for "double" values.
 */

PSZ strhThousandsULong(PSZ pszTarget,       // out: decimal as string
                       ULONG ul,            // in: decimal to convert
                       CHAR cThousands)     // in: separator char (e.g. '.')
{
    CHAR    szDigits[40];
    PSZ     pOut = pszTarget;
    ULONG   cDigits,
            ulDigit;

    // plain decimal representation first
    sprintf(szDigits, "%lu", ul);
    cDigits = strlen(szDigits);

    for (ulDigit = 0; ulDigit < cDigits; ulDigit++)
    {
        // a separator goes in front of every digit which has a
        // multiple of three digits from itself to the end,
        // except in front of the very first digit
        if (    (ulDigit != 0)
             && (((cDigits - ulDigit) % 3) == 0)
           )
            *pOut++ = cThousands;

        *pOut++ = szDigits[ulDigit];
    }
    *pOut = '\0';

    return (pszTarget);
}
537
/*
 *@@ strhThousandsDouble:
 *      like strhThousandsULong, but for a "double"
 *      value. Note that after-comma values are truncated
 *      (the value is passed through floor() first, so
 *      negative values are rounded down).
 *
 *      A leading minus sign is copied as it is and is
 *      not counted when grouping the digits.
 *
 *      pszTarget must be large enough for the digits,
 *      the separators and the null terminator.
 *      Returns pszTarget.
 */

PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
{
    // sized for the largest IEEE double: about 309 digits
    // plus sign and null terminator
    CHAR    szTemp[330];
    PSZ     pszDigits = szTemp,
            pOut = pszTarget;
    ULONG   cDigits,
            ulDigit;

    sprintf(szTemp, "%.0f", floor(dbl));

    if (*pszDigits == '-')
    {
        // keep the sign out of the digit grouping, or
        // -123456 would come out as "-.123.456"
        *pOut++ = '-';
        pszDigits++;
    }

    cDigits = strlen(pszDigits);
    for (ulDigit = 0; ulDigit < cDigits; ulDigit++)
    {
        if (    (ulDigit != 0)
             && (((cDigits - ulDigit) % 3) == 0)
           )
            *pOut++ = cThousands;

        *pOut++ = pszDigits[ulDigit];
    }
    *pOut = '\0';

    return (pszTarget);
}
567
568/*
569 *@@ strhVariableDouble:
570 * like strhThousandsULong, but for a "double" value, and
571 * with a variable number of decimal places depending on the
572 * size of the quantity.
573 *
574 *@@added V0.9.6 (2000-11-12) [pr]
575 */
576
577PSZ strhVariableDouble(PSZ pszTarget,
578 double dbl,
579 PSZ pszUnits,
580 CHAR cThousands)
581{
582 if (dbl < 100.0)
583 sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
584 else
585 if (dbl < 1000.0)
586 sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
587 else
588 strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
589 pszUnits);
590
591 return(pszTarget);
592}
593
594/*
595 *@@ strhFileDate:
596 * converts file date data to a string (to pszBuf).
597 * You can pass any FDATE structure to this function,
598 * which are returned in those FILEFINDBUF* or
599 * FILESTATUS* structs by the Dos* functions.
600 *
601 * ulDateFormat is the PM setting for the date format,
602 * as set in the "Country" object, and can be queried using
603 + PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
604 *
605 * meaning:
606 * -- 0 mm.dd.yyyy (English)
607 * -- 1 dd.mm.yyyy (e.g. German)
608 * -- 2 yyyy.mm.dd (Japanese, ISO)
609 * -- 3 yyyy.dd.mm
610 *
611 * cDateSep is used as a date separator (e.g. '.').
612 * This can be queried using:
613 + prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
614 *
615 * Alternatively, you can query all the country settings
616 * at once using prfhQueryCountrySettings (prfh.c).
617 *
618 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
619 */
620
621VOID strhFileDate(PSZ pszBuf, // out: string returned
622 FDATE *pfDate, // in: date information
623 ULONG ulDateFormat, // in: date format (0-3)
624 CHAR cDateSep) // in: date separator (e.g. '.')
625{
626 DATETIME dt;
627 dt.day = pfDate->day;
628 dt.month = pfDate->month;
629 dt.year = pfDate->year + 1980;
630
631 strhDateTime(pszBuf,
632 NULL, // no time
633 &dt,
634 ulDateFormat,
635 cDateSep,
636 0, 0); // no time
637}
638
639/*
640 *@@ strhFileTime:
641 * converts file time data to a string (to pszBuf).
642 * You can pass any FTIME structure to this function,
643 * which are returned in those FILEFINDBUF* or
644 * FILESTATUS* structs by the Dos* functions.
645 *
646 * ulTimeFormat is the PM setting for the time format,
647 * as set in the "Country" object, and can be queried using
648 + PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
649 * meaning:
650 * -- 0 12-hour clock
651 * -- >0 24-hour clock
652 *
653 * cDateSep is used as a time separator (e.g. ':').
654 * This can be queried using:
655 + prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
656 *
657 * Alternatively, you can query all the country settings
658 * at once using prfhQueryCountrySettings (prfh.c).
659 *
660 *@@changed 99-03-15 fixed 12-hour crash
661 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
662 */
663
664VOID strhFileTime(PSZ pszBuf, // out: string returned
665 FTIME *pfTime, // in: time information
666 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
667 CHAR cTimeSep) // in: time separator (e.g. ':')
668{
669 DATETIME dt;
670 dt.hours = pfTime->hours;
671 dt.minutes = pfTime->minutes;
672 dt.seconds = pfTime->twosecs * 2;
673
674 strhDateTime(NULL, // no date
675 pszBuf,
676 &dt,
677 0, 0, // no date
678 ulTimeFormat,
679 cTimeSep);
680}
681
682/*
683 *@@ strhDateTime:
684 * converts Control Program DATETIME info
685 * into two strings. See strhFileDate and strhFileTime
686 * for more detailed parameter descriptions.
687 *
688 *@@added V0.9.0 (99-11-07) [umoeller]
689 */
690
691VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
692 PSZ pszTime, // out: time string returned (can be NULL)
693 DATETIME *pDateTime, // in: date/time information
694 ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
695 CHAR cDateSep, // in: date separator (e.g. '.')
696 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
697 CHAR cTimeSep) // in: time separator (e.g. ':')
698{
699 if (pszDate)
700 {
701 switch (ulDateFormat)
702 {
703 case 0: // mm.dd.yyyy (English)
704 sprintf(pszDate, "%02d%c%02d%c%04d",
705 pDateTime->month,
706 cDateSep,
707 pDateTime->day,
708 cDateSep,
709 pDateTime->year);
710 break;
711
712 case 1: // dd.mm.yyyy (e.g. German)
713 sprintf(pszDate, "%02d%c%02d%c%04d",
714 pDateTime->day,
715 cDateSep,
716 pDateTime->month,
717 cDateSep,
718 pDateTime->year);
719 break;
720
721 case 2: // yyyy.mm.dd (Japanese)
722 sprintf(pszDate, "%04d%c%02d%c%02d",
723 pDateTime->year,
724 cDateSep,
725 pDateTime->month,
726 cDateSep,
727 pDateTime->day);
728 break;
729
730 default: // yyyy.dd.mm
731 sprintf(pszDate, "%04d%c%02d%c%02d",
732 pDateTime->year,
733 cDateSep,
734 pDateTime->day,
735 cDateSep,
736 pDateTime->month);
737 break;
738 }
739 }
740
741 if (pszTime)
742 {
743 if (ulTimeFormat == 0)
744 {
745 // for 12-hour clock, we need additional INI data
746 CHAR szAMPM[10] = "err";
747
748 if (pDateTime->hours > 12)
749 {
750 // > 12h: PM.
751
752 // Note: 12:xx noon is 12 AM, not PM (even though
753 // AM stands for "ante meridiam", but English is just
754 // not logical), so that's handled below.
755
756 PrfQueryProfileString(HINI_USER,
757 "PM_National",
758 "s2359", // key
759 "PM", // default
760 szAMPM, sizeof(szAMPM)-1);
761 sprintf(pszTime, "%02d%c%02d%c%02d %s",
762 // leave 12 == 12 (not 0)
763 pDateTime->hours % 12,
764 cTimeSep,
765 pDateTime->minutes,
766 cTimeSep,
767 pDateTime->seconds,
768 szAMPM);
769 }
770 else
771 {
772 // <= 12h: AM
773 PrfQueryProfileString(HINI_USER,
774 "PM_National",
775 "s1159", // key
776 "AM", // default
777 szAMPM, sizeof(szAMPM)-1);
778 sprintf(pszTime, "%02d%c%02d%c%02d %s",
779 pDateTime->hours,
780 cTimeSep,
781 pDateTime->minutes,
782 cTimeSep,
783 pDateTime->seconds,
784 szAMPM);
785 }
786 }
787 else
788 // 24-hour clock
789 sprintf(pszTime, "%02d%c%02d%c%02d",
790 pDateTime->hours,
791 cTimeSep,
792 pDateTime->minutes,
793 cTimeSep,
794 pDateTime->seconds);
795 }
796}
797
/*
 *@@ strhGetWord:
 *      finds word boundaries.
 *
 *      *ppszStart is used as the beginning of the
 *      search. Characters from pcszBeginChars are
 *      skipped first.
 *
 *      If a word is found, *ppszStart is set to
 *      the first character of the word which was
 *      found and *ppszEnd receives the address
 *      of the first character _after_ the word,
 *      which is either a character from pcszEndChars
 *      or the null terminator.
 *      We then return TRUE.
 *
 *      The search is stopped if a null character
 *      is found before a word starts, or if pLimit is
 *      reached. In that case, FALSE is returned, and
 *      *ppszStart and *ppszEnd are not changed.
 *
 *@@added V0.9.1 (2000-02-13) [umoeller]
 */

BOOL strhGetWord(PSZ *ppszStart,        // in: start of search,
                                        // out: start of word (if TRUE is returned)
                 const char *pLimit,    // in: ptr to last char after *ppszStart to be
                                        // searched; if the word does not end before
                                        // or with this char, FALSE is returned
                 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
                 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
                 PSZ *ppszEnd)          // out: first char _after_ word
                                        // (if TRUE is returned)
{
    PSZ pStart = *ppszStart;

    // find start of word.
    // The explicit null check is required: strchr also "finds"
    // the null terminator of pcszBeginChars, so without it the
    // end of the string would be skipped like a space.
    while (    (pStart < (PSZ)pLimit)
            && (*pStart)
            && (strchr(pcszBeginChars, *pStart))
          )
        // if char is a "before word" char: go for next
        pStart++;

    if (    (pStart < (PSZ)pLimit)
         && (*pStart)
       )
    {
        // found a valid "word start" character
        // (which is not in pcszBeginChars):

        // find end of word; the null terminator ends a word too
        PSZ pEndOfWord = pStart;
        while (    (pEndOfWord <= (PSZ)pLimit)
                && (*pEndOfWord)
                && (strchr(pcszEndChars, *pEndOfWord) == 0)
              )
            // if char is not an "end word" char: go for next
            pEndOfWord++;

        if (pEndOfWord <= (PSZ)pLimit)
        {
            // whoa, got a word:
            *ppszStart = pStart;
            *ppszEnd = pEndOfWord;
            return (TRUE);
        }
    }

    return (FALSE);
}
866
/*
 *@@ strhIsWord:
 *      returns TRUE if p points to a "word"
 *      in pcszBuf.
 *
 *      p is considered a word if the character _before_
 *      it is in pcszBeginChars (or p is the start of
 *      pcszBuf) and the char _after_ it (i.e. *(p+cbSearch))
 *      is in pcszEndChars or is the null terminator.
 *
 *      Note: an earlier version of this function carried
 *      an extra "avoid doubles of the end char" test, which
 *      compared (cNextChar+1) with the end char itself and
 *      was therefore always true. It has been removed;
 *      the results are the same as before.
 *
 *@@added V0.9.6 (2000-11-12) [umoeller]
 */

BOOL strhIsWord(const char *pcszBuf,
                const char *p,                 // in: start of word
                ULONG cbSearch,                // in: length of word
                const char *pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                const char *pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    CHAR cNextChar;

    // check previous char, unless the word starts the
    // buffer, in which case there is nothing in front of it
    if (    (p != pcszBuf)
         && (strchr(pcszBeginChars, *(p-1)) == NULL)
       )
        return (FALSE);

    // OK, valid begin char:
    // check end char
    cNextChar = *(p + cbSearch);
    if (cNextChar == 0)
        // word goes up to the end of the buffer
        return (TRUE);

    if (strchr(pcszEndChars, cNextChar))
        return (TRUE);

    return (FALSE);
}
913
914/*
915 *@@ strhFindWord:
916 * searches for pszSearch in pszBuf, which is
917 * returned if found (or NULL if not).
918 *
919 * As opposed to strstr, this finds pszSearch
920 * only if it is a "word". A search string is
921 * considered a word if the character _before_
922 * it is in pcszBeginChars and the char _after_
923 * it is in pcszEndChars.
924 *
925 * Example:
926 + strhFindWord("This is an example.", "is");
927 + returns ...........^ this, but not the "is" in "This".
928 *
929 * The algorithm here uses strstr to find pszSearch in pszBuf
930 * and performs additional "is-word" checks for each item found
931 * (by calling strhIsWord).
932 *
933 * Note that this function is fairly slow compared to xstrFindWord.
934 *
935 *@@added V0.9.0 (99-11-08) [umoeller]
936 *@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
937 */
938
939PSZ strhFindWord(const char *pszBuf,
940 const char *pszSearch,
941 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
942 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
943{
944 PSZ pszReturn = 0;
945 ULONG cbBuf = strlen(pszBuf),
946 cbSearch = strlen(pszSearch);
947
948 if ((cbBuf) && (cbSearch))
949 {
950 const char *p = pszBuf;
951
952 do // while p
953 {
954 p = strstr(p, pszSearch);
955 if (p)
956 {
957 // string found:
958 // check if that's a word
959
960 if (strhIsWord(pszBuf,
961 p,
962 cbSearch,
963 pcszBeginChars,
964 pcszEndChars))
965 {
966 // valid end char:
967 pszReturn = (PSZ)p;
968 break;
969 }
970
971 p += cbSearch;
972 }
973 } while (p);
974
975 }
976 return (pszReturn);
977}
978
/*
 *@@ strhFindEOL:
 *      returns a pointer to the next \r, \n or null character
 *      in pszSearchIn (which can be pszSearchIn itself).
 *      If pulOffset is not NULL, the offset of that character
 *      from pszSearchIn is stored in *pulOffset.
 *
 *      This should never return NULL because at some point,
 *      there will be a null byte in your string.
 *
 *@@added V0.9.4 (2000-07-01) [umoeller]
 */

PSZ strhFindEOL(PSZ pszSearchIn,        // in: where to search
                PULONG pulOffset)       // out: offset (ptr can be NULL)
{
    // strcspn counts the chars in front of the first \r or \n,
    // or up to the null terminator if there is neither
    ULONG cbLine = strcspn(pszSearchIn, "\r\n");

    if (pulOffset)
        *pulOffset = cbLine;

    return (pszSearchIn + cbLine);
}
1009
/*
 *@@ strhFindNextLine:
 *      like strhFindEOL, but this returns the character
 *      _after_ \r or \n. A \r directly followed by \n is
 *      skipped as one line end. Note that this might return
 *      a pointer to terminating NULL character also.
 *
 *      If pulOffset is not NULL, the offset of the returned
 *      pointer from pszSearchIn is stored in *pulOffset.
 */

PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
{
    // go to the first \r or \n, or to the terminating
    // null byte (same search as in strhFindEOL)
    PSZ pNextLine = pszSearchIn + strcspn(pszSearchIn, "\r\n");

    // step over the line end, if there is one
    if (*pNextLine == '\r')
        pNextLine++;
    if (*pNextLine == '\n')
        pNextLine++;

    if (pulOffset)
        *pulOffset = pNextLine - pszSearchIn;

    return (pNextLine);
}
1031
1032/*
1033 *@@ strhFindKey:
1034 * finds pszKey in pszSearchIn; similar to strhistr,
1035 * but this one makes sure the key is at the beginning
1036 * of a line. Spaces before the key are tolerated.
1037 * Returns NULL if the key was not found.
1038 *
1039 * Used by strhGetParameter/strhSetParameter; useful
1040 * for analyzing CONFIG.SYS settings.
1041 *
1042 *@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1043 *@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1044 */
1045
1046PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1047 const char *pcszKey, // in: key to search for
1048 PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1049 // can be NULL if not needed
1050{
1051 const char *p = NULL;
1052 PSZ pReturn = NULL;
1053 // BOOL fFound = FALSE;
1054
1055 p = pcszSearchIn;
1056 do {
1057 p = strhistr(p, pcszKey);
1058
1059 if ((p) && (p >= pcszSearchIn))
1060 {
1061 // make sure the key is at the beginning of a line
1062 // by going backwards until we find a char != " "
1063 const char *p2 = p;
1064 while ( (*p2 == ' ')
1065 && (p2 > pcszSearchIn)
1066 )
1067 p2--;
1068
1069 // if previous char is an EOL sign, go on
1070 if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1071 || (*(p2-1) == '\r')
1072 || (*(p2-1) == '\n')
1073 )
1074 {
1075 // now check whether the char after the search
1076 // is a "=" char
1077 // ULONG cbKey = strlen(pszKey);
1078
1079 // tolerate spaces before "="
1080 /* PSZ p3 = p;
1081 while (*(p3+cbKey) == ' ')
1082 p3++;
1083
1084 if (*(p3+cbKey) == '=') */
1085 {
1086 // found:
1087 pReturn = (PSZ)p; // go on, p contains found key
1088
1089 // test for all upper case?
1090 if (pfIsAllUpperCase)
1091 {
1092 ULONG cbKey2 = strlen(pcszKey),
1093 ul = 0;
1094 *pfIsAllUpperCase = TRUE;
1095 for (ul = 0; ul < cbKey2; ul++)
1096 if (islower(*(p+ul)))
1097 {
1098 *pfIsAllUpperCase = FALSE;
1099 break; // for
1100 }
1101 }
1102
1103 break; // do
1104 }
1105 } // else search next key
1106
1107 p++; // search on after this key
1108 }
1109 } while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1110
1111 return (pReturn);
1112}
1113
1114/*
1115 *@@ strhGetParameter:
1116 * searches pszSearchIn for the key pszKey; if found, it
1117 * returns a pointer to the following characters in pszSearchIn
1118 * and, if pszCopyTo != NULL, copies the rest of the line to
1119 * that buffer, of which cbCopyTo specified the size.
1120 * If the key is not found, NULL is returned.
1121 * String search is done by calling strhFindKey.
1122 * This is useful for querying CONFIG.SYS settings.
1123 *
1124 * <B>Example:</B> this would return "YES" if you searched
1125 * for "PAUSEONERROR=", and "PAUSEONERROR=YES" existed in pszSearchIn.
1126 */
1127
1128PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1129 const char *pcszKey, // in: key to search for
1130 PSZ pszCopyTo, // out: key value
1131 ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1132{
1133 PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1134 prc = NULL;
1135 if (p)
1136 {
1137 prc = p + strlen(pcszKey);
1138 if (pszCopyTo)
1139 // copy to pszCopyTo
1140 {
1141 ULONG cb;
1142 PSZ pEOL = strhFindEOL(prc, &cb);
1143 if (pEOL)
1144 {
1145 if (cb > cbCopyTo)
1146 cb = cbCopyTo-1;
1147 strhncpy0(pszCopyTo, prc, cb);
1148 }
1149 }
1150 }
1151
1152 return (prc);
1153}
1154
1155/*
1156 *@@ strhSetParameter:
1157 * searches *ppszBuf for the key pszKey; if found, it
1158 * replaces the characters following this key up to the
1159 * end of the line with pszParam. If pszKey is not found in
1160 * *ppszBuf, it is appended to the file in a new line.
1161 *
1162 * If any changes are made, *ppszBuf is re-allocated.
1163 *
1164 * This function searches w/out case sensitivity.
1165 *
1166 * Returns a pointer to the new parameter inside the buffer.
1167 *
1168 *@@changed V0.9.0 [umoeller]: changed function prototype to PSZ* ppszSearchIn
1169 */
1170
1171PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1172 const char *pcszKey, // in: key to search for
1173 PSZ pszNewParam, // in: new parameter to set for key
1174 BOOL fRespectCase) // in: if TRUE, pszNewParam will
1175 // be converted to upper case if the found key is
1176 // in upper case also. pszNewParam should be in
1177 // lower case if you use this.
1178{
1179 BOOL fIsAllUpperCase = FALSE;
1180 PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1181 prc = NULL;
1182
1183 if (pKey)
1184 {
1185 // key found in file:
1186 // replace existing parameter
1187 PSZ pOldParam = pKey + strlen(pcszKey);
1188
1189 prc = pOldParam;
1190 // pOldParam now has the old parameter, which we
1191 // will overwrite now
1192
1193 if (pOldParam)
1194 {
1195 ULONG cbOldParam;
1196 PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1197 // pEOL now has first end-of-line after the parameter
1198
1199 if (pEOL)
1200 {
1201 XSTRING strBuf;
1202 ULONG ulOfs = 0;
1203
1204 PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1205 strncpy(pszOldCopy, pOldParam, cbOldParam);
1206 pszOldCopy[cbOldParam] = '\0';
1207
1208 xstrInit(&strBuf, 0);
1209 xstrset(&strBuf, *ppszBuf); // this must not be freed!
1210 /* xstrInit(&strFind, 0);
1211 xstrset(&strFind, pszOldCopy); // this must not be freed!
1212 xstrInit(&strReplace, 0);
1213 xstrset(&strReplace, pszNewParam); // this must not be freed!
1214 */
1215
1216 // check for upper case desired?
1217 if (fRespectCase)
1218 if (fIsAllUpperCase)
1219 strupr(pszNewParam);
1220
1221 xstrcrpl(&strBuf, &ulOfs, pszOldCopy, pszNewParam);
1222
1223 free(pszOldCopy);
1224
1225 *ppszBuf = strBuf.psz;
1226 }
1227 }
1228 }
1229 else
1230 {
1231 PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1232 + strlen(pcszKey)
1233 + strlen(pszNewParam)
1234 + 5); // 2 * \r\n + null byte
1235 // key not found: append to end of file
1236 sprintf(pszNew, "%s\r\n%s%s\r\n",
1237 *ppszBuf, pcszKey, pszNewParam);
1238 free(*ppszBuf);
1239 *ppszBuf = pszNew;
1240 }
1241
1242 return (prc);
1243}
1244
1245/*
1246 *@@ strhDeleteLine:
1247 * this deletes the line in pszSearchIn which starts with
1248 * the key pszKey. Returns TRUE if the line was found and
1249 * deleted.
1250 *
1251 * This copies within pszSearchIn.
1252 */
1253
1254BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1255 PSZ pszKey) // in: key to find
1256{
1257 BOOL fIsAllUpperCase = FALSE;
1258 PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1259 BOOL brc = FALSE;
1260
1261 if (pKey) {
1262 PSZ pEOL = strhFindEOL(pKey, NULL);
1263 // pEOL now has first end-of-line after the key
1264 if (pEOL)
1265 {
1266 // delete line by overwriting it with
1267 // the next line
1268 strcpy(pKey, pEOL+2);
1269 }
1270 else
1271 {
1272 // EOL not found: we must be at the end of the file
1273 *pKey = '\0';
1274 }
1275 brc = TRUE;
1276 }
1277
1278 return (brc);
1279}
1280
1281/*
1282 *@@ strhBeautifyTitle:
1283 * replaces all line breaks (0xd, 0xa) with spaces.
1284 */
1285
1286BOOL strhBeautifyTitle(PSZ psz)
1287{
1288 BOOL rc = FALSE;
1289 CHAR *p;
1290 while ((p = strchr(psz, 0xa)))
1291 {
1292 *p = ' ';
1293 rc = TRUE;
1294 }
1295 while ((p = strchr(psz, 0xd)))
1296 {
1297 *p = ' ';
1298 rc = TRUE;
1299 }
1300 return (rc);
1301}
1302
1303/*
1304 * strhFindAttribValue:
1305 * searches for pszAttrib in pszSearchIn; if found,
1306 * returns the first character after the "=" char.
1307 * If "=" is not found, a space, \r, and \n are
1308 * also accepted. This function searches without
1309 * respecting case.
1310 *
1311 * <B>Example:</B>
1312 + strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1313 +
1314 + returns ....................... ^ this address.
1315 *
1316 *@@added V0.9.0 [umoeller]
1317 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1318 */
1319
1320PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1321{
1322 PSZ prc = 0;
1323 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1324 p,
1325 p2;
1326 ULONG cbAttrib = strlen(pszAttrib);
1327
1328 // 1) find space char
1329 while ((p = strchr(pszSearchIn2, ' ')))
1330 {
1331 CHAR c;
1332 p++;
1333 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1334 // now check whether the p+strlen(pszAttrib)
1335 // is a valid end-of-tag character
1336 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1337 && ( (c == ' ')
1338 || (c == '>')
1339 || (c == '=')
1340 || (c == '\r')
1341 || (c == '\n')
1342 || (c == 0)
1343 )
1344 )
1345 {
1346 // yes:
1347 CHAR c2;
1348 p2 = p + cbAttrib;
1349 c2 = *p2;
1350 while ( ( (c2 == ' ')
1351 || (c2 == '=')
1352 || (c2 == '\n')
1353 || (c2 == '\r')
1354 )
1355 && (c2 != 0)
1356 )
1357 c2 = *++p2;
1358 prc = p2;
1359 break; // first while
1360 }
1361 pszSearchIn2++;
1362 }
1363 return (prc);
1364}
1365
1366/*
1367 * strhGetNumAttribValue:
1368 * stores the numerical parameter value of an HTML-style
1369 * tag in *pl.
1370 *
1371 * Returns the address of the tag parameter in the
1372 * search buffer, if found, or NULL.
1373 *
1374 * <B>Example:</B>
1375 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1376 *
1377 * stores 123 in the "l" variable.
1378 *
1379 *@@added V0.9.0 [umoeller]
1380 */
1381
1382PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1383 const char *pszTag, // e.g. "INDEX"
1384 PLONG pl) // out: numerical value
1385{
1386 PSZ pParam;
1387 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1388 sscanf(pParam, "%ld", pl);
1389
1390 return (pParam);
1391}
1392
1393/*
1394 * strhGetTextAttr:
1395 * retrieves the attribute value of a textual HTML-style tag
1396 * in a newly allocated buffer, which is returned,
1397 * or NULL if attribute not found.
1398 * If an attribute value is to contain spaces, it
1399 * must be enclosed in quotes.
1400 *
1401 * The offset of the attribute data in pszSearchIn is
1402 * returned in *pulOffset so that you can do multiple
1403 * searches.
1404 *
1405 * This returns a new buffer, which should be free()'d after use.
1406 *
1407 * <B>Example:</B>
1408 + ULONG ulOfs = 0;
1409 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1410 + ............^ ulOfs
1411 *
1412 * returns a new string with the value "blublub" (without
1413 * quotes) and sets ulOfs to 12.
1414 *
1415 *@@added V0.9.0 [umoeller]
1416 */
1417
1418PSZ strhGetTextAttr(const char *pszSearchIn,
1419 const char *pszTag,
1420 PULONG pulOffset) // out: offset where found
1421{
1422 PSZ pParam,
1423 pParam2,
1424 prc = NULL;
1425 ULONG ulCount = 0;
1426 LONG lNestingLevel = 0;
1427
1428 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1429 {
1430 // determine end character to search for: a space
1431 CHAR cEnd = ' ';
1432 if (*pParam == '\"')
1433 {
1434 // or, if the data is enclosed in quotes, a quote
1435 cEnd = '\"';
1436 pParam++;
1437 }
1438
1439 if (pulOffset)
1440 // store the offset
1441 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1442
1443 // now find end of attribute
1444 pParam2 = pParam;
1445 while (*pParam)
1446 {
1447 if (*pParam == cEnd)
1448 // end character found
1449 break;
1450 else if (*pParam == '<')
1451 // yet another opening tag found:
1452 // this is probably some "<" in the attributes
1453 lNestingLevel++;
1454 else if (*pParam == '>')
1455 {
1456 lNestingLevel--;
1457 if (lNestingLevel < 0)
1458 // end of tag found:
1459 break;
1460 }
1461 ulCount++;
1462 pParam++;
1463 }
1464
1465 // copy attribute to new buffer
1466 if (ulCount)
1467 {
1468 prc = (PSZ)malloc(ulCount+1);
1469 memcpy(prc, pParam2, ulCount);
1470 *(prc+ulCount) = 0;
1471 }
1472 }
1473 return (prc);
1474}
1475
1476/*
1477 * strhFindEndOfTag:
1478 * returns a pointer to the ">" char
1479 * which seems to terminate the tag beginning
1480 * after pszBeginOfTag.
1481 *
1482 * If additional "<" chars are found, we look
1483 * for additional ">" characters too.
1484 *
1485 * Note: You must pass the address of the opening
1486 * '<' character to this function.
1487 *
1488 * Example:
1489 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1490 + strhFindEndOfTag(pszTest)
1491 + returns.................................^ this.
1492 *
1493 *@@added V0.9.0 [umoeller]
1494 */
1495
1496PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1497{
1498 PSZ p = (PSZ)pszBeginOfTag,
1499 prc = NULL;
1500 LONG lNestingLevel = 0;
1501
1502 while (*p)
1503 {
1504 if (*p == '<')
1505 // another opening tag found:
1506 lNestingLevel++;
1507 else if (*p == '>')
1508 {
1509 // closing tag found:
1510 lNestingLevel--;
1511 if (lNestingLevel < 1)
1512 {
1513 // corresponding: return this
1514 prc = p;
1515 break;
1516 }
1517 }
1518 p++;
1519 }
1520
1521 return (prc);
1522}
1523
1524/*
1525 * strhGetBlock:
1526 * this complex function searches the given string
1527 * for a pair of opening/closing HTML-style tags.
1528 *
1529 * If found, this routine returns TRUE and does
1530 * the following:
1531 *
1532 * 1) allocate a new buffer, copy the text
1533 * enclosed by the opening/closing tags
1534 * into it and set *ppszBlock to that
1535 * buffer;
1536 *
1537 * 2) if the opening tag has any attributes,
1538 * allocate another buffer, copy the
1539 * attributes into it and set *ppszAttrs
1540 * to that buffer; if no attributes are
1541 * found, *ppszAttrs will be NULL;
1542 *
1543 * 3) set *pulOffset to the offset from the
1544 * beginning of *ppszSearchIn where the
1545 * opening tag was found;
1546 *
1547 * 4) advance *ppszSearchIn to after the
1548 * closing tag, so that you can do
1549 * multiple searches without finding the
1550 * same tags twice.
1551 *
1552 * All buffers should be freed using free().
1553 *
1554 * This returns the following:
1555 * -- 0: no error
1556 * -- 1: tag not found at all (doesn't have to be an error)
1557 * -- 2: begin tag found, but no corresponding end tag found. This
1558 * is a real error.
1559 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1560 *
1561 * <B>Example:</B>
1562 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1563 + PSZ pszBlock, pszAttrs;
1564 + ULONG ulOfs;
1565 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1566 *
1567 * would do the following:
1568 *
1569 * 1) set pszBlock to a new string containing "This is page 1."
1570 * without quotes;
1571 *
1572 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1573 *
1574 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1575 *
1576 * 4) pSearch would be advanced to point to the "More text"
1577 * string in the original buffer.
1578 *
1579 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1580 * for HTML parsing.
1581 *
1582 *@@added V0.9.0 [umoeller]
1583 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1584 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1585 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1586 */
1587
1588ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1589 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1590 PSZ pszTag,
1591 PSZ *ppszBlock, // out: block enclosed by the tags
1592 PSZ *ppszAttribs, // out: attributes of the opening tag
1593 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1594 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1595{
1596 ULONG ulrc = 1;
1597 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1598 pszSearch2 = pszBeginTag,
1599 pszClosingTag;
1600 ULONG cbTag = strlen(pszTag);
1601
1602 // go thru the block and check all tags if it's the
1603 // begin tag we're looking for
1604 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1605 {
1606 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1607 // yes: stop
1608 break;
1609 else
1610 pszBeginTag++;
1611 }
1612
1613 if (pszBeginTag)
1614 {
1615 // we found <TAG>:
1616 ULONG ulNestingLevel = 0;
1617
1618 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1619 // strchr(pszBeginTag, '>');
1620 if (pszEndOfBeginTag)
1621 {
1622 // does the caller want the attributes?
1623 if (ppszAttribs)
1624 {
1625 // yes: then copy them
1626 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1627 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1628 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1629 // add terminating 0
1630 *(pszAttrs + ulAttrLen) = 0;
1631
1632 *ppszAttribs = pszAttrs;
1633 }
1634
1635 // output offset of where we found the begin tag
1636 if (pulOfsBeginTag)
1637 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1638
1639 // now find corresponding closing tag (e.g. "</BODY>"
1640 pszBeginTag = pszEndOfBeginTag+1;
1641 // now we're behind the '>' char of the opening tag
1642 // increase offset of that too
1643 if (pulOfsBeginBlock)
1644 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1645
1646 // find next closing tag;
1647 // for the first run, pszSearch2 points to right
1648 // after the '>' char of the opening tag
1649 pszSearch2 = pszBeginTag;
1650 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1651 && (pszClosingTag = strstr(pszSearch2, "<"))
1652 )
1653 {
1654 // if we have another opening tag before our closing
1655 // tag, we need to have several closing tags before
1656 // we're done
1657 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1658 ulNestingLevel++;
1659 else
1660 {
1661 // is this ours?
1662 if ( (*(pszClosingTag+1) == '/')
1663 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1664 )
1665 {
1666 // we've found a matching closing tag; is
1667 // it ours?
1668 if (ulNestingLevel == 0)
1669 {
1670 // our closing tag found:
1671 // allocate mem for a new buffer
1672 // and extract all the text between
1673 // open and closing tags to it
1674 ULONG ulLen = pszClosingTag - pszBeginTag;
1675 if (ppszBlock)
1676 {
1677 PSZ pNew = (PSZ)malloc(ulLen + 1);
1678 strhncpy0(pNew, pszBeginTag, ulLen);
1679 *ppszBlock = pNew;
1680 }
1681
1682 // raise search offset to after the closing tag
1683 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1684
1685 ulrc = 0;
1686
1687 break;
1688 } else
1689 // not our closing tag:
1690 ulNestingLevel--;
1691 }
1692 }
1693 // no matching closing tag: search on after that
1694 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1695 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1696
1697 if (!pszClosingTag)
1698 // no matching closing tag found:
1699 // return 2 (closing tag not found)
1700 ulrc = 2;
1701 } // end if (pszBeginTag)
1702 else
1703 // no matching ">" for opening tag found:
1704 ulrc = 3;
1705 }
1706
1707 return (ulrc);
1708}
1709
1710/* ******************************************************************
1711 * *
1712 * Miscellaneous *
1713 * *
1714 ********************************************************************/
1715
1716/*
1717 *@@ strhArrayAppend:
1718 * this appends a string to a "string array".
1719 *
1720 * A string array is considered a sequence of
1721 * zero-terminated strings in memory. That is,
1722 * after each string's null-byte, the next
1723 * string comes up.
1724 *
1725 * This is useful for composing a single block
1726 * of memory from, say, list box entries, which
1727 * can then be written to OS2.INI in one flush.
1728 *
1729 * To append strings to such an array, call this
1730 * function for each string you wish to append.
1731 * This will re-allocate *ppszRoot with each call,
1732 * and update *pcbRoot, which then contains the
1733 * total size of all strings (including all null
1734 * terminators).
1735 *
1736 * Pass *pcbRoot to PrfSaveProfileData to have the
1737 * block saved.
1738 *
1739 * Note: On the first call, *ppszRoot and *pcbRoot
1740 * _must_ be both NULL, or this crashes.
1741 */
1742
1743VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1744 PSZ pszNew, // in: string to append
1745 PULONG pcbRoot) // in/out: size of array
1746{
1747 ULONG cbNew = strlen(pszNew);
1748 PSZ pszTemp = (PSZ)malloc(*pcbRoot
1749 + cbNew
1750 + 1); // two null bytes
1751 if (*ppszRoot)
1752 {
1753 // not first loop: copy old stuff
1754 memcpy(pszTemp,
1755 *ppszRoot,
1756 *pcbRoot);
1757 free(*ppszRoot);
1758 }
1759 // append new string
1760 strcpy(pszTemp + *pcbRoot,
1761 pszNew);
1762 // update root
1763 *ppszRoot = pszTemp;
1764 // update length
1765 *pcbRoot += cbNew + 1;
1766}
1767
1768/*
1769 *@@ strhCreateDump:
1770 * this dumps a memory block into a string
1771 * and returns that string in a new buffer.
1772 *
1773 * You must free() the returned PSZ after use.
1774 *
1775 * The output looks like the following:
1776 *
1777 + 0000: FE FF 0E 02 90 00 00 00 ........
1778 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1779 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1780 *
1781 * Each line is terminated with a newline (\n)
1782 * character only.
1783 *
1784 *@@added V0.9.1 (2000-01-22) [umoeller]
1785 */
1786
1787PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1788 ULONG ulSize, // in: size of buffer
1789 ULONG ulIndent) // in: indentation of every line
1790{
1791 PSZ pszReturn = 0;
1792 XSTRING strReturn;
1793 CHAR szTemp[1000];
1794
1795 PBYTE pbCurrent = pb; // current byte
1796 ULONG ulCount = 0,
1797 ulCharsInLine = 0; // if this grows > 7, a new line is started
1798 CHAR szLine[400] = "",
1799 szAscii[30] = " "; // ASCII representation; filled for every line
1800 PSZ pszLine = szLine,
1801 pszAscii = szAscii;
1802
1803 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1804
1805 for (pbCurrent = pb;
1806 ulCount < ulSize;
1807 pbCurrent++, ulCount++)
1808 {
1809 if (ulCharsInLine == 0)
1810 {
1811 memset(szLine, ' ', ulIndent);
1812 pszLine += ulIndent;
1813 }
1814 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1815
1816 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1817 // printable character:
1818 *pszAscii = *pbCurrent;
1819 else
1820 *pszAscii = '.';
1821 pszAscii++;
1822
1823 ulCharsInLine++;
1824 if ( (ulCharsInLine > 7) // 8 bytes added?
1825 || (ulCount == ulSize-1) // end of buffer reached?
1826 )
1827 {
1828 // if we haven't had eight bytes yet,
1829 // fill buffer up to eight bytes with spaces
1830 ULONG ul2;
1831 for (ul2 = ulCharsInLine;
1832 ul2 < 8;
1833 ul2++)
1834 pszLine += sprintf(pszLine, " ");
1835
1836 sprintf(szTemp, "%04lX: %s %s\n",
1837 (ulCount & 0xFFFFFFF8), // offset in hex
1838 szLine, // bytes string
1839 szAscii); // ASCII string
1840 xstrcat(&strReturn, szTemp);
1841
1842 // restart line buffer
1843 pszLine = szLine;
1844
1845 // clear ASCII buffer
1846 strcpy(szAscii, " ");
1847 pszAscii = szAscii;
1848
1849 // reset line counter
1850 ulCharsInLine = 0;
1851 }
1852 }
1853
1854 if (strReturn.cbAllocated)
1855 pszReturn = strReturn.psz;
1856
1857 return (pszReturn);
1858}
1859
1860/* ******************************************************************
1861 * *
1862 * Wildcard matching *
1863 * *
1864 ********************************************************************/
1865
1866/*
1867 * The following code has been taken from "fnmatch.zip".
1868 *
1869 * (c) 1994-1996 by Eberhard Mattes.
1870 */
1871
/* In OS/2 and DOS styles, both / and \ separate components of a path.
 * This macro returns true iff C is a separator. Note that C is
 * evaluated twice, so it must not have side effects. */

#define IS_OS2_COMP_SEP(C) ((C) == '/' || (C) == '\\')


/* This macro returns true if C is at the end of a component of a
 * path, i.e. if C is the terminating null character or a separator.
 * C is evaluated more than once. */

#define IS_OS2_COMP_END(C) ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 * skip_comp_os2:
 *      advances past the current component of the path SRC, for
 *      OS/2 and DOS styles, and returns a pointer to the character
 *      following the separator which ends it. If the component is
 *      the last one, a pointer to the terminating null character
 *      is returned instead.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *cur = src;

    /* Walk to the separator or the end of the string, whichever
     * comes first. */

    for (; !IS_OS2_COMP_END(*cur); ++cur)
        ;

    /* Step over the separator, but never over the null character. */

    return (*cur != 0) ? cur + 1 : cur;
}
1906
/*
 * has_colon:
 *      scans the null-terminated path P and returns 1 if it
 *      contains at least one colon, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    for (; *p != 0; ++p)
    {
        if (*p == ':')
            return 1;
    }
    return 0;
}
1923
1924/*
1925 * match_comp_os2:
1926 * Compare a single component (directory name or file name) of the
1927 * paths, for OS/2 and DOS styles. MASK and NAME point into a
1928 * component of the wildcard and the name to be checked, respectively.
1929 * Comparing stops at the next separator. The FLAGS argument is the
1930 * same as that of fnmatch(). HAS_DOT is true if a dot is in the
1931 * current component of NAME. The number of dots is not restricted,
1932 * even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1933 * Note that this function is recursive.
1934 *
1935 * (c) 1994-1996 by Eberhard Mattes.
1936 */
1937
1938static int match_comp_os2(const unsigned char *mask,
1939 const unsigned char *name,
1940 unsigned flags,
1941 int has_dot)
1942{
1943 int rc;
1944
1945 for (;;)
1946 switch (*mask)
1947 {
1948 case 0:
1949
1950 /* There must be no extra characters at the end of NAME when
1951 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1952 * in that case, NAME may point to a separator. */
1953
1954 if (*name == 0)
1955 return FNM_MATCH;
1956 if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1957 return FNM_MATCH;
1958 return FNM_NOMATCH;
1959
1960 case '/':
1961 case '\\':
1962
1963 /* Separators match separators. */
1964
1965 if (IS_OS2_COMP_SEP(*name))
1966 return FNM_MATCH;
1967
1968 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1969 * is ignored at the end of NAME. */
1970
1971 if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1972 return FNM_MATCH;
1973
1974 /* Stop comparing at the separator. */
1975
1976 return FNM_NOMATCH;
1977
1978 case '?':
1979
1980 /* A question mark matches one character. It does not match
1981 * a dot. At the end of the component (and before a dot),
1982 * it also matches zero characters. */
1983
1984 if (*name != '.' && !IS_OS2_COMP_END(*name))
1985 ++name;
1986 ++mask;
1987 break;
1988
1989 case '*':
1990
1991 /* An asterisk matches zero or more characters. In DOS
1992 * mode, dots are not matched. */
1993
1994 do
1995 {
1996 ++mask;
1997 }
1998 while (*mask == '*');
1999 for (;;)
2000 {
2001 rc = match_comp_os2(mask, name, flags, has_dot);
2002 if (rc != FNM_NOMATCH)
2003 return rc;
2004 if (IS_OS2_COMP_END(*name))
2005 return FNM_NOMATCH;
2006 if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
2007 return FNM_NOMATCH;
2008 ++name;
2009 }
2010
2011 case '.':
2012
2013 /* A dot matches a dot. It also matches the implicit dot at
2014 * the end of a dot-less NAME. */
2015
2016 ++mask;
2017 if (*name == '.')
2018 ++name;
2019 else if (has_dot || !IS_OS2_COMP_END(*name))
2020 return FNM_NOMATCH;
2021 break;
2022
2023 default:
2024
2025 /* All other characters match themselves. */
2026
2027 if (flags & _FNM_IGNORECASE)
2028 {
2029 if (tolower(*mask) != tolower(*name))
2030 return FNM_NOMATCH;
2031 }
2032 else
2033 {
2034 if (*mask != *name)
2035 return FNM_NOMATCH;
2036 }
2037 ++mask;
2038 ++name;
2039 break;
2040 }
2041}
2042
2043/*
2044 * match_comp:
2045 * compare a single component (directory name or file name) of the
2046 * paths, for all styles which need component-by-component matching.
2047 * MASK and NAME point to the start of a component of the wildcard and
2048 * the name to be checked, respectively. Comparing stops at the next
2049 * separator. The FLAGS argument is the same as that of fnmatch().
2050 * Return FNM_MATCH iff MASK and NAME match.
2051 *
2052 * (c) 1994-1996 by Eberhard Mattes.
2053 */
2054
2055static int match_comp(const unsigned char *mask,
2056 const unsigned char *name,
2057 unsigned flags)
2058{
2059 const unsigned char *s;
2060
2061 switch (flags & _FNM_STYLE_MASK)
2062 {
2063 case _FNM_OS2:
2064 case _FNM_DOS:
2065
2066 /* For OS/2 and DOS styles, we add an implicit dot at the end of
2067 * the component if the component doesn't include a dot. */
2068
2069 s = name;
2070 while (!IS_OS2_COMP_END(*s) && *s != '.')
2071 ++s;
2072 return match_comp_os2(mask, name, flags, *s == '.');
2073
2074 default:
2075 return FNM_ERR;
2076 }
2077}
2078
/* In Unix styles, only / separates components of a path. This macro
 * returns true iff C is a separator. */

#define IS_UNIX_COMP_SEP(C) ((C) == '/')


/* This macro returns true if C is at the end of a component of a
 * path, i.e. if C is the terminating null character or a separator.
 * C is evaluated twice, so it must not have side effects. */

#define IS_UNIX_COMP_END(C) ((C) == 0 || IS_UNIX_COMP_SEP (C))
2089
/*
 * match_unix:
 *      matches complete paths for Unix styles. MASK is the wildcard,
 *      NAME the path name to be checked. The FLAGS argument is the
 *      same as that of fnmatch(). COMP points to the start of the
 *      current component in NAME; it is only used to detect a dot
 *      at the start of a component when FNM_PERIOD is set.
 *
 *      Returns FNM_MATCH iff MASK and NAME match, FNM_NOMATCH
 *      otherwise. The backslash character is used for escaping
 *      ? and * unless FNM_NOESCAPE is set. This function calls
 *      itself to resolve "*".
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int match_unix(const unsigned char *mask,
                      const unsigned char *name,
                      unsigned flags,
                      const unsigned char *comp)
{
    unsigned char c1, c2;           /* lower and upper bound of a set range */
    char invert, matched;           /* set is negated / NAME char is in set */
    const unsigned char *start;     /* first character of the set's contents */
    int rc;

    for (;;)
        switch (*mask)
        {
            case 0:

                /* There must be no extra characters at the end of NAME when
                 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
                 * in that case, NAME may point to a separator. */

                if (*name == 0)
                    return FNM_MATCH;
                if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
                    return FNM_MATCH;
                return FNM_NOMATCH;

            case '?':

                /* A question mark matches one character. It does not match
                 * the component separator if FNM_PATHNAME is set. It does
                 * not match a dot at the start of a component if FNM_PERIOD
                 * is set. */

                if (*name == 0)
                    return FNM_NOMATCH;
                if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
                    return FNM_NOMATCH;
                if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
                    return FNM_NOMATCH;
                ++mask;
                ++name;
                break;

            case '*':

                /* An asterisk matches zero or more characters. It does not
                 * match the component separator if FNM_PATHNAME is set. It
                 * does not match a dot at the start of a component if
                 * FNM_PERIOD is set. */

                if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
                    return FNM_NOMATCH;

                /* A run of asterisks is treated like a single one. */

                do
                {
                    ++mask;
                }
                while (*mask == '*');

                /* Try the rest of MASK against every possible tail of
                 * NAME, skipping one more character on each attempt.
                 * This loop is only left by one of the returns. */

                for (;;)
                {
                    rc = match_unix(mask, name, flags, comp);
                    if (rc != FNM_NOMATCH)
                        return rc;
                    if (*name == 0)
                        return FNM_NOMATCH;
                    if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
                        return FNM_NOMATCH;
                    ++name;
                }

            case '/':

                /* Separators match only separators. If _FNM_PATHPREFIX is
                 * set, a trailing separator in MASK (optionally followed by
                 * a single trailing backslash when escaping is enabled) is
                 * ignored at the end of NAME. */

                if (!(IS_UNIX_COMP_SEP(*name)
                      || ((flags & _FNM_PATHPREFIX) && *name == 0
                          && (mask[1] == 0
                              || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
                                  && mask[2] == 0)))))
                    return FNM_NOMATCH;

                ++mask;
                if (*name != 0)
                    ++name;

                /* This is the beginning of a new component if FNM_PATHNAME
                 * is set. */

                if (flags & FNM_PATHNAME)
                    comp = name;
                break;

            case '[':

                /* A set of characters. Always case-sensitive. Like "?",
                 * it needs one character of NAME, which must be neither
                 * a separator (with FNM_PATHNAME) nor a dot at the start
                 * of a component (with FNM_PERIOD). */

                if (*name == 0)
                    return FNM_NOMATCH;
                if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
                    return FNM_NOMATCH;
                if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
                    return FNM_NOMATCH;

                invert = 0;
                matched = 0;
                ++mask;

                /* If the first character is a ! or ^, the set matches all
                 * characters not listed in the set. */

                if (*mask == '!' || *mask == '^')
                {
                    ++mask;
                    invert = 1;
                }

                /* Loop over all the characters of the set. The loop ends
                 * if the end of the string is reached or if a ] is
                 * encountered unless it directly follows the initial [,
                 * [! or [^ (then it is an ordinary member of the set). */

                start = mask;
                while (!(*mask == 0 || (*mask == ']' && mask != start)))
                {
                    /* Get the next character which is optionally preceded
                     * by a backslash. */

                    c1 = *mask++;
                    if (!(flags & FNM_NOESCAPE) && c1 == '\\')
                    {
                        if (*mask == 0)
                            break;
                        c1 = *mask++;
                    }

                    /* Ranges of characters are written as a-z. Don't
                     * forget to check for the end of the string and to
                     * handle the backslash. If the character after - is a
                     * ], it isn't a range. */

                    if (*mask == '-' && mask[1] != ']')
                    {
                        ++mask;     /* Skip the - character */
                        if (!(flags & FNM_NOESCAPE) && *mask == '\\')
                            ++mask;
                        if (*mask == 0)
                            break;
                        c2 = *mask++;
                    }
                    else
                        c2 = c1;    /* single character: range of one */

                    /* Now check whether this character or range matches NAME. */

                    if (c1 <= *name && *name <= c2)
                        matched = 1;
                }

                /* If the end of the string is reached before a ] is found,
                 * the [ was not the start of a set after all: compare it
                 * literally to NAME and back up MASK to the character
                 * after the [ (including a ! or ^ that was skipped), so
                 * that the rest is matched as ordinary pattern text. */

                if (*mask == 0)
                {
                    if (*name != '[')
                        return FNM_NOMATCH;
                    ++name;
                    mask = start;
                    if (invert)
                        --mask;
                }
                else
                {
                    if (invert)
                        matched = !matched;
                    if (!matched)
                        return FNM_NOMATCH;
                    ++mask;         /* Skip the ] character */
                    /* *name was checked to be non-null at the top of
                     * this case, so this consumes the matched character */
                    if (*name != 0)
                        ++name;
                }
                break;

            case '\\':

                /* With FNM_NOESCAPE, a backslash is an ordinary character
                 * which must be found in NAME. Otherwise a backslash
                 * followed by * or ? makes that character literal; before
                 * any other character the backslash is just dropped and
                 * the following character of MASK is handled by the next
                 * pass of the loop. */

                ++mask;
                if (flags & FNM_NOESCAPE)
                {
                    if (*name != '\\')
                        return FNM_NOMATCH;
                    ++name;
                }
                else if (*mask == '*' || *mask == '?')
                {
                    if (*mask != *name)
                        return FNM_NOMATCH;
                    ++mask;
                    ++name;
                }
                break;

            default:

                /* All other characters match themselves, optionally
                 * ignoring case. */

                if (flags & _FNM_IGNORECASE)
                {
                    if (tolower(*mask) != tolower(*name))
                        return FNM_NOMATCH;
                }
                else
                {
                    if (*mask != *name)
                        return FNM_NOMATCH;
                }
                ++mask;
                ++name;
                break;
        }
}
2319
2320/*
2321 * _fnmatch_unsigned:
2322 * Check whether the path name NAME matches the wildcard MASK.
2323 *
2324 * Return:
2325 * -- 0 (FNM_MATCH) if it matches,
2326 * -- _FNM_NOMATCH if it doesn't,
2327 * -- FNM_ERR on error.
2328 *
2329 * The operation of this function is controlled by FLAGS.
2330 * This is an internal function, with unsigned arguments.
2331 *
2332 * (c) 1994-1996 by Eberhard Mattes.
2333 */
2334
2335static int _fnmatch_unsigned(const unsigned char *mask,
2336 const unsigned char *name,
2337 unsigned flags)
2338{
2339 int m_drive, n_drive,
2340 rc;
2341
2342 /* Match and skip the drive name if present. */
2343
2344 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2345 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2346
2347 if (m_drive != n_drive)
2348 {
2349 if (m_drive == -1 || n_drive == -1)
2350 return FNM_NOMATCH;
2351 if (!(flags & _FNM_IGNORECASE))
2352 return FNM_NOMATCH;
2353 if (tolower(m_drive) != tolower(n_drive))
2354 return FNM_NOMATCH;
2355 }
2356
2357 if (m_drive != -1)
2358 mask += 2;
2359 if (n_drive != -1)
2360 name += 2;
2361
2362 /* Colons are not allowed in path names, except for the drive name,
2363 * which was skipped above. */
2364
2365 if (has_colon(mask) || has_colon(name))
2366 return FNM_ERR;
2367
2368 /* The name "\\server\path" should not be matched by mask
2369 * "\*\server\path". Ditto for /. */
2370
2371 switch (flags & _FNM_STYLE_MASK)
2372 {
2373 case _FNM_OS2:
2374 case _FNM_DOS:
2375
2376 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2377 {
2378 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2379 return FNM_NOMATCH;
2380 name += 2;
2381 mask += 2;
2382 }
2383 break;
2384
2385 case _FNM_POSIX:
2386
2387 if (name[0] == '/' && name[1] == '/')
2388 {
2389 int i;
2390
2391 name += 2;
2392 for (i = 0; i < 2; ++i)
2393 if (mask[0] == '/')
2394 ++mask;
2395 else if (mask[0] == '\\' && mask[1] == '/')
2396 mask += 2;
2397 else
2398 return FNM_NOMATCH;
2399 }
2400
2401 /* In Unix styles, treating ? and * w.r.t. components is simple.
2402 * No need to do matching component by component. */
2403
2404 return match_unix(mask, name, flags, name);
2405 }
2406
2407 /* Now compare all the components of the path name, one by one.
2408 * Note that the path separator must not be enclosed in brackets. */
2409
2410 while (*mask != 0 || *name != 0)
2411 {
2412
2413 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2414 * is reached even if there are components left in NAME. */
2415
2416 if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2417 return FNM_MATCH;
2418
2419 /* Compare a single component of the path name. */
2420
2421 rc = match_comp(mask, name, flags);
2422 if (rc != FNM_MATCH)
2423 return rc;
2424
2425 /* Skip to the next component or to the end of the path name. */
2426
2427 mask = skip_comp_os2(mask);
2428 name = skip_comp_os2(name);
2429 }
2430
2431 /* If we reached the ends of both strings, the names match. */
2432
2433 if (*mask == 0 && *name == 0)
2434 return FNM_MATCH;
2435
2436 /* The names do not match. */
2437
2438 return FNM_NOMATCH;
2439}
2440
2441/*
2442 *@@ strhMatchOS2:
2443 * this matches wildcards, similar to what DosEditName does.
2444 * However, this does not require a file to be present, but
2445 * works on strings only.
2446 */
2447
2448BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2449 const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2450{
2451 return ((BOOL)(_fnmatch_unsigned(pcszMask,
2452 pcszName,
2453 _FNM_OS2 | _FNM_IGNORECASE)
2454 == FNM_MATCH)
2455 );
2456}
2457
2458/* ******************************************************************
2459 * *
2460 * Fast string searches *
2461 * *
2462 ********************************************************************/
2463
// ASSERT expands to nothing here, so every ASSERT(...) statement
// after this point in the file is compiled out.
// NOTE(review): presumably defined for the SFL-derived code below -- confirm.
#define ASSERT(a)
2465
2466/*
2467 * The following code has been taken from the "Standard
2468 * Function Library", file sflfind.c, and only slightly
2469 * modified to conform to the rest of this file.
2470 *
2471 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2472 * Revised: 98/05/04
2473 *
2474 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2475 *
2476 * The SFL Licence allows incorporating SFL code into other
2477 * programs, as long as the copyright is reprinted and the
2478 * code is marked as modified, so this is what we do.
2479 */
2480
/*
 *@@ strhmemfind:
 *      searches for a pattern in a block of memory using the
 *      Boyer-Moore-Horspool-Sunday algorithm.
 *
 *      The block and pattern may contain any values; you must
 *      explicitly provide their lengths. If you search for strings,
 *      use strlen() on the buffers.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the block, or NULL if the pattern was not found,
 *      if the pattern is larger than the block, or if in_block,
 *      in_pattern or shift is NULL. An empty pattern
 *      (pattern_size == 0) matches at the start of the block.
 *
 *      This algorithm needs a "shift table" to cache data for the
 *      search pattern. This table can be reused when performing
 *      several searches with the same pattern.
 *
 *      "shift" must point to an array big enough to hold 256 (2**8)
 *      "size_t" values.
 *
 *      If (*repeat_find == FALSE), the shift table is initialized.
 *      So on the first search with a given pattern, *repeat_find
 *      should be FALSE. This function sets it to TRUE after the
 *      shift table is initialised, allowing the initialisation
 *      phase to be skipped on subsequent searches. If repeat_find
 *      is NULL, the table is rebuilt on every call.
 *
 *      This function is most effective when repeated searches are
 *      made for the same pattern in one or more large buffers.
 *
 *      This function never reads outside of the
 *      [in_block, in_block + block_size) range.
 *
 *      Example:
 *
 +          PSZ     pszHaystack = "This is a sample string.",
 +                  pszNeedle = "string";
 +          size_t  shift[256];
 +          BOOL    fRepeat = FALSE;
 +
 +          PSZ     pFound = strhmemfind(pszHaystack,
 +                                       strlen(pszHaystack),   // block size
 +                                       pszNeedle,
 +                                       strlen(pszNeedle),     // pattern size
 +                                       shift,
 +                                       &fRepeat);
 *
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified by umoeller.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

void* strhmemfind(const void *in_block,     // in: block containing data
                  size_t block_size,        // in: size of block in bytes
                  const void *in_pattern,   // in: pattern to search for
                  size_t pattern_size,      // in: size of pattern block
                  size_t *shift,            // in/out: shift table (search buffer)
                  BOOL *repeat_find)        // in/out: if TRUE, *shift is already initialized
{
    const unsigned char
            *block = (const unsigned char *)in_block,       // byte view of the block
            *pattern = (const unsigned char *)in_pattern;   // byte view of the pattern
    size_t  byte_nbr,               // index while building the shift table
            pos,                    // offset of the current candidate match
            last,                   // offset of the last possible match
            step;                   // distance to the next candidate

    if (    (block == NULL)
         || (pattern == NULL)
         || (shift == NULL)
       )
        return (NULL);

    // Pattern must be smaller or equal in size to the block
    if (block_size < pattern_size)
        return (NULL);              // Otherwise it's not found

    if (pattern_size == 0)          // Empty patterns match at start
        return ((void *)block);

    // Build the shift table unless we're continuing a previous search.

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try to match again.

    if (!repeat_find || !*repeat_find)
    {
        for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
            shift[byte_nbr] = pattern_size + 1;
        for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
            shift[pattern[byte_nbr]] = pattern_size - byte_nbr;

        if (repeat_find)
            *repeat_find = TRUE;
    }

    // Search the block, each time jumping ahead by the amount found
    // in the shift table. Offsets are used instead of a moving pointer
    // so that no pointer beyond the end of the block is ever formed.

    last = block_size - pattern_size;
    pos = 0;

    for (;;)
    {
        if (memcmp(block + pos, pattern, pattern_size) == 0)
            return ((void *)(block + pos));

        // At the last candidate there is no byte following the window;
        // looking it up would read one byte past the caller's block.
        if (pos == last)
            break;

        step = shift[block[pos + pattern_size]];
        if (step > last - pos)      // next candidate would not fit
            break;
        pos += step;
    }

    return (NULL);                  // Found nothing
}
2610
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string
 *      and pattern are null-terminated strings.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the string, or NULL if the pattern was not found,
 *      if the pattern is longer than the string, or if either
 *      pointer is NULL. An empty pattern matches at the start
 *      of the string.
 *
 *      Characters are compared after folding with tolower(), so
 *      strings match irrespective of case. For an exact
 *      (case-sensitive) search, see strhmemfind. Will not work
 *      on multibyte characters.
 *
 *      Every character is converted to unsigned char before it
 *      is handed to tolower(), so characters above 0x7F are
 *      handled safely even if plain "char" is signed.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t  shift[256];             // Shift distance for each byte value
    size_t  string_size,
            pattern_size,
            byte_nbr,               // Index into pattern / shift table
            pos,                    // Offset of the current candidate match
            last,                   // Offset of the last possible match
            step;                   // Distance to the next candidate
    const unsigned char
            *text,                  // Unsigned view of string
            *pat;                   // Unsigned view of pattern

    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);              // Otherwise it cannot be found

    if (pattern_size == 0)          // Empty string matches at start
        return ((char *) string);

    // tolower() is only defined for values representable as unsigned
    // char (or EOF), so all character accesses go through these views.
    text = (const unsigned char *) string;
    pat  = (const unsigned char *) pattern;

    // Build the shift table.

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try to match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift[byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift[(unsigned char) tolower (pat[byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find the location
    // of the next potential match. Offsets are used instead of a moving
    // pointer so that no pointer beyond the end of the string is formed.

    last = string_size - pattern_size;
    pos = 0;

    for (;;)
    {
        // Compare pattern until it all matches, or we find a difference
        for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
            if (tolower (text[pos + byte_nbr]) != tolower (pat[byte_nbr]))
                break;

        // If we found a match, return the start address
        if (byte_nbr == pattern_size)
            return ((char *) (string + pos));

        // text[pos + pattern_size] is at worst the terminating null
        // byte, so this lookup always stays inside the string.
        step = shift[(unsigned char) tolower (text[pos + pattern_size])];
        if (step > last - pos)      // next candidate would not fit
            break;
        pos += step;
    }

    return (NULL);                  // Found nothing
}
2705
Note: See TracBrowser for help on using the repository browser.