source: trunk/src/helpers/stringh.c@ 18

Last change on this file since 18 was 18, checked in by umoeller, 25 years ago

Tons of updates.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 84.3 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#include <os2.h>
45
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <math.h>
51
52#include "setup.h" // code generation and debugging options
53
54#include "helpers\stringh.h"
55#include "helpers\xstring.h" // extended string helpers
56
57#pragma hdrstop
58
59/*
60 *@@category: Helpers\C helpers\String management
61 */
62
63/*
64 *@@ strhdup:
65 * like strdup, but this one
66 * doesn't crash if pszSource is NULL,
67 * but returns NULL also.
68 *
69 *@@added V0.9.0 [umoeller]
70 */
71
72PSZ strhdup(const char *pszSource)
73{
74 if (pszSource)
75 return (strdup(pszSource));
76 else
77 return (0);
78}
79
80/*
81 *@@ strhistr:
82 * like strstr, but case-insensitive.
83 *
84 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
85 */
86
87PSZ strhistr(const char *string1, const char *string2)
88{
89 PSZ prc = NULL;
90
91 if ((string1) && (string2))
92 {
93 PSZ pszSrchIn = strdup(string1);
94 PSZ pszSrchFor = strdup(string2);
95
96 if ((pszSrchIn) && (pszSrchFor))
97 {
98 strupr(pszSrchIn);
99 strupr(pszSrchFor);
100
101 prc = strstr(pszSrchIn, pszSrchFor);
102 if (prc)
103 {
104 // prc now has the first occurence of the string,
105 // but in pszSrchIn; we need to map this
106 // return value to the original string
107 prc = (prc-pszSrchIn) // offset in pszSrchIn
108 + (PSZ)string1;
109 }
110 }
111 if (pszSrchFor)
112 free(pszSrchFor);
113 if (pszSrchIn)
114 free(pszSrchIn);
115 }
116 return (prc);
117}
118
119/*
120 *@@ strhncpy0:
121 * like strncpy, but always appends a 0 character.
122 */
123
124ULONG strhncpy0(PSZ pszTarget,
125 const char *pszSource,
126 ULONG cbSource)
127{
128 ULONG ul = 0;
129 PSZ pTarget = pszTarget,
130 pSource = (PSZ)pszSource;
131
132 for (ul = 0; ul < cbSource; ul++)
133 if (*pSource)
134 *pTarget++ = *pSource++;
135 else
136 break;
137 *pTarget = 0;
138
139 return (ul);
140}
141
142/*
143 * strhCount:
144 * this counts the occurences of c in pszSearch.
145 */
146
147ULONG strhCount(const char *pszSearch,
148 CHAR c)
149{
150 PSZ p = (PSZ)pszSearch;
151 ULONG ulCount = 0;
152 while (TRUE)
153 {
154 p = strchr(p, c);
155 if (p)
156 {
157 ulCount++;
158 p++;
159 }
160 else
161 break;
162 }
163 return (ulCount);
164}
165
166/*
167 *@@ strhIsDecimal:
168 * returns TRUE if psz consists of decimal digits only.
169 */
170
171BOOL strhIsDecimal(PSZ psz)
172{
173 PSZ p = psz;
174 while (*p != 0)
175 {
176 if (isdigit(*p) == 0)
177 return (FALSE);
178 p++;
179 }
180
181 return (TRUE);
182}
183
184/*
185 *@@ strhSubstr:
186 * this creates a new PSZ containing the string
187 * from pBegin to pEnd, excluding the pEnd character.
188 * The new string is null-terminated. The caller
189 * must free() the new string after use.
190 *
191 * Example:
192 + "1234567890"
193 + ^ ^
194 + p1 p2
195 + strhSubstr(p1, p2)
196 * would return a new string containing "2345678".
197 */
198
199PSZ strhSubstr(const char *pBegin, const char *pEnd)
200{
201 ULONG cbSubstr = (pEnd - pBegin);
202 PSZ pszSubstr = (PSZ)malloc(cbSubstr + 1);
203 strhncpy0(pszSubstr, pBegin, cbSubstr);
204 return (pszSubstr);
205}
206
207/*
208 *@@ strhExtract:
209 * searches pszBuf for the cOpen character and returns
210 * the data in between cOpen and cClose, excluding
211 * those two characters, in a newly allocated buffer
212 * which you must free() afterwards.
213 *
214 * Spaces and newlines/linefeeds are skipped.
215 *
216 * If the search was successful, the new buffer
217 * is returned and, if (ppEnd != NULL), *ppEnd points
218 * to the first character after the cClose character
219 * found in the buffer.
220 *
221 * If the search was not successful, NULL is
222 * returned, and *ppEnd is unchanged.
223 *
224 * If another cOpen character is found before
225 * cClose, matching cClose characters will be skipped.
226 * You can therefore nest the cOpen and cClose
227 * characters.
228 *
229 * This function ignores cOpen and cClose characters
230 * in C-style comments and strings surrounded by
231 * double quotes.
232 *
233 * Example:
234 + PSZ pszBuf = "KEYWORD { --blah-- } next",
235 + pEnd;
236 + strhExtract(pszBuf,
237 + '{', '}',
238 + &pEnd)
239 * would return a new buffer containing " --blah-- ",
240 * and ppEnd would afterwards point to the space
241 * before "next" in the static buffer.
242 *
243 *@@added V0.9.0 [umoeller]
244 */
245
246PSZ strhExtract(PSZ pszBuf, // in: search buffer
247 CHAR cOpen, // in: opening char
248 CHAR cClose, // in: closing char
249 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
250{
251 PSZ pszReturn = NULL;
252
253 if (pszBuf)
254 {
255 PSZ pOpen = strchr(pszBuf, cOpen);
256 if (pOpen)
257 {
258 // opening char found:
259 // now go thru the whole rest of the buffer
260 PSZ p = pOpen+1;
261 LONG lLevel = 1; // if this goes 0, we're done
262 while (*p)
263 {
264 if (*p == cOpen)
265 lLevel++;
266 else if (*p == cClose)
267 {
268 lLevel--;
269 if (lLevel <= 0)
270 {
271 // matching closing bracket found:
272 // extract string
273 pszReturn = strhSubstr(pOpen+1, // after cOpen
274 p); // excluding cClose
275 if (ppEnd)
276 *ppEnd = p+1;
277 break; // while (*p)
278 }
279 }
280 else if (*p == '\"')
281 {
282 // beginning of string:
283 PSZ p2 = p+1;
284 // find end of string
285 while ((*p2) && (*p2 != '\"'))
286 p2++;
287
288 if (*p2 == '\"')
289 // closing quote found:
290 // search on after that
291 p = p2; // raised below
292 else
293 break; // while (*p)
294 }
295
296 p++;
297 }
298 }
299 }
300
301 return (pszReturn);
302}
303
304/*
305 *@@ strhQuote:
306 * similar to strhExtract, except that
307 * opening and closing chars are the same,
308 * and therefore no nesting is possible.
309 * Useful for extracting stuff between
310 * quotes.
311 *
312 *@@added V0.9.0 [umoeller]
313 */
314
315PSZ strhQuote(PSZ pszBuf,
316 CHAR cQuote,
317 PSZ *ppEnd)
318{
319 PSZ pszReturn = NULL,
320 p1 = NULL;
321 if ((p1 = strchr(pszBuf, cQuote)))
322 {
323 PSZ p2 = strchr(p1+1, cQuote);
324 if (p2)
325 {
326 pszReturn = strhSubstr(p1+1, p2);
327 if (ppEnd)
328 // store closing char
329 *ppEnd = p2 + 1;
330 }
331 }
332
333 return (pszReturn);
334}
335
336/*
337 *@@ strhStrip:
338 * removes all double spaces.
339 * This copies within the "psz" buffer.
340 * If any double spaces are found, the
341 * string will be shorter than before,
342 * but the buffer is _not_ reallocated,
343 * so there will be unused bytes at the
344 * end.
345 *
346 * Returns the number of spaces removed.
347 *
348 *@@added V0.9.0 [umoeller]
349 */
350
351ULONG strhStrip(PSZ psz) // in/out: string
352{
353 PSZ p;
354 ULONG cb = strlen(psz),
355 ulrc = 0;
356
357 for (p = psz; p < psz+cb; p++)
358 {
359 if ((*p == ' ') && (*(p+1) == ' '))
360 {
361 PSZ p2 = p;
362 while (*p2)
363 {
364 *p2 = *(p2+1);
365 p2++;
366 }
367 cb--;
368 p--;
369 ulrc++;
370 }
371 }
372 return (ulrc);
373}
374
375/*
376 *@@ strhins:
377 * this inserts one string into another.
378 *
379 * pszInsert is inserted into pszBuffer at offset
380 * ulInsertOfs (which counts from 0).
381 *
382 * A newly allocated string is returned. pszBuffer is
383 * not changed. The new string should be free()'d after
384 * use.
385 *
386 * Upon errors, NULL is returned.
387 *
388 *@@changed V0.9.0 [umoeller]: completely rewritten.
389 */
390
391PSZ strhins(const char *pcszBuffer,
392 ULONG ulInsertOfs,
393 const char *pcszInsert)
394{
395 PSZ pszNew = NULL;
396
397 if ((pcszBuffer) && (pcszInsert))
398 {
399 do {
400 ULONG cbBuffer = strlen(pcszBuffer);
401 ULONG cbInsert = strlen(pcszInsert);
402
403 // check string length
404 if (ulInsertOfs > cbBuffer + 1)
405 break; // do
406
407 // OK, let's go.
408 pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
409
410 // copy stuff before pInsertPos
411 memcpy(pszNew,
412 pcszBuffer,
413 ulInsertOfs);
414 // copy string to be inserted
415 memcpy(pszNew + ulInsertOfs,
416 pcszInsert,
417 cbInsert);
418 // copy stuff after pInsertPos
419 strcpy(pszNew + ulInsertOfs + cbInsert,
420 pcszBuffer + ulInsertOfs);
421 } while (FALSE);
422 }
423
424 return (pszNew);
425}
426
/*
 *@@ strhrpl:
 *      wrapper around xstrrpl to work with C strings.
 *      Note that *ppszBuf can get reallocated and must
 *      be free()'able.
 *
 *      The three C strings are wrapped in temporary
 *      XSTRING structures and passed to xstrrpl. The
 *      return value of xstrrpl is returned unchanged;
 *      only if it is non-zero, *ppszBuf is updated with
 *      the buffer pointer of the (possibly reallocated)
 *      XSTRING.
 *
 *      Repetitive use of this wrapper is not recommended
 *      because it is considerably slower than xstrrpl.
 *
 *@@added V0.9.6 (2000-11-01) [umoeller]
 */

ULONG strhrpl(PSZ *ppszBuf,                // in/out: string
              PULONG pulOfs,               // in: where to begin search (0 = start);
                                           // out: ofs of first char after replacement string
              const char *pcszSearch,    // in: search string; cannot be NULL
              const char *pcszReplace)   // in: replacement string; cannot be NULL
{
    ULONG   ulrc = 0;
    XSTRING xstrBuf,
            xstrFind,
            xstrReplace;
    // scratch table and flag handed to xstrrpl; neither is used
    // by this wrapper after the call
    size_t  ShiftTable[256];
    BOOL    fRepeat = FALSE;
    // NOTE(review): presumably xstrset makes the XSTRING use the given
    // pointer directly instead of copying it (none of the three XSTRINGs
    // is cleaned up here) -- confirm against xstring.c
    xstrInit(&xstrBuf, 0);
    xstrset(&xstrBuf, *ppszBuf);
    xstrInit(&xstrFind, 0);
    xstrset(&xstrFind, (PSZ)pcszSearch);
    xstrInit(&xstrReplace, 0);
    xstrset(&xstrReplace, (PSZ)pcszReplace);

    if ((ulrc = xstrrpl(&xstrBuf,
                        pulOfs,
                        &xstrFind,
                        &xstrReplace,
                        ShiftTable,
                        &fRepeat)))
        // replaced: hand the buffer of the XSTRING back to the caller
        *ppszBuf = xstrBuf.psz;

    return (ulrc);
}
469
470/*
471 * strhWords:
472 * returns the no. of words in "psz".
473 * A string is considered a "word" if
474 * it is surrounded by spaces only.
475 *
476 *@@added V0.9.0 [umoeller]
477 */
478
479ULONG strhWords(PSZ psz)
480{
481 PSZ p;
482 ULONG cb = strlen(psz),
483 ulWords = 0;
484 if (cb > 1)
485 {
486 ulWords = 1;
487 for (p = psz; p < psz+cb; p++)
488 if (*p == ' ')
489 ulWords++;
490 }
491 return (ulWords);
492}
493
494/*
495 *@@ strhThousandsULong:
496 * converts a ULONG into a decimal string, while
497 * inserting thousands separators into it. Specify
498 * the separator character in cThousands.
499 *
500 * Returns pszTarget so you can use it directly
501 * with sprintf and the "%s" flag.
502 *
503 * For cThousands, you should use the data in
504 * OS2.INI ("PM_National" application), which is
505 * always set according to the "Country" object.
506 * You can use prfhQueryCountrySettings to
507 * retrieve this setting.
508 *
509 * Use strhThousandsDouble for "double" values.
510 */
511
512PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
513 ULONG ul, // in: decimal to convert
514 CHAR cThousands) // in: separator char (e.g. '.')
515{
516 USHORT ust, uss, usc;
517 CHAR szTemp[40];
518 sprintf(szTemp, "%lu", ul);
519
520 ust = 0;
521 usc = strlen(szTemp);
522 for (uss = 0; uss < usc; uss++)
523 {
524 if (uss)
525 if (((usc - uss) % 3) == 0)
526 {
527 pszTarget[ust] = cThousands;
528 ust++;
529 }
530 pszTarget[ust] = szTemp[uss];
531 ust++;
532 }
533 pszTarget[ust] = '\0';
534
535 return (pszTarget);
536}
537
538/*
539 *@@ strhThousandsDouble:
540 * like strhThousandsULong, but for a "double"
541 * value. Note that after-comma values are truncated.
542 */
543
544PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
545{
546 USHORT ust, uss, usc;
547 CHAR szTemp[40];
548 sprintf(szTemp, "%.0f", floor(dbl));
549
550 ust = 0;
551 usc = strlen(szTemp);
552 for (uss = 0; uss < usc; uss++)
553 {
554 if (uss)
555 if (((usc - uss) % 3) == 0)
556 {
557 pszTarget[ust] = cThousands;
558 ust++;
559 }
560 pszTarget[ust] = szTemp[uss];
561 ust++;
562 }
563 pszTarget[ust] = '\0';
564
565 return (pszTarget);
566}
567
568/*
569 *@@ strhVariableDouble:
570 * like strhThousandsULong, but for a "double" value, and
571 * with a variable number of decimal places depending on the
572 * size of the quantity.
573 *
574 *@@added V0.9.6 (2000-11-12) [pr]
575 */
576
577PSZ strhVariableDouble(PSZ pszTarget,
578 double dbl,
579 PSZ pszUnits,
580 CHAR cThousands)
581{
582 if (dbl < 100.0)
583 sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
584 else
585 if (dbl < 1000.0)
586 sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
587 else
588 strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
589 pszUnits);
590
591 return(pszTarget);
592}
593
594/*
595 *@@ strhFileDate:
596 * converts file date data to a string (to pszBuf).
597 * You can pass any FDATE structure to this function,
598 * which are returned in those FILEFINDBUF* or
599 * FILESTATUS* structs by the Dos* functions.
600 *
601 * ulDateFormat is the PM setting for the date format,
602 * as set in the "Country" object, and can be queried using
603 + PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
604 *
605 * meaning:
606 * -- 0 mm.dd.yyyy (English)
607 * -- 1 dd.mm.yyyy (e.g. German)
608 * -- 2 yyyy.mm.dd (Japanese, ISO)
609 * -- 3 yyyy.dd.mm
610 *
611 * cDateSep is used as a date separator (e.g. '.').
612 * This can be queried using:
613 + prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
614 *
615 * Alternatively, you can query all the country settings
616 * at once using prfhQueryCountrySettings (prfh.c).
617 *
618 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
619 */
620
621VOID strhFileDate(PSZ pszBuf, // out: string returned
622 FDATE *pfDate, // in: date information
623 ULONG ulDateFormat, // in: date format (0-3)
624 CHAR cDateSep) // in: date separator (e.g. '.')
625{
626 DATETIME dt;
627 dt.day = pfDate->day;
628 dt.month = pfDate->month;
629 dt.year = pfDate->year + 1980;
630
631 strhDateTime(pszBuf,
632 NULL, // no time
633 &dt,
634 ulDateFormat,
635 cDateSep,
636 0, 0); // no time
637}
638
639/*
640 *@@ strhFileTime:
641 * converts file time data to a string (to pszBuf).
642 * You can pass any FTIME structure to this function,
643 * which are returned in those FILEFINDBUF* or
644 * FILESTATUS* structs by the Dos* functions.
645 *
646 * ulTimeFormat is the PM setting for the time format,
647 * as set in the "Country" object, and can be queried using
648 + PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
649 * meaning:
650 * -- 0 12-hour clock
651 * -- >0 24-hour clock
652 *
653 * cDateSep is used as a time separator (e.g. ':').
654 * This can be queried using:
655 + prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
656 *
657 * Alternatively, you can query all the country settings
658 * at once using prfhQueryCountrySettings (prfh.c).
659 *
660 *@@changed 99-03-15 fixed 12-hour crash
661 *@@changed (99-11-07) [umoeller]: now calling strhDateTime
662 */
663
664VOID strhFileTime(PSZ pszBuf, // out: string returned
665 FTIME *pfTime, // in: time information
666 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
667 CHAR cTimeSep) // in: time separator (e.g. ':')
668{
669 DATETIME dt;
670 dt.hours = pfTime->hours;
671 dt.minutes = pfTime->minutes;
672 dt.seconds = pfTime->twosecs * 2;
673
674 strhDateTime(NULL, // no date
675 pszBuf,
676 &dt,
677 0, 0, // no date
678 ulTimeFormat,
679 cTimeSep);
680}
681
682/*
683 *@@ strhDateTime:
684 * converts Control Program DATETIME info
685 * into two strings. See strhFileDate and strhFileTime
686 * for more detailed parameter descriptions.
687 *
688 *@@added V0.9.0 (99-11-07) [umoeller]
689 */
690
691VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
692 PSZ pszTime, // out: time string returned (can be NULL)
693 DATETIME *pDateTime, // in: date/time information
694 ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
695 CHAR cDateSep, // in: date separator (e.g. '.')
696 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
697 CHAR cTimeSep) // in: time separator (e.g. ':')
698{
699 if (pszDate)
700 {
701 switch (ulDateFormat)
702 {
703 case 0: // mm.dd.yyyy (English)
704 sprintf(pszDate, "%02d%c%02d%c%04d",
705 pDateTime->month,
706 cDateSep,
707 pDateTime->day,
708 cDateSep,
709 pDateTime->year);
710 break;
711
712 case 1: // dd.mm.yyyy (e.g. German)
713 sprintf(pszDate, "%02d%c%02d%c%04d",
714 pDateTime->day,
715 cDateSep,
716 pDateTime->month,
717 cDateSep,
718 pDateTime->year);
719 break;
720
721 case 2: // yyyy.mm.dd (Japanese)
722 sprintf(pszDate, "%04d%c%02d%c%02d",
723 pDateTime->year,
724 cDateSep,
725 pDateTime->month,
726 cDateSep,
727 pDateTime->day);
728 break;
729
730 default: // yyyy.dd.mm
731 sprintf(pszDate, "%04d%c%02d%c%02d",
732 pDateTime->year,
733 cDateSep,
734 pDateTime->day,
735 cDateSep,
736 pDateTime->month);
737 break;
738 }
739 }
740
741 if (pszTime)
742 {
743 if (ulTimeFormat == 0)
744 {
745 // for 12-hour clock, we need additional INI data
746 CHAR szAMPM[10] = "err";
747
748 if (pDateTime->hours > 12)
749 {
750 // > 12h: PM.
751
752 // Note: 12:xx noon is 12 AM, not PM (even though
753 // AM stands for "ante meridiam", but English is just
754 // not logical), so that's handled below.
755
756 PrfQueryProfileString(HINI_USER,
757 "PM_National",
758 "s2359", // key
759 "PM", // default
760 szAMPM, sizeof(szAMPM)-1);
761 sprintf(pszTime, "%02d%c%02d%c%02d %s",
762 // leave 12 == 12 (not 0)
763 pDateTime->hours % 12,
764 cTimeSep,
765 pDateTime->minutes,
766 cTimeSep,
767 pDateTime->seconds,
768 szAMPM);
769 }
770 else
771 {
772 // <= 12h: AM
773 PrfQueryProfileString(HINI_USER,
774 "PM_National",
775 "s1159", // key
776 "AM", // default
777 szAMPM, sizeof(szAMPM)-1);
778 sprintf(pszTime, "%02d%c%02d%c%02d %s",
779 pDateTime->hours,
780 cTimeSep,
781 pDateTime->minutes,
782 cTimeSep,
783 pDateTime->seconds,
784 szAMPM);
785 }
786 }
787 else
788 // 24-hour clock
789 sprintf(pszTime, "%02d%c%02d%c%02d",
790 pDateTime->hours,
791 cTimeSep,
792 pDateTime->minutes,
793 cTimeSep,
794 pDateTime->seconds);
795 }
796}
797
798/*
799 *@@ strhGetWord:
800 * finds word boundaries.
801 *
802 * *ppszStart is used as the beginning of the
803 * search.
804 *
805 * If a word is found, *ppszStart is set to
806 * the first character of the word which was
807 * found and *ppszEnd receives the address
808 * of the first character _after_ the word,
809 * which is probably a space or a \n or \r char.
810 * We then return TRUE.
811 *
812 * The search is stopped if a null character
813 * is found or pLimit is reached. In that case,
814 * FALSE is returned.
815 *
816 *@@added V0.9.1 (2000-02-13) [umoeller]
817 */
818
819BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
820 // out: start of word (if TRUE is returned)
821 const char *pLimit, // in: ptr to last char after *ppszStart to be
822 // searched; if the word does not end before
823 // or with this char, FALSE is returned
824 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
825 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
826 PSZ *ppszEnd) // out: first char _after_ word
827 // (if TRUE is returned)
828{
829 // characters after which a word can be started
830 // const char *pcszBeginChars = "\x0d\x0a ";
831 // const char *pcszEndChars = "\x0d\x0a /-";
832
833 PSZ pStart = *ppszStart;
834
835 // find start of word
836 while ( (pStart < (PSZ)pLimit)
837 && (strchr(pcszBeginChars, *pStart))
838 )
839 // if char is a "before word" char: go for next
840 pStart++;
841
842 if (pStart < (PSZ)pLimit)
843 {
844 // found a valid "word start" character
845 // (which is not in pcszBeginChars):
846
847 // find end of word
848 PSZ pEndOfWord = pStart;
849 while ( (pEndOfWord <= (PSZ)pLimit)
850 && (strchr(pcszEndChars, *pEndOfWord) == 0)
851 )
852 // if char is not an "end word" char: go for next
853 pEndOfWord++;
854
855 if (pEndOfWord <= (PSZ)pLimit)
856 {
857 // whoa, got a word:
858 *ppszStart = pStart;
859 *ppszEnd = pEndOfWord;
860 return (TRUE);
861 }
862 }
863
864 return (FALSE);
865}
866
867/*
868 *@@ strhIsWord:
869 * returns TRUE if p points to a "word"
870 * in pcszBuf.
871 *
872 * p is considered a word if the character _before_
873 * it is in pcszBeginChars and the char _after_
874 * it (i.e. *(p+cbSearch)) is in pcszEndChars.
875 *
876 *@@added V0.9.6 (2000-11-12) [umoeller]
877 */
878
879BOOL strhIsWord(const char *pcszBuf,
880 const char *p, // in: start of word
881 ULONG cbSearch, // in: length of word
882 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
883 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
884{
885 BOOL fEndOK = FALSE;
886
887 // check previous char
888 if ( (p == pcszBuf)
889 || (strchr(pcszBeginChars, *(p-1)))
890 )
891 {
892 // OK, valid begin char:
893 // check end char
894 CHAR cNextChar = *(p + cbSearch);
895 if (cNextChar == 0)
896 fEndOK = TRUE;
897 else
898 {
899 char *pc = strchr(pcszEndChars, cNextChar);
900 if (pc)
901 // OK, is end char: avoid doubles of that char,
902 // but allow spaces
903 if ( (cNextChar+1 != *pc)
904 || (cNextChar+1 == ' ')
905 || (cNextChar+1 == 0)
906 )
907 fEndOK = TRUE;
908 }
909 }
910
911 return (fEndOK);
912}
913
914/*
915 *@@ strhFindWord:
916 * searches for pszSearch in pszBuf, which is
917 * returned if found (or NULL if not).
918 *
919 * As opposed to strstr, this finds pszSearch
920 * only if it is a "word". A search string is
921 * considered a word if the character _before_
922 * it is in pcszBeginChars and the char _after_
923 * it is in pcszEndChars.
924 *
925 * Example:
926 + strhFindWord("This is an example.", "is");
927 + returns ...........^ this, but not the "is" in "This".
928 *
929 * The algorithm here uses strstr to find pszSearch in pszBuf
930 * and performs additional "is-word" checks for each item found
931 * (by calling strhIsWord).
932 *
933 * Note that this function is fairly slow compared to xstrFindWord.
934 *
935 *@@added V0.9.0 (99-11-08) [umoeller]
936 *@@changed (99-11-10) [umoeller]: tried second algorithm, reverted to original...
937 */
938
939PSZ strhFindWord(const char *pszBuf,
940 const char *pszSearch,
941 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
942 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
943{
944 PSZ pszReturn = 0;
945 ULONG cbBuf = strlen(pszBuf),
946 cbSearch = strlen(pszSearch);
947
948 if ((cbBuf) && (cbSearch))
949 {
950 const char *p = pszBuf;
951
952 do // while p
953 {
954 p = strstr(p, pszSearch);
955 if (p)
956 {
957 // string found:
958 // check if that's a word
959
960 if (strhIsWord(pszBuf,
961 p,
962 cbSearch,
963 pcszBeginChars,
964 pcszEndChars))
965 {
966 // valid end char:
967 pszReturn = (PSZ)p;
968 break;
969 }
970
971 p += cbSearch;
972 }
973 } while (p);
974
975 }
976 return (pszReturn);
977}
978
979/*
980 *@@ strhFindEOL:
981 * returns a pointer to the next \r, \n or null character
982 * following pszSearchIn. Stores the offset in *pulOffset.
983 *
984 * This should never return NULL because at some point,
985 * there will be a null byte in your string.
986 *
987 *@@added V0.9.4 (2000-07-01) [umoeller]
988 */
989
990PSZ strhFindEOL(PSZ pszSearchIn, // in: where to search
991 PULONG pulOffset) // out: offset (ptr can be NULL)
992{
993 PSZ p = pszSearchIn,
994 prc = NULL;
995 while (TRUE)
996 {
997 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
998 {
999 prc = p;
1000 break;
1001 }
1002 p++;
1003 }
1004
1005 if (pulOffset)
1006 *pulOffset = prc - pszSearchIn;
1007 return (prc);
1008}
1009
1010/*
1011 *@@ strhFindNextLine:
1012 * like strhFindEOL, but this returns the character
1013 * _after_ \r or \n. Note that this might return
1014 * a pointer to terminating NULL character also.
1015 */
1016
1017PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1018{
1019 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1020 // pEOL now points to the \r char or the terminating 0 byte;
1021 // if not null byte, advance pointer
1022 PSZ pNextLine = pEOL;
1023 if (*pNextLine == '\r')
1024 pNextLine++;
1025 if (*pNextLine == '\n')
1026 pNextLine++;
1027 if (pulOffset)
1028 *pulOffset = pNextLine - pszSearchIn;
1029 return (pNextLine);
1030}
1031
1032/*
1033 *@@ strhFindKey:
1034 * finds pszKey in pszSearchIn; similar to strhistr,
1035 * but this one makes sure the key is at the beginning
1036 * of a line. Spaces before the key are tolerated.
1037 * Returns NULL if the key was not found.
1038 *
1039 * Used by strhGetParameter/strhSetParameter; useful
1040 * for analyzing CONFIG.SYS settings.
1041 *
1042 *@@changed V0.9.0 [umoeller]: fixed bug in that this would also return something if only the first chars matched
1043 *@@changed V0.9.0 [umoeller]: fixed bug which could cause character before pszSearchIn to be examined
1044 */
1045
1046PSZ strhFindKey(const char *pcszSearchIn, // in: text buffer to search
1047 const char *pcszKey, // in: key to search for
1048 PBOOL pfIsAllUpperCase) // out: TRUE if key is completely in upper case;
1049 // can be NULL if not needed
1050{
1051 const char *p = NULL;
1052 PSZ pReturn = NULL;
1053 // BOOL fFound = FALSE;
1054
1055 p = pcszSearchIn;
1056 do {
1057 p = strhistr(p, pcszKey);
1058
1059 if ((p) && (p >= pcszSearchIn))
1060 {
1061 // make sure the key is at the beginning of a line
1062 // by going backwards until we find a char != " "
1063 const char *p2 = p;
1064 while ( (*p2 == ' ')
1065 && (p2 > pcszSearchIn)
1066 )
1067 p2--;
1068
1069 // if previous char is an EOL sign, go on
1070 if ( (p2 == pcszSearchIn) // order fixed V0.9.0, Rdiger Ihle
1071 || (*(p2-1) == '\r')
1072 || (*(p2-1) == '\n')
1073 )
1074 {
1075 // now check whether the char after the search
1076 // is a "=" char
1077 // ULONG cbKey = strlen(pszKey);
1078
1079 // tolerate spaces before "="
1080 /* PSZ p3 = p;
1081 while (*(p3+cbKey) == ' ')
1082 p3++;
1083
1084 if (*(p3+cbKey) == '=') */
1085 {
1086 // found:
1087 pReturn = (PSZ)p; // go on, p contains found key
1088
1089 // test for all upper case?
1090 if (pfIsAllUpperCase)
1091 {
1092 ULONG cbKey2 = strlen(pcszKey),
1093 ul = 0;
1094 *pfIsAllUpperCase = TRUE;
1095 for (ul = 0; ul < cbKey2; ul++)
1096 if (islower(*(p+ul)))
1097 {
1098 *pfIsAllUpperCase = FALSE;
1099 break; // for
1100 }
1101 }
1102
1103 break; // do
1104 }
1105 } // else search next key
1106
1107 p++; // search on after this key
1108 }
1109 } while ((!pReturn) && (p != NULL) && (p != pcszSearchIn));
1110
1111 return (pReturn);
1112}
1113
1114/*
1115 *@@ strhGetParameter:
1116 * searches pszSearchIn for the key pszKey; if found, it
1117 * returns a pointer to the following characters in pszSearchIn
1118 * and, if pszCopyTo != NULL, copies the rest of the line to
1119 * that buffer, of which cbCopyTo specified the size.
1120 *
1121 * If the key is not found, NULL is returned.
1122 * String search is done by calling strhFindKey.
1123 * This is useful for querying CONFIG.SYS settings.
1124 *
1125 * <B>Example:</B>
1126 *
1127 * this would return "YES" if you searched for "PAUSEONERROR=",
1128 * and "PAUSEONERROR=YES" existed in pszSearchIn.
1129 */
1130
1131PSZ strhGetParameter(const char *pcszSearchIn, // in: text buffer to search
1132 const char *pcszKey, // in: key to search for
1133 PSZ pszCopyTo, // out: key value
1134 ULONG cbCopyTo) // out: sizeof(*pszCopyTo)
1135{
1136 PSZ p = strhFindKey(pcszSearchIn, pcszKey, NULL),
1137 prc = NULL;
1138 if (p)
1139 {
1140 prc = p + strlen(pcszKey);
1141 if (pszCopyTo)
1142 // copy to pszCopyTo
1143 {
1144 ULONG cb;
1145 PSZ pEOL = strhFindEOL(prc, &cb);
1146 if (pEOL)
1147 {
1148 if (cb > cbCopyTo)
1149 cb = cbCopyTo-1;
1150 strhncpy0(pszCopyTo, prc, cb);
1151 }
1152 }
1153 }
1154
1155 return (prc);
1156}
1157
1158/*
1159 *@@ strhSetParameter:
1160 * searches *ppszBuf for the key pszKey; if found, it
1161 * replaces the characters following this key up to the
1162 * end of the line with pszParam. If pszKey is not found in
1163 * *ppszBuf, it is appended to the file in a new line.
1164 *
1165 * If any changes are made, *ppszBuf is re-allocated.
1166 *
1167 * This function searches w/out case sensitivity.
1168 *
1169 * Returns a pointer to the new parameter inside the buffer.
1170 *
1171 *@@changed V0.9.0 [umoeller]: changed function prototype to PSZ* ppszSearchIn
1172 */
1173
1174PSZ strhSetParameter(PSZ* ppszBuf, // in: text buffer to search
1175 const char *pcszKey, // in: key to search for
1176 PSZ pszNewParam, // in: new parameter to set for key
1177 BOOL fRespectCase) // in: if TRUE, pszNewParam will
1178 // be converted to upper case if the found key is
1179 // in upper case also. pszNewParam should be in
1180 // lower case if you use this.
1181{
1182 BOOL fIsAllUpperCase = FALSE;
1183 PSZ pKey = strhFindKey(*ppszBuf, pcszKey, &fIsAllUpperCase),
1184 prc = NULL;
1185
1186 if (pKey)
1187 {
1188 // key found in file:
1189 // replace existing parameter
1190 PSZ pOldParam = pKey + strlen(pcszKey);
1191
1192 prc = pOldParam;
1193 // pOldParam now has the old parameter, which we
1194 // will overwrite now
1195
1196 if (pOldParam)
1197 {
1198 ULONG cbOldParam;
1199 PSZ pEOL = strhFindEOL(pOldParam, &cbOldParam);
1200 // pEOL now has first end-of-line after the parameter
1201
1202 if (pEOL)
1203 {
1204 XSTRING strBuf;
1205 ULONG ulOfs = 0;
1206
1207 PSZ pszOldCopy = (PSZ)malloc(cbOldParam+1);
1208 strncpy(pszOldCopy, pOldParam, cbOldParam);
1209 pszOldCopy[cbOldParam] = '\0';
1210
1211 xstrInit(&strBuf, 0);
1212 xstrset(&strBuf, *ppszBuf); // this must not be freed!
1213 /* xstrInit(&strFind, 0);
1214 xstrset(&strFind, pszOldCopy); // this must not be freed!
1215 xstrInit(&strReplace, 0);
1216 xstrset(&strReplace, pszNewParam); // this must not be freed!
1217 */
1218
1219 // check for upper case desired?
1220 if (fRespectCase)
1221 if (fIsAllUpperCase)
1222 strupr(pszNewParam);
1223
1224 xstrcrpl(&strBuf, &ulOfs, pszOldCopy, pszNewParam);
1225
1226 free(pszOldCopy);
1227
1228 *ppszBuf = strBuf.psz;
1229 }
1230 }
1231 }
1232 else
1233 {
1234 PSZ pszNew = (PSZ)malloc(strlen(*ppszBuf)
1235 + strlen(pcszKey)
1236 + strlen(pszNewParam)
1237 + 5); // 2 * \r\n + null byte
1238 // key not found: append to end of file
1239 sprintf(pszNew, "%s\r\n%s%s\r\n",
1240 *ppszBuf, pcszKey, pszNewParam);
1241 free(*ppszBuf);
1242 *ppszBuf = pszNew;
1243 }
1244
1245 return (prc);
1246}
1247
1248/*
1249 *@@ strhDeleteLine:
1250 * this deletes the line in pszSearchIn which starts with
1251 * the key pszKey. Returns TRUE if the line was found and
1252 * deleted.
1253 *
1254 * This copies within pszSearchIn.
1255 */
1256
1257BOOL strhDeleteLine(PSZ pszSearchIn, // in: buffer to search
1258 PSZ pszKey) // in: key to find
1259{
1260 BOOL fIsAllUpperCase = FALSE;
1261 PSZ pKey = strhFindKey(pszSearchIn, pszKey, &fIsAllUpperCase);
1262 BOOL brc = FALSE;
1263
1264 if (pKey) {
1265 PSZ pEOL = strhFindEOL(pKey, NULL);
1266 // pEOL now has first end-of-line after the key
1267 if (pEOL)
1268 {
1269 // delete line by overwriting it with
1270 // the next line
1271 strcpy(pKey, pEOL+2);
1272 }
1273 else
1274 {
1275 // EOL not found: we must be at the end of the file
1276 *pKey = '\0';
1277 }
1278 brc = TRUE;
1279 }
1280
1281 return (brc);
1282}
1283
1284/*
1285 *@@ strhBeautifyTitle:
1286 * replaces all line breaks (0xd, 0xa) with spaces.
1287 */
1288
1289BOOL strhBeautifyTitle(PSZ psz)
1290{
1291 BOOL rc = FALSE;
1292 CHAR *p;
1293 while ((p = strchr(psz, 0xa)))
1294 {
1295 *p = ' ';
1296 rc = TRUE;
1297 }
1298 while ((p = strchr(psz, 0xd)))
1299 {
1300 *p = ' ';
1301 rc = TRUE;
1302 }
1303 return (rc);
1304}
1305
1306/*
1307 * strhFindAttribValue:
1308 * searches for pszAttrib in pszSearchIn; if found,
1309 * returns the first character after the "=" char.
1310 * If "=" is not found, a space, \r, and \n are
1311 * also accepted. This function searches without
1312 * respecting case.
1313 *
1314 * <B>Example:</B>
1315 + strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1316 +
1317 + returns ....................... ^ this address.
1318 *
1319 *@@added V0.9.0 [umoeller]
1320 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1321 */
1322
1323PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1324{
1325 PSZ prc = 0;
1326 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1327 p,
1328 p2;
1329 ULONG cbAttrib = strlen(pszAttrib);
1330
1331 // 1) find space char
1332 while ((p = strchr(pszSearchIn2, ' ')))
1333 {
1334 CHAR c;
1335 p++;
1336 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1337 // now check whether the p+strlen(pszAttrib)
1338 // is a valid end-of-tag character
1339 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1340 && ( (c == ' ')
1341 || (c == '>')
1342 || (c == '=')
1343 || (c == '\r')
1344 || (c == '\n')
1345 || (c == 0)
1346 )
1347 )
1348 {
1349 // yes:
1350 CHAR c2;
1351 p2 = p + cbAttrib;
1352 c2 = *p2;
1353 while ( ( (c2 == ' ')
1354 || (c2 == '=')
1355 || (c2 == '\n')
1356 || (c2 == '\r')
1357 )
1358 && (c2 != 0)
1359 )
1360 c2 = *++p2;
1361 prc = p2;
1362 break; // first while
1363 }
1364 pszSearchIn2++;
1365 }
1366 return (prc);
1367}
1368
1369/*
1370 * strhGetNumAttribValue:
1371 * stores the numerical parameter value of an HTML-style
1372 * tag in *pl.
1373 *
1374 * Returns the address of the tag parameter in the
1375 * search buffer, if found, or NULL.
1376 *
1377 * <B>Example:</B>
1378 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1379 *
1380 * stores 123 in the "l" variable.
1381 *
1382 *@@added V0.9.0 [umoeller]
1383 */
1384
1385PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1386 const char *pszTag, // e.g. "INDEX"
1387 PLONG pl) // out: numerical value
1388{
1389 PSZ pParam;
1390 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1391 sscanf(pParam, "%ld", pl);
1392
1393 return (pParam);
1394}
1395
1396/*
1397 * strhGetTextAttr:
1398 * retrieves the attribute value of a textual HTML-style tag
1399 * in a newly allocated buffer, which is returned,
1400 * or NULL if attribute not found.
1401 * If an attribute value is to contain spaces, it
1402 * must be enclosed in quotes.
1403 *
1404 * The offset of the attribute data in pszSearchIn is
1405 * returned in *pulOffset so that you can do multiple
1406 * searches.
1407 *
1408 * This returns a new buffer, which should be free()'d after use.
1409 *
1410 * <B>Example:</B>
1411 + ULONG ulOfs = 0;
1412 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1413 + ............^ ulOfs
1414 *
1415 * returns a new string with the value "blublub" (without
1416 * quotes) and sets ulOfs to 12.
1417 *
1418 *@@added V0.9.0 [umoeller]
1419 */
1420
1421PSZ strhGetTextAttr(const char *pszSearchIn,
1422 const char *pszTag,
1423 PULONG pulOffset) // out: offset where found
1424{
1425 PSZ pParam,
1426 pParam2,
1427 prc = NULL;
1428 ULONG ulCount = 0;
1429 LONG lNestingLevel = 0;
1430
1431 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1432 {
1433 // determine end character to search for: a space
1434 CHAR cEnd = ' ';
1435 if (*pParam == '\"')
1436 {
1437 // or, if the data is enclosed in quotes, a quote
1438 cEnd = '\"';
1439 pParam++;
1440 }
1441
1442 if (pulOffset)
1443 // store the offset
1444 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1445
1446 // now find end of attribute
1447 pParam2 = pParam;
1448 while (*pParam)
1449 {
1450 if (*pParam == cEnd)
1451 // end character found
1452 break;
1453 else if (*pParam == '<')
1454 // yet another opening tag found:
1455 // this is probably some "<" in the attributes
1456 lNestingLevel++;
1457 else if (*pParam == '>')
1458 {
1459 lNestingLevel--;
1460 if (lNestingLevel < 0)
1461 // end of tag found:
1462 break;
1463 }
1464 ulCount++;
1465 pParam++;
1466 }
1467
1468 // copy attribute to new buffer
1469 if (ulCount)
1470 {
1471 prc = (PSZ)malloc(ulCount+1);
1472 memcpy(prc, pParam2, ulCount);
1473 *(prc+ulCount) = 0;
1474 }
1475 }
1476 return (prc);
1477}
1478
1479/*
1480 * strhFindEndOfTag:
1481 * returns a pointer to the ">" char
1482 * which seems to terminate the tag beginning
1483 * after pszBeginOfTag.
1484 *
1485 * If additional "<" chars are found, we look
1486 * for additional ">" characters too.
1487 *
1488 * Note: You must pass the address of the opening
1489 * '<' character to this function.
1490 *
1491 * Example:
1492 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1493 + strhFindEndOfTag(pszTest)
1494 + returns.................................^ this.
1495 *
1496 *@@added V0.9.0 [umoeller]
1497 */
1498
1499PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1500{
1501 PSZ p = (PSZ)pszBeginOfTag,
1502 prc = NULL;
1503 LONG lNestingLevel = 0;
1504
1505 while (*p)
1506 {
1507 if (*p == '<')
1508 // another opening tag found:
1509 lNestingLevel++;
1510 else if (*p == '>')
1511 {
1512 // closing tag found:
1513 lNestingLevel--;
1514 if (lNestingLevel < 1)
1515 {
1516 // corresponding: return this
1517 prc = p;
1518 break;
1519 }
1520 }
1521 p++;
1522 }
1523
1524 return (prc);
1525}
1526
1527/*
1528 * strhGetBlock:
1529 * this complex function searches the given string
1530 * for a pair of opening/closing HTML-style tags.
1531 *
1532 * If found, this routine returns TRUE and does
1533 * the following:
1534 *
1535 * 1) allocate a new buffer, copy the text
1536 * enclosed by the opening/closing tags
1537 * into it and set *ppszBlock to that
1538 * buffer;
1539 *
1540 * 2) if the opening tag has any attributes,
1541 * allocate another buffer, copy the
1542 * attributes into it and set *ppszAttrs
1543 * to that buffer; if no attributes are
1544 * found, *ppszAttrs will be NULL;
1545 *
1546 * 3) set *pulOffset to the offset from the
1547 * beginning of *ppszSearchIn where the
1548 * opening tag was found;
1549 *
1550 * 4) advance *ppszSearchIn to after the
1551 * closing tag, so that you can do
1552 * multiple searches without finding the
1553 * same tags twice.
1554 *
1555 * All buffers should be freed using free().
1556 *
1557 * This returns the following:
1558 * -- 0: no error
1559 * -- 1: tag not found at all (doesn't have to be an error)
1560 * -- 2: begin tag found, but no corresponding end tag found. This
1561 * is a real error.
1562 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1563 *
1564 * <B>Example:</B>
1565 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1566 + PSZ pszBlock, pszAttrs;
1567 + ULONG ulOfs;
1568 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1569 *
1570 * would do the following:
1571 *
1572 * 1) set pszBlock to a new string containing "This is page 1."
1573 * without quotes;
1574 *
1575 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1576 *
1577 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1578 *
1579 * 4) pSearch would be advanced to point to the "More text"
1580 * string in the original buffer.
1581 *
1582 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1583 * for HTML parsing.
1584 *
1585 *@@added V0.9.0 [umoeller]
1586 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1587 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1588 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1589 */
1590
1591ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1592 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1593 PSZ pszTag,
1594 PSZ *ppszBlock, // out: block enclosed by the tags
1595 PSZ *ppszAttribs, // out: attributes of the opening tag
1596 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1597 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1598{
1599 ULONG ulrc = 1;
1600 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1601 pszSearch2 = pszBeginTag,
1602 pszClosingTag;
1603 ULONG cbTag = strlen(pszTag);
1604
1605 // go thru the block and check all tags if it's the
1606 // begin tag we're looking for
1607 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1608 {
1609 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1610 // yes: stop
1611 break;
1612 else
1613 pszBeginTag++;
1614 }
1615
1616 if (pszBeginTag)
1617 {
1618 // we found <TAG>:
1619 ULONG ulNestingLevel = 0;
1620
1621 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1622 // strchr(pszBeginTag, '>');
1623 if (pszEndOfBeginTag)
1624 {
1625 // does the caller want the attributes?
1626 if (ppszAttribs)
1627 {
1628 // yes: then copy them
1629 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1630 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1631 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1632 // add terminating 0
1633 *(pszAttrs + ulAttrLen) = 0;
1634
1635 *ppszAttribs = pszAttrs;
1636 }
1637
1638 // output offset of where we found the begin tag
1639 if (pulOfsBeginTag)
1640 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1641
1642 // now find corresponding closing tag (e.g. "</BODY>"
1643 pszBeginTag = pszEndOfBeginTag+1;
1644 // now we're behind the '>' char of the opening tag
1645 // increase offset of that too
1646 if (pulOfsBeginBlock)
1647 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1648
1649 // find next closing tag;
1650 // for the first run, pszSearch2 points to right
1651 // after the '>' char of the opening tag
1652 pszSearch2 = pszBeginTag;
1653 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1654 && (pszClosingTag = strstr(pszSearch2, "<"))
1655 )
1656 {
1657 // if we have another opening tag before our closing
1658 // tag, we need to have several closing tags before
1659 // we're done
1660 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1661 ulNestingLevel++;
1662 else
1663 {
1664 // is this ours?
1665 if ( (*(pszClosingTag+1) == '/')
1666 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1667 )
1668 {
1669 // we've found a matching closing tag; is
1670 // it ours?
1671 if (ulNestingLevel == 0)
1672 {
1673 // our closing tag found:
1674 // allocate mem for a new buffer
1675 // and extract all the text between
1676 // open and closing tags to it
1677 ULONG ulLen = pszClosingTag - pszBeginTag;
1678 if (ppszBlock)
1679 {
1680 PSZ pNew = (PSZ)malloc(ulLen + 1);
1681 strhncpy0(pNew, pszBeginTag, ulLen);
1682 *ppszBlock = pNew;
1683 }
1684
1685 // raise search offset to after the closing tag
1686 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1687
1688 ulrc = 0;
1689
1690 break;
1691 } else
1692 // not our closing tag:
1693 ulNestingLevel--;
1694 }
1695 }
1696 // no matching closing tag: search on after that
1697 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1698 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1699
1700 if (!pszClosingTag)
1701 // no matching closing tag found:
1702 // return 2 (closing tag not found)
1703 ulrc = 2;
1704 } // end if (pszBeginTag)
1705 else
1706 // no matching ">" for opening tag found:
1707 ulrc = 3;
1708 }
1709
1710 return (ulrc);
1711}
1712
1713/* ******************************************************************
1714 *
1715 * Miscellaneous
1716 *
1717 ********************************************************************/
1718
1719/*
1720 *@@ strhArrayAppend:
1721 * this appends a string to a "string array".
1722 *
1723 * A string array is considered a sequence of
1724 * zero-terminated strings in memory. That is,
1725 * after each string's null-byte, the next
1726 * string comes up.
1727 *
1728 * This is useful for composing a single block
1729 * of memory from, say, list box entries, which
1730 * can then be written to OS2.INI in one flush.
1731 *
1732 * To append strings to such an array, call this
1733 * function for each string you wish to append.
1734 * This will re-allocate *ppszRoot with each call,
1735 * and update *pcbRoot, which then contains the
1736 * total size of all strings (including all null
1737 * terminators).
1738 *
1739 * Pass *pcbRoot to PrfSaveProfileData to have the
1740 * block saved.
1741 *
1742 * Note: On the first call, *ppszRoot and *pcbRoot
1743 * _must_ be both NULL, or this crashes.
1744 */
1745
1746VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1747 const char *pcszNew, // in: string to append
1748 PULONG pcbRoot) // in/out: size of array
1749{
1750 ULONG cbNew = strlen(pcszNew);
1751 PSZ pszTemp = (PSZ)malloc(*pcbRoot
1752 + cbNew
1753 + 1); // two null bytes
1754 if (*ppszRoot)
1755 {
1756 // not first loop: copy old stuff
1757 memcpy(pszTemp,
1758 *ppszRoot,
1759 *pcbRoot);
1760 free(*ppszRoot);
1761 }
1762 // append new string
1763 strcpy(pszTemp + *pcbRoot,
1764 pcszNew);
1765 // update root
1766 *ppszRoot = pszTemp;
1767 // update length
1768 *pcbRoot += cbNew + 1;
1769}
1770
1771/*
1772 *@@ strhCreateDump:
1773 * this dumps a memory block into a string
1774 * and returns that string in a new buffer.
1775 *
1776 * You must free() the returned PSZ after use.
1777 *
1778 * The output looks like the following:
1779 *
1780 + 0000: FE FF 0E 02 90 00 00 00 ........
1781 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1782 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1783 *
1784 * Each line is terminated with a newline (\n)
1785 * character only.
1786 *
1787 *@@added V0.9.1 (2000-01-22) [umoeller]
1788 */
1789
1790PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1791 ULONG ulSize, // in: size of buffer
1792 ULONG ulIndent) // in: indentation of every line
1793{
1794 PSZ pszReturn = 0;
1795 XSTRING strReturn;
1796 CHAR szTemp[1000];
1797
1798 PBYTE pbCurrent = pb; // current byte
1799 ULONG ulCount = 0,
1800 ulCharsInLine = 0; // if this grows > 7, a new line is started
1801 CHAR szLine[400] = "",
1802 szAscii[30] = " "; // ASCII representation; filled for every line
1803 PSZ pszLine = szLine,
1804 pszAscii = szAscii;
1805
1806 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1807
1808 for (pbCurrent = pb;
1809 ulCount < ulSize;
1810 pbCurrent++, ulCount++)
1811 {
1812 if (ulCharsInLine == 0)
1813 {
1814 memset(szLine, ' ', ulIndent);
1815 pszLine += ulIndent;
1816 }
1817 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1818
1819 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1820 // printable character:
1821 *pszAscii = *pbCurrent;
1822 else
1823 *pszAscii = '.';
1824 pszAscii++;
1825
1826 ulCharsInLine++;
1827 if ( (ulCharsInLine > 7) // 8 bytes added?
1828 || (ulCount == ulSize-1) // end of buffer reached?
1829 )
1830 {
1831 // if we haven't had eight bytes yet,
1832 // fill buffer up to eight bytes with spaces
1833 ULONG ul2;
1834 for (ul2 = ulCharsInLine;
1835 ul2 < 8;
1836 ul2++)
1837 pszLine += sprintf(pszLine, " ");
1838
1839 sprintf(szTemp, "%04lX: %s %s\n",
1840 (ulCount & 0xFFFFFFF8), // offset in hex
1841 szLine, // bytes string
1842 szAscii); // ASCII string
1843 xstrcat(&strReturn, szTemp);
1844
1845 // restart line buffer
1846 pszLine = szLine;
1847
1848 // clear ASCII buffer
1849 strcpy(szAscii, " ");
1850 pszAscii = szAscii;
1851
1852 // reset line counter
1853 ulCharsInLine = 0;
1854 }
1855 }
1856
1857 if (strReturn.cbAllocated)
1858 pszReturn = strReturn.psz;
1859
1860 return (pszReturn);
1861}
1862
1863/* ******************************************************************
1864 *
1865 * Wildcard matching
1866 *
1867 ********************************************************************/
1868
1869/*
1870 * The following code has been taken from "fnmatch.zip".
1871 *
1872 * (c) 1994-1996 by Eberhard Mattes.
1873 */
1874
/* OS/2 and DOS paths accept the forward slash as well as the
 * backslash between two components. This macro is true iff C is
 * one of these two separators. C is evaluated more than once. */

#define IS_OS2_COMP_SEP(C)      (((C) == '/') || ((C) == '\\'))


/* This macro is true iff C ends a component of an OS/2 or DOS
 * path, that is, if C is either the null terminator or a
 * separator. C is evaluated more than once. */

#define IS_OS2_COMP_END(C)      (((C) == 0) || IS_OS2_COMP_SEP(C))
1885
/*
 * skip_comp_os2:
 *      advances over the current component of the OS/2 or DOS
 *      style path SRC and returns a pointer to the beginning of
 *      the next component, i.e. to the character behind the next
 *      separator. If SRC has no further separator, the address
 *      of its terminating null character is returned.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *next;

    /* Run up to the separator or the end of the string,
     * whichever comes first. */

    for (next = src; !IS_OS2_COMP_END(*next); ++next)
        ;

    /* A separator is stepped over; the null terminator is not. */

    return (*next == 0) ? next : next + 1;
}
1909
/*
 * has_colon:
 *      checks the null-terminated path P for a colon.
 *      Returns 1 if there is at least one, 0 otherwise.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static int has_colon(const unsigned char *p)
{
    const unsigned char *cur;

    for (cur = p; *cur != 0; ++cur)
    {
        if (*cur == ':')
            return 1;
    }

    /* End of string reached without seeing a colon. */
    return 0;
}
1926
1927/*
1928 * match_comp_os2:
1929 * Compare a single component (directory name or file name) of the
1930 * paths, for OS/2 and DOS styles. MASK and NAME point into a
1931 * component of the wildcard and the name to be checked, respectively.
1932 * Comparing stops at the next separator. The FLAGS argument is the
1933 * same as that of fnmatch(). HAS_DOT is true if a dot is in the
1934 * current component of NAME. The number of dots is not restricted,
1935 * even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1936 * Note that this function is recursive.
1937 *
1938 * (c) 1994-1996 by Eberhard Mattes.
1939 */
1940
1941static int match_comp_os2(const unsigned char *mask,
1942 const unsigned char *name,
1943 unsigned flags,
1944 int has_dot)
1945{
1946 int rc;
1947
1948 for (;;)
1949 switch (*mask)
1950 {
1951 case 0:
1952
1953 /* There must be no extra characters at the end of NAME when
1954 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1955 * in that case, NAME may point to a separator. */
1956
1957 if (*name == 0)
1958 return FNM_MATCH;
1959 if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1960 return FNM_MATCH;
1961 return FNM_NOMATCH;
1962
1963 case '/':
1964 case '\\':
1965
1966 /* Separators match separators. */
1967
1968 if (IS_OS2_COMP_SEP(*name))
1969 return FNM_MATCH;
1970
1971 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1972 * is ignored at the end of NAME. */
1973
1974 if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1975 return FNM_MATCH;
1976
1977 /* Stop comparing at the separator. */
1978
1979 return FNM_NOMATCH;
1980
1981 case '?':
1982
1983 /* A question mark matches one character. It does not match
1984 * a dot. At the end of the component (and before a dot),
1985 * it also matches zero characters. */
1986
1987 if (*name != '.' && !IS_OS2_COMP_END(*name))
1988 ++name;
1989 ++mask;
1990 break;
1991
1992 case '*':
1993
1994 /* An asterisk matches zero or more characters. In DOS
1995 * mode, dots are not matched. */
1996
1997 do
1998 {
1999 ++mask;
2000 }
2001 while (*mask == '*');
2002 for (;;)
2003 {
2004 rc = match_comp_os2(mask, name, flags, has_dot);
2005 if (rc != FNM_NOMATCH)
2006 return rc;
2007 if (IS_OS2_COMP_END(*name))
2008 return FNM_NOMATCH;
2009 if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
2010 return FNM_NOMATCH;
2011 ++name;
2012 }
2013
2014 case '.':
2015
2016 /* A dot matches a dot. It also matches the implicit dot at
2017 * the end of a dot-less NAME. */
2018
2019 ++mask;
2020 if (*name == '.')
2021 ++name;
2022 else if (has_dot || !IS_OS2_COMP_END(*name))
2023 return FNM_NOMATCH;
2024 break;
2025
2026 default:
2027
2028 /* All other characters match themselves. */
2029
2030 if (flags & _FNM_IGNORECASE)
2031 {
2032 if (tolower(*mask) != tolower(*name))
2033 return FNM_NOMATCH;
2034 }
2035 else
2036 {
2037 if (*mask != *name)
2038 return FNM_NOMATCH;
2039 }
2040 ++mask;
2041 ++name;
2042 break;
2043 }
2044}
2045
2046/*
2047 * match_comp:
2048 * compare a single component (directory name or file name) of the
2049 * paths, for all styles which need component-by-component matching.
2050 * MASK and NAME point to the start of a component of the wildcard and
2051 * the name to be checked, respectively. Comparing stops at the next
2052 * separator. The FLAGS argument is the same as that of fnmatch().
2053 * Return FNM_MATCH iff MASK and NAME match.
2054 *
2055 * (c) 1994-1996 by Eberhard Mattes.
2056 */
2057
2058static int match_comp(const unsigned char *mask,
2059 const unsigned char *name,
2060 unsigned flags)
2061{
2062 const unsigned char *s;
2063
2064 switch (flags & _FNM_STYLE_MASK)
2065 {
2066 case _FNM_OS2:
2067 case _FNM_DOS:
2068
2069 /* For OS/2 and DOS styles, we add an implicit dot at the end of
2070 * the component if the component doesn't include a dot. */
2071
2072 s = name;
2073 while (!IS_OS2_COMP_END(*s) && *s != '.')
2074 ++s;
2075 return match_comp_os2(mask, name, flags, *s == '.');
2076
2077 default:
2078 return FNM_ERR;
2079 }
2080}
2081
/* Unix paths use the forward slash only between two components.
 * This macro is true iff C is that separator. */

#define IS_UNIX_COMP_SEP(C)     ((C) == '/')


/* This macro is true iff C ends a component of a Unix path, that
 * is, if C is either the null terminator or a separator. C is
 * evaluated more than once. */

#define IS_UNIX_COMP_END(C)     (((C) == 0) || IS_UNIX_COMP_SEP(C))
2092
2093/*
2094 * match_unix:
2095 * match complete paths for Unix styles. The FLAGS argument is the
2096 * same as that of fnmatch(). COMP points to the start of the current
2097 * component in NAME. Return FNM_MATCH iff MASK and NAME match. The
2098 * backslash character is used for escaping ? and * unless
2099 * FNM_NOESCAPE is set.
2100 *
2101 * (c) 1994-1996 by Eberhard Mattes.
2102 */
2103
2104static int match_unix(const unsigned char *mask,
2105 const unsigned char *name,
2106 unsigned flags,
2107 const unsigned char *comp)
2108{
2109 unsigned char c1, c2;
2110 char invert, matched;
2111 const unsigned char *start;
2112 int rc;
2113
2114 for (;;)
2115 switch (*mask)
2116 {
2117 case 0:
2118
2119 /* There must be no extra characters at the end of NAME when
2120 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
2121 * in that case, NAME may point to a separator. */
2122
2123 if (*name == 0)
2124 return FNM_MATCH;
2125 if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
2126 return FNM_MATCH;
2127 return FNM_NOMATCH;
2128
2129 case '?':
2130
2131 /* A question mark matches one character. It does not match
2132 * the component separator if FNM_PATHNAME is set. It does
2133 * not match a dot at the start of a component if FNM_PERIOD
2134 * is set. */
2135
2136 if (*name == 0)
2137 return FNM_NOMATCH;
2138 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2139 return FNM_NOMATCH;
2140 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2141 return FNM_NOMATCH;
2142 ++mask;
2143 ++name;
2144 break;
2145
2146 case '*':
2147
2148 /* An asterisk matches zero or more characters. It does not
2149 * match the component separator if FNM_PATHNAME is set. It
2150 * does not match a dot at the start of a component if
2151 * FNM_PERIOD is set. */
2152
2153 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2154 return FNM_NOMATCH;
2155 do
2156 {
2157 ++mask;
2158 }
2159 while (*mask == '*');
2160 for (;;)
2161 {
2162 rc = match_unix(mask, name, flags, comp);
2163 if (rc != FNM_NOMATCH)
2164 return rc;
2165 if (*name == 0)
2166 return FNM_NOMATCH;
2167 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2168 return FNM_NOMATCH;
2169 ++name;
2170 }
2171
2172 case '/':
2173
2174 /* Separators match only separators. If _FNM_PATHPREFIX is
2175 * set, a trailing separator in MASK is ignored at the end
2176 * of NAME. */
2177
2178 if (!(IS_UNIX_COMP_SEP(*name)
2179 || ((flags & _FNM_PATHPREFIX) && *name == 0
2180 && (mask[1] == 0
2181 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
2182 && mask[2] == 0)))))
2183 return FNM_NOMATCH;
2184
2185 ++mask;
2186 if (*name != 0)
2187 ++name;
2188
2189 /* This is the beginning of a new component if FNM_PATHNAME
2190 * is set. */
2191
2192 if (flags & FNM_PATHNAME)
2193 comp = name;
2194 break;
2195
2196 case '[':
2197
2198 /* A set of characters. Always case-sensitive. */
2199
2200 if (*name == 0)
2201 return FNM_NOMATCH;
2202 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2203 return FNM_NOMATCH;
2204 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2205 return FNM_NOMATCH;
2206
2207 invert = 0;
2208 matched = 0;
2209 ++mask;
2210
2211 /* If the first character is a ! or ^, the set matches all
2212 * characters not listed in the set. */
2213
2214 if (*mask == '!' || *mask == '^')
2215 {
2216 ++mask;
2217 invert = 1;
2218 }
2219
2220 /* Loop over all the characters of the set. The loop ends
2221 * if the end of the string is reached or if a ] is
2222 * encountered unless it directly follows the initial [ or
2223 * [-. */
2224
2225 start = mask;
2226 while (!(*mask == 0 || (*mask == ']' && mask != start)))
2227 {
2228 /* Get the next character which is optionally preceded
2229 * by a backslash. */
2230
2231 c1 = *mask++;
2232 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2233 {
2234 if (*mask == 0)
2235 break;
2236 c1 = *mask++;
2237 }
2238
2239 /* Ranges of characters are written as a-z. Don't
2240 * forget to check for the end of the string and to
2241 * handle the backslash. If the character after - is a
2242 * ], it isn't a range. */
2243
2244 if (*mask == '-' && mask[1] != ']')
2245 {
2246 ++mask; /* Skip the - character */
2247 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2248 ++mask;
2249 if (*mask == 0)
2250 break;
2251 c2 = *mask++;
2252 }
2253 else
2254 c2 = c1;
2255
2256 /* Now check whether this character or range matches NAME. */
2257
2258 if (c1 <= *name && *name <= c2)
2259 matched = 1;
2260 }
2261
2262 /* If the end of the string is reached before a ] is found,
2263 * back up to the [ and compare it to NAME. */
2264
2265 if (*mask == 0)
2266 {
2267 if (*name != '[')
2268 return FNM_NOMATCH;
2269 ++name;
2270 mask = start;
2271 if (invert)
2272 --mask;
2273 }
2274 else
2275 {
2276 if (invert)
2277 matched = !matched;
2278 if (!matched)
2279 return FNM_NOMATCH;
2280 ++mask; /* Skip the ] character */
2281 if (*name != 0)
2282 ++name;
2283 }
2284 break;
2285
2286 case '\\':
2287 ++mask;
2288 if (flags & FNM_NOESCAPE)
2289 {
2290 if (*name != '\\')
2291 return FNM_NOMATCH;
2292 ++name;
2293 }
2294 else if (*mask == '*' || *mask == '?')
2295 {
2296 if (*mask != *name)
2297 return FNM_NOMATCH;
2298 ++mask;
2299 ++name;
2300 }
2301 break;
2302
2303 default:
2304
2305 /* All other characters match themselves. */
2306
2307 if (flags & _FNM_IGNORECASE)
2308 {
2309 if (tolower(*mask) != tolower(*name))
2310 return FNM_NOMATCH;
2311 }
2312 else
2313 {
2314 if (*mask != *name)
2315 return FNM_NOMATCH;
2316 }
2317 ++mask;
2318 ++name;
2319 break;
2320 }
2321}
2322
2323/*
2324 * _fnmatch_unsigned:
2325 * Check whether the path name NAME matches the wildcard MASK.
2326 *
2327 * Return:
2328 * -- 0 (FNM_MATCH) if it matches,
2329 * -- _FNM_NOMATCH if it doesn't,
2330 * -- FNM_ERR on error.
2331 *
2332 * The operation of this function is controlled by FLAGS.
2333 * This is an internal function, with unsigned arguments.
2334 *
2335 * (c) 1994-1996 by Eberhard Mattes.
2336 */
2337
2338static int _fnmatch_unsigned(const unsigned char *mask,
2339 const unsigned char *name,
2340 unsigned flags)
2341{
2342 int m_drive, n_drive,
2343 rc;
2344
2345 /* Match and skip the drive name if present. */
2346
2347 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2348 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2349
2350 if (m_drive != n_drive)
2351 {
2352 if (m_drive == -1 || n_drive == -1)
2353 return FNM_NOMATCH;
2354 if (!(flags & _FNM_IGNORECASE))
2355 return FNM_NOMATCH;
2356 if (tolower(m_drive) != tolower(n_drive))
2357 return FNM_NOMATCH;
2358 }
2359
2360 if (m_drive != -1)
2361 mask += 2;
2362 if (n_drive != -1)
2363 name += 2;
2364
2365 /* Colons are not allowed in path names, except for the drive name,
2366 * which was skipped above. */
2367
2368 if (has_colon(mask) || has_colon(name))
2369 return FNM_ERR;
2370
2371 /* The name "\\server\path" should not be matched by mask
2372 * "\*\server\path". Ditto for /. */
2373
2374 switch (flags & _FNM_STYLE_MASK)
2375 {
2376 case _FNM_OS2:
2377 case _FNM_DOS:
2378
2379 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2380 {
2381 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2382 return FNM_NOMATCH;
2383 name += 2;
2384 mask += 2;
2385 }
2386 break;
2387
2388 case _FNM_POSIX:
2389
2390 if (name[0] == '/' && name[1] == '/')
2391 {
2392 int i;
2393
2394 name += 2;
2395 for (i = 0; i < 2; ++i)
2396 if (mask[0] == '/')
2397 ++mask;
2398 else if (mask[0] == '\\' && mask[1] == '/')
2399 mask += 2;
2400 else
2401 return FNM_NOMATCH;
2402 }
2403
2404 /* In Unix styles, treating ? and * w.r.t. components is simple.
2405 * No need to do matching component by component. */
2406
2407 return match_unix(mask, name, flags, name);
2408 }
2409
2410 /* Now compare all the components of the path name, one by one.
2411 * Note that the path separator must not be enclosed in brackets. */
2412
2413 while (*mask != 0 || *name != 0)
2414 {
2415
2416 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2417 * is reached even if there are components left in NAME. */
2418
2419 if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2420 return FNM_MATCH;
2421
2422 /* Compare a single component of the path name. */
2423
2424 rc = match_comp(mask, name, flags);
2425 if (rc != FNM_MATCH)
2426 return rc;
2427
2428 /* Skip to the next component or to the end of the path name. */
2429
2430 mask = skip_comp_os2(mask);
2431 name = skip_comp_os2(name);
2432 }
2433
2434 /* If we reached the ends of both strings, the names match. */
2435
2436 if (*mask == 0 && *name == 0)
2437 return FNM_MATCH;
2438
2439 /* The names do not match. */
2440
2441 return FNM_NOMATCH;
2442}
2443
2444/*
2445 *@@ strhMatchOS2:
2446 * this matches wildcards, similar to what DosEditName does.
2447 * However, this does not require a file to be present, but
2448 * works on strings only.
2449 */
2450
2451BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2452 const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2453{
2454 return ((BOOL)(_fnmatch_unsigned(pcszMask,
2455 pcszName,
2456 _FNM_OS2 | _FNM_IGNORECASE)
2457 == FNM_MATCH)
2458 );
2459}
2460
2461/* ******************************************************************
2462 *
2463 * Fast string searches
2464 *
2465 ********************************************************************/
2466
// ASSERT expands to nothing, so the expression passed to it
// is neither compiled nor evaluated.
// NOTE(review): presumably here for the benefit of the code
// taken from the Standard Function Library -- confirm
#define ASSERT(a)
2468
2469/*
2470 * The following code has been taken from the "Standard
2471 * Function Library", file sflfind.c, and only slightly
2472 * modified to conform to the rest of this file.
2473 *
2474 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2475 * Revised: 98/05/04
2476 *
2477 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2478 *
2479 * The SFL Licence allows incorporating SFL code into other
2480 * programs, as long as the copyright is reprinted and the
2481 * code is marked as modified, so this is what we do.
2482 */
2483
2484/*
2485 *@@ strhmemfind:
2486 * searches for a pattern in a block of memory using the
2487 * Boyer-Moore-Horspool-Sunday algorithm.
2488 *
2489 * The block and pattern may contain any values; you must
2490 * explicitly provide their lengths. If you search for strings,
2491 * use strlen() on the buffers.
2492 *
2493 * Returns a pointer to the pattern if found within the block,
2494 * or NULL if the pattern was not found.
2495 *
2496 * This algorithm needs a "shift table" to cache data for the
2497 * search pattern. This table can be reused when performing
2498 * several searches with the same pattern.
2499 *
2500 * "shift" must point to an array big enough to hold 256 (8**2)
2501 * "size_t" values.
2502 *
2503 * If (*repeat_find == FALSE), the shift table is initialized.
2504 * So on the first search with a given pattern, *repeat_find
2505 * should be FALSE. This function sets it to TRUE after the
2506 * shift table is initialised, allowing the initialisation
2507 * phase to be skipped on subsequent searches.
2508 *
2509 * This function is most effective when repeated searches are
2510 * made for the same pattern in one or more large buffers.
2511 *
2512 * Example:
2513 *
2514 + PSZ pszHaystack = "This is a sample string.",
2515 + pszNeedle = "string";
2516 + size_t shift[256];
2517 + BOOL fRepeat = FALSE;
2518 +
2519 + PSZ pFound = strhmemfind(pszHaystack,
2520 + strlen(pszHaystack), // block size
2521 + pszNeedle,
2522 + strlen(pszNeedle), // pattern size
2523 + shift,
2524 + &fRepeat);
2525 *
2526 * Taken from the "Standard Function Library", file sflfind.c.
2527 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2528 * Slightly modified by umoeller.
2529 *
2530 *@@added V0.9.3 (2000-05-08) [umoeller]
2531 */
2532
2533void* strhmemfind(const void *in_block, // in: block containing data
2534 size_t block_size, // in: size of block in bytes
2535 const void *in_pattern, // in: pattern to search for
2536 size_t pattern_size, // in: size of pattern block
2537 size_t *shift, // in/out: shift table (search buffer)
2538 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
2539{
2540 size_t byte_nbr, // Distance through block
2541 match_size; // Size of matched part
2542 const unsigned char
2543 *match_base = NULL, // Base of match of pattern
2544 *match_ptr = NULL, // Point within current match
2545 *limit = NULL; // Last potiental match point
2546 const unsigned char
2547 *block = (unsigned char *) in_block, // Concrete pointer to block data
2548 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2549
2550 if ( (block == NULL)
2551 || (pattern == NULL)
2552 || (shift == NULL)
2553 )
2554 return (NULL);
2555
2556 // Pattern must be smaller or equal in size to string
2557 if (block_size < pattern_size)
2558 return (NULL); // Otherwise it's not found
2559
2560 if (pattern_size == 0) // Empty patterns match at start
2561 return ((void *)block);
2562
2563 // Build the shift table unless we're continuing a previous search
2564
2565 // The shift table determines how far to shift before trying to match
2566 // again, if a match at this point fails. If the byte after where the
2567 // end of our pattern falls is not in our pattern, then we start to
2568 // match again after that byte; otherwise we line up the last occurence
2569 // of that byte in our pattern under that byte, and try match again.
2570
2571 if (!repeat_find || !*repeat_find)
2572 {
2573 for (byte_nbr = 0;
2574 byte_nbr < 256;
2575 byte_nbr++)
2576 shift[byte_nbr] = pattern_size + 1;
2577 for (byte_nbr = 0;
2578 byte_nbr < pattern_size;
2579 byte_nbr++)
2580 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2581
2582 if (repeat_find)
2583 *repeat_find = TRUE;
2584 }
2585
2586 // Search for the block, each time jumping up by the amount
2587 // computed in the shift table
2588
2589 limit = block + (block_size - pattern_size + 1);
2590 ASSERT (limit > block);
2591
2592 for (match_base = block;
2593 match_base < limit;
2594 match_base += shift[*(match_base + pattern_size)])
2595 {
2596 match_ptr = match_base;
2597 match_size = 0;
2598
2599 // Compare pattern until it all matches, or we find a difference
2600 while (*match_ptr++ == pattern[match_size++])
2601 {
2602 ASSERT (match_size <= pattern_size &&
2603 match_ptr == (match_base + match_size));
2604
2605 // If we found a match, return the start address
2606 if (match_size >= pattern_size)
2607 return ((void*)(match_base));
2608
2609 }
2610 }
2611 return (NULL); // Found nothing
2612}
2613
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string
 *      and pattern are null-terminated strings.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the string, or NULL if the pattern was not found.
 *      NULL is also returned if either pointer is NULL or if the
 *      pattern is longer than the string. An empty pattern matches
 *      at the start of the string.
 *
 *      Will match strings irrespective of case; characters are
 *      folded with tolower(), so the result depends on the current
 *      C locale. For exact (case-sensitive) matching of arbitrary
 *      data, see strhmemfind. Will not work on multibyte characters.
 *
 *      Both strings are read as unsigned char, so characters
 *      with the high bit set (e.g. umlauts) can safely be
 *      passed even if plain "char" is signed.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Modified: characters are converted to unsigned char before
 *      being passed to tolower().
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t  shift[256];                 // Shift distance for each value
    size_t  string_size,
            pattern_size,
            byte_nbr,                   // Index into byte array
            match_size,                 // Size of matched part
            pos,                        // Offset of current window in string
            last_start;                 // Last offset where the pattern still fits
    const unsigned char
            *text = NULL,               // Unsigned view of string
            *pat = NULL;                // Unsigned view of pattern

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return (NULL);

    // <ctype.h> functions require values in the range of unsigned
    // char; plain char may be negative for characters >= 0x80
    text = (const unsigned char *)string;
    pat = (const unsigned char *)pattern;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift[byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift[(unsigned char) tolower (pat[byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match.

    last_start = string_size - pattern_size;    // cannot underflow, see check above

    pos = 0;
    while (pos <= last_start)
    {
        // Compare pattern until it all matches, or we find a difference
        match_size = 0;
        while (    (match_size < pattern_size)
                && (tolower (text[pos + match_size]) == tolower (pat[match_size]))
              )
            match_size++;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *)(string + pos));

        // The character behind the window selects the shift. For the
        // last window this is the terminating null byte, which is
        // still part of the string and yields the maximum shift, so
        // the loop ends.
        pos += shift[(unsigned char) tolower (text[pos + pattern_size])];
    }

    return (NULL);                      // Found nothing
}
2708
Note: See TracBrowser for help on using the repository browser.