source: trunk/src/helpers/stringh.c@ 55

Last change on this file since 55 was 55, checked in by umoeller, 24 years ago

misc changes

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 78.4 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#include <os2.h>
45
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <math.h>
51
52#include "setup.h" // code generation and debugging options
53
54#include "helpers\stringh.h"
55#include "helpers\xstring.h" // extended string helpers
56
57#pragma hdrstop
58
59/*
60 *@@category: Helpers\C helpers\String management
61 * See stringh.c and xstring.c.
62 */
63
64/*
65 *@@category: Helpers\C helpers\String management\C string helpers
66 * See stringh.c.
67 */
68
69/*
70 *@@ strhdup:
71 * like strdup, but this one
72 * doesn't crash if pszSource is NULL,
73 * but returns NULL also.
74 *
75 *@@added V0.9.0 [umoeller]
76 */
77
78PSZ strhdup(const char *pszSource)
79{
80 if (pszSource)
81 return (strdup(pszSource));
82 else
83 return (0);
84}
85
86/*
87 *@@ strhcmp:
88 * better strcmp. This doesn't crash if any of the
89 * string pointers are NULL, but returns a proper
90 * value then.
91 *
92 * Besides, this is guaranteed to only return -1, 0,
93 * or +1, while strcmp can return any positive or
94 * negative value.
95 *
96 *@@added V0.9.9 (2001-02-16) [umoeller]
97 */
98
int strhcmp(const char *p1, const char *p2)
{
    int iDiff;

    // NULL handling first: a NULL pointer sorts before any
    // real string, and two NULL pointers compare equal
    if (p1 == NULL)
        return ((p2 == NULL) ? 0 : -1);
    if (p2 == NULL)
        return (+1);

    // both strings are valid: clamp whatever strcmp
    // reports to exactly -1, 0 or +1
    iDiff = strcmp(p1, p2);
    return ((iDiff > 0) - (iDiff < 0));
}
117
118/*
119 *@@ strhistr:
120 * like strstr, but case-insensitive.
121 *
122 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rüdiger Ihle
123 */
124
125PSZ strhistr(const char *string1, const char *string2)
126{
127 PSZ prc = NULL;
128
129 if ((string1) && (string2))
130 {
131 PSZ pszSrchIn = strdup(string1);
132 PSZ pszSrchFor = strdup(string2);
133
134 if ((pszSrchIn) && (pszSrchFor))
135 {
136 strupr(pszSrchIn);
137 strupr(pszSrchFor);
138
139 prc = strstr(pszSrchIn, pszSrchFor);
140 if (prc)
141 {
142 // prc now has the first occurence of the string,
143 // but in pszSrchIn; we need to map this
144 // return value to the original string
145 prc = (prc-pszSrchIn) // offset in pszSrchIn
146 + (PSZ)string1;
147 }
148 }
149 if (pszSrchFor)
150 free(pszSrchFor);
151 if (pszSrchIn)
152 free(pszSrchIn);
153 }
154 return (prc);
155}
156
157/*
158 *@@ strhncpy0:
159 * like strncpy, but always appends a 0 character.
160 */
161
162ULONG strhncpy0(PSZ pszTarget,
163 const char *pszSource,
164 ULONG cbSource)
165{
166 ULONG ul = 0;
167 PSZ pTarget = pszTarget,
168 pSource = (PSZ)pszSource;
169
170 for (ul = 0; ul < cbSource; ul++)
171 if (*pSource)
172 *pTarget++ = *pSource++;
173 else
174 break;
175 *pTarget = 0;
176
177 return (ul);
178}
179
180/*
181 * strhCount:
182 * this counts the occurrences of c in pszSearch.
183 */
184
185ULONG strhCount(const char *pszSearch,
186 CHAR c)
187{
188 PSZ p = (PSZ)pszSearch;
189 ULONG ulCount = 0;
190 while (TRUE)
191 {
192 p = strchr(p, c);
193 if (p)
194 {
195 ulCount++;
196 p++;
197 }
198 else
199 break;
200 }
201 return (ulCount);
202}
203
204/*
205 *@@ strhIsDecimal:
206 * returns TRUE if psz consists of decimal digits only.
207 */
208
209BOOL strhIsDecimal(PSZ psz)
210{
211 PSZ p = psz;
212 while (*p != 0)
213 {
214 if (isdigit(*p) == 0)
215 return (FALSE);
216 p++;
217 }
218
219 return (TRUE);
220}
221
222/*
223 *@@ strhSubstr:
224 * this creates a new PSZ containing the string
225 * from pBegin to pEnd, excluding the pEnd character.
226 * The new string is null-terminated. The caller
227 * must free() the new string after use.
228 *
229 * Example:
230 + "1234567890"
231 + ^ ^
232 + p1 p2
233 + strhSubstr(p1, p2)
234 * would return a new string containing "2345678".
235 *
236 *@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
237 *@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
238 */
239
240PSZ strhSubstr(const char *pBegin, // in: first char
241 const char *pEnd) // in: last char (not included)
242{
243 PSZ pszSubstr = NULL;
244
245 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
246 {
247 ULONG cbSubstr = (pEnd - pBegin);
248 pszSubstr = (PSZ)malloc(cbSubstr + 1);
249 if (pszSubstr)
250 {
251 // strhncpy0(pszSubstr, pBegin, cbSubstr);
252 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
253 *(pszSubstr + cbSubstr) = '\0';
254 }
255 }
256
257 return (pszSubstr);
258}
259
260/*
261 *@@ strhExtract:
262 * searches pszBuf for the cOpen character and returns
263 * the data in between cOpen and cClose, excluding
264 * those two characters, in a newly allocated buffer
265 * which you must free() afterwards.
266 *
267 * Spaces and newlines/linefeeds are skipped.
268 *
269 * If the search was successful, the new buffer
270 * is returned and, if (ppEnd != NULL), *ppEnd points
271 * to the first character after the cClose character
272 * found in the buffer.
273 *
274 * If the search was not successful, NULL is
275 * returned, and *ppEnd is unchanged.
276 *
277 * If another cOpen character is found before
278 * cClose, matching cClose characters will be skipped.
279 * You can therefore nest the cOpen and cClose
280 * characters.
281 *
282 * This function ignores cOpen and cClose characters
283 * in strings surrounded by double quotes. C-style comments
284 * are not recognized by the code below.
285 *
286 * Example:
287 + PSZ pszBuf = "KEYWORD { --blah-- } next",
288 + pEnd;
289 + strhExtract(pszBuf,
290 + '{', '}',
291 + &pEnd)
292 * would return a new buffer containing " --blah-- ",
293 * and ppEnd would afterwards point to the space
294 * before "next" in the static buffer.
295 *
296 *@@added V0.9.0 [umoeller]
297 */
298
299PSZ strhExtract(PSZ pszBuf, // in: search buffer
300 CHAR cOpen, // in: opening char
301 CHAR cClose, // in: closing char
302 PSZ *ppEnd) // out: if != NULL, receives first character after closing char
303{
304 PSZ pszReturn = NULL;
305
306 if (pszBuf)
307 {
308 PSZ pOpen = strchr(pszBuf, cOpen);
309 if (pOpen)
310 {
311 // opening char found:
312 // now go thru the whole rest of the buffer
313 PSZ p = pOpen+1;
314 LONG lLevel = 1; // if this goes 0, we're done
315 while (*p)
316 {
317 if (*p == cOpen)
318 lLevel++;
319 else if (*p == cClose)
320 {
321 lLevel--;
322 if (lLevel <= 0)
323 {
324 // matching closing bracket found:
325 // extract string
326 pszReturn = strhSubstr(pOpen+1, // after cOpen
327 p); // excluding cClose
328 if (ppEnd)
329 *ppEnd = p+1;
330 break; // while (*p)
331 }
332 }
333 else if (*p == '\"')
334 {
335 // beginning of string:
336 PSZ p2 = p+1;
337 // find end of string
338 while ((*p2) && (*p2 != '\"'))
339 p2++;
340
341 if (*p2 == '\"')
342 // closing quote found:
343 // search on after that
344 p = p2; // raised below
345 else
346 break; // while (*p)
347 }
348
349 p++;
350 }
351 }
352 }
353
354 return (pszReturn);
355}
356
357/*
358 *@@ strhQuote:
359 * similar to strhExtract, except that
360 * opening and closing chars are the same,
361 * and therefore no nesting is possible.
362 * Useful for extracting stuff between
363 * quotes.
364 *
365 *@@added V0.9.0 [umoeller]
366 */
367
368PSZ strhQuote(PSZ pszBuf,
369 CHAR cQuote,
370 PSZ *ppEnd)
371{
372 PSZ pszReturn = NULL,
373 p1 = NULL;
374 if ((p1 = strchr(pszBuf, cQuote)))
375 {
376 PSZ p2 = strchr(p1+1, cQuote);
377 if (p2)
378 {
379 pszReturn = strhSubstr(p1+1, p2);
380 if (ppEnd)
381 // store closing char
382 *ppEnd = p2 + 1;
383 }
384 }
385
386 return (pszReturn);
387}
388
389/*
390 *@@ strhStrip:
391 * removes all double spaces.
392 * This copies within the "psz" buffer.
393 * If any double spaces are found, the
394 * string will be shorter than before,
395 * but the buffer is _not_ reallocated,
396 * so there will be unused bytes at the
397 * end.
398 *
399 * Returns the number of spaces removed.
400 *
401 *@@added V0.9.0 [umoeller]
402 */
403
404ULONG strhStrip(PSZ psz) // in/out: string
405{
406 PSZ p;
407 ULONG cb = strlen(psz),
408 ulrc = 0;
409
410 for (p = psz; p < psz+cb; p++)
411 {
412 if ((*p == ' ') && (*(p+1) == ' '))
413 {
414 PSZ p2 = p;
415 while (*p2)
416 {
417 *p2 = *(p2+1);
418 p2++;
419 }
420 cb--;
421 p--;
422 ulrc++;
423 }
424 }
425 return (ulrc);
426}
427
428/*
429 *@@ strhins:
430 * this inserts one string into another.
431 *
432 * pszInsert is inserted into pszBuffer at offset
433 * ulInsertOfs (which counts from 0).
434 *
435 * A newly allocated string is returned. pszBuffer is
436 * not changed. The new string should be free()'d after
437 * use.
438 *
439 * Upon errors, NULL is returned.
440 *
441 *@@changed V0.9.0 [umoeller]: completely rewritten.
442 */
443
444PSZ strhins(const char *pcszBuffer,
445 ULONG ulInsertOfs,
446 const char *pcszInsert)
447{
448 PSZ pszNew = NULL;
449
450 if ((pcszBuffer) && (pcszInsert))
451 {
452 do {
453 ULONG cbBuffer = strlen(pcszBuffer);
454 ULONG cbInsert = strlen(pcszInsert);
455
456 // check string length
457 if (ulInsertOfs > cbBuffer + 1)
458 break; // do
459
460 // OK, let's go.
461 pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
462
463 // copy stuff before pInsertPos
464 memcpy(pszNew,
465 pcszBuffer,
466 ulInsertOfs);
467 // copy string to be inserted
468 memcpy(pszNew + ulInsertOfs,
469 pcszInsert,
470 cbInsert);
471 // copy stuff after pInsertPos
472 strcpy(pszNew + ulInsertOfs + cbInsert,
473 pcszBuffer + ulInsertOfs);
474 } while (FALSE);
475 }
476
477 return (pszNew);
478}
479
480/*
481 *@@ strhFindReplace:
482 * wrapper around xstrFindReplace to work with C strings.
483 * Note that *ppszBuf can get reallocated and must
484 * be free()'able.
485 *
486 * Repetitive use of this wrapper is not recommended
487 * because it is considerably slower than xstrFindReplace.
488 *
489 *@@added V0.9.6 (2000-11-01) [umoeller]
490 *@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
491 */
492
493ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
494 PULONG pulOfs, // in: where to begin search (0 = start);
495 // out: ofs of first char after replacement string
496 const char *pcszSearch, // in: search string; cannot be NULL
497 const char *pcszReplace) // in: replacement string; cannot be NULL
498{
499 ULONG ulrc = 0;
500 XSTRING xstrBuf,
501 xstrFind,
502 xstrReplace;
503 size_t ShiftTable[256];
504 BOOL fRepeat = FALSE;
505 xstrInitSet(&xstrBuf, *ppszBuf);
506 // reallocated and returned, so we're safe
507 xstrInitSet(&xstrFind, (PSZ)pcszSearch);
508 xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
509 // these two are never freed, so we're safe too
510
511 if ((ulrc = xstrFindReplace(&xstrBuf,
512 pulOfs,
513 &xstrFind,
514 &xstrReplace,
515 ShiftTable,
516 &fRepeat)))
517 // replaced:
518 *ppszBuf = xstrBuf.psz;
519
520 return (ulrc);
521}
522
523/*
524 * strhWords:
525 * returns the no. of words in "psz".
526 * A string is considered a "word" if
527 * it is surrounded by spaces only.
528 *
529 *@@added V0.9.0 [umoeller]
530 */
531
532ULONG strhWords(PSZ psz)
533{
534 PSZ p;
535 ULONG cb = strlen(psz),
536 ulWords = 0;
537 if (cb > 1)
538 {
539 ulWords = 1;
540 for (p = psz; p < psz+cb; p++)
541 if (*p == ' ')
542 ulWords++;
543 }
544 return (ulWords);
545}
546
547/*
548 *@@ strhThousandsULong:
549 * converts a ULONG into a decimal string, while
550 * inserting thousands separators into it. Specify
551 * the separator character in cThousands.
552 *
553 * Returns pszTarget so you can use it directly
554 * with sprintf and the "%s" flag.
555 *
556 * For cThousands, you should use the data in
557 * OS2.INI ("PM_National" application), which is
558 * always set according to the "Country" object.
559 * You can use prfhQueryCountrySettings to
560 * retrieve this setting.
561 *
562 * Use strhThousandsDouble for "double" values.
563 */
564
565PSZ strhThousandsULong(PSZ pszTarget, // out: decimal as string
566 ULONG ul, // in: decimal to convert
567 CHAR cThousands) // in: separator char (e.g. '.')
568{
569 USHORT ust, uss, usc;
570 CHAR szTemp[40];
571 sprintf(szTemp, "%lu", ul);
572
573 ust = 0;
574 usc = strlen(szTemp);
575 for (uss = 0; uss < usc; uss++)
576 {
577 if (uss)
578 if (((usc - uss) % 3) == 0)
579 {
580 pszTarget[ust] = cThousands;
581 ust++;
582 }
583 pszTarget[ust] = szTemp[uss];
584 ust++;
585 }
586 pszTarget[ust] = '\0';
587
588 return (pszTarget);
589}
590
591/*
592 *@@ strhThousandsDouble:
593 * like strhThousandsULong, but for a "double"
594 * value. Note that after-comma values are truncated.
595 */
596
597PSZ strhThousandsDouble(PSZ pszTarget, double dbl, CHAR cThousands)
598{
599 USHORT ust, uss, usc;
600 CHAR szTemp[40];
601 sprintf(szTemp, "%.0f", floor(dbl));
602
603 ust = 0;
604 usc = strlen(szTemp);
605 for (uss = 0; uss < usc; uss++)
606 {
607 if (uss)
608 if (((usc - uss) % 3) == 0)
609 {
610 pszTarget[ust] = cThousands;
611 ust++;
612 }
613 pszTarget[ust] = szTemp[uss];
614 ust++;
615 }
616 pszTarget[ust] = '\0';
617
618 return (pszTarget);
619}
620
621/*
622 *@@ strhVariableDouble:
623 * like strhThousandsULong, but for a "double" value, and
624 * with a variable number of decimal places depending on the
625 * size of the quantity.
626 *
627 *@@added V0.9.6 (2000-11-12) [pr]
628 */
629
630PSZ strhVariableDouble(PSZ pszTarget,
631 double dbl,
632 PSZ pszUnits,
633 CHAR cThousands)
634{
635 if (dbl < 100.0)
636 sprintf(pszTarget, "%.2f%s", dbl, pszUnits);
637 else
638 if (dbl < 1000.0)
639 sprintf(pszTarget, "%.1f%s", dbl, pszUnits);
640 else
641 strcat(strhThousandsDouble(pszTarget, dbl, cThousands),
642 pszUnits);
643
644 return(pszTarget);
645}
646
647/*
648 *@@ strhFileDate:
649 * converts file date data to a string (to pszBuf).
650 * You can pass any FDATE structure to this function,
651 * which are returned in those FILEFINDBUF* or
652 * FILESTATUS* structs by the Dos* functions.
653 *
654 * ulDateFormat is the PM setting for the date format,
655 * as set in the "Country" object, and can be queried using
656 + PrfQueryProfileInt(HINI_USER, "PM_National", "iDate", 0);
657 *
658 * meaning:
659 * -- 0 mm.dd.yyyy (English)
660 * -- 1 dd.mm.yyyy (e.g. German)
661 * -- 2 yyyy.mm.dd (Japanese, ISO)
662 * -- 3 yyyy.dd.mm
663 *
664 * cDateSep is used as a date separator (e.g. '.').
665 * This can be queried using:
666 + prfhQueryProfileChar(HINI_USER, "PM_National", "sDate", '/');
667 *
668 * Alternatively, you can query all the country settings
669 * at once using prfhQueryCountrySettings (prfh.c).
670 *
671 *@@changed V0.9.0 (99-11-07) [umoeller]: now calling strhDateTime
672 */
673
674VOID strhFileDate(PSZ pszBuf, // out: string returned
675 FDATE *pfDate, // in: date information
676 ULONG ulDateFormat, // in: date format (0-3)
677 CHAR cDateSep) // in: date separator (e.g. '.')
678{
679 DATETIME dt;
680 dt.day = pfDate->day;
681 dt.month = pfDate->month;
682 dt.year = pfDate->year + 1980;
683
684 strhDateTime(pszBuf,
685 NULL, // no time
686 &dt,
687 ulDateFormat,
688 cDateSep,
689 0, 0); // no time
690}
691
692/*
693 *@@ strhFileTime:
694 * converts file time data to a string (to pszBuf).
695 * You can pass any FTIME structure to this function,
696 * which are returned in those FILEFINDBUF* or
697 * FILESTATUS* structs by the Dos* functions.
698 *
699 * ulTimeFormat is the PM setting for the time format,
700 * as set in the "Country" object, and can be queried using
701 + PrfQueryProfileInt(HINI_USER, "PM_National", "iTime", 0);
702 * meaning:
703 * -- 0 12-hour clock
704 * -- >0 24-hour clock
705 *
706 * cDateSep is used as a time separator (e.g. ':').
707 * This can be queried using:
708 + prfhQueryProfileChar(HINI_USER, "PM_National", "sTime", ':');
709 *
710 * Alternatively, you can query all the country settings
711 * at once using prfhQueryCountrySettings (prfh.c).
712 *
713 *@@changed V0.8.5 (99-03-15) [umoeller]: fixed 12-hour crash
714 *@@changed V0.9.0 (99-11-07) [umoeller]: now calling strhDateTime
715 */
716
717VOID strhFileTime(PSZ pszBuf, // out: string returned
718 FTIME *pfTime, // in: time information
719 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1)
720 CHAR cTimeSep) // in: time separator (e.g. ':')
721{
722 DATETIME dt;
723 dt.hours = pfTime->hours;
724 dt.minutes = pfTime->minutes;
725 dt.seconds = pfTime->twosecs * 2;
726
727 strhDateTime(NULL, // no date
728 pszBuf,
729 &dt,
730 0, 0, // no date
731 ulTimeFormat,
732 cTimeSep);
733}
734
735/*
736 *@@ strhDateTime:
737 * converts Control Program DATETIME info
738 * into two strings. See strhFileDate and strhFileTime
739 * for more detailed parameter descriptions.
740 *
741 *@@added V0.9.0 (99-11-07) [umoeller]
742 */
743
744VOID strhDateTime(PSZ pszDate, // out: date string returned (can be NULL)
745 PSZ pszTime, // out: time string returned (can be NULL)
746 DATETIME *pDateTime, // in: date/time information
747 ULONG ulDateFormat, // in: date format (0-3); see strhFileDate
748 CHAR cDateSep, // in: date separator (e.g. '.')
749 ULONG ulTimeFormat, // in: 24-hour time format (0 or 1); see strhFileTime
750 CHAR cTimeSep) // in: time separator (e.g. ':')
751{
752 if (pszDate)
753 {
754 switch (ulDateFormat)
755 {
756 case 0: // mm.dd.yyyy (English)
757 sprintf(pszDate, "%02d%c%02d%c%04d",
758 pDateTime->month,
759 cDateSep,
760 pDateTime->day,
761 cDateSep,
762 pDateTime->year);
763 break;
764
765 case 1: // dd.mm.yyyy (e.g. German)
766 sprintf(pszDate, "%02d%c%02d%c%04d",
767 pDateTime->day,
768 cDateSep,
769 pDateTime->month,
770 cDateSep,
771 pDateTime->year);
772 break;
773
774 case 2: // yyyy.mm.dd (Japanese)
775 sprintf(pszDate, "%04d%c%02d%c%02d",
776 pDateTime->year,
777 cDateSep,
778 pDateTime->month,
779 cDateSep,
780 pDateTime->day);
781 break;
782
783 default: // yyyy.dd.mm
784 sprintf(pszDate, "%04d%c%02d%c%02d",
785 pDateTime->year,
786 cDateSep,
787 pDateTime->day,
788 cDateSep,
789 pDateTime->month);
790 break;
791 }
792 }
793
794 if (pszTime)
795 {
796 if (ulTimeFormat == 0)
797 {
798 // for 12-hour clock, we need additional INI data
799 CHAR szAMPM[10] = "err";
800
801 if (pDateTime->hours > 12)
802 {
803 // > 12h: PM.
804
805 // Note: 12:xx noon is 12 AM, not PM (even though
806 // AM stands for "ante meridiam", but English is just
807 // not logical), so that's handled below.
808
809 PrfQueryProfileString(HINI_USER,
810 "PM_National",
811 "s2359", // key
812 "PM", // default
813 szAMPM, sizeof(szAMPM)-1);
814 sprintf(pszTime, "%02d%c%02d%c%02d %s",
815 // leave 12 == 12 (not 0)
816 pDateTime->hours % 12,
817 cTimeSep,
818 pDateTime->minutes,
819 cTimeSep,
820 pDateTime->seconds,
821 szAMPM);
822 }
823 else
824 {
825 // <= 12h: AM
826 PrfQueryProfileString(HINI_USER,
827 "PM_National",
828 "s1159", // key
829 "AM", // default
830 szAMPM, sizeof(szAMPM)-1);
831 sprintf(pszTime, "%02d%c%02d%c%02d %s",
832 pDateTime->hours,
833 cTimeSep,
834 pDateTime->minutes,
835 cTimeSep,
836 pDateTime->seconds,
837 szAMPM);
838 }
839 }
840 else
841 // 24-hour clock
842 sprintf(pszTime, "%02d%c%02d%c%02d",
843 pDateTime->hours,
844 cTimeSep,
845 pDateTime->minutes,
846 cTimeSep,
847 pDateTime->seconds);
848 }
849}
850
851/*
852 *@@ strhGetWord:
853 * finds word boundaries.
854 *
855 * *ppszStart is used as the beginning of the
856 * search.
857 *
858 * If a word is found, *ppszStart is set to
859 * the first character of the word which was
860 * found and *ppszEnd receives the address
861 * of the first character _after_ the word,
862 * which is probably a space or a \n or \r char.
863 * We then return TRUE.
864 *
865 * The search is stopped if a null character
866 * is found or pLimit is reached. In that case,
867 * FALSE is returned.
868 *
869 *@@added V0.9.1 (2000-02-13) [umoeller]
870 */
871
872BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
873 // out: start of word (if TRUE is returned)
874 const char *pLimit, // in: ptr to last char after *ppszStart to be
875 // searched; if the word does not end before
876 // or with this char, FALSE is returned
877 const char *pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
878 const char *pcszEndChars, // stringh.h defines STRH_END_CHARS
879 PSZ *ppszEnd) // out: first char _after_ word
880 // (if TRUE is returned)
881{
882 // characters after which a word can be started
883 // const char *pcszBeginChars = "\x0d\x0a ";
884 // const char *pcszEndChars = "\x0d\x0a /-";
885
886 PSZ pStart = *ppszStart;
887
888 // find start of word
889 while ( (pStart < (PSZ)pLimit)
890 && (strchr(pcszBeginChars, *pStart))
891 )
892 // if char is a "before word" char: go for next
893 pStart++;
894
895 if (pStart < (PSZ)pLimit)
896 {
897 // found a valid "word start" character
898 // (which is not in pcszBeginChars):
899
900 // find end of word
901 PSZ pEndOfWord = pStart;
902 while ( (pEndOfWord <= (PSZ)pLimit)
903 && (strchr(pcszEndChars, *pEndOfWord) == 0)
904 )
905 // if char is not an "end word" char: go for next
906 pEndOfWord++;
907
908 if (pEndOfWord <= (PSZ)pLimit)
909 {
910 // whoa, got a word:
911 *ppszStart = pStart;
912 *ppszEnd = pEndOfWord;
913 return (TRUE);
914 }
915 }
916
917 return (FALSE);
918}
919
920/*
921 *@@ strhIsWord:
922 * returns TRUE if p points to a "word"
923 * in pcszBuf.
924 *
925 * p is considered a word if the character _before_
926 * it is in pcszBeginChars and the char _after_
927 * it (i.e. *(p+cbSearch)) is in pcszEndChars.
928 *
929 *@@added V0.9.6 (2000-11-12) [umoeller]
930 */
931
932BOOL strhIsWord(const char *pcszBuf,
933 const char *p, // in: start of word
934 ULONG cbSearch, // in: length of word
935 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
936 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
937{
938 BOOL fEndOK = FALSE;
939
940 // check previous char
941 if ( (p == pcszBuf)
942 || (strchr(pcszBeginChars, *(p-1)))
943 )
944 {
945 // OK, valid begin char:
946 // check end char
947 CHAR cNextChar = *(p + cbSearch);
948 if (cNextChar == 0)
949 fEndOK = TRUE;
950 else
951 {
952 char *pc = strchr(pcszEndChars, cNextChar);
953 if (pc)
954 // OK, is end char: avoid doubles of that char,
955 // but allow spaces
956 if ( (cNextChar+1 != *pc)
957 || (cNextChar+1 == ' ')
958 || (cNextChar+1 == 0)
959 )
960 fEndOK = TRUE;
961 }
962 }
963
964 return (fEndOK);
965}
966
967/*
968 *@@ strhFindWord:
969 * searches for pszSearch in pszBuf, which is
970 * returned if found (or NULL if not).
971 *
972 * As opposed to strstr, this finds pszSearch
973 * only if it is a "word". A search string is
974 * considered a word if the character _before_
975 * it is in pcszBeginChars and the char _after_
976 * it is in pcszEndChars.
977 *
978 * Example:
979 + strhFindWord("This is an example.", "is");
980 + returns ...........^ this, but not the "is" in "This".
981 *
982 * The algorithm here uses strstr to find pszSearch in pszBuf
983 * and performs additional "is-word" checks for each item found
984 * (by calling strhIsWord).
985 *
986 * Note that this function is fairly slow compared to xstrFindWord.
987 *
988 *@@added V0.9.0 (99-11-08) [umoeller]
989 *@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
990 */
991
992PSZ strhFindWord(const char *pszBuf,
993 const char *pszSearch,
994 const char *pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
995 const char *pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
996{
997 PSZ pszReturn = 0;
998 ULONG cbBuf = strlen(pszBuf),
999 cbSearch = strlen(pszSearch);
1000
1001 if ((cbBuf) && (cbSearch))
1002 {
1003 const char *p = pszBuf;
1004
1005 do // while p
1006 {
1007 p = strstr(p, pszSearch);
1008 if (p)
1009 {
1010 // string found:
1011 // check if that's a word
1012
1013 if (strhIsWord(pszBuf,
1014 p,
1015 cbSearch,
1016 pcszBeginChars,
1017 pcszEndChars))
1018 {
1019 // valid end char:
1020 pszReturn = (PSZ)p;
1021 break;
1022 }
1023
1024 p += cbSearch;
1025 }
1026 } while (p);
1027
1028 }
1029 return (pszReturn);
1030}
1031
1032/*
1033 *@@ strhFindEOL:
1034 * returns a pointer to the next \r, \n or null character
1035 * following pszSearchIn. Stores the offset in *pulOffset.
1036 *
1037 * This should never return NULL because at some point,
1038 * there will be a null byte in your string.
1039 *
1040 *@@added V0.9.4 (2000-07-01) [umoeller]
1041 */
1042
1043PSZ strhFindEOL(const char *pcszSearchIn, // in: where to search
1044 PULONG pulOffset) // out: offset (ptr can be NULL)
1045{
1046 const char *p = pcszSearchIn,
1047 *prc = 0;
1048 while (TRUE)
1049 {
1050 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
1051 {
1052 prc = p;
1053 break;
1054 }
1055 p++;
1056 }
1057
1058 if ((pulOffset) && (prc))
1059 *pulOffset = prc - pcszSearchIn;
1060
1061 return ((PSZ)prc);
1062}
1063
1064/*
1065 *@@ strhFindNextLine:
1066 * like strhFindEOL, but this returns the character
1067 * _after_ \r or \n. Note that this might return
1068 * a pointer to terminating NULL character also.
1069 */
1070
1071PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1072{
1073 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1074 // pEOL now points to the \r char or the terminating 0 byte;
1075 // if not null byte, advance pointer
1076 PSZ pNextLine = pEOL;
1077 if (*pNextLine == '\r')
1078 pNextLine++;
1079 if (*pNextLine == '\n')
1080 pNextLine++;
1081 if (pulOffset)
1082 *pulOffset = pNextLine - pszSearchIn;
1083 return (pNextLine);
1084}
1085
1086/*
1087 *@@ strhBeautifyTitle:
1088 * replaces all line breaks (0xd, 0xa) with spaces.
1089 */
1090
1091BOOL strhBeautifyTitle(PSZ psz)
1092{
1093 BOOL rc = FALSE;
1094 CHAR *p;
1095 while ((p = strchr(psz, 0xa)))
1096 {
1097 *p = ' ';
1098 rc = TRUE;
1099 }
1100 while ((p = strchr(psz, 0xd)))
1101 {
1102 *p = ' ';
1103 rc = TRUE;
1104 }
1105 return (rc);
1106}
1107
1108/*
1109 * strhFindAttribValue:
1110 * searches for pszAttrib in pszSearchIn; if found,
1111 * returns the first character after the "=" char.
1112 * If "=" is not found, a space, \r, and \n are
1113 * also accepted. This function searches without
1114 * respecting case.
1115 *
1116 * <B>Example:</B>
1117 + strhFindAttribValue("<PAGE BLAH="data">, "BLAH")
1118 +
1119 + returns ....................... ^ this address.
1120 *
1121 *@@added V0.9.0 [umoeller]
1122 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1123 */
1124
1125PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1126{
1127 PSZ prc = 0;
1128 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1129 p,
1130 p2;
1131 ULONG cbAttrib = strlen(pszAttrib);
1132
1133 // 1) find space char
1134 while ((p = strchr(pszSearchIn2, ' ')))
1135 {
1136 CHAR c;
1137 p++;
1138 if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1139 {
1140 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1141 // now check whether the p+strlen(pszAttrib)
1142 // is a valid end-of-tag character
1143 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1144 && ( (c == ' ')
1145 || (c == '>')
1146 || (c == '=')
1147 || (c == '\r')
1148 || (c == '\n')
1149 || (c == 0)
1150 )
1151 )
1152 {
1153 // yes:
1154 CHAR c2;
1155 p2 = p + cbAttrib;
1156 c2 = *p2;
1157 while ( ( (c2 == ' ')
1158 || (c2 == '=')
1159 || (c2 == '\n')
1160 || (c2 == '\r')
1161 )
1162 && (c2 != 0)
1163 )
1164 c2 = *++p2;
1165
1166 prc = p2;
1167 break; // first while
1168 }
1169 }
1170 else
1171 break;
1172
1173 pszSearchIn2++;
1174 }
1175 return (prc);
1176}
1177
1178/*
1179 * strhGetNumAttribValue:
1180 * stores the numerical parameter value of an HTML-style
1181 * tag in *pl.
1182 *
1183 * Returns the address of the tag parameter in the
1184 * search buffer, if found, or NULL.
1185 *
1186 * <B>Example:</B>
1187 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1188 *
1189 * stores 123 in the "l" variable.
1190 *
1191 *@@added V0.9.0 [umoeller]
1192 *@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1193 */
1194
1195PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1196 const char *pszTag, // e.g. "INDEX"
1197 PLONG pl) // out: numerical value
1198{
1199 PSZ pParam;
1200 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1201 {
1202 if ( (*pParam == '\"')
1203 || (*pParam == '\'')
1204 )
1205 pParam++; // V0.9.9 (2001-04-04) [umoeller]
1206
1207 sscanf(pParam, "%ld", pl);
1208 }
1209
1210 return (pParam);
1211}
1212
1213/*
1214 * strhGetTextAttr:
1215 * retrieves the attribute value of a textual HTML-style tag
1216 * in a newly allocated buffer, which is returned,
1217 * or NULL if attribute not found.
1218 * If an attribute value is to contain spaces, it
1219 * must be enclosed in quotes.
1220 *
1221 * The offset of the attribute data in pszSearchIn is
1222 * returned in *pulOffset so that you can do multiple
1223 * searches.
1224 *
1225 * This returns a new buffer, which should be free()'d after use.
1226 *
1227 * <B>Example:</B>
1228 + ULONG ulOfs = 0;
1229 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1230 + ............^ ulOfs
1231 *
1232 * returns a new string with the value "blublub" (without
1233 * quotes) and sets ulOfs to 12.
1234 *
1235 *@@added V0.9.0 [umoeller]
1236 */
1237
1238PSZ strhGetTextAttr(const char *pszSearchIn,
1239 const char *pszTag,
1240 PULONG pulOffset) // out: offset where found
1241{
1242 PSZ pParam,
1243 pParam2,
1244 prc = NULL;
1245 ULONG ulCount = 0;
1246 LONG lNestingLevel = 0;
1247
1248 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1249 {
1250 // determine end character to search for: a space
1251 CHAR cEnd = ' ';
1252 if (*pParam == '\"')
1253 {
1254 // or, if the data is enclosed in quotes, a quote
1255 cEnd = '\"';
1256 pParam++;
1257 }
1258
1259 if (pulOffset)
1260 // store the offset
1261 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1262
1263 // now find end of attribute
1264 pParam2 = pParam;
1265 while (*pParam)
1266 {
1267 if (*pParam == cEnd)
1268 // end character found
1269 break;
1270 else if (*pParam == '<')
1271 // yet another opening tag found:
1272 // this is probably some "<" in the attributes
1273 lNestingLevel++;
1274 else if (*pParam == '>')
1275 {
1276 lNestingLevel--;
1277 if (lNestingLevel < 0)
1278 // end of tag found:
1279 break;
1280 }
1281 ulCount++;
1282 pParam++;
1283 }
1284
1285 // copy attribute to new buffer
1286 if (ulCount)
1287 {
1288 prc = (PSZ)malloc(ulCount+1);
1289 memcpy(prc, pParam2, ulCount);
1290 *(prc+ulCount) = 0;
1291 }
1292 }
1293 return (prc);
1294}
1295
1296/*
1297 * strhFindEndOfTag:
1298 * returns a pointer to the ">" char
1299 * which seems to terminate the tag beginning
1300 * after pszBeginOfTag.
1301 *
1302 * If additional "<" chars are found, we look
1303 * for additional ">" characters too.
1304 *
1305 * Note: You must pass the address of the opening
1306 * '<' character to this function.
1307 *
1308 * Example:
1309 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1310 + strhFindEndOfTag(pszTest)
1311 + returns.................................^ this.
1312 *
1313 *@@added V0.9.0 [umoeller]
1314 */
1315
1316PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1317{
1318 PSZ p = (PSZ)pszBeginOfTag,
1319 prc = NULL;
1320 LONG lNestingLevel = 0;
1321
1322 while (*p)
1323 {
1324 if (*p == '<')
1325 // another opening tag found:
1326 lNestingLevel++;
1327 else if (*p == '>')
1328 {
1329 // closing tag found:
1330 lNestingLevel--;
1331 if (lNestingLevel < 1)
1332 {
1333 // corresponding: return this
1334 prc = p;
1335 break;
1336 }
1337 }
1338 p++;
1339 }
1340
1341 return (prc);
1342}
1343
1344/*
1345 * strhGetBlock:
1346 * this complex function searches the given string
1347 * for a pair of opening/closing HTML-style tags.
1348 *
1349 * If found, this routine returns 0 and does
1350 * the following:
1351 *
1352 * 1) allocate a new buffer, copy the text
1353 * enclosed by the opening/closing tags
1354 * into it and set *ppszBlock to that
1355 * buffer;
1356 *
1357 * 2) if the opening tag has any attributes,
1358 * allocate another buffer, copy the
1359 * attributes into it and set *ppszAttrs
1360 * to that buffer; if no attributes are
1361 * found, *ppszAttrs will be NULL;
1362 *
1363 * 3) set *pulOfsBeginTag and *pulOfsBeginBlock
1364 * to the offsets from pszSearchIn of the
1365 * opening tag and of the text following it;
1366 *
1367 * 4) raise *pulSearchOffset so that the next
1368 * call continues searching behind this
1369 * block and you can do multiple searches
1370 * without finding the same tags twice.
1371 *
1372 * All buffers should be freed using free().
1373 *
1374 * This returns the following:
1375 * -- 0: no error
1376 * -- 1: tag not found at all (doesn't have to be an error)
1377 * -- 2: begin tag found, but no corresponding end tag found. This
1378 * is a real error.
1379 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1380 *
1381 * <B>Example:</B>
1382 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text.";
1383 + PSZ pszBlock, pszAttrs;
1384 + ULONG ulSearchOfs = 0, ulOfsTag, ulOfsBlock;
1385 + strhGetBlock(pSearch, &ulSearchOfs, "PAGE", &pszBlock, &pszAttrs, &ulOfsTag, &ulOfsBlock)
1386 *
1387 * would do the following:
1388 *
1389 * 1) set pszBlock to a new string containing "This is page 1."
1390 * without quotes;
1391 *
1392 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1" (no closing "&gt;");
1393 *
1394 * 3) set ulOfsTag to 0 ("&lt;PAGE" is at the beginning) and ulOfsBlock to 14;
1395 *
1396 * 4) raise ulSearchOfs beyond the block text, so that the next
1397 * call does not find the same tags again.
1398 *
1399 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1400 * for HTML parsing.
1401 *
1402 *@@added V0.9.0 [umoeller]
1403 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1404 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1405 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1406 */
1407
1408ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1409 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1410 PSZ pszTag,
1411 PSZ *ppszBlock, // out: block enclosed by the tags
1412 PSZ *ppszAttribs, // out: attributes of the opening tag
1413 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1414 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1415{
1416 ULONG ulrc = 1;
1417 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1418 pszSearch2 = pszBeginTag,
1419 pszClosingTag;
1420 ULONG cbTag = strlen(pszTag);
1421
1422 // go thru the block and check all tags if it's the
1423 // begin tag we're looking for
1424 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1425 {
1426 if (memicmp(pszBeginTag+1, pszTag, strlen(pszTag)) == 0)
1427 // yes: stop
1428 break;
1429 else
1430 pszBeginTag++;
1431 }
1432
1433 if (pszBeginTag)
1434 {
1435 // we found <TAG>:
1436 ULONG ulNestingLevel = 0;
1437
1438 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1439 // strchr(pszBeginTag, '>');
1440 if (pszEndOfBeginTag)
1441 {
1442 // does the caller want the attributes?
1443 if (ppszAttribs)
1444 {
1445 // yes: then copy them
1446 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1447 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1448 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1449 // add terminating 0
1450 *(pszAttrs + ulAttrLen) = 0;
1451
1452 *ppszAttribs = pszAttrs;
1453 }
1454
1455 // output offset of where we found the begin tag
1456 if (pulOfsBeginTag)
1457 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1458
1459 // now find corresponding closing tag (e.g. "</BODY>"
1460 pszBeginTag = pszEndOfBeginTag+1;
1461 // now we're behind the '>' char of the opening tag
1462 // increase offset of that too
1463 if (pulOfsBeginBlock)
1464 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1465
1466 // find next closing tag;
1467 // for the first run, pszSearch2 points to right
1468 // after the '>' char of the opening tag
1469 pszSearch2 = pszBeginTag;
1470 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1471 && (pszClosingTag = strstr(pszSearch2, "<"))
1472 )
1473 {
1474 // if we have another opening tag before our closing
1475 // tag, we need to have several closing tags before
1476 // we're done
1477 if (memicmp(pszClosingTag+1, pszTag, cbTag) == 0)
1478 ulNestingLevel++;
1479 else
1480 {
1481 // is this ours?
1482 if ( (*(pszClosingTag+1) == '/')
1483 && (memicmp(pszClosingTag+2, pszTag, cbTag) == 0)
1484 )
1485 {
1486 // we've found a matching closing tag; is
1487 // it ours?
1488 if (ulNestingLevel == 0)
1489 {
1490 // our closing tag found:
1491 // allocate mem for a new buffer
1492 // and extract all the text between
1493 // open and closing tags to it
1494 ULONG ulLen = pszClosingTag - pszBeginTag;
1495 if (ppszBlock)
1496 {
1497 PSZ pNew = (PSZ)malloc(ulLen + 1);
1498 strhncpy0(pNew, pszBeginTag, ulLen);
1499 *ppszBlock = pNew;
1500 }
1501
1502 // raise search offset to after the closing tag
1503 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1504
1505 ulrc = 0;
1506
1507 break;
1508 } else
1509 // not our closing tag:
1510 ulNestingLevel--;
1511 }
1512 }
1513 // no matching closing tag: search on after that
1514 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1515 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1516
1517 if (!pszClosingTag)
1518 // no matching closing tag found:
1519 // return 2 (closing tag not found)
1520 ulrc = 2;
1521 } // end if (pszBeginTag)
1522 else
1523 // no matching ">" for opening tag found:
1524 ulrc = 3;
1525 }
1526
1527 return (ulrc);
1528}
1529
1530/* ******************************************************************
1531 *
1532 * Miscellaneous
1533 *
1534 ********************************************************************/
1535
1536/*
1537 *@@ strhArrayAppend:
1538 * this appends a string to a "string array".
1539 *
1540 * A string array is considered a sequence of
1541 * zero-terminated strings in memory. That is,
1542 * after each string's null-byte, the next
1543 * string comes up.
1544 *
1545 * This is useful for composing a single block
1546 * of memory from, say, list box entries, which
1547 * can then be written to OS2.INI in one flush.
1548 *
1549 * To append strings to such an array, call this
1550 * function for each string you wish to append.
1551 * This will re-allocate *ppszRoot with each call,
1552 * and update *pcbRoot, which then contains the
1553 * total size of all strings (including all null
1554 * terminators).
1555 *
1556 * Pass *pcbRoot to PrfSaveProfileData to have the
1557 * block saved.
1558 *
1559 * Note: On the first call, *ppszRoot and *pcbRoot
1560 * _must_ be both NULL, or this crashes.
1561 */
1562
1563VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1564 const char *pcszNew, // in: string to append
1565 PULONG pcbRoot) // in/out: size of array
1566{
1567 ULONG cbNew = strlen(pcszNew);
1568 PSZ pszTemp = (PSZ)malloc(*pcbRoot
1569 + cbNew
1570 + 1); // two null bytes
1571 if (*ppszRoot)
1572 {
1573 // not first loop: copy old stuff
1574 memcpy(pszTemp,
1575 *ppszRoot,
1576 *pcbRoot);
1577 free(*ppszRoot);
1578 }
1579 // append new string
1580 strcpy(pszTemp + *pcbRoot,
1581 pcszNew);
1582 // update root
1583 *ppszRoot = pszTemp;
1584 // update length
1585 *pcbRoot += cbNew + 1;
1586}
1587
1588/*
1589 *@@ strhCreateDump:
1590 * this dumps a memory block into a string
1591 * and returns that string in a new buffer.
1592 *
1593 * You must free() the returned PSZ after use.
1594 *
1595 * The output looks like the following:
1596 *
1597 + 0000: FE FF 0E 02 90 00 00 00 ........
1598 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1599 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1600 *
1601 * Each line is terminated with a newline (\n)
1602 * character only.
1603 *
1604 *@@added V0.9.1 (2000-01-22) [umoeller]
1605 */
1606
1607PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1608 ULONG ulSize, // in: size of buffer
1609 ULONG ulIndent) // in: indentation of every line
1610{
1611 PSZ pszReturn = 0;
1612 XSTRING strReturn;
1613 CHAR szTemp[1000];
1614
1615 PBYTE pbCurrent = pb; // current byte
1616 ULONG ulCount = 0,
1617 ulCharsInLine = 0; // if this grows > 7, a new line is started
1618 CHAR szLine[400] = "",
1619 szAscii[30] = " "; // ASCII representation; filled for every line
1620 PSZ pszLine = szLine,
1621 pszAscii = szAscii;
1622
1623 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1624
1625 for (pbCurrent = pb;
1626 ulCount < ulSize;
1627 pbCurrent++, ulCount++)
1628 {
1629 if (ulCharsInLine == 0)
1630 {
1631 memset(szLine, ' ', ulIndent);
1632 pszLine += ulIndent;
1633 }
1634 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1635
1636 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1637 // printable character:
1638 *pszAscii = *pbCurrent;
1639 else
1640 *pszAscii = '.';
1641 pszAscii++;
1642
1643 ulCharsInLine++;
1644 if ( (ulCharsInLine > 7) // 8 bytes added?
1645 || (ulCount == ulSize-1) // end of buffer reached?
1646 )
1647 {
1648 // if we haven't had eight bytes yet,
1649 // fill buffer up to eight bytes with spaces
1650 ULONG ul2;
1651 for (ul2 = ulCharsInLine;
1652 ul2 < 8;
1653 ul2++)
1654 pszLine += sprintf(pszLine, " ");
1655
1656 sprintf(szTemp, "%04lX: %s %s\n",
1657 (ulCount & 0xFFFFFFF8), // offset in hex
1658 szLine, // bytes string
1659 szAscii); // ASCII string
1660 xstrcat(&strReturn, szTemp, 0);
1661
1662 // restart line buffer
1663 pszLine = szLine;
1664
1665 // clear ASCII buffer
1666 strcpy(szAscii, " ");
1667 pszAscii = szAscii;
1668
1669 // reset line counter
1670 ulCharsInLine = 0;
1671 }
1672 }
1673
1674 if (strReturn.cbAllocated)
1675 pszReturn = strReturn.psz;
1676
1677 return (pszReturn);
1678}
1679
1680/* ******************************************************************
1681 *
1682 * Wildcard matching
1683 *
1684 ********************************************************************/
1685
1686/*
1687 * The following code has been taken from "fnmatch.zip".
1688 *
1689 * (c) 1994-1996 by Eberhard Mattes.
1690 */
1691
1692/* In OS/2 and DOS styles, both / and \ separate components of a path.
1693 * This macro returns true iff C is a separator. */
1694
#define IS_OS2_COMP_SEP(C) ((C) == '/' || (C) == '\\')


/* This macro returns true if C terminates a component of a path,
 * i.e. if it is a separator or the end of the string. */

#define IS_OS2_COMP_END(C) ((C) == 0 || IS_OS2_COMP_SEP (C))

/*
 * skip_comp_os2:
 *      Advance SRC past the current path component, for OS/2 and
 *      DOS styles: the result points behind the next separator or,
 *      if there is none, to the terminating null character.
 *
 *      (c) 1994-1996 by Eberhard Mattes.
 */

static const unsigned char* skip_comp_os2(const unsigned char *src)
{
    const unsigned char *p;

    for (p = src; *p != 0; ++p)
        if (IS_OS2_COMP_SEP(*p))
            /* Separator found: the next component starts behind it. */
            return p + 1;

    /* No separator left: P is at the end of the string. */
    return p;
}
1726
1727/*
1728 * has_colon:
1729 * returns true iff the path P contains a colon.
1730 *
1731 * (c) 1994-1996 by Eberhard Mattes.
1732 */
1733
static int has_colon(const unsigned char *p)
{
    /* Let the C library scan the null-terminated path. */
    return (strchr((const char *)p, ':') != NULL) ? 1 : 0;
}
1743
1744/*
1745 * match_comp_os2:
1746 * Compare a single component (directory name or file name) of the
1747 * paths, for OS/2 and DOS styles. MASK and NAME point into a
1748 * component of the wildcard and the name to be checked, respectively.
1749 * Comparing stops at the next separator. The FLAGS argument is the
1750 * same as that of fnmatch(). HAS_DOT is true if a dot is in the
1751 * current component of NAME. The number of dots is not restricted,
1752 * even in DOS style. Return FNM_MATCH iff MASK and NAME match.
1753 * Note that this function is recursive.
1754 *
1755 * (c) 1994-1996 by Eberhard Mattes.
1756 */
1757
1758static int match_comp_os2(const unsigned char *mask,
1759 const unsigned char *name,
1760 unsigned flags,
1761 int has_dot)
1762{
1763 int rc;
1764
1765 for (;;)
1766 switch (*mask)
1767 {
1768 case 0:
1769
1770 /* There must be no extra characters at the end of NAME when
1771 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1772 * in that case, NAME may point to a separator. */
1773
1774 if (*name == 0)
1775 return FNM_MATCH;
1776 if ((flags & _FNM_PATHPREFIX) && IS_OS2_COMP_SEP(*name))
1777 return FNM_MATCH;
1778 return FNM_NOMATCH;
1779
1780 case '/':
1781 case '\\':
1782
1783 /* Separators match separators. */
1784
1785 if (IS_OS2_COMP_SEP(*name))
1786 return FNM_MATCH;
1787
1788 /* If _FNM_PATHPREFIX is set, a trailing separator in MASK
1789 * is ignored at the end of NAME. */
1790
1791 if ((flags & _FNM_PATHPREFIX) && mask[1] == 0 && *name == 0)
1792 return FNM_MATCH;
1793
1794 /* Stop comparing at the separator. */
1795
1796 return FNM_NOMATCH;
1797
1798 case '?':
1799
1800 /* A question mark matches one character. It does not match
1801 * a dot. At the end of the component (and before a dot),
1802 * it also matches zero characters. */
1803
1804 if (*name != '.' && !IS_OS2_COMP_END(*name))
1805 ++name;
1806 ++mask;
1807 break;
1808
1809 case '*':
1810
1811 /* An asterisk matches zero or more characters. In DOS
1812 * mode, dots are not matched. */
1813
1814 do
1815 {
1816 ++mask;
1817 }
1818 while (*mask == '*');
1819 for (;;)
1820 {
1821 rc = match_comp_os2(mask, name, flags, has_dot);
1822 if (rc != FNM_NOMATCH)
1823 return rc;
1824 if (IS_OS2_COMP_END(*name))
1825 return FNM_NOMATCH;
1826 if (*name == '.' && (flags & _FNM_STYLE_MASK) == _FNM_DOS)
1827 return FNM_NOMATCH;
1828 ++name;
1829 }
1830
1831 case '.':
1832
1833 /* A dot matches a dot. It also matches the implicit dot at
1834 * the end of a dot-less NAME. */
1835
1836 ++mask;
1837 if (*name == '.')
1838 ++name;
1839 else if (has_dot || !IS_OS2_COMP_END(*name))
1840 return FNM_NOMATCH;
1841 break;
1842
1843 default:
1844
1845 /* All other characters match themselves. */
1846
1847 if (flags & _FNM_IGNORECASE)
1848 {
1849 if (tolower(*mask) != tolower(*name))
1850 return FNM_NOMATCH;
1851 }
1852 else
1853 {
1854 if (*mask != *name)
1855 return FNM_NOMATCH;
1856 }
1857 ++mask;
1858 ++name;
1859 break;
1860 }
1861}
1862
1863/*
1864 * match_comp:
1865 * compare a single component (directory name or file name) of the
1866 * paths, for all styles which need component-by-component matching.
1867 * MASK and NAME point to the start of a component of the wildcard and
1868 * the name to be checked, respectively. Comparing stops at the next
1869 * separator. The FLAGS argument is the same as that of fnmatch().
1870 * Return FNM_MATCH iff MASK and NAME match.
1871 *
1872 * (c) 1994-1996 by Eberhard Mattes.
1873 */
1874
1875static int match_comp(const unsigned char *mask,
1876 const unsigned char *name,
1877 unsigned flags)
1878{
1879 const unsigned char *s;
1880
1881 switch (flags & _FNM_STYLE_MASK)
1882 {
1883 case _FNM_OS2:
1884 case _FNM_DOS:
1885
1886 /* For OS/2 and DOS styles, we add an implicit dot at the end of
1887 * the component if the component doesn't include a dot. */
1888
1889 s = name;
1890 while (!IS_OS2_COMP_END(*s) && *s != '.')
1891 ++s;
1892 return match_comp_os2(mask, name, flags, *s == '.');
1893
1894 default:
1895 return FNM_ERR;
1896 }
1897}
1898
1899/* In Unix styles, / separates components of a path. This macro
1900 * returns true iff C is a separator. */
1901
#define IS_UNIX_COMP_SEP(CH) ((CH) == '/')


/* This macro returns true if CH terminates a component of a path,
 * i.e. if it is the separator or the end of the string. */

#define IS_UNIX_COMP_END(CH) ((CH) == 0 || IS_UNIX_COMP_SEP (CH))
1909
1910/*
1911 * match_unix:
1912 * match complete paths for Unix styles. The FLAGS argument is the
1913 * same as that of fnmatch(). COMP points to the start of the current
1914 * component in NAME. Return FNM_MATCH iff MASK and NAME match. The
1915 * backslash character is used for escaping ? and * unless
1916 * FNM_NOESCAPE is set.
1917 *
1918 * (c) 1994-1996 by Eberhard Mattes.
1919 */
1920
1921static int match_unix(const unsigned char *mask,
1922 const unsigned char *name,
1923 unsigned flags,
1924 const unsigned char *comp)
1925{
1926 unsigned char c1, c2;
1927 char invert, matched;
1928 const unsigned char *start;
1929 int rc;
1930
1931 for (;;)
1932 switch (*mask)
1933 {
1934 case 0:
1935
1936 /* There must be no extra characters at the end of NAME when
1937 * reaching the end of MASK unless _FNM_PATHPREFIX is set:
1938 * in that case, NAME may point to a separator. */
1939
1940 if (*name == 0)
1941 return FNM_MATCH;
1942 if ((flags & _FNM_PATHPREFIX) && IS_UNIX_COMP_SEP(*name))
1943 return FNM_MATCH;
1944 return FNM_NOMATCH;
1945
1946 case '?':
1947
1948 /* A question mark matches one character. It does not match
1949 * the component separator if FNM_PATHNAME is set. It does
1950 * not match a dot at the start of a component if FNM_PERIOD
1951 * is set. */
1952
1953 if (*name == 0)
1954 return FNM_NOMATCH;
1955 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1956 return FNM_NOMATCH;
1957 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1958 return FNM_NOMATCH;
1959 ++mask;
1960 ++name;
1961 break;
1962
1963 case '*':
1964
1965 /* An asterisk matches zero or more characters. It does not
1966 * match the component separator if FNM_PATHNAME is set. It
1967 * does not match a dot at the start of a component if
1968 * FNM_PERIOD is set. */
1969
1970 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
1971 return FNM_NOMATCH;
1972 do
1973 {
1974 ++mask;
1975 }
1976 while (*mask == '*');
1977 for (;;)
1978 {
1979 rc = match_unix(mask, name, flags, comp);
1980 if (rc != FNM_NOMATCH)
1981 return rc;
1982 if (*name == 0)
1983 return FNM_NOMATCH;
1984 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
1985 return FNM_NOMATCH;
1986 ++name;
1987 }
1988
1989 case '/':
1990
1991 /* Separators match only separators. If _FNM_PATHPREFIX is
1992 * set, a trailing separator in MASK is ignored at the end
1993 * of NAME. */
1994
1995 if (!(IS_UNIX_COMP_SEP(*name)
1996 || ((flags & _FNM_PATHPREFIX) && *name == 0
1997 && (mask[1] == 0
1998 || (!(flags & FNM_NOESCAPE) && mask[1] == '\\'
1999 && mask[2] == 0)))))
2000 return FNM_NOMATCH;
2001
2002 ++mask;
2003 if (*name != 0)
2004 ++name;
2005
2006 /* This is the beginning of a new component if FNM_PATHNAME
2007 * is set. */
2008
2009 if (flags & FNM_PATHNAME)
2010 comp = name;
2011 break;
2012
2013 case '[':
2014
2015 /* A set of characters. Always case-sensitive. */
2016
2017 if (*name == 0)
2018 return FNM_NOMATCH;
2019 if ((flags & FNM_PATHNAME) && IS_UNIX_COMP_SEP(*name))
2020 return FNM_NOMATCH;
2021 if (*name == '.' && (flags & FNM_PERIOD) && name == comp)
2022 return FNM_NOMATCH;
2023
2024 invert = 0;
2025 matched = 0;
2026 ++mask;
2027
2028 /* If the first character is a ! or ^, the set matches all
2029 * characters not listed in the set. */
2030
2031 if (*mask == '!' || *mask == '^')
2032 {
2033 ++mask;
2034 invert = 1;
2035 }
2036
2037 /* Loop over all the characters of the set. The loop ends
2038 * if the end of the string is reached or if a ] is
2039 * encountered unless it directly follows the initial [ or
2040 * [-. */
2041
2042 start = mask;
2043 while (!(*mask == 0 || (*mask == ']' && mask != start)))
2044 {
2045 /* Get the next character which is optionally preceded
2046 * by a backslash. */
2047
2048 c1 = *mask++;
2049 if (!(flags & FNM_NOESCAPE) && c1 == '\\')
2050 {
2051 if (*mask == 0)
2052 break;
2053 c1 = *mask++;
2054 }
2055
2056 /* Ranges of characters are written as a-z. Don't
2057 * forget to check for the end of the string and to
2058 * handle the backslash. If the character after - is a
2059 * ], it isn't a range. */
2060
2061 if (*mask == '-' && mask[1] != ']')
2062 {
2063 ++mask; /* Skip the - character */
2064 if (!(flags & FNM_NOESCAPE) && *mask == '\\')
2065 ++mask;
2066 if (*mask == 0)
2067 break;
2068 c2 = *mask++;
2069 }
2070 else
2071 c2 = c1;
2072
2073 /* Now check whether this character or range matches NAME. */
2074
2075 if (c1 <= *name && *name <= c2)
2076 matched = 1;
2077 }
2078
2079 /* If the end of the string is reached before a ] is found,
2080 * back up to the [ and compare it to NAME. */
2081
2082 if (*mask == 0)
2083 {
2084 if (*name != '[')
2085 return FNM_NOMATCH;
2086 ++name;
2087 mask = start;
2088 if (invert)
2089 --mask;
2090 }
2091 else
2092 {
2093 if (invert)
2094 matched = !matched;
2095 if (!matched)
2096 return FNM_NOMATCH;
2097 ++mask; /* Skip the ] character */
2098 if (*name != 0)
2099 ++name;
2100 }
2101 break;
2102
2103 case '\\':
2104 ++mask;
2105 if (flags & FNM_NOESCAPE)
2106 {
2107 if (*name != '\\')
2108 return FNM_NOMATCH;
2109 ++name;
2110 }
2111 else if (*mask == '*' || *mask == '?')
2112 {
2113 if (*mask != *name)
2114 return FNM_NOMATCH;
2115 ++mask;
2116 ++name;
2117 }
2118 break;
2119
2120 default:
2121
2122 /* All other characters match themselves. */
2123
2124 if (flags & _FNM_IGNORECASE)
2125 {
2126 if (tolower(*mask) != tolower(*name))
2127 return FNM_NOMATCH;
2128 }
2129 else
2130 {
2131 if (*mask != *name)
2132 return FNM_NOMATCH;
2133 }
2134 ++mask;
2135 ++name;
2136 break;
2137 }
2138}
2139
2140/*
2141 * _fnmatch_unsigned:
2142 * Check whether the path name NAME matches the wildcard MASK.
2143 *
2144 * Return:
2145 * -- 0 (FNM_MATCH) if it matches,
2146 * -- FNM_NOMATCH if it doesn't,
2147 * -- FNM_ERR on error.
2148 *
2149 * The operation of this function is controlled by FLAGS.
2150 * This is an internal function, with unsigned arguments.
2151 *
2152 * (c) 1994-1996 by Eberhard Mattes.
2153 */
2154
2155static int _fnmatch_unsigned(const unsigned char *mask,
2156 const unsigned char *name,
2157 unsigned flags)
2158{
2159 int m_drive, n_drive,
2160 rc;
2161
2162 /* Match and skip the drive name if present. */
2163
2164 m_drive = ((isalpha(mask[0]) && mask[1] == ':') ? mask[0] : -1);
2165 n_drive = ((isalpha(name[0]) && name[1] == ':') ? name[0] : -1);
2166
2167 if (m_drive != n_drive)
2168 {
2169 if (m_drive == -1 || n_drive == -1)
2170 return FNM_NOMATCH;
2171 if (!(flags & _FNM_IGNORECASE))
2172 return FNM_NOMATCH;
2173 if (tolower(m_drive) != tolower(n_drive))
2174 return FNM_NOMATCH;
2175 }
2176
2177 if (m_drive != -1)
2178 mask += 2;
2179 if (n_drive != -1)
2180 name += 2;
2181
2182 /* Colons are not allowed in path names, except for the drive name,
2183 * which was skipped above. */
2184
2185 if (has_colon(mask) || has_colon(name))
2186 return FNM_ERR;
2187
2188 /* The name "\\server\path" should not be matched by mask
2189 * "\*\server\path". Ditto for /. */
2190
2191 switch (flags & _FNM_STYLE_MASK)
2192 {
2193 case _FNM_OS2:
2194 case _FNM_DOS:
2195
2196 if (IS_OS2_COMP_SEP(name[0]) && IS_OS2_COMP_SEP(name[1]))
2197 {
2198 if (!(IS_OS2_COMP_SEP(mask[0]) && IS_OS2_COMP_SEP(mask[1])))
2199 return FNM_NOMATCH;
2200 name += 2;
2201 mask += 2;
2202 }
2203 break;
2204
2205 case _FNM_POSIX:
2206
2207 if (name[0] == '/' && name[1] == '/')
2208 {
2209 int i;
2210
2211 name += 2;
2212 for (i = 0; i < 2; ++i)
2213 if (mask[0] == '/')
2214 ++mask;
2215 else if (mask[0] == '\\' && mask[1] == '/')
2216 mask += 2;
2217 else
2218 return FNM_NOMATCH;
2219 }
2220
2221 /* In Unix styles, treating ? and * w.r.t. components is simple.
2222 * No need to do matching component by component. */
2223
2224 return match_unix(mask, name, flags, name);
2225 }
2226
2227 /* Now compare all the components of the path name, one by one.
2228 * Note that the path separator must not be enclosed in brackets. */
2229
2230 while (*mask != 0 || *name != 0)
2231 {
2232
2233 /* If _FNM_PATHPREFIX is set, the names match if the end of MASK
2234 * is reached even if there are components left in NAME. */
2235
2236 if (*mask == 0 && (flags & _FNM_PATHPREFIX))
2237 return FNM_MATCH;
2238
2239 /* Compare a single component of the path name. */
2240
2241 rc = match_comp(mask, name, flags);
2242 if (rc != FNM_MATCH)
2243 return rc;
2244
2245 /* Skip to the next component or to the end of the path name. */
2246
2247 mask = skip_comp_os2(mask);
2248 name = skip_comp_os2(name);
2249 }
2250
2251 /* If we reached the ends of both strings, the names match. */
2252
2253 if (*mask == 0 && *name == 0)
2254 return FNM_MATCH;
2255
2256 /* The names do not match. */
2257
2258 return FNM_NOMATCH;
2259}
2260
2261/*
2262 *@@ strhMatchOS2:
2263 * this matches wildcards, similar to what DosEditName does.
2264 * However, this does not require a file to be present, but
2265 * works on strings only.
2266 */
2267
2268BOOL strhMatchOS2(const unsigned char* pcszMask, // in: mask (e.g. "*.txt")
2269 const unsigned char* pcszName) // in: string to check (e.g. "test.txt")
2270{
2271 return ((BOOL)(_fnmatch_unsigned(pcszMask,
2272 pcszName,
2273 _FNM_OS2 | _FNM_IGNORECASE)
2274 == FNM_MATCH)
2275 );
2276}
2277
2278/* ******************************************************************
2279 *
2280 * Fast string searches
2281 *
2282 ********************************************************************/
2283
// assertions are compiled out: the macro expands to nothing,
// so its argument is never evaluated
#define ASSERT(expr)
2285
2286/*
2287 * The following code has been taken from the "Standard
2288 * Function Library", file sflfind.c, and only slightly
2289 * modified to conform to the rest of this file.
2290 *
2291 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
2292 * Revised: 98/05/04
2293 *
2294 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2295 *
2296 * The SFL Licence allows incorporating SFL code into other
2297 * programs, as long as the copyright is reprinted and the
2298 * code is marked as modified, so this is what we do.
2299 */
2300
2301/*
2302 *@@ strhmemfind:
2303 * searches for a pattern in a block of memory using the
2304 * Boyer-Moore-Horspool-Sunday algorithm.
2305 *
2306 * The block and pattern may contain any values; you must
2307 * explicitly provide their lengths. If you search for strings,
2308 * use strlen() on the buffers.
2309 *
2310 * Returns a pointer to the pattern if found within the block,
2311 * or NULL if the pattern was not found.
2312 *
2313 * This algorithm needs a "shift table" to cache data for the
2314 * search pattern. This table can be reused when performing
2315 * several searches with the same pattern.
2316 *
2317 * "shift" must point to an array big enough to hold 256 (2**8)
2318 * "size_t" values.
2319 *
2320 * If (*repeat_find == FALSE), the shift table is initialized.
2321 * So on the first search with a given pattern, *repeat_find
2322 * should be FALSE. This function sets it to TRUE after the
2323 * shift table is initialised, allowing the initialisation
2324 * phase to be skipped on subsequent searches.
2325 *
2326 * This function is most effective when repeated searches are
2327 * made for the same pattern in one or more large buffers.
2328 *
2329 * Example:
2330 *
2331 + PSZ pszHaystack = "This is a sample string.",
2332 + pszNeedle = "string";
2333 + size_t shift[256];
2334 + BOOL fRepeat = FALSE;
2335 +
2336 + PSZ pFound = strhmemfind(pszHaystack,
2337 + strlen(pszHaystack), // block size
2338 + pszNeedle,
2339 + strlen(pszNeedle), // pattern size
2340 + shift,
2341 + &fRepeat);
2342 *
2343 * Taken from the "Standard Function Library", file sflfind.c.
2344 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
2345 * Slightly modified by umoeller.
2346 *
2347 *@@added V0.9.3 (2000-05-08) [umoeller]
2348 */
2349
2350void* strhmemfind(const void *in_block, // in: block containing data
2351 size_t block_size, // in: size of block in bytes
2352 const void *in_pattern, // in: pattern to search for
2353 size_t pattern_size, // in: size of pattern block
2354 size_t *shift, // in/out: shift table (search buffer)
2355 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
2356{
2357 size_t byte_nbr, // Distance through block
2358 match_size; // Size of matched part
2359 const unsigned char
2360 *match_base = NULL, // Base of match of pattern
2361 *match_ptr = NULL, // Point within current match
2362 *limit = NULL; // Last potiental match point
2363 const unsigned char
2364 *block = (unsigned char *) in_block, // Concrete pointer to block data
2365 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
2366
2367 if ( (block == NULL)
2368 || (pattern == NULL)
2369 || (shift == NULL)
2370 )
2371 return (NULL);
2372
2373 // Pattern must be smaller or equal in size to string
2374 if (block_size < pattern_size)
2375 return (NULL); // Otherwise it's not found
2376
2377 if (pattern_size == 0) // Empty patterns match at start
2378 return ((void *)block);
2379
2380 // Build the shift table unless we're continuing a previous search
2381
2382 // The shift table determines how far to shift before trying to match
2383 // again, if a match at this point fails. If the byte after where the
2384 // end of our pattern falls is not in our pattern, then we start to
2385 // match again after that byte; otherwise we line up the last occurence
2386 // of that byte in our pattern under that byte, and try match again.
2387
2388 if (!repeat_find || !*repeat_find)
2389 {
2390 for (byte_nbr = 0;
2391 byte_nbr < 256;
2392 byte_nbr++)
2393 shift[byte_nbr] = pattern_size + 1;
2394 for (byte_nbr = 0;
2395 byte_nbr < pattern_size;
2396 byte_nbr++)
2397 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
2398
2399 if (repeat_find)
2400 *repeat_find = TRUE;
2401 }
2402
2403 // Search for the block, each time jumping up by the amount
2404 // computed in the shift table
2405
2406 limit = block + (block_size - pattern_size + 1);
2407 ASSERT (limit > block);
2408
2409 for (match_base = block;
2410 match_base < limit;
2411 match_base += shift[*(match_base + pattern_size)])
2412 {
2413 match_ptr = match_base;
2414 match_size = 0;
2415
2416 // Compare pattern until it all matches, or we find a difference
2417 while (*match_ptr++ == pattern[match_size++])
2418 {
2419 ASSERT (match_size <= pattern_size &&
2420 match_ptr == (match_base + match_size));
2421
2422 // If we found a match, return the start address
2423 if (match_size >= pattern_size)
2424 return ((void*)(match_base));
2425
2426 }
2427 }
2428 return (NULL); // Found nothing
2429}
2430
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. Both the
 *      string and the pattern are null-terminated strings.
 *
 *      Returns a pointer to the first occurrence of the pattern
 *      within the string, or NULL if the pattern was not found
 *      or if either argument is NULL. An empty pattern matches
 *      at the start of the string.
 *
 *      Will match strings irrespective of case, as defined by
 *      tolower(). To match exact strings, use strhfind(). Will
 *      not work on multibyte characters.
 *
 *      All characters are converted to unsigned char before they
 *      are passed to tolower(), so bytes above 0x7F are handled
 *      safely even if plain char is signed.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright: Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            // String containing data
                   const char *pattern)           // Pattern to search for
{
    size_t
        shift [256];                    // Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       // Index into byte array
        match_size;                     // Size of matched part
    // Unsigned views of both strings: the <ctype.h> functions are only
    // defined for EOF and values representable as unsigned char, so a
    // negative plain char must never reach tolower().
    const unsigned char
        *text = (const unsigned char *) string,
        *pat = (const unsigned char *) pattern,
        *match_base = NULL,             // Base of match of pattern
        *limit = NULL;                  // One past the last potential match point

    ASSERT (string);                    // Expect non-NULL pointers, but
    ASSERT (pattern);                   // fail gracefully if not debugging
    if (string == NULL || pattern == NULL)
        return (NULL);

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                  // Otherwise it cannot be found

    if (pattern_size == 0)              // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try to match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find the location
    // of the next potential match.

    limit = text + (string_size - pattern_size + 1);
    ASSERT (limit > text);

    // match_base [pattern_size] is at worst the terminating null byte,
    // so the shift lookup never reads beyond the string.
    for (match_base = text;
         match_base < limit;
         match_base += shift [(unsigned char) tolower (match_base [pattern_size])])
    {
        match_size = 0;

        // Compare pattern until it all matches, or we find a difference
        while (tolower (match_base [match_size]) == tolower (pat [match_size]))
        {
            match_size++;
            ASSERT (match_size <= pattern_size);

            // If we found a match, return the start address
            if (match_size >= pattern_size)
                return ((char *) string + (match_base - text));
        }
    }
    return (NULL);                      // Found nothing
}
2525
Note: See TracBrowser for help on using the repository browser.