source: trunk/src/helpers/stringh.c@ 165

Last change on this file since 165 was 164, checked in by umoeller, 23 years ago

Massive pager rework.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 58.4 KB
Line 
1
2/*
3 *@@sourcefile stringh.c:
4 * contains string/text helper functions. These are good for
5 * parsing/splitting strings and other stuff used throughout
6 * XWorkplace.
7 *
8 * Note that these functions are really a bunch of very mixed
9 * up string helpers, which you may or may not find helpful.
10 * If you're looking for string functions with memory
11 * management, look at xstring.c instead.
12 *
13 * Usage: All OS/2 programs.
14 *
15 * Function prefixes (new with V0.81):
16 * -- strh* string helper functions.
17 *
18 * Note: Version numbering in this file relates to XWorkplace version
19 * numbering.
20 *
21 *@@header "helpers\stringh.h"
22 */
23
24/*
25 * Copyright (C) 1997-2000 Ulrich Möller.
26 * Parts Copyright (C) 1991-1999 iMatix Corporation.
27 * This file is part of the "XWorkplace helpers" source package.
28 * This is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published
30 * by the Free Software Foundation, in version 2 as it comes in the
31 * "COPYING" file of the XWorkplace main distribution.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 */
37
38#define OS2EMX_PLAIN_CHAR
39 // this is needed for "os2emx.h"; if this is defined,
40 // emx will define PSZ as _signed_ char, otherwise
41 // as unsigned char
42
43#define INCL_WINSHELLDATA
44#define INCL_DOSERRORS
45#include <os2.h>
46
47#include <stdlib.h>
48#include <stdio.h>
49#include <string.h>
50#include <ctype.h>
51#include <math.h>
52
53#include "setup.h" // code generation and debugging options
54
55#define DONT_REPLACE_STRINGH_MALLOC
56#include "helpers\stringh.h"
57#include "helpers\xstring.h" // extended string helpers
58
59#pragma hdrstop
60
61/*
62 *@@category: Helpers\C helpers\String management
63 * See stringh.c and xstring.c.
64 */
65
66/*
67 *@@category: Helpers\C helpers\String management\C string helpers
68 * See stringh.c.
69 */
70
71#ifdef __DEBUG_MALLOC_ENABLED__
72
73/*
74 *@@ strhStoreDebug:
75 * memory debug version of strhStore.
76 *
77 *@@added V0.9.16 (2001-12-08) [umoeller]
78 */
79
80APIRET (strhStoreDebug)(PSZ *ppszTarget,
81 PCSZ pcszSource,
82 PULONG pulLength, // out: length of new string (ptr can be NULL)
83 PCSZ pcszSourceFile,
84 unsigned long ulLine,
85 PCSZ pcszFunction)
86{
87 ULONG ulLength = 0;
88
89
90
91 if (ppszTarget)
92 {
93 if (*ppszTarget)
94 free(*ppszTarget);
95
96 if ( (pcszSource)
97 && (ulLength = strlen(pcszSource))
98 )
99 {
100 if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1,
101 pcszSourceFile,
102 ulLine,
103 pcszFunction))
104 memcpy(*ppszTarget, pcszSource, ulLength + 1);
105 else
106 return (ERROR_NOT_ENOUGH_MEMORY);
107 }
108 else
109 *ppszTarget = NULL;
110 }
111
112 if (pulLength)
113 *pulLength = ulLength;
114
115 return (NO_ERROR);
116}
117
118#endif
119
120/*
121 *@@ strhStore:
122 * stores a copy of the given string in the specified
123 * buffer. Uses strdup internally.
124 *
125 * If *ppszTarget != NULL, the previous string is freed
126 * and set to NULL.
127 * If pcszSource != NULL, a copy of it is stored in the
128 * buffer.
129 *
130 *@@added V0.9.16 (2001-12-06) [umoeller]
131 */
132
133APIRET strhStore(PSZ *ppszTarget,
134 PCSZ pcszSource,
135 PULONG pulLength) // out: length of new string (ptr can be NULL)
136{
137 ULONG ulLength = 0;
138
139 if (ppszTarget)
140 {
141 if (*ppszTarget)
142 free(*ppszTarget);
143
144 if ( (pcszSource)
145 && (ulLength = strlen(pcszSource))
146 )
147 {
148 if (*ppszTarget = (PSZ)malloc(ulLength + 1))
149 memcpy(*ppszTarget, pcszSource, ulLength + 1);
150 else
151 return (ERROR_NOT_ENOUGH_MEMORY);
152 }
153 else
154 *ppszTarget = NULL;
155 }
156 else
157 return (ERROR_INVALID_PARAMETER);
158
159 if (pulLength)
160 *pulLength = ulLength;
161
162 return (NO_ERROR);
163}
164
165/*
166 *@@ strhcpy:
167 * like strdup, but this one doesn't crash if string2 is NULL,
168 * but sets the first byte in string1 to \0 instead.
169 *
170 *@@added V0.9.14 (2001-08-01) [umoeller]
171 */
172
173PSZ strhcpy(PSZ string1, PCSZ string2)
174{
175 if (string2)
176 return (strcpy(string1, string2));
177
178 *string1 = '\0';
179 return (string1);
180}
181
182#ifdef __DEBUG_MALLOC_ENABLED__
183
184/*
185 *@@ strhdupDebug:
186 * memory debug version of strhdup.
187 *
188 *@@added V0.9.0 [umoeller]
189 */
190
191PSZ strhdupDebug(PCSZ pcszSource,
192 unsigned long *pulLength,
193 PCSZ pcszSourceFile,
194 unsigned long ulLine,
195 PCSZ pcszFunction)
196{
197 PSZ pszReturn = NULL;
198 ULONG ulLength = 0;
199
200 if ( (pcszSource)
201 && (ulLength = strlen(pcszSource))
202 )
203 {
204 if (pszReturn = (PSZ)memdMalloc(ulLength + 1,
205 pcszSourceFile, // fixed V0.9.16 (2001-12-08) [umoeller]
206 ulLine,
207 pcszFunction))
208 memcpy(pszReturn, pcszSource, ulLength + 1);
209 }
210
211 if (pulLength)
212 *pulLength = ulLength;
213
214 return (pszReturn);
215}
216
217#endif // __DEBUG_MALLOC_ENABLED__
218
219/*
220 *@@ strhdup:
221 * like strdup, but this one doesn't crash if pszSource
222 * is NULL, but returns NULL also. In addition, this
223 * can report the length of the string (V0.9.16).
224 *
225 *@@added V0.9.0 [umoeller]
226 *@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength
227 */
228
229PSZ strhdup(PCSZ pcszSource,
230 unsigned long *pulLength) // out: length of string excl. null terminator (ptr can be NULL)
231{
232 PSZ pszReturn = NULL;
233 ULONG ulLength = 0;
234
235 if ( (pcszSource)
236 && (ulLength = strlen(pcszSource))
237 )
238 {
239 if (pszReturn = (PSZ)malloc(ulLength + 1))
240 memcpy(pszReturn, pcszSource, ulLength + 1);
241 }
242
243 if (pulLength)
244 *pulLength = ulLength;
245
246 return (pszReturn);
247}
248
249/*
250 *@@ strhcmp:
251 * better strcmp. This doesn't crash if any of the
252 * string pointers are NULL, but returns a proper
253 * value then.
254 *
255 * Besides, this is guaranteed to only return -1, 0,
256 * or +1, while strcmp can return any positive or
257 * negative value. This is useful for tree comparison
258 * funcs.
259 *
260 *@@added V0.9.9 (2001-02-16) [umoeller]
261 */
262
263int strhcmp(PCSZ p1, PCSZ p2)
264{
265 if (p1 && p2)
266 {
267 int i = strcmp(p1, p2);
268 if (i < 0) return (-1);
269 if (i > 0) return (+1);
270 }
271 else if (p1)
272 // but p2 is NULL: p1 greater than p2 then
273 return (+1);
274 else if (p2)
275 // but p1 is NULL: p1 less than p2 then
276 return (-1);
277
278 // return 0 if strcmp returned 0 above or both strings are NULL
279 return (0);
280}
281
282/*
283 *@@ strhicmp:
284 * like strhcmp, but compares without respect
285 * to case.
286 *
287 *@@added V0.9.9 (2001-04-07) [umoeller]
288 */
289
290int strhicmp(PCSZ p1, PCSZ p2)
291{
292 if (p1 && p2)
293 {
294 int i = stricmp(p1, p2);
295 if (i < 0) return (-1);
296 if (i > 0) return (+1);
297 }
298 else if (p1)
299 // but p2 is NULL: p1 greater than p2 then
300 return (+1);
301 else if (p2)
302 // but p1 is NULL: p1 less than p2 then
303 return (-1);
304
305 // return 0 if strcmp returned 0 above or both strings are NULL
306 return (0);
307}
308
309/*
310 *@@ strhistr:
311 * like strstr, but case-insensitive.
312 *
313 *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle
314 */
315
316PSZ strhistr(PCSZ string1, PCSZ string2)
317{
318 PSZ prc = NULL;
319
320 if ((string1) && (string2))
321 {
322 PSZ pszSrchIn = strdup(string1);
323 PSZ pszSrchFor = strdup(string2);
324
325 if ((pszSrchIn) && (pszSrchFor))
326 {
327 strupr(pszSrchIn);
328 strupr(pszSrchFor);
329
330 if (prc = strstr(pszSrchIn, pszSrchFor))
331 {
332 // prc now has the first occurence of the string,
333 // but in pszSrchIn; we need to map this
334 // return value to the original string
335 prc = (prc-pszSrchIn) // offset in pszSrchIn
336 + (PSZ)string1;
337 }
338 }
339 if (pszSrchFor)
340 free(pszSrchFor);
341 if (pszSrchIn)
342 free(pszSrchIn);
343 }
344 return (prc);
345}
346
347/*
348 *@@ strhncpy0:
349 * like strncpy, but always appends a 0 character.
350 *
351 *@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource
352 */
353
354ULONG strhncpy0(PSZ pszTarget,
355 PCSZ pszSource,
356 ULONG cbSource)
357{
358 ULONG ul = 0;
359 PSZ pTarget = pszTarget,
360 pSource;
361
362 if (pSource = (PSZ)pszSource) // V0.9.16 (2002-01-09) [umoeller]
363 {
364 for (ul = 0; ul < cbSource; ul++)
365 if (*pSource)
366 *pTarget++ = *pSource++;
367 else
368 break;
369 }
370
371 *pTarget = 0;
372
373 return (ul);
374}
375
376/*
377 *@@ strhlen:
378 * like strlen, but doesn't crash on
379 * null strings, but returns 0 also.
380 *
381 *@@added V0.9.19 (2002-04-02) [umoeller]
382 */
383
384ULONG strhlen(PCSZ pcsz)
385{
386 if (pcsz)
387 return (strlen(pcsz));
388
389 return 0;
390}
391
392/*
393 *@@ strhSize:
394 * returns the size of the given string, which
395 * is the memory required to allocate a copy,
396 * including the null terminator.
397 *
398 * Returns 0 only if pcsz is NULL. If pcsz
399 * points to a null character, this returns 1.
400 *
401 *@@added V0.9.18 (2002-02-13) [umoeller]
402 *@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte
403 */
404
405ULONG strhSize(PCSZ pcsz)
406{
407 if (pcsz) // && *pcsz) // V0.9.18 (2002-03-27) [umoeller]
408 return (strlen(pcsz) + 1);
409
410 return (0);
411}
412
413/*
414 * strhCount:
415 * this counts the occurences of c in pszSearch.
416 */
417
418ULONG strhCount(PCSZ pszSearch,
419 CHAR c)
420{
421 PSZ p = (PSZ)pszSearch;
422 ULONG ulCount = 0;
423 while (TRUE)
424 {
425 p = strchr(p, c);
426 if (p)
427 {
428 ulCount++;
429 p++;
430 }
431 else
432 break;
433 }
434 return (ulCount);
435}
436
437/*
438 *@@ strhIsDecimal:
439 * returns TRUE if psz consists of decimal digits only.
440 */
441
442BOOL strhIsDecimal(PSZ psz)
443{
444 PSZ p = psz;
445 while (*p != 0)
446 {
447 if (isdigit(*p) == 0)
448 return (FALSE);
449 p++;
450 }
451
452 return (TRUE);
453}
454
455#ifdef __DEBUG_MALLOC_ENABLED__
456
457/*
458 *@@ strhSubstrDebug:
459 * memory debug version of strhSubstr.
460 *
461 *@@added V0.9.14 (2001-08-01) [umoeller]
462 */
463
464PSZ strhSubstrDebug(PCSZ pBegin, // in: first char
465 PCSZ pEnd, // in: last char (not included)
466 PCSZ pcszSourceFile,
467 unsigned long ulLine,
468 PCSZ pcszFunction)
469{
470 PSZ pszSubstr = NULL;
471
472 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
473 {
474 ULONG cbSubstr = (pEnd - pBegin);
475 if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1,
476 pcszSourceFile,
477 ulLine,
478 pcszFunction))
479 {
480 // strhncpy0(pszSubstr, pBegin, cbSubstr);
481 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
482 *(pszSubstr + cbSubstr) = '\0';
483 }
484 }
485
486 return (pszSubstr);
487}
488
489#endif // __DEBUG_MALLOC_ENABLED__
490
491/*
492 *@@ strhSubstr:
493 * this creates a new PSZ containing the string
494 * from pBegin to pEnd, excluding the pEnd character.
495 * The new string is null-terminated. The caller
496 * must free() the new string after use.
497 *
498 * Example:
499 + "1234567890"
500 + ^ ^
501 + p1 p2
502 + strhSubstr(p1, p2)
503 * would return a new string containing "2345678".
504 *
505 *@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers
506 *@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed
507 */
508
509PSZ strhSubstr(PCSZ pBegin, // in: first char
510 PCSZ pEnd) // in: last char (not included)
511{
512 PSZ pszSubstr = NULL;
513
514 if (pEnd > pBegin) // V0.9.9 (2001-04-04) [umoeller]
515 {
516 ULONG cbSubstr = (pEnd - pBegin);
517 if (pszSubstr = (PSZ)malloc(cbSubstr + 1))
518 {
519 memcpy(pszSubstr, pBegin, cbSubstr); // V0.9.9 (2001-04-04) [umoeller]
520 *(pszSubstr + cbSubstr) = '\0';
521 }
522 }
523
524 return (pszSubstr);
525}
526
527/*
528 *@@ strhExtract:
529 * searches pszBuf for the cOpen character and returns
530 * the data in between cOpen and cClose, excluding
531 * those two characters, in a newly allocated buffer
532 * which you must free() afterwards.
533 *
534 * Spaces and newlines/linefeeds are skipped.
535 *
536 * If the search was successful, the new buffer
537 * is returned and, if (ppEnd != NULL), *ppEnd points
538 * to the first character after the cClose character
539 * found in the buffer.
540 *
541 * If the search was not successful, NULL is
542 * returned, and *ppEnd is unchanged.
543 *
544 * If another cOpen character is found before
545 * cClose, matching cClose characters will be skipped.
546 * You can therefore nest the cOpen and cClose
547 * characters.
548 *
549 * This function ignores cOpen and cClose characters
550 * in C-style comments and strings surrounded by
551 * double quotes.
552 *
553 * Example:
554 *
555 + PSZ pszBuf = "KEYWORD { --blah-- } next",
556 + pEnd;
557 + strhExtract(pszBuf,
558 + '{', '}',
559 + &pEnd)
560 *
561 * would return a new buffer containing " --blah-- ",
562 * and ppEnd would afterwards point to the space
563 * before "next" in the static buffer.
564 *
565 *@@added V0.9.0 [umoeller]
566 */
567
568PSZ strhExtract(PCSZ pszBuf, // in: search buffer
569 CHAR cOpen, // in: opening char
570 CHAR cClose, // in: closing char
571 PCSZ *ppEnd) // out: if != NULL, receives first character after closing char
572{
573 PSZ pszReturn = NULL;
574 PCSZ pOpen;
575 if ( (pszBuf)
576 && (pOpen = strchr(pszBuf, cOpen))
577 )
578 {
579 // opening char found:
580 // now go thru the whole rest of the buffer
581 PCSZ p = pOpen + 1;
582 LONG lLevel = 1; // if this goes 0, we're done
583 while (*p)
584 {
585 if (*p == cOpen)
586 lLevel++;
587 else if (*p == cClose)
588 {
589 lLevel--;
590 if (lLevel <= 0)
591 {
592 // matching closing bracket found:
593 // extract string
594 pszReturn = strhSubstr(pOpen + 1, // after cOpen
595 p); // excluding cClose
596 if (ppEnd)
597 *ppEnd = p + 1;
598 break; // while (*p)
599 }
600 }
601 else if (*p == '\"')
602 {
603 // beginning of string:
604 PCSZ p2 = p+1;
605 // find end of string
606 while ((*p2) && (*p2 != '\"'))
607 p2++;
608
609 if (*p2 == '\"')
610 // closing quote found:
611 // search on after that
612 p = p2; // raised below
613 else
614 break; // while (*p)
615 }
616
617 p++;
618 }
619 }
620
621 return (pszReturn);
622}
623
624/*
625 *@@ strhQuote:
626 * similar to strhExtract, except that
627 * opening and closing chars are the same,
628 * and therefore no nesting is possible.
629 * Useful for extracting stuff between
630 * quotes.
631 *
632 *@@added V0.9.0 [umoeller]
633 */
634
635PSZ strhQuote(PSZ pszBuf,
636 CHAR cQuote,
637 PSZ *ppEnd)
638{
639 PSZ pszReturn = NULL,
640 p1 = NULL;
641 if ((p1 = strchr(pszBuf, cQuote)))
642 {
643 PSZ p2;
644 if (p2 = strchr(p1+1, cQuote))
645 {
646 pszReturn = strhSubstr(p1+1, p2);
647 if (ppEnd)
648 // store closing char
649 *ppEnd = p2 + 1;
650 }
651 }
652
653 return (pszReturn);
654}
655
656/*
657 *@@ strhStrip:
658 * removes all double spaces.
659 * This copies within the "psz" buffer.
660 * If any double spaces are found, the
661 * string will be shorter than before,
662 * but the buffer is _not_ reallocated,
663 * so there will be unused bytes at the
664 * end.
665 *
666 * Returns the number of spaces removed.
667 *
668 *@@added V0.9.0 [umoeller]
669 */
670
671ULONG strhStrip(PSZ psz) // in/out: string
672{
673 PSZ p;
674 ULONG cb = strlen(psz),
675 ulrc = 0;
676
677 for (p = psz; p < psz+cb; p++)
678 {
679 if ((*p == ' ') && (*(p+1) == ' '))
680 {
681 PSZ p2 = p;
682 while (*p2)
683 {
684 *p2 = *(p2+1);
685 p2++;
686 }
687 cb--;
688 p--;
689 ulrc++;
690 }
691 }
692 return (ulrc);
693}
694
695/*
696 *@@ strhins:
697 * this inserts one string into another.
698 *
699 * pszInsert is inserted into pszBuffer at offset
700 * ulInsertOfs (which counts from 0).
701 *
702 * A newly allocated string is returned. pszBuffer is
703 * not changed. The new string should be free()'d after
704 * use.
705 *
706 * Upon errors, NULL is returned.
707 *
708 *@@changed V0.9.0 [umoeller]: completely rewritten.
709 */
710
711PSZ strhins(PCSZ pcszBuffer,
712 ULONG ulInsertOfs,
713 PCSZ pcszInsert)
714{
715 PSZ pszNew = NULL;
716
717 if ((pcszBuffer) && (pcszInsert))
718 {
719 do {
720 ULONG cbBuffer = strlen(pcszBuffer);
721 ULONG cbInsert = strlen(pcszInsert);
722
723 // check string length
724 if (ulInsertOfs > cbBuffer + 1)
725 break; // do
726
727 // OK, let's go.
728 pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1); // additional null terminator
729
730 // copy stuff before pInsertPos
731 memcpy(pszNew,
732 pcszBuffer,
733 ulInsertOfs);
734 // copy string to be inserted
735 memcpy(pszNew + ulInsertOfs,
736 pcszInsert,
737 cbInsert);
738 // copy stuff after pInsertPos
739 strcpy(pszNew + ulInsertOfs + cbInsert,
740 pcszBuffer + ulInsertOfs);
741 } while (FALSE);
742 }
743
744 return (pszNew);
745}
746
747/*
748 *@@ strhFindReplace:
749 * wrapper around xstrFindReplace to work with C strings.
750 * Note that *ppszBuf can get reallocated and must
751 * be free()'able.
752 *
753 * Repetitive use of this wrapper is not recommended
754 * because it is considerably slower than xstrFindReplace.
755 *
756 *@@added V0.9.6 (2000-11-01) [umoeller]
757 *@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl
758 */
759
760ULONG strhFindReplace(PSZ *ppszBuf, // in/out: string
761 PULONG pulOfs, // in: where to begin search (0 = start);
762 // out: ofs of first char after replacement string
763 PCSZ pcszSearch, // in: search string; cannot be NULL
764 PCSZ pcszReplace) // in: replacement string; cannot be NULL
765{
766 ULONG ulrc = 0;
767 XSTRING xstrBuf,
768 xstrFind,
769 xstrReplace;
770 size_t ShiftTable[256];
771 BOOL fRepeat = FALSE;
772 xstrInitSet(&xstrBuf, *ppszBuf);
773 // reallocated and returned, so we're safe
774 xstrInitSet(&xstrFind, (PSZ)pcszSearch);
775 xstrInitSet(&xstrReplace, (PSZ)pcszReplace);
776 // these two are never freed, so we're safe too
777
778 if ((ulrc = xstrFindReplace(&xstrBuf,
779 pulOfs,
780 &xstrFind,
781 &xstrReplace,
782 ShiftTable,
783 &fRepeat)))
784 // replaced:
785 *ppszBuf = xstrBuf.psz;
786
787 return (ulrc);
788}
789
790/*
791 * strhWords:
792 * returns the no. of words in "psz".
793 * A string is considered a "word" if
794 * it is surrounded by spaces only.
795 *
796 *@@added V0.9.0 [umoeller]
797 */
798
799ULONG strhWords(PSZ psz)
800{
801 PSZ p;
802 ULONG cb = strlen(psz),
803 ulWords = 0;
804 if (cb > 1)
805 {
806 ulWords = 1;
807 for (p = psz; p < psz+cb; p++)
808 if (*p == ' ')
809 ulWords++;
810 }
811 return (ulWords);
812}
813
814/*
815 *@@ strhGetWord:
816 * finds word boundaries.
817 *
818 * *ppszStart is used as the beginning of the
819 * search.
820 *
821 * If a word is found, *ppszStart is set to
822 * the first character of the word which was
823 * found and *ppszEnd receives the address
824 * of the first character _after_ the word,
825 * which is probably a space or a \n or \r char.
826 * We then return TRUE.
827 *
828 * The search is stopped if a null character
829 * is found or pLimit is reached. In that case,
830 * FALSE is returned.
831 *
832 *@@added V0.9.1 (2000-02-13) [umoeller]
833 */
834
835BOOL strhGetWord(PSZ *ppszStart, // in: start of search,
836 // out: start of word (if TRUE is returned)
837 PCSZ pLimit, // in: ptr to last char after *ppszStart to be
838 // searched; if the word does not end before
839 // or with this char, FALSE is returned
840 PCSZ pcszBeginChars, // stringh.h defines STRH_BEGIN_CHARS
841 PCSZ pcszEndChars, // stringh.h defines STRH_END_CHARS
842 PSZ *ppszEnd) // out: first char _after_ word
843 // (if TRUE is returned)
844{
845 // characters after which a word can be started
846 // PCSZ pcszBeginChars = "\x0d\x0a ";
847 // PCSZ pcszEndChars = "\x0d\x0a /-";
848
849 PSZ pStart = *ppszStart;
850
851 // find start of word
852 while ( (pStart < (PSZ)pLimit)
853 && (strchr(pcszBeginChars, *pStart))
854 )
855 // if char is a "before word" char: go for next
856 pStart++;
857
858 if (pStart < (PSZ)pLimit)
859 {
860 // found a valid "word start" character
861 // (which is not in pcszBeginChars):
862
863 // find end of word
864 PSZ pEndOfWord = pStart;
865 while ( (pEndOfWord <= (PSZ)pLimit)
866 && (strchr(pcszEndChars, *pEndOfWord) == 0)
867 )
868 // if char is not an "end word" char: go for next
869 pEndOfWord++;
870
871 if (pEndOfWord <= (PSZ)pLimit)
872 {
873 // whoa, got a word:
874 *ppszStart = pStart;
875 *ppszEnd = pEndOfWord;
876 return (TRUE);
877 }
878 }
879
880 return (FALSE);
881}
882
883/*
884 *@@ strhIsWord:
885 * returns TRUE if p points to a "word"
886 * in pcszBuf.
887 *
888 * p is considered a word if the character _before_
889 * it is in pcszBeginChars and the char _after_
890 * it (i.e. *(p+cbSearch)) is in pcszEndChars.
891 *
892 *@@added V0.9.6 (2000-11-12) [umoeller]
893 *@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
894 */
895
896BOOL strhIsWord(PCSZ pcszBuf,
897 PCSZ p, // in: start of word
898 ULONG cbSearch, // in: length of word
899 PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
900 PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
901{
902 // check previous char
903 if ( (p == pcszBuf)
904 || (strchr(pcszBeginChars, *(p-1)))
905 )
906 {
907 // OK, valid begin char:
908 // check end char
909 CHAR cNextChar;
910 if (!(cNextChar = p[cbSearch]))
911 // null terminator:
912 return TRUE;
913 else
914 {
915 // not null terminator: check if char is
916 // in the list of valid end chars
917 if (strchr(pcszEndChars, cNextChar))
918 {
919 // OK, is end char: avoid doubles of that char,
920 // but allow spaces
921 // fixed V0.9.18 (2002-02-23) [umoeller]
922 CHAR cNextNext = p[cbSearch + 1];
923 if ( (cNextNext != cNextChar)
924 || (cNextNext == ' ')
925 || (cNextNext == 0)
926 )
927 return TRUE;
928 }
929 }
930 }
931
932 return FALSE;
933}
934
935/*
936 *@@ strhFindWord:
937 * searches for pszSearch in pszBuf, which is
938 * returned if found (or NULL if not).
939 *
940 * As opposed to strstr, this finds pszSearch
941 * only if it is a "word". A search string is
942 * considered a word if the character _before_
943 * it is in pcszBeginChars and the char _after_
944 * it is in pcszEndChars.
945 *
946 * Example:
947 + strhFindWord("This is an example.", "is");
948 + returns ...........^ this, but not the "is" in "This".
949 *
950 * The algorithm here uses strstr to find pszSearch in pszBuf
951 * and performs additional "is-word" checks for each item found
952 * (by calling strhIsWord).
953 *
954 * Note that this function is fairly slow compared to xstrFindWord.
955 *
956 *@@added V0.9.0 (99-11-08) [umoeller]
957 *@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
958 */
959
960PSZ strhFindWord(PCSZ pszBuf,
961 PCSZ pszSearch,
962 PCSZ pcszBeginChars, // suggestion: "\x0d\x0a ()/\\-,."
963 PCSZ pcszEndChars) // suggestion: "\x0d\x0a ()/\\-,.:;"
964{
965 PSZ pszReturn = 0;
966 ULONG cbBuf = strlen(pszBuf),
967 cbSearch = strlen(pszSearch);
968
969 if ((cbBuf) && (cbSearch))
970 {
971 PCSZ p = pszBuf;
972
973 do // while p
974 {
975 p = strstr(p, pszSearch);
976 if (p)
977 {
978 // string found:
979 // check if that's a word
980
981 if (strhIsWord(pszBuf,
982 p,
983 cbSearch,
984 pcszBeginChars,
985 pcszEndChars))
986 {
987 // valid end char:
988 pszReturn = (PSZ)p;
989 break;
990 }
991
992 p += cbSearch;
993 }
994 } while (p);
995
996 }
997 return (pszReturn);
998}
999
1000/*
1001 *@@ strhFindEOL:
1002 * returns a pointer to the next \r, \n or null character
1003 * following pszSearchIn. Stores the offset in *pulOffset.
1004 *
1005 * This should never return NULL because at some point,
1006 * there will be a null byte in your string.
1007 *
1008 *@@added V0.9.4 (2000-07-01) [umoeller]
1009 */
1010
1011PSZ strhFindEOL(PCSZ pcszSearchIn, // in: where to search
1012 PULONG pulOffset) // out: offset (ptr can be NULL)
1013{
1014 PCSZ p = pcszSearchIn,
1015 prc = 0;
1016 while (TRUE)
1017 {
1018 if ( (*p == '\r') || (*p == '\n') || (*p == 0) )
1019 {
1020 prc = p;
1021 break;
1022 }
1023 p++;
1024 }
1025
1026 if ((pulOffset) && (prc))
1027 *pulOffset = prc - pcszSearchIn;
1028
1029 return ((PSZ)prc);
1030}
1031
1032/*
1033 *@@ strhFindNextLine:
1034 * like strhFindEOL, but this returns the character
1035 * _after_ \r or \n. Note that this might return
1036 * a pointer to terminating NULL character also.
1037 */
1038
1039PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
1040{
1041 PSZ pEOL = strhFindEOL(pszSearchIn, NULL);
1042 // pEOL now points to the \r char or the terminating 0 byte;
1043 // if not null byte, advance pointer
1044 PSZ pNextLine = pEOL;
1045 if (*pNextLine == '\r')
1046 pNextLine++;
1047 if (*pNextLine == '\n')
1048 pNextLine++;
1049 if (pulOffset)
1050 *pulOffset = pNextLine - pszSearchIn;
1051 return (pNextLine);
1052}
1053
1054/*
1055 *@@ strhBeautifyTitle:
1056 * replaces all line breaks (0xd, 0xa) with spaces.
1057 *
1058 *@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
1059 */
1060
1061BOOL strhBeautifyTitle(PSZ psz)
1062{
1063 BOOL rc = FALSE;
1064 CHAR *p = psz;
1065
1066 while(*p)
1067 if ( (*p == '\r')
1068 || (*p == '\n')
1069 )
1070 {
1071 rc = TRUE;
1072 if ( (p != psz)
1073 && (p[-1] == ' ')
1074 )
1075 memmove(p, p + 1, strlen(p));
1076 else
1077 *p++ = ' ';
1078 }
1079 else
1080 p++;
1081
1082 return (rc);
1083}
1084
1085/*
1086 * strhFindAttribValue:
1087 * searches for pszAttrib in pszSearchIn; if found,
1088 * returns the first character after the "=" char.
1089 * If "=" is not found, a space, \r, and \n are
1090 * also accepted. This function searches without
1091 * respecting case.
1092 *
1093 * <B>Example:</B>
1094 + strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
1095 +
1096 + returns ....................... ^ this address.
1097 *
1098 *@@added V0.9.0 [umoeller]
1099 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
1100 *@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
1101 */
1102
1103PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1104{
1105 PSZ prc = 0;
1106 PSZ pszSearchIn2, p;
1107 ULONG cbAttrib = strlen(pszAttrib),
1108 ulLength = strlen(pszSearchIn);
1109
1110 // use alloca(), so memory is freed on function exit
1111 pszSearchIn2 = (PSZ)alloca(ulLength + 1);
1112 memcpy(pszSearchIn2, pszSearchIn, ulLength + 1);
1113
1114 // 1) find token, (space char, \n, \r, \t)
1115 p = strtok(pszSearchIn2, " \n\r\t");
1116 while (p)
1117 {
1118 CHAR c2;
1119 PSZ pOrig;
1120
1121 // check tag name
1122 if (!strnicmp(p, pszAttrib, cbAttrib))
1123 {
1124 // position in original string
1125 pOrig = (PSZ)pszSearchIn + (p - pszSearchIn2);
1126
1127 // yes:
1128 prc = pOrig + cbAttrib;
1129 c2 = *prc;
1130 while ( ( (c2 == ' ')
1131 || (c2 == '=')
1132 || (c2 == '\n')
1133 || (c2 == '\r')
1134 )
1135 && (c2 != 0)
1136 )
1137 c2 = *++prc;
1138
1139 break;
1140 }
1141
1142 p = strtok(NULL, " \n\r\t");
1143 }
1144
1145 return (prc);
1146}
1147
1148/* PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
1149{
1150 PSZ prc = 0;
1151 PSZ pszSearchIn2 = (PSZ)pszSearchIn,
1152 p,
1153 p2;
1154 ULONG cbAttrib = strlen(pszAttrib);
1155
1156 // 1) find space char
1157 while ((p = strchr(pszSearchIn2, ' ')))
1158 {
1159 CHAR c;
1160 p++;
1161 if (strlen(p) >= cbAttrib) // V0.9.9 (2001-03-27) [umoeller]
1162 {
1163 c = *(p+cbAttrib); // V0.9.3 (2000-05-19) [umoeller]
1164 // now check whether the p+strlen(pszAttrib)
1165 // is a valid end-of-tag character
1166 if ( (memicmp(p, (PVOID)pszAttrib, cbAttrib) == 0)
1167 && ( (c == ' ')
1168 || (c == '>')
1169 || (c == '=')
1170 || (c == '\r')
1171 || (c == '\n')
1172 || (c == 0)
1173 )
1174 )
1175 {
1176 // yes:
1177 CHAR c2;
1178 p2 = p + cbAttrib;
1179 c2 = *p2;
1180 while ( ( (c2 == ' ')
1181 || (c2 == '=')
1182 || (c2 == '\n')
1183 || (c2 == '\r')
1184 )
1185 && (c2 != 0)
1186 )
1187 c2 = *++p2;
1188
1189 prc = p2;
1190 break; // first while
1191 }
1192 }
1193 else
1194 break;
1195
1196 pszSearchIn2++;
1197 }
1198 return (prc);
1199} */
1200
1201/*
1202 * strhGetNumAttribValue:
1203 * stores the numerical parameter value of an HTML-style
1204 * tag in *pl.
1205 *
1206 * Returns the address of the tag parameter in the
1207 * search buffer, if found, or NULL.
1208 *
1209 * <B>Example:</B>
1210 + strhGetNumAttribValue("<PAGE BLAH=123>, "BLAH", &l);
1211 *
1212 * stores 123 in the "l" variable.
1213 *
1214 *@@added V0.9.0 [umoeller]
1215 *@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
1216 */
1217
1218PSZ strhGetNumAttribValue(const char *pszSearchIn, // in: where to search
1219 const char *pszTag, // e.g. "INDEX"
1220 PLONG pl) // out: numerical value
1221{
1222 PSZ pParam;
1223 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1224 {
1225 if ( (*pParam == '\"')
1226 || (*pParam == '\'')
1227 )
1228 pParam++; // V0.9.9 (2001-04-04) [umoeller]
1229
1230 sscanf(pParam, "%ld", pl);
1231 }
1232
1233 return (pParam);
1234}
1235
1236/*
1237 * strhGetTextAttr:
1238 * retrieves the attribute value of a textual HTML-style tag
1239 * in a newly allocated buffer, which is returned,
1240 * or NULL if attribute not found.
1241 * If an attribute value is to contain spaces, it
1242 * must be enclosed in quotes.
1243 *
1244 * The offset of the attribute data in pszSearchIn is
1245 * returned in *pulOffset so that you can do multiple
1246 * searches.
1247 *
1248 * This returns a new buffer, which should be free()'d after use.
1249 *
1250 * <B>Example:</B>
1251 + ULONG ulOfs = 0;
1252 + strhGetTextAttr("<PAGE BLAH="blublub">, "BLAH", &ulOfs)
1253 + ............^ ulOfs
1254 *
1255 * returns a new string with the value "blublub" (without
1256 * quotes) and sets ulOfs to 12.
1257 *
1258 *@@added V0.9.0 [umoeller]
1259 */
1260
1261PSZ strhGetTextAttr(const char *pszSearchIn,
1262 const char *pszTag,
1263 PULONG pulOffset) // out: offset where found
1264{
1265 PSZ pParam,
1266 pParam2,
1267 prc = NULL;
1268 ULONG ulCount = 0;
1269 LONG lNestingLevel = 0;
1270
1271 if ((pParam = strhFindAttribValue(pszSearchIn, pszTag)))
1272 {
1273 // determine end character to search for: a space
1274 CHAR cEnd = ' ';
1275 if (*pParam == '\"')
1276 {
1277 // or, if the data is enclosed in quotes, a quote
1278 cEnd = '\"';
1279 pParam++;
1280 }
1281
1282 if (pulOffset)
1283 // store the offset
1284 (*pulOffset) = pParam - (PSZ)pszSearchIn;
1285
1286 // now find end of attribute
1287 pParam2 = pParam;
1288 while (*pParam)
1289 {
1290 if (*pParam == cEnd)
1291 // end character found
1292 break;
1293 else if (*pParam == '<')
1294 // yet another opening tag found:
1295 // this is probably some "<" in the attributes
1296 lNestingLevel++;
1297 else if (*pParam == '>')
1298 {
1299 lNestingLevel--;
1300 if (lNestingLevel < 0)
1301 // end of tag found:
1302 break;
1303 }
1304 ulCount++;
1305 pParam++;
1306 }
1307
1308 // copy attribute to new buffer
1309 if (ulCount)
1310 {
1311 prc = (PSZ)malloc(ulCount+1);
1312 memcpy(prc, pParam2, ulCount);
1313 *(prc+ulCount) = 0;
1314 }
1315 }
1316 return (prc);
1317}
1318
1319/*
1320 * strhFindEndOfTag:
1321 * returns a pointer to the ">" char
1322 * which seems to terminate the tag beginning
1323 * after pszBeginOfTag.
1324 *
1325 * If additional "<" chars are found, we look
1326 * for additional ">" characters too.
1327 *
1328 * Note: You must pass the address of the opening
1329 * '<' character to this function.
1330 *
1331 * Example:
1332 + PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
1333 + strhFindEndOfTag(pszTest)
1334 + returns.................................^ this.
1335 *
1336 *@@added V0.9.0 [umoeller]
1337 */
1338
1339PSZ strhFindEndOfTag(const char *pszBeginOfTag)
1340{
1341 PSZ p = (PSZ)pszBeginOfTag,
1342 prc = NULL;
1343 LONG lNestingLevel = 0;
1344
1345 while (*p)
1346 {
1347 if (*p == '<')
1348 // another opening tag found:
1349 lNestingLevel++;
1350 else if (*p == '>')
1351 {
1352 // closing tag found:
1353 lNestingLevel--;
1354 if (lNestingLevel < 1)
1355 {
1356 // corresponding: return this
1357 prc = p;
1358 break;
1359 }
1360 }
1361 p++;
1362 }
1363
1364 return (prc);
1365}
1366
1367/*
1368 * strhGetBlock:
1369 * this complex function searches the given string
1370 * for a pair of opening/closing HTML-style tags.
1371 *
1372 * If found, this routine returns TRUE and does
1373 * the following:
1374 *
1375 * 1) allocate a new buffer, copy the text
1376 * enclosed by the opening/closing tags
1377 * into it and set *ppszBlock to that
1378 * buffer;
1379 *
1380 * 2) if the opening tag has any attributes,
1381 * allocate another buffer, copy the
1382 * attributes into it and set *ppszAttrs
1383 * to that buffer; if no attributes are
1384 * found, *ppszAttrs will be NULL;
1385 *
1386 * 3) set *pulOffset to the offset from the
1387 * beginning of *ppszSearchIn where the
1388 * opening tag was found;
1389 *
1390 * 4) advance *ppszSearchIn to after the
1391 * closing tag, so that you can do
1392 * multiple searches without finding the
1393 * same tags twice.
1394 *
1395 * All buffers should be freed using free().
1396 *
1397 * This returns the following:
1398 * -- 0: no error
1399 * -- 1: tag not found at all (doesn't have to be an error)
1400 * -- 2: begin tag found, but no corresponding end tag found. This
1401 * is a real error.
1402 * -- 3: begin tag is not terminated by "&gt;" (e.g. "&lt;BEGINTAG whatever")
1403 *
1404 * <B>Example:</B>
1405 + PSZ pSearch = "&lt;PAGE INDEX=1&gt;This is page 1.&lt;/PAGE&gt;More text."
1406 + PSZ pszBlock, pszAttrs;
1407 + ULONG ulOfs;
1408 + strhGetBlock(&pSearch, "PAGE", &pszBlock, &pszAttrs, &ulOfs)
1409 *
1410 * would do the following:
1411 *
1412 * 1) set pszBlock to a new string containing "This is page 1."
1413 * without quotes;
1414 *
1415 * 2) set pszAttrs to a new string containing "&lt;PAGE INDEX=1&gt;";
1416 *
1417 * 3) set ulOfs to 0, because "&lt;PAGE" was found at the beginning;
1418 *
1419 * 4) pSearch would be advanced to point to the "More text"
1420 * string in the original buffer.
1421 *
1422 * Hey-hey. A one-shot function, fairly complicated, but indispensable
1423 * for HTML parsing.
1424 *
1425 *@@added V0.9.0 [umoeller]
1426 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
1427 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
1428 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
1429 */
1430
1431ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
1432 PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
1433 const char *pszTag,
1434 PSZ *ppszBlock, // out: block enclosed by the tags
1435 PSZ *ppszAttribs, // out: attributes of the opening tag
1436 PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
1437 PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
1438{
1439 ULONG ulrc = 1;
1440 PSZ pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
1441 pszSearch2 = pszBeginTag,
1442 pszClosingTag;
1443 ULONG cbTag = strlen(pszTag);
1444
1445 // go thru the block and check all tags if it's the
1446 // begin tag we're looking for
1447 while ((pszBeginTag = strchr(pszBeginTag, '<')))
1448 {
1449 if (memicmp(pszBeginTag+1, (void*)pszTag, strlen(pszTag)) == 0)
1450 // yes: stop
1451 break;
1452 else
1453 pszBeginTag++;
1454 }
1455
1456 if (pszBeginTag)
1457 {
1458 // we found <TAG>:
1459 ULONG ulNestingLevel = 0;
1460
1461 PSZ pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag);
1462 // strchr(pszBeginTag, '>');
1463 if (pszEndOfBeginTag)
1464 {
1465 // does the caller want the attributes?
1466 if (ppszAttribs)
1467 {
1468 // yes: then copy them
1469 ULONG ulAttrLen = pszEndOfBeginTag - pszBeginTag;
1470 PSZ pszAttrs = (PSZ)malloc(ulAttrLen + 1);
1471 strncpy(pszAttrs, pszBeginTag, ulAttrLen);
1472 // add terminating 0
1473 *(pszAttrs + ulAttrLen) = 0;
1474
1475 *ppszAttribs = pszAttrs;
1476 }
1477
1478 // output offset of where we found the begin tag
1479 if (pulOfsBeginTag)
1480 *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;
1481
1482 // now find corresponding closing tag (e.g. "</BODY>"
1483 pszBeginTag = pszEndOfBeginTag+1;
1484 // now we're behind the '>' char of the opening tag
1485 // increase offset of that too
1486 if (pulOfsBeginBlock)
1487 *pulOfsBeginBlock = pszBeginTag - (PSZ)pszSearchIn;
1488
1489 // find next closing tag;
1490 // for the first run, pszSearch2 points to right
1491 // after the '>' char of the opening tag
1492 pszSearch2 = pszBeginTag;
1493 while ( (pszSearch2) // fixed V0.9.3 (2000-05-06) [umoeller]
1494 && (pszClosingTag = strstr(pszSearch2, "<"))
1495 )
1496 {
1497 // if we have another opening tag before our closing
1498 // tag, we need to have several closing tags before
1499 // we're done
1500 if (memicmp(pszClosingTag+1, (void*)pszTag, cbTag) == 0)
1501 ulNestingLevel++;
1502 else
1503 {
1504 // is this ours?
1505 if ( (*(pszClosingTag+1) == '/')
1506 && (memicmp(pszClosingTag+2, (void*)pszTag, cbTag) == 0)
1507 )
1508 {
1509 // we've found a matching closing tag; is
1510 // it ours?
1511 if (ulNestingLevel == 0)
1512 {
1513 // our closing tag found:
1514 // allocate mem for a new buffer
1515 // and extract all the text between
1516 // open and closing tags to it
1517 ULONG ulLen = pszClosingTag - pszBeginTag;
1518 if (ppszBlock)
1519 {
1520 PSZ pNew = (PSZ)malloc(ulLen + 1);
1521 strhncpy0(pNew, pszBeginTag, ulLen);
1522 *ppszBlock = pNew;
1523 }
1524
1525 // raise search offset to after the closing tag
1526 *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;
1527
1528 ulrc = 0;
1529
1530 break;
1531 } else
1532 // not our closing tag:
1533 ulNestingLevel--;
1534 }
1535 }
1536 // no matching closing tag: search on after that
1537 pszSearch2 = strhFindEndOfTag(pszClosingTag);
1538 } // end while (pszClosingTag = strstr(pszSearch2, "<"))
1539
1540 if (!pszClosingTag)
1541 // no matching closing tag found:
1542 // return 2 (closing tag not found)
1543 ulrc = 2;
1544 } // end if (pszBeginTag)
1545 else
1546 // no matching ">" for opening tag found:
1547 ulrc = 3;
1548 }
1549
1550 return (ulrc);
1551}
1552
1553/* ******************************************************************
1554 *
1555 * Miscellaneous
1556 *
1557 ********************************************************************/
1558
1559/*
1560 *@@ strhArrayAppend:
1561 * this appends a string to a "string array".
1562 *
1563 * A string array is considered a sequence of
1564 * zero-terminated strings in memory. That is,
1565 * after each string's null-byte, the next
1566 * string comes up.
1567 *
1568 * This is useful for composing a single block
1569 * of memory from, say, list box entries, which
1570 * can then be written to OS2.INI in one flush.
1571 *
1572 * To append strings to such an array, call this
1573 * function for each string you wish to append.
1574 * This will re-allocate *ppszRoot with each call,
1575 * and update *pcbRoot, which then contains the
1576 * total size of all strings (including all null
1577 * terminators).
1578 *
1579 * Pass *pcbRoot to PrfSaveProfileData to have the
1580 * block saved.
1581 *
1582 * Note: On the first call, *ppszRoot and *pcbRoot
1583 * _must_ be both NULL, or this crashes.
1584 *
1585 *@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
1586 */
1587
1588VOID strhArrayAppend(PSZ *ppszRoot, // in: root of array
1589 const char *pcszNew, // in: string to append
1590 ULONG cbNew, // in: size of that string or 0 to run strlen() here
1591 PULONG pcbRoot) // in/out: size of array
1592{
1593 PSZ pszTemp;
1594
1595 if (!cbNew) // V0.9.13 (2001-06-21) [umoeller]
1596 cbNew = strlen(pcszNew);
1597
1598 pszTemp = (PSZ)malloc(*pcbRoot
1599 + cbNew
1600 + 1); // two null bytes
1601 if (*ppszRoot)
1602 {
1603 // not first loop: copy old stuff
1604 memcpy(pszTemp,
1605 *ppszRoot,
1606 *pcbRoot);
1607 free(*ppszRoot);
1608 }
1609 // append new string
1610 strcpy(pszTemp + *pcbRoot,
1611 pcszNew);
1612 // update root
1613 *ppszRoot = pszTemp;
1614 // update length
1615 *pcbRoot += cbNew + 1;
1616}
1617
1618/*
1619 *@@ strhCreateDump:
1620 * this dumps a memory block into a string
1621 * and returns that string in a new buffer.
1622 *
1623 * You must free() the returned PSZ after use.
1624 *
1625 * The output looks like the following:
1626 *
1627 + 0000: FE FF 0E 02 90 00 00 00 ........
1628 + 0008: FD 01 00 00 57 50 46 6F ....WPFo
1629 + 0010: 6C 64 65 72 00 78 01 34 lder.x.4
1630 *
1631 * Each line is terminated with a newline (\n)
1632 * character only.
1633 *
1634 *@@added V0.9.1 (2000-01-22) [umoeller]
1635 */
1636
1637PSZ strhCreateDump(PBYTE pb, // in: start address of buffer
1638 ULONG ulSize, // in: size of buffer
1639 ULONG ulIndent) // in: indentation of every line
1640{
1641 PSZ pszReturn = 0;
1642 XSTRING strReturn;
1643 CHAR szTemp[1000];
1644
1645 PBYTE pbCurrent = pb; // current byte
1646 ULONG ulCount = 0,
1647 ulCharsInLine = 0; // if this grows > 7, a new line is started
1648 CHAR szLine[400] = "",
1649 szAscii[30] = " "; // ASCII representation; filled for every line
1650 PSZ pszLine = szLine,
1651 pszAscii = szAscii;
1652
1653 xstrInit(&strReturn, (ulSize * 30) + ulIndent);
1654
1655 for (pbCurrent = pb;
1656 ulCount < ulSize;
1657 pbCurrent++, ulCount++)
1658 {
1659 if (ulCharsInLine == 0)
1660 {
1661 memset(szLine, ' ', ulIndent);
1662 pszLine += ulIndent;
1663 }
1664 pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent);
1665
1666 if ( (*pbCurrent > 31) && (*pbCurrent < 127) )
1667 // printable character:
1668 *pszAscii = *pbCurrent;
1669 else
1670 *pszAscii = '.';
1671 pszAscii++;
1672
1673 ulCharsInLine++;
1674 if ( (ulCharsInLine > 7) // 8 bytes added?
1675 || (ulCount == ulSize-1) // end of buffer reached?
1676 )
1677 {
1678 // if we haven't had eight bytes yet,
1679 // fill buffer up to eight bytes with spaces
1680 ULONG ul2;
1681 for (ul2 = ulCharsInLine;
1682 ul2 < 8;
1683 ul2++)
1684 pszLine += sprintf(pszLine, " ");
1685
1686 sprintf(szTemp, "%04lX: %s %s\n",
1687 (ulCount & 0xFFFFFFF8), // offset in hex
1688 szLine, // bytes string
1689 szAscii); // ASCII string
1690 xstrcat(&strReturn, szTemp, 0);
1691
1692 // restart line buffer
1693 pszLine = szLine;
1694
1695 // clear ASCII buffer
1696 strcpy(szAscii, " ");
1697 pszAscii = szAscii;
1698
1699 // reset line counter
1700 ulCharsInLine = 0;
1701 }
1702 }
1703
1704 if (strReturn.cbAllocated)
1705 pszReturn = strReturn.psz;
1706
1707 return (pszReturn);
1708}
1709
1710/* ******************************************************************
1711 *
1712 * Fast string searches
1713 *
1714 ********************************************************************/
1715
// ASSERT, as defined here, expands to nothing: the expression
// passed to it is neither checked nor evaluated, so it must
// not have side effects which the code depends on
#define ASSERT(expr)
1717
1718/*
1719 * The following code has been taken from the "Standard
1720 * Function Library", file sflfind.c, and only slightly
1721 * modified to conform to the rest of this file.
1722 *
1723 * Written: 96/04/24 iMatix SFL project team <sfl@imatix.com>
1724 * Revised: 98/05/04
1725 *
1726 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
1727 *
1728 * The SFL Licence allows incorporating SFL code into other
1729 * programs, as long as the copyright is reprinted and the
1730 * code is marked as modified, so this is what we do.
1731 */
1732
1733/*
1734 *@@ strhmemfind:
1735 * searches for a pattern in a block of memory using the
1736 * Boyer-Moore-Horspool-Sunday algorithm.
1737 *
1738 * The block and pattern may contain any values; you must
1739 * explicitly provide their lengths. If you search for strings,
1740 * use strlen() on the buffers.
1741 *
1742 * Returns a pointer to the pattern if found within the block,
1743 * or NULL if the pattern was not found.
1744 *
1745 * This algorithm needs a "shift table" to cache data for the
1746 * search pattern. This table can be reused when performing
1747 * several searches with the same pattern.
1748 *
1749 * "shift" must point to an array big enough to hold 256 (8**2)
1750 * "size_t" values.
1751 *
1752 * If (*repeat_find == FALSE), the shift table is initialized.
1753 * So on the first search with a given pattern, *repeat_find
1754 * should be FALSE. This function sets it to TRUE after the
1755 * shift table is initialised, allowing the initialisation
1756 * phase to be skipped on subsequent searches.
1757 *
1758 * This function is most effective when repeated searches are
1759 * made for the same pattern in one or more large buffers.
1760 *
1761 * Example:
1762 *
1763 + PSZ pszHaystack = "This is a sample string.",
1764 + pszNeedle = "string";
1765 + size_t shift[256];
1766 + BOOL fRepeat = FALSE;
1767 +
1768 + PSZ pFound = strhmemfind(pszHaystack,
1769 + strlen(pszHaystack), // block size
1770 + pszNeedle,
1771 + strlen(pszNeedle), // pattern size
1772 + shift,
1773 + &fRepeat);
1774 *
1775 * Taken from the "Standard Function Library", file sflfind.c.
1776 * Copyright: Copyright (c) 1991-99 iMatix Corporation.
1777 * Slightly modified by umoeller.
1778 *
1779 *@@added V0.9.3 (2000-05-08) [umoeller]
1780 */
1781
1782void* strhmemfind(const void *in_block, // in: block containing data
1783 size_t block_size, // in: size of block in bytes
1784 const void *in_pattern, // in: pattern to search for
1785 size_t pattern_size, // in: size of pattern block
1786 size_t *shift, // in/out: shift table (search buffer)
1787 BOOL *repeat_find) // in/out: if TRUE, *shift is already initialized
1788{
1789 size_t byte_nbr, // Distance through block
1790 match_size; // Size of matched part
1791 const unsigned char
1792 *match_base = NULL, // Base of match of pattern
1793 *match_ptr = NULL, // Point within current match
1794 *limit = NULL; // Last potiental match point
1795 const unsigned char
1796 *block = (unsigned char *) in_block, // Concrete pointer to block data
1797 *pattern = (unsigned char *) in_pattern; // Concrete pointer to search value
1798
1799 if ( (block == NULL)
1800 || (pattern == NULL)
1801 || (shift == NULL)
1802 )
1803 return (NULL);
1804
1805 // Pattern must be smaller or equal in size to string
1806 if (block_size < pattern_size)
1807 return (NULL); // Otherwise it's not found
1808
1809 if (pattern_size == 0) // Empty patterns match at start
1810 return ((void *)block);
1811
1812 // Build the shift table unless we're continuing a previous search
1813
1814 // The shift table determines how far to shift before trying to match
1815 // again, if a match at this point fails. If the byte after where the
1816 // end of our pattern falls is not in our pattern, then we start to
1817 // match again after that byte; otherwise we line up the last occurence
1818 // of that byte in our pattern under that byte, and try match again.
1819
1820 if (!repeat_find || !*repeat_find)
1821 {
1822 for (byte_nbr = 0;
1823 byte_nbr < 256;
1824 byte_nbr++)
1825 shift[byte_nbr] = pattern_size + 1;
1826 for (byte_nbr = 0;
1827 byte_nbr < pattern_size;
1828 byte_nbr++)
1829 shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr;
1830
1831 if (repeat_find)
1832 *repeat_find = TRUE;
1833 }
1834
1835 // Search for the block, each time jumping up by the amount
1836 // computed in the shift table
1837
1838 limit = block + (block_size - pattern_size + 1);
1839 ASSERT (limit > block);
1840
1841 for (match_base = block;
1842 match_base < limit;
1843 match_base += shift[*(match_base + pattern_size)])
1844 {
1845 match_ptr = match_base;
1846 match_size = 0;
1847
1848 // Compare pattern until it all matches, or we find a difference
1849 while (*match_ptr++ == pattern[match_size++])
1850 {
1851 ASSERT (match_size <= pattern_size &&
1852 match_ptr == (match_base + match_size));
1853
1854 // If we found a match, return the start address
1855 if (match_size >= pattern_size)
1856 return ((void*)(match_base));
1857
1858 }
1859 }
1860 return (NULL); // Found nothing
1861}
1862
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm. The string and
 *      pattern are null-terminated strings. Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      NULL is also returned if string or pattern is NULL. An empty
 *      pattern matches at the start of the string.
 *
 *      Will match strings irrespective of case, as determined by
 *      tolower(). For an exact (case-sensitive) search, see
 *      strhmemfind. Will not work on multibyte characters.
 *
 *      Characters are handled as unsigned char values, so that
 *      characters above 127 can safely be passed to tolower()
 *      even if "char" is a signed type.
 *
 *      Examples:
 +          char *result;
 +
 +          result = strhtxtfind ("AbracaDabra", "cad");
 +          if (result)
 +              puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,      // String containing data
                   const char *pattern)     // Pattern to search for
{
    size_t  shift [256];                    // Shift distance for each value
    size_t  string_size,
            pattern_size,
            byte_nbr,                       // Index into byte array
            pos,                            // Offset of current match attempt
            last_pos,                       // Last offset where pattern still fits
            match_size;                     // Size of matched part
    const unsigned char
            *text,                          // string as unsigned chars
            *pat;                           // pattern as unsigned chars

    // Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return (NULL);

    text = (const unsigned char *) string;
    pat = (const unsigned char *) pattern;

    string_size = strlen (string);
    pattern_size = strlen (pattern);

    // Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return (NULL);                      // Otherwise it cannot be found

    if (pattern_size == 0)                  // Empty string matches at start
        return (char *) string;

    // Build the shift table

    // The shift table determines how far to shift before trying to match
    // again, if a match at this point fails. If the byte after where the
    // end of our pattern falls is not in our pattern, then we start to
    // match again after that byte; otherwise we line up the last occurrence
    // of that byte in our pattern under that byte, and try match again.
    // The table is indexed by lower-cased characters only.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    // Search for the string. If we don't find a match, move up by the
    // amount we computed in the shift table above, to find location of
    // the next potential match. At the last position, the character
    // behind the window is the string's null terminator, so reading it
    // is safe; the resulting shift always ends the loop.

    last_pos = string_size - pattern_size;

    for (pos = 0;
         pos <= last_pos;
         pos += shift [(unsigned char) tolower (text [pos + pattern_size])])
    {
        // Compare pattern until it all matches, or we find a difference
        match_size = 0;
        while (    (match_size < pattern_size)
                && (tolower (text [pos + match_size]) == tolower (pat [match_size]))
              )
            match_size++;

        // If we found a match, return the start address
        if (match_size == pattern_size)
            return ((char *) (string + pos));
    }

    return (NULL);                          // Found nothing
}
1957
Note: See TracBrowser for help on using the repository browser.