| 1 |  | 
|---|
| 2 | /* | 
|---|
| 3 | *@@sourcefile stringh.c: | 
|---|
| 4 | *      contains string/text helper functions. These are good for | 
|---|
| 5 | *      parsing/splitting strings and other stuff used throughout | 
|---|
| 6 | *      XWorkplace. | 
|---|
| 7 | * | 
|---|
| 8 | *      Note that these functions are really a bunch of very mixed | 
|---|
| 9 | *      up string helpers, which you may or may not find helpful. | 
|---|
| 10 | *      If you're looking for string functions with memory | 
|---|
| 11 | *      management, look at xstring.c instead. | 
|---|
| 12 | * | 
|---|
| 13 | *      Usage: All OS/2 programs. | 
|---|
| 14 | * | 
|---|
| 15 | *      Function prefixes (new with V0.81): | 
|---|
| 16 | *      --  strh*       string helper functions. | 
|---|
| 17 | * | 
|---|
| 18 | *      Note: Version numbering in this file relates to XWorkplace version | 
|---|
| 19 | *            numbering. | 
|---|
| 20 | * | 
|---|
| 21 | *@@header "helpers\stringh.h" | 
|---|
| 22 | */ | 
|---|
| 23 |  | 
|---|
| 24 | /* | 
|---|
| 25 | *      Copyright (C) 1997-2006 Ulrich Möller. | 
|---|
| 26 | *      Parts Copyright (C) 1991-1999 iMatix Corporation. | 
|---|
| 27 | *      This file is part of the "XWorkplace helpers" source package. | 
|---|
| 28 | *      This is free software; you can redistribute it and/or modify | 
|---|
| 29 | *      it under the terms of the GNU General Public License as published | 
|---|
| 30 | *      by the Free Software Foundation, in version 2 as it comes in the | 
|---|
| 31 | *      "COPYING" file of the XWorkplace main distribution. | 
|---|
| 32 | *      This program is distributed in the hope that it will be useful, | 
|---|
| 33 | *      but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 34 | *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 35 | *      GNU General Public License for more details. | 
|---|
| 36 | */ | 
|---|
| 37 |  | 
|---|
| 38 | #define OS2EMX_PLAIN_CHAR | 
|---|
| 39 | // this is needed for "os2emx.h"; if this is defined, | 
|---|
| 40 | // emx will define PSZ as _signed_ char, otherwise | 
|---|
| 41 | // as unsigned char | 
|---|
| 42 |  | 
|---|
| 43 | #define INCL_WINSHELLDATA | 
|---|
| 44 | #define INCL_DOSERRORS | 
|---|
| 45 | #include <os2.h> | 
|---|
| 46 |  | 
|---|
| 47 | #include <stdlib.h> | 
|---|
| 48 | #include <stdio.h> | 
|---|
| 49 | #include <string.h> | 
|---|
| 50 | #include <ctype.h> | 
|---|
| 51 | #include <math.h> | 
|---|
| 52 |  | 
|---|
| 53 | #include "setup.h"                      // code generation and debugging options | 
|---|
| 54 |  | 
|---|
| 55 | #define DONT_REPLACE_STRINGH_MALLOC | 
|---|
| 56 | #include "helpers\stringh.h" | 
|---|
| 57 | #include "helpers\xstring.h"            // extended string helpers | 
|---|
| 58 |  | 
|---|
| 59 | #pragma hdrstop | 
|---|
| 60 |  | 
|---|
| 61 | /* | 
|---|
| 62 | *@@category: Helpers\C helpers\String management | 
|---|
| 63 | *      See stringh.c and xstring.c. | 
|---|
| 64 | */ | 
|---|
| 65 |  | 
|---|
| 66 | /* | 
|---|
| 67 | *@@category: Helpers\C helpers\String management\C string helpers | 
|---|
| 68 | *      See stringh.c. | 
|---|
| 69 | */ | 
|---|
| 70 |  | 
|---|
| 71 | #ifdef __DEBUG_MALLOC_ENABLED__ | 
|---|
| 72 |  | 
|---|
| 73 | /* | 
|---|
| 74 | *@@ strhStoreDebug: | 
|---|
| 75 | *      memory debug version of strhStore. | 
|---|
| 76 | * | 
|---|
| 77 | *@@added V0.9.16 (2001-12-08) [umoeller] | 
|---|
| 78 | */ | 
|---|
| 79 |  | 
|---|
| 80 | APIRET (strhStoreDebug)(PSZ *ppszTarget, | 
|---|
| 81 | PCSZ pcszSource, | 
|---|
| 82 | PULONG pulLength,        // out: length of new string (ptr can be NULL) | 
|---|
| 83 | PCSZ pcszSourceFile, | 
|---|
| 84 | unsigned long ulLine, | 
|---|
| 85 | PCSZ pcszFunction) | 
|---|
| 86 | { | 
|---|
| 87 | ULONG ulLength = 0; | 
|---|
| 88 |  | 
|---|
| 89 |  | 
|---|
| 90 |  | 
|---|
| 91 | if (ppszTarget) | 
|---|
| 92 | { | 
|---|
| 93 | if (*ppszTarget) | 
|---|
| 94 | free(*ppszTarget); | 
|---|
| 95 |  | 
|---|
| 96 | if (    (pcszSource) | 
|---|
| 97 | && (ulLength = strlen(pcszSource)) | 
|---|
| 98 | ) | 
|---|
| 99 | { | 
|---|
| 100 | if (*ppszTarget = (PSZ)memdMalloc(ulLength + 1, | 
|---|
| 101 | pcszSourceFile, | 
|---|
| 102 | ulLine, | 
|---|
| 103 | pcszFunction)) | 
|---|
| 104 | memcpy(*ppszTarget, pcszSource, ulLength + 1); | 
|---|
| 105 | else | 
|---|
| 106 | return ERROR_NOT_ENOUGH_MEMORY; | 
|---|
| 107 | } | 
|---|
| 108 | else | 
|---|
| 109 | *ppszTarget = NULL; | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | if (pulLength) | 
|---|
| 113 | *pulLength = ulLength; | 
|---|
| 114 |  | 
|---|
| 115 | return NO_ERROR; | 
|---|
| 116 | } | 
|---|
| 117 |  | 
|---|
| 118 | #endif | 
|---|
| 119 |  | 
|---|
| 120 | /* | 
|---|
| 121 | *@@ strhStore: | 
|---|
| 122 | *      stores a copy of the given string in the specified | 
|---|
| 123 | *      buffer. Uses strdup internally. | 
|---|
| 124 | * | 
|---|
| 125 | *      If *ppszTarget != NULL, the previous string is freed | 
|---|
| 126 | *      and set to NULL. | 
|---|
| 127 | *      If pcszSource != NULL, a copy of it is stored in the | 
|---|
| 128 | *      buffer. | 
|---|
| 129 | * | 
|---|
| 130 | *@@added V0.9.16 (2001-12-06) [umoeller] | 
|---|
| 131 | */ | 
|---|
| 132 |  | 
|---|
| 133 | APIRET strhStore(PSZ *ppszTarget, | 
|---|
| 134 | PCSZ pcszSource, | 
|---|
| 135 | PULONG pulLength)        // out: length of new string (ptr can be NULL) | 
|---|
| 136 | { | 
|---|
| 137 | ULONG ulLength = 0; | 
|---|
| 138 |  | 
|---|
| 139 | if (ppszTarget) | 
|---|
| 140 | { | 
|---|
| 141 | if (*ppszTarget) | 
|---|
| 142 | free(*ppszTarget); | 
|---|
| 143 |  | 
|---|
| 144 | if (    (pcszSource) | 
|---|
| 145 | && (ulLength = strlen(pcszSource)) | 
|---|
| 146 | ) | 
|---|
| 147 | { | 
|---|
| 148 | if (*ppszTarget = (PSZ)malloc(ulLength + 1)) | 
|---|
| 149 | memcpy(*ppszTarget, pcszSource, ulLength + 1); | 
|---|
| 150 | else | 
|---|
| 151 | return ERROR_NOT_ENOUGH_MEMORY; | 
|---|
| 152 | } | 
|---|
| 153 | else | 
|---|
| 154 | *ppszTarget = NULL; | 
|---|
| 155 | } | 
|---|
| 156 | else | 
|---|
| 157 | return ERROR_INVALID_PARAMETER; | 
|---|
| 158 |  | 
|---|
| 159 | if (pulLength) | 
|---|
| 160 | *pulLength = ulLength; | 
|---|
| 161 |  | 
|---|
| 162 | return NO_ERROR; | 
|---|
| 163 | } | 
|---|
| 164 |  | 
|---|
| 165 | /* | 
|---|
| 166 | *@@ strhcpy: | 
|---|
| 167 | *      like strdup, but this one doesn't crash if string2 is NULL, | 
|---|
| 168 | *      but sets the first byte in string1 to \0 instead. | 
|---|
| 169 | * | 
|---|
| 170 | *@@added V0.9.14 (2001-08-01) [umoeller] | 
|---|
| 171 | */ | 
|---|
| 172 |  | 
|---|
| 173 | PSZ strhcpy(PSZ string1, PCSZ string2) | 
|---|
| 174 | { | 
|---|
| 175 | if (string2) | 
|---|
| 176 | return strcpy(string1, string2); | 
|---|
| 177 |  | 
|---|
| 178 | *string1 = '\0'; | 
|---|
| 179 | return string1; | 
|---|
| 180 | } | 
|---|
| 181 |  | 
|---|
| 182 | #ifdef __DEBUG_MALLOC_ENABLED__ | 
|---|
| 183 |  | 
|---|
| 184 | /* | 
|---|
| 185 | *@@ strhdupDebug: | 
|---|
| 186 | *      memory debug version of strhdup. | 
|---|
| 187 | * | 
|---|
| 188 | *@@added V0.9.0 [umoeller] | 
|---|
| 189 | */ | 
|---|
| 190 |  | 
|---|
| 191 | PSZ strhdupDebug(PCSZ pcszSource, | 
|---|
| 192 | unsigned long *pulLength, | 
|---|
| 193 | PCSZ pcszSourceFile, | 
|---|
| 194 | unsigned long ulLine, | 
|---|
| 195 | PCSZ pcszFunction) | 
|---|
| 196 | { | 
|---|
| 197 | PSZ     pszReturn = NULL; | 
|---|
| 198 | ULONG   ulLength = 0; | 
|---|
| 199 |  | 
|---|
| 200 | if (    (pcszSource) | 
|---|
| 201 | && (ulLength = strlen(pcszSource)) | 
|---|
| 202 | ) | 
|---|
| 203 | { | 
|---|
| 204 | if (pszReturn = (PSZ)memdMalloc(ulLength + 1, | 
|---|
| 205 | pcszSourceFile,     // fixed V0.9.16 (2001-12-08) [umoeller] | 
|---|
| 206 | ulLine, | 
|---|
| 207 | pcszFunction)) | 
|---|
| 208 | memcpy(pszReturn, pcszSource, ulLength + 1); | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | if (pulLength) | 
|---|
| 212 | *pulLength = ulLength; | 
|---|
| 213 |  | 
|---|
| 214 | return pszReturn; | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | #endif // __DEBUG_MALLOC_ENABLED__ | 
|---|
| 218 |  | 
|---|
| 219 | /* | 
|---|
| 220 | *@@ strhdup: | 
|---|
| 221 | *      like strdup, but this one doesn't crash if pszSource | 
|---|
| 222 | *      is NULL, but returns NULL also. In addition, this | 
|---|
| 223 | *      can report the length of the string (V0.9.16). | 
|---|
| 224 | * | 
|---|
| 225 | *@@added V0.9.0 [umoeller] | 
|---|
| 226 | *@@changed V0.9.16 (2001-10-25) [umoeller]: added pulLength | 
|---|
| 227 | */ | 
|---|
| 228 |  | 
|---|
| 229 | PSZ strhdup(PCSZ pcszSource, | 
|---|
| 230 | unsigned long *pulLength)       // out: length of string excl. null terminator (ptr can be NULL) | 
|---|
| 231 | { | 
|---|
| 232 | PSZ     pszReturn = NULL; | 
|---|
| 233 | ULONG   ulLength = 0; | 
|---|
| 234 |  | 
|---|
| 235 | if (    (pcszSource) | 
|---|
| 236 | && (ulLength = strlen(pcszSource)) | 
|---|
| 237 | ) | 
|---|
| 238 | { | 
|---|
| 239 | if (pszReturn = (PSZ)malloc(ulLength + 1)) | 
|---|
| 240 | memcpy(pszReturn, pcszSource, ulLength + 1); | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | if (pulLength) | 
|---|
| 244 | *pulLength = ulLength; | 
|---|
| 245 |  | 
|---|
| 246 | return pszReturn; | 
|---|
| 247 | } | 
|---|
| 248 |  | 
|---|
| 249 | /* | 
|---|
| 250 | *@@ strhcmp: | 
|---|
| 251 | *      better strcmp. This doesn't crash if any of the | 
|---|
| 252 | *      string pointers are NULL, but returns a proper | 
|---|
| 253 | *      value then. | 
|---|
| 254 | * | 
|---|
| 255 | *      Besides, this is guaranteed to only return -1, 0, | 
|---|
| 256 | *      or +1, while strcmp can return any positive or | 
|---|
| 257 | *      negative value. This is useful for tree comparison | 
|---|
| 258 | *      funcs. | 
|---|
| 259 | * | 
|---|
| 260 | *@@added V0.9.9 (2001-02-16) [umoeller] | 
|---|
| 261 | */ | 
|---|
| 262 |  | 
|---|
| 263 | int strhcmp(PCSZ p1, PCSZ p2) | 
|---|
| 264 | { | 
|---|
| 265 | if (p1 && p2) | 
|---|
| 266 | { | 
|---|
| 267 | int i = strcmp(p1, p2); | 
|---|
| 268 | if (i < 0) return -1; | 
|---|
| 269 | if (i > 0) return +1; | 
|---|
| 270 | } | 
|---|
| 271 | else if (p1) | 
|---|
| 272 | // but p2 is NULL: p1 greater than p2 then | 
|---|
| 273 | return +1; | 
|---|
| 274 | else if (p2) | 
|---|
| 275 | // but p1 is NULL: p1 less than p2 then | 
|---|
| 276 | return -1; | 
|---|
| 277 |  | 
|---|
| 278 | // return 0 if strcmp returned 0 above or both strings are NULL | 
|---|
| 279 | return 0; | 
|---|
| 280 | } | 
|---|
| 281 |  | 
|---|
| 282 | /* | 
|---|
| 283 | *@@ strhicmp: | 
|---|
| 284 | *      like strhcmp, but compares without respect | 
|---|
| 285 | *      to case. | 
|---|
| 286 | * | 
|---|
| 287 | *@@added V0.9.9 (2001-04-07) [umoeller] | 
|---|
| 288 | */ | 
|---|
| 289 |  | 
|---|
| 290 | int strhicmp(PCSZ p1, PCSZ p2) | 
|---|
| 291 | { | 
|---|
| 292 | if (p1 && p2) | 
|---|
| 293 | { | 
|---|
| 294 | int i = stricmp(p1, p2); | 
|---|
| 295 | if (i < 0) return -1; | 
|---|
| 296 | if (i > 0) return +1; | 
|---|
| 297 | } | 
|---|
| 298 | else if (p1) | 
|---|
| 299 | // but p2 is NULL: p1 greater than p2 then | 
|---|
| 300 | return +1; | 
|---|
| 301 | else if (p2) | 
|---|
| 302 | // but p1 is NULL: p1 less than p2 then | 
|---|
| 303 | return -1; | 
|---|
| 304 |  | 
|---|
| 305 | // return 0 if strcmp returned 0 above or both strings are NULL | 
|---|
| 306 | return 0; | 
|---|
| 307 | } | 
|---|
| 308 |  | 
|---|
| 309 | /* | 
|---|
| 310 | *@@ strhistr: | 
|---|
| 311 | *      like strstr, but case-insensitive. | 
|---|
| 312 | * | 
|---|
| 313 | *@@changed V0.9.0 [umoeller]: crashed if null pointers were passed, thanks Rdiger Ihle | 
|---|
| 314 | */ | 
|---|
| 315 |  | 
|---|
| 316 | PSZ strhistr(PCSZ string1, PCSZ string2) | 
|---|
| 317 | { | 
|---|
| 318 | PSZ prc = NULL; | 
|---|
| 319 |  | 
|---|
| 320 | if ((string1) && (string2)) | 
|---|
| 321 | { | 
|---|
| 322 | PSZ pszSrchIn = strdup(string1); | 
|---|
| 323 | PSZ pszSrchFor = strdup(string2); | 
|---|
| 324 |  | 
|---|
| 325 | if ((pszSrchIn) && (pszSrchFor)) | 
|---|
| 326 | { | 
|---|
| 327 | strupr(pszSrchIn); | 
|---|
| 328 | strupr(pszSrchFor); | 
|---|
| 329 |  | 
|---|
| 330 | if (prc = strstr(pszSrchIn, pszSrchFor)) | 
|---|
| 331 | { | 
|---|
| 332 | // prc now has the first occurence of the string, | 
|---|
| 333 | // but in pszSrchIn; we need to map this | 
|---|
| 334 | // return value to the original string | 
|---|
| 335 | prc = (prc-pszSrchIn) // offset in pszSrchIn | 
|---|
| 336 | + (PSZ)string1; | 
|---|
| 337 | } | 
|---|
| 338 | } | 
|---|
| 339 | if (pszSrchFor) | 
|---|
| 340 | free(pszSrchFor); | 
|---|
| 341 | if (pszSrchIn) | 
|---|
| 342 | free(pszSrchIn); | 
|---|
| 343 | } | 
|---|
| 344 |  | 
|---|
| 345 | return prc; | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|
| 348 | /* | 
|---|
| 349 | *@@ strhncpy0: | 
|---|
| 350 | *      like strncpy, but always appends a 0 character. | 
|---|
| 351 | * | 
|---|
| 352 | *@@changed V0.9.16 (2002-01-09) [umoeller]: fixed crash on null pszSource | 
|---|
| 353 | */ | 
|---|
| 354 |  | 
|---|
| 355 | ULONG strhncpy0(PSZ pszTarget, | 
|---|
| 356 | PCSZ pszSource, | 
|---|
| 357 | ULONG cbSource) | 
|---|
| 358 | { | 
|---|
| 359 | ULONG ul = 0; | 
|---|
| 360 | PSZ     pTarget = pszTarget, | 
|---|
| 361 | pSource; | 
|---|
| 362 |  | 
|---|
| 363 | if (pSource = (PSZ)pszSource)       // V0.9.16 (2002-01-09) [umoeller] | 
|---|
| 364 | { | 
|---|
| 365 | for (ul = 0; ul < cbSource; ul++) | 
|---|
| 366 | if (*pSource) | 
|---|
| 367 | *pTarget++ = *pSource++; | 
|---|
| 368 | else | 
|---|
| 369 | break; | 
|---|
| 370 | } | 
|---|
| 371 |  | 
|---|
| 372 | *pTarget = 0; | 
|---|
| 373 |  | 
|---|
| 374 | return ul; | 
|---|
| 375 | } | 
|---|
| 376 |  | 
|---|
| 377 | /* | 
|---|
| 378 | *@@ strhlen: | 
|---|
| 379 | *      like strlen, but doesn't crash on | 
|---|
| 380 | *      null strings, but returns 0 also. | 
|---|
| 381 | * | 
|---|
| 382 | *@@added V0.9.19 (2002-04-02) [umoeller] | 
|---|
| 383 | */ | 
|---|
| 384 |  | 
|---|
| 385 | ULONG strhlen(PCSZ pcsz) | 
|---|
| 386 | { | 
|---|
| 387 | if (pcsz) | 
|---|
| 388 | return strlen(pcsz); | 
|---|
| 389 |  | 
|---|
| 390 | return 0; | 
|---|
| 391 | } | 
|---|
| 392 |  | 
|---|
| 393 | /* | 
|---|
| 394 | *@@ strhSize: | 
|---|
| 395 | *      returns the size of the given string, which | 
|---|
| 396 | *      is the memory required to allocate a copy, | 
|---|
| 397 | *      including the null terminator. | 
|---|
| 398 | * | 
|---|
| 399 | *      Returns 0 only if pcsz is NULL. If pcsz | 
|---|
| 400 | *      points to a null character, this returns 1. | 
|---|
| 401 | * | 
|---|
| 402 | *@@added V0.9.18 (2002-02-13) [umoeller] | 
|---|
| 403 | *@@changed V0.9.18 (2002-03-27) [umoeller]: now returning 1 for ptr to null byte | 
|---|
| 404 | */ | 
|---|
| 405 |  | 
|---|
| 406 | ULONG strhSize(PCSZ pcsz) | 
|---|
| 407 | { | 
|---|
| 408 | if (pcsz) //  && *pcsz) // V0.9.18 (2002-03-27) [umoeller] | 
|---|
| 409 | return (strlen(pcsz) + 1); | 
|---|
| 410 |  | 
|---|
| 411 | return 0; | 
|---|
| 412 | } | 
|---|
| 413 |  | 
|---|
| 414 | /* | 
|---|
| 415 | * strhCount: | 
|---|
| 416 | *      this counts the occurences of c in pszSearch. | 
|---|
| 417 | */ | 
|---|
| 418 |  | 
|---|
| 419 | ULONG strhCount(PCSZ pszSearch, | 
|---|
| 420 | CHAR c) | 
|---|
| 421 | { | 
|---|
| 422 | PSZ         p = (PSZ)pszSearch; | 
|---|
| 423 | ULONG       ulCount = 0; | 
|---|
| 424 | while (TRUE) | 
|---|
| 425 | { | 
|---|
| 426 | p = strchr(p, c); | 
|---|
| 427 | if (p) | 
|---|
| 428 | { | 
|---|
| 429 | ulCount++; | 
|---|
| 430 | p++; | 
|---|
| 431 | } | 
|---|
| 432 | else | 
|---|
| 433 | break; | 
|---|
| 434 | } | 
|---|
| 435 | return ulCount; | 
|---|
| 436 | } | 
|---|
| 437 |  | 
|---|
| 438 | /* | 
|---|
| 439 | *@@ strhIsDecimal: | 
|---|
| 440 | *      returns TRUE if psz consists of decimal digits only. | 
|---|
| 441 | */ | 
|---|
| 442 |  | 
|---|
| 443 | BOOL strhIsDecimal(PSZ psz) | 
|---|
| 444 | { | 
|---|
| 445 | PSZ p = psz; | 
|---|
| 446 | while (*p != 0) | 
|---|
| 447 | { | 
|---|
| 448 | if (isdigit(*p) == 0) | 
|---|
| 449 | return FALSE; | 
|---|
| 450 | p++; | 
|---|
| 451 | } | 
|---|
| 452 |  | 
|---|
| 453 | return TRUE; | 
|---|
| 454 | } | 
|---|
| 455 |  | 
|---|
| 456 | #ifdef __DEBUG_MALLOC_ENABLED__ | 
|---|
| 457 |  | 
|---|
| 458 | /* | 
|---|
| 459 | *@@ strhSubstrDebug: | 
|---|
| 460 | *      memory debug version of strhSubstr. | 
|---|
| 461 | * | 
|---|
| 462 | *@@added V0.9.14 (2001-08-01) [umoeller] | 
|---|
| 463 | */ | 
|---|
| 464 |  | 
|---|
| 465 | PSZ strhSubstrDebug(PCSZ pBegin,      // in: first char | 
|---|
| 466 | PCSZ pEnd,        // in: last char (not included) | 
|---|
| 467 | PCSZ pcszSourceFile, | 
|---|
| 468 | unsigned long ulLine, | 
|---|
| 469 | PCSZ pcszFunction) | 
|---|
| 470 | { | 
|---|
| 471 | PSZ pszSubstr = NULL; | 
|---|
| 472 |  | 
|---|
| 473 | if (pEnd > pBegin)      // V0.9.9 (2001-04-04) [umoeller] | 
|---|
| 474 | { | 
|---|
| 475 | ULONG cbSubstr = (pEnd - pBegin); | 
|---|
| 476 | if (pszSubstr = (PSZ)memdMalloc(cbSubstr + 1, | 
|---|
| 477 | pcszSourceFile, | 
|---|
| 478 | ulLine, | 
|---|
| 479 | pcszFunction)) | 
|---|
| 480 | { | 
|---|
| 481 | // strhncpy0(pszSubstr, pBegin, cbSubstr); | 
|---|
| 482 | memcpy(pszSubstr, pBegin, cbSubstr);        // V0.9.9 (2001-04-04) [umoeller] | 
|---|
| 483 | *(pszSubstr + cbSubstr) = '\0'; | 
|---|
| 484 | } | 
|---|
| 485 | } | 
|---|
| 486 |  | 
|---|
| 487 | return pszSubstr; | 
|---|
| 488 | } | 
|---|
| 489 |  | 
|---|
| 490 | #endif // __DEBUG_MALLOC_ENABLED__ | 
|---|
| 491 |  | 
|---|
| 492 | /* | 
|---|
| 493 | *@@ strhSubstr: | 
|---|
| 494 | *      this creates a new PSZ containing the string | 
|---|
| 495 | *      from pBegin to pEnd, excluding the pEnd character. | 
|---|
| 496 | *      The new string is null-terminated. The caller | 
|---|
| 497 | *      must free() the new string after use. | 
|---|
| 498 | * | 
|---|
| 499 | *      Example: | 
|---|
| 500 | +              "1234567890" | 
|---|
| 501 | +                ^      ^ | 
|---|
| 502 | +                p1     p2 | 
|---|
| 503 | +          strhSubstr(p1, p2) | 
|---|
| 504 | *      would return a new string containing "2345678". | 
|---|
| 505 | * | 
|---|
| 506 | *@@changed V0.9.9 (2001-04-04) [umoeller]: fixed crashes with invalid pointers | 
|---|
| 507 | *@@changed V0.9.9 (2001-04-04) [umoeller]: now using memcpy for speed | 
|---|
| 508 | */ | 
|---|
| 509 |  | 
|---|
| 510 | PSZ strhSubstr(PCSZ pBegin,      // in: first char | 
|---|
| 511 | PCSZ pEnd)        // in: last char (not included) | 
|---|
| 512 | { | 
|---|
| 513 | PSZ pszSubstr = NULL; | 
|---|
| 514 |  | 
|---|
| 515 | if (pEnd > pBegin)      // V0.9.9 (2001-04-04) [umoeller] | 
|---|
| 516 | { | 
|---|
| 517 | ULONG cbSubstr = (pEnd - pBegin); | 
|---|
| 518 | if (pszSubstr = (PSZ)malloc(cbSubstr + 1)) | 
|---|
| 519 | { | 
|---|
| 520 | memcpy(pszSubstr, pBegin, cbSubstr);        // V0.9.9 (2001-04-04) [umoeller] | 
|---|
| 521 | *(pszSubstr + cbSubstr) = '\0'; | 
|---|
| 522 | } | 
|---|
| 523 | } | 
|---|
| 524 |  | 
|---|
| 525 | return pszSubstr; | 
|---|
| 526 | } | 
|---|
| 527 |  | 
|---|
| 528 | /* | 
|---|
| 529 | *@@ strhExtract: | 
|---|
| 530 | *      searches pszBuf for the cOpen character and returns | 
|---|
| 531 | *      the data in between cOpen and cClose, excluding | 
|---|
| 532 | *      those two characters, in a newly allocated buffer | 
|---|
| 533 | *      which you must free() afterwards. | 
|---|
| 534 | * | 
|---|
| 535 | *      Spaces and newlines/linefeeds are skipped. | 
|---|
| 536 | * | 
|---|
| 537 | *      If the search was successful, the new buffer | 
|---|
| 538 | *      is returned and, if (ppEnd != NULL), *ppEnd points | 
|---|
| 539 | *      to the first character after the cClose character | 
|---|
| 540 | *      found in the buffer. | 
|---|
| 541 | * | 
|---|
| 542 | *      If the search was not successful, NULL is | 
|---|
| 543 | *      returned, and *ppEnd is unchanged. | 
|---|
| 544 | * | 
|---|
| 545 | *      If another cOpen character is found before | 
|---|
| 546 | *      cClose, matching cClose characters will be skipped. | 
|---|
| 547 | *      You can therefore nest the cOpen and cClose | 
|---|
| 548 | *      characters. | 
|---|
| 549 | * | 
|---|
| 550 | *      This function ignores cOpen and cClose characters | 
|---|
| 551 | *      in C-style comments and strings surrounded by | 
|---|
| 552 | *      double quotes. | 
|---|
| 553 | * | 
|---|
| 554 | *      Example: | 
|---|
| 555 | * | 
|---|
| 556 | +          PSZ pszBuf = "KEYWORD { --blah-- } next", | 
|---|
| 557 | +              pEnd; | 
|---|
| 558 | +          strhExtract(pszBuf, | 
|---|
| 559 | +                      '{', '}', | 
|---|
| 560 | +                      &pEnd) | 
|---|
| 561 | * | 
|---|
| 562 | *      would return a new buffer containing " --blah-- ", | 
|---|
| 563 | *      and ppEnd would afterwards point to the space | 
|---|
| 564 | *      before "next" in the static buffer. | 
|---|
| 565 | * | 
|---|
| 566 | *@@added V0.9.0 [umoeller] | 
|---|
| 567 | */ | 
|---|
| 568 |  | 
|---|
| 569 | PSZ strhExtract(PCSZ pszBuf,    // in: search buffer | 
|---|
| 570 | CHAR cOpen,     // in: opening char | 
|---|
| 571 | CHAR cClose,    // in: closing char | 
|---|
| 572 | PCSZ *ppEnd)    // out: if != NULL, receives first character after closing char | 
|---|
| 573 | { | 
|---|
| 574 | PSZ pszReturn = NULL; | 
|---|
| 575 | PCSZ pOpen; | 
|---|
| 576 | if (    (pszBuf) | 
|---|
| 577 | && (pOpen = strchr(pszBuf, cOpen)) | 
|---|
| 578 | ) | 
|---|
| 579 | { | 
|---|
| 580 | // opening char found: | 
|---|
| 581 | // now go thru the whole rest of the buffer | 
|---|
| 582 | PCSZ     p = pOpen + 1; | 
|---|
| 583 | LONG    lLevel = 1;        // if this goes 0, we're done | 
|---|
| 584 | while (*p) | 
|---|
| 585 | { | 
|---|
| 586 | if (*p == cOpen) | 
|---|
| 587 | lLevel++; | 
|---|
| 588 | else if (*p == cClose) | 
|---|
| 589 | { | 
|---|
| 590 | lLevel--; | 
|---|
| 591 | if (lLevel <= 0) | 
|---|
| 592 | { | 
|---|
| 593 | // matching closing bracket found: | 
|---|
| 594 | // extract string | 
|---|
| 595 | pszReturn = strhSubstr(pOpen + 1,   // after cOpen | 
|---|
| 596 | p);          // excluding cClose | 
|---|
| 597 | if (ppEnd) | 
|---|
| 598 | *ppEnd = p + 1; | 
|---|
| 599 | break;      // while (*p) | 
|---|
| 600 | } | 
|---|
| 601 | } | 
|---|
| 602 | else if (*p == '\"') | 
|---|
| 603 | { | 
|---|
| 604 | // beginning of string: | 
|---|
| 605 | PCSZ p2 = p+1; | 
|---|
| 606 | // find end of string | 
|---|
| 607 | while ((*p2) && (*p2 != '\"')) | 
|---|
| 608 | p2++; | 
|---|
| 609 |  | 
|---|
| 610 | if (*p2 == '\"') | 
|---|
| 611 | // closing quote found: | 
|---|
| 612 | // search on after that | 
|---|
| 613 | p = p2;     // raised below | 
|---|
| 614 | else | 
|---|
| 615 | break;      // while (*p) | 
|---|
| 616 | } | 
|---|
| 617 |  | 
|---|
| 618 | p++; | 
|---|
| 619 | } | 
|---|
| 620 | } | 
|---|
| 621 |  | 
|---|
| 622 | return pszReturn; | 
|---|
| 623 | } | 
|---|
| 624 |  | 
|---|
| 625 | /* | 
|---|
| 626 | *@@ strhQuote: | 
|---|
| 627 | *      similar to strhExtract, except that | 
|---|
| 628 | *      opening and closing chars are the same, | 
|---|
| 629 | *      and therefore no nesting is possible. | 
|---|
| 630 | *      Useful for extracting stuff between | 
|---|
| 631 | *      quotes. | 
|---|
| 632 | * | 
|---|
| 633 | *@@added V0.9.0 [umoeller] | 
|---|
| 634 | */ | 
|---|
| 635 |  | 
|---|
| 636 | PSZ strhQuote(PSZ pszBuf, | 
|---|
| 637 | CHAR cQuote, | 
|---|
| 638 | PSZ *ppEnd) | 
|---|
| 639 | { | 
|---|
| 640 | PSZ pszReturn = NULL, | 
|---|
| 641 | p1 = NULL; | 
|---|
| 642 | if ((p1 = strchr(pszBuf, cQuote))) | 
|---|
| 643 | { | 
|---|
| 644 | PSZ p2; | 
|---|
| 645 | if (p2 = strchr(p1+1, cQuote)) | 
|---|
| 646 | { | 
|---|
| 647 | pszReturn = strhSubstr(p1+1, p2); | 
|---|
| 648 | if (ppEnd) | 
|---|
| 649 | // store closing char | 
|---|
| 650 | *ppEnd = p2 + 1; | 
|---|
| 651 | } | 
|---|
| 652 | } | 
|---|
| 653 |  | 
|---|
| 654 | return pszReturn; | 
|---|
| 655 | } | 
|---|
| 656 |  | 
|---|
| 657 | /* | 
|---|
| 658 | *@@ strhStrip: | 
|---|
| 659 | *      removes all double spaces. | 
|---|
| 660 | *      This copies within the "psz" buffer. | 
|---|
| 661 | *      If any double spaces are found, the | 
|---|
| 662 | *      string will be shorter than before, | 
|---|
| 663 | *      but the buffer is _not_ reallocated, | 
|---|
| 664 | *      so there will be unused bytes at the | 
|---|
| 665 | *      end. | 
|---|
| 666 | * | 
|---|
| 667 | *      Returns the number of spaces removed. | 
|---|
| 668 | * | 
|---|
| 669 | *@@added V0.9.0 [umoeller] | 
|---|
| 670 | */ | 
|---|
| 671 |  | 
|---|
| 672 | ULONG strhStrip(PSZ psz)         // in/out: string | 
|---|
| 673 | { | 
|---|
| 674 | PSZ     p; | 
|---|
| 675 | ULONG   cb = strlen(psz), | 
|---|
| 676 | ulrc = 0; | 
|---|
| 677 |  | 
|---|
| 678 | for (p = psz; p < psz+cb; p++) | 
|---|
| 679 | { | 
|---|
| 680 | if ((*p == ' ') && (*(p+1) == ' ')) | 
|---|
| 681 | { | 
|---|
| 682 | PSZ p2 = p; | 
|---|
| 683 | while (*p2) | 
|---|
| 684 | { | 
|---|
| 685 | *p2 = *(p2+1); | 
|---|
| 686 | p2++; | 
|---|
| 687 | } | 
|---|
| 688 | cb--; | 
|---|
| 689 | p--; | 
|---|
| 690 | ulrc++; | 
|---|
| 691 | } | 
|---|
| 692 | } | 
|---|
| 693 | return ulrc; | 
|---|
| 694 | } | 
|---|
| 695 |  | 
|---|
| 696 | /* | 
|---|
| 697 | *@@ strhins: | 
|---|
| 698 | *      this inserts one string into another. | 
|---|
| 699 | * | 
|---|
| 700 | *      pszInsert is inserted into pszBuffer at offset | 
|---|
| 701 | *      ulInsertOfs (which counts from 0). | 
|---|
| 702 | * | 
|---|
| 703 | *      A newly allocated string is returned. pszBuffer is | 
|---|
| 704 | *      not changed. The new string should be free()'d after | 
|---|
| 705 | *      use. | 
|---|
| 706 | * | 
|---|
| 707 | *      Upon errors, NULL is returned. | 
|---|
| 708 | * | 
|---|
| 709 | *@@changed V0.9.0 [umoeller]: completely rewritten. | 
|---|
| 710 | */ | 
|---|
| 711 |  | 
|---|
| 712 | PSZ strhins(PCSZ pcszBuffer, | 
|---|
| 713 | ULONG ulInsertOfs, | 
|---|
| 714 | PCSZ pcszInsert) | 
|---|
| 715 | { | 
|---|
| 716 | PSZ     pszNew = NULL; | 
|---|
| 717 |  | 
|---|
| 718 | if ((pcszBuffer) && (pcszInsert)) | 
|---|
| 719 | { | 
|---|
| 720 | do { | 
|---|
| 721 | ULONG   cbBuffer = strlen(pcszBuffer); | 
|---|
| 722 | ULONG   cbInsert = strlen(pcszInsert); | 
|---|
| 723 |  | 
|---|
| 724 | // check string length | 
|---|
| 725 | if (ulInsertOfs > cbBuffer + 1) | 
|---|
| 726 | break;  // do | 
|---|
| 727 |  | 
|---|
| 728 | // OK, let's go. | 
|---|
| 729 | pszNew = (PSZ)malloc(cbBuffer + cbInsert + 1);  // additional null terminator | 
|---|
| 730 |  | 
|---|
| 731 | // copy stuff before pInsertPos | 
|---|
| 732 | memcpy(pszNew, | 
|---|
| 733 | pcszBuffer, | 
|---|
| 734 | ulInsertOfs); | 
|---|
| 735 | // copy string to be inserted | 
|---|
| 736 | memcpy(pszNew + ulInsertOfs, | 
|---|
| 737 | pcszInsert, | 
|---|
| 738 | cbInsert); | 
|---|
| 739 | // copy stuff after pInsertPos | 
|---|
| 740 | strcpy(pszNew + ulInsertOfs + cbInsert, | 
|---|
| 741 | pcszBuffer + ulInsertOfs); | 
|---|
| 742 | } while (FALSE); | 
|---|
| 743 | } | 
|---|
| 744 |  | 
|---|
| 745 | return pszNew; | 
|---|
| 746 | } | 
|---|
| 747 |  | 
|---|
| 748 | /* | 
|---|
| 749 | *@@ strhFindReplace: | 
|---|
| 750 | *      wrapper around xstrFindReplace to work with C strings. | 
|---|
| 751 | *      Note that *ppszBuf can get reallocated and must | 
|---|
| 752 | *      be free()'able. | 
|---|
| 753 | * | 
|---|
| 754 | *      Repetitive use of this wrapper is not recommended | 
|---|
| 755 | *      because it is considerably slower than xstrFindReplace. | 
|---|
| 756 | * | 
|---|
| 757 | *@@added V0.9.6 (2000-11-01) [umoeller] | 
|---|
| 758 | *@@changed V0.9.7 (2001-01-15) [umoeller]: renamed from strhrpl | 
|---|
| 759 | */ | 
|---|
| 760 |  | 
|---|
| 761 | ULONG strhFindReplace(PSZ *ppszBuf,                // in/out: string | 
|---|
| 762 | PULONG pulOfs,               // in: where to begin search (0 = start); | 
|---|
| 763 | // out: ofs of first char after replacement string | 
|---|
| 764 | PCSZ pcszSearch,      // in: search string; cannot be NULL | 
|---|
| 765 | PCSZ pcszReplace)     // in: replacement string; cannot be NULL | 
|---|
| 766 | { | 
|---|
| 767 | ULONG   ulrc = 0; | 
|---|
| 768 | XSTRING xstrBuf, | 
|---|
| 769 | xstrFind, | 
|---|
| 770 | xstrReplace; | 
|---|
| 771 | size_t  ShiftTable[256]; | 
|---|
| 772 | BOOL    fRepeat = FALSE; | 
|---|
| 773 | xstrInitSet(&xstrBuf, *ppszBuf); | 
|---|
| 774 | // reallocated and returned, so we're safe | 
|---|
| 775 | xstrInitSet(&xstrFind, (PSZ)pcszSearch); | 
|---|
| 776 | xstrInitSet(&xstrReplace, (PSZ)pcszReplace); | 
|---|
| 777 | // these two are never freed, so we're safe too | 
|---|
| 778 |  | 
|---|
| 779 | if ((ulrc = xstrFindReplace(&xstrBuf, | 
|---|
| 780 | pulOfs, | 
|---|
| 781 | &xstrFind, | 
|---|
| 782 | &xstrReplace, | 
|---|
| 783 | ShiftTable, | 
|---|
| 784 | &fRepeat))) | 
|---|
| 785 | // replaced: | 
|---|
| 786 | *ppszBuf = xstrBuf.psz; | 
|---|
| 787 |  | 
|---|
| 788 | return ulrc; | 
|---|
| 789 | } | 
|---|
| 790 |  | 
|---|
| 791 | /* | 
|---|
| 792 | * strhWords: | 
|---|
| 793 | *      returns the no. of words in "psz". | 
|---|
| 794 | *      A string is considered a "word" if | 
|---|
| 795 | *      it is surrounded by spaces only. | 
|---|
| 796 | * | 
|---|
| 797 | *@@added V0.9.0 [umoeller] | 
|---|
| 798 | */ | 
|---|
| 799 |  | 
|---|
| 800 | ULONG strhWords(PSZ psz) | 
|---|
| 801 | { | 
|---|
| 802 | PSZ p; | 
|---|
| 803 | ULONG cb = strlen(psz), | 
|---|
| 804 | ulWords = 0; | 
|---|
| 805 | if (cb > 1) | 
|---|
| 806 | { | 
|---|
| 807 | ulWords = 1; | 
|---|
| 808 | for (p = psz; p < psz+cb; p++) | 
|---|
| 809 | if (*p == ' ') | 
|---|
| 810 | ulWords++; | 
|---|
| 811 | } | 
|---|
| 812 | return ulWords; | 
|---|
| 813 | } | 
|---|
| 814 |  | 
|---|
/*
 *@@ strhGetWord:
 *      finds the boundaries of the next word.
 *
 *      The search begins at *ppszStart. All characters which
 *      are listed in pcszBeginChars are skipped first; the
 *      word then extends up to (but not including) the first
 *      character which is listed in pcszEndChars or which is
 *      the null terminator.
 *
 *      If a word is found, *ppszStart is set to the first
 *      character of the word, *ppszEnd receives the address
 *      of the first character _after_ the word (which is
 *      probably a space or a \n or \r char), and TRUE is
 *      returned.
 *
 *      FALSE is returned, and neither *ppszStart nor *ppszEnd
 *      is touched, if
 *
 *      --  the null terminator or pLimit is reached before
 *          a word has started, or
 *
 *      --  the word does not end before or with the
 *          character at pLimit.
 *
 *      Note that the character at pLimit itself is examined
 *      when looking for the end of the word, so it must be
 *      readable.
 *
 *@@added V0.9.1 (2000-02-13) [umoeller]
 */

BOOL strhGetWord(PSZ *ppszStart,        // in: start of search,
                                        // out: start of word (if TRUE is returned)
                 PCSZ pLimit,           // in: ptr to last char after *ppszStart to be
                                        // searched; if the word does not end before
                                        // or with this char, FALSE is returned
                 PCSZ pcszBeginChars,   // in: chars to be skipped in front of a word;
                                        // stringh.h defines STRH_BEGIN_CHARS
                 PCSZ pcszEndChars,     // in: chars which terminate a word;
                                        // stringh.h defines STRH_END_CHARS
                 PSZ *ppszEnd)          // out: first char _after_ word
                                        // (if TRUE is returned)
{
    PSZ pStart = *ppszStart,
        pEndOfWord;

    // skip everything that may come in front of a word.
    // strchr() also "finds" the null terminator of
    // pcszBeginChars, so a null byte in the buffer has to be
    // tested for explicitly; otherwise it would be skipped like
    // a begin char and we'd keep reading behind the string
    while (    (pStart < (PSZ)pLimit)
            && (*pStart)
            && (strchr(pcszBeginChars, *pStart))
          )
        pStart++;

    if (    (pStart >= (PSZ)pLimit)
         || (!*pStart)
       )
        // limit or end of string reached before a word started
        return FALSE;

    // pStart is on the first char of a word now: find its end.
    // Here it is intended that strchr() matches the null
    // terminator too, so that the end of the string also
    // terminates the word
    pEndOfWord = pStart;
    while (    (pEndOfWord <= (PSZ)pLimit)
            && (!strchr(pcszEndChars, *pEndOfWord))
          )
        pEndOfWord++;

    if (pEndOfWord > (PSZ)pLimit)
        // word is not terminated within the limit
        return FALSE;

    // whoa, got a word:
    *ppszStart = pStart;
    *ppszEnd = pEndOfWord;
    return TRUE;
}
|---|
| 883 |  | 
|---|
/*
 *@@ strhIsWord:
 *      checks whether the cbSearch characters which start
 *      at p form a "word" of their own within pcszBuf and
 *      returns TRUE if so.
 *
 *      This is the case if both of the following apply:
 *
 *      --  p is the very beginning of pcszBuf, or the
 *          character _before_ p is in pcszBeginChars;
 *
 *      --  the character _after_ the word (i.e. *(p+cbSearch))
 *          is the null terminator, or it is in pcszEndChars
 *          and is not immediately followed by the same
 *          character again. Two spaces in a row are
 *          accepted though.
 *
 *      The caller must make sure that p + cbSearch is still
 *      within the string; no length check is made here.
 *
 *@@added V0.9.6 (2000-11-12) [umoeller]
 *@@changed V0.9.18 (2002-02-23) [umoeller]: fixed end char check
 */

BOOL strhIsWord(PCSZ pcszBuf,
                PCSZ p,                 // in: start of word
                ULONG cbSearch,         // in: length of word
                PCSZ pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                PCSZ pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    CHAR    chAfter,
            chAfter2;

    // left boundary: p[-1] is only looked at if p is not
    // the first character of the buffer
    if (    (p != pcszBuf)
         && (!strchr(pcszBeginChars, p[-1]))
       )
        return FALSE;

    // right boundary
    chAfter = p[cbSearch];
    if (!chAfter)
        // word runs up to the end of the string
        return TRUE;

    if (!strchr(pcszEndChars, chAfter))
        // word is followed by something which cannot end a word
        return FALSE;

    // valid end char: reject doubles of that char, but allow
    // spaces (fixed V0.9.18 (2002-02-23) [umoeller]).
    // A null terminator behind the end char always differs
    // from the end char, so it passes the first test
    chAfter2 = p[cbSearch + 1];
    if (    (chAfter2 != chAfter)
         || (chAfter2 == ' ')
       )
        return TRUE;

    return FALSE;
}
|---|
| 935 |  | 
|---|
/*
 *@@ strhFindWord:
 *      searches for pszSearch in pszBuf and returns the
 *      address of the first occurrence which is a "word",
 *      or NULL if there is none (or if pszBuf or pszSearch
 *      is empty).
 *
 *      As opposed to strstr, this finds pszSearch
 *      only if it is a "word". A search string is
 *      considered a word if the character _before_
 *      it is in pcszBeginChars and the char _after_
 *      it is in pcszEndChars; see strhIsWord for the
 *      exact rules.
 *
 *      Example:
 +          strhFindWord("This is an example.", "is", " ", " .");
 +          returns ...........^ this, but not the "is" in "This".
 *
 *      The algorithm here uses strstr to find pszSearch in pszBuf
 *      and performs additional "is-word" checks for each item found
 *      (by calling strhIsWord). After a rejected occurrence, the
 *      search is resumed one character later, so occurrences
 *      which overlap a rejected one are found as well.
 *
 *      Note that this function is fairly slow compared to xstrFindWord.
 *
 *@@added V0.9.0 (99-11-08) [umoeller]
 *@@changed V0.9.0 (99-11-10) [umoeller]: tried second algorithm, reverted to original...
 */

PSZ strhFindWord(PCSZ pszBuf,
                 PCSZ pszSearch,
                 PCSZ pcszBeginChars,    // suggestion: "\x0d\x0a ()/\\-,."
                 PCSZ pcszEndChars)      // suggestion: "\x0d\x0a ()/\\-,.:;"
{
    ULONG   cbSearch = strlen(pszSearch);
    PCSZ    p;

    // nothing to find in an empty buffer or with an empty
    // search string; testing the first byte saves us from
    // running strlen over the whole buffer
    if (    (!*pszBuf)
         || (!cbSearch)
       )
        return NULL;

    for (p = strstr(pszBuf, pszSearch);
         p;
         // not a word: go on right behind the start of this
         // occurrence. Skipping the whole occurrence would miss
         // a word which overlaps it, e.g. "a a" in "ba a a"
         p = strstr(p + 1, pszSearch))
    {
        if (strhIsWord(pszBuf,
                       p,
                       cbSearch,
                       pcszBeginChars,
                       pcszEndChars))
            // valid begin and end chars:
            return (PSZ)p;
    }

    return NULL;
}
|---|
| 1000 |  | 
|---|
/*
 *@@ strhFindEOL:
 *      returns the address of the first \r, \n or null
 *      character at or after pcszSearchIn.
 *
 *      If pulOffset is not NULL, *pulOffset receives the
 *      offset of that character from pcszSearchIn.
 *
 *      This never returns NULL: if the string has no line
 *      break in it, the address of its null terminator is
 *      returned.
 *
 *@@added V0.9.4 (2000-07-01) [umoeller]
 */

PSZ strhFindEOL(PCSZ pcszSearchIn,        // in: where to search
                PULONG pulOffset)         // out: offset (ptr can be NULL)
{
    // strcspn gives us the length of the leading part which
    // has neither \r nor \n in it; if there is no line break
    // at all, that is the entire string, and we end up on
    // the null terminator
    ULONG   ulLineLength = strcspn(pcszSearchIn, "\r\n");

    if (pulOffset)
        *pulOffset = ulLineLength;

    return (PSZ)(pcszSearchIn + ulLineLength);
}
|---|
| 1032 |  | 
|---|
/*
 *@@ strhFindNextLine:
 *      like strhFindEOL, but this returns the address of
 *      the character _after_ the line break, i.e. the
 *      start of the next line. A \r, a \n, and a \r\n pair
 *      are each skipped as one line break.
 *
 *      If pulOffset is not NULL, *pulOffset receives the
 *      offset of the returned address from pszSearchIn.
 *
 *      Note that this might return a pointer to the
 *      terminating null character also, either because
 *      there is no line break or because nothing follows it.
 */

PSZ strhFindNextLine(PSZ pszSearchIn, PULONG pulOffset)
{
    // this is the first \r or \n, or the terminating
    // null byte if there is no line break
    PSZ     pszNext = strhFindEOL(pszSearchIn, NULL);

    // step over "\r", "\n" or "\r\n"; on the null byte,
    // neither test applies and we stay where we are
    if (pszNext[0] == '\r')
        ++pszNext;
    if (pszNext[0] == '\n')
        ++pszNext;

    if (pulOffset)
        *pulOffset = pszNext - pszSearchIn;

    return pszNext;
}
|---|
| 1054 |  | 
|---|
/*
 *@@ strhBeautifyTitle:
 *      replaces all line breaks (0xd, 0xa) in psz with
 *      spaces, in place. A line break character which
 *      would end up right behind a space is removed
 *      instead, so that "\r\n" or " \n" yield one space only.
 *
 *      Returns the new length of the string or 0 on
 *      errors (psz is NULL).
 *
 *      The string is compacted in a single pass. Characters
 *      which stay where they are are not written again, so
 *      a string without any line breaks is not modified at all.
 *
 *@@changed V0.9.12 (2001-05-17) [pr]: multiple line break chars. end up as only 1 space
 *@@changed V0.9.19 (2002-06-18) [umoeller]: now returning length
 */

ULONG strhBeautifyTitle(PSZ psz)
{
    PSZ     pSource,        // next char to be examined
            pTarget;        // where the next char to be kept goes

    if (!psz)
        return 0;

    for (pSource = pTarget = psz;
         *pSource;
         pSource++)
    {
        if (    (*pSource == '\r')
             || (*pSource == '\n')
           )
        {
            // line break: make it a space, unless the
            // output has a space there already
            if (    (pTarget == psz)
                 || (pTarget[-1] != ' ')
               )
                *pTarget++ = ' ';
            // else: dropped
        }
        else
        {
            // regular char: needs to be moved only if
            // something was dropped before
            if (pTarget != pSource)
                *pTarget = *pSource;
            pTarget++;
        }
    }

    // string got shorter: terminate it anew
    if (pTarget != pSource)
        *pTarget = '\0';

    return (pTarget - psz);
}
|---|
| 1089 |  | 
|---|
/*
 *@@ strhBeautifyTitle2:
 *      like strhBeautifyTitle, but copies into
 *      a new buffer. More efficient.
 *
 *      Every line break character (0xd, 0xa) in pcszSource
 *      is written to pszTarget as a space, unless the
 *      character written last was a space already; in that
 *      case it is dropped. All other characters are copied
 *      unchanged.
 *
 *      pszTarget is always null-terminated and must have
 *      room for strlen(pcszSource) + 1 bytes. If pcszSource
 *      is NULL, pszTarget is set to an empty string.
 *
 *      Returns the length of the string in pszTarget.
 *
 *@@added V0.9.19 (2002-06-18) [umoeller]
 */

ULONG strhBeautifyTitle2(PSZ pszTarget,     // out: beautified string
                         PCSZ pcszSource)   // in: string to be beautified (can be NULL)
{
    PSZ     pTarget = pszTarget;

    if (pcszSource)
    {
        PCSZ    pSource;

        for (pSource = pcszSource;
             *pSource;
             pSource++)
        {
            CHAR    c = *pSource;

            if (    (c == '\r')
                 || (c == '\n')
               )
            {
                // line break: store a space, unless the
                // target has one at the end already
                if (    (pTarget == pszTarget)
                     || (pTarget[-1] != ' ')
                   )
                    *pTarget++ = ' ';
            }
            else
                *pTarget++ = c;
        }
    }

    // null-terminate
    *pTarget = '\0';

    return (pTarget - pszTarget);
}
|---|
| 1131 |  | 
|---|
/*
 * strhFindAttribValue:
 *      searches pszSearchIn for an attribute named pszAttrib
 *      and returns the address of its value within
 *      pszSearchIn, or NULL if the attribute was not found.
 *      The name is compared without respecting case.
 *
 *      pszSearchIn is split into tokens at spaces, tabs,
 *      \r and \n. White space between double quotes does
 *      not split; a quote which is not closed ends with
 *      the line. Leading quotes (single or double) and
 *      white space at the beginning of pszSearchIn are
 *      skipped before the first token.
 *
 *      In each token, the attribute name is everything up
 *      to the first "=" or ">" character ("=" and ">" at
 *      the very beginning of the token are ignored). If
 *      the name matches pszAttrib,
 *
 *      --  and something follows that "=" or ">" within
 *          the token, the address of the first character
 *          after it is returned. This can be an opening
 *          quote;
 *
 *      --  otherwise the address of the character right
 *          after the attribute name is returned (which is
 *          the "=" or ">", white space, or the null
 *          terminator).
 *
 *      <B>Example:</B>
 +          strhFindAttribValue("<PAGE BLAH=\"data\">", "BLAH")
 +
 +          returns ....................... ^ this address.
 *
 *      The search string is scanned in place. No copy of
 *      it is made, and strtok is not used, so the tokenizer
 *      state of the caller is left alone.
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V0.9.3 (2000-05-19) [umoeller]: some speed optimizations
 *@@changed V0.9.12 (2001-05-22) [umoeller]: fixed space bug, thanks Yuri Dario
 *@@changed WarpIN V1.0.11 (2006-08-29) [pr]: handle attrib names in quoted strings @@fixes 718
 *@@changed WarpIN V1.0.12 (2006-09-07) [pr]: fix attrib handling again @@fixes 718 @@fixes 836
 */

/*
 * FindAttribInToken:
 *      private helper for strhFindAttribValue which checks
 *      whether the token between pTokenStart (inclusive)
 *      and pTokenEnd (exclusive) carries the attribute
 *      pszAttrib. Returns the address to be reported to
 *      the caller, or NULL if the token is for another
 *      attribute.
 */

static PSZ FindAttribInToken(const char *pTokenStart,
                             const char *pTokenEnd,
                             const char *pszAttrib,
                             ULONG cbAttrib)       // in: strlen(pszAttrib)
{
    const char  *pName = pTokenStart,
                *pNameEnd;

    // separators in front of the name are ignored
    while (    (pName < pTokenEnd)
            && ((*pName == '=') || (*pName == '>'))
          )
        pName++;

    if (pName == pTokenEnd)
        // empty token, or separators only
        return NULL;

    // the name ends with the first separator or with the token
    pNameEnd = pName;
    while (    (pNameEnd < pTokenEnd)
            && (*pNameEnd != '=')
            && (*pNameEnd != '>')
          )
        pNameEnd++;

    if (    ((ULONG)(pNameEnd - pName) != cbAttrib)
         || (memicmp((void*)pName, (void*)pszAttrib, cbAttrib))
       )
        // some other attribute
        return NULL;

    if (pNameEnd + 1 < pTokenEnd)
        // separator found, and there's something behind it:
        // that's the value
        return (PSZ)(pNameEnd + 1);

    // no separator, or nothing behind it:
    // report the char right after the name
    return (PSZ)pNameEnd;
}

PSZ strhFindAttribValue(const char *pszSearchIn, const char *pszAttrib)
{
    ULONG       cbAttrib = strlen(pszAttrib);
    const char  *p = pszSearchIn,
                *pTokenStart;
    BOOL        fInQuote = FALSE;

    // V1.0.12 (2006-09-07) [pr]: filter leading " and ' left over from the previous pass
    while (    (*p == '\'') || (*p == '"')  || (*p == ' ')
            || (*p == '\n') || (*p == '\r') || (*p == '\t')
          )
        p++;

    pTokenStart = p;

    while (TRUE)
    {
        BOOL    fTokenEnd = FALSE;

        if (!*p)
            // end of string ends the last token,
            // even if a quote is still open
            fTokenEnd = TRUE;
        else if (fInQuote)
        {
            // V1.0.12 (2006-09-07) [pr]: allow end of line to terminate a (broken) quote
            if ((*p == '"') || (*p == '\n') || (*p == '\r'))
                fInQuote = FALSE;
        }
        else if (*p == '"')
            fInQuote = TRUE;
        else if ((*p == ' ') || (*p == '\n') || (*p == '\r') || (*p == '\t'))
            fTokenEnd = TRUE;

        if (fTokenEnd)
        {
            PSZ prc = FindAttribInToken(pTokenStart, p, pszAttrib, cbAttrib);
            if (prc)
                return prc;

            if (!*p)
                break;

            // next token starts behind the white space
            pTokenStart = p + 1;
        }

        p++;
    }

    return NULL;
}
|---|
| 1216 |  | 
|---|
/*
 * strhGetNumAttribValue:
 *      stores the numerical parameter value of an HTML-style
 *      tag in *pl.
 *
 *      The attribute is looked up with strhFindAttribValue.
 *      One leading quote (single or double) in front of the
 *      value is skipped; the value is then read as a decimal
 *      number.
 *
 *      Returns the address of the tag parameter in the
 *      search buffer (after the leading quote, if any),
 *      if found, or NULL. In the latter case, *pl is not
 *      changed.
 *
 *      *pl is not changed either if the attribute was found,
 *      but its value does not start with a number. A number
 *      which does not fit into a LONG is stored as the
 *      largest or smallest LONG value, as strtol returns it.
 *
 *      <B>Example:</B>
 +          strhGetNumAttribValue("<PAGE BLAH=123>", "BLAH", &l);
 *
 *      stores 123 in the "l" variable.
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V0.9.9 (2001-04-04) [umoeller]: this failed on "123" strings in quotes, fixed
 */

PSZ strhGetNumAttribValue(const char *pszSearchIn,       // in: where to search
                          const char *pszTag,            // e.g. "INDEX"
                          PLONG pl)                      // out: numerical value
{
    PSZ     pParam = strhFindAttribValue(pszSearchIn, pszTag);

    if (pParam)
    {
        char    *pEndOfNumber;
        long    lValue;

        if (    (*pParam == '\"')
             || (*pParam == '\'')
           )
            pParam++;           // V0.9.9 (2001-04-04) [umoeller]

        // strtol instead of sscanf("%ld"): the latter is
        // undefined for numbers which are out of range
        lValue = strtol(pParam, &pEndOfNumber, 10);
        if (pEndOfNumber != (char*)pParam)
            // at least one digit was converted
            *pl = lValue;
    }

    return pParam;
}
|---|
| 1251 |  | 
|---|
/*
 * strhGetTextAttr:
 *      retrieves the attribute value of a textual HTML-style tag
 *      in a newly allocated buffer, which is returned,
 *      or NULL if attribute not found.
 *
 *      The attribute is looked up with strhFindAttribValue.
 *      If the value starts with a double or single quote,
 *      it extends up to the next quote of the same kind
 *      (the quotes are not part of the result). Otherwise
 *      it ends with the next space, \r or \n, which means
 *      that if an attribute value is to contain spaces, it
 *      must be enclosed in quotes.
 *
 *      In both cases, the value also ends with a ">" which
 *      has no "<" of its own within the value (that is the
 *      end of the tag), or with the end of the string.
 *
 *      The offset of the attribute data in pszSearchIn is
 *      returned in *pulOffset so that you can do multiple
 *      searches. *pulOffset is not changed if the attribute
 *      was not found.
 *
 *      This returns a new buffer, which should be free()'d after use.
 *      Note that NULL is also returned if the attribute was found,
 *      but its value is empty, or if there is not enough memory
 *      for the buffer; *pulOffset has been set in these cases.
 *
 *      <B>Example:</B>
 +          ULONG   ulOfs = 0;
 +          strhGetTextAttr("<PAGE BLAH=\"blublub\">", "BLAH", &ulOfs)
 +                           ............^ ulOfs
 *
 *      returns a new string with the value "blublub" (without
 *      quotes) and sets ulOfs to 12.
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V1.0.13 (2006-09-10) [pr]: improved parsing
 */

PSZ strhGetTextAttr(const char *pszSearchIn,
                    const char *pszTag,
                    PULONG pulOffset)       // out: offset where found
{
    PSZ     pszValue,       // first char of the value
            pEnd,           // first char behind the value
            prc = NULL;
    ULONG   ulCount;
    LONG    lNestingLevel = 0;
    // end character to search for: a space by default
    CHAR    cEnd = ' ';

    if (!(pszValue = strhFindAttribValue(pszSearchIn, pszTag)))
        return NULL;

    // V1.0.3 (2004-11-10) [pr]: @@fixes 461
    // V1.0.13 (2006-09-10) [pr]: optimized
    if ((*pszValue == '\"') || (*pszValue == '\''))
    {
        // the data is enclosed in quotes: then the
        // value ends with the same kind of quote
        cEnd = *pszValue;
        pszValue++;
    }

    if (pulOffset)
        // store the offset
        (*pulOffset) = pszValue - (PSZ)pszSearchIn;

    // now find end of attribute
    for (pEnd = pszValue;
         *pEnd;
         pEnd++)
    {
        // V1.0.13 (2006-09-10) [pr]: line end terminates non-quoted attribute
        if (    (    (cEnd == ' ')
                  && ((*pEnd == ' ') || (*pEnd == '\r') || (*pEnd == '\n'))
                )
             || (*pEnd == cEnd)
           )
            // end character found
            break;

        if (*pEnd == '<')
            // yet another opening tag found:
            // this is probably some "<" in the attributes
            lNestingLevel++;
        else if (*pEnd == '>')
        {
            if (--lNestingLevel < 0)
                // end of tag found:
                break;
        }
    }

    // copy attribute to new buffer
    ulCount = pEnd - pszValue;
    if (    (ulCount)
         && (prc = (PSZ)malloc(ulCount + 1))
       )
    {
        memcpy(prc, pszValue, ulCount);
        prc[ulCount] = 0;
    }

    return prc;
}
|---|
| 1341 |  | 
|---|
/*
 * strhFindEndOfTag:
 *      returns a pointer to the ">" char
 *      which seems to terminate the tag beginning
 *      at pszBeginOfTag, or NULL if the string ends
 *      before such a character is found.
 *
 *      Every "<" raises a nesting count and every ">"
 *      lowers it again, so for additional "<" chars
 *      within the tag, the corresponding ">" characters
 *      are skipped. The first ">" which brings the
 *      count below 1 is returned.
 *
 *      Note: You must pass the address of the opening
 *      '<' character to this function.
 *
 *      Example:
 +          PSZ pszTest = "<BODY ATTR=\"<BODY>\">";
 +          strhFindEndOfTag(pszTest)
 +      returns.................................^ this.
 *
 *@@added V0.9.0 [umoeller]
 */

PSZ strhFindEndOfTag(const char *pszBeginOfTag)
{
    LONG        lOpenBrackets = 0;
    const char  *pCur;

    for (pCur = pszBeginOfTag;
         *pCur;
         pCur++)
    {
        switch (*pCur)
        {
            case '<':
                // another opening bracket found:
                lOpenBrackets++;
            break;

            case '>':
                // closing bracket found:
                if (--lOpenBrackets < 1)
                    // corresponding: return this
                    return (PSZ)pCur;
            break;
        }
    }

    // string ended before the tag was closed
    return NULL;
}
|---|
| 1389 |  | 
|---|
/*
 * strhGetBlock:
 *      this complex function searches the given string
 *      for a pair of opening/closing HTML-style tags.
 *      The search starts at pszSearchIn + *pulSearchOffset.
 *      Tag names are compared without respecting case.
 *
 *      If both tags are found, this routine returns 0 and
 *      does the following:
 *
 *          1)  if ppszBlock is not NULL, allocate a new
 *              buffer, copy the text enclosed by the
 *              opening/closing tags into it and set
 *              *ppszBlock to that buffer;
 *
 *          2)  if ppszAttribs is not NULL, allocate another
 *              buffer, copy the opening tag (from its "<"
 *              up to, but not including, its closing ">")
 *              into it and set *ppszAttribs to that buffer.
 *              *ppszAttribs is set to NULL if there is not
 *              enough memory for this;
 *
 *          3)  if pulOfsBeginTag is not NULL, set
 *              *pulOfsBeginTag to the offset from
 *              pszSearchIn where the opening tag was found;
 *
 *          4)  if pulOfsBeginBlock is not NULL, set
 *              *pulOfsBeginBlock to the offset from
 *              pszSearchIn of the first character after
 *              the opening tag;
 *
 *          5)  raise *pulSearchOffset so that you can do
 *              multiple searches without finding the
 *              same tags twice. The new offset is inside
 *              the closing tag (strlen(pszTag) + 1 chars
 *              behind its "<").
 *
 *      Blocks with the same tag can be nested; the closing
 *      tag which corresponds to the opening tag is used then.
 *
 *      All buffers should be freed using free().
 *
 *      This returns the following:
 *      --  0: no error
 *      --  1: tag not found at all (doesn't have to be an error).
 *             No output is set.
 *      --  2: begin tag found, but no corresponding end tag found. This
 *             is a real error.
 *      --  3: begin tag is not terminated by ">" (e.g. "<BEGINTAG whatever").
 *             No output is set.
 *      --  4: both tags found, but not enough memory for the
 *             block; *ppszBlock has been set to NULL.
 *
 *      With return codes 2 and 4, steps 2) to 4) have been
 *      performed already, so *ppszAttribs still has to be
 *      freed, while *pulSearchOffset is unchanged.
 *
 *      Note: Tags are recognized by comparing strlen(pszTag)
 *      characters after the "<" only, so with "PAGE", a
 *      "<PAGEBREAK>" tag is taken for a "PAGE" tag as well.
 *
 *      <B>Example:</B>
 +          PSZ pSearch = "<PAGE INDEX=1>This is page 1.</PAGE>More text."
 +          PSZ pszBlock, pszAttrs;
 +          ULONG ulSearchOfs = 0, ulOfsTag, ulOfsBlock;
 +          strhGetBlock(pSearch, &ulSearchOfs, "PAGE", &pszBlock, &pszAttrs,
 +                       &ulOfsTag, &ulOfsBlock)
 *
 *      would do the following:
 *
 *      1)  set pszBlock to a new string containing "This is page 1."
 *          without quotes;
 *
 *      2)  set pszAttrs to a new string containing "<PAGE INDEX=1";
 *
 *      3)  set ulOfsTag to 0, because "<PAGE" was found at the beginning;
 *
 *      4)  set ulOfsBlock to 14, which is where the block text starts;
 *
 *      5)  set ulSearchOfs to 35, which is inside the "</PAGE>" tag.
 *
 *      Hey-hey. A one-shot function, fairly complicated, but indispensable
 *      for HTML parsing.
 *
 *@@added V0.9.0 [umoeller]
 *@@changed V0.9.1 (2000-01-03) [umoeller]: fixed heap overwrites (thanks to string debugging)
 *@@changed V0.9.1 (2000-01-06) [umoeller]: changed prototype
 *@@changed V0.9.3 (2000-05-06) [umoeller]: NULL string check was missing
 */

ULONG strhGetBlock(const char *pszSearchIn, // in: buffer to search
                   PULONG pulSearchOffset, // in/out: offset where to start search (0 for beginning)
                   const char *pszTag,
                   PSZ *ppszBlock,      // out: block enclosed by the tags
                   PSZ *ppszAttribs,    // out: attributes of the opening tag
                   PULONG pulOfsBeginTag, // out: offset from pszSearchIn where opening tag was found
                   PULONG pulOfsBeginBlock) // out: offset from pszSearchIn where beginning of block was found
{
    ULONG   ulrc,
            cbTag = strlen(pszTag),
            ulNestingLevel = 0;
    PSZ     pszBeginTag = (PSZ)pszSearchIn + *pulSearchOffset,
            pszEndOfBeginTag,
            pszBlock,
            pszSearch2,
            pszClosingTag;

    // go thru the buffer and check all tags if it's the
    // begin tag we're looking for
    while ((pszBeginTag = strchr(pszBeginTag, '<')))
    {
        if (memicmp(pszBeginTag + 1, (void*)pszTag, cbTag) == 0)
            // yes: stop
            break;

        pszBeginTag++;
    }

    if (!pszBeginTag)
        // tag not found at all
        return 1;

    // we found <TAG:
    if (!(pszEndOfBeginTag = strhFindEndOfTag(pszBeginTag)))
        // no matching ">" for opening tag found:
        return 3;

    // does the caller want the attributes?
    if (ppszAttribs)
    {
        // yes: then copy the opening tag without its ">"
        ULONG   ulAttrLen = pszEndOfBeginTag - pszBeginTag;
        PSZ     pszAttrs = (PSZ)malloc(ulAttrLen + 1);
        if (pszAttrs)
        {
            memcpy(pszAttrs, pszBeginTag, ulAttrLen);
            // add terminating 0
            pszAttrs[ulAttrLen] = 0;
        }

        *ppszAttribs = pszAttrs;
    }

    // output offset of where we found the begin tag
    if (pulOfsBeginTag)
        *pulOfsBeginTag = pszBeginTag - (PSZ)pszSearchIn;

    // the block starts behind the '>' char of the opening tag;
    // output offset of that too
    pszBlock = pszEndOfBeginTag + 1;
    if (pulOfsBeginBlock)
        *pulOfsBeginBlock = pszBlock - (PSZ)pszSearchIn;

    // now find corresponding closing tag (e.g. "</BODY>").
    // From here on, every way out of the loop other than
    // finding that tag means "closing tag not found"; this
    // includes the case that strhFindEndOfTag gives up on
    // an unterminated tag within the block
    ulrc = 2;

    // for the first run, pszSearch2 points to right
    // after the '>' char of the opening tag
    pszSearch2 = pszBlock;
    while (     (pszSearch2)        // fixed V0.9.3 (2000-05-06) [umoeller]
            &&  (pszClosingTag = strchr(pszSearch2, '<'))
          )
    {
        if (memicmp(pszClosingTag + 1, (void*)pszTag, cbTag) == 0)
            // if we have another opening tag before our closing
            // tag, we need to have several closing tags before
            // we're done
            ulNestingLevel++;
        else if (    (pszClosingTag[1] == '/')
                  && (memicmp(pszClosingTag + 2, (void*)pszTag, cbTag) == 0)
                )
        {
            // we've found a matching closing tag; is
            // it ours?
            if (ulNestingLevel == 0)
            {
                // our closing tag found:
                // allocate mem for a new buffer
                // and extract all the text between
                // open and closing tags to it
                if (ppszBlock)
                {
                    ULONG   ulLen = pszClosingTag - pszBlock;
                    PSZ     pNew = (PSZ)malloc(ulLen + 1);

                    *ppszBlock = pNew;
                    if (!pNew)
                    {
                        // out of memory
                        ulrc = 4;
                        break;
                    }

                    strhncpy0(pNew, pszBlock, ulLen);
                }

                // raise search offset so that the next search
                // starts behind the '<' of this closing tag
                *pulSearchOffset = (pszClosingTag + cbTag + 1) - (PSZ)pszSearchIn;

                ulrc = 0;
                break;
            }

            // not our closing tag:
            ulNestingLevel--;
        }

        // no matching closing tag: search on after that
        pszSearch2 = strhFindEndOfTag(pszClosingTag);
    }

    return ulrc;
}
|---|
| 1575 |  | 
|---|
| 1576 | /* ****************************************************************** | 
|---|
| 1577 | * | 
|---|
| 1578 | *   Miscellaneous | 
|---|
| 1579 | * | 
|---|
| 1580 | ********************************************************************/ | 
|---|
| 1581 |  | 
|---|
/*
 *@@ strhArrayAppend:
 *      this appends a string to a "string array".
 *
 *      A string array is considered a sequence of
 *      zero-terminated strings in memory. That is,
 *      after each string's null-byte, the next
 *      string comes up.
 *
 *      This is useful for composing a single block
 *      of memory from, say, list box entries, which
 *      can then be written to OS2.INI in one flush.
 *
 *      To append strings to such an array, call this
 *      function for each string you wish to append.
 *      This will re-allocate *ppszRoot with each call,
 *      and update *pcbRoot, which then contains the
 *      total size of all strings (including all null
 *      terminators).
 *
 *      Exactly cbNew characters plus one null byte are
 *      added per call. If cbNew is smaller than the
 *      length of pcszNew, the string is truncated; if it
 *      is larger, the rest is filled with null bytes.
 *
 *      Pass *pcbRoot to PrfSaveProfileData to have the
 *      block saved.
 *
 *      If there is not enough memory for the new array,
 *      *ppszRoot and *pcbRoot are left as they are.
 *
 *      Note: On the first call, *ppszRoot and *pcbRoot
 *      _must_ be both NULL, or this crashes.
 *
 *@@changed V0.9.13 (2001-06-21) [umoeller]: added cbNew
 */

VOID strhArrayAppend(PSZ *ppszRoot,         // in: root of array
                     const char *pcszNew,   // in: string to append
                     ULONG cbNew,           // in: size of that string or 0 to run strlen() here
                     PULONG pcbRoot)        // in/out: size of array
{
    PSZ     pszTemp;
    ULONG   cbOld = *pcbRoot;

    if (!cbNew)     // V0.9.13 (2001-06-21) [umoeller]
        cbNew = strlen(pcszNew);

    pszTemp = (PSZ)malloc(cbOld
                          + cbNew
                          + 1);     // null terminator of the new string
    if (!pszTemp)
        // out of memory: leave the array alone
        return;

    if (*ppszRoot)
    {
        // not first loop: copy old stuff
        memcpy(pszTemp,
               *ppszRoot,
               cbOld);
        free(*ppszRoot);
    }

    // append new string. This must not copy more than cbNew
    // chars since that's all we have allocated; strncpy stops
    // reading at the null byte of pcszNew and pads with zeroes
    // if the caller's cbNew was too large
    strncpy(pszTemp + cbOld,
            pcszNew,
            cbNew);
    pszTemp[cbOld + cbNew] = '\0';

    // update root
    *ppszRoot = pszTemp;
    // update length
    *pcbRoot = cbOld + cbNew + 1;
}
|---|
| 1640 |  | 
|---|
| 1641 | /* | 
|---|
| 1642 | *@@ strhCreateDump: | 
|---|
| 1643 | *      this dumps a memory block into a string | 
|---|
| 1644 | *      and returns that string in a new buffer. | 
|---|
| 1645 | * | 
|---|
| 1646 | *      You must free() the returned PSZ after use. | 
|---|
| 1647 | * | 
|---|
| 1648 | *      The output looks like the following: | 
|---|
| 1649 | * | 
|---|
| 1650 | +          0000:  FE FF 0E 02 90 00 00 00   ........ | 
|---|
| 1651 | +          0008:  FD 01 00 00 57 50 46 6F   ....WPFo | 
|---|
| 1652 | +          0010:  6C 64 65 72 00 78 01 34   lder.x.4 | 
|---|
| 1653 | * | 
|---|
| 1654 | *      Each line is terminated with a newline (\n) | 
|---|
| 1655 | *      character only. | 
|---|
| 1656 | * | 
|---|
| 1657 | *@@added V0.9.1 (2000-01-22) [umoeller] | 
|---|
| 1658 | */ | 
|---|
| 1659 |  | 
|---|
| 1660 | PSZ strhCreateDump(PBYTE pb,            // in: start address of buffer | 
|---|
| 1661 | ULONG ulSize,        // in: size of buffer | 
|---|
| 1662 | ULONG ulIndent)      // in: indentation of every line | 
|---|
| 1663 | { | 
|---|
| 1664 | PSZ     pszReturn = 0; | 
|---|
| 1665 | XSTRING strReturn; | 
|---|
| 1666 | CHAR    szTemp[1000]; | 
|---|
| 1667 |  | 
|---|
| 1668 | PBYTE   pbCurrent = pb;                 // current byte | 
|---|
| 1669 | ULONG   ulCount = 0, | 
|---|
| 1670 | ulCharsInLine = 0;              // if this grows > 7, a new line is started | 
|---|
| 1671 | CHAR    szLine[400] = "", | 
|---|
| 1672 | szAscii[30] = "         ";      // ASCII representation; filled for every line | 
|---|
| 1673 | PSZ     pszLine = szLine, | 
|---|
| 1674 | pszAscii = szAscii; | 
|---|
| 1675 |  | 
|---|
| 1676 | xstrInit(&strReturn, (ulSize * 30) + ulIndent); | 
|---|
| 1677 |  | 
|---|
| 1678 | for (pbCurrent = pb; | 
|---|
| 1679 | ulCount < ulSize; | 
|---|
| 1680 | pbCurrent++, ulCount++) | 
|---|
| 1681 | { | 
|---|
| 1682 | if (ulCharsInLine == 0) | 
|---|
| 1683 | { | 
|---|
| 1684 | memset(szLine, ' ', ulIndent); | 
|---|
| 1685 | pszLine += ulIndent; | 
|---|
| 1686 | } | 
|---|
| 1687 | pszLine += sprintf(pszLine, "%02lX ", (ULONG)*pbCurrent); | 
|---|
| 1688 |  | 
|---|
| 1689 | if ( (*pbCurrent > 31) && (*pbCurrent < 127) ) | 
|---|
| 1690 | // printable character: | 
|---|
| 1691 | *pszAscii = *pbCurrent; | 
|---|
| 1692 | else | 
|---|
| 1693 | *pszAscii = '.'; | 
|---|
| 1694 | pszAscii++; | 
|---|
| 1695 |  | 
|---|
| 1696 | ulCharsInLine++; | 
|---|
| 1697 | if (    (ulCharsInLine > 7)         // 8 bytes added? | 
|---|
| 1698 | || (ulCount == ulSize-1)       // end of buffer reached? | 
|---|
| 1699 | ) | 
|---|
| 1700 | { | 
|---|
| 1701 | // if we haven't had eight bytes yet, | 
|---|
| 1702 | // fill buffer up to eight bytes with spaces | 
|---|
| 1703 | ULONG   ul2; | 
|---|
| 1704 | for (ul2 = ulCharsInLine; | 
|---|
| 1705 | ul2 < 8; | 
|---|
| 1706 | ul2++) | 
|---|
| 1707 | pszLine += sprintf(pszLine, "   "); | 
|---|
| 1708 |  | 
|---|
| 1709 | sprintf(szTemp, "%04lX:  %s  %s\n", | 
|---|
| 1710 | (ulCount & 0xFFFFFFF8),  // offset in hex | 
|---|
| 1711 | szLine,         // bytes string | 
|---|
| 1712 | szAscii);       // ASCII string | 
|---|
| 1713 | xstrcat(&strReturn, szTemp, 0); | 
|---|
| 1714 |  | 
|---|
| 1715 | // restart line buffer | 
|---|
| 1716 | pszLine = szLine; | 
|---|
| 1717 |  | 
|---|
| 1718 | // clear ASCII buffer | 
|---|
| 1719 | strcpy(szAscii, "         "); | 
|---|
| 1720 | pszAscii = szAscii; | 
|---|
| 1721 |  | 
|---|
| 1722 | // reset line counter | 
|---|
| 1723 | ulCharsInLine = 0; | 
|---|
| 1724 | } | 
|---|
| 1725 | } | 
|---|
| 1726 |  | 
|---|
| 1727 | if (strReturn.cbAllocated) | 
|---|
| 1728 | pszReturn = strReturn.psz; | 
|---|
| 1729 |  | 
|---|
| 1730 | return pszReturn; | 
|---|
| 1731 | } | 
|---|
| 1732 |  | 
|---|
| 1733 | /* ****************************************************************** | 
|---|
| 1734 | * | 
|---|
| 1735 | *   Fast string searches | 
|---|
| 1736 | * | 
|---|
| 1737 | ********************************************************************/ | 
|---|
| 1738 |  | 
|---|
// ASSERT() expands to nothing, i.e. assertions are compiled out
#define ASSERT(a)
|---|
| 1740 |  | 
|---|
| 1741 | /* | 
|---|
| 1742 | *      The following code has been taken from the "Standard | 
|---|
| 1743 | *      Function Library", file sflfind.c, and only slightly | 
|---|
| 1744 | *      modified to conform to the rest of this file. | 
|---|
| 1745 | * | 
|---|
| 1746 | *      Written:    96/04/24  iMatix SFL project team <sfl@imatix.com> | 
|---|
| 1747 | *      Revised:    98/05/04 | 
|---|
| 1748 | * | 
|---|
| 1749 | *      Copyright:  Copyright (c) 1991-99 iMatix Corporation. | 
|---|
| 1750 | * | 
|---|
| 1751 | *      The SFL Licence allows incorporating SFL code into other | 
|---|
| 1752 | *      programs, as long as the copyright is reprinted and the | 
|---|
| 1753 | *      code is marked as modified, so this is what we do. | 
|---|
| 1754 | */ | 
|---|
| 1755 |  | 
|---|
| 1756 | /* | 
|---|
| 1757 | *@@ strhmemfind: | 
|---|
| 1758 | *      searches for a pattern in a block of memory using the | 
|---|
| 1759 | *      Boyer-Moore-Horspool-Sunday algorithm. | 
|---|
| 1760 | * | 
|---|
| 1761 | *      The block and pattern may contain any values; you must | 
|---|
| 1762 | *      explicitly provide their lengths. If you search for strings, | 
|---|
| 1763 | *      use strlen() on the buffers. | 
|---|
| 1764 | * | 
|---|
| 1765 | *      Returns a pointer to the pattern if found within the block, | 
|---|
| 1766 | *      or NULL if the pattern was not found. | 
|---|
| 1767 | * | 
|---|
| 1768 | *      This algorithm needs a "shift table" to cache data for the | 
|---|
| 1769 | *      search pattern. This table can be reused when performing | 
|---|
| 1770 | *      several searches with the same pattern. | 
|---|
| 1771 | * | 
|---|
| 1772 | *      "shift" must point to an array big enough to hold 256 (8**2) | 
|---|
| 1773 | *      "size_t" values. | 
|---|
| 1774 | * | 
|---|
| 1775 | *      If (*repeat_find == FALSE), the shift table is initialized. | 
|---|
| 1776 | *      So on the first search with a given pattern, *repeat_find | 
|---|
| 1777 | *      should be FALSE. This function sets it to TRUE after the | 
|---|
| 1778 | *      shift table is initialised, allowing the initialisation | 
|---|
| 1779 | *      phase to be skipped on subsequent searches. | 
|---|
| 1780 | * | 
|---|
| 1781 | *      This function is most effective when repeated searches are | 
|---|
| 1782 | *      made for the same pattern in one or more large buffers. | 
|---|
| 1783 | * | 
|---|
| 1784 | *      Example: | 
|---|
| 1785 | * | 
|---|
| 1786 | +          PSZ     pszHaystack = "This is a sample string.", | 
|---|
| 1787 | +                  pszNeedle = "string"; | 
|---|
| 1788 | +          size_t  shift[256]; | 
|---|
| 1789 | +          BOOL    fRepeat = FALSE; | 
|---|
| 1790 | + | 
|---|
| 1791 | +          PSZ     pFound = strhmemfind(pszHaystack, | 
|---|
| 1792 | +                                       strlen(pszHaystack),   // block size | 
|---|
| 1793 | +                                       pszNeedle, | 
|---|
| 1794 | +                                       strlen(pszNeedle),     // pattern size | 
|---|
| 1795 | +                                       shift, | 
|---|
| 1796 | +                                       &fRepeat); | 
|---|
| 1797 | * | 
|---|
| 1798 | *      Taken from the "Standard Function Library", file sflfind.c. | 
|---|
| 1799 | *      Copyright:  Copyright (c) 1991-99 iMatix Corporation. | 
|---|
| 1800 | *      Slightly modified by umoeller. | 
|---|
| 1801 | * | 
|---|
| 1802 | *@@added V0.9.3 (2000-05-08) [umoeller] | 
|---|
| 1803 | */ | 
|---|
| 1804 |  | 
|---|
| 1805 | void* strhmemfind(const void *in_block,     // in: block containing data | 
|---|
| 1806 | size_t block_size,        // in: size of block in bytes | 
|---|
| 1807 | const void *in_pattern,   // in: pattern to search for | 
|---|
| 1808 | size_t pattern_size,      // in: size of pattern block | 
|---|
| 1809 | size_t *shift,            // in/out: shift table (search buffer) | 
|---|
| 1810 | BOOL *repeat_find)        // in/out: if TRUE, *shift is already initialized | 
|---|
| 1811 | { | 
|---|
| 1812 | size_t      byte_nbr,                       //  Distance through block | 
|---|
| 1813 | match_size;                     //  Size of matched part | 
|---|
| 1814 | const unsigned char | 
|---|
| 1815 | *match_base = NULL,             //  Base of match of pattern | 
|---|
| 1816 | *match_ptr  = NULL,             //  Point within current match | 
|---|
| 1817 | *limit      = NULL;             //  Last potiental match point | 
|---|
| 1818 | const unsigned char | 
|---|
| 1819 | *block   = (unsigned char *) in_block,   //  Concrete pointer to block data | 
|---|
| 1820 | *pattern = (unsigned char *) in_pattern; //  Concrete pointer to search value | 
|---|
| 1821 |  | 
|---|
| 1822 | if (    (block == NULL) | 
|---|
| 1823 | || (pattern == NULL) | 
|---|
| 1824 | || (shift == NULL) | 
|---|
| 1825 | ) | 
|---|
| 1826 | return NULL; | 
|---|
| 1827 |  | 
|---|
| 1828 | //  Pattern must be smaller or equal in size to string | 
|---|
| 1829 | if (block_size < pattern_size) | 
|---|
| 1830 | return NULL;                  //  Otherwise it's not found | 
|---|
| 1831 |  | 
|---|
| 1832 | if (pattern_size == 0)              //  Empty patterns match at start | 
|---|
| 1833 | return ((void *)block); | 
|---|
| 1834 |  | 
|---|
| 1835 | //  Build the shift table unless we're continuing a previous search | 
|---|
| 1836 |  | 
|---|
| 1837 | //  The shift table determines how far to shift before trying to match | 
|---|
| 1838 | //  again, if a match at this point fails.  If the byte after where the | 
|---|
| 1839 | //  end of our pattern falls is not in our pattern, then we start to | 
|---|
| 1840 | //  match again after that byte; otherwise we line up the last occurence | 
|---|
| 1841 | //  of that byte in our pattern under that byte, and try match again. | 
|---|
| 1842 |  | 
|---|
| 1843 | if (!repeat_find || !*repeat_find) | 
|---|
| 1844 | { | 
|---|
| 1845 | for (byte_nbr = 0; | 
|---|
| 1846 | byte_nbr < 256; | 
|---|
| 1847 | byte_nbr++) | 
|---|
| 1848 | shift[byte_nbr] = pattern_size + 1; | 
|---|
| 1849 | for (byte_nbr = 0; | 
|---|
| 1850 | byte_nbr < pattern_size; | 
|---|
| 1851 | byte_nbr++) | 
|---|
| 1852 | shift[(unsigned char)pattern[byte_nbr]] = pattern_size - byte_nbr; | 
|---|
| 1853 |  | 
|---|
| 1854 | if (repeat_find) | 
|---|
| 1855 | *repeat_find = TRUE; | 
|---|
| 1856 | } | 
|---|
| 1857 |  | 
|---|
| 1858 | //  Search for the block, each time jumping up by the amount | 
|---|
| 1859 | //  computed in the shift table | 
|---|
| 1860 |  | 
|---|
| 1861 | limit = block + (block_size - pattern_size + 1); | 
|---|
| 1862 | ASSERT (limit > block); | 
|---|
| 1863 |  | 
|---|
| 1864 | for (match_base = block; | 
|---|
| 1865 | match_base < limit; | 
|---|
| 1866 | match_base += shift[*(match_base + pattern_size)]) | 
|---|
| 1867 | { | 
|---|
| 1868 | match_ptr  = match_base; | 
|---|
| 1869 | match_size = 0; | 
|---|
| 1870 |  | 
|---|
| 1871 | //  Compare pattern until it all matches, or we find a difference | 
|---|
| 1872 | while (*match_ptr++ == pattern[match_size++]) | 
|---|
| 1873 | { | 
|---|
| 1874 | ASSERT (match_size <= pattern_size && | 
|---|
| 1875 | match_ptr == (match_base + match_size)); | 
|---|
| 1876 |  | 
|---|
| 1877 | // If we found a match, return the start address | 
|---|
| 1878 | if (match_size >= pattern_size) | 
|---|
| 1879 | return ((void*)(match_base)); | 
|---|
| 1880 |  | 
|---|
| 1881 | } | 
|---|
| 1882 | } | 
|---|
| 1883 | return NULL;                      //  Found nothing | 
|---|
| 1884 | } | 
|---|
| 1885 |  | 
|---|
/*
 *@@ strhtxtfind:
 *      searches for a case-insensitive text pattern in a string
 *      using the Boyer-Moore-Horspool-Sunday algorithm.  The string and
 *      pattern are null-terminated strings.  Returns a pointer to the pattern
 *      if found within the string, or NULL if the pattern was not found.
 *      Will match strings irrespective of case.  To match exact strings, use
 *      strhfind().  Will not work on multibyte characters.
 *
 *      Characters are compared after converting both sides with
 *      tolower(). All characters are passed to tolower() as
 *      unsigned char values, so characters above 127 are safe
 *      even if "char" is a signed type.
 *
 *      Returns NULL if string or pattern is NULL. An empty
 *      pattern matches at the start of the string.
 *
 *      Examples:
 +      char *result;
 +
 +      result = strhtxtfind ("AbracaDabra", "cad");
 +      if (result)
 +          puts (result);
 +
 *      Taken from the "Standard Function Library", file sflfind.c.
 *      Copyright:  Copyright (c) 1991-99 iMatix Corporation.
 *      Slightly modified.
 *
 *@@added V0.9.3 (2000-05-08) [umoeller]
 */

char* strhtxtfind (const char *string,            //  String containing data
                   const char *pattern)           //  Pattern to search for
{
    size_t
        shift [256];                    //  Shift distance for each value
    size_t
        string_size,
        pattern_size,
        byte_nbr,                       //  Index into pattern / shift table
        last_start,                     //  Last offset at which a match can start
        pos,                            //  Offset of current candidate in string
        step;                           //  Distance to next candidate
    const unsigned char
        *text,                          //  string as unsigned bytes
        *pat;                           //  pattern as unsigned bytes

    //  Expect non-NULL pointers, but fail gracefully
    if (string == NULL || pattern == NULL)
        return NULL;

    string_size  = strlen (string);
    pattern_size = strlen (pattern);

    //  Pattern must be smaller or equal in size to string
    if (string_size < pattern_size)
        return NULL;                  //  Otherwise it cannot be found

    if (pattern_size == 0)              //  Empty string matches at start
        return (char *) string;

    //  <ctype.h> functions are only defined for values which fit into
    //  an unsigned char (and EOF), so never hand them a plain char
    text = (const unsigned char *) string;
    pat  = (const unsigned char *) pattern;

    //  Build the shift table

    //  The shift table determines how far to shift before trying to match
    //  again, if a match at this point fails.  If the byte after where the
    //  end of our pattern falls is not in our pattern, then we start to
    //  match again after that byte; otherwise we line up the last occurrence
    //  of that byte in our pattern under that byte, and try match again.

    for (byte_nbr = 0; byte_nbr < 256; byte_nbr++)
        shift [byte_nbr] = pattern_size + 1;

    for (byte_nbr = 0; byte_nbr < pattern_size; byte_nbr++)
        shift [(unsigned char) tolower (pat [byte_nbr])] = pattern_size - byte_nbr;

    //  Search for the string.  If we don't find a match, move up by the
    //  amount we computed in the shift table above, to find location of
    //  the next potential match.

    last_start = string_size - pattern_size;

    for (pos = 0;
         pos <= last_start;
         pos += step)
    {
        //  Compare pattern until it all matches, or we find a difference
        for (byte_nbr = 0;
             (byte_nbr < pattern_size)
                && (tolower (text [pos + byte_nbr]) == tolower (pat [byte_nbr]));
             byte_nbr++)
            ;

        //  If we found a match, return the start address
        if (byte_nbr == pattern_size)
            return ((char *)(string + pos));

        //  The byte behind the candidate is at worst the string's
        //  null terminator, so this never reads outside the string
        step = shift [(unsigned char) tolower (text [pos + pattern_size])];

        //  Stop if the next candidate would not fit into the string
        if (step > last_start - pos)
            break;
    }

    return NULL;                      //  Found nothing
}
|---|
| 1980 |  | 
|---|