Changeset 1870 for trunk/src/kmk/strcache2.c
- Timestamp:
- Oct 17, 2008, 1:15:30 AM (17 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/kmk/strcache2.c
r1869 r1870 56 56 *******************************************************************************/ 57 57 /* The default size of a memory segment (1MB). */ 58 #define STRCACHE2_SEG_SIZE (1024U*1024U) 59 /* The initial hash table size (number of entries). Power of two. */ 60 #define STRCACHE2_INITIAL_SIZE 0x10000U 61 62 63 /******************************************************************************* 64 * Global Variables * 65 *******************************************************************************/ 66 /* the deleted filler hash table entry. */ 67 static struct strcache2_entry deleted_entry; 58 #define STRCACHE2_SEG_SIZE (1024U*1024U) 59 /* The default hash table shift (hash size give as a power of two). */ 60 #define STRCACHE2_HASH_SHIFT 16 61 62 68 63 69 64 … … 74 69 size_t size; 75 70 76 size = STRCACHE2_SEG_SIZE;71 size = cache->def_seg_size; 77 72 if (size < (size_t)minlen + sizeof (struct strcache2_seg)) 78 73 { … … 99 94 { 100 95 unsigned int ch0 = *str++; 101 hash = len--; 96 hash = 0; 97 len--; 102 98 while (len >= 2) 103 99 { … … 137 133 { 138 134 unsigned int ch0 = *str++; 139 hash = len--; 135 hash = 0; 136 len--; 140 137 while (len >= 2) 141 138 { … … 164 161 } 165 162 else 166 hash = 0; 167 return hash | 1; 163 hash = 1; 164 hash |= 1; 165 return hash; 168 166 } 169 167 … … 176 174 unsigned int ch0 = *str++; 177 175 ch0 = tolower (ch0); 178 hash = len--; 176 hash = 0; 177 len--; 179 178 while (len >= 2) 180 179 { … … 218 217 unsigned int ch0 = *str++; 219 218 ch0 = tolower (ch0); 220 hash = len--; 219 hash = 0; 220 len--; 221 221 while (len >= 2) 222 222 { … … 248 248 } 249 249 else 250 hash = 0; 250 hash = 1; 251 hash |= 1; 251 252 return hash; 252 253 } … … 258 259 /* the simple stuff first. */ 259 260 if ( entry == NULL 260 || entry == &deleted_entry261 261 || entry->hash1 != hash1 262 262 || entry->length != length) … … 264 264 265 265 return !cache->case_insensitive 266 ? memcmp ( cache+ 1, str, length) == 0266 ? memcmp (entry + 1, str, length) == 0 267 267 #if defined(_MSC_VER) || defined(__OS2__) 268 : _memicmp ( cache+ 1, str, length) == 0268 : _memicmp (entry + 1, str, length) == 0 269 269 #else 270 : strncasecmp ((const char *)( cache+ 1), str, length) == 0270 : strncasecmp ((const char *)(entry + 1), str, length) == 0 271 271 #endif 272 272 ; … … 276 276 strcache2_rehash (struct strcache2 *cache) 277 277 { 278 /* TODO */ 279 struct strcache2 **ptr = (struct strcache2 **)1; 280 assert(0); 281 *ptr = cache; 278 unsigned int src = cache->hash_size; 279 struct strcache2_entry **src_tab = cache->hash_tab; 280 struct strcache2_entry **dst_tab; 281 unsigned int hash_mask; 282 283 /* Allocate a new hash table twice the size of the current. */ 284 cache->hash_size <<= 1; 285 cache->hash_mask <<= 1; 286 cache->hash_mask |= 1; 287 cache->rehash_count <<= 1; 288 cache->hash_tab = dst_tab = (struct strcache2_entry **) 289 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *)); 290 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *)); 291 292 /* Copy the entries from the old to the new hash table. */ 293 hash_mask = cache->hash_mask; 294 while (src-- > 0) 295 { 296 struct strcache2_entry *entry = src_tab[src]; 297 if (entry) 298 { 299 unsigned int dst = entry->hash1 & hash_mask; 300 if (dst_tab[dst]) 301 { 302 unsigned int hash2 = entry->hash2; 303 if (!hash2) 304 entry->hash2 = hash2 = cache->case_insensitive 305 ? strcache2_case_insensitive_hash_2 ((const char *)(entry + 1), entry->length) 306 : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length); 307 dst += hash2; 308 dst &= hash_mask; 309 while (dst_tab[dst]) 310 { 311 dst += hash2; 312 dst &= hash_mask; 313 } 314 } 315 316 dst_tab[dst] = entry; 317 } 318 } 319 320 /* That's it, just free the old table and we're done. */ 321 free (src_tab); 282 322 } 283 323 … … 344 384 /* Lookup the entry in the hash table, hoping for an 345 385 early match. */ 346 347 idx = hash1 / cache->hash_size; 386 idx = hash1 & cache->hash_mask; 348 387 entry = cache->hash_tab[idx]; 349 388 if (strcache2_is_equal (cache, entry, str, length, hash1)) … … 353 392 else 354 393 { 355 unsigned int deleted_idx = entry == &deleted_entry ? idx : ~0U; 356 cache->collision_count++; 394 cache->collision_1st_count++; 357 395 358 396 hash2 = cache->case_insensitive … … 360 398 : strcache2_case_sensitive_hash_2 (str, length); 361 399 idx += hash2; 362 idx /= cache->hash_size;400 idx &= cache->hash_mask; 363 401 entry = cache->hash_tab[idx]; 364 402 if (strcache2_is_equal (cache, entry, str, length, hash1)) … … 367 405 if (entry) 368 406 { 407 cache->collision_2nd_count++; 369 408 do 370 409 { 371 if (deleted_idx == ~0U && entry == &deleted_entry)372 deleted_idx = idx;373 374 410 idx += hash2; 375 idx /= cache->hash_size;411 idx &= cache->hash_mask; 376 412 entry = cache->hash_tab[idx]; 413 cache->collision_3rd_count++; 377 414 if (strcache2_is_equal (cache, entry, str, length, hash1)) 378 415 return (const char *)(entry + 1); … … 380 417 while (entry); 381 418 } 382 if (deleted_idx != ~0U) 383 idx = deleted_idx; 419 } 420 421 /* Not found, add it at IDX. */ 422 return strcache2_enter_string (cache, idx, str, length, hash1, hash2); 423 } 424 425 /* The public add/lookup string interface for prehashed strings. 426 Use strcache2_hash_str to calculate the hash of a string. */ 427 const char * 428 strcache2_add_hashed (struct strcache2 *cache, const char *str, unsigned int length, 429 unsigned int hash1, unsigned int hash2) 430 { 431 struct strcache2_entry const *entry; 432 unsigned int idx; 433 #ifndef NDEBUG 434 unsigned correct_hash; 435 436 correct_hash = cache->case_insensitive 437 ? strcache2_case_insensitive_hash_1 (str, length) 438 : strcache2_case_sensitive_hash_1 (str, length); 439 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash)); 440 if (hash2) 441 { 442 correct_hash = cache->case_insensitive 443 ? strcache2_case_insensitive_hash_2 (str, length) 444 : strcache2_case_sensitive_hash_2 (str, length); 445 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash)); 446 } 447 #endif /* NDEBUG */ 448 449 cache->lookup_count++; 450 451 /* Lookup the entry in the hash table, hoping for an 452 early match. */ 453 idx = hash1 & cache->hash_mask; 454 entry = cache->hash_tab[idx]; 455 if (strcache2_is_equal (cache, entry, str, length, hash1)) 456 return (const char *)(entry + 1); 457 if (entry) 458 { 459 cache->collision_1st_count++; 460 461 if (!hash2) 462 hash2 = cache->case_insensitive 463 ? strcache2_case_insensitive_hash_2 (str, length) 464 : strcache2_case_sensitive_hash_2 (str, length); 465 idx += hash2; 466 idx &= cache->hash_mask; 467 entry = cache->hash_tab[idx]; 468 if (strcache2_is_equal (cache, entry, str, length, hash1)) 469 return (const char *)(entry + 1); 470 471 if (entry) 472 { 473 cache->collision_2nd_count++; 474 do 475 { 476 idx += hash2; 477 idx &= cache->hash_mask; 478 entry = cache->hash_tab[idx]; 479 cache->collision_3rd_count++; 480 if (strcache2_is_equal (cache, entry, str, length, hash1)) 481 return (const char *)(entry + 1); 482 } 483 while (entry); 484 } 384 485 } 385 486 … … 393 494 { 394 495 /* Check mandatory alignment first. */ 395 396 496 if (!((size_t)str & (sizeof (void *) - 1))) 397 497 { 398 498 /* Check the segment list and consider the question answered if the 399 499 string is within one of them. (Could check it more thoroughly...) */ 400 401 500 struct strcache2_seg const *seg; 402 for (seg = cache->seg_head; seg; seg ++)501 for (seg = cache->seg_head; seg; seg = seg->next) 403 502 if ((size_t)(str - seg->start) < seg->size) 404 503 return 1; … … 451 550 ? strcache2_case_insensitive_hash_2 (str, entry->length) 452 551 : strcache2_case_sensitive_hash_2 (str, entry->length); 453 if (hash != entry->hash 1)552 if (hash != entry->hash2) 454 553 { 455 554 fprintf (stderr, … … 475 574 } 476 575 576 /* Calculates the case sensitive hash values of the string. 577 The first hash is returned, the other is put at HASH2P. */ 578 unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p) 579 { 580 *hash2p = strcache2_case_sensitive_hash_2 (str, length); 581 return strcache2_case_sensitive_hash_1 (str, length); 582 } 583 584 /* Calculates the case insensitive hash values of the string. 585 The first hash is returned, the other is put at HASH2P. */ 586 unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p) 587 { 588 *hash2p = strcache2_case_insensitive_hash_2 (str, length); 589 return strcache2_case_insensitive_hash_1 (str, length); 590 } 591 592 477 593 /* List of initialized string caches. */ 478 594 static struct strcache2 *strcache_head; … … 480 596 /* Initalizes a new cache. */ 481 597 void 482 strcache2_init (struct strcache2 *cache, const char *name, 483 int case_insensitive, int thread_safe) 484 { 598 strcache2_init (struct strcache2 *cache, const char *name, unsigned int size, 599 unsigned int def_seg_size, int case_insensitive, int thread_safe) 600 { 601 unsigned hash_shift; 485 602 assert (!thread_safe); 486 603 604 /* calc the size as a power of two */ 605 if (!size) 606 hash_shift = STRCACHE2_HASH_SHIFT; 607 else 608 { 609 assert (size <= (~0U / 2 + 1)); 610 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++) 611 /* nothing */; 612 } 613 614 /* adjust the default segment size */ 615 if (!def_seg_size) 616 def_seg_size = STRCACHE2_SEG_SIZE; 617 else if (def_seg_size < sizeof (struct strcache2_seg) * 10) 618 def_seg_size = sizeof (struct strcache2_seg) * 10; 619 else if ((def_seg_size & 0xfff) < 0xf00) 620 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU); 621 622 623 /* init the structure. */ 487 624 cache->case_insensitive = case_insensitive; 488 cache->hash_ size = STRCACHE2_INITIAL_SIZE;625 cache->hash_mask = (1U << hash_shift) - 1U; 489 626 cache->count = 0; 490 cache->rehash_count = STRCACHE2_INITIAL_SIZE / 4 * 3;491 cache->collision_count = 0;492 627 cache->lookup_count = 0; 628 cache->collision_1st_count = 0; 629 cache->collision_2nd_count = 0; 630 cache->collision_3rd_count = 0; 631 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */ 632 cache->init_size = 1U << hash_shift; 633 cache->hash_size = 1U << hash_shift; 634 cache->def_seg_size = def_seg_size; 493 635 cache->lock = NULL; 494 cache->seg_head = NULL;495 636 cache->name = name; 496 637 638 /* allocate the hash table and first segment. */ 497 639 cache->hash_tab = (struct strcache2_entry **) 498 xmalloc ( STRCACHE2_INITIAL_SIZE* sizeof (struct strcache2_entry *));499 memset (cache->hash_tab, '\0', STRCACHE2_INITIAL_SIZE* sizeof (struct strcache2_entry *));640 xmalloc (cache->init_size * sizeof (struct strcache2_entry *)); 641 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *)); 500 642 strcache2_new_seg (cache, 0); 501 643 … … 512 654 { 513 655 /* unlink it */ 514 515 656 if (strcache_head == cache) 516 657 strcache_head = cache->next; … … 525 666 526 667 /* free the memory segments */ 527 528 668 do 529 669 { … … 535 675 536 676 /* free the hash and clear the structure. */ 537 538 677 free (cache->hash_tab); 539 678 memset (cache, '\0', sizeof (struct strcache2)); 540 679 } 541 680 542 681 /* Print statistics a string cache. */ 543 682 void 544 683 strcache2_print_stats (struct strcache2 *cache, const char *prefix) … … 560 699 561 700 /* Segment statistics. */ 562 563 701 for (seg = cache->seg_head; seg; seg = seg->next) 564 702 { … … 570 708 } 571 709 seg_avg_bytes = seg_total_bytes / seg_count; 572 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),710 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"), 573 711 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes, 574 STRCACHE2_SEG_SIZE, seg_avail_bytes);712 cache->def_seg_size, seg_avail_bytes); 575 713 576 714 /* String statistics. */ 577 578 715 idx = cache->hash_size; 579 716 while (idx-- > 0) 580 717 { 581 718 struct strcache2_entry const *entry = cache->hash_tab[idx]; 582 if (entry && entry != &deleted_entry)719 if (entry) 583 720 { 584 721 unsigned int length = entry->length; … … 591 728 } 592 729 str_avg_len = cache->count ? str_total_len / cache->count : 0; 593 printf (_("%s %u strings: total len = %lu / max = %ul / avg = %ul / min = %ul\n"),730 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"), 594 731 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len); 595 732 596 733 /* Hash statistics. */ 597 598 idx = STRCACHE2_INITIAL_SIZE; 734 idx = cache->init_size; 599 735 rehashes = 0; 600 736 while (idx < cache->hash_size) … … 604 740 } 605 741 606 printf (_("%s hash size = %u lookups / collisions = %lu / %lu (%u%%) %u rehashes\n"), 607 prefix, cache->hash_size, cache->lookup_count, cache->collision_count, 608 (unsigned int)((float)cache->collision_count / cache->lookup_count), 609 rehashes); 610 742 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"), 743 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count); 744 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"), 745 prefix, 746 cache->collision_1st_count, (unsigned int)((float)cache->collision_1st_count * 100 / cache->lookup_count), 747 cache->collision_2nd_count, (unsigned int)((float)cache->collision_2nd_count * 100 / cache->lookup_count), 748 cache->collision_3rd_count, (unsigned int)((float)cache->collision_3rd_count * 100 / cache->lookup_count)); 611 749 } 612 750
Note:
See TracChangeset
for help on using the changeset viewer.