Ignore:
Timestamp:
Oct 17, 2008, 1:15:30 AM (17 years ago)
Author:
bird
Message:

kmk: replaced strcache with strcacahe2.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/kmk/strcache2.c

    r1869 r1870  
    5656*******************************************************************************/
    5757/* The default size of a memory segment (1MB). */
    58 #define STRCACHE2_SEG_SIZE            (1024U*1024U)
    59 /* The initial hash table size (number of entries). Power of two. */
    60 #define STRCACHE2_INITIAL_SIZE        0x10000U
    61 
    62 
    63 /*******************************************************************************
    64 *   Global Variables                                                           *
    65 *******************************************************************************/
    66 /* the deleted filler hash table entry. */
    67 static struct strcache2_entry deleted_entry;
     58#define STRCACHE2_SEG_SIZE          (1024U*1024U)
     59/* The default hash table shift (hash size give as a power of two). */
     60#define STRCACHE2_HASH_SHIFT        16
     61
     62
    6863
    6964
     
    7469  size_t size;
    7570
    76   size = STRCACHE2_SEG_SIZE;
     71  size = cache->def_seg_size;
    7772  if (size < (size_t)minlen + sizeof (struct strcache2_seg))
    7873    {
     
    9994    {
    10095      unsigned int ch0 = *str++;
    101       hash = len--;
     96      hash = 0;
     97      len--;
    10298      while (len >= 2)
    10399        {
     
    137133    {
    138134      unsigned int ch0 = *str++;
    139       hash = len--;
     135      hash = 0;
     136      len--;
    140137      while (len >= 2)
    141138        {
     
    164161    }
    165162  else
    166     hash = 0;
    167   return hash | 1;
     163    hash = 1;
     164  hash |= 1;
     165  return hash;
    168166}
    169167
     
    176174      unsigned int ch0 = *str++;
    177175      ch0 = tolower (ch0);
    178       hash = len--;
     176      hash = 0;
     177      len--;
    179178      while (len >= 2)
    180179        {
     
    218217      unsigned int ch0 = *str++;
    219218      ch0 = tolower (ch0);
    220       hash = len--;
     219      hash = 0;
     220      len--;
    221221      while (len >= 2)
    222222        {
     
    248248    }
    249249  else
    250     hash = 0;
     250    hash = 1;
     251  hash |= 1;
    251252  return hash;
    252253}
     
    258259  /* the simple stuff first. */
    259260  if (   entry == NULL
    260       || entry == &deleted_entry
    261261      || entry->hash1 != hash1
    262262      || entry->length != length)
     
    264264
    265265  return !cache->case_insensitive
    266     ? memcmp (cache + 1, str, length) == 0
     266    ? memcmp (entry + 1, str, length) == 0
    267267#if defined(_MSC_VER) || defined(__OS2__)
    268     : _memicmp (cache + 1, str, length) == 0
     268    : _memicmp (entry + 1, str, length) == 0
    269269#else
    270     : strncasecmp ((const char *)(cache + 1), str, length) == 0
     270    : strncasecmp ((const char *)(entry + 1), str, length) == 0
    271271#endif
    272272    ;
     
    276276strcache2_rehash (struct strcache2 *cache)
    277277{
    278   /* TODO */
    279   struct strcache2 **ptr = (struct strcache2 **)1;
    280   assert(0);
    281   *ptr = cache;
     278  unsigned int src = cache->hash_size;
     279  struct strcache2_entry **src_tab = cache->hash_tab;
     280  struct strcache2_entry **dst_tab;
     281  unsigned int hash_mask;
     282
     283  /* Allocate a new hash table twice the size of the current. */
     284  cache->hash_size <<= 1;
     285  cache->hash_mask <<= 1;
     286  cache->hash_mask |= 1;
     287  cache->rehash_count <<= 1;
     288  cache->hash_tab = dst_tab = (struct strcache2_entry **)
     289    xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
     290  memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
     291
     292  /* Copy the entries from the old to the new hash table. */
     293  hash_mask = cache->hash_mask;
     294  while (src-- > 0)
     295    {
     296      struct strcache2_entry *entry = src_tab[src];
     297      if (entry)
     298        {
     299          unsigned int dst = entry->hash1 & hash_mask;
     300          if (dst_tab[dst])
     301            {
     302              unsigned int hash2 = entry->hash2;
     303              if (!hash2)
     304                entry->hash2 = hash2 = cache->case_insensitive
     305                  ? strcache2_case_insensitive_hash_2 ((const char *)(entry + 1), entry->length)
     306                  : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length);
     307              dst += hash2;
     308              dst &= hash_mask;
     309              while (dst_tab[dst])
     310                {
     311                  dst += hash2;
     312                  dst &= hash_mask;
     313                }
     314            }
     315
     316          dst_tab[dst] = entry;
     317        }
     318    }
     319
     320  /* That's it, just free the old table and we're done. */
     321  free (src_tab);
    282322}
    283323
     
    344384  /* Lookup the entry in the hash table, hoping for an
    345385     early match. */
    346 
    347   idx = hash1 / cache->hash_size;
     386  idx = hash1 & cache->hash_mask;
    348387  entry = cache->hash_tab[idx];
    349388  if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    353392  else
    354393    {
    355       unsigned int deleted_idx = entry == &deleted_entry ? idx : ~0U;
    356       cache->collision_count++;
     394      cache->collision_1st_count++;
    357395
    358396      hash2 = cache->case_insensitive
     
    360398        : strcache2_case_sensitive_hash_2 (str, length);
    361399      idx += hash2;
    362       idx /= cache->hash_size;
     400      idx &= cache->hash_mask;
    363401      entry = cache->hash_tab[idx];
    364402      if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    367405      if (entry)
    368406        {
     407          cache->collision_2nd_count++;
    369408          do
    370409            {
    371               if (deleted_idx == ~0U && entry == &deleted_entry)
    372                 deleted_idx = idx;
    373 
    374410              idx += hash2;
    375               idx /= cache->hash_size;
     411              idx &= cache->hash_mask;
    376412              entry = cache->hash_tab[idx];
     413              cache->collision_3rd_count++;
    377414              if (strcache2_is_equal (cache, entry, str, length, hash1))
    378415                return (const char *)(entry + 1);
     
    380417          while (entry);
    381418        }
    382       if (deleted_idx != ~0U)
    383         idx = deleted_idx;
     419    }
     420
     421  /* Not found, add it at IDX. */
     422  return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
     423}
     424
     425/* The public add/lookup string interface for prehashed strings.
     426   Use strcache2_hash_str to calculate the hash of a string. */
     427const char *
     428strcache2_add_hashed (struct strcache2 *cache, const char *str, unsigned int length,
     429                      unsigned int hash1, unsigned int hash2)
     430{
     431  struct strcache2_entry const *entry;
     432  unsigned int idx;
     433#ifndef NDEBUG
     434  unsigned correct_hash;
     435
     436  correct_hash = cache->case_insensitive
     437               ? strcache2_case_insensitive_hash_1 (str, length)
     438               : strcache2_case_sensitive_hash_1 (str, length);
     439  MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
     440  if (hash2)
     441    {
     442      correct_hash = cache->case_insensitive
     443                   ? strcache2_case_insensitive_hash_2 (str, length)
     444                   : strcache2_case_sensitive_hash_2 (str, length);
     445      MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
     446    }
     447#endif /* NDEBUG */
     448
     449  cache->lookup_count++;
     450
     451  /* Lookup the entry in the hash table, hoping for an
     452     early match. */
     453  idx = hash1 & cache->hash_mask;
     454  entry = cache->hash_tab[idx];
     455  if (strcache2_is_equal (cache, entry, str, length, hash1))
     456    return (const char *)(entry + 1);
     457  if (entry)
     458    {
     459      cache->collision_1st_count++;
     460
     461      if (!hash2)
     462        hash2 = cache->case_insensitive
     463          ? strcache2_case_insensitive_hash_2 (str, length)
     464          : strcache2_case_sensitive_hash_2 (str, length);
     465      idx += hash2;
     466      idx &= cache->hash_mask;
     467      entry = cache->hash_tab[idx];
     468      if (strcache2_is_equal (cache, entry, str, length, hash1))
     469        return (const char *)(entry + 1);
     470
     471      if (entry)
     472        {
     473          cache->collision_2nd_count++;
     474          do
     475            {
     476              idx += hash2;
     477              idx &= cache->hash_mask;
     478              entry = cache->hash_tab[idx];
     479              cache->collision_3rd_count++;
     480              if (strcache2_is_equal (cache, entry, str, length, hash1))
     481                return (const char *)(entry + 1);
     482            }
     483          while (entry);
     484        }
    384485    }
    385486
     
    393494{
    394495  /* Check mandatory alignment first. */
    395 
    396496  if (!((size_t)str & (sizeof (void *) - 1)))
    397497    {
    398498      /* Check the segment list and consider the question answered if the
    399499         string is within one of them. (Could check it more thoroughly...) */
    400 
    401500      struct strcache2_seg const *seg;
    402       for (seg = cache->seg_head; seg; seg++)
     501      for (seg = cache->seg_head; seg; seg = seg->next)
    403502        if ((size_t)(str - seg->start) < seg->size)
    404503            return 1;
     
    451550        ? strcache2_case_insensitive_hash_2 (str, entry->length)
    452551        : strcache2_case_sensitive_hash_2 (str, entry->length);
    453       if (hash != entry->hash1)
     552      if (hash != entry->hash2)
    454553        {
    455554          fprintf (stderr,
     
    475574}
    476575
     576/* Calculates the case sensitive hash values of the string.
     577   The first hash is returned, the other is put at HASH2P. */
     578unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
     579{
     580  *hash2p = strcache2_case_sensitive_hash_2 (str, length);
     581  return    strcache2_case_sensitive_hash_1 (str, length);
     582}
     583
     584/* Calculates the case insensitive hash values of the string.
     585   The first hash is returned, the other is put at HASH2P. */
     586unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
     587{
     588  *hash2p = strcache2_case_insensitive_hash_2 (str, length);
     589  return    strcache2_case_insensitive_hash_1 (str, length);
     590}
     591
     592
    477593/* List of initialized string caches. */
    478594static struct strcache2 *strcache_head;
     
    480596/* Initalizes a new cache. */
    481597void
    482 strcache2_init (struct strcache2 *cache, const char *name,
    483                 int case_insensitive, int thread_safe)
    484 {
     598strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
     599                unsigned int def_seg_size, int case_insensitive, int thread_safe)
     600{
     601  unsigned hash_shift;
    485602  assert (!thread_safe);
    486603
     604  /* calc the size as a power of two */
     605  if (!size)
     606    hash_shift = STRCACHE2_HASH_SHIFT;
     607  else
     608    {
     609      assert (size <= (~0U / 2 + 1));
     610      for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
     611        /* nothing */;
     612    }
     613
     614  /* adjust the default segment size */
     615  if (!def_seg_size)
     616    def_seg_size = STRCACHE2_SEG_SIZE;
     617  else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
     618    def_seg_size = sizeof (struct strcache2_seg) * 10;
     619  else if ((def_seg_size & 0xfff) < 0xf00)
     620    def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
     621
     622
     623  /* init the structure. */
    487624  cache->case_insensitive = case_insensitive;
    488   cache->hash_size = STRCACHE2_INITIAL_SIZE;
     625  cache->hash_mask = (1U << hash_shift) - 1U;
    489626  cache->count = 0;
    490   cache->rehash_count = STRCACHE2_INITIAL_SIZE / 4 * 3;
    491   cache->collision_count = 0;
    492627  cache->lookup_count = 0;
     628  cache->collision_1st_count = 0;
     629  cache->collision_2nd_count = 0;
     630  cache->collision_3rd_count = 0;
     631  cache->rehash_count = (1U << hash_shift) / 4 * 3;   /* rehash at 75% */
     632  cache->init_size = 1U << hash_shift;
     633  cache->hash_size = 1U << hash_shift;
     634  cache->def_seg_size = def_seg_size;
    493635  cache->lock = NULL;
    494   cache->seg_head = NULL;
    495636  cache->name = name;
    496637
     638  /* allocate the hash table and first segment. */
    497639  cache->hash_tab = (struct strcache2_entry **)
    498     xmalloc (STRCACHE2_INITIAL_SIZE * sizeof (struct strcache2_entry *));
    499   memset (cache->hash_tab, '\0', STRCACHE2_INITIAL_SIZE * sizeof (struct strcache2_entry *));
     640    xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
     641  memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
    500642  strcache2_new_seg (cache, 0);
    501643
     
    512654{
    513655  /* unlink it */
    514 
    515656  if (strcache_head == cache)
    516657    strcache_head = cache->next;
     
    525666
    526667  /* free the memory segments */
    527 
    528668  do
    529669    {
     
    535675
    536676  /* free the hash and clear the structure. */
    537 
    538677  free (cache->hash_tab);
    539678  memset (cache, '\0', sizeof (struct strcache2));
    540679}
    541680
    542 
     681/* Print statistics a string cache. */
    543682void
    544683strcache2_print_stats (struct strcache2 *cache, const char *prefix)
     
    560699
    561700  /* Segment statistics. */
    562 
    563701  for (seg = cache->seg_head; seg; seg = seg->next)
    564702    {
     
    570708    }
    571709  seg_avg_bytes = seg_total_bytes / seg_count;
    572   printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u  avail = %lu\n"),
     710  printf (_("%s  %u segments: total = %lu / max = %lu / avg = %lu / def = %u  avail = %lu\n"),
    573711          prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
    574           STRCACHE2_SEG_SIZE, seg_avail_bytes);
     712          cache->def_seg_size, seg_avail_bytes);
    575713
    576714  /* String statistics. */
    577 
    578715  idx = cache->hash_size;
    579716  while (idx-- > 0)
    580717    {
    581718      struct strcache2_entry const *entry = cache->hash_tab[idx];
    582       if (entry && entry != &deleted_entry)
     719      if (entry)
    583720        {
    584721          unsigned int length = entry->length;
     
    591728    }
    592729  str_avg_len = cache->count ? str_total_len / cache->count : 0;
    593   printf (_("%s %u strings: total len = %lu / max = %ul / avg = %ul / min = %ul\n"),
     730  printf (_("%s  %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
    594731          prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
    595732
    596733  /* Hash statistics. */
    597 
    598   idx = STRCACHE2_INITIAL_SIZE;
     734  idx = cache->init_size;
    599735  rehashes = 0;
    600736  while (idx < cache->hash_size)
     
    604740    }
    605741
    606   printf (_("%s hash size = %u  lookups / collisions = %lu / %lu (%u%%)  %u rehashes\n"),
    607           prefix, cache->hash_size, cache->lookup_count, cache->collision_count,
    608           (unsigned int)((float)cache->collision_count / cache->lookup_count),
    609           rehashes);
    610 
     742  printf (_("%s  hash size = %u  mask = %#x  rehashed %u times  lookups = %lu\n"),
     743          prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
     744  printf (_("%s  hash collisions 1st = %lu (%u%%)  2nd = %lu (%u%%)  3rd = %lu (%u%%)\n"),
     745          prefix,
     746          cache->collision_1st_count, (unsigned int)((float)cache->collision_1st_count * 100 / cache->lookup_count),
     747          cache->collision_2nd_count, (unsigned int)((float)cache->collision_2nd_count * 100 / cache->lookup_count),
     748          cache->collision_3rd_count, (unsigned int)((float)cache->collision_3rd_count * 100 / cache->lookup_count));
    611749}
    612750
Note: See TracChangeset for help on using the changeset viewer.