Ignore:
Timestamp:
Oct 21, 2008, 11:35:29 PM (17 years ago)
Author:
bird
Message:

strchache2: hash hacking (no results or changes yet).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/kmk/strcache2.c

    r1906 r1908  
    5656*******************************************************************************/
    5757/* The default size of a memory segment (1MB). */
    58 #define STRCACHE2_SEG_SIZE          (1024U*1024U)
     58#define STRCACHE2_SEG_SIZE              (1024U*1024U)
    5959/* The default hash table shift (hash size give as a power of two). */
    60 #define STRCACHE2_HASH_SHIFT        16
     60#define STRCACHE2_HASH_SHIFT            16
     61/** Does the modding / masking of a hash number into an index. */
     62#ifdef STRCACHE2_USE_MASK
     63# define STRCACHE2_MOD_IT(cache, hash)  ((hash) & (cache)->hash_mask)
     64#else
     65# define STRCACHE2_MOD_IT(cache, hash)  ((hash) % (cache)->hash_div)
     66#endif
    6167
    6268
     
    6975
    7076
     77/** Finds the closest primary number for power of two value (or something else
     78 *  useful if not support).   */
     79MY_INLINE strcache2_find_prime(unsigned int shift)
     80{
     81  switch (shift)
     82    {
     83      case  5:  return 31;
     84      case  6:  return 61;
     85      case  7:  return 127;
     86      case  8:  return 251;
     87      case  9:  return 509;
     88      case 10:  return 1021;
     89      case 11:  return 2039;
     90      case 12:  return 4093;
     91      case 13:  return 8191;
     92      case 14:  return 16381;
     93      case 15:  return 32749;
     94      case 16:  return 65521;
     95      case 17:  return 131063;
     96      case 18:  return 262139;
     97      case 19:  return 524269;
     98      case 20:  return 1048573;
     99      case 21:  return 2097143;
     100      case 22:  return 4194301;
     101      case 23:  return 8388593;
     102
     103      default:
     104          assert (0);
     105          return (1 << shift) - 1;
     106    }
     107}
     108
    71109static struct strcache2_seg *
    72110strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
     
    105143strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
    106144{
     145#if 1
    107146  /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
    108147           when running the two 100 times over typical kBuild strings that
     
    144183    hash = 0;
    145184  return hash;
     185#else
     186# if 1
     187  /* This is SDBM.  This specific form and loop unroll was benchmarked to
     188     be 28% faster than return_STRING_N_HASH_1.  (Now the weird thing is
     189     that putting the (ch) first in the assignment made it noticably slower.)
     190
     191     However, it is noticably slower in practice, most likely because of more
     192     collisions. Hrmpf.
     193     */
     194#  define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
     195  unsigned int hash = 0;
     196# else
     197 /* This is DJB2.  This specific form/unroll was benchmarked to be 27%
     198    fast than return_STRING_N_HASH_1.
     199
     200    Ditto. 2x Hrmpf */
     201#  define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
     202  unsigned int hash = 5381;
     203# endif
     204  while (len >= 4)
     205    {
     206      UPDATE_HASH (str[0]);
     207      UPDATE_HASH (str[1]);
     208      UPDATE_HASH (str[2]);
     209      UPDATE_HASH (str[3]);
     210      str += 4;
     211      len -= 4;
     212    }
     213  switch (len)
     214    {
     215      default:
     216      case 0:
     217        return hash;
     218      case 1:
     219        UPDATE_HASH (str[0]);
     220        return hash;
     221      case 2:
     222        UPDATE_HASH (str[0]);
     223        UPDATE_HASH (str[1]);
     224        return hash;
     225      case 3:
     226        UPDATE_HASH (str[0]);
     227        UPDATE_HASH (str[1]);
     228        UPDATE_HASH (str[2]);
     229        return hash;
     230    }
     231#endif
    146232}
    147233
     
    506592  struct strcache2_entry **src_tab = cache->hash_tab;
    507593  struct strcache2_entry **dst_tab;
     594#ifdef STRCACHE2_USE_MASK
    508595  unsigned int hash_mask;
     596#else
     597  unsigned int hash_div;
     598#endif
    509599
    510600  /* Allocate a new hash table twice the size of the current. */
    511601  cache->hash_size <<= 1;
     602#ifdef STRCACHE2_USE_MASK
    512603  cache->hash_mask <<= 1;
    513604  cache->hash_mask |= 1;
     605#else
     606  for (hash_div = 1; (1U << hash_div) < cache->hash_size; hash_div++)
     607    /* nothing */;
     608  cache->hash_div = strcache2_find_prime(hash_div);
     609#endif
    514610  cache->rehash_count <<= 1;
    515611  cache->hash_tab = dst_tab = (struct strcache2_entry **)
     
    518614
    519615  /* Copy the entries from the old to the new hash table. */
     616#ifdef STRCACHE2_USE_MASK
    520617  hash_mask = cache->hash_mask;
     618#else
     619  hash_div = cache->hash_div;
     620#endif
    521621  while (src-- > 0)
    522622    {
     
    524624      if (entry)
    525625        {
     626#ifdef STRCACHE2_USE_MASK
    526627          unsigned int dst = entry->hash1 & hash_mask;
     628#else
     629          unsigned int dst = entry->hash1 % hash_div;
     630#endif
    527631          if (dst_tab[dst])
    528632            {
     
    533637                  : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length);
    534638              dst += hash2;
     639#ifdef STRCACHE2_USE_MASK
    535640              dst &= hash_mask;
     641#else
     642              dst %= hash_div;
     643#endif
    536644              while (dst_tab[dst])
    537645                {
    538646                  dst += hash2;
     647#ifdef STRCACHE2_USE_MASK
    539648                  dst &= hash_mask;
     649#else
     650                  dst %= hash_div;
     651#endif
    540652                }
    541653            }
     
    612724  /* Lookup the entry in the hash table, hoping for an
    613725     early match. */
    614   idx = hash1 & cache->hash_mask;
     726  idx = STRCACHE2_MOD_IT(cache, hash1);
    615727  entry = cache->hash_tab[idx];
    616728  if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    624736      hash2 = strcache2_case_sensitive_hash_2 (str, length);
    625737      idx += hash2;
    626       idx &= cache->hash_mask;
     738      idx = STRCACHE2_MOD_IT(cache, idx);
    627739      entry = cache->hash_tab[idx];
    628740      if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    635747            {
    636748              idx += hash2;
    637               idx &= cache->hash_mask;
     749              idx = STRCACHE2_MOD_IT(cache, idx);
    638750              entry = cache->hash_tab[idx];
    639751              cache->collision_3rd_count++;
     
    674786  /* Lookup the entry in the hash table, hoping for an
    675787     early match. */
    676   idx = hash1 & cache->hash_mask;
     788  idx = STRCACHE2_MOD_IT(cache, hash1);
    677789  entry = cache->hash_tab[idx];
    678790  if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    687799          : strcache2_case_sensitive_hash_2 (str, length);
    688800      idx += hash2;
    689       idx &= cache->hash_mask;
     801      idx = STRCACHE2_MOD_IT(cache, idx);
    690802      entry = cache->hash_tab[idx];
    691803      if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    698810            {
    699811              idx += hash2;
    700               idx &= cache->hash_mask;
     812              idx = STRCACHE2_MOD_IT(cache, idx);
    701813              entry = cache->hash_tab[idx];
    702814              cache->collision_3rd_count++;
     
    727839  /* Lookup the entry in the hash table, hoping for an
    728840     early match. */
    729   idx = hash1 & cache->hash_mask;
     841  idx = STRCACHE2_MOD_IT(cache, hash1);
    730842  entry = cache->hash_tab[idx];
    731843  if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    739851      hash2 = strcache2_case_sensitive_hash_2 (str, length);
    740852      idx += hash2;
    741       idx &= cache->hash_mask;
     853      idx = STRCACHE2_MOD_IT(cache, idx);
    742854      entry = cache->hash_tab[idx];
    743855      if (strcache2_is_equal (cache, entry, str, length, hash1))
     
    750862            {
    751863              idx += hash2;
    752               idx &= cache->hash_mask;
     864              idx = STRCACHE2_MOD_IT(cache, idx);
    753865              entry = cache->hash_tab[idx];
    754866              cache->collision_3rd_count++;
     
    780892  /* Lookup the entry in the hash table, hoping for an
    781893     early match. */
    782   idx = hash1 & cache->hash_mask;
     894  idx = STRCACHE2_MOD_IT(cache, hash1);
    783895  entry = cache->hash_tab[idx];
    784896  if (strcache2_is_iequal (cache, entry, str, length, hash1))
     
    792904      hash2 = strcache2_case_insensitive_hash_2 (str, length);
    793905      idx += hash2;
    794       idx &= cache->hash_mask;
     906      idx = STRCACHE2_MOD_IT(cache, idx);
    795907      entry = cache->hash_tab[idx];
    796908      if (strcache2_is_iequal (cache, entry, str, length, hash1))
     
    803915            {
    804916              idx += hash2;
    805               idx &= cache->hash_mask;
     917              idx = STRCACHE2_MOD_IT(cache, idx);
    806918              entry = cache->hash_tab[idx];
    807919              cache->collision_3rd_count++;
     
    842954  /* Lookup the entry in the hash table, hoping for an
    843955     early match. */
    844   idx = hash1 & cache->hash_mask;
     956  idx = STRCACHE2_MOD_IT(cache, hash1);
    845957  entry = cache->hash_tab[idx];
    846958  if (strcache2_is_iequal (cache, entry, str, length, hash1))
     
    853965        hash2 = strcache2_case_insensitive_hash_2 (str, length);
    854966      idx += hash2;
    855       idx &= cache->hash_mask;
     967      idx = STRCACHE2_MOD_IT(cache, idx);
    856968      entry = cache->hash_tab[idx];
    857969      if (strcache2_is_iequal (cache, entry, str, length, hash1))
     
    864976            {
    865977              idx += hash2;
    866               idx &= cache->hash_mask;
     978              idx = STRCACHE2_MOD_IT(cache, idx);
    867979              entry = cache->hash_tab[idx];
    868980              cache->collision_3rd_count++;
     
    8931005  /* Lookup the entry in the hash table, hoping for an
    8941006     early match. */
    895   idx = hash1 & cache->hash_mask;
     1007  idx = STRCACHE2_MOD_IT(cache, hash1);
    8961008  entry = cache->hash_tab[idx];
    8971009  if (strcache2_is_iequal (cache, entry, str, length, hash1))
     
    9051017      hash2 = strcache2_case_insensitive_hash_2 (str, length);
    9061018      idx += hash2;
    907       idx &= cache->hash_mask;
     1019      idx = STRCACHE2_MOD_IT(cache, idx);
    9081020      entry = cache->hash_tab[idx];
    9091021      if (strcache2_is_iequal (cache, entry, str, length, hash1))
     
    9161028            {
    9171029              idx += hash2;
    918               idx &= cache->hash_mask;
     1030              idx = STRCACHE2_MOD_IT(cache, idx);
    9191031              entry = cache->hash_tab[idx];
    9201032              cache->collision_3rd_count++;
     
    10641176  /* init the structure. */
    10651177  cache->case_insensitive = case_insensitive;
     1178#ifdef STRCACHE2_USE_MASK
    10661179  cache->hash_mask = (1U << hash_shift) - 1U;
     1180#else
     1181  cache->hash_div = strcache2_find_prime(hash_shift);
     1182#endif
    10671183  cache->count = 0;
    10681184  cache->lookup_count = 0;
     
    11811297    }
    11821298
     1299#ifdef STRCACHE2_USE_MASK
    11831300  printf (_("%s  hash size = %u  mask = %#x  rehashed %u times  lookups = %lu\n"),
    11841301          prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
     1302#else
     1303  printf (_("%s  hash size = %u  div = %#x  rehashed %u times  lookups = %lu\n"),
     1304          prefix, cache->hash_size, cache->hash_div, rehashes, cache->lookup_count);
     1305#endif
    11851306  printf (_("%s  hash collisions 1st = %lu (%u%%)  2nd = %lu (%u%%)  3rd = %lu (%u%%)\n"),
    11861307          prefix,
Note: See TracChangeset for help on using the changeset viewer.