Changeset 391 for python/trunk/Objects/stringlib/fastsearch.h
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Objects/stringlib/fastsearch.h
r2 r391 6 6 /* fast search/count implementation, based on a mix between boyer- 7 7 moore and horspool, with a few more bells and whistles on the top. 8 for some more background, see: http://effbot.org/ stringlib*/8 for some more background, see: http://effbot.org/zone/stringlib.htm */ 9 9 10 10 /* note: fastsearch may access s[n], which isn't a problem when using … … 17 17 #define FAST_COUNT 0 18 18 #define FAST_SEARCH 1 19 #define FAST_RSEARCH 2 20 21 #if LONG_BIT >= 128 22 #define STRINGLIB_BLOOM_WIDTH 128 23 #elif LONG_BIT >= 64 24 #define STRINGLIB_BLOOM_WIDTH 64 25 #elif LONG_BIT >= 32 26 #define STRINGLIB_BLOOM_WIDTH 32 27 #else 28 #error "LONG_BIT is smaller than 32" 29 #endif 30 31 #define STRINGLIB_BLOOM_ADD(mask, ch) \ 32 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) 33 #define STRINGLIB_BLOOM(mask, ch) \ 34 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) 19 35 20 36 Py_LOCAL_INLINE(Py_ssize_t) 21 37 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, 22 38 const STRINGLIB_CHAR* p, Py_ssize_t m, 23 int mode)39 Py_ssize_t maxcount, int mode) 24 40 { 25 long mask;41 unsigned long mask; 26 42 Py_ssize_t skip, count = 0; 27 43 Py_ssize_t i, j, mlast, w; … … 29 45 w = n - m; 30 46 31 if (w < 0 )47 if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) 32 48 return -1; 33 49 … … 39 55 if (mode == FAST_COUNT) { 40 56 for (i = 0; i < n; i++) 57 if (s[i] == p[0]) { 58 count++; 59 if (count == maxcount) 60 return maxcount; 61 } 62 return count; 63 } else if (mode == FAST_SEARCH) { 64 for (i = 0; i < n; i++) 41 65 if (s[i] == p[0]) 42 count++; 43 return count; 44 } else { 45 for (i = 0; i < n; i++) 66 return i; 67 } else { /* FAST_RSEARCH */ 68 for (i = n - 1; i > -1; i--) 46 69 if (s[i] == p[0]) 47 70 return i; … … 51 74 52 75 mlast = m - 1; 76 skip = mlast - 1; 77 mask = 0; 53 78 54 /* create compressed boyer-moore delta 1 table */ 55 skip = mlast - 1; 56 /* process pattern[:-1] */ 57 for (mask = i = 0; i < mlast; i++) { 58 mask |= (1 << (p[i] & 0x1F)); 59 if (p[i] == p[mlast]) 60 skip = mlast - i - 1; 61 } 62 /* process pattern[-1] outside the loop */ 63 mask |= (1 << (p[mlast] & 0x1F)); 79 if (mode != FAST_RSEARCH) { 64 80 65 for (i = 0; i <= w; i++) { 66 /* note: using mlast in the skip path slows things down on x86 */ 67 if (s[i+m-1] == p[m-1]) { 68 /* candidate match */ 69 for (j = 0; j < mlast; j++) 70 if (s[i+j] != p[j]) 71 break; 72 if (j == mlast) { 73 /* got a match! */ 74 if (mode != FAST_COUNT) 81 /* create compressed boyer-moore delta 1 table */ 82 83 /* process pattern[:-1] */ 84 for (i = 0; i < mlast; i++) { 85 STRINGLIB_BLOOM_ADD(mask, p[i]); 86 if (p[i] == p[mlast]) 87 skip = mlast - i - 1; 88 } 89 /* process pattern[-1] outside the loop */ 90 STRINGLIB_BLOOM_ADD(mask, p[mlast]); 91 92 for (i = 0; i <= w; i++) { 93 /* note: using mlast in the skip path slows things down on x86 */ 94 if (s[i+m-1] == p[m-1]) { 95 /* candidate match */ 96 for (j = 0; j < mlast; j++) 97 if (s[i+j] != p[j]) 98 break; 99 if (j == mlast) { 100 /* got a match! */ 101 if (mode != FAST_COUNT) 102 return i; 103 count++; 104 if (count == maxcount) 105 return maxcount; 106 i = i + mlast; 107 continue; 108 } 109 /* miss: check if next character is part of pattern */ 110 if (!STRINGLIB_BLOOM(mask, s[i+m])) 111 i = i + m; 112 else 113 i = i + skip; 114 } else { 115 /* skip: check if next character is part of pattern */ 116 if (!STRINGLIB_BLOOM(mask, s[i+m])) 117 i = i + m; 118 } 119 } 120 } else { /* FAST_RSEARCH */ 121 122 /* create compressed boyer-moore delta 1 table */ 123 124 /* process pattern[0] outside the loop */ 125 STRINGLIB_BLOOM_ADD(mask, p[0]); 126 /* process pattern[:0:-1] */ 127 for (i = mlast; i > 0; i--) { 128 STRINGLIB_BLOOM_ADD(mask, p[i]); 129 if (p[i] == p[0]) 130 skip = i - 1; 131 } 132 133 for (i = w; i >= 0; i--) { 134 if (s[i] == p[0]) { 135 /* candidate match */ 136 for (j = mlast; j > 0; j--) 137 if (s[i+j] != p[j]) 138 break; 139 if (j == 0) 140 /* got a match! */ 75 141 return i; 76 count++; 77 i = i + mlast; 78 continue; 142 /* miss: check if previous character is part of pattern */ 143 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) 144 i = i - m; 145 else 146 i = i - skip; 147 } else { 148 /* skip: check if previous character is part of pattern */ 149 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) 150 i = i - m; 79 151 } 80 /* miss: check if next character is part of pattern */81 if (!(mask & (1 << (s[i+m] & 0x1F))))82 i = i + m;83 else84 i = i + skip;85 } else {86 /* skip: check if next character is part of pattern */87 if (!(mask & (1 << (s[i+m] & 0x1F))))88 i = i + m;89 152 } 90 153 } … … 96 159 97 160 #endif 98 99 /*100 Local variables:101 c-basic-offset: 4102 indent-tabs-mode: nil103 End:104 */
Note:
See TracChangeset
for help on using the changeset viewer.