| 1 | /* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR.
|
|---|
| 2 | For Intel 80x86, x>=3.
|
|---|
| 3 | Copyright (C) 1994-2000,2002,2005 Free Software Foundation, Inc.
|
|---|
| 4 | This file is part of the GNU C Library.
|
|---|
| 5 | Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
|
|---|
| 6 | Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
|---|
| 7 | This version is developed using the same algorithm as the fast C
|
|---|
| 8 | version which carries the following introduction:
|
|---|
| 9 | Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
|
|---|
| 10 | with help from Dan Sahlin (dan@sics.se) and
|
|---|
| 11 | commentary by Jim Blandy (jimb@ai.mit.edu);
|
|---|
| 12 | adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
|
|---|
| 13 | and implemented by Roland McGrath (roland@ai.mit.edu).
|
|---|
| 14 |
|
|---|
| 15 | The GNU C Library is free software; you can redistribute it and/or
|
|---|
| 16 | modify it under the terms of the GNU Lesser General Public
|
|---|
| 17 | License as published by the Free Software Foundation; either
|
|---|
| 18 | version 2.1 of the License, or (at your option) any later version.
|
|---|
| 19 |
|
|---|
| 20 | The GNU C Library is distributed in the hope that it will be useful,
|
|---|
| 21 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|---|
| 23 | Lesser General Public License for more details.
|
|---|
| 24 |
|
|---|
| 25 | You should have received a copy of the GNU Lesser General Public
|
|---|
| 26 | License along with the GNU C Library; if not, write to the Free
|
|---|
| 27 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|---|
| 28 | 02111-1307 USA. */
|
|---|
| 29 |
|
|---|
| 30 | #include <sysdep.h>
|
|---|
| 31 | #include "asm-syntax.h"
|
|---|
| 32 | #include "bp-sym.h"
|
|---|
| 33 | #include "bp-asm.h"
|
|---|
| 34 |
|
|---|
| 35 | #define PARMS LINKAGE+4 /* space for 1 saved reg */
|
|---|
| 36 | #define RTN PARMS
|
|---|
| 37 | #define STR RTN+RTN_SIZE
|
|---|
| 38 | #define CHR STR+PTR_SIZE
|
|---|
| 39 |
|
|---|
| 40 | .text
|
|---|
| 41 | ENTRY (BP_SYM (__rawmemchr))
|
|---|
| 42 | ENTER
|
|---|
| 43 |
|
|---|
| 44 | /* Save callee-safe register used in this function. */
|
|---|
| 45 | pushl %edi
|
|---|
| 46 | cfi_adjust_cfa_offset (4)
|
|---|
| 47 | cfi_rel_offset (edi, 0)
|
|---|
| 48 |
|
|---|
| 49 | /* Load parameters into registers. */
|
|---|
| 50 | movl STR(%esp), %eax
|
|---|
| 51 | movl CHR(%esp), %edx
|
|---|
| 52 | CHECK_BOUNDS_LOW (%eax, STR(%esp))
|
|---|
| 53 |
|
|---|
| 54 | /* At the moment %edx contains C. What we need for the
|
|---|
| 55 | algorithm is C in all bytes of the dword. Avoid
|
|---|
| 56 | operations on 16 bit words because these require an
|
|---|
| 57 | prefix byte (and one more cycle). */
|
|---|
| 58 | movb %dl, %dh /* Now it is 0|0|c|c */
|
|---|
| 59 | movl %edx, %ecx
|
|---|
| 60 | shll $16, %edx /* Now c|c|0|0 */
|
|---|
| 61 | movw %cx, %dx /* And finally c|c|c|c */
|
|---|
| 62 |
|
|---|
| 63 | /* Better performance can be achieved if the word (32
|
|---|
| 64 | bit) memory access is aligned on a four-byte-boundary.
|
|---|
| 65 | So process first bytes one by one until boundary is
|
|---|
| 66 | reached. Don't use a loop for better performance. */
|
|---|
| 67 |
|
|---|
| 68 | testb $3, %al /* correctly aligned ? */
|
|---|
| 69 | je L(1) /* yes => begin loop */
|
|---|
| 70 | cmpb %dl, (%eax) /* compare byte */
|
|---|
| 71 | je L(9) /* target found => return */
|
|---|
| 72 | incl %eax /* increment source pointer */
|
|---|
| 73 |
|
|---|
| 74 | testb $3, %al /* correctly aligned ? */
|
|---|
| 75 | je L(1) /* yes => begin loop */
|
|---|
| 76 | cmpb %dl, (%eax) /* compare byte */
|
|---|
| 77 | je L(9) /* target found => return */
|
|---|
| 78 | incl %eax /* increment source pointer */
|
|---|
| 79 |
|
|---|
| 80 | testb $3, %al /* correctly aligned ? */
|
|---|
| 81 | je L(1) /* yes => begin loop */
|
|---|
| 82 | cmpb %dl, (%eax) /* compare byte */
|
|---|
| 83 | je L(9) /* target found => return */
|
|---|
| 84 | incl %eax /* increment source pointer */
|
|---|
| 85 |
|
|---|
| 86 | /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
|
|---|
| 87 | change any of the hole bits of LONGWORD.
|
|---|
| 88 |
|
|---|
| 89 | 1) Is this safe? Will it catch all the zero bytes?
|
|---|
| 90 | Suppose there is a byte with all zeros. Any carry bits
|
|---|
| 91 | propagating from its left will fall into the hole at its
|
|---|
| 92 | least significant bit and stop. Since there will be no
|
|---|
| 93 | carry from its most significant bit, the LSB of the
|
|---|
| 94 | byte to the left will be unchanged, and the zero will be
|
|---|
| 95 | detected.
|
|---|
| 96 |
|
|---|
| 97 | 2) Is this worthwhile? Will it ignore everything except
|
|---|
| 98 | zero bytes? Suppose every byte of LONGWORD has a bit set
|
|---|
| 99 | somewhere. There will be a carry into bit 8. If bit 8
|
|---|
| 100 | is set, this will carry into bit 16. If bit 8 is clear,
|
|---|
| 101 | one of bits 9-15 must be set, so there will be a carry
|
|---|
| 102 | into bit 16. Similarly, there will be a carry into bit
|
|---|
| 103 | 24. If one of bits 24-31 is set, there will be a carry
|
|---|
| 104 | into bit 32 (=carry flag), so all of the hole bits will
|
|---|
| 105 | be changed.
|
|---|
| 106 |
|
|---|
| 107 | 3) But wait! Aren't we looking for C, not zero?
|
|---|
| 108 | Good point. So what we do is XOR LONGWORD with a longword,
|
|---|
| 109 | each of whose bytes is C. This turns each byte that is C
|
|---|
| 110 | into a zero. */
|
|---|
| 111 |
|
|---|
| 112 |
|
|---|
| 113 | /* Each round the main loop processes 16 bytes. */
|
|---|
| 114 | ALIGN (4)
|
|---|
| 115 |
|
|---|
| 116 | L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 117 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 118 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 119 | are now 0 */
|
|---|
| 120 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 121 | carry bits reported for each byte which
|
|---|
| 122 | is *not* 0 */
|
|---|
| 123 |
|
|---|
| 124 | /* According to the algorithm we had to reverse the effect of the
|
|---|
| 125 | XOR first and then test the overflow bits. But because the
|
|---|
| 126 | following XOR would destroy the carry flag and it would (in a
|
|---|
| 127 | representation with more than 32 bits) not alter then last
|
|---|
| 128 | overflow, we can now test this condition. If no carry is signaled
|
|---|
| 129 | no overflow must have occurred in the last byte => it was 0. */
|
|---|
| 130 | jnc L(8)
|
|---|
| 131 |
|
|---|
| 132 | /* We are only interested in carry bits that change due to the
|
|---|
| 133 | previous add, so remove original bits */
|
|---|
| 134 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 135 |
|
|---|
| 136 | /* Now test for the other three overflow bits. */
|
|---|
| 137 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 138 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 139 | the addition will not result in 0. */
|
|---|
| 140 |
|
|---|
| 141 | /* If at least one byte of the word is C we don't get 0 in %edi. */
|
|---|
| 142 | jnz L(8) /* found it => return pointer */
|
|---|
| 143 |
|
|---|
| 144 | /* This process is unfolded four times for better performance.
|
|---|
| 145 | we don't increment the source pointer each time. Instead we
|
|---|
| 146 | use offsets and increment by 16 in each run of the loop. But
|
|---|
| 147 | before probing for the matching byte we need some extra code
|
|---|
| 148 | (following LL(13) below). Even the len can be compared with
|
|---|
| 149 | constants instead of decrementing each time. */
|
|---|
| 150 |
|
|---|
| 151 | movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 152 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 153 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 154 | are now 0 */
|
|---|
| 155 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 156 | carry bits reported for each byte which
|
|---|
| 157 | is *not* 0 */
|
|---|
| 158 | jnc L(7) /* highest byte is C => return pointer */
|
|---|
| 159 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 160 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 161 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 162 | the addition will not result in 0. */
|
|---|
| 163 | jnz L(7) /* found it => return pointer */
|
|---|
| 164 |
|
|---|
| 165 | movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 166 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 167 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 168 | are now 0 */
|
|---|
| 169 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 170 | carry bits reported for each byte which
|
|---|
| 171 | is *not* 0 */
|
|---|
| 172 | jnc L(6) /* highest byte is C => return pointer */
|
|---|
| 173 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 174 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 175 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 176 | the addition will not result in 0. */
|
|---|
| 177 | jnz L(6) /* found it => return pointer */
|
|---|
| 178 |
|
|---|
| 179 | movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 180 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 181 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 182 | are now 0 */
|
|---|
| 183 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 184 | carry bits reported for each byte which
|
|---|
| 185 | is *not* 0 */
|
|---|
| 186 | jnc L(5) /* highest byte is C => return pointer */
|
|---|
| 187 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 188 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 189 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 190 | the addition will not result in 0. */
|
|---|
| 191 | jnz L(5) /* found it => return pointer */
|
|---|
| 192 |
|
|---|
| 193 | /* Adjust both counters for a full round, i.e. 16 bytes. */
|
|---|
| 194 | addl $16, %eax
|
|---|
| 195 | jmp L(1)
|
|---|
| 196 | /* add missing source pointer increments */
|
|---|
| 197 | L(5): addl $4, %eax
|
|---|
| 198 | L(6): addl $4, %eax
|
|---|
| 199 | L(7): addl $4, %eax
|
|---|
| 200 |
|
|---|
| 201 | /* Test for the matching byte in the word. %ecx contains a NUL
|
|---|
| 202 | char in the byte which originally was the byte we are looking
|
|---|
| 203 | at. */
|
|---|
| 204 | L(8): testb %cl, %cl /* test first byte in dword */
|
|---|
| 205 | jz L(9) /* if zero => return pointer */
|
|---|
| 206 | incl %eax /* increment source pointer */
|
|---|
| 207 |
|
|---|
| 208 | testb %ch, %ch /* test second byte in dword */
|
|---|
| 209 | jz L(9) /* if zero => return pointer */
|
|---|
| 210 | incl %eax /* increment source pointer */
|
|---|
| 211 |
|
|---|
| 212 | testl $0xff0000, %ecx /* test third byte in dword */
|
|---|
| 213 | jz L(9) /* if zero => return pointer */
|
|---|
| 214 | incl %eax /* increment source pointer */
|
|---|
| 215 |
|
|---|
| 216 | /* No further test needed we we know it is one of the four bytes. */
|
|---|
| 217 |
|
|---|
| 218 | L(9):
|
|---|
| 219 | CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
|
|---|
| 220 | RETURN_BOUNDED_POINTER (STR(%esp))
|
|---|
| 221 | popl %edi /* pop saved register */
|
|---|
| 222 | cfi_adjust_cfa_offset (-4)
|
|---|
| 223 | cfi_restore (edi)
|
|---|
| 224 |
|
|---|
| 225 | LEAVE
|
|---|
| 226 | RET_PTR
|
|---|
| 227 | END (BP_SYM (__rawmemchr))
|
|---|
| 228 |
|
|---|
| 229 | libc_hidden_def (BP_SYM (__rawmemchr))
|
|---|
| 230 | weak_alias (BP_SYM (__rawmemchr), BP_SYM (rawmemchr))
|
|---|