| 1 | /* strchrnul (str, chr) -- Return pointer to first occurrence of CHR in STR
|
|---|
| 2 | or the final NUL byte.
|
|---|
| 3 | For Intel 80x86, x>=3.
|
|---|
| 4 | Copyright (C) 1994-1997, 1999, 2000, 2005 Free Software Foundation, Inc.
|
|---|
| 5 | This file is part of the GNU C Library.
|
|---|
| 6 | Contributed by Ulrich Drepper <drepper@gnu.org>
|
|---|
| 7 | Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
|
|---|
| 8 |
|
|---|
| 9 | The GNU C Library is free software; you can redistribute it and/or
|
|---|
| 10 | modify it under the terms of the GNU Lesser General Public
|
|---|
| 11 | License as published by the Free Software Foundation; either
|
|---|
| 12 | version 2.1 of the License, or (at your option) any later version.
|
|---|
| 13 |
|
|---|
| 14 | The GNU C Library is distributed in the hope that it will be useful,
|
|---|
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|---|
| 17 | Lesser General Public License for more details.
|
|---|
| 18 |
|
|---|
| 19 | You should have received a copy of the GNU Lesser General Public
|
|---|
| 20 | License along with the GNU C Library; if not, write to the Free
|
|---|
| 21 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|---|
| 22 | 02111-1307 USA. */
|
|---|
| 23 |
|
|---|
| 24 | #include <sysdep.h>
|
|---|
| 25 | #include "asm-syntax.h"
|
|---|
| 26 | #include "bp-sym.h"
|
|---|
| 27 | #include "bp-asm.h"
|
|---|
| 28 |
|
|---|
| 29 | #define PARMS LINKAGE+4 /* space for 1 saved reg */
|
|---|
| 30 | #define RTN PARMS
|
|---|
| 31 | #define STR RTN+RTN_SIZE
|
|---|
| 32 | #define CHR STR+PTR_SIZE
|
|---|
| 33 |
|
|---|
| 34 | .text
|
|---|
| 35 | ENTRY (BP_SYM (__strchrnul))
|
|---|
| 36 | ENTER
|
|---|
| 37 |
|
|---|
| 38 | pushl %edi /* Save callee-safe registers used here. */
|
|---|
| 39 | cfi_adjust_cfa_offset (4)
|
|---|
| 40 | cfi_rel_offset (edi, 0)
|
|---|
| 41 |
|
|---|
| 42 | movl STR(%esp), %eax
|
|---|
| 43 | movl CHR(%esp), %edx
|
|---|
| 44 | CHECK_BOUNDS_LOW (%eax, STR(%esp))
|
|---|
| 45 |
|
|---|
| 46 | /* At the moment %edx contains CHR. What we need for the
|
|---|
| 47 | algorithm is CHR in all bytes of the dword. Avoid
|
|---|
| 48 | operations on 16 bit words because these require an
|
|---|
| 49 | prefix byte (and one more cycle). */
|
|---|
| 50 | movb %dl, %dh /* now it is 0|0|c|c */
|
|---|
| 51 | movl %edx, %ecx
|
|---|
| 52 | shll $16, %edx /* now it is c|c|0|0 */
|
|---|
| 53 | movw %cx, %dx /* and finally c|c|c|c */
|
|---|
| 54 |
|
|---|
| 55 | /* Before we start with the main loop we process single bytes
|
|---|
| 56 | until the source pointer is aligned. This has two reasons:
|
|---|
| 57 | 1. aligned 32-bit memory access is faster
|
|---|
| 58 | and (more important)
|
|---|
| 59 | 2. we process in the main loop 32 bit in one step although
|
|---|
| 60 | we don't know the end of the string. But accessing at
|
|---|
| 61 | 4-byte alignment guarantees that we never access illegal
|
|---|
| 62 | memory if this would not also be done by the trivial
|
|---|
| 63 | implementation (this is because all processor inherent
|
|---|
| 64 | boundaries are multiples of 4. */
|
|---|
| 65 |
|
|---|
| 66 | testb $3, %al /* correctly aligned ? */
|
|---|
| 67 | jz L(11) /* yes => begin loop */
|
|---|
| 68 | movb (%eax), %cl /* load byte in question (we need it twice) */
|
|---|
| 69 | cmpb %cl, %dl /* compare byte */
|
|---|
| 70 | je L(6) /* target found => return */
|
|---|
| 71 | testb %cl, %cl /* is NUL? */
|
|---|
| 72 | jz L(6) /* yes => return NULL */
|
|---|
| 73 | incl %eax /* increment pointer */
|
|---|
| 74 |
|
|---|
| 75 | testb $3, %al /* correctly aligned ? */
|
|---|
| 76 | jz L(11) /* yes => begin loop */
|
|---|
| 77 | movb (%eax), %cl /* load byte in question (we need it twice) */
|
|---|
| 78 | cmpb %cl, %dl /* compare byte */
|
|---|
| 79 | je L(6) /* target found => return */
|
|---|
| 80 | testb %cl, %cl /* is NUL? */
|
|---|
| 81 | jz L(6) /* yes => return NULL */
|
|---|
| 82 | incl %eax /* increment pointer */
|
|---|
| 83 |
|
|---|
| 84 | testb $3, %al /* correctly aligned ? */
|
|---|
| 85 | jz L(11) /* yes => begin loop */
|
|---|
| 86 | movb (%eax), %cl /* load byte in question (we need it twice) */
|
|---|
| 87 | cmpb %cl, %dl /* compare byte */
|
|---|
| 88 | je L(6) /* target found => return */
|
|---|
| 89 | testb %cl, %cl /* is NUL? */
|
|---|
| 90 | jz L(6) /* yes => return NULL */
|
|---|
| 91 | incl %eax /* increment pointer */
|
|---|
| 92 |
|
|---|
| 93 | /* No we have reached alignment. */
|
|---|
| 94 | jmp L(11) /* begin loop */
|
|---|
| 95 |
|
|---|
| 96 | /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
|
|---|
| 97 | change any of the hole bits of LONGWORD.
|
|---|
| 98 |
|
|---|
| 99 | 1) Is this safe? Will it catch all the zero bytes?
|
|---|
| 100 | Suppose there is a byte with all zeros. Any carry bits
|
|---|
| 101 | propagating from its left will fall into the hole at its
|
|---|
| 102 | least significant bit and stop. Since there will be no
|
|---|
| 103 | carry from its most significant bit, the LSB of the
|
|---|
| 104 | byte to the left will be unchanged, and the zero will be
|
|---|
| 105 | detected.
|
|---|
| 106 |
|
|---|
| 107 | 2) Is this worthwhile? Will it ignore everything except
|
|---|
| 108 | zero bytes? Suppose every byte of LONGWORD has a bit set
|
|---|
| 109 | somewhere. There will be a carry into bit 8. If bit 8
|
|---|
| 110 | is set, this will carry into bit 16. If bit 8 is clear,
|
|---|
| 111 | one of bits 9-15 must be set, so there will be a carry
|
|---|
| 112 | into bit 16. Similarly, there will be a carry into bit
|
|---|
| 113 | 24. If one of bits 24-31 is set, there will be a carry
|
|---|
| 114 | into bit 32 (=carry flag), so all of the hole bits will
|
|---|
| 115 | be changed.
|
|---|
| 116 |
|
|---|
| 117 | 3) But wait! Aren't we looking for CHR, not zero?
|
|---|
| 118 | Good point. So what we do is XOR LONGWORD with a longword,
|
|---|
| 119 | each of whose bytes is CHR. This turns each byte that is CHR
|
|---|
| 120 | into a zero. */
|
|---|
| 121 |
|
|---|
| 122 | /* Each round the main loop processes 16 bytes. */
|
|---|
| 123 |
|
|---|
| 124 | ALIGN(4)
|
|---|
| 125 |
|
|---|
| 126 | L(1): addl $16, %eax /* adjust pointer for whole round */
|
|---|
| 127 |
|
|---|
| 128 | L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 129 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 130 | are now 0 */
|
|---|
| 131 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 132 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 133 | carry bits reported for each byte which
|
|---|
| 134 | is *not* CHR */
|
|---|
| 135 |
|
|---|
| 136 | /* According to the algorithm we had to reverse the effect of the
|
|---|
| 137 | XOR first and then test the overflow bits. But because the
|
|---|
| 138 | following XOR would destroy the carry flag and it would (in a
|
|---|
| 139 | representation with more than 32 bits) not alter then last
|
|---|
| 140 | overflow, we can now test this condition. If no carry is signaled
|
|---|
| 141 | no overflow must have occurred in the last byte => it was 0. */
|
|---|
| 142 | jnc L(7)
|
|---|
| 143 |
|
|---|
| 144 | /* We are only interested in carry bits that change due to the
|
|---|
| 145 | previous add, so remove original bits */
|
|---|
| 146 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 147 |
|
|---|
| 148 | /* Now test for the other three overflow bits. */
|
|---|
| 149 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 150 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 151 | the addition will not result in 0. */
|
|---|
| 152 |
|
|---|
| 153 | /* If at least one byte of the word is CHR we don't get 0 in %edi. */
|
|---|
| 154 | jnz L(7) /* found it => return pointer */
|
|---|
| 155 |
|
|---|
| 156 | /* Now we made sure the dword does not contain the character we are
|
|---|
| 157 | looking for. But because we deal with strings we have to check
|
|---|
| 158 | for the end of string before testing the next dword. */
|
|---|
| 159 |
|
|---|
| 160 | xorl %edx, %ecx /* restore original dword without reload */
|
|---|
| 161 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 162 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 163 | carry bits reported for each byte which
|
|---|
| 164 | is *not* 0 */
|
|---|
| 165 | jnc L(7) /* highest byte is NUL => return NULL */
|
|---|
| 166 | xorl %ecx, %edi /* (word+magic)^word */
|
|---|
| 167 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 168 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 169 | the addition will not result in 0. */
|
|---|
| 170 | jnz L(7) /* found NUL => return NULL */
|
|---|
| 171 |
|
|---|
| 172 | movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 173 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 174 | are now 0 */
|
|---|
| 175 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 176 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 177 | carry bits reported for each byte which
|
|---|
| 178 | is *not* CHR */
|
|---|
| 179 | jnc L(71) /* highest byte is CHR => return pointer */
|
|---|
| 180 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 181 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 182 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 183 | the addition will not result in 0. */
|
|---|
| 184 | jnz L(71) /* found it => return pointer */
|
|---|
| 185 | xorl %edx, %ecx /* restore original dword without reload */
|
|---|
| 186 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 187 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 188 | carry bits reported for each byte which
|
|---|
| 189 | is *not* 0 */
|
|---|
| 190 | jnc L(71) /* highest byte is NUL => return NULL */
|
|---|
| 191 | xorl %ecx, %edi /* (word+magic)^word */
|
|---|
| 192 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 193 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 194 | the addition will not result in 0. */
|
|---|
| 195 | jnz L(71) /* found NUL => return NULL */
|
|---|
| 196 |
|
|---|
| 197 | movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 198 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 199 | are now 0 */
|
|---|
| 200 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 201 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 202 | carry bits reported for each byte which
|
|---|
| 203 | is *not* CHR */
|
|---|
| 204 | jnc L(72) /* highest byte is CHR => return pointer */
|
|---|
| 205 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 206 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 207 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 208 | the addition will not result in 0. */
|
|---|
| 209 | jnz L(72) /* found it => return pointer */
|
|---|
| 210 | xorl %edx, %ecx /* restore original dword without reload */
|
|---|
| 211 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 212 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 213 | carry bits reported for each byte which
|
|---|
| 214 | is *not* 0 */
|
|---|
| 215 | jnc L(72) /* highest byte is NUL => return NULL */
|
|---|
| 216 | xorl %ecx, %edi /* (word+magic)^word */
|
|---|
| 217 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 218 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 219 | the addition will not result in 0. */
|
|---|
| 220 | jnz L(72) /* found NUL => return NULL */
|
|---|
| 221 |
|
|---|
| 222 | movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
|
|---|
| 223 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
|
|---|
| 224 | are now 0 */
|
|---|
| 225 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 226 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 227 | carry bits reported for each byte which
|
|---|
| 228 | is *not* CHR */
|
|---|
| 229 | jnc L(73) /* highest byte is CHR => return pointer */
|
|---|
| 230 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */
|
|---|
| 231 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 232 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 233 | the addition will not result in 0. */
|
|---|
| 234 | jnz L(73) /* found it => return pointer */
|
|---|
| 235 | xorl %edx, %ecx /* restore original dword without reload */
|
|---|
| 236 | movl $0xfefefeff, %edi /* magic value */
|
|---|
| 237 | addl %ecx, %edi /* add the magic value to the word. We get
|
|---|
| 238 | carry bits reported for each byte which
|
|---|
| 239 | is *not* 0 */
|
|---|
| 240 | jnc L(73) /* highest byte is NUL => return NULL */
|
|---|
| 241 | xorl %ecx, %edi /* (word+magic)^word */
|
|---|
| 242 | orl $0xfefefeff, %edi /* set all non-carry bits */
|
|---|
| 243 | incl %edi /* add 1: if one carry bit was *not* set
|
|---|
| 244 | the addition will not result in 0. */
|
|---|
| 245 | jz L(1) /* no NUL found => restart loop */
|
|---|
| 246 |
|
|---|
| 247 | L(73): addl $4, %eax /* adjust pointer */
|
|---|
| 248 | L(72): addl $4, %eax
|
|---|
| 249 | L(71): addl $4, %eax
|
|---|
| 250 |
|
|---|
| 251 | /* We now scan for the byte in which the character was matched.
|
|---|
| 252 | But we have to take care of the case that a NUL char is
|
|---|
| 253 | found before this in the dword. */
|
|---|
| 254 |
|
|---|
| 255 | L(7): testb %cl, %cl /* is first byte CHR? */
|
|---|
| 256 | jz L(6) /* yes => return pointer */
|
|---|
| 257 | cmpb %dl, %cl /* is first byte NUL? */
|
|---|
| 258 | je L(6) /* yes => return NULL */
|
|---|
| 259 | incl %eax /* it's not in the first byte */
|
|---|
| 260 |
|
|---|
| 261 | testb %ch, %ch /* is second byte CHR? */
|
|---|
| 262 | jz L(6) /* yes => return pointer */
|
|---|
| 263 | cmpb %dl, %ch /* is second byte NUL? */
|
|---|
| 264 | je L(6) /* yes => return NULL? */
|
|---|
| 265 | incl %eax /* it's not in the second byte */
|
|---|
| 266 |
|
|---|
| 267 | shrl $16, %ecx /* make upper byte accessible */
|
|---|
| 268 | testb %cl, %cl /* is third byte CHR? */
|
|---|
| 269 | jz L(6) /* yes => return pointer */
|
|---|
| 270 | cmpb %dl, %cl /* is third byte NUL? */
|
|---|
| 271 | je L(6) /* yes => return NULL */
|
|---|
| 272 |
|
|---|
| 273 | /* It must be in the fourth byte and it cannot be NUL. */
|
|---|
| 274 | incl %eax
|
|---|
| 275 |
|
|---|
| 276 | L(6): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
|
|---|
| 277 | RETURN_BOUNDED_POINTER (STR(%esp))
|
|---|
| 278 | popl %edi /* restore saved register content */
|
|---|
| 279 | cfi_adjust_cfa_offset (-4)
|
|---|
| 280 | cfi_restore (edi)
|
|---|
| 281 |
|
|---|
| 282 | LEAVE
|
|---|
| 283 | RET_PTR
|
|---|
| 284 | END (BP_SYM (__strchrnul))
|
|---|
| 285 |
|
|---|
| 286 | weak_alias (BP_SYM (__strchrnul), BP_SYM (strchrnul))
|
|---|