| 1 | /* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR.
 | 
|---|
| 2 |    For Intel 80x86, x>=3.
 | 
|---|
| 3 |    Copyright (C) 1994-2000,2002,2005 Free Software Foundation, Inc.
 | 
|---|
| 4 |    This file is part of the GNU C Library.
 | 
|---|
| 5 |    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
 | 
|---|
| 6 |    Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
 | 
|---|
| 7 |    This version is developed using the same algorithm as the fast C
 | 
|---|
| 8 |    version which carries the following introduction:
 | 
|---|
| 9 |    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
 | 
|---|
| 10 |    with help from Dan Sahlin (dan@sics.se) and
 | 
|---|
| 11 |    commentary by Jim Blandy (jimb@ai.mit.edu);
 | 
|---|
| 12 |    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
 | 
|---|
| 13 |    and implemented by Roland McGrath (roland@ai.mit.edu).
 | 
|---|
| 14 | 
 | 
|---|
| 15 |    The GNU C Library is free software; you can redistribute it and/or
 | 
|---|
| 16 |    modify it under the terms of the GNU Lesser General Public
 | 
|---|
| 17 |    License as published by the Free Software Foundation; either
 | 
|---|
| 18 |    version 2.1 of the License, or (at your option) any later version.
 | 
|---|
| 19 | 
 | 
|---|
| 20 |    The GNU C Library is distributed in the hope that it will be useful,
 | 
|---|
| 21 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
| 22 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
|---|
| 23 |    Lesser General Public License for more details.
 | 
|---|
| 24 | 
 | 
|---|
| 25 |    You should have received a copy of the GNU Lesser General Public
 | 
|---|
| 26 |    License along with the GNU C Library; if not, write to the Free
 | 
|---|
| 27 |    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 | 
|---|
| 28 |    02111-1307 USA.  */
 | 
|---|
| 29 | 
 | 
|---|
| 30 | #include <sysdep.h>
 | 
|---|
| 31 | #include "asm-syntax.h"
 | 
|---|
| 32 | #include "bp-sym.h"
 | 
|---|
| 33 | #include "bp-asm.h"
 | 
|---|
| 34 | 
 | 
|---|
| 35 | #define PARMS   LINKAGE+4       /* space for 1 saved reg */
 | 
|---|
| 36 | #define RTN     PARMS
 | 
|---|
| 37 | #define STR     RTN+RTN_SIZE
 | 
|---|
| 38 | #define CHR     STR+PTR_SIZE
 | 
|---|
| 39 | 
 | 
|---|
| 40 |         .text
 | 
|---|
| 41 | ENTRY (BP_SYM (__rawmemchr))
 | 
|---|
| 42 |         ENTER
 | 
|---|
| 43 | 
 | 
|---|
| 44 |         /* Save callee-safe register used in this function.  */
 | 
|---|
| 45 |         pushl %edi
 | 
|---|
| 46 |         cfi_adjust_cfa_offset (4)
 | 
|---|
| 47 |         cfi_rel_offset (edi, 0)
 | 
|---|
| 48 | 
 | 
|---|
| 49 |         /* Load parameters into registers.  */
 | 
|---|
| 50 |         movl STR(%esp), %eax
 | 
|---|
| 51 |         movl CHR(%esp), %edx
 | 
|---|
| 52 |         CHECK_BOUNDS_LOW (%eax, STR(%esp))
 | 
|---|
| 53 | 
 | 
|---|
| 54 |         /* At the moment %edx contains C.  What we need for the
 | 
|---|
| 55 |            algorithm is C in all bytes of the dword.  Avoid
 | 
|---|
| 56 |            operations on 16 bit words because these require an
 | 
|---|
| 57 |            prefix byte (and one more cycle).  */
 | 
|---|
| 58 |         movb %dl, %dh           /* Now it is 0|0|c|c */
 | 
|---|
| 59 |         movl %edx, %ecx
 | 
|---|
| 60 |         shll $16, %edx          /* Now c|c|0|0 */
 | 
|---|
| 61 |         movw %cx, %dx           /* And finally c|c|c|c */
 | 
|---|
| 62 | 
 | 
|---|
| 63 |         /* Better performance can be achieved if the word (32
 | 
|---|
| 64 |            bit) memory access is aligned on a four-byte-boundary.
 | 
|---|
| 65 |            So process first bytes one by one until boundary is
 | 
|---|
| 66 |            reached. Don't use a loop for better performance.  */
 | 
|---|
| 67 | 
 | 
|---|
| 68 |         testb $3, %al           /* correctly aligned ? */
 | 
|---|
| 69 |         je L(1)                 /* yes => begin loop */
 | 
|---|
| 70 |         cmpb %dl, (%eax)        /* compare byte */
 | 
|---|
| 71 |         je L(9)                 /* target found => return */
 | 
|---|
| 72 |         incl %eax               /* increment source pointer */
 | 
|---|
| 73 | 
 | 
|---|
| 74 |         testb $3, %al           /* correctly aligned ? */
 | 
|---|
| 75 |         je L(1)                 /* yes => begin loop */
 | 
|---|
| 76 |         cmpb %dl, (%eax)        /* compare byte */
 | 
|---|
| 77 |         je L(9)                 /* target found => return */
 | 
|---|
| 78 |         incl %eax               /* increment source pointer */
 | 
|---|
| 79 | 
 | 
|---|
| 80 |         testb $3, %al           /* correctly aligned ? */
 | 
|---|
| 81 |         je L(1)                 /* yes => begin loop */
 | 
|---|
| 82 |         cmpb %dl, (%eax)        /* compare byte */
 | 
|---|
| 83 |         je L(9)                 /* target found => return */
 | 
|---|
| 84 |         incl %eax               /* increment source pointer */
 | 
|---|
| 85 | 
 | 
|---|
| 86 |       /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
 | 
|---|
| 87 |          change any of the hole bits of LONGWORD.
 | 
|---|
| 88 | 
 | 
|---|
| 89 |          1) Is this safe?  Will it catch all the zero bytes?
 | 
|---|
| 90 |          Suppose there is a byte with all zeros.  Any carry bits
 | 
|---|
| 91 |          propagating from its left will fall into the hole at its
 | 
|---|
| 92 |          least significant bit and stop.  Since there will be no
 | 
|---|
| 93 |          carry from its most significant bit, the LSB of the
 | 
|---|
| 94 |          byte to the left will be unchanged, and the zero will be
 | 
|---|
| 95 |          detected.
 | 
|---|
| 96 | 
 | 
|---|
| 97 |          2) Is this worthwhile?  Will it ignore everything except
 | 
|---|
| 98 |          zero bytes?  Suppose every byte of LONGWORD has a bit set
 | 
|---|
| 99 |          somewhere.  There will be a carry into bit 8.  If bit 8
 | 
|---|
| 100 |          is set, this will carry into bit 16.  If bit 8 is clear,
 | 
|---|
| 101 |          one of bits 9-15 must be set, so there will be a carry
 | 
|---|
| 102 |          into bit 16.  Similarly, there will be a carry into bit
 | 
|---|
| 103 |          24.  If one of bits 24-31 is set, there will be a carry
 | 
|---|
| 104 |          into bit 32 (=carry flag), so all of the hole bits will
 | 
|---|
| 105 |          be changed.
 | 
|---|
| 106 | 
 | 
|---|
| 107 |          3) But wait!  Aren't we looking for C, not zero?
 | 
|---|
| 108 |          Good point.  So what we do is XOR LONGWORD with a longword,
 | 
|---|
| 109 |          each of whose bytes is C.  This turns each byte that is C
 | 
|---|
| 110 |          into a zero.  */
 | 
|---|
| 111 | 
 | 
|---|
| 112 | 
 | 
|---|
| 113 |         /* Each round the main loop processes 16 bytes.  */
 | 
|---|
| 114 |         ALIGN (4)
 | 
|---|
| 115 | 
 | 
|---|
| 116 | L(1):   movl (%eax), %ecx       /* get word (= 4 bytes) in question */
 | 
|---|
| 117 |         movl $0xfefefeff, %edi  /* magic value */
 | 
|---|
| 118 |         xorl %edx, %ecx         /* XOR with word c|c|c|c => bytes of str == c
 | 
|---|
| 119 |                                    are now 0 */
 | 
|---|
| 120 |         addl %ecx, %edi         /* add the magic value to the word.  We get
 | 
|---|
| 121 |                                    carry bits reported for each byte which
 | 
|---|
| 122 |                                    is *not* 0 */
 | 
|---|
| 123 | 
 | 
|---|
| 124 |         /* According to the algorithm we had to reverse the effect of the
 | 
|---|
| 125 |            XOR first and then test the overflow bits.  But because the
 | 
|---|
| 126 |            following XOR would destroy the carry flag and it would (in a
 | 
|---|
| 127 |            representation with more than 32 bits) not alter then last
 | 
|---|
| 128 |            overflow, we can now test this condition.  If no carry is signaled
 | 
|---|
| 129 |            no overflow must have occurred in the last byte => it was 0. */
 | 
|---|
| 130 |         jnc L(8)
 | 
|---|
| 131 | 
 | 
|---|
| 132 |         /* We are only interested in carry bits that change due to the
 | 
|---|
| 133 |            previous add, so remove original bits */
 | 
|---|
| 134 |         xorl %ecx, %edi         /* ((word^charmask)+magic)^(word^charmask) */
 | 
|---|
| 135 | 
 | 
|---|
| 136 |         /* Now test for the other three overflow bits.  */
 | 
|---|
| 137 |         orl $0xfefefeff, %edi   /* set all non-carry bits */
 | 
|---|
| 138 |         incl %edi               /* add 1: if one carry bit was *not* set
 | 
|---|
| 139 |                                    the addition will not result in 0.  */
 | 
|---|
| 140 | 
 | 
|---|
| 141 |         /* If at least one byte of the word is C we don't get 0 in %edi.  */
 | 
|---|
| 142 |         jnz L(8)                /* found it => return pointer */
 | 
|---|
| 143 | 
 | 
|---|
| 144 |         /* This process is unfolded four times for better performance.
 | 
|---|
| 145 |            we don't increment the source pointer each time.  Instead we
 | 
|---|
| 146 |            use offsets and increment by 16 in each run of the loop.  But
 | 
|---|
| 147 |            before probing for the matching byte we need some extra code
 | 
|---|
| 148 |            (following LL(13) below).  Even the len can be compared with
 | 
|---|
| 149 |            constants instead of decrementing each time.  */
 | 
|---|
| 150 | 
 | 
|---|
| 151 |         movl 4(%eax), %ecx      /* get word (= 4 bytes) in question */
 | 
|---|
| 152 |         movl $0xfefefeff, %edi  /* magic value */
 | 
|---|
| 153 |         xorl %edx, %ecx         /* XOR with word c|c|c|c => bytes of str == c
 | 
|---|
| 154 |                                    are now 0 */
 | 
|---|
| 155 |         addl %ecx, %edi         /* add the magic value to the word.  We get
 | 
|---|
| 156 |                                    carry bits reported for each byte which
 | 
|---|
| 157 |                                    is *not* 0 */
 | 
|---|
| 158 |         jnc L(7)                /* highest byte is C => return pointer */
 | 
|---|
| 159 |         xorl %ecx, %edi         /* ((word^charmask)+magic)^(word^charmask) */
 | 
|---|
| 160 |         orl $0xfefefeff, %edi   /* set all non-carry bits */
 | 
|---|
| 161 |         incl %edi               /* add 1: if one carry bit was *not* set
 | 
|---|
| 162 |                                    the addition will not result in 0.  */
 | 
|---|
| 163 |         jnz L(7)                /* found it => return pointer */
 | 
|---|
| 164 | 
 | 
|---|
| 165 |         movl 8(%eax), %ecx      /* get word (= 4 bytes) in question */
 | 
|---|
| 166 |         movl $0xfefefeff, %edi  /* magic value */
 | 
|---|
| 167 |         xorl %edx, %ecx         /* XOR with word c|c|c|c => bytes of str == c
 | 
|---|
| 168 |                                    are now 0 */
 | 
|---|
| 169 |         addl %ecx, %edi         /* add the magic value to the word.  We get
 | 
|---|
| 170 |                                    carry bits reported for each byte which
 | 
|---|
| 171 |                                    is *not* 0 */
 | 
|---|
| 172 |         jnc L(6)                /* highest byte is C => return pointer */
 | 
|---|
| 173 |         xorl %ecx, %edi         /* ((word^charmask)+magic)^(word^charmask) */
 | 
|---|
| 174 |         orl $0xfefefeff, %edi   /* set all non-carry bits */
 | 
|---|
| 175 |         incl %edi               /* add 1: if one carry bit was *not* set
 | 
|---|
| 176 |                                    the addition will not result in 0.  */
 | 
|---|
| 177 |         jnz L(6)                /* found it => return pointer */
 | 
|---|
| 178 | 
 | 
|---|
| 179 |         movl 12(%eax), %ecx     /* get word (= 4 bytes) in question */
 | 
|---|
| 180 |         movl $0xfefefeff, %edi  /* magic value */
 | 
|---|
| 181 |         xorl %edx, %ecx         /* XOR with word c|c|c|c => bytes of str == c
 | 
|---|
| 182 |                                    are now 0 */
 | 
|---|
| 183 |         addl %ecx, %edi         /* add the magic value to the word.  We get
 | 
|---|
| 184 |                                    carry bits reported for each byte which
 | 
|---|
| 185 |                                    is *not* 0 */
 | 
|---|
| 186 |         jnc L(5)                /* highest byte is C => return pointer */
 | 
|---|
| 187 |         xorl %ecx, %edi         /* ((word^charmask)+magic)^(word^charmask) */
 | 
|---|
| 188 |         orl $0xfefefeff, %edi   /* set all non-carry bits */
 | 
|---|
| 189 |         incl %edi               /* add 1: if one carry bit was *not* set
 | 
|---|
| 190 |                                    the addition will not result in 0.  */
 | 
|---|
| 191 |         jnz L(5)                /* found it => return pointer */
 | 
|---|
| 192 | 
 | 
|---|
| 193 |         /* Adjust both counters for a full round, i.e. 16 bytes.  */
 | 
|---|
| 194 |         addl $16, %eax
 | 
|---|
| 195 |         jmp L(1)
 | 
|---|
| 196 |         /* add missing source pointer increments */
 | 
|---|
| 197 | L(5):   addl $4, %eax
 | 
|---|
| 198 | L(6):   addl $4, %eax
 | 
|---|
| 199 | L(7):   addl $4, %eax
 | 
|---|
| 200 | 
 | 
|---|
| 201 |         /* Test for the matching byte in the word.  %ecx contains a NUL
 | 
|---|
| 202 |            char in the byte which originally was the byte we are looking
 | 
|---|
| 203 |            at.  */
 | 
|---|
| 204 | L(8):   testb %cl, %cl          /* test first byte in dword */
 | 
|---|
| 205 |         jz L(9)                 /* if zero => return pointer */
 | 
|---|
| 206 |         incl %eax               /* increment source pointer */
 | 
|---|
| 207 | 
 | 
|---|
| 208 |         testb %ch, %ch          /* test second byte in dword */
 | 
|---|
| 209 |         jz L(9)                 /* if zero => return pointer */
 | 
|---|
| 210 |         incl %eax               /* increment source pointer */
 | 
|---|
| 211 | 
 | 
|---|
| 212 |         testl $0xff0000, %ecx   /* test third byte in dword */
 | 
|---|
| 213 |         jz L(9)                 /* if zero => return pointer */
 | 
|---|
| 214 |         incl %eax               /* increment source pointer */
 | 
|---|
| 215 | 
 | 
|---|
| 216 |         /* No further test needed we we know it is one of the four bytes.  */
 | 
|---|
| 217 | 
 | 
|---|
| 218 | L(9):
 | 
|---|
| 219 |         CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
 | 
|---|
| 220 |         RETURN_BOUNDED_POINTER (STR(%esp))
 | 
|---|
| 221 |         popl %edi               /* pop saved register */
 | 
|---|
| 222 |         cfi_adjust_cfa_offset (-4)
 | 
|---|
| 223 |         cfi_restore (edi)
 | 
|---|
| 224 | 
 | 
|---|
| 225 |         LEAVE
 | 
|---|
| 226 |         RET_PTR
 | 
|---|
| 227 | END (BP_SYM (__rawmemchr))
 | 
|---|
| 228 | 
 | 
|---|
| 229 | libc_hidden_def (BP_SYM (__rawmemchr))
 | 
|---|
| 230 | weak_alias (BP_SYM (__rawmemchr), BP_SYM (rawmemchr))
 | 
|---|