Changeset 3613 for trunk/src/sed/lib
- Timestamp:
- Sep 19, 2024, 2:34:43 AM (10 months ago)
- Location:
- trunk/src/sed
- Files:
-
- 14 deleted
- 12 edited
- 201 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/sed
-
Property svn:mergeinfo
set to
/vendor/sed/current merged eligible
-
Property svn:mergeinfo
set to
-
trunk/src/sed/lib/alloca.c
r599 r3613 1 1 /* alloca.c -- allocate automatically reclaimed memory 2 (Mostly) portable public-domain implementation -- D A Gwyn 2 This file is in the public domain. */ 3 4 /* (Mostly) portable implementation -- D A Gwyn 3 5 4 6 This implementation of the PWB library alloca function, … … 22 24 your main control loop, etc. to force garbage collection. */ 23 25 24 #ifdef HAVE_CONFIG_H25 26 #include <config.h> 26 #endif27 27 28 #ifdef HAVE_STRING_H 28 #include <alloca.h> 29 29 30 #include <string.h> 30 #endif31 #ifdef HAVE_STDLIB_H32 31 #include <stdlib.h> 33 #endif34 32 35 #ifdef emacs 36 #include "blockinput.h" 37 #endif 38 39 /* If compiling with GCC 2, this file's not needed. */ 40 #if !defined (__GNUC__) || __GNUC__ < 2 33 /* If compiling with GCC or clang, this file is not needed. */ 34 #if !(defined __GNUC__ || defined __clang__) 41 35 42 36 /* If someone has defined alloca as a macro, 43 37 there must be some other way alloca is supposed to work. */ 44 #ifndef alloca 45 46 #ifdef emacs 47 #ifdef static 48 /* actually, only want this if static is defined as "" 49 -- this is for usg, in which emacs must undefine static 50 in order to make unexec workable 51 */ 52 #ifndef STACK_DIRECTION 53 you 54 lose 55 -- must know STACK_DIRECTION at compile-time 56 #endif /* STACK_DIRECTION undefined */ 57 #endif /* static */ 58 #endif /* emacs */ 59 60 /* If your stack is a linked list of frames, you have to 61 provide an "address metric" ADDRESS_FUNCTION macro. */ 62 63 #if defined (CRAY) && defined (CRAY_STACKSEG_END) 64 long i00afunc (); 65 #define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg)) 66 #else 67 #define ADDRESS_FUNCTION(arg) &(arg) 68 #endif 69 70 #if __STDC__ 71 typedef void *pointer; 72 #else 73 typedef char *pointer; 74 #endif 75 76 #ifndef NULL 77 #define NULL 0 78 #endif 79 80 /* Different portions of Emacs need to call different versions of 81 malloc. The Emacs executable needs alloca to call xmalloc, because 82 ordinary malloc isn't protected from input signals. On the other 83 hand, the utilities in lib-src need alloca to call malloc; some of 84 them are very simple, and don't have an xmalloc routine. 85 86 Non-Emacs programs expect this to call xmalloc. 87 88 Callers below should use malloc. */ 89 90 #ifndef emacs 91 #define malloc xmalloc 92 #endif 93 extern pointer malloc (); 38 # ifndef alloca 94 39 95 40 /* Define STACK_DIRECTION if you know the direction of stack … … 101 46 STACK_DIRECTION = 0 => direction of growth unknown */ 102 47 103 # ifndef STACK_DIRECTION104 # define STACK_DIRECTION 0/* Direction unknown. */105 # endif48 # ifndef STACK_DIRECTION 49 # define STACK_DIRECTION 0 /* Direction unknown. */ 50 # endif 106 51 107 # if STACK_DIRECTION != 052 # if STACK_DIRECTION != 0 108 53 109 # define STACK_DIR STACK_DIRECTION/* Known at compile-time. */54 # define STACK_DIR STACK_DIRECTION /* Known at compile-time. */ 110 55 111 # else /* STACK_DIRECTION == 0; need run-time code. */56 # else /* STACK_DIRECTION == 0; need run-time code. */ 112 57 113 static int stack_dir; 114 # define STACK_DIRstack_dir58 static int stack_dir; /* 1 or -1 once known. */ 59 # define STACK_DIR stack_dir 115 60 116 static void117 find_stack_direction ( )61 static int 62 find_stack_direction (int *addr, int depth) 118 63 { 119 static char *addr = NULL; /* Address of first `dummy', once known. */ 120 auto char dummy; /* To get stack address. */ 121 122 if (addr == NULL) 123 { /* Initial entry. */ 124 addr = ADDRESS_FUNCTION (dummy); 125 126 find_stack_direction (); /* Recurse once. */ 127 } 128 else 129 { 130 /* Second entry. */ 131 if (ADDRESS_FUNCTION (dummy) > addr) 132 stack_dir = 1; /* Stack grew upward. */ 133 else 134 stack_dir = -1; /* Stack grew downward. */ 135 } 64 int dir, dummy = 0; 65 if (! addr) 66 addr = &dummy; 67 *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1; 68 dir = depth ? find_stack_direction (addr, depth - 1) : 0; 69 return dir + dummy; 136 70 } 137 71 138 # endif /* STACK_DIRECTION == 0 */72 # endif /* STACK_DIRECTION == 0 */ 139 73 140 74 /* An "alloca header" is used to: … … 145 79 alignment chunk size. The following default should work okay. */ 146 80 147 # ifndefALIGN_SIZE148 # define ALIGN_SIZEsizeof(double)149 # endif81 # ifndef ALIGN_SIZE 82 # define ALIGN_SIZE sizeof(double) 83 # endif 150 84 151 85 typedef union hdr 152 86 { 153 char align[ALIGN_SIZE]; 87 char align[ALIGN_SIZE]; /* To force sizeof(header). */ 154 88 struct 155 89 { 156 union hdr *next; 157 char *deep; 90 union hdr *next; /* For chaining headers. */ 91 char *deep; /* For stack depth measure. */ 158 92 } h; 159 93 } header; 160 94 161 static header *last_alloca_header = NULL; 95 static header *last_alloca_header = NULL; /* -> last alloca header. */ 162 96 163 97 /* Return a pointer to at least SIZE bytes of storage, … … 168 102 implementations of C, for example under Gould's UTX/32. */ 169 103 170 pointer 171 alloca (size) 172 unsigned size; 104 void * 105 alloca (size_t size) 173 106 { 174 auto char probe; 175 register char *depth = ADDRESS_FUNCTION (probe);107 auto char probe; /* Probes stack depth: */ 108 register char *depth = &probe; 176 109 177 # if STACK_DIRECTION == 0178 if (STACK_DIR == 0) 179 find_stack_direction ();180 # endif110 # if STACK_DIRECTION == 0 111 if (STACK_DIR == 0) /* Unknown growth direction. */ 112 STACK_DIR = find_stack_direction (NULL, (size & 1) + 20); 113 # endif 181 114 182 115 /* Reclaim garbage, defined as all alloca'd storage that … … 184 117 185 118 { 186 register header *hp; /* Traverses linked list. */ 187 188 #ifdef emacs 189 BLOCK_INPUT; 190 #endif 119 register header *hp; /* Traverses linked list. */ 191 120 192 121 for (hp = last_alloca_header; hp != NULL;) 193 122 if ((STACK_DIR > 0 && hp->h.deep > depth) 194 195 196 123 || (STACK_DIR < 0 && hp->h.deep < depth)) 124 { 125 register header *np = hp->h.next; 197 126 198 free ((pointer) hp);/* Collect garbage. */127 free (hp); /* Collect garbage. */ 199 128 200 hp = np;/* -> next header. */201 129 hp = np; /* -> next header. */ 130 } 202 131 else 203 break;/* Rest are not deeper. */132 break; /* Rest are not deeper. */ 204 133 205 last_alloca_header = hp; /* -> last valid storage. */ 206 207 #ifdef emacs 208 UNBLOCK_INPUT; 209 #endif 134 last_alloca_header = hp; /* -> last valid storage. */ 210 135 } 211 136 212 137 if (size == 0) 213 return NULL; 138 return NULL; /* No allocation required. */ 214 139 215 140 /* Allocate combined header + user data storage. */ 216 141 217 142 { 218 register pointer new = malloc (sizeof (header) + size);219 143 /* Address of header. */ 144 register header *new; 220 145 221 if (new == 0) 222 abort(); 146 size_t combined_size = sizeof (header) + size; 147 if (combined_size < sizeof (header)) 148 memory_full (); 223 149 224 ((header *) new)->h.next = last_alloca_header; 225 ((header *) new)->h.deep = depth; 150 new = malloc (combined_size); 226 151 227 last_alloca_header = (header *) new; 152 if (! new) 153 memory_full (); 154 155 new->h.next = last_alloca_header; 156 new->h.deep = depth; 157 158 last_alloca_header = new; 228 159 229 160 /* User storage begins just after header. */ 230 161 231 return ( pointer) ((char *) new + sizeof (header));162 return (void *) (new + 1); 232 163 } 233 164 } 234 165 235 #if defined (CRAY) && defined (CRAY_STACKSEG_END) 236 237 #ifdef DEBUG_I00AFUNC 238 #include <stdio.h> 239 #endif 240 241 #ifndef CRAY_STACK 242 #define CRAY_STACK 243 #ifndef CRAY2 244 /* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */ 245 struct stack_control_header 246 { 247 long shgrow:32; /* Number of times stack has grown. */ 248 long shaseg:32; /* Size of increments to stack. */ 249 long shhwm:32; /* High water mark of stack. */ 250 long shsize:32; /* Current size of stack (all segments). */ 251 }; 252 253 /* The stack segment linkage control information occurs at 254 the high-address end of a stack segment. (The stack 255 grows from low addresses to high addresses.) The initial 256 part of the stack segment linkage control information is 257 0200 (octal) words. This provides for register storage 258 for the routine which overflows the stack. */ 259 260 struct stack_segment_linkage 261 { 262 long ss[0200]; /* 0200 overflow words. */ 263 long sssize:32; /* Number of words in this segment. */ 264 long ssbase:32; /* Offset to stack base. */ 265 long:32; 266 long sspseg:32; /* Offset to linkage control of previous 267 segment of stack. */ 268 long:32; 269 long sstcpt:32; /* Pointer to task common address block. */ 270 long sscsnm; /* Private control structure number for 271 microtasking. */ 272 long ssusr1; /* Reserved for user. */ 273 long ssusr2; /* Reserved for user. */ 274 long sstpid; /* Process ID for pid based multi-tasking. */ 275 long ssgvup; /* Pointer to multitasking thread giveup. */ 276 long sscray[7]; /* Reserved for Cray Research. */ 277 long ssa0; 278 long ssa1; 279 long ssa2; 280 long ssa3; 281 long ssa4; 282 long ssa5; 283 long ssa6; 284 long ssa7; 285 long sss0; 286 long sss1; 287 long sss2; 288 long sss3; 289 long sss4; 290 long sss5; 291 long sss6; 292 long sss7; 293 }; 294 295 #else /* CRAY2 */ 296 /* The following structure defines the vector of words 297 returned by the STKSTAT library routine. */ 298 struct stk_stat 299 { 300 long now; /* Current total stack size. */ 301 long maxc; /* Amount of contiguous space which would 302 be required to satisfy the maximum 303 stack demand to date. */ 304 long high_water; /* Stack high-water mark. */ 305 long overflows; /* Number of stack overflow ($STKOFEN) calls. */ 306 long hits; /* Number of internal buffer hits. */ 307 long extends; /* Number of block extensions. */ 308 long stko_mallocs; /* Block allocations by $STKOFEN. */ 309 long underflows; /* Number of stack underflow calls ($STKRETN). */ 310 long stko_free; /* Number of deallocations by $STKRETN. */ 311 long stkm_free; /* Number of deallocations by $STKMRET. */ 312 long segments; /* Current number of stack segments. */ 313 long maxs; /* Maximum number of stack segments so far. */ 314 long pad_size; /* Stack pad size. */ 315 long current_address; /* Current stack segment address. */ 316 long current_size; /* Current stack segment size. This 317 number is actually corrupted by STKSTAT to 318 include the fifteen word trailer area. */ 319 long initial_address; /* Address of initial segment. */ 320 long initial_size; /* Size of initial segment. */ 321 }; 322 323 /* The following structure describes the data structure which trails 324 any stack segment. I think that the description in 'asdef' is 325 out of date. I only describe the parts that I am sure about. */ 326 327 struct stk_trailer 328 { 329 long this_address; /* Address of this block. */ 330 long this_size; /* Size of this block (does not include 331 this trailer). */ 332 long unknown2; 333 long unknown3; 334 long link; /* Address of trailer block of previous 335 segment. */ 336 long unknown5; 337 long unknown6; 338 long unknown7; 339 long unknown8; 340 long unknown9; 341 long unknown10; 342 long unknown11; 343 long unknown12; 344 long unknown13; 345 long unknown14; 346 }; 347 348 #endif /* CRAY2 */ 349 #endif /* not CRAY_STACK */ 350 351 #ifdef CRAY2 352 /* Determine a "stack measure" for an arbitrary ADDRESS. 353 I doubt that "lint" will like this much. */ 354 355 static long 356 i00afunc (long *address) 357 { 358 struct stk_stat status; 359 struct stk_trailer *trailer; 360 long *block, size; 361 long result = 0; 362 363 /* We want to iterate through all of the segments. The first 364 step is to get the stack status structure. We could do this 365 more quickly and more directly, perhaps, by referencing the 366 $LM00 common block, but I know that this works. */ 367 368 STKSTAT (&status); 369 370 /* Set up the iteration. */ 371 372 trailer = (struct stk_trailer *) (status.current_address 373 + status.current_size 374 - 15); 375 376 /* There must be at least one stack segment. Therefore it is 377 a fatal error if "trailer" is null. */ 378 379 if (trailer == 0) 380 abort (); 381 382 /* Discard segments that do not contain our argument address. */ 383 384 while (trailer != 0) 385 { 386 block = (long *) trailer->this_address; 387 size = trailer->this_size; 388 if (block == 0 || size == 0) 389 abort (); 390 trailer = (struct stk_trailer *) trailer->link; 391 if ((block <= address) && (address < (block + size))) 392 break; 393 } 394 395 /* Set the result to the offset in this segment and add the sizes 396 of all predecessor segments. */ 397 398 result = address - block; 399 400 if (trailer == 0) 401 { 402 return result; 403 } 404 405 do 406 { 407 if (trailer->this_size <= 0) 408 abort (); 409 result += trailer->this_size; 410 trailer = (struct stk_trailer *) trailer->link; 411 } 412 while (trailer != 0); 413 414 /* We are done. Note that if you present a bogus address (one 415 not in any segment), you will get a different number back, formed 416 from subtracting the address of the first block. This is probably 417 not what you want. */ 418 419 return (result); 420 } 421 422 #else /* not CRAY2 */ 423 /* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP. 424 Determine the number of the cell within the stack, 425 given the address of the cell. The purpose of this 426 routine is to linearize, in some sense, stack addresses 427 for alloca. */ 428 429 static long 430 i00afunc (long address) 431 { 432 long stkl = 0; 433 434 long size, pseg, this_segment, stack; 435 long result = 0; 436 437 struct stack_segment_linkage *ssptr; 438 439 /* Register B67 contains the address of the end of the 440 current stack segment. If you (as a subprogram) store 441 your registers on the stack and find that you are past 442 the contents of B67, you have overflowed the segment. 443 444 B67 also points to the stack segment linkage control 445 area, which is what we are really interested in. */ 446 447 stkl = CRAY_STACKSEG_END (); 448 ssptr = (struct stack_segment_linkage *) stkl; 449 450 /* If one subtracts 'size' from the end of the segment, 451 one has the address of the first word of the segment. 452 453 If this is not the first segment, 'pseg' will be 454 nonzero. */ 455 456 pseg = ssptr->sspseg; 457 size = ssptr->sssize; 458 459 this_segment = stkl - size; 460 461 /* It is possible that calling this routine itself caused 462 a stack overflow. Discard stack segments which do not 463 contain the target address. */ 464 465 while (!(this_segment <= address && address <= stkl)) 466 { 467 #ifdef DEBUG_I00AFUNC 468 fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl); 469 #endif 470 if (pseg == 0) 471 break; 472 stkl = stkl - pseg; 473 ssptr = (struct stack_segment_linkage *) stkl; 474 size = ssptr->sssize; 475 pseg = ssptr->sspseg; 476 this_segment = stkl - size; 477 } 478 479 result = address - this_segment; 480 481 /* If you subtract pseg from the current end of the stack, 482 you get the address of the previous stack segment's end. 483 This seems a little convoluted to me, but I'll bet you save 484 a cycle somewhere. */ 485 486 while (pseg != 0) 487 { 488 #ifdef DEBUG_I00AFUNC 489 fprintf (stderr, "%011o %011o\n", pseg, size); 490 #endif 491 stkl = stkl - pseg; 492 ssptr = (struct stack_segment_linkage *) stkl; 493 size = ssptr->sssize; 494 pseg = ssptr->sspseg; 495 result += size; 496 } 497 return (result); 498 } 499 500 #endif /* not CRAY2 */ 501 #endif /* CRAY */ 502 503 #endif /* no alloca */ 504 #endif /* not GCC version 2 */ 166 # endif /* no alloca */ 167 #endif /* not GCC || clang */ -
trunk/src/sed/lib/memchr.c
r599 r3613 1 /* Copyright (C) 1991, 1993, 1996, 1997 Free Software Foundation, Inc. 1 /* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2022 2 Free Software Foundation, Inc. 3 2 4 Based on strlen implementation by Torbjorn Granlund (tege@sics.se), 3 5 with help from Dan Sahlin (dan@sics.se) and … … 7 9 8 10 NOTE: The canonical source of this file is maintained with the GNU C Library. 9 Bugs can be reported to bug-glibc@ gnu.org.11 Bugs can be reported to bug-glibc@prep.ai.mit.edu. 10 12 11 This program is free software; you can redistribute it and/or modify it12 under the terms of the GNU General Public License as published by the13 Free Software Foundation; either version 2, or (at your option) any14 later version.13 This file is free software: you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as 15 published by the Free Software Foundation; either version 2.1 of the 16 License, or (at your option) any later version. 15 17 16 This programis distributed in the hope that it will be useful,18 This file is distributed in the hope that it will be useful, 17 19 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License for more details.21 GNU Lesser General Public License for more details. 20 22 21 You should have received a copy of the GNU General Public License 22 along with this program; if not, write to the Free Software 23 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 24 USA. */ 23 You should have received a copy of the GNU Lesser General Public License 24 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 25 25 26 #if def HAVE_CONFIG_H27 # include <config.h>26 #ifndef _LIBC 27 # include <config.h> 28 28 #endif 29 29 30 #undef __ptr_t 31 #if defined (__cplusplus) || (defined (__STDC__) && __STDC__) 32 # define __ptr_t void * 33 #else /* Not C++ or ANSI C. */ 34 # define __ptr_t char * 35 #endif /* C++ or ANSI C. */ 30 #include <string.h> 36 31 37 #if defined (_LIBC) 38 # include <string.h> 32 #include <stddef.h> 33 34 #if defined _LIBC 35 # include <memcopy.h> 36 #else 37 # define reg_char char 39 38 #endif 40 39 41 #if defined (HAVE_LIMITS_H) || defined (_LIBC) 42 # include <limits.h> 40 #include <limits.h> 41 42 #if HAVE_BP_SYM_H || defined _LIBC 43 # include <bp-sym.h> 44 #else 45 # define BP_SYM(sym) sym 43 46 #endif 44 47 45 #define LONG_MAX_32_BITS 2147483647 46 47 #ifndef LONG_MAX 48 #define LONG_MAX LONG_MAX_32_BITS 48 #undef __memchr 49 #ifdef _LIBC 50 # undef memchr 49 51 #endif 50 52 51 #include <sys/types.h> 52 53 #undef memchr 54 53 #ifndef weak_alias 54 # define __memchr memchr 55 #endif 55 56 56 57 /* Search no more than N bytes of S for C. */ 57 __ptr_t 58 memchr (s, c, n) 59 const __ptr_t s; 60 int c; 61 size_t n; 58 void * 59 __memchr (void const *s, int c_in, size_t n) 62 60 { 61 /* On 32-bit hardware, choosing longword to be a 32-bit unsigned 62 long instead of a 64-bit uintmax_t tends to give better 63 performance. On 64-bit hardware, unsigned long is generally 64 64 bits already. Change this typedef to experiment with 65 performance. */ 66 typedef unsigned long int longword; 67 63 68 const unsigned char *char_ptr; 64 const unsigned long int *longword_ptr; 65 unsigned long int longword, magic_bits, charmask; 69 const longword *longword_ptr; 70 longword repeated_one; 71 longword repeated_c; 72 unsigned reg_char c; 66 73 67 c = (unsigned char) c ;74 c = (unsigned char) c_in; 68 75 69 /* Handle the first few characters by reading one characterat a time.76 /* Handle the first few bytes by reading one byte at a time. 70 77 Do this until CHAR_PTR is aligned on a longword boundary. */ 71 78 for (char_ptr = (const unsigned char *) s; 72 n > 0 && ((unsigned long int) char_ptr 73 & (sizeof (longword) - 1)) != 0; 79 n > 0 && (size_t) char_ptr % sizeof (longword) != 0; 74 80 --n, ++char_ptr) 75 81 if (*char_ptr == c) 76 return (__ptr_t) char_ptr; 82 return (void *) char_ptr; 83 84 longword_ptr = (const longword *) char_ptr; 77 85 78 86 /* All these elucidatory comments refer to 4-byte longwords, 79 but the theory applies equally well to 8-byte longwords. */87 but the theory applies equally well to any size longwords. */ 80 88 81 longword_ptr = (unsigned long int *) char_ptr; 89 /* Compute auxiliary longword values: 90 repeated_one is a value which has a 1 in every byte. 91 repeated_c has c in every byte. */ 92 repeated_one = 0x01010101; 93 repeated_c = c | (c << 8); 94 repeated_c |= repeated_c << 16; 95 if (0xffffffffU < (longword) -1) 96 { 97 repeated_one |= repeated_one << 31 << 1; 98 repeated_c |= repeated_c << 31 << 1; 99 if (8 < sizeof (longword)) 100 { 101 size_t i; 82 102 83 /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits 84 the "holes." Note that there is a hole just to the left of 85 each byte, with an extra at the end: 103 for (i = 64; i < sizeof (longword) * 8; i *= 2) 104 { 105 repeated_one |= repeated_one << i; 106 repeated_c |= repeated_c << i; 107 } 108 } 109 } 86 110 87 bits: 01111110 11111110 11111110 11111111 88 bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD 111 /* Instead of the traditional loop which tests each byte, we will test a 112 longword at a time. The tricky part is testing if *any of the four* 113 bytes in the longword in question are equal to c. We first use an xor 114 with repeated_c. This reduces the task to testing whether *any of the 115 four* bytes in longword1 is zero. 89 116 90 The 1-bits make sure that carries propagate to the next 0-bit. 91 The 0-bits provide holes for carries to fall into. */ 117 We compute tmp = 118 ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). 119 That is, we perform the following operations: 120 1. Subtract repeated_one. 121 2. & ~longword1. 122 3. & a mask consisting of 0x80 in every byte. 123 Consider what happens in each byte: 124 - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, 125 and step 3 transforms it into 0x80. A carry can also be propagated 126 to more significant bytes. 127 - If a byte of longword1 is nonzero, let its lowest 1 bit be at 128 position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, 129 the byte ends in a single bit of value 0 and k bits of value 1. 130 After step 2, the result is just k bits of value 1: 2^k - 1. After 131 step 3, the result is 0. And no carry is produced. 132 So, if longword1 has only non-zero bytes, tmp is zero. 133 Whereas if longword1 has a zero byte, call j the position of the least 134 significant zero byte. Then the result has a zero at positions 0, ..., 135 j-1 and a 0x80 at position j. We cannot predict the result at the more 136 significant bytes (positions j+1..3), but it does not matter since we 137 already have a non-zero bit at position 8*j+7. 92 138 93 if (sizeof (longword) != 4 && sizeof (longword) != 8)94 abort ();139 So, the test whether any byte in longword1 is zero is equivalent to 140 testing whether tmp is nonzero. */ 95 141 96 #if LONG_MAX <= LONG_MAX_32_BITS97 magic_bits = 0x7efefeff;98 #else99 magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff;100 #endif101 102 /* Set up a longword, each of whose bytes is C. */103 charmask = c | (c << 8);104 charmask |= charmask << 16;105 #if LONG_MAX > LONG_MAX_32_BITS106 charmask |= charmask << 32;107 #endif108 109 /* Instead of the traditional loop which tests each character,110 we will test a longword at a time. The tricky part is testing111 if *any of the four* bytes in the longword in question are zero. */112 142 while (n >= sizeof (longword)) 113 143 { 114 /* We tentatively exit the loop if adding MAGIC_BITS to 115 LONGWORD fails to change any of the hole bits of LONGWORD. 144 longword longword1 = *longword_ptr ^ repeated_c; 116 145 117 1) Is this safe? Will it catch all the zero bytes? 118 Suppose there is a byte with all zeros. Any carry bits 119 propagating from its left will fall into the hole at its 120 least significant bit and stop. Since there will be no 121 carry from its most significant bit, the LSB of the 122 byte to the left will be unchanged, and the zero will be 123 detected. 124 125 2) Is this worthwhile? Will it ignore everything except 126 zero bytes? Suppose every byte of LONGWORD has a bit set 127 somewhere. There will be a carry into bit 8. If bit 8 128 is set, this will carry into bit 16. If bit 8 is clear, 129 one of bits 9-15 must be set, so there will be a carry 130 into bit 16. Similarly, there will be a carry into bit 131 24. If one of bits 24-30 is set, there will be a carry 132 into bit 31, so all of the hole bits will be changed. 133 134 The one misfire occurs when bits 24-30 are clear and bit 135 31 is set; in this case, the hole at bit 31 is not 136 changed. If we had access to the processor carry flag, 137 we could close this loophole by putting the fourth hole 138 at bit 32! 139 140 So it ignores everything except 128's, when they're aligned 141 properly. 142 143 3) But wait! Aren't we looking for C, not zero? 144 Good point. So what we do is XOR LONGWORD with a longword, 145 each of whose bytes is C. This turns each byte that is C 146 into a zero. */ 147 148 longword = *longword_ptr++ ^ charmask; 149 150 /* Add MAGIC_BITS to LONGWORD. */ 151 if ((((longword + magic_bits) 152 153 /* Set those bits that were unchanged by the addition. */ 154 ^ ~longword) 155 156 /* Look at only the hole bits. If any of the hole bits 157 are unchanged, most likely one of the bytes was a 158 zero. */ 159 & ~magic_bits) != 0) 160 { 161 /* Which of the bytes was C? If none of them were, it was 162 a misfire; continue the search. */ 163 164 const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); 165 166 if (cp[0] == c) 167 return (__ptr_t) cp; 168 if (cp[1] == c) 169 return (__ptr_t) &cp[1]; 170 if (cp[2] == c) 171 return (__ptr_t) &cp[2]; 172 if (cp[3] == c) 173 return (__ptr_t) &cp[3]; 174 #if LONG_MAX > 2147483647 175 if (cp[4] == c) 176 return (__ptr_t) &cp[4]; 177 if (cp[5] == c) 178 return (__ptr_t) &cp[5]; 179 if (cp[6] == c) 180 return (__ptr_t) &cp[6]; 181 if (cp[7] == c) 182 return (__ptr_t) &cp[7]; 183 #endif 184 } 185 146 if ((((longword1 - repeated_one) & ~longword1) 147 & (repeated_one << 7)) != 0) 148 break; 149 longword_ptr++; 186 150 n -= sizeof (longword); 187 151 } … … 189 153 char_ptr = (const unsigned char *) longword_ptr; 190 154 191 while (n-- > 0) 155 /* At this point, we know that either n < sizeof (longword), or one of the 156 sizeof (longword) bytes starting at char_ptr is == c. On little-endian 157 machines, we could determine the first such byte without any further 158 memory accesses, just by looking at the tmp result from the last loop 159 iteration. But this does not work on big-endian machines. Choose code 160 that works in both cases. */ 161 162 for (; n > 0; --n, ++char_ptr) 192 163 { 193 164 if (*char_ptr == c) 194 return (__ptr_t) char_ptr; 195 else 196 ++char_ptr; 165 return (void *) char_ptr; 197 166 } 198 167 199 return 0;168 return NULL; 200 169 } 170 #ifdef weak_alias 171 weak_alias (__memchr, BP_SYM (memchr)) 172 #endif -
trunk/src/sed/lib/obstack.c
r599 r3613 1 /* obstack.c - subroutines used implicitly by object stack macros -*- C -*- 2 Copyright (C) 1988,89,90,91,92,93,94,96,97 Free Software Foundation, Inc. 3 4 This file is part of the GNU C Library. Its master source is NOT part of 5 the C library, however. The master source lives in /gd/gnu/lib. 6 7 The GNU C Library is free software; you can redistribute it and/or 8 modify it under the terms of the GNU Library General Public License as 9 published by the Free Software Foundation; either version 2 of the 1 /* obstack.c - subroutines used implicitly by object stack macros 2 Copyright (C) 1988-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 This file is free software: you can redistribute it and/or modify 6 it under the terms of the GNU Lesser General Public License as 7 published by the Free Software Foundation, either version 3 of the 10 8 License, or (at your option) any later version. 11 9 12 Th e GNU C Libraryis distributed in the hope that it will be useful,10 This file is distributed in the hope that it will be useful, 13 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 Library General Public License for more details. 16 17 You should have received a copy of the GNU Library General Public 18 License along with the GNU C Library; see the file COPYING.LIB. If not, 19 write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 20 Boston, MA 02110-1301, USA. */ 21 22 #ifdef HAVE_CONFIG_H 23 #include <config.h> 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 17 18 19 #ifdef _LIBC 20 # include <obstack.h> 21 #else 22 # include <config.h> 23 # include "obstack.h" 24 24 #endif 25 25 26 #include "obstack.h" 27 28 /* NOTE BEFORE MODIFYING THIS FILE: This version number must be 29 incremented whenever callers compiled using an old obstack.h can no 30 longer properly call the functions in this obstack.c. */ 31 #define OBSTACK_INTERFACE_VERSION 1 26 /* NOTE BEFORE MODIFYING THIS FILE: _OBSTACK_INTERFACE_VERSION in 27 obstack.h must be incremented whenever callers compiled using an old 28 obstack.h can no longer properly call the functions in this file. */ 32 29 33 30 /* Comment out all this code if we are using the GNU C Library, and are not … … 37 34 and linking in this code is a waste when using the GNU C library 38 35 (especially if it is a shared library). Rather than having every GNU 39 program understand `configure --with-gnu-libc' and omit the object36 program understand 'configure --with-gnu-libc' and omit the object 40 37 files, it is simpler to just do this in the source for each such file. */ 41 42 #include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */ 43 #if !defined (_LIBC) && defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ > 1 44 #include <gnu-versions.h> 45 #if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION 46 #define ELIDE_CODE 38 #if !defined _LIBC && defined __GNU_LIBRARY__ && __GNU_LIBRARY__ > 1 39 # include <gnu-versions.h> 40 # if (_GNU_OBSTACK_INTERFACE_VERSION == _OBSTACK_INTERFACE_VERSION \ 41 || (_GNU_OBSTACK_INTERFACE_VERSION == 1 \ 42 && _OBSTACK_INTERFACE_VERSION == 2 \ 43 && defined SIZEOF_INT && defined SIZEOF_SIZE_T \ 44 && SIZEOF_INT == SIZEOF_SIZE_T)) 45 # define _OBSTACK_ELIDE_CODE 46 # endif 47 47 #endif 48 #endif 49 50 51 #ifndef ELIDE_CODE 52 53 54 #if defined (__STDC__) && __STDC__ 55 #define POINTER void * 56 #else 57 #define POINTER char * 58 #endif 48 49 #ifndef _OBSTACK_ELIDE_CODE 50 /* If GCC, or if an oddball (testing?) host that #defines __alignof__, 51 use the already-supplied __alignof__. Otherwise, this must be Gnulib 52 (as glibc assumes GCC); defer to Gnulib's alignof_type. */ 53 # if !defined __GNUC__ && !defined __alignof__ 54 # include <alignof.h> 55 # define __alignof__(type) alignof_type (type) 56 # endif 57 # include <stdlib.h> 58 # include <stdint.h> 59 60 # ifndef MAX 61 # define MAX(a,b) ((a) > (b) ? (a) : (b)) 62 # endif 59 63 60 64 /* Determine default alignment. */ 61 struct fooalign {char x; double d;}; 62 #define DEFAULT_ALIGNMENT \ 63 ((PTR_INT_TYPE) ((char *) &((struct fooalign *) 0)->d - (char *) 0)) 65 64 66 /* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT. 65 67 But in fact it might be less smart and round addresses to as much as 66 DEFAULT_ROUNDING. So we prepare for it to do that. */ 67 union fooround {long x; double d;}; 68 #define DEFAULT_ROUNDING (sizeof (union fooround)) 69 70 #ifdef original_glibc_code 71 /**//* When we copy a long block of data, this is the unit to do it with. */ 72 /**//* On some machines, copying successive ints does not work; */ 73 /**//* in such a case, redefine COPYING_UNIT to `long' (if that works) */ 74 /**//* or `char' as a last resort. */ 75 /**/#ifndef COPYING_UNIT 76 /**/#define COPYING_UNIT int 77 /**/#endif 78 #endif 79 80 /* The functions allocating more room by calling `obstack_chunk_alloc' 81 jump to the handler pointed to by `obstack_alloc_failed_handler'. 82 This variable by default points to the internal function 83 `print_and_abort'. */ 84 #if defined (__STDC__) && __STDC__ 85 static void print_and_abort (void); 86 void (*obstack_alloc_failed_handler) (void) = print_and_abort; 87 #else 88 static void print_and_abort (); 89 void (*obstack_alloc_failed_handler) () = print_and_abort; 90 #endif 91 92 /* Exit value used when `print_and_abort' is used. */ 93 #if defined __GNU_LIBRARY__ || defined HAVE_STDLIB_H 94 #include <stdlib.h> 95 #endif 96 #ifndef EXIT_FAILURE 97 #define EXIT_FAILURE 1 98 #endif 99 int obstack_exit_failure = EXIT_FAILURE; 100 101 /* The non-GNU-C macros copy the obstack into this global variable 102 to avoid multiple evaluation. */ 103 104 struct obstack *_obstack; 105 106 /* Define a macro that either calls functions with the traditional malloc/free 107 calling interface, or calls functions with the mmalloc/mfree interface 108 (that adds an extra first argument), based on the state of use_extra_arg. 109 For free, do not use ?:, since some compilers, like the MIPS compilers, 110 do not allow (expr) ? void : void. */ 111 112 #if defined (__STDC__) && __STDC__ 113 #define CALL_CHUNKFUN(h, size) \ 114 (((h) -> use_extra_arg) \ 115 ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ 116 : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size))) 117 118 #define CALL_FREEFUN(h, old_chunk) \ 119 do { \ 120 if ((h) -> use_extra_arg) \ 121 (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ 122 else \ 123 (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \ 124 } while (0) 125 #else 126 #define CALL_CHUNKFUN(h, size) \ 127 (((h) -> use_extra_arg) \ 128 ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ 129 : (*(struct _obstack_chunk *(*) ()) (h)->chunkfun) ((size))) 130 131 #define CALL_FREEFUN(h, old_chunk) \ 132 do { \ 133 if ((h) -> use_extra_arg) \ 134 (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ 135 else \ 136 (*(void (*) ()) (h)->freefun) ((old_chunk)); \ 137 } while (0) 138 #endif 139 68 DEFAULT_ROUNDING. So we prepare for it to do that. 69 70 DEFAULT_ALIGNMENT cannot be an enum constant; see gnulib's alignof.h. */ 71 #define DEFAULT_ALIGNMENT MAX (__alignof__ (long double), \ 72 MAX (__alignof__ (uintmax_t), \ 73 __alignof__ (void *))) 74 #define DEFAULT_ROUNDING MAX (sizeof (long double), \ 75 MAX (sizeof (uintmax_t), \ 76 sizeof (void *))) 77 78 /* Call functions with either the traditional malloc/free calling 79 interface, or the mmalloc/mfree interface (that adds an extra first 80 argument), based on the value of use_extra_arg. */ 81 82 static void * 83 call_chunkfun (struct obstack *h, size_t size) 84 { 85 if (h->use_extra_arg) 86 return h->chunkfun.extra (h->extra_arg, size); 87 else 88 return h->chunkfun.plain (size); 89 } 90 91 static void 92 call_freefun (struct obstack *h, void *old_chunk) 93 { 94 if (h->use_extra_arg) 95 h->freefun.extra (h->extra_arg, old_chunk); 96 else 97 h->freefun.plain (old_chunk); 98 } 140 99 141 100 142 101 /* Initialize an obstack H for use. Specify chunk size SIZE (0 means default). 143 102 Objects start on multiples of ALIGNMENT (0 means use default). 144 CHUNKFUN is the function to use to allocate chunks, 145 and FREEFUN the function to free them. 146 147 Return nonzero if successful, zero if out of memory. 148 To recover from an out of memory error, 149 free up some memory, then call this again. */ 150 151 int 152 _obstack_begin (h, size, alignment, chunkfun, freefun) 153 struct obstack *h; 154 int size; 155 int alignment; 156 #if defined (__STDC__) && __STDC__ 157 POINTER (*chunkfun) (long); 158 void (*freefun) (void *); 159 #else 160 POINTER (*chunkfun) (); 161 void (*freefun) (); 162 #endif 163 { 164 register struct _obstack_chunk *chunk; /* points to new chunk */ 103 104 Return nonzero if successful, calls obstack_alloc_failed_handler if 105 allocation fails. */ 106 107 static int 108 _obstack_begin_worker (struct obstack *h, 109 _OBSTACK_SIZE_T size, _OBSTACK_SIZE_T alignment) 110 { 111 struct _obstack_chunk *chunk; /* points to new chunk */ 165 112 166 113 if (alignment == 0) … … 170 117 { 171 118 /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc. 172 173 174 175 176 177 178 119 Use the values for range checking, because if range checking is off, 120 the extra bytes won't be missed terribly, but if range checking is on 121 and we used a larger request, a whole extra 4096 bytes would be 122 allocated. 123 124 These number are irrelevant to the new GNU malloc. I suspect it is 125 less sensitive to the size of the request. */ 179 126 int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1)) 180 181 127 + 4 + DEFAULT_ROUNDING - 1) 128 & ~(DEFAULT_ROUNDING - 1)); 182 129 size = 4096 - extra; 183 130 } 184 131 185 #if defined (__STDC__) && __STDC__186 h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;187 h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;188 #else189 h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;190 h->freefun = freefun;191 #endif192 132 h->chunk_size = size; 193 133 h->alignment_mask = alignment - 1; 194 h->use_extra_arg = 0; 195 196 chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); 134 135 chunk = h->chunk = call_chunkfun (h, h->chunk_size); 197 136 if (!chunk) 198 137 (*obstack_alloc_failed_handler) (); 199 h->next_free = h->object_base = chunk->contents;200 h->chunk_limit = chunk->limit201 138 h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents, 139 alignment - 1); 140 h->chunk_limit = chunk->limit = (char *) chunk + h->chunk_size; 202 141 chunk->prev = 0; 203 142 /* The initial chunk now contains no empty object. */ … … 208 147 209 148 int 210 _obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg) 211 struct obstack *h; 212 int size; 213 int alignment; 214 #if defined (__STDC__) && __STDC__ 215 POINTER (*chunkfun) (POINTER, long); 216 void (*freefun) (POINTER, POINTER); 217 #else 218 POINTER (*chunkfun) (); 219 void (*freefun) (); 220 #endif 221 POINTER arg; 222 { 223 register struct _obstack_chunk *chunk; /* points to new chunk */ 224 225 if (alignment == 0) 226 alignment = DEFAULT_ALIGNMENT; 227 if (size == 0) 228 /* Default size is what GNU malloc can fit in a 4096-byte block. */ 229 { 230 /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc. 231 Use the values for range checking, because if range checking is off, 232 the extra bytes won't be missed terribly, but if range checking is on 233 and we used a larger request, a whole extra 4096 bytes would be 234 allocated. 235 236 These number are irrelevant to the new GNU malloc. I suspect it is 237 less sensitive to the size of the request. */ 238 int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1)) 239 + 4 + DEFAULT_ROUNDING - 1) 240 & ~(DEFAULT_ROUNDING - 1)); 241 size = 4096 - extra; 242 } 243 244 #if defined(__STDC__) && __STDC__ 245 h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun; 246 h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; 247 #else 248 h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun; 249 h->freefun = freefun; 250 #endif 251 h->chunk_size = size; 252 h->alignment_mask = alignment - 1; 149 _obstack_begin (struct obstack *h, 150 _OBSTACK_SIZE_T size, _OBSTACK_SIZE_T alignment, 151 void *(*chunkfun) (size_t), 152 void (*freefun) (void *)) 153 { 154 h->chunkfun.plain = chunkfun; 155 h->freefun.plain = freefun; 156 h->use_extra_arg = 0; 157 return _obstack_begin_worker (h, size, alignment); 158 } 159 160 int 161 _obstack_begin_1 (struct obstack *h, 162 _OBSTACK_SIZE_T size, _OBSTACK_SIZE_T alignment, 163 void *(*chunkfun) (void *, size_t), 164 void (*freefun) (void *, void *), 165 void *arg) 166 { 167 h->chunkfun.extra = chunkfun; 168 h->freefun.extra = freefun; 253 169 h->extra_arg = arg; 254 170 h->use_extra_arg = 1; 255 256 chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); 257 if (!chunk) 258 (*obstack_alloc_failed_handler) (); 259 h->next_free = h->object_base = chunk->contents; 260 h->chunk_limit = chunk->limit 261 = (char *) chunk + h->chunk_size; 262 chunk->prev = 0; 263 /* The initial chunk now contains no empty object. */ 264 h->maybe_empty_object = 0; 265 h->alloc_failed = 0; 266 return 1; 171 return _obstack_begin_worker (h, size, alignment); 267 172 } 268 173 … … 274 179 275 180 void 276 _obstack_newchunk (h, length) 277 struct obstack *h; 278 int length; 279 { 280 register struct _obstack_chunk *old_chunk = h->chunk; 281 register struct _obstack_chunk *new_chunk; 282 register long new_size; 283 register int obj_size = h->next_free - h->object_base; 181 _obstack_newchunk (struct obstack *h, _OBSTACK_SIZE_T length) 182 { 183 struct _obstack_chunk *old_chunk = h->chunk; 184 struct _obstack_chunk *new_chunk = 0; 185 size_t obj_size = h->next_free - h->object_base; 186 char *object_base; 284 187 285 188 /* Compute size for new chunk. */ 286 new_size = (obj_size + length) + (obj_size >> 3) + 100; 189 size_t sum1 = obj_size + length; 190 size_t sum2 = sum1 + h->alignment_mask; 191 size_t new_size = sum2 + (obj_size >> 3) + 100; 192 if (new_size < sum2) 193 new_size = sum2; 287 194 if (new_size < h->chunk_size) 288 195 new_size = h->chunk_size; 289 196 290 197 /* Allocate and initialize the new chunk. */ 291 new_chunk = CALL_CHUNKFUN (h, new_size); 198 if (obj_size <= sum1 && sum1 <= sum2) 199 new_chunk = call_chunkfun (h, new_size); 292 200 if (!new_chunk) 293 (*obstack_alloc_failed_handler) 201 (*obstack_alloc_failed_handler)(); 294 202 h->chunk = new_chunk; 295 203 new_chunk->prev = old_chunk; 296 204 new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size; 297 205 298 _obstack_memcpy(new_chunk->contents, h->object_base, obj_size); 299 300 /* If the object just copied was the only data in OLD_CHUNK, */ 301 /* free that chunk and remove it from the chain. */ 302 /* But not if that chunk might contain an empty object. */ 303 if (h->object_base == old_chunk->contents && ! h->maybe_empty_object) 206 /* Compute an aligned object_base in the new chunk */ 207 object_base = 208 __PTR_ALIGN ((char *) new_chunk, new_chunk->contents, h->alignment_mask); 209 210 /* Move the existing object to the new chunk. */ 211 memcpy (object_base, h->object_base, obj_size); 212 213 /* If the object just copied was the only data in OLD_CHUNK, 214 free that chunk and remove it from the chain. 215 But not if that chunk might contain an empty object. */ 216 if (!h->maybe_empty_object 217 && (h->object_base 218 == __PTR_ALIGN ((char *) old_chunk, old_chunk->contents, 219 h->alignment_mask))) 304 220 { 305 221 new_chunk->prev = old_chunk->prev; 306 CALL_FREEFUN(h, old_chunk);307 } 308 309 h->object_base = new_chunk->contents;222 call_freefun (h, old_chunk); 223 } 224 225 h->object_base = object_base; 310 226 h->next_free = h->object_base + obj_size; 311 227 /* The new chunk certainly contains no empty object yet. */ … … 317 233 If you use it in a program, you are probably losing. */ 318 234 319 #if defined (__STDC__) && __STDC__320 235 /* Suppress -Wmissing-prototypes warning. We don't want to declare this in 321 236 obstack.h because it is just for debugging. */ 322 int _obstack_allocated_p (struct obstack *h, POINTER obj); 323 #endif 237 int _obstack_allocated_p (struct obstack *h, void *obj) __attribute_pure__; 324 238 325 239 int 326 _obstack_allocated_p (h, obj) 327 struct obstack *h; 328 POINTER obj; 329 { 330 register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ 331 register struct _obstack_chunk *plp; /* point to previous chunk if any */ 240 _obstack_allocated_p (struct obstack *h, void *obj) 241 { 242 struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ 243 struct _obstack_chunk *plp; /* point to previous chunk if any */ 332 244 333 245 lp = (h)->chunk; … … 335 247 the beginning of the chunk but might be an empty object exactly 336 248 at the end of an adjacent chunk. */ 337 while (lp != 0 && (( POINTER) lp >= obj || (POINTER) (lp)->limit < obj))249 while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj)) 338 250 { 339 251 plp = lp->prev; … … 343 255 } 344 256 345 346 257 /* Free objects in obstack H, including OBJ and everything allocate 347 258 more recently than OBJ. If OBJ is zero, free everything in H. */ 348 259 349 #undef obstack_free350 351 /* This function has two names with identical definitions.352 This is the first one, called from non-ANSI code. */353 354 260 void 355 _obstack_free (h, obj) 356 struct obstack *h; 357 POINTER obj; 358 { 359 register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ 360 register struct _obstack_chunk *plp; /* point to previous chunk if any */ 261 _obstack_free (struct obstack *h, void *obj) 262 { 263 struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ 264 struct _obstack_chunk *plp; /* point to previous chunk if any */ 361 265 362 266 lp = h->chunk; … … 364 268 But there can be an empty object at that address 365 269 at the end of another chunk. */ 366 while (lp != 0 && (( POINTER) lp >= obj || (POINTER) (lp)->limit < obj))270 while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj)) 367 271 { 368 272 plp = lp->prev; 369 CALL_FREEFUN(h, lp);273 call_freefun (h, lp); 370 274 lp = plp; 371 275 /* If we switch chunks, we can't tell whether the new current 372 276 chunk contains an empty object, so assume that it may. */ 373 277 h->maybe_empty_object = 1; 374 278 } … … 384 288 } 385 289 386 /* This function is used from ANSI code. */ 387 388 void 389 obstack_free (h, obj) 390 struct obstack *h; 391 POINTER obj; 392 { 393 register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ 394 register struct _obstack_chunk *plp; /* point to previous chunk if any */ 395 396 lp = h->chunk; 397 /* We use >= because there cannot be an object at the beginning of a chunk. 398 But there can be an empty object at that address 399 at the end of another chunk. */ 400 while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj)) 401 { 402 plp = lp->prev; 403 CALL_FREEFUN (h, lp); 404 lp = plp; 405 /* If we switch chunks, we can't tell whether the new current 406 chunk contains an empty object, so assume that it may. */ 407 h->maybe_empty_object = 1; 408 } 409 if (lp) 410 { 411 h->object_base = h->next_free = (char *) (obj); 412 h->chunk_limit = lp->limit; 413 h->chunk = lp; 414 } 415 else if (obj != 0) 416 /* obj is not in any of the chunks! */ 417 abort (); 418 } 419 420 421 int 422 _obstack_memory_used (h) 423 struct obstack *h; 424 { 425 register struct _obstack_chunk* lp; 426 register int nbytes = 0; 290 _OBSTACK_SIZE_T 291 _obstack_memory_used (struct obstack *h) 292 { 293 struct _obstack_chunk *lp; 294 _OBSTACK_SIZE_T nbytes = 0; 427 295 428 296 for (lp = h->chunk; lp != 0; lp = lp->prev) … … 433 301 } 434 302 435 303 # ifndef _OBSTACK_NO_ERROR_HANDLER 436 304 /* Define the error handler. */ 437 #ifndef _ 438 # ifdef HAVE_LIBINTL_H 439 # include <libintl.h> 305 # include <stdio.h> 306 307 /* Exit value used when 'print_and_abort' is used. */ 308 # ifdef _LIBC 309 int obstack_exit_failure = EXIT_FAILURE; 310 # else 311 # include "exitfail.h" 312 # define obstack_exit_failure exit_failure 313 # endif 314 315 # ifdef _LIBC 316 # include <libintl.h> 317 # else 318 # include "gettext.h" 319 # endif 440 320 # ifndef _ 441 # define _(Str) gettext (Str) 442 # endif 443 # else 444 # define _(Str) (Str) 445 # endif 446 #endif 447 448 static void 449 print_and_abort () 450 { 451 fputs (_("memory exhausted\n"), stderr); 321 # define _(msgid) gettext (msgid) 322 # endif 323 324 # ifdef _LIBC 325 # include <libio/iolibio.h> 326 # endif 327 328 static __attribute_noreturn__ void 329 print_and_abort (void) 330 { 331 /* Don't change any of these strings. Yes, it would be possible to add 332 the newline to the string and use fputs or so. But this must not 333 happen because the "memory exhausted" message appears in other places 334 like this and the translation should be reused instead of creating 335 a very similar string which requires a separate translation. */ 336 # ifdef _LIBC 337 (void) __fxprintf (NULL, "%s\n", _("memory exhausted")); 338 # else 339 fprintf (stderr, "%s\n", _("memory exhausted")); 340 # endif 452 341 exit (obstack_exit_failure); 453 342 } 454 343 455 456 #if 0 457 /* These are now turned off because the applications do not use it 458 and it uses bcopy via obstack_grow, which causes trouble on sysV. */ 459 460 /* Now define the functional versions of the obstack macros. 461 Define them to simply use the corresponding macros to do the job. */ 462 463 #if defined (__STDC__) && __STDC__ 464 /* These function definitions do not work with non-ANSI preprocessors; 465 they won't pass through the macro names in parentheses. */ 466 467 /* The function names appear in parentheses in order to prevent 468 the macro-definitions of the names from being expanded there. */ 469 470 POINTER (obstack_base) (obstack) 471 struct obstack *obstack; 472 { 473 return obstack_base (obstack); 474 } 475 476 POINTER (obstack_next_free) (obstack) 477 struct obstack *obstack; 478 { 479 return obstack_next_free (obstack); 480 } 481 482 int (obstack_object_size) (obstack) 483 struct obstack *obstack; 484 { 485 return obstack_object_size (obstack); 486 } 487 488 int (obstack_room) (obstack) 489 struct obstack *obstack; 490 { 491 return obstack_room (obstack); 492 } 493 494 int (obstack_make_room) (obstack, length) 495 struct obstack *obstack; 496 int length; 497 { 498 return obstack_make_room (obstack, length); 499 } 500 501 void (obstack_grow) (obstack, pointer, length) 502 struct obstack *obstack; 503 POINTER pointer; 504 int length; 505 { 506 obstack_grow (obstack, pointer, length); 507 } 508 509 void (obstack_grow0) (obstack, pointer, length) 510 struct obstack *obstack; 511 POINTER pointer; 512 int length; 513 { 514 obstack_grow0 (obstack, pointer, length); 515 } 516 517 void (obstack_1grow) (obstack, character) 518 struct obstack *obstack; 519 int character; 520 { 521 obstack_1grow (obstack, character); 522 } 523 524 void (obstack_blank) (obstack, length) 525 struct obstack *obstack; 526 int length; 527 { 528 obstack_blank (obstack, length); 529 } 530 531 void (obstack_1grow_fast) (obstack, character) 532 struct obstack *obstack; 533 int character; 534 { 535 obstack_1grow_fast (obstack, character); 536 } 537 538 void (obstack_blank_fast) (obstack, length) 539 struct obstack *obstack; 540 int length; 541 { 542 obstack_blank_fast (obstack, length); 543 } 544 545 POINTER (obstack_finish) (obstack) 546 struct obstack *obstack; 547 { 548 return obstack_finish (obstack); 549 } 550 551 POINTER (obstack_alloc) (obstack, length) 552 struct obstack *obstack; 553 int length; 554 { 555 return obstack_alloc (obstack, length); 556 } 557 558 POINTER (obstack_copy) (obstack, pointer, length) 559 struct obstack *obstack; 560 POINTER pointer; 561 int length; 562 { 563 return obstack_copy (obstack, pointer, length); 564 } 565 566 POINTER (obstack_copy0) (obstack, pointer, length) 567 struct obstack *obstack; 568 POINTER pointer; 569 int length; 570 { 571 return obstack_copy0 (obstack, pointer, length); 572 } 573 574 #endif /* __STDC__ */ 575 576 #endif /* 0 */ 577 578 #endif /* !ELIDE_CODE */ 344 /* The functions allocating more room by calling 'obstack_chunk_alloc' 345 jump to the handler pointed to by 'obstack_alloc_failed_handler'. 346 This can be set to a user defined function which should either 347 abort gracefully or use longjump - but shouldn't return. This 348 variable by default points to the internal function 349 'print_and_abort'. */ 350 __attribute_noreturn__ void (*obstack_alloc_failed_handler) (void) 351 = print_and_abort; 352 # endif /* !_OBSTACK_NO_ERROR_HANDLER */ 353 #endif /* !_OBSTACK_ELIDE_CODE */ -
trunk/src/sed/lib/obstack.h
r599 r3613 1 1 /* obstack.h - object stack macros 2 Copyright (C) 1988,89,90,91,92,93,94,96,97,98,99 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. Its master source is NOT part of 4 the C library, however. The master source lives in /gd/gnu/lib. 5 6 The GNU C Library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Lesser General Public 8 License as published by the Free Software Foundation; either 9 version 2.1 of the License, or (at your option) any later version. 10 11 The GNU C Library is distributed in the hope that it will be useful, 2 Copyright (C) 1988-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 This file is free software: you can redistribute it and/or modify 6 it under the terms of the GNU Lesser General Public License as 7 published by the Free Software Foundation, either version 3 of the 8 License, or (at your option) any later version. 9 10 This file is distributed in the hope that it will be useful, 12 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. */ 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 20 17 21 18 /* Summary: 22 19 23 All the apparent functions defined here are macros. The idea24 is that you would use these pre-tested macros to solve a25 very specific set of problems, and they would run fast.26 Caution: no side-effects in arguments please!! They may be27 evaluated MANY times!!28 29 These macros operate a stack of objects. Each object starts life30 small, and may grow to maturity. (Consider building a word syllable31 by syllable.) An object can move while it is growing. Once it has32 been "finished" it never changes address again. So the "top of the33 stack" is typically an immature growing object, while the rest of the34 stack is of mature, fixed size and fixed address objects.35 36 These routines grab large chunks of memory, using a function you37 supply, called `obstack_chunk_alloc'. On occasion, they free chunks,38 by calling `obstack_chunk_free'. You must define them and declare39 them before using any obstack macros.40 41 Each independent stack is represented by a `struct obstack'.42 Each of the obstack macros expects a pointer to such a structure43 as the first argument.44 45 One motivation for this package is the problem of growing char strings46 in symbol tables. Unless you are "fascist pig with a read-only mind"47 --Gosper's immortal quote from HAKMEM item 154, out of context--you48 would not like to put any arbitrary upper limit on the length of your49 symbols.50 51 In practice this often means you will build many short symbols and a52 few long symbols. At the time you are reading a symbol you don't know53 how long it is. One traditional method is to read a symbol into a54 buffer, realloc()ating the buffer every time you try to read a symbol55 that is longer than the buffer. This is beaut, but you still will56 want to copy the symbol from the buffer to a more permanent57 symbol-table entry say about half the time.58 59 With obstacks, you can work differently. Use one obstack for all symbol60 names. As you read a symbol, grow the name in the obstack gradually.61 When the name is complete, finalize it. Then, if the symbol exists already,62 free the newly read name.63 64 The way we do this is to take a large chunk, allocating memory from65 low addresses. When you want to build a symbol in the chunk you just66 add chars above the current "high water mark" in the chunk. When you67 have finished adding chars, because you got to the end of the symbol,68 you know how long the chars are, and you can create a new object.69 Mostly the chars will not burst over the highest address of the chunk,70 because you would typically expect a chunk to be (say) 100 times as71 long as an average object.72 73 In case that isn't clear, when we have enough chars to make up74 the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)75 so we just point to it where it lies. No moving of chars is76 needed and this is the second win: potentially long strings need77 never be explicitly shuffled. Once an object is formed, it does not78 change its address during its lifetime.79 80 When the chars burst over a chunk boundary, we allocate a larger81 chunk, and then copy the partly formed object from the end of the old82 chunk to the beginning of the new larger chunk. We then carry on83 accreting characters to the end of the object as we normally would.84 85 A special macro is provided to add a single char at a time to a86 growing object. This allows the use of register variables, which87 break the ordinary 'growth' macro.88 89 Summary:90 91 92 93 94 95 96 97 98 Because of the way we do it, you can `unwind'an obstack99 100 101 */20 All the apparent functions defined here are macros. The idea 21 is that you would use these pre-tested macros to solve a 22 very specific set of problems, and they would run fast. 23 Caution: no side-effects in arguments please!! They may be 24 evaluated MANY times!! 25 26 These macros operate a stack of objects. Each object starts life 27 small, and may grow to maturity. (Consider building a word syllable 28 by syllable.) An object can move while it is growing. Once it has 29 been "finished" it never changes address again. So the "top of the 30 stack" is typically an immature growing object, while the rest of the 31 stack is of mature, fixed size and fixed address objects. 32 33 These routines grab large chunks of memory, using a function you 34 supply, called 'obstack_chunk_alloc'. On occasion, they free chunks, 35 by calling 'obstack_chunk_free'. You must define them and declare 36 them before using any obstack macros. 37 38 Each independent stack is represented by a 'struct obstack'. 39 Each of the obstack macros expects a pointer to such a structure 40 as the first argument. 41 42 One motivation for this package is the problem of growing char strings 43 in symbol tables. Unless you are "fascist pig with a read-only mind" 44 --Gosper's immortal quote from HAKMEM item 154, out of context--you 45 would not like to put any arbitrary upper limit on the length of your 46 symbols. 47 48 In practice this often means you will build many short symbols and a 49 few long symbols. At the time you are reading a symbol you don't know 50 how long it is. One traditional method is to read a symbol into a 51 buffer, realloc()ating the buffer every time you try to read a symbol 52 that is longer than the buffer. This is beaut, but you still will 53 want to copy the symbol from the buffer to a more permanent 54 symbol-table entry say about half the time. 55 56 With obstacks, you can work differently. Use one obstack for all symbol 57 names. As you read a symbol, grow the name in the obstack gradually. 58 When the name is complete, finalize it. Then, if the symbol exists already, 59 free the newly read name. 60 61 The way we do this is to take a large chunk, allocating memory from 62 low addresses. When you want to build a symbol in the chunk you just 63 add chars above the current "high water mark" in the chunk. When you 64 have finished adding chars, because you got to the end of the symbol, 65 you know how long the chars are, and you can create a new object. 66 Mostly the chars will not burst over the highest address of the chunk, 67 because you would typically expect a chunk to be (say) 100 times as 68 long as an average object. 69 70 In case that isn't clear, when we have enough chars to make up 71 the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed) 72 so we just point to it where it lies. No moving of chars is 73 needed and this is the second win: potentially long strings need 74 never be explicitly shuffled. Once an object is formed, it does not 75 change its address during its lifetime. 76 77 When the chars burst over a chunk boundary, we allocate a larger 78 chunk, and then copy the partly formed object from the end of the old 79 chunk to the beginning of the new larger chunk. We then carry on 80 accreting characters to the end of the object as we normally would. 81 82 A special macro is provided to add a single char at a time to a 83 growing object. This allows the use of register variables, which 84 break the ordinary 'growth' macro. 85 86 Summary: 87 We allocate large chunks. 88 We carve out one object at a time from the current chunk. 89 Once carved, an object never moves. 90 We are free to append data of any size to the currently 91 growing object. 92 Exactly one object is growing in an obstack at any one time. 93 You can run one obstack per control block. 94 You may have as many control blocks as you dare. 95 Because of the way we do it, you can "unwind" an obstack 96 back to a previous state. (You may remove objects much 97 as you would with a stack.) 98 */ 102 99 103 100 … … 107 104 #define _OBSTACK_H 1 108 105 109 #ifdef HAVE_CONFIG_H 110 #include "config.h" 106 #ifndef _OBSTACK_INTERFACE_VERSION 107 # define _OBSTACK_INTERFACE_VERSION 2 108 #endif 109 110 #include <stddef.h> /* For size_t and ptrdiff_t. */ 111 #include <string.h> /* For __GNU_LIBRARY__, and memcpy. */ 112 113 #if __STDC_VERSION__ < 199901L || defined __HP_cc 114 # define __FLEXIBLE_ARRAY_MEMBER 1 115 #else 116 # define __FLEXIBLE_ARRAY_MEMBER 117 #endif 118 119 #if _OBSTACK_INTERFACE_VERSION == 1 120 /* For binary compatibility with obstack version 1, which used "int" 121 and "long" for these two types. */ 122 # define _OBSTACK_SIZE_T unsigned int 123 # define _CHUNK_SIZE_T unsigned long 124 # define _OBSTACK_CAST(type, expr) ((type) (expr)) 125 #else 126 /* Version 2 with sane types, especially for 64-bit hosts. */ 127 # define _OBSTACK_SIZE_T size_t 128 # define _CHUNK_SIZE_T size_t 129 # define _OBSTACK_CAST(type, expr) (expr) 130 #endif 131 132 /* If B is the base of an object addressed by P, return the result of 133 aligning P to the next multiple of A + 1. B and P must be of type 134 char *. A + 1 must be a power of 2. */ 135 136 #define __BPTR_ALIGN(B, P, A) ((B) + (((P) - (B) + (A)) & ~(A))) 137 138 /* Similar to __BPTR_ALIGN (B, P, A), except optimize the common case 139 where pointers can be converted to integers, aligned as integers, 140 and converted back again. If ptrdiff_t is narrower than a 141 pointer (e.g., the AS/400), play it safe and compute the alignment 142 relative to B. Otherwise, use the faster strategy of computing the 143 alignment relative to 0. */ 144 145 #define __PTR_ALIGN(B, P, A) \ 146 __BPTR_ALIGN (sizeof (ptrdiff_t) < sizeof (void *) ? (B) : (char *) 0, \ 147 P, A) 148 149 #ifndef __attribute_pure__ 150 # define __attribute_pure__ _GL_ATTRIBUTE_PURE 151 #endif 152 153 /* Not the same as _Noreturn, since it also works with function pointers. */ 154 #ifndef __attribute_noreturn__ 155 # if 2 < __GNUC__ + (8 <= __GNUC_MINOR__) || defined __clang__ || 0x5110 <= __SUNPRO_C 156 # define __attribute_noreturn__ __attribute__ ((__noreturn__)) 157 # else 158 # define __attribute_noreturn__ 159 # endif 111 160 #endif 112 161 … … 115 164 #endif 116 165 117 118 /* We use subtraction of (char *) 0 instead of casting to int 119 because on word-addressable machines a simple cast to int 120 may ignore the byte-within-word field of the pointer. */ 121 122 #ifndef __PTR_TO_INT 123 # define __PTR_TO_INT(P) ((P) - (char *) 0) 124 #endif 125 126 #ifndef __INT_TO_PTR 127 # define __INT_TO_PTR(P) ((P) + (char *) 0) 128 #endif 129 130 /* We need the type of the resulting object. If __PTRDIFF_TYPE__ is 131 defined, as with GNU C, use that; that way we don't pollute the 132 namespace with <stddef.h>'s symbols. Otherwise, if <stddef.h> is 133 available, include it and use ptrdiff_t. In traditional C, long is 134 the best that we can do. */ 135 136 #ifdef __PTRDIFF_TYPE__ 137 # define PTR_INT_TYPE __PTRDIFF_TYPE__ 138 #else 139 # ifdef HAVE_STDDEF_H 140 # include <stddef.h> 141 # define PTR_INT_TYPE ptrdiff_t 142 # else 143 # define PTR_INT_TYPE long 144 # endif 145 #endif 146 147 #if defined _LIBC || defined HAVE_STRING_H 148 # include <string.h> 149 # define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N)) 150 #else 151 # ifdef memcpy 152 # define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N)) 153 # else 154 # define _obstack_memcpy(To, From, N) bcopy ((From), (To), (N)) 155 # endif 156 #endif 157 158 struct _obstack_chunk /* Lives at front of each chunk. */ 166 struct _obstack_chunk /* Lives at front of each chunk. */ 159 167 { 160 char *limit;/* 1 past end of this chunk */161 struct _obstack_chunk *prev; 162 char contents[4];/* objects begin here */168 char *limit; /* 1 past end of this chunk */ 169 struct _obstack_chunk *prev; /* address of prior chunk or NULL */ 170 char contents[__FLEXIBLE_ARRAY_MEMBER]; /* objects begin here */ 163 171 }; 164 172 165 struct obstack 173 struct obstack /* control current object in current chunk */ 166 174 { 167 long chunk_size; /* preferred size to allocate chunks in */ 168 struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */ 169 char *object_base; /* address of object we are building */ 170 char *next_free; /* where to add next char to current object */ 171 char *chunk_limit; /* address of char after current chunk */ 172 PTR_INT_TYPE temp; /* Temporary for some macros. */ 173 int alignment_mask; /* Mask of alignment for each object. */ 174 #if defined __STDC__ && __STDC__ 175 /* These prototypes vary based on `use_extra_arg', and we use 176 casts to the prototypeless function type in all assignments, 177 but having prototypes here quiets -Wstrict-prototypes. */ 178 struct _obstack_chunk *(*chunkfun) (void *, long); 179 void (*freefun) (void *, struct _obstack_chunk *); 180 void *extra_arg; /* first arg for chunk alloc/dealloc funcs */ 181 #else 182 struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */ 183 void (*freefun) (); /* User's function to free a chunk. */ 184 char *extra_arg; /* first arg for chunk alloc/dealloc funcs */ 185 #endif 186 unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */ 187 unsigned maybe_empty_object:1;/* There is a possibility that the current 188 chunk contains a zero-length object. This 189 prevents freeing the chunk if we allocate 190 a bigger chunk to replace it. */ 191 unsigned alloc_failed:1; /* No longer used, as we now call the failed 192 handler on error, but retained for binary 193 compatibility. */ 175 _CHUNK_SIZE_T chunk_size; /* preferred size to allocate chunks in */ 176 struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */ 177 char *object_base; /* address of object we are building */ 178 char *next_free; /* where to add next char to current object */ 179 char *chunk_limit; /* address of char after current chunk */ 180 union 181 { 182 _OBSTACK_SIZE_T i; 183 void *p; 184 } temp; /* Temporary for some macros. */ 185 _OBSTACK_SIZE_T alignment_mask; /* Mask of alignment for each object. */ 186 187 /* These prototypes vary based on 'use_extra_arg'. */ 188 union 189 { 190 void *(*plain) (size_t); 191 void *(*extra) (void *, size_t); 192 } chunkfun; 193 union 194 { 195 void (*plain) (void *); 196 void (*extra) (void *, void *); 197 } freefun; 198 199 void *extra_arg; /* first arg for chunk alloc/dealloc funcs */ 200 unsigned use_extra_arg : 1; /* chunk alloc/dealloc funcs take extra arg */ 201 unsigned maybe_empty_object : 1; /* There is a possibility that the current 202 chunk contains a zero-length object. This 203 prevents freeing the chunk if we allocate 204 a bigger chunk to replace it. */ 205 unsigned alloc_failed : 1; /* No longer used, as we now call the failed 206 handler on error, but retained for binary 207 compatibility. */ 194 208 }; 195 209 196 210 /* Declare the external functions we use; they are in obstack.c. */ 197 211 198 #if defined __STDC__ && __STDC__ 199 extern void _obstack_newchunk (struct obstack *, int); 212 extern void _obstack_newchunk (struct obstack *, _OBSTACK_SIZE_T); 200 213 extern void _obstack_free (struct obstack *, void *); 201 extern int _obstack_begin (struct obstack *, int, int, 202 void *(*) (long), void (*) (void *)); 203 extern int _obstack_begin_1 (struct obstack *, int, int, 204 void *(*) (void *, long), 205 void (*) (void *, void *), void *); 206 extern int _obstack_memory_used (struct obstack *); 207 #else 208 extern void _obstack_newchunk (); 209 extern void _obstack_free (); 210 extern int _obstack_begin (); 211 extern int _obstack_begin_1 (); 212 extern int _obstack_memory_used (); 213 #endif 214 215 216 #if defined __STDC__ && __STDC__ 217 218 /* Do the function-declarations after the structs 219 but before defining the macros. */ 220 221 void obstack_init (struct obstack *obstack); 222 223 void * obstack_alloc (struct obstack *obstack, int size); 224 225 void * obstack_copy (struct obstack *obstack, const void *address, int size); 226 void * obstack_copy0 (struct obstack *obstack, const void *address, int size); 227 228 void obstack_free (struct obstack *obstack, void *block); 229 230 void obstack_blank (struct obstack *obstack, int size); 231 232 void obstack_grow (struct obstack *obstack, const void *data, int size); 233 void obstack_grow0 (struct obstack *obstack, const void *data, int size); 234 235 void obstack_1grow (struct obstack *obstack, int data_char); 236 void obstack_ptr_grow (struct obstack *obstack, const void *data); 237 void obstack_int_grow (struct obstack *obstack, int data); 238 239 void * obstack_finish (struct obstack *obstack); 240 241 int obstack_object_size (struct obstack *obstack); 242 243 int obstack_room (struct obstack *obstack); 244 void obstack_make_room (struct obstack *obstack, int size); 245 void obstack_1grow_fast (struct obstack *obstack, int data_char); 246 void obstack_ptr_grow_fast (struct obstack *obstack, const void *data); 247 void obstack_int_grow_fast (struct obstack *obstack, int data); 248 void obstack_blank_fast (struct obstack *obstack, int size); 249 250 void * obstack_base (struct obstack *obstack); 251 void * obstack_next_free (struct obstack *obstack); 252 int obstack_alignment_mask (struct obstack *obstack); 253 int obstack_chunk_size (struct obstack *obstack); 254 int obstack_memory_used (struct obstack *obstack); 255 256 #endif /* __STDC__ */ 257 258 /* Non-ANSI C cannot really support alternative functions for these macros, 259 so we do not declare them. */ 260 261 /* Error handler called when `obstack_chunk_alloc' failed to allocate 214 extern int _obstack_begin (struct obstack *, 215 _OBSTACK_SIZE_T, _OBSTACK_SIZE_T, 216 void *(*) (size_t), void (*) (void *)); 217 extern int _obstack_begin_1 (struct obstack *, 218 _OBSTACK_SIZE_T, _OBSTACK_SIZE_T, 219 void *(*) (void *, size_t), 220 void (*) (void *, void *), void *); 221 extern _OBSTACK_SIZE_T _obstack_memory_used (struct obstack *) 222 __attribute_pure__; 223 224 225 /* Error handler called when 'obstack_chunk_alloc' failed to allocate 262 226 more memory. This can be set to a user defined function which 263 227 should either abort gracefully or use longjump - but shouldn't 264 228 return. The default action is to print a message and abort. */ 265 #if defined __STDC__ && __STDC__ 266 extern void (*obstack_alloc_failed_handler) (void); 267 #else 268 extern void (*obstack_alloc_failed_handler) (); 269 #endif 270 271 /* Exit value used when `print_and_abort' is used. */ 229 extern __attribute_noreturn__ void (*obstack_alloc_failed_handler) (void); 230 231 /* Exit value used when 'print_and_abort' is used. */ 272 232 extern int obstack_exit_failure; 273 274 233 275 234 /* Pointer to beginning of object being allocated or to be allocated next. … … 277 236 because a new chunk might be needed to hold the final size. */ 278 237 279 #define obstack_base(h) (( h)->object_base)238 #define obstack_base(h) ((void *) (h)->object_base) 280 239 281 240 /* Size for allocating ordinary chunks. */ … … 285 244 /* Pointer to next byte not yet allocated in current chunk. */ 286 245 287 #define obstack_next_free(h) ((h)->next_free)246 #define obstack_next_free(h) ((void *) (h)->next_free) 288 247 289 248 /* Mask specifying low bits that should be clear in address of an object. */ … … 291 250 #define obstack_alignment_mask(h) ((h)->alignment_mask) 292 251 293 /* To prevent prototype warnings provide complete argument list in 294 standard C version. */ 295 #if defined __STDC__ && __STDC__ 296 297 # define obstack_init(h) \ 298 _obstack_begin ((h), 0, 0, \ 299 (void *(*) (long)) obstack_chunk_alloc, \ 300 (void (*) (void *)) obstack_chunk_free) 301 302 # define obstack_begin(h, size) \ 303 _obstack_begin ((h), (size), 0, \ 304 (void *(*) (long)) obstack_chunk_alloc, \ 305 (void (*) (void *)) obstack_chunk_free) 306 307 # define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ 308 _obstack_begin ((h), (size), (alignment), \ 309 (void *(*) (long)) (chunkfun), \ 310 (void (*) (void *)) (freefun)) 311 312 # define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ 313 _obstack_begin_1 ((h), (size), (alignment), \ 314 (void *(*) (void *, long)) (chunkfun), \ 315 (void (*) (void *, void *)) (freefun), (arg)) 316 317 # define obstack_chunkfun(h, newchunkfun) \ 318 ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun)) 319 320 # define obstack_freefun(h, newfreefun) \ 321 ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun)) 322 323 #else 324 325 # define obstack_init(h) \ 326 _obstack_begin ((h), 0, 0, \ 327 (void *(*) ()) obstack_chunk_alloc, \ 328 (void (*) ()) obstack_chunk_free) 329 330 # define obstack_begin(h, size) \ 331 _obstack_begin ((h), (size), 0, \ 332 (void *(*) ()) obstack_chunk_alloc, \ 333 (void (*) ()) obstack_chunk_free) 334 335 # define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ 336 _obstack_begin ((h), (size), (alignment), \ 337 (void *(*) ()) (chunkfun), \ 338 (void (*) ()) (freefun)) 339 340 # define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ 341 _obstack_begin_1 ((h), (size), (alignment), \ 342 (void *(*) ()) (chunkfun), \ 343 (void (*) ()) (freefun), (arg)) 344 345 # define obstack_chunkfun(h, newchunkfun) \ 346 ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun)) 347 348 # define obstack_freefun(h, newfreefun) \ 349 ((h) -> freefun = (void (*)()) (newfreefun)) 350 351 #endif 352 353 #define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar) 354 355 #define obstack_blank_fast(h,n) ((h)->next_free += (n)) 252 /* To prevent prototype warnings provide complete argument list. */ 253 #define obstack_init(h) \ 254 _obstack_begin ((h), 0, 0, \ 255 _OBSTACK_CAST (void *(*) (size_t), obstack_chunk_alloc), \ 256 _OBSTACK_CAST (void (*) (void *), obstack_chunk_free)) 257 258 #define obstack_begin(h, size) \ 259 _obstack_begin ((h), (size), 0, \ 260 _OBSTACK_CAST (void *(*) (size_t), obstack_chunk_alloc), \ 261 _OBSTACK_CAST (void (*) (void *), obstack_chunk_free)) 262 263 #define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ 264 _obstack_begin ((h), (size), (alignment), \ 265 _OBSTACK_CAST (void *(*) (size_t), chunkfun), \ 266 _OBSTACK_CAST (void (*) (void *), freefun)) 267 268 #define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ 269 _obstack_begin_1 ((h), (size), (alignment), \ 270 _OBSTACK_CAST (void *(*) (void *, size_t), chunkfun), \ 271 _OBSTACK_CAST (void (*) (void *, void *), freefun), arg) 272 273 #define obstack_chunkfun(h, newchunkfun) \ 274 ((void) ((h)->chunkfun.extra = (void *(*) (void *, size_t)) (newchunkfun))) 275 276 #define obstack_freefun(h, newfreefun) \ 277 ((void) ((h)->freefun.extra = (void *(*) (void *, void *)) (newfreefun))) 278 279 #define obstack_1grow_fast(h, achar) ((void) (*((h)->next_free)++ = (achar))) 280 281 #define obstack_blank_fast(h, n) ((void) ((h)->next_free += (n))) 356 282 357 283 #define obstack_memory_used(h) _obstack_memory_used (h) 358 284 359 360 #if defined __GNUC__ && defined __STDC__ && __STDC__ 361 /* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and 362 does not implement __extension__. But that compiler doesn't define 363 __GNUC_MINOR__. */ 364 # if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__) 285 #if defined __GNUC__ || defined __clang__ 286 # if !(defined __GNUC_MINOR__ && __GNUC__ * 1000 + __GNUC_MINOR__ >= 2008 \ 287 || defined __clang__) 365 288 # define __extension__ 366 289 # endif … … 369 292 we can define these macros to compute all args only once 370 293 without using a global variable. 371 Also, we can avoid using the `temp' slot, to make faster code. */ 372 373 # define obstack_object_size(OBSTACK) \ 374 __extension__ \ 375 ({ struct obstack *__o = (OBSTACK); \ 376 (unsigned) (__o->next_free - __o->object_base); }) 377 378 # define obstack_room(OBSTACK) \ 379 __extension__ \ 380 ({ struct obstack *__o = (OBSTACK); \ 381 (unsigned) (__o->chunk_limit - __o->next_free); }) 382 383 # define obstack_make_room(OBSTACK,length) \ 384 __extension__ \ 385 ({ struct obstack *__o = (OBSTACK); \ 386 int __len = (length); \ 387 if (__o->chunk_limit - __o->next_free < __len) \ 388 _obstack_newchunk (__o, __len); \ 389 (void) 0; }) 390 391 # define obstack_empty_p(OBSTACK) \ 392 __extension__ \ 393 ({ struct obstack *__o = (OBSTACK); \ 394 (__o->chunk->prev == 0 && __o->next_free - __o->chunk->contents == 0); }) 395 396 # define obstack_grow(OBSTACK,where,length) \ 397 __extension__ \ 398 ({ struct obstack *__o = (OBSTACK); \ 399 int __len = (length); \ 400 if (__o->next_free + __len > __o->chunk_limit) \ 401 _obstack_newchunk (__o, __len); \ 402 _obstack_memcpy (__o->next_free, (where), __len); \ 403 __o->next_free += __len; \ 404 (void) 0; }) 405 406 # define obstack_grow0(OBSTACK,where,length) \ 407 __extension__ \ 408 ({ struct obstack *__o = (OBSTACK); \ 409 int __len = (length); \ 410 if (__o->next_free + __len + 1 > __o->chunk_limit) \ 411 _obstack_newchunk (__o, __len + 1); \ 412 _obstack_memcpy (__o->next_free, (where), __len); \ 413 __o->next_free += __len; \ 414 *(__o->next_free)++ = 0; \ 415 (void) 0; }) 416 417 # define obstack_1grow(OBSTACK,datum) \ 418 __extension__ \ 419 ({ struct obstack *__o = (OBSTACK); \ 420 if (__o->next_free + 1 > __o->chunk_limit) \ 421 _obstack_newchunk (__o, 1); \ 422 *(__o->next_free)++ = (datum); \ 423 (void) 0; }) 294 Also, we can avoid using the 'temp' slot, to make faster code. */ 295 296 # define obstack_object_size(OBSTACK) \ 297 __extension__ \ 298 ({ struct obstack const *__o = (OBSTACK); \ 299 (_OBSTACK_SIZE_T) (__o->next_free - __o->object_base); }) 300 301 /* The local variable is named __o1 to avoid a shadowed variable 302 warning when invoked from other obstack macros. */ 303 # define obstack_room(OBSTACK) \ 304 __extension__ \ 305 ({ struct obstack const *__o1 = (OBSTACK); \ 306 (_OBSTACK_SIZE_T) (__o1->chunk_limit - __o1->next_free); }) 307 308 # define obstack_make_room(OBSTACK, length) \ 309 __extension__ \ 310 ({ struct obstack *__o = (OBSTACK); \ 311 _OBSTACK_SIZE_T __len = (length); \ 312 if (obstack_room (__o) < __len) \ 313 _obstack_newchunk (__o, __len); \ 314 (void) 0; }) 315 316 # define obstack_empty_p(OBSTACK) \ 317 __extension__ \ 318 ({ struct obstack const *__o = (OBSTACK); \ 319 (__o->chunk->prev == 0 \ 320 && __o->next_free == __PTR_ALIGN ((char *) __o->chunk, \ 321 __o->chunk->contents, \ 322 __o->alignment_mask)); }) 323 324 # define obstack_grow(OBSTACK, where, length) \ 325 __extension__ \ 326 ({ struct obstack *__o = (OBSTACK); \ 327 _OBSTACK_SIZE_T __len = (length); \ 328 if (obstack_room (__o) < __len) \ 329 _obstack_newchunk (__o, __len); \ 330 memcpy (__o->next_free, where, __len); \ 331 __o->next_free += __len; \ 332 (void) 0; }) 333 334 # define obstack_grow0(OBSTACK, where, length) \ 335 __extension__ \ 336 ({ struct obstack *__o = (OBSTACK); \ 337 _OBSTACK_SIZE_T __len = (length); \ 338 if (obstack_room (__o) < __len + 1) \ 339 _obstack_newchunk (__o, __len + 1); \ 340 memcpy (__o->next_free, where, __len); \ 341 __o->next_free += __len; \ 342 *(__o->next_free)++ = 0; \ 343 (void) 0; }) 344 345 # define obstack_1grow(OBSTACK, datum) \ 346 __extension__ \ 347 ({ struct obstack *__o = (OBSTACK); \ 348 if (obstack_room (__o) < 1) \ 349 _obstack_newchunk (__o, 1); \ 350 obstack_1grow_fast (__o, datum); }) 424 351 425 352 /* These assume that the obstack alignment is good enough for pointers … … 427 354 shares that much alignment. */ 428 355 429 # define obstack_ptr_grow(OBSTACK,datum) \ 430 __extension__ \ 431 ({ struct obstack *__o = (OBSTACK); \ 432 if (__o->next_free + sizeof (void *) > __o->chunk_limit) \ 433 _obstack_newchunk (__o, sizeof (void *)); \ 434 ((*((void **)__o->next_free) = (datum)), (__o->next_free += sizeof (void *))); \ 435 (void) 0; }) 436 437 # define obstack_int_grow(OBSTACK,datum) \ 438 __extension__ \ 439 ({ struct obstack *__o = (OBSTACK); \ 440 if (__o->next_free + sizeof (int) > __o->chunk_limit) \ 441 _obstack_newchunk (__o, sizeof (int)); \ 442 ((*((int *)__o->next_free) = (datum)), (__o->next_free += sizeof (int ))); \ 443 (void) 0; }) 444 445 # define obstack_ptr_grow_fast(h,aptr) \ 446 (((*((void **) (h)->next_free) = (aptr)), ( (h)->next_free += sizeof (void *)))) 447 448 # define obstack_int_grow_fast(h,aint) \ 449 (((*((int *) (h)->next_free) = (aint)), ( (h)->next_free += sizeof (int )))) 450 451 # define obstack_blank(OBSTACK,length) \ 452 __extension__ \ 453 ({ struct obstack *__o = (OBSTACK); \ 454 int __len = (length); \ 455 if (__o->chunk_limit - __o->next_free < __len) \ 456 _obstack_newchunk (__o, __len); \ 457 __o->next_free += __len; \ 458 (void) 0; }) 459 460 # define obstack_alloc(OBSTACK,length) \ 461 __extension__ \ 462 ({ struct obstack *__h = (OBSTACK); \ 463 obstack_blank (__h, (length)); \ 464 obstack_finish (__h); }) 465 466 # define obstack_copy(OBSTACK,where,length) \ 467 __extension__ \ 468 ({ struct obstack *__h = (OBSTACK); \ 469 obstack_grow (__h, (where), (length)); \ 470 obstack_finish (__h); }) 471 472 # define obstack_copy0(OBSTACK,where,length) \ 473 __extension__ \ 474 ({ struct obstack *__h = (OBSTACK); \ 475 obstack_grow0 (__h, (where), (length)); \ 476 obstack_finish (__h); }) 477 478 /* The local variable is named __o1 to avoid a name conflict 479 when obstack_blank is called. */ 480 # define obstack_finish(OBSTACK) \ 481 __extension__ \ 482 ({ struct obstack *__o1 = (OBSTACK); \ 483 void *value; \ 484 value = (void *) __o1->object_base; \ 485 if (__o1->next_free == value) \ 486 __o1->maybe_empty_object = 1; \ 487 __o1->next_free \ 488 = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\ 489 & ~ (__o1->alignment_mask)); \ 490 if (__o1->next_free - (char *)__o1->chunk \ 491 > __o1->chunk_limit - (char *)__o1->chunk) \ 492 __o1->next_free = __o1->chunk_limit; \ 493 __o1->object_base = __o1->next_free; \ 494 value; }) 495 496 # define obstack_free(OBSTACK, OBJ) \ 497 __extension__ \ 498 ({ struct obstack *__o = (OBSTACK); \ 499 void *__obj = (OBJ); \ 500 if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \ 501 __o->next_free = __o->object_base = (char *)__obj; \ 502 else (obstack_free) (__o, __obj); }) 503 504 505 #else /* not __GNUC__ or not __STDC__ */ 506 507 # define obstack_object_size(h) \ 508 (unsigned) ((h)->next_free - (h)->object_base) 509 510 # define obstack_room(h) \ 511 (unsigned) ((h)->chunk_limit - (h)->next_free) 512 513 # define obstack_empty_p(h) \ 514 ((h)->chunk->prev == 0 && (h)->next_free - (h)->chunk->contents == 0) 356 # define obstack_ptr_grow(OBSTACK, datum) \ 357 __extension__ \ 358 ({ struct obstack *__o = (OBSTACK); \ 359 if (obstack_room (__o) < sizeof (void *)) \ 360 _obstack_newchunk (__o, sizeof (void *)); \ 361 obstack_ptr_grow_fast (__o, datum); }) 362 363 # define obstack_int_grow(OBSTACK, datum) \ 364 __extension__ \ 365 ({ struct obstack *__o = (OBSTACK); \ 366 if (obstack_room (__o) < sizeof (int)) \ 367 _obstack_newchunk (__o, sizeof (int)); \ 368 obstack_int_grow_fast (__o, datum); }) 369 370 # define obstack_ptr_grow_fast(OBSTACK, aptr) \ 371 __extension__ \ 372 ({ struct obstack *__o1 = (OBSTACK); \ 373 void *__p1 = __o1->next_free; \ 374 *(const void **) __p1 = (aptr); \ 375 __o1->next_free += sizeof (const void *); \ 376 (void) 0; }) 377 378 # define obstack_int_grow_fast(OBSTACK, aint) \ 379 __extension__ \ 380 ({ struct obstack *__o1 = (OBSTACK); \ 381 void *__p1 = __o1->next_free; \ 382 *(int *) __p1 = (aint); \ 383 __o1->next_free += sizeof (int); \ 384 (void) 0; }) 385 386 # define obstack_blank(OBSTACK, length) \ 387 __extension__ \ 388 ({ struct obstack *__o = (OBSTACK); \ 389 _OBSTACK_SIZE_T __len = (length); \ 390 if (obstack_room (__o) < __len) \ 391 _obstack_newchunk (__o, __len); \ 392 obstack_blank_fast (__o, __len); }) 393 394 # define obstack_alloc(OBSTACK, length) \ 395 __extension__ \ 396 ({ struct obstack *__h = (OBSTACK); \ 397 obstack_blank (__h, (length)); \ 398 obstack_finish (__h); }) 399 400 # define obstack_copy(OBSTACK, where, length) \ 401 __extension__ \ 402 ({ struct obstack *__h = (OBSTACK); \ 403 obstack_grow (__h, (where), (length)); \ 404 obstack_finish (__h); }) 405 406 # define obstack_copy0(OBSTACK, where, length) \ 407 __extension__ \ 408 ({ struct obstack *__h = (OBSTACK); \ 409 obstack_grow0 (__h, (where), (length)); \ 410 obstack_finish (__h); }) 411 412 /* The local variable is named __o1 to avoid a shadowed variable 413 warning when invoked from other obstack macros, typically obstack_free. */ 414 # define obstack_finish(OBSTACK) \ 415 __extension__ \ 416 ({ struct obstack *__o1 = (OBSTACK); \ 417 void *__value = (void *) __o1->object_base; \ 418 if (__o1->next_free == __value) \ 419 __o1->maybe_empty_object = 1; \ 420 __o1->next_free \ 421 = __PTR_ALIGN (__o1->object_base, __o1->next_free, \ 422 __o1->alignment_mask); \ 423 if ((size_t) (__o1->next_free - (char *) __o1->chunk) \ 424 > (size_t) (__o1->chunk_limit - (char *) __o1->chunk)) \ 425 __o1->next_free = __o1->chunk_limit; \ 426 __o1->object_base = __o1->next_free; \ 427 __value; }) 428 429 # define obstack_free(OBSTACK, OBJ) \ 430 __extension__ \ 431 ({ struct obstack *__o = (OBSTACK); \ 432 void *__obj = (void *) (OBJ); \ 433 if (__obj > (void *) __o->chunk && __obj < (void *) __o->chunk_limit) \ 434 __o->next_free = __o->object_base = (char *) __obj; \ 435 else \ 436 _obstack_free (__o, __obj); }) 437 438 #else /* not __GNUC__ */ 439 440 # define obstack_object_size(h) \ 441 ((_OBSTACK_SIZE_T) ((h)->next_free - (h)->object_base)) 442 443 # define obstack_room(h) \ 444 ((_OBSTACK_SIZE_T) ((h)->chunk_limit - (h)->next_free)) 445 446 # define obstack_empty_p(h) \ 447 ((h)->chunk->prev == 0 \ 448 && (h)->next_free == __PTR_ALIGN ((char *) (h)->chunk, \ 449 (h)->chunk->contents, \ 450 (h)->alignment_mask)) 515 451 516 452 /* Note that the call to _obstack_newchunk is enclosed in (..., 0) … … 520 456 but some compilers won't accept it. */ 521 457 522 # define obstack_make_room(h,length) \ 523 ( (h)->temp = (length), \ 524 (((h)->next_free + (h)->temp > (h)->chunk_limit) \ 525 ? (_obstack_newchunk ((h), (h)->temp), 0) : 0)) 526 527 # define obstack_grow(h,where,length) \ 528 ( (h)->temp = (length), \ 529 (((h)->next_free + (h)->temp > (h)->chunk_limit) \ 530 ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \ 531 _obstack_memcpy ((h)->next_free, (where), (h)->temp), \ 532 (h)->next_free += (h)->temp) 533 534 # define obstack_grow0(h,where,length) \ 535 ( (h)->temp = (length), \ 536 (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \ 537 ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \ 538 _obstack_memcpy ((h)->next_free, (where), (h)->temp), \ 539 (h)->next_free += (h)->temp, \ 540 *((h)->next_free)++ = 0) 541 542 # define obstack_1grow(h,datum) \ 543 ( (((h)->next_free + 1 > (h)->chunk_limit) \ 544 ? (_obstack_newchunk ((h), 1), 0) : 0), \ 545 (*((h)->next_free)++ = (datum))) 546 547 # define obstack_ptr_grow(h,datum) \ 548 ( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \ 549 ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \ 550 (*((const char **) (((h)->next_free+=sizeof(char *))-sizeof(char *))) = (datum))) 551 552 # define obstack_int_grow(h,datum) \ 553 ( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \ 554 ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \ 555 (*((int *) (((h)->next_free+=sizeof(int))-sizeof(int))) = (datum))) 556 557 # define obstack_ptr_grow_fast(h,aptr) \ 558 (((*((const char **) (h)->next_free) = (aptr)), ( (h)->next_free += sizeof (const char *)))) 559 560 # define obstack_int_grow_fast(h,aint) \ 561 (((*((int *) (h)->next_free) = (aint)), ( (h)->next_free += sizeof (int )))) 562 563 # define obstack_blank(h,length) \ 564 ( (h)->temp = (length), \ 565 (((h)->chunk_limit - (h)->next_free < (h)->temp) \ 566 ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \ 567 ((h)->next_free += (h)->temp)) 568 569 # define obstack_alloc(h,length) \ 570 (obstack_blank ((h), (length)), obstack_finish ((h))) 571 572 # define obstack_copy(h,where,length) \ 573 (obstack_grow ((h), (where), (length)), obstack_finish ((h))) 574 575 # define obstack_copy0(h,where,length) \ 576 (obstack_grow0 ((h), (where), (length)), obstack_finish ((h))) 577 578 # define obstack_finish(h) \ 579 ( ((h)->next_free == (h)->object_base \ 580 ? (((h)->maybe_empty_object = 1), 0) \ 581 : 0), \ 582 (h)->temp = __PTR_TO_INT ((h)->object_base), \ 583 (h)->next_free \ 584 = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \ 585 & ~ ((h)->alignment_mask)), \ 586 (((h)->next_free - (char *) (h)->chunk \ 587 > (h)->chunk_limit - (char *) (h)->chunk) \ 588 ? ((h)->next_free = (h)->chunk_limit) : 0), \ 589 (h)->object_base = (h)->next_free, \ 590 __INT_TO_PTR ((h)->temp)) 591 592 # if defined __STDC__ && __STDC__ 593 # define obstack_free(h,obj) \ 594 ( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \ 595 (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\ 596 ? (int) ((h)->next_free = (h)->object_base \ 597 = (h)->temp + (char *) (h)->chunk) \ 598 : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0))) 599 # else 600 # define obstack_free(h,obj) \ 601 ( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \ 602 (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\ 603 ? (int) ((h)->next_free = (h)->object_base \ 604 = (h)->temp + (char *) (h)->chunk) \ 605 : (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0))) 606 # endif 607 608 #endif /* not __GNUC__ or not __STDC__ */ 458 # define obstack_make_room(h, length) \ 459 ((h)->temp.i = (length), \ 460 ((obstack_room (h) < (h)->temp.i) \ 461 ? (_obstack_newchunk (h, (h)->temp.i), 0) : 0), \ 462 (void) 0) 463 464 # define obstack_grow(h, where, length) \ 465 ((h)->temp.i = (length), \ 466 ((obstack_room (h) < (h)->temp.i) \ 467 ? (_obstack_newchunk ((h), (h)->temp.i), 0) : 0), \ 468 memcpy ((h)->next_free, where, (h)->temp.i), \ 469 (h)->next_free += (h)->temp.i, \ 470 (void) 0) 471 472 # define obstack_grow0(h, where, length) \ 473 ((h)->temp.i = (length), \ 474 ((obstack_room (h) < (h)->temp.i + 1) \ 475 ? (_obstack_newchunk ((h), (h)->temp.i + 1), 0) : 0), \ 476 memcpy ((h)->next_free, where, (h)->temp.i), \ 477 (h)->next_free += (h)->temp.i, \ 478 *((h)->next_free)++ = 0, \ 479 (void) 0) 480 481 # define obstack_1grow(h, datum) \ 482 (((obstack_room (h) < 1) \ 483 ? (_obstack_newchunk ((h), 1), 0) : 0), \ 484 obstack_1grow_fast (h, datum)) 485 486 # define obstack_ptr_grow(h, datum) \ 487 (((obstack_room (h) < sizeof (char *)) \ 488 ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \ 489 obstack_ptr_grow_fast (h, datum)) 490 491 # define obstack_int_grow(h, datum) \ 492 (((obstack_room (h) < sizeof (int)) \ 493 ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \ 494 obstack_int_grow_fast (h, datum)) 495 496 # define obstack_ptr_grow_fast(h, aptr) \ 497 (((const void **) ((h)->next_free += sizeof (void *)))[-1] = (aptr), \ 498 (void) 0) 499 500 # define obstack_int_grow_fast(h, aint) \ 501 (((int *) ((h)->next_free += sizeof (int)))[-1] = (aint), \ 502 (void) 0) 503 504 # define obstack_blank(h, length) \ 505 ((h)->temp.i = (length), \ 506 ((obstack_room (h) < (h)->temp.i) \ 507 ? (_obstack_newchunk ((h), (h)->temp.i), 0) : 0), \ 508 obstack_blank_fast (h, (h)->temp.i)) 509 510 # define obstack_alloc(h, length) \ 511 (obstack_blank ((h), (length)), obstack_finish ((h))) 512 513 # define obstack_copy(h, where, length) \ 514 (obstack_grow ((h), (where), (length)), obstack_finish ((h))) 515 516 # define obstack_copy0(h, where, length) \ 517 (obstack_grow0 ((h), (where), (length)), obstack_finish ((h))) 518 519 # define obstack_finish(h) \ 520 (((h)->next_free == (h)->object_base \ 521 ? (((h)->maybe_empty_object = 1), 0) \ 522 : 0), \ 523 (h)->temp.p = (h)->object_base, \ 524 (h)->next_free \ 525 = __PTR_ALIGN ((h)->object_base, (h)->next_free, \ 526 (h)->alignment_mask), \ 527 (((size_t) ((h)->next_free - (char *) (h)->chunk) \ 528 > (size_t) ((h)->chunk_limit - (char *) (h)->chunk)) \ 529 ? ((h)->next_free = (h)->chunk_limit) : 0), \ 530 (h)->object_base = (h)->next_free, \ 531 (h)->temp.p) 532 533 # define obstack_free(h, obj) \ 534 ((h)->temp.p = (void *) (obj), \ 535 (((h)->temp.p > (void *) (h)->chunk \ 536 && (h)->temp.p < (void *) (h)->chunk_limit) \ 537 ? (void) ((h)->next_free = (h)->object_base = (char *) (h)->temp.p) \ 538 : _obstack_free ((h), (h)->temp.p))) 539 540 #endif /* not __GNUC__ */ 609 541 610 542 #ifdef __cplusplus 611 } 612 #endif 613 614 #endif /* obstack.h*/543 } /* C++ */ 544 #endif 545 546 #endif /* _OBSTACK_H */ -
trunk/src/sed/lib/regcomp.c
r3550 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 19 20 #ifdef _LIBC 21 # include <locale/weight.h> 22 #endif 20 23 21 24 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, … … 25 28 char *fastmap); 26 29 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); 27 #ifdef RE_ENABLE_I18N28 30 static void free_charset (re_charset_t *cset); 29 #endif /* RE_ENABLE_I18N */30 31 static void free_workarea_compile (regex_t *preg); 31 32 static reg_errcode_t create_initial_state (re_dfa_t *dfa); 32 #ifdef RE_ENABLE_I18N33 33 static void optimize_utf8 (re_dfa_t *dfa); 34 #endif35 34 static reg_errcode_t analyze (regex_t *preg); 36 35 static reg_errcode_t preorder (bin_tree_t *root, … … 47 46 static reg_errcode_t calc_next (void *extra, bin_tree_t *node); 48 47 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); 49 static int duplicate_node (re_dfa_t *dfa, intorg_idx, unsigned int constraint);50 static int search_duplicated_node (const re_dfa_t *dfa, intorg_node,48 static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint); 49 static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node, 51 50 unsigned int constraint); 52 51 static reg_errcode_t calc_eclosure (re_dfa_t *dfa); 53 52 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, 54 int node, introot);53 Idx node, bool root); 55 54 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); 56 static intfetch_number (re_string_t *input, re_token_t *token,55 static Idx fetch_number (re_string_t *input, re_token_t *token, 57 56 reg_syntax_t syntax); 58 57 static int peek_token (re_token_t *token, re_string_t *input, 59 reg_syntax_t syntax) internal_function;58 reg_syntax_t syntax); 60 59 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, 61 60 reg_syntax_t syntax, reg_errcode_t *err); 62 61 static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, 63 62 re_token_t *token, reg_syntax_t syntax, 64 intnest, reg_errcode_t *err);63 Idx nest, reg_errcode_t *err); 65 64 static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, 66 65 re_token_t *token, reg_syntax_t syntax, 67 intnest, reg_errcode_t *err);66 Idx nest, reg_errcode_t *err); 68 67 static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, 69 68 re_token_t *token, reg_syntax_t syntax, 70 intnest, reg_errcode_t *err);69 Idx nest, reg_errcode_t *err); 71 70 static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, 72 71 re_token_t *token, reg_syntax_t syntax, 73 intnest, reg_errcode_t *err);72 Idx nest, reg_errcode_t *err); 74 73 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, 75 74 re_dfa_t *dfa, re_token_t *token, … … 83 82 re_dfa_t *dfa, 84 83 reg_syntax_t syntax, 85 intaccept_hyphen);84 bool accept_hyphen); 86 85 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, 87 86 re_string_t *regexp, 88 87 re_token_t *token); 89 #ifdef RE_ENABLE_I18N90 88 static reg_errcode_t build_equiv_class (bitset_t sbcset, 91 89 re_charset_t *mbcset, 92 int*equiv_class_alloc,90 Idx *equiv_class_alloc, 93 91 const unsigned char *name); 94 92 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, 95 93 bitset_t sbcset, 96 94 re_charset_t *mbcset, 97 int*char_class_alloc,98 const unsignedchar *class_name,95 Idx *char_class_alloc, 96 const char *class_name, 99 97 reg_syntax_t syntax); 100 #else /* not RE_ENABLE_I18N */101 static reg_errcode_t build_equiv_class (bitset_t sbcset,102 const unsigned char *name);103 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,104 bitset_t sbcset,105 const unsigned char *class_name,106 reg_syntax_t syntax);107 #endif /* not RE_ENABLE_I18N */108 98 static bin_tree_t *build_charclass_op (re_dfa_t *dfa, 109 99 RE_TRANSLATE_TYPE trans, 110 const unsignedchar *class_name,111 const unsignedchar *extra,112 intnon_match, reg_errcode_t *err);100 const char *class_name, 101 const char *extra, 102 bool non_match, reg_errcode_t *err); 113 103 static bin_tree_t *create_tree (re_dfa_t *dfa, 114 104 bin_tree_t *left, bin_tree_t *right, … … 128 118 but why not be nice? */ 129 119 130 const char __re_error_msgid[] attribute_hidden=120 static const char __re_error_msgid[] = 131 121 { 132 122 #define REG_NOERROR_IDX 0 … … 152 142 "\0" 153 143 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") 154 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */144 gettext_noop ("Unmatched [, [^, [:, [., or [=") /* REG_EBRACK */ 155 145 "\0" 156 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")146 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=") 157 147 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ 158 148 "\0" … … 182 172 }; 183 173 184 const size_t __re_error_msgid_idx[] attribute_hidden=174 static const size_t __re_error_msgid_idx[] = 185 175 { 186 176 REG_NOERROR_IDX, … … 210 200 Returns 0 if the pattern was valid, otherwise an error string. 211 201 212 Assumes the `allocated' (and perhaps `buffer') and `translate' fields202 Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields 213 203 are set in BUFP on entry. */ 214 204 215 205 const char * 216 re_compile_pattern (pattern, length, bufp) 217 const char *pattern; 218 size_t length; 219 struct re_pattern_buffer *bufp; 206 re_compile_pattern (const char *pattern, size_t length, 207 struct re_pattern_buffer *bufp) 220 208 { 221 209 reg_errcode_t ret; … … 235 223 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); 236 224 } 237 #ifdef _LIBC238 225 weak_alias (__re_compile_pattern, re_compile_pattern) 239 #endif 240 241 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 226 227 /* Set by 're_set_syntax' to the current regexp syntax to recognize. Can 242 228 also be assigned to arbitrarily: each pattern buffer stores its own 243 229 syntax, so it can be changed between regex compilations. */ … … 255 241 256 242 reg_syntax_t 257 re_set_syntax (syntax) 258 reg_syntax_t syntax; 243 re_set_syntax (reg_syntax_t syntax) 259 244 { 260 245 reg_syntax_t ret = re_syntax_options; … … 263 248 return ret; 264 249 } 265 #ifdef _LIBC266 250 weak_alias (__re_set_syntax, re_set_syntax) 267 #endif268 251 269 252 int 270 re_compile_fastmap (bufp) 271 struct re_pattern_buffer *bufp; 272 { 273 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; 253 re_compile_fastmap (struct re_pattern_buffer *bufp) 254 { 255 re_dfa_t *dfa = bufp->buffer; 274 256 char *fastmap = bufp->fastmap; 275 257 … … 285 267 return 0; 286 268 } 287 #ifdef _LIBC288 269 weak_alias (__re_compile_fastmap, re_compile_fastmap) 289 #endif 290 291 static inline void 292 __attribute ((always_inline)) 293 re_set_fastmap (char *fastmap, int icase, int ch) 270 271 static __always_inline void 272 re_set_fastmap (char *fastmap, bool icase, int ch) 294 273 { 295 274 fastmap[ch] = 1; … … 305 284 char *fastmap) 306 285 { 307 re_dfa_t *dfa = (re_dfa_t *)bufp->buffer;308 intnode_cnt;309 inticase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));286 re_dfa_t *dfa = bufp->buffer; 287 Idx node_cnt; 288 bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); 310 289 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) 311 290 { 312 intnode = init_state->nodes.elems[node_cnt];291 Idx node = init_state->nodes.elems[node_cnt]; 313 292 re_token_type_t type = dfa->nodes[node].type; 314 293 … … 316 295 { 317 296 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); 318 #ifdef RE_ENABLE_I18N319 297 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 320 298 { 321 unsigned char *buf = alloca (dfa->mb_cur_max), *p; 299 unsigned char buf[MB_LEN_MAX]; 300 unsigned char *p; 322 301 wchar_t wc; 323 302 mbstate_t state; … … 330 309 *p++ = dfa->nodes[node].opr.c; 331 310 memset (&state, '\0', sizeof (state)); 332 if ( mbrtowc (&wc, (const char *) buf, p - buf,333 &state) == p - buf334 && (__wcrtomb ((char *) buf, towlower (wc), &state)311 if (__mbrtowc (&wc, (const char *) buf, p - buf, 312 &state) == p - buf 313 && (__wcrtomb ((char *) buf, __towlower (wc), &state) 335 314 != (size_t) -1)) 336 re_set_fastmap (fastmap, 0, buf[0]);315 re_set_fastmap (fastmap, false, buf[0]); 337 316 } 338 #endif339 317 } 340 318 else if (type == SIMPLE_BRACKET) … … 350 328 } 351 329 } 352 #ifdef RE_ENABLE_I18N353 330 else if (type == COMPLEX_BRACKET) 354 331 { 355 int i;356 332 re_charset_t *cset = dfa->nodes[node].opr.mbcset; 357 if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes 358 || cset->nranges || cset->nchar_classes) 359 { 360 # ifdef _LIBC 361 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) 333 Idx i; 334 335 #ifdef _LIBC 336 /* See if we have to try all bytes which start multiple collation 337 elements. 338 e.g. In da_DK, we want to catch 'a' since "aa" is a valid 339 collation element, and don't catch 'b' since 'b' is 340 the only collation element which starts from 'b' (and 341 it is caught by SIMPLE_BRACKET). */ 342 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 343 && (cset->ncoll_syms || cset->nranges)) 362 344 { 363 /* In this case we want to catch the bytes which are364 the first byte of any collation elements.365 e.g. In da_DK, we want to catch 'a' since "aa"366 is a valid collation element, and don't catch367 'b' since 'b' is the only collation element368 which starts from 'b'. */369 345 const int32_t *table = (const int32_t *) 370 346 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); … … 373 349 re_set_fastmap (fastmap, icase, i); 374 350 } 375 # else 376 if (dfa->mb_cur_max > 1) 377 for (i = 0; i < SBC_MAX; ++i) 378 if (__btowc (i) == WEOF) 379 re_set_fastmap (fastmap, icase, i); 380 # endif /* not _LIBC */ 351 #endif /* _LIBC */ 352 353 /* See if we have to start the match at all multibyte characters, 354 i.e. where we would not find an invalid sequence. This only 355 applies to multibyte character sets; for single byte character 356 sets, the SIMPLE_BRACKET again suffices. */ 357 if (dfa->mb_cur_max > 1 358 && (cset->nchar_classes || cset->non_match || cset->nranges 359 #ifdef _LIBC 360 || cset->nequiv_classes 361 #endif /* _LIBC */ 362 )) 363 { 364 unsigned char c = 0; 365 do 366 { 367 mbstate_t mbs; 368 memset (&mbs, 0, sizeof (mbs)); 369 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) 370 re_set_fastmap (fastmap, false, (int) c); 371 } 372 while (++c != 0); 381 373 } 382 for (i = 0; i < cset->nmbchars; ++i) 374 375 else 383 376 { 384 char buf[256]; 385 mbstate_t state; 386 memset (&state, '\0', sizeof (state)); 387 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) 388 re_set_fastmap (fastmap, icase, *(unsigned char *) buf); 389 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 377 /* ... Else catch all bytes which can start the mbchars. */ 378 for (i = 0; i < cset->nmbchars; ++i) 390 379 { 391 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) 392 != (size_t) -1) 393 re_set_fastmap (fastmap, 0, *(unsigned char *) buf); 380 char buf[256]; 381 mbstate_t state; 382 memset (&state, '\0', sizeof (state)); 383 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) 384 re_set_fastmap (fastmap, icase, *(unsigned char *) buf); 385 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 386 { 387 if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state) 388 != (size_t) -1) 389 re_set_fastmap (fastmap, false, *(unsigned char *) buf); 390 } 394 391 } 395 392 } 396 393 } 397 #endif /* RE_ENABLE_I18N */ 398 else if (type == OP_PERIOD 399 #ifdef RE_ENABLE_I18N 400 || type == OP_UTF8_PERIOD 401 #endif /* RE_ENABLE_I18N */ 402 || type == END_OF_RE) 394 else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == END_OF_RE) 403 395 { 404 396 memset (fastmap, '\1', sizeof (char) * SBC_MAX); … … 417 409 since POSIX says we shouldn't. Thus, we set 418 410 419 `buffer' to the compiled pattern;420 `used' to the length of the compiled pattern;421 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the411 'buffer' to the compiled pattern; 412 'used' to the length of the compiled pattern; 413 'syntax' to RE_SYNTAX_POSIX_EXTENDED if the 422 414 REG_EXTENDED bit in CFLAGS is set; otherwise, to 423 415 RE_SYNTAX_POSIX_BASIC; 424 `newline_anchor' to REG_NEWLINE being set in CFLAGS;425 `fastmap' to an allocated space for the fastmap;426 `fastmap_accurate' to zero;427 `re_nsub' to the number of subexpressions in PATTERN.416 'newline_anchor' to REG_NEWLINE being set in CFLAGS; 417 'fastmap' to an allocated space for the fastmap; 418 'fastmap_accurate' to zero; 419 're_nsub' to the number of subexpressions in PATTERN. 428 420 429 421 PATTERN is the address of the pattern string. … … 448 440 449 441 int 450 regcomp (preg, pattern, cflags) 451 regex_t *__restrict preg; 452 const char *__restrict pattern; 453 int cflags; 442 regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags) 454 443 { 455 444 reg_errcode_t ret; … … 463 452 /* Try to allocate space for the fastmap. */ 464 453 preg->fastmap = re_malloc (char, SBC_MAX); 465 if ( BE (preg->fastmap == NULL, 0))454 if (__glibc_unlikely (preg->fastmap == NULL)) 466 455 return REG_ESPACE; 467 456 … … 489 478 490 479 /* We have already checked preg->fastmap != NULL. */ 491 if ( BE (ret == REG_NOERROR, 1))480 if (__glibc_likely (ret == REG_NOERROR)) 492 481 /* Compute the fastmap now, since regexec cannot modify the pattern 493 482 buffer. This function never fails in this implementation. */ … … 502 491 return (int) ret; 503 492 } 504 #ifdef _LIBC 493 libc_hidden_def (__regcomp) 505 494 weak_alias (__regcomp, regcomp) 506 #endif507 495 508 496 /* Returns a message corresponding to an error code, ERRCODE, returned … … 510 498 511 499 size_t 512 regerror (errcode, preg, errbuf, errbuf_size) 513 int errcode; 514 const regex_t *__restrict preg; 515 char *__restrict errbuf; 516 size_t errbuf_size; 500 regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf, 501 size_t errbuf_size) 517 502 { 518 503 const char *msg; 519 504 size_t msg_size; 520 521 if (BE (errcode < 0 522 || errcode >= (int) (sizeof (__re_error_msgid_idx) 523 / sizeof (__re_error_msgid_idx[0])), 0)) 505 int nerrcodes = sizeof __re_error_msgid_idx / sizeof __re_error_msgid_idx[0]; 506 507 if (__glibc_unlikely (errcode < 0 || errcode >= nerrcodes)) 524 508 /* Only error codes returned by the rest of the code should be passed 525 509 to this routine. If we are given anything else, or if other regex … … 532 516 msg_size = strlen (msg) + 1; /* Includes the null. */ 533 517 534 if (BE (errbuf_size != 0, 1)) 535 { 536 if (BE (msg_size > errbuf_size, 0)) 537 { 538 #if defined HAVE_MEMPCPY || defined _LIBC 539 *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; 540 #else 541 memcpy (errbuf, msg, errbuf_size - 1); 542 errbuf[errbuf_size - 1] = 0; 543 #endif 544 } 545 else 546 memcpy (errbuf, msg, msg_size); 518 if (__glibc_likely (errbuf_size != 0)) 519 { 520 size_t cpy_size = msg_size; 521 if (__glibc_unlikely (msg_size > errbuf_size)) 522 { 523 cpy_size = errbuf_size - 1; 524 errbuf[cpy_size] = '\0'; 525 } 526 memcpy (errbuf, msg, cpy_size); 547 527 } 548 528 549 529 return msg_size; 550 530 } 551 #ifdef _LIBC552 531 weak_alias (__regerror, regerror) 553 #endif 554 555 556 #ifdef RE_ENABLE_I18N 532 533 557 534 /* This static array is used for the map to single-byte characters when 558 535 UTF-8 is used. Otherwise we would allocate memory just to initialize … … 562 539 { 563 540 /* Set the first 128 bits. */ 564 #if def _MSC_VER565 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, 0, 0, 0, 0541 #if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__ 542 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX 566 543 #else 567 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX 544 # if 4 * BITSET_WORD_BITS < ASCII_CHARS 545 # error "bitset_word_t is narrower than 32 bits" 546 # elif 3 * BITSET_WORD_BITS < ASCII_CHARS 547 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, 548 # elif 2 * BITSET_WORD_BITS < ASCII_CHARS 549 BITSET_WORD_MAX, BITSET_WORD_MAX, 550 # elif 1 * BITSET_WORD_BITS < ASCII_CHARS 551 BITSET_WORD_MAX, 552 # endif 553 (BITSET_WORD_MAX 554 >> (SBC_MAX % BITSET_WORD_BITS == 0 555 ? 0 556 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) 568 557 #endif 569 558 }; 570 #endif571 559 572 560 … … 574 562 free_dfa_content (re_dfa_t *dfa) 575 563 { 576 inti, j;564 Idx i, j; 577 565 578 566 if (dfa->nodes) … … 603 591 free_state (state); 604 592 } 605 593 re_free (entry->array); 606 594 } 607 595 re_free (dfa->state_table); 608 #ifdef RE_ENABLE_I18N609 596 if (dfa->sb_char != utf8_sb_map) 610 597 re_free (dfa->sb_char); 611 #endif612 598 re_free (dfa->subexp_map); 613 599 #ifdef DEBUG … … 622 608 623 609 void 624 regfree (preg) 625 regex_t *preg; 626 { 627 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 628 if (BE (dfa != NULL, 1)) 629 free_dfa_content (dfa); 610 regfree (regex_t *preg) 611 { 612 re_dfa_t *dfa = preg->buffer; 613 if (__glibc_likely (dfa != NULL)) 614 { 615 lock_fini (dfa->lock); 616 free_dfa_content (dfa); 617 } 630 618 preg->buffer = NULL; 631 619 preg->allocated = 0; … … 637 625 preg->translate = NULL; 638 626 } 639 #ifdef _LIBC 627 libc_hidden_def (__regfree) 640 628 weak_alias (__regfree, regfree) 641 #endif642 629 643 630 … … 657 644 weak_function 658 645 # endif 659 re_comp (s) 660 const char *s; 646 re_comp (const char *s) 661 647 { 662 648 reg_errcode_t ret; … … 681 667 if (re_comp_buf.fastmap == NULL) 682 668 { 683 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);669 re_comp_buf.fastmap = re_malloc (char, SBC_MAX); 684 670 if (re_comp_buf.fastmap == NULL) 685 671 return (char *) gettext (__re_error_msgid … … 687 673 } 688 674 689 /* Since `re_exec' always passes NULL for the `regs' argument, we675 /* Since 're_exec' always passes NULL for the 'regs' argument, we 690 676 don't need to initialize the pattern buffer fields which affect it. */ 691 677 … … 698 684 return NULL; 699 685 700 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */686 /* Yes, we're discarding 'const' here if !HAVE_LIBINTL. */ 701 687 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); 702 688 } … … 734 720 735 721 /* Initialize the dfa. */ 736 dfa = (re_dfa_t *)preg->buffer;737 if ( BE (preg->allocated < sizeof (re_dfa_t), 0))722 dfa = preg->buffer; 723 if (__glibc_unlikely (preg->allocated < sizeof (re_dfa_t))) 738 724 { 739 725 /* If zero allocated, but buffer is non-null, try to realloc … … 745 731 return REG_ESPACE; 746 732 preg->allocated = sizeof (re_dfa_t); 747 preg->buffer = (unsigned char *)dfa;733 preg->buffer = dfa; 748 734 } 749 735 preg->used = sizeof (re_dfa_t); 750 736 751 737 err = init_dfa (dfa, length); 752 if (BE (err != REG_NOERROR, 0)) 738 if (__glibc_unlikely (err == REG_NOERROR && lock_init (dfa->lock) != 0)) 739 err = REG_ESPACE; 740 if (__glibc_unlikely (err != REG_NOERROR)) 753 741 { 754 742 free_dfa_content (dfa); … … 763 751 #endif 764 752 765 __libc_lock_init (dfa->lock);766 767 753 err = re_string_construct (®exp, pattern, length, preg->translate, 768 syntax & RE_ICASE, dfa);769 if ( BE (err != REG_NOERROR, 0))754 (syntax & RE_ICASE) != 0, dfa); 755 if (__glibc_unlikely (err != REG_NOERROR)) 770 756 { 771 757 re_compile_internal_free_return: 772 758 free_workarea_compile (preg); 773 759 re_string_destruct (®exp); 760 lock_fini (dfa->lock); 774 761 free_dfa_content (dfa); 775 762 preg->buffer = NULL; … … 781 768 preg->re_nsub = 0; 782 769 dfa->str_tree = parse (®exp, preg, syntax, &err); 783 if ( BE (dfa->str_tree == NULL, 0))770 if (__glibc_unlikely (dfa->str_tree == NULL)) 784 771 goto re_compile_internal_free_return; 785 772 786 773 /* Analyze the tree and create the nfa. */ 787 774 err = analyze (preg); 788 if ( BE (err != REG_NOERROR, 0))775 if (__glibc_unlikely (err != REG_NOERROR)) 789 776 goto re_compile_internal_free_return; 790 777 791 #ifdef RE_ENABLE_I18N792 778 /* If possible, do searching in single byte encoding to speed things up. */ 793 779 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) 794 780 optimize_utf8 (dfa); 795 #endif796 781 797 782 /* Then create the initial state of the dfa. */ … … 802 787 re_string_destruct (®exp); 803 788 804 if (BE (err != REG_NOERROR, 0)) 805 { 789 if (__glibc_unlikely (err != REG_NOERROR)) 790 { 791 lock_fini (dfa->lock); 806 792 free_dfa_content (dfa); 807 793 preg->buffer = NULL; … … 818 804 init_dfa (re_dfa_t *dfa, size_t pat_len) 819 805 { 820 unsigned int table_size;806 __re_size_t table_size; 821 807 #ifndef _LIBC 822 c har *codeset_name;808 const char *codeset_name; 823 809 #endif 810 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); 811 size_t max_object_size = 812 MAX (sizeof (struct re_state_table_entry), 813 MAX (sizeof (re_token_t), 814 MAX (sizeof (re_node_set), 815 MAX (sizeof (regmatch_t), 816 max_i18n_object_size)))); 824 817 825 818 memset (dfa, '\0', sizeof (re_dfa_t)); … … 828 821 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; 829 822 830 /* Avoid overflows. */ 831 if (pat_len == SIZE_MAX) 823 /* Avoid overflows. The extra "/ 2" is for the table_size doubling 824 calculation below, and for similar doubling calculations 825 elsewhere. And it's <= rather than <, because some of the 826 doubling calculations add 1 afterwards. */ 827 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 828 <= pat_len)) 832 829 return REG_ESPACE; 833 830 … … 855 852 if (get_crt_codepage() == MY_CP_UTF8) 856 853 # else 857 # ifdef HAVE_LANGINFO_CODESET858 854 codeset_name = nl_langinfo (CODESET); 859 # else 860 codeset_name = getenv ("LC_ALL"); 861 if (codeset_name == NULL || codeset_name[0] == '\0') 862 codeset_name = getenv ("LC_CTYPE"); 863 if (codeset_name == NULL || codeset_name[0] == '\0') 864 codeset_name = getenv ("LANG"); 865 if (codeset_name == NULL) 866 codeset_name = ""; 867 else if (strchr (codeset_name, '.') != NULL) 868 codeset_name = strchr (codeset_name, '.') + 1; 869 # endif 870 871 if (strcasecmp (codeset_name, "UTF-8") == 0 872 || strcasecmp (codeset_name, "UTF8") == 0) 855 if ((codeset_name[0] == 'U' || codeset_name[0] == 'u') 856 && (codeset_name[1] == 'T' || codeset_name[1] == 't') 857 && (codeset_name[2] == 'F' || codeset_name[2] == 'f') 858 && strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0) 873 859 # endif 874 860 dfa->is_utf8 = 1; … … 879 865 #endif 880 866 881 #ifdef RE_ENABLE_I18N882 867 if (dfa->mb_cur_max > 1) 883 868 { … … 889 874 890 875 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 891 if ( BE (dfa->sb_char == NULL, 0))876 if (__glibc_unlikely (dfa->sb_char == NULL)) 892 877 return REG_ESPACE; 893 878 … … 899 884 if (wch != WEOF) 900 885 dfa->sb_char[i] |= (bitset_word_t) 1 << j; 901 # 886 #ifndef _LIBC 902 887 if (isascii (ch) && wch != ch) 903 888 dfa->map_notascii = 1; 904 # 889 #endif 905 890 } 906 891 } 907 892 } 908 #endif 909 910 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) 893 894 if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL)) 911 895 return REG_ESPACE; 912 896 return REG_NOERROR; … … 918 902 919 903 static void 920 internal_function921 904 init_word_char (re_dfa_t *dfa) 922 905 { 923 int i, j, ch; 906 int i = 0; 907 int j; 908 int ch = 0; 924 909 dfa->word_ops_used = 1; 925 for (i = 0, ch = 0; i < BITSET_WORDS; ++i) 910 if (__glibc_likely (dfa->map_notascii == 0)) 911 { 912 bitset_word_t bits0 = 0x00000000; 913 bitset_word_t bits1 = 0x03ff0000; 914 bitset_word_t bits2 = 0x87fffffe; 915 bitset_word_t bits3 = 0x07fffffe; 916 if (BITSET_WORD_BITS == 64) 917 { 918 /* Pacify gcc -Woverflow on 32-bit platformns. */ 919 dfa->word_char[0] = bits1 << 31 << 1 | bits0; 920 dfa->word_char[1] = bits3 << 31 << 1 | bits2; 921 i = 2; 922 } 923 else if (BITSET_WORD_BITS == 32) 924 { 925 dfa->word_char[0] = bits0; 926 dfa->word_char[1] = bits1; 927 dfa->word_char[2] = bits2; 928 dfa->word_char[3] = bits3; 929 i = 4; 930 } 931 else 932 goto general_case; 933 ch = 128; 934 935 if (__glibc_likely (dfa->is_utf8)) 936 { 937 memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8); 938 return; 939 } 940 } 941 942 general_case: 943 for (; i < BITSET_WORDS; ++i) 926 944 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) 927 945 if (isalnum (ch) || ch == '_') … … 934 952 free_workarea_compile (regex_t *preg) 935 953 { 936 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;954 re_dfa_t *dfa = preg->buffer; 937 955 bin_tree_storage_t *storage, *next; 938 956 for (storage = dfa->str_tree_storage; storage; storage = next) … … 953 971 create_initial_state (re_dfa_t *dfa) 954 972 { 955 intfirst, i;973 Idx first, i; 956 974 reg_errcode_t err; 957 975 re_node_set init_nodes; … … 962 980 dfa->init_node = first; 963 981 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); 964 if ( BE (err != REG_NOERROR, 0))982 if (__glibc_unlikely (err != REG_NOERROR)) 965 983 return err; 966 984 … … 972 990 for (i = 0; i < init_nodes.nelem; ++i) 973 991 { 974 intnode_idx = init_nodes.elems[i];992 Idx node_idx = init_nodes.elems[i]; 975 993 re_token_type_t type = dfa->nodes[node_idx].type; 976 994 977 intclexp_idx;995 Idx clexp_idx; 978 996 if (type != OP_BACK_REF) 979 997 continue; … … 991 1009 if (type == OP_BACK_REF) 992 1010 { 993 intdest_idx = dfa->edests[node_idx].elems[0];1011 Idx dest_idx = dfa->edests[node_idx].elems[0]; 994 1012 if (!re_node_set_contains (&init_nodes, dest_idx)) 995 1013 { 996 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); 1014 reg_errcode_t merge_err 1015 = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); 1016 if (merge_err != REG_NOERROR) 1017 return merge_err; 997 1018 i = 0; 998 1019 } … … 1003 1024 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); 1004 1025 /* We don't check ERR here, since the initial state must not be NULL. */ 1005 if ( BE (dfa->init_state == NULL, 0))1026 if (__glibc_unlikely (dfa->init_state == NULL)) 1006 1027 return err; 1007 1028 if (dfa->init_state->has_constraint) … … 1015 1036 CONTEXT_NEWLINE 1016 1037 | CONTEXT_BEGBUF); 1017 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL 1018 || dfa->init_state_begbuf == NULL, 0)) 1038 if (__glibc_unlikely (dfa->init_state_word == NULL 1039 || dfa->init_state_nl == NULL 1040 || dfa->init_state_begbuf == NULL)) 1019 1041 return err; 1020 1042 } … … 1028 1050 1029 1051 1030 #ifdef RE_ENABLE_I18N1031 1052 /* If it is possible to do searching in single byte encoding instead of UTF-8 1032 1053 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change … … 1036 1057 optimize_utf8 (re_dfa_t *dfa) 1037 1058 { 1038 int node, i, mb_chars = 0, has_period = 0; 1059 Idx node; 1060 int i; 1061 bool mb_chars = false; 1062 bool has_period = false; 1039 1063 1040 1064 for (node = 0; node < dfa->nodes_len; ++node) … … 1042 1066 { 1043 1067 case CHARACTER: 1044 if (dfa->nodes[node].opr.c >= 0x80)1045 mb_chars = 1;1068 if (dfa->nodes[node].opr.c >= ASCII_CHARS) 1069 mb_chars = true; 1046 1070 break; 1047 1071 case ANCHOR: 1048 switch (dfa->nodes[node].opr. idx)1072 switch (dfa->nodes[node].opr.ctx_type) 1049 1073 { 1050 1074 case LINE_FIRST: … … 1054 1078 break; 1055 1079 default: 1056 /* Word anchors etc. cannot be handled. */ 1080 /* Word anchors etc. cannot be handled. It's okay to test 1081 opr.ctx_type since constraints (for all DFA nodes) are 1082 created by ORing one or more opr.ctx_type values. */ 1057 1083 return; 1058 1084 } 1059 1085 break; 1060 1086 case OP_PERIOD: 1061 has_period = 1;1062 1087 has_period = true; 1088 break; 1063 1089 case OP_BACK_REF: 1064 1090 case OP_ALT: … … 1071 1097 return; 1072 1098 case SIMPLE_BRACKET: 1073 /* Just double check. The non-ASCII range starts at 0x80. */ 1074 assert (0x80 % BITSET_WORD_BITS == 0); 1075 for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) 1076 if (dfa->nodes[node].opr.sbcset[i]) 1077 return; 1099 /* Just double check. */ 1100 { 1101 int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0 1102 ? 0 1103 : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS); 1104 for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) 1105 { 1106 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0) 1107 return; 1108 rshift = 0; 1109 } 1110 } 1078 1111 break; 1079 1112 default: … … 1085 1118 { 1086 1119 if (dfa->nodes[node].type == CHARACTER 1087 && dfa->nodes[node].opr.c >= 0x80)1120 && dfa->nodes[node].opr.c >= ASCII_CHARS) 1088 1121 dfa->nodes[node].mb_partial = 0; 1089 1122 else if (dfa->nodes[node].type == OP_PERIOD) … … 1096 1129 dfa->has_mb_node = dfa->nbackref > 0 || has_period; 1097 1130 } 1098 #endif1099 1131 1100 1132 … … 1105 1137 analyze (regex_t *preg) 1106 1138 { 1107 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;1139 re_dfa_t *dfa = preg->buffer; 1108 1140 reg_errcode_t ret; 1109 1141 1110 1142 /* Allocate arrays. */ 1111 dfa->nexts = re_malloc ( int, dfa->nodes_alloc);1112 dfa->org_indices = re_malloc ( int, dfa->nodes_alloc);1143 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc); 1144 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); 1113 1145 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); 1114 1146 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); 1115 if ( BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL1116 || dfa->eclosures == NULL, 0))1147 if (__glibc_unlikely (dfa->nexts == NULL || dfa->org_indices == NULL 1148 || dfa->edests == NULL || dfa->eclosures == NULL)) 1117 1149 return REG_ESPACE; 1118 1150 1119 dfa->subexp_map = re_malloc ( int, preg->re_nsub);1151 dfa->subexp_map = re_malloc (Idx, preg->re_nsub); 1120 1152 if (dfa->subexp_map != NULL) 1121 1153 { 1122 inti;1154 Idx i; 1123 1155 for (i = 0; i < preg->re_nsub; i++) 1124 1156 dfa->subexp_map[i] = i; … … 1129 1161 if (i == preg->re_nsub) 1130 1162 { 1131 free (dfa->subexp_map);1163 re_free (dfa->subexp_map); 1132 1164 dfa->subexp_map = NULL; 1133 1165 } … … 1135 1167 1136 1168 ret = postorder (dfa->str_tree, lower_subexps, preg); 1137 if ( BE (ret != REG_NOERROR, 0))1169 if (__glibc_unlikely (ret != REG_NOERROR)) 1138 1170 return ret; 1139 1171 ret = postorder (dfa->str_tree, calc_first, dfa); 1140 if ( BE (ret != REG_NOERROR, 0))1172 if (__glibc_unlikely (ret != REG_NOERROR)) 1141 1173 return ret; 1142 1174 preorder (dfa->str_tree, calc_next, dfa); 1143 1175 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); 1144 if ( BE (ret != REG_NOERROR, 0))1176 if (__glibc_unlikely (ret != REG_NOERROR)) 1145 1177 return ret; 1146 1178 ret = calc_eclosure (dfa); 1147 if ( BE (ret != REG_NOERROR, 0))1179 if (__glibc_unlikely (ret != REG_NOERROR)) 1148 1180 return ret; 1149 1181 … … 1154 1186 { 1155 1187 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); 1156 if ( BE (dfa->inveclosures == NULL, 0))1157 1188 if (__glibc_unlikely (dfa->inveclosures == NULL)) 1189 return REG_ESPACE; 1158 1190 ret = calc_inveclosure (dfa); 1159 1191 } … … 1177 1209 while (node->left || node->right) 1178 1210 if (node->left) 1179 1180 1181 1211 node = node->left; 1212 else 1213 node = node->right; 1182 1214 1183 1215 do 1184 1216 { 1185 1217 reg_errcode_t err = fn (extra, node); 1186 if ( BE (err != REG_NOERROR, 0))1218 if (__glibc_unlikely (err != REG_NOERROR)) 1187 1219 return err; 1188 1220 if (node->parent == NULL) 1189 1221 return REG_NOERROR; 1190 1222 prev = node; … … 1206 1238 { 1207 1239 reg_errcode_t err = fn (extra, node); 1208 if ( BE (err != REG_NOERROR, 0))1240 if (__glibc_unlikely (err != REG_NOERROR)) 1209 1241 return err; 1210 1242 … … 1220 1252 node = node->parent; 1221 1253 if (!node) 1222 1254 return REG_NOERROR; 1223 1255 } 1224 1256 node = node->right; … … 1243 1275 1244 1276 else if (node->token.type == SUBEXP 1245 1246 { 1247 intother_idx = node->left->token.opr.idx;1277 && node->left && node->left->token.type == SUBEXP) 1278 { 1279 Idx other_idx = node->left->token.opr.idx; 1248 1280 1249 1281 node->left = node->left->left; 1250 1282 if (node->left) 1251 1283 node->left->parent = node; 1252 1284 1253 1285 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; 1254 1286 if (other_idx < BITSET_WORD_BITS) 1255 1287 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); 1256 1288 } 1257 1289 … … 1286 1318 lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) 1287 1319 { 1288 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;1320 re_dfa_t *dfa = preg->buffer; 1289 1321 bin_tree_t *body = node->left; 1290 1322 bin_tree_t *op, *cls, *tree1, *tree; … … 1307 1339 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; 1308 1340 tree = create_tree (dfa, op, tree1, CONCAT); 1309 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) 1341 if (__glibc_unlikely (tree == NULL || tree1 == NULL 1342 || op == NULL || cls == NULL)) 1310 1343 { 1311 1344 *err = REG_ESPACE; … … 1333 1366 node->first = node; 1334 1367 node->node_idx = re_dfa_add_node (dfa, node->token); 1335 if (BE (node->node_idx == -1, 0)) 1336 return REG_ESPACE; 1368 if (__glibc_unlikely (node->node_idx == -1)) 1369 return REG_ESPACE; 1370 if (node->token.type == ANCHOR) 1371 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; 1337 1372 } 1338 1373 return REG_NOERROR; … … 1356 1391 node->left->next = node->next; 1357 1392 if (node->right) 1358 1393 node->right->next = node->next; 1359 1394 break; 1360 1395 } … … 1367 1402 { 1368 1403 re_dfa_t *dfa = (re_dfa_t *) extra; 1369 intidx = node->node_idx;1404 Idx idx = node->node_idx; 1370 1405 reg_errcode_t err = REG_NOERROR; 1371 1406 … … 1376 1411 1377 1412 case END_OF_RE: 1378 assert(node->next == NULL);1413 DEBUG_ASSERT (node->next == NULL); 1379 1414 break; 1380 1415 … … 1382 1417 case OP_ALT: 1383 1418 { 1384 intleft, right;1419 Idx left, right; 1385 1420 dfa->has_plural_match = 1; 1386 1421 if (node->left != NULL) … … 1392 1427 else 1393 1428 right = node->next->node_idx; 1394 assert(left > -1);1395 assert(right > -1);1429 DEBUG_ASSERT (left > -1); 1430 DEBUG_ASSERT (right > -1); 1396 1431 err = re_node_set_init_2 (dfa->edests + idx, left, right); 1397 1432 } … … 1407 1442 dfa->nexts[idx] = node->next->node_idx; 1408 1443 if (node->token.type == OP_BACK_REF) 1409 re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);1444 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); 1410 1445 break; 1411 1446 1412 1447 default: 1413 assert(!IS_EPSILON_NODE (node->token.type));1448 DEBUG_ASSERT (!IS_EPSILON_NODE (node->token.type)); 1414 1449 dfa->nexts[idx] = node->next->node_idx; 1415 1450 break; … … 1424 1459 1425 1460 static reg_errcode_t 1426 internal_function 1427 duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, 1428 int root_node, unsigned int init_constraint) 1429 { 1430 int org_node, clone_node, ret;1461 duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, 1462 Idx root_node, unsigned int init_constraint) 1463 { 1464 Idx org_node, clone_node; 1465 bool ok; 1431 1466 unsigned int constraint = init_constraint; 1432 1467 for (org_node = top_org_node, clone_node = top_clone_node;;) 1433 1468 { 1434 intorg_dest, clone_dest;1469 Idx org_dest, clone_dest; 1435 1470 if (dfa->nodes[org_node].type == OP_BACK_REF) 1436 1471 { … … 1442 1477 re_node_set_empty (dfa->edests + clone_node); 1443 1478 clone_dest = duplicate_node (dfa, org_dest, constraint); 1444 if ( BE (clone_dest == -1, 0))1479 if (__glibc_unlikely (clone_dest == -1)) 1445 1480 return REG_ESPACE; 1446 1481 dfa->nexts[clone_node] = dfa->nexts[org_node]; 1447 ret= re_node_set_insert (dfa->edests + clone_node, clone_dest);1448 if ( BE (ret < 0, 0))1482 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1483 if (__glibc_unlikely (! ok)) 1449 1484 return REG_ESPACE; 1450 1485 } … … 1463 1498 org_dest = dfa->edests[org_node].elems[0]; 1464 1499 re_node_set_empty (dfa->edests + clone_node); 1465 if (dfa->nodes[org_node].type == ANCHOR) 1500 /* If the node is root_node itself, it means the epsilon closure 1501 has a loop. Then tie it to the destination of the root_node. */ 1502 if (org_node == root_node && clone_node != org_node) 1466 1503 { 1467 /* In case of the node has another constraint, append it. */ 1468 if (org_node == root_node && clone_node != org_node) 1469 { 1470 /* ...but if the node is root_node itself, it means the 1471 epsilon closure have a loop, then tie it to the 1472 destination of the root_node. */ 1473 ret = re_node_set_insert (dfa->edests + clone_node, 1474 org_dest); 1475 if (BE (ret < 0, 0)) 1476 return REG_ESPACE; 1477 break; 1478 } 1479 constraint |= dfa->nodes[org_node].opr.ctx_type; 1504 ok = re_node_set_insert (dfa->edests + clone_node, org_dest); 1505 if (__glibc_unlikely (! ok)) 1506 return REG_ESPACE; 1507 break; 1480 1508 } 1509 /* In case the node has another constraint, append it. */ 1510 constraint |= dfa->nodes[org_node].constraint; 1481 1511 clone_dest = duplicate_node (dfa, org_dest, constraint); 1482 if ( BE (clone_dest == -1, 0))1512 if (__glibc_unlikely (clone_dest == -1)) 1483 1513 return REG_ESPACE; 1484 ret= re_node_set_insert (dfa->edests + clone_node, clone_dest);1485 if ( BE (ret < 0, 0))1514 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1515 if (__glibc_unlikely (! ok)) 1486 1516 return REG_ESPACE; 1487 1517 } … … 1496 1526 if (clone_dest == -1) 1497 1527 { 1498 /* There are no such aduplicated node, create a new one. */1528 /* There is no such duplicated node, create a new one. */ 1499 1529 reg_errcode_t err; 1500 1530 clone_dest = duplicate_node (dfa, org_dest, constraint); 1501 if ( BE (clone_dest == -1, 0))1531 if (__glibc_unlikely (clone_dest == -1)) 1502 1532 return REG_ESPACE; 1503 ret= re_node_set_insert (dfa->edests + clone_node, clone_dest);1504 if ( BE (ret < 0, 0))1533 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1534 if (__glibc_unlikely (! ok)) 1505 1535 return REG_ESPACE; 1506 1536 err = duplicate_node_closure (dfa, org_dest, clone_dest, 1507 1537 root_node, constraint); 1508 if ( BE (err != REG_NOERROR, 0))1538 if (__glibc_unlikely (err != REG_NOERROR)) 1509 1539 return err; 1510 1540 } 1511 1541 else 1512 1542 { 1513 /* There are a duplicated node which satisfythe constraint,1543 /* There is a duplicated node which satisfies the constraint, 1514 1544 use it to avoid infinite loop. */ 1515 ret= re_node_set_insert (dfa->edests + clone_node, clone_dest);1516 if ( BE (ret < 0, 0))1545 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1546 if (__glibc_unlikely (! ok)) 1517 1547 return REG_ESPACE; 1518 1548 } … … 1520 1550 org_dest = dfa->edests[org_node].elems[1]; 1521 1551 clone_dest = duplicate_node (dfa, org_dest, constraint); 1522 if ( BE (clone_dest == -1, 0))1552 if (__glibc_unlikely (clone_dest == -1)) 1523 1553 return REG_ESPACE; 1524 ret= re_node_set_insert (dfa->edests + clone_node, clone_dest);1525 if ( BE (ret < 0, 0))1554 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1555 if (__glibc_unlikely (! ok)) 1526 1556 return REG_ESPACE; 1527 1557 } … … 1535 1565 satisfies the constraint CONSTRAINT. */ 1536 1566 1537 static int1538 search_duplicated_node (const re_dfa_t *dfa, intorg_node,1567 static Idx 1568 search_duplicated_node (const re_dfa_t *dfa, Idx org_node, 1539 1569 unsigned int constraint) 1540 1570 { 1541 intidx;1571 Idx idx; 1542 1572 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) 1543 1573 { … … 1553 1583 available. */ 1554 1584 1555 static int1556 duplicate_node (re_dfa_t *dfa, intorg_idx, unsigned int constraint)1557 { 1558 intdup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);1559 if ( BE (dup_idx != -1,1))1585 static Idx 1586 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint) 1587 { 1588 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); 1589 if (__glibc_likely (dup_idx != -1)) 1560 1590 { 1561 1591 dfa->nodes[dup_idx].constraint = constraint; 1562 if (dfa->nodes[org_idx].type == ANCHOR) 1563 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; 1592 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; 1564 1593 dfa->nodes[dup_idx].duplicated = 1; 1565 1594 … … 1573 1602 calc_inveclosure (re_dfa_t *dfa) 1574 1603 { 1575 int src, idx, ret; 1604 Idx src, idx; 1605 bool ok; 1576 1606 for (idx = 0; idx < dfa->nodes_len; ++idx) 1577 1607 re_node_set_init_empty (dfa->inveclosures + idx); … … 1579 1609 for (src = 0; src < dfa->nodes_len; ++src) 1580 1610 { 1581 int*elems = dfa->eclosures[src].elems;1611 Idx *elems = dfa->eclosures[src].elems; 1582 1612 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) 1583 1613 { 1584 ret= re_node_set_insert_last (dfa->inveclosures + elems[idx], src);1585 if ( BE (ret == -1, 0))1614 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); 1615 if (__glibc_unlikely (! ok)) 1586 1616 return REG_ESPACE; 1587 1617 } … … 1596 1626 calc_eclosure (re_dfa_t *dfa) 1597 1627 { 1598 int node_idx, incomplete; 1599 #ifdef DEBUG 1600 assert (dfa->nodes_len > 0); 1601 #endif 1602 incomplete = 0; 1628 Idx node_idx; 1629 bool incomplete; 1630 DEBUG_ASSERT (dfa->nodes_len > 0); 1631 incomplete = false; 1603 1632 /* For each nodes, calculate epsilon closure. */ 1604 1633 for (node_idx = 0; ; ++node_idx) … … 1610 1639 if (!incomplete) 1611 1640 break; 1612 incomplete = 0;1641 incomplete = false; 1613 1642 node_idx = 0; 1614 1643 } 1615 1644 1616 #ifdef DEBUG 1617 assert (dfa->eclosures[node_idx].nelem != -1); 1618 #endif 1645 DEBUG_ASSERT (dfa->eclosures[node_idx].nelem != -1); 1619 1646 1620 1647 /* If we have already calculated, skip it. */ 1621 1648 if (dfa->eclosures[node_idx].nelem != 0) 1622 1649 continue; 1623 /* Calculate epsilon closure of `node_idx'. */1624 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);1625 if ( BE (err != REG_NOERROR, 0))1650 /* Calculate epsilon closure of 'node_idx'. */ 1651 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); 1652 if (__glibc_unlikely (err != REG_NOERROR)) 1626 1653 return err; 1627 1654 1628 1655 if (dfa->eclosures[node_idx].nelem == 0) 1629 1656 { 1630 incomplete = 1;1657 incomplete = true; 1631 1658 re_node_set_free (&eclosure_elem); 1632 1659 } … … 1638 1665 1639 1666 static reg_errcode_t 1640 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, introot)1667 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) 1641 1668 { 1642 1669 reg_errcode_t err; 1643 unsigned int constraint; 1644 int i, incomplete; 1670 Idx i; 1645 1671 re_node_set eclosure; 1646 incomplete = 0;1672 bool incomplete = false; 1647 1673 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); 1648 if ( BE (err != REG_NOERROR, 0))1674 if (__glibc_unlikely (err != REG_NOERROR)) 1649 1675 return err; 1676 1677 /* An epsilon closure includes itself. */ 1678 eclosure.elems[eclosure.nelem++] = node; 1650 1679 1651 1680 /* This indicates that we are calculating this node now. … … 1653 1682 dfa->eclosures[node].nelem = -1; 1654 1683 1655 constraint = ((dfa->nodes[node].type == ANCHOR) 1656 ? dfa->nodes[node].opr.ctx_type : 0); 1657 /* If the current node has constraints, duplicate all nodes. 1658 Since they must inherit the constraints. */ 1659 if (constraint 1684 /* If the current node has constraints, duplicate all nodes 1685 since they must inherit the constraints. */ 1686 if (dfa->nodes[node].constraint 1660 1687 && dfa->edests[node].nelem 1661 1688 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) 1662 1689 { 1663 int org_node, cur_node; 1664 org_node = cur_node = node; 1665 err = duplicate_node_closure (dfa, node, node, node, constraint); 1666 if (BE (err != REG_NOERROR, 0)) 1690 err = duplicate_node_closure (dfa, node, node, node, 1691 dfa->nodes[node].constraint); 1692 if (__glibc_unlikely (err != REG_NOERROR)) 1667 1693 return err; 1668 1694 } … … 1673 1699 { 1674 1700 re_node_set eclosure_elem; 1675 intedest = dfa->edests[node].elems[i];1676 /* If calculating the epsilon closure of `edest' is in progress,1701 Idx edest = dfa->edests[node].elems[i]; 1702 /* If calculating the epsilon closure of 'edest' is in progress, 1677 1703 return intermediate result. */ 1678 1704 if (dfa->eclosures[edest].nelem == -1) 1679 1705 { 1680 incomplete = 1;1706 incomplete = true; 1681 1707 continue; 1682 1708 } 1683 /* If we haven't calculated the epsilon closure of `edest' yet,1709 /* If we haven't calculated the epsilon closure of 'edest' yet, 1684 1710 calculate now. Otherwise use calculated epsilon closure. */ 1685 1711 if (dfa->eclosures[edest].nelem == 0) 1686 1712 { 1687 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);1688 if ( BE (err != REG_NOERROR, 0))1713 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); 1714 if (__glibc_unlikely (err != REG_NOERROR)) 1689 1715 return err; 1690 1716 } 1691 1717 else 1692 1718 eclosure_elem = dfa->eclosures[edest]; 1693 /* Merge the epsilon closure of `edest'. */ 1694 re_node_set_merge (&eclosure, &eclosure_elem); 1695 /* If the epsilon closure of `edest' is incomplete, 1719 /* Merge the epsilon closure of 'edest'. */ 1720 err = re_node_set_merge (&eclosure, &eclosure_elem); 1721 if (__glibc_unlikely (err != REG_NOERROR)) 1722 return err; 1723 /* If the epsilon closure of 'edest' is incomplete, 1696 1724 the epsilon closure of this node is also incomplete. */ 1697 1725 if (dfa->eclosures[edest].nelem == 0) 1698 1726 { 1699 incomplete = 1;1727 incomplete = true; 1700 1728 re_node_set_free (&eclosure_elem); 1701 1729 } 1702 1730 } 1703 1731 1704 /* Epsilon closures include itself. */1705 re_node_set_insert (&eclosure, node);1706 1732 if (incomplete && !root) 1707 1733 dfa->eclosures[node].nelem = 0; … … 1719 1745 1720 1746 static void 1721 internal_function1722 1747 fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) 1723 1748 { … … 1729 1754 1730 1755 static int 1731 internal_function1732 1756 peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 1733 1757 { … … 1744 1768 1745 1769 token->word_char = 0; 1746 #ifdef RE_ENABLE_I18N1747 1770 token->mb_partial = 0; 1748 if (input->mb_cur_max > 1 &&1749 !re_string_first_byte (input, re_string_cur_idx (input)))1771 if (input->mb_cur_max > 1 1772 && !re_string_first_byte (input, re_string_cur_idx (input))) 1750 1773 { 1751 1774 token->type = CHARACTER; … … 1753 1776 return 1; 1754 1777 } 1755 #endif1756 1778 if (c == '\\') 1757 1779 { … … 1766 1788 token->opr.c = c2; 1767 1789 token->type = CHARACTER; 1768 #ifdef RE_ENABLE_I18N1769 1790 if (input->mb_cur_max > 1) 1770 1791 { … … 1774 1795 } 1775 1796 else 1776 #endif1777 1797 token->word_char = IS_WORD_CHAR (c2) != 0; 1778 1798 … … 1880 1900 1881 1901 token->type = CHARACTER; 1882 #ifdef RE_ENABLE_I18N1883 1902 if (input->mb_cur_max > 1) 1884 1903 { … … 1887 1906 } 1888 1907 else 1889 #endif1890 1908 token->word_char = IS_WORD_CHAR (token->opr.c); 1891 1909 … … 1934 1952 break; 1935 1953 case '^': 1936 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&1937 re_string_cur_idx (input) != 0)1954 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) 1955 && re_string_cur_idx (input) != 0) 1938 1956 { 1939 1957 char prev = re_string_peek_byte (input, -1); … … 1945 1963 break; 1946 1964 case '$': 1947 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&1948 re_string_cur_idx (input) + 1 != re_string_length (input))1965 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) 1966 && re_string_cur_idx (input) + 1 != re_string_length (input)) 1949 1967 { 1950 1968 re_token_t next; … … 1968 1986 1969 1987 static int 1970 internal_function1971 1988 peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 1972 1989 { … … 1980 1997 token->opr.c = c; 1981 1998 1982 #ifdef RE_ENABLE_I18N 1983 if (input->mb_cur_max > 1 && 1984 !re_string_first_byte (input, re_string_cur_idx (input))) 1999 if (input->mb_cur_max > 1 2000 && !re_string_first_byte (input, re_string_cur_idx (input))) 1985 2001 { 1986 2002 token->type = CHARACTER; 1987 2003 return 1; 1988 2004 } 1989 #endif /* RE_ENABLE_I18N */1990 2005 1991 2006 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) … … 2015 2030 token->type = OP_OPEN_COLL_ELEM; 2016 2031 break; 2032 2017 2033 case '=': 2018 2034 token->type = OP_OPEN_EQUIV_CLASS; 2019 2035 break; 2036 2020 2037 case ':': 2021 2038 if (syntax & RE_CHAR_CLASSES) … … 2024 2041 break; 2025 2042 } 2026 /* else fall through. */2043 FALLTHROUGH; 2027 2044 default: 2028 2045 token->type = CHARACTER; … … 2035 2052 switch (c) 2036 2053 { 2037 case '-':2038 token->type = OP_CHARSET_RANGE;2039 break;2040 2054 case ']': 2041 2055 token->type = OP_CLOSE_BRACKET; … … 2044 2058 token->type = OP_NON_MATCH_LIST; 2045 2059 break; 2060 case '-': 2061 /* In V7 Unix grep and Unix awk and mawk, [...---...] 2062 (3 adjacent minus signs) stands for a single minus sign. 2063 Support that without breaking anything else. */ 2064 if (! (re_string_cur_idx (input) + 2 < re_string_length (input) 2065 && re_string_peek_byte (input, 1) == '-' 2066 && re_string_peek_byte (input, 2) == '-')) 2067 { 2068 token->type = OP_CHARSET_RANGE; 2069 break; 2070 } 2071 re_string_skip_bytes (input, 2); 2072 FALLTHROUGH; 2046 2073 default: 2047 2074 token->type = CHARACTER; … … 2055 2082 /* Entry point of the parser. 2056 2083 Parse the regular expression REGEXP and return the structure tree. 2057 If an error is occured, ERR is set by error code, and return NULL.2084 If an error occurs, ERR is set by error code, and return NULL. 2058 2085 This function build the following tree, from regular expression <reg_exp>: 2059 2086 CAT … … 2069 2096 reg_errcode_t *err) 2070 2097 { 2071 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;2098 re_dfa_t *dfa = preg->buffer; 2072 2099 bin_tree_t *tree, *eor, *root; 2073 2100 re_token_t current_token; … … 2075 2102 fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); 2076 2103 tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); 2077 if ( BE (*err != REG_NOERROR && tree == NULL, 0))2104 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) 2078 2105 return NULL; 2079 2106 eor = create_tree (dfa, NULL, NULL, END_OF_RE); … … 2082 2109 else 2083 2110 root = eor; 2084 if ( BE (eor == NULL || root == NULL, 0))2111 if (__glibc_unlikely (eor == NULL || root == NULL)) 2085 2112 { 2086 2113 *err = REG_ESPACE; … … 2097 2124 <branch1> <branch2> 2098 2125 2099 ALT means alternative, which represents the operator `|'. */2126 ALT means alternative, which represents the operator '|'. */ 2100 2127 2101 2128 static bin_tree_t * 2102 2129 parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, 2103 reg_syntax_t syntax, intnest, reg_errcode_t *err)2104 { 2105 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;2130 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2131 { 2132 re_dfa_t *dfa = preg->buffer; 2106 2133 bin_tree_t *tree, *branch = NULL; 2134 bitset_word_t initial_bkref_map = dfa->completed_bkref_map; 2107 2135 tree = parse_branch (regexp, preg, token, syntax, nest, err); 2108 if ( BE (*err != REG_NOERROR && tree == NULL, 0))2136 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) 2109 2137 return NULL; 2110 2138 … … 2115 2143 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2116 2144 { 2145 bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map; 2146 dfa->completed_bkref_map = initial_bkref_map; 2117 2147 branch = parse_branch (regexp, preg, token, syntax, nest, err); 2118 if (BE (*err != REG_NOERROR && branch == NULL, 0)) 2119 return NULL; 2148 if (__glibc_unlikely (*err != REG_NOERROR && branch == NULL)) 2149 { 2150 if (tree != NULL) 2151 postorder (tree, free_tree, NULL); 2152 return NULL; 2153 } 2154 dfa->completed_bkref_map |= accumulated_bkref_map; 2120 2155 } 2121 2156 else 2122 2157 branch = NULL; 2123 2158 tree = create_tree (dfa, tree, branch, OP_ALT); 2124 if ( BE (tree == NULL, 0))2159 if (__glibc_unlikely (tree == NULL)) 2125 2160 { 2126 2161 *err = REG_ESPACE; … … 2142 2177 static bin_tree_t * 2143 2178 parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, 2144 reg_syntax_t syntax, intnest, reg_errcode_t *err)2145 { 2146 bin_tree_t *tree, *exp ;2147 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;2179 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2180 { 2181 bin_tree_t *tree, *expr; 2182 re_dfa_t *dfa = preg->buffer; 2148 2183 tree = parse_expression (regexp, preg, token, syntax, nest, err); 2149 if ( BE (*err != REG_NOERROR && tree == NULL, 0))2184 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) 2150 2185 return NULL; 2151 2186 … … 2153 2188 && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2154 2189 { 2155 exp = parse_expression (regexp, preg, token, syntax, nest, err); 2156 if (BE (*err != REG_NOERROR && exp == NULL, 0)) 2157 { 2190 expr = parse_expression (regexp, preg, token, syntax, nest, err); 2191 if (__glibc_unlikely (*err != REG_NOERROR && expr == NULL)) 2192 { 2193 if (tree != NULL) 2194 postorder (tree, free_tree, NULL); 2158 2195 return NULL; 2159 2196 } 2160 if (tree != NULL && exp != NULL)2161 { 2162 tree = create_tree (dfa, tree, exp, CONCAT);2163 if ( tree == NULL)2197 if (tree != NULL && expr != NULL) 2198 { 2199 bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT); 2200 if (newtree == NULL) 2164 2201 { 2202 postorder (expr, free_tree, NULL); 2203 postorder (tree, free_tree, NULL); 2165 2204 *err = REG_ESPACE; 2166 2205 return NULL; 2167 2206 } 2207 tree = newtree; 2168 2208 } 2169 2209 else if (tree == NULL) 2170 tree = exp ;2171 /* Otherwise exp == NULL, we don't need to create new tree. */2210 tree = expr; 2211 /* Otherwise expr == NULL, we don't need to create new tree. */ 2172 2212 } 2173 2213 return tree; … … 2182 2222 static bin_tree_t * 2183 2223 parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, 2184 reg_syntax_t syntax, intnest, reg_errcode_t *err)2185 { 2186 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;2224 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2225 { 2226 re_dfa_t *dfa = preg->buffer; 2187 2227 bin_tree_t *tree; 2188 2228 switch (token->type) … … 2190 2230 case CHARACTER: 2191 2231 tree = create_token_tree (dfa, NULL, NULL, token); 2192 if ( BE (tree == NULL, 0))2232 if (__glibc_unlikely (tree == NULL)) 2193 2233 { 2194 2234 *err = REG_ESPACE; 2195 2235 return NULL; 2196 2236 } 2197 #ifdef RE_ENABLE_I18N2198 2237 if (dfa->mb_cur_max > 1) 2199 2238 { … … 2205 2244 mbc_remain = create_token_tree (dfa, NULL, NULL, token); 2206 2245 tree = create_tree (dfa, tree, mbc_remain, CONCAT); 2207 if ( BE (mbc_remain == NULL || tree == NULL, 0))2246 if (__glibc_unlikely (mbc_remain == NULL || tree == NULL)) 2208 2247 { 2209 2248 *err = REG_ESPACE; … … 2212 2251 } 2213 2252 } 2214 #endif2215 2253 break; 2254 2216 2255 case OP_OPEN_SUBEXP: 2217 2256 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); 2218 if ( BE (*err != REG_NOERROR && tree == NULL, 0))2257 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) 2219 2258 return NULL; 2220 2259 break; 2260 2221 2261 case OP_OPEN_BRACKET: 2222 2262 tree = parse_bracket_exp (regexp, dfa, token, syntax, err); 2223 if ( BE (*err != REG_NOERROR && tree == NULL, 0))2263 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) 2224 2264 return NULL; 2225 2265 break; 2266 2226 2267 case OP_BACK_REF: 2227 if (! BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))2268 if (!__glibc_likely (dfa->completed_bkref_map & (1 << token->opr.idx))) 2228 2269 { 2229 2270 *err = REG_ESUBREG; … … 2232 2273 dfa->used_bkref_map |= 1 << token->opr.idx; 2233 2274 tree = create_token_tree (dfa, NULL, NULL, token); 2234 if ( BE (tree == NULL, 0))2275 if (__glibc_unlikely (tree == NULL)) 2235 2276 { 2236 2277 *err = REG_ESPACE; … … 2240 2281 dfa->has_mb_node = 1; 2241 2282 break; 2283 2242 2284 case OP_OPEN_DUP_NUM: 2243 2285 if (syntax & RE_CONTEXT_INVALID_DUP) … … 2246 2288 return NULL; 2247 2289 } 2248 /* FALLTHROUGH */2290 FALLTHROUGH; 2249 2291 case OP_DUP_ASTERISK: 2250 2292 case OP_DUP_PLUS: … … 2260 2302 return parse_expression (regexp, preg, token, syntax, nest, err); 2261 2303 } 2262 /* else fall through */2304 FALLTHROUGH; 2263 2305 case OP_CLOSE_SUBEXP: 2264 if ((token->type == OP_CLOSE_SUBEXP) &&2265 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))2306 if ((token->type == OP_CLOSE_SUBEXP) 2307 && !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) 2266 2308 { 2267 2309 *err = REG_ERPAREN; 2268 2310 return NULL; 2269 2311 } 2270 /* else fall through */2312 FALLTHROUGH; 2271 2313 case OP_CLOSE_DUP_NUM: 2272 2314 /* We treat it as a normal character. */ … … 2277 2319 by peek_token. */ 2278 2320 tree = create_token_tree (dfa, NULL, NULL, token); 2279 if ( BE (tree == NULL, 0))2321 if (__glibc_unlikely (tree == NULL)) 2280 2322 { 2281 2323 *err = REG_ESPACE; … … 2283 2325 } 2284 2326 break; 2327 2285 2328 case ANCHOR: 2286 2329 if ((token->opr.ctx_type … … 2289 2332 init_word_char (dfa); 2290 2333 if (token->opr.ctx_type == WORD_DELIM 2291 2334 || token->opr.ctx_type == NOT_WORD_DELIM) 2292 2335 { 2293 2336 bin_tree_t *tree_first, *tree_last; … … 2297 2340 tree_first = create_token_tree (dfa, NULL, NULL, token); 2298 2341 token->opr.ctx_type = WORD_LAST; 2299 2300 2301 2342 } 2343 else 2344 { 2302 2345 token->opr.ctx_type = INSIDE_WORD; 2303 2346 tree_first = create_token_tree (dfa, NULL, NULL, token); 2304 2347 token->opr.ctx_type = INSIDE_NOTWORD; 2305 2348 } 2306 2349 tree_last = create_token_tree (dfa, NULL, NULL, token); 2307 2350 tree = create_tree (dfa, tree_first, tree_last, OP_ALT); 2308 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) 2351 if (__glibc_unlikely (tree_first == NULL || tree_last == NULL 2352 || tree == NULL)) 2309 2353 { 2310 2354 *err = REG_ESPACE; … … 2315 2359 { 2316 2360 tree = create_token_tree (dfa, NULL, NULL, token); 2317 if ( BE (tree == NULL, 0))2361 if (__glibc_unlikely (tree == NULL)) 2318 2362 { 2319 2363 *err = REG_ESPACE; … … 2327 2371 fetch_token (token, regexp, syntax); 2328 2372 return tree; 2373 2329 2374 case OP_PERIOD: 2330 2375 tree = create_token_tree (dfa, NULL, NULL, token); 2331 if ( BE (tree == NULL, 0))2376 if (__glibc_unlikely (tree == NULL)) 2332 2377 { 2333 2378 *err = REG_ESPACE; … … 2337 2382 dfa->has_mb_node = 1; 2338 2383 break; 2384 2339 2385 case OP_WORD: 2340 2386 case OP_NOTWORD: 2341 2387 tree = build_charclass_op (dfa, regexp->trans, 2342 (const unsigned char *)"alnum",2343 (const unsigned char *)"_",2388 "alnum", 2389 "_", 2344 2390 token->type == OP_NOTWORD, err); 2345 if ( BE (*err != REG_NOERROR && tree == NULL, 0))2391 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) 2346 2392 return NULL; 2347 2393 break; 2394 2348 2395 case OP_SPACE: 2349 2396 case OP_NOTSPACE: 2350 2397 tree = build_charclass_op (dfa, regexp->trans, 2351 (const unsigned char *)"space",2352 (const unsigned char *)"",2398 "space", 2399 "", 2353 2400 token->type == OP_NOTSPACE, err); 2354 if ( BE (*err != REG_NOERROR && tree == NULL, 0))2401 if (__glibc_unlikely (*err != REG_NOERROR && tree == NULL)) 2355 2402 return NULL; 2356 2403 break; 2404 2357 2405 case OP_ALT: 2358 2406 case END_OF_RE: 2359 2407 return NULL; 2408 2360 2409 case BACK_SLASH: 2361 2410 *err = REG_EESCAPE; 2362 2411 return NULL; 2412 2363 2413 default: 2364 2414 /* Must not happen? */ 2365 #ifdef DEBUG 2366 assert (0); 2367 #endif 2415 DEBUG_ASSERT (false); 2368 2416 return NULL; 2369 2417 } … … 2373 2421 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) 2374 2422 { 2375 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); 2376 if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2377 return NULL; 2423 bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token, 2424 syntax, err); 2425 if (__glibc_unlikely (*err != REG_NOERROR && dup_tree == NULL)) 2426 { 2427 if (tree != NULL) 2428 postorder (tree, free_tree, NULL); 2429 return NULL; 2430 } 2431 tree = dup_tree; 2378 2432 /* In BRE consecutive duplications are not allowed. */ 2379 2433 if ((syntax & RE_CONTEXT_INVALID_DUP) … … 2381 2435 || token->type == OP_OPEN_DUP_NUM)) 2382 2436 { 2437 if (tree != NULL) 2438 postorder (tree, free_tree, NULL); 2383 2439 *err = REG_BADRPT; 2384 2440 return NULL; … … 2398 2454 static bin_tree_t * 2399 2455 parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, 2400 reg_syntax_t syntax, intnest, reg_errcode_t *err)2401 { 2402 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;2456 reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2457 { 2458 re_dfa_t *dfa = preg->buffer; 2403 2459 bin_tree_t *tree; 2404 2460 size_t cur_nsub; … … 2413 2469 { 2414 2470 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); 2415 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) 2416 *err = REG_EPAREN; 2417 if (BE (*err != REG_NOERROR, 0)) 2471 if (__glibc_unlikely (*err == REG_NOERROR 2472 && token->type != OP_CLOSE_SUBEXP)) 2473 { 2474 if (tree != NULL) 2475 postorder (tree, free_tree, NULL); 2476 *err = REG_EPAREN; 2477 } 2478 if (__glibc_unlikely (*err != REG_NOERROR)) 2418 2479 return NULL; 2419 2480 } … … 2423 2484 2424 2485 tree = create_tree (dfa, tree, NULL, SUBEXP); 2425 if ( BE (tree == NULL, 0))2486 if (__glibc_unlikely (tree == NULL)) 2426 2487 { 2427 2488 *err = REG_ESPACE; … … 2439 2500 { 2440 2501 bin_tree_t *tree = NULL, *old_tree = NULL; 2441 inti, start, end, start_idx = re_string_cur_idx (regexp);2502 Idx i, start, end, start_idx = re_string_cur_idx (regexp); 2442 2503 re_token_t start_token = *token; 2443 2504 … … 2456 2517 } 2457 2518 } 2458 if ( BE (start != -2, 1))2519 if (__glibc_likely (start != -2)) 2459 2520 { 2460 2521 /* We treat "{n}" as "{n,n}". */ … … 2463 2524 ? fetch_number (regexp, token, syntax) : -2)); 2464 2525 } 2465 if ( BE (start == -2 || end == -2, 0))2526 if (__glibc_unlikely (start == -2 || end == -2)) 2466 2527 { 2467 2528 /* Invalid sequence. */ 2468 if ( BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))2529 if (__glibc_unlikely (!(syntax & RE_INVALID_INTERVAL_ORD))) 2469 2530 { 2470 2531 if (token->type == END_OF_RE) … … 2485 2546 } 2486 2547 2487 if (BE (end != -1 && start > end, 0)) 2548 if (__glibc_unlikely ((end != -1 && start > end) 2549 || token->type != OP_CLOSE_DUP_NUM)) 2488 2550 { 2489 2551 /* First number greater than second. */ … … 2491 2553 return NULL; 2492 2554 } 2555 2556 if (__glibc_unlikely (RE_DUP_MAX < (end == -1 ? start : end))) 2557 { 2558 *err = REG_ESIZE; 2559 return NULL; 2560 } 2493 2561 } 2494 2562 else … … 2500 2568 fetch_token (token, regexp, syntax); 2501 2569 2502 if ( BE (elem == NULL, 0))2570 if (__glibc_unlikely (elem == NULL)) 2503 2571 return NULL; 2504 if ( BE (start == 0 && end == 0,0))2572 if (__glibc_unlikely (start == 0 && end == 0)) 2505 2573 { 2506 2574 postorder (elem, free_tree, NULL); … … 2509 2577 2510 2578 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ 2511 if ( BE (start > 0,0))2579 if (__glibc_unlikely (start > 0)) 2512 2580 { 2513 2581 tree = elem; … … 2516 2584 elem = duplicate_tree (elem, dfa); 2517 2585 tree = create_tree (dfa, tree, elem, CONCAT); 2518 if ( BE (elem == NULL || tree == NULL, 0))2586 if (__glibc_unlikely (elem == NULL || tree == NULL)) 2519 2587 goto parse_dup_op_espace; 2520 2588 } … … 2525 2593 /* Duplicate ELEM before it is marked optional. */ 2526 2594 elem = duplicate_tree (elem, dfa); 2595 if (__glibc_unlikely (elem == NULL)) 2596 goto parse_dup_op_espace; 2527 2597 old_tree = tree; 2528 2598 } … … 2531 2601 2532 2602 if (elem->token.type == SUBEXP) 2533 postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); 2534 2535 tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); 2536 if (BE (tree == NULL, 0)) 2603 { 2604 uintptr_t subidx = elem->token.opr.idx; 2605 postorder (elem, mark_opt_subexp, (void *) subidx); 2606 } 2607 2608 tree = create_tree (dfa, elem, NULL, 2609 (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); 2610 if (__glibc_unlikely (tree == NULL)) 2537 2611 goto parse_dup_op_espace; 2538 2612 … … 2540 2614 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have 2541 2615 already created the start+1-th copy. */ 2542 for (i = start + 2; i <= end; ++i) 2543 { 2544 elem = duplicate_tree (elem, dfa); 2545 tree = create_tree (dfa, tree, elem, CONCAT); 2546 if (BE (elem == NULL || tree == NULL, 0)) 2547 goto parse_dup_op_espace; 2548 2549 tree = create_tree (dfa, tree, NULL, OP_ALT); 2550 if (BE (tree == NULL, 0)) 2551 goto parse_dup_op_espace; 2552 } 2616 if (TYPE_SIGNED (Idx) || end != -1) 2617 for (i = start + 2; i <= end; ++i) 2618 { 2619 elem = duplicate_tree (elem, dfa); 2620 tree = create_tree (dfa, tree, elem, CONCAT); 2621 if (__glibc_unlikely (elem == NULL || tree == NULL)) 2622 goto parse_dup_op_espace; 2623 2624 tree = create_tree (dfa, tree, NULL, OP_ALT); 2625 if (__glibc_unlikely (tree == NULL)) 2626 goto parse_dup_op_espace; 2627 } 2553 2628 2554 2629 if (old_tree) … … 2567 2642 2568 2643 #ifndef _LIBC 2569 /* Local function for parse_bracket_exp only used in case of NOT _LIBC. 2570 Build the range expression which starts from START_ELEM, and ends 2571 at END_ELEM. The result are written to MBCSET and SBCSET. 2572 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2573 mbcset->range_ends, is a pointer argument sinse we may 2574 update it. */ 2644 2645 /* Convert the byte B to the corresponding wide character. In a 2646 unibyte locale, treat B as itself. In a multibyte locale, return 2647 WEOF if B is an encoding error. */ 2648 static wint_t 2649 parse_byte (unsigned char b, re_dfa_t const *dfa) 2650 { 2651 return dfa->mb_cur_max > 1 ? __btowc (b) : b; 2652 } 2653 2654 /* Local function for parse_bracket_exp used in _LIBC environment. 2655 Build the range expression which starts from START_ELEM, and ends 2656 at END_ELEM. The result are written to MBCSET and SBCSET. 2657 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2658 mbcset->range_ends, is a pointer argument since we may 2659 update it. */ 2575 2660 2576 2661 static reg_errcode_t 2577 internal_function 2578 # ifdef RE_ENABLE_I18N 2579 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, 2580 bracket_elem_t *start_elem, bracket_elem_t *end_elem) 2581 # else /* not RE_ENABLE_I18N */ 2582 build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, 2583 bracket_elem_t *end_elem) 2584 # endif /* not RE_ENABLE_I18N */ 2585 { 2586 unsigned int start_ch, end_ch; 2662 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, 2663 bracket_elem_t *start_elem, bracket_elem_t *end_elem, 2664 re_dfa_t *dfa, reg_syntax_t syntax, uint_fast32_t nrules, 2665 const unsigned char *collseqmb, const char *collseqwc, 2666 int_fast32_t table_size, const void *symb_table, 2667 const unsigned char *extra) 2668 { 2587 2669 /* Equivalence Classes and Character Classes can't be a range start/end. */ 2588 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2589 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2590 0)) 2670 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS 2671 || start_elem->type == CHAR_CLASS 2672 || end_elem->type == EQUIV_CLASS 2673 || end_elem->type == CHAR_CLASS)) 2591 2674 return REG_ERANGE; 2592 2675 2593 2676 /* We can handle no multi character collating elements without libc 2594 2677 support. */ 2595 if ( BE((start_elem->type == COLL_SYM2596 2597 2598 && strlen ((char *) end_elem->opr.name) > 1), 0))2678 if (__glibc_unlikely ((start_elem->type == COLL_SYM 2679 && strlen ((char *) start_elem->opr.name) > 1) 2680 || (end_elem->type == COLL_SYM 2681 && strlen ((char *) end_elem->opr.name) > 1))) 2599 2682 return REG_ECOLLATE; 2600 2683 2601 # ifdef RE_ENABLE_I18N 2602 { 2603 wchar_t wc; 2604 wint_t start_wc; 2605 wint_t end_wc; 2606 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; 2607 2684 unsigned int 2608 2685 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch 2609 2686 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] 2610 : 0)) ;2687 : 0)), 2611 2688 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch 2612 2689 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] 2613 2690 : 0)); 2691 wint_t 2614 2692 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) 2615 ? __btowc (start_ch) : start_elem->opr.wch);2693 ? parse_byte (start_ch, dfa) : start_elem->opr.wch), 2616 2694 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) 2617 ? __btowc (end_ch) : end_elem->opr.wch); 2618 if (start_wc == WEOF || end_wc == WEOF) 2619 return REG_ECOLLATE; 2620 cmp_buf[0] = start_wc; 2621 cmp_buf[4] = end_wc; 2622 if (wcscoll (cmp_buf, cmp_buf + 4) > 0) 2623 return REG_ERANGE; 2624 2625 /* Got valid collation sequence values, add them as a new entry. 2626 However, for !_LIBC we have no collation elements: if the 2627 character set is single byte, the single byte character set 2628 that we build below suffices. parse_bracket_exp passes 2629 no MBCSET if dfa->mb_cur_max == 1. */ 2630 if (mbcset) 2631 { 2632 /* Check the space of the arrays. */ 2633 if (BE (*range_alloc == mbcset->nranges, 0)) 2634 { 2635 /* There is not enough space, need realloc. */ 2636 wchar_t *new_array_start, *new_array_end; 2637 int new_nranges; 2638 2639 /* +1 in case of mbcset->nranges is 0. */ 2640 new_nranges = 2 * mbcset->nranges + 1; 2641 /* Use realloc since mbcset->range_starts and mbcset->range_ends 2642 are NULL if *range_alloc == 0. */ 2643 new_array_start = re_realloc (mbcset->range_starts, wchar_t, 2644 new_nranges); 2645 new_array_end = re_realloc (mbcset->range_ends, wchar_t, 2646 new_nranges); 2647 2648 if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2649 return REG_ESPACE; 2650 2651 mbcset->range_starts = new_array_start; 2652 mbcset->range_ends = new_array_end; 2653 *range_alloc = new_nranges; 2654 } 2655 2656 mbcset->range_starts[mbcset->nranges] = start_wc; 2657 mbcset->range_ends[mbcset->nranges++] = end_wc; 2658 } 2659 2660 /* Build the table for single byte characters. */ 2661 for (wc = 0; wc < SBC_MAX; ++wc) 2662 { 2663 cmp_buf[2] = wc; 2664 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 2665 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) 2666 bitset_set (sbcset, wc); 2667 } 2668 } 2669 # else /* not RE_ENABLE_I18N */ 2670 { 2671 unsigned int ch; 2672 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch 2673 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] 2674 : 0)); 2675 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch 2676 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] 2677 : 0)); 2678 if (start_ch > end_ch) 2679 return REG_ERANGE; 2680 /* Build the table for single byte characters. */ 2681 for (ch = 0; ch < SBC_MAX; ++ch) 2682 if (start_ch <= ch && ch <= end_ch) 2683 bitset_set (sbcset, ch); 2684 } 2685 # endif /* not RE_ENABLE_I18N */ 2695 ? parse_byte (end_ch, dfa) : end_elem->opr.wch); 2696 2697 if (start_wc == WEOF || end_wc == WEOF) 2698 return REG_ECOLLATE; 2699 else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) 2700 && start_wc > end_wc)) 2701 return REG_ERANGE; 2702 2703 /* Got valid collation sequence values, add them as a new entry. 2704 However, for !_LIBC we have no collation elements: if the 2705 character set is single byte, the single byte character set 2706 that we build below suffices. parse_bracket_exp passes 2707 no MBCSET if dfa->mb_cur_max == 1. */ 2708 if (dfa->mb_cur_max > 1) 2709 { 2710 /* Check the space of the arrays. */ 2711 if (__glibc_unlikely (*range_alloc == mbcset->nranges)) 2712 { 2713 /* There is not enough space, need realloc. */ 2714 wchar_t *new_array_start, *new_array_end; 2715 Idx new_nranges; 2716 2717 /* +1 in case of mbcset->nranges is 0. */ 2718 new_nranges = 2 * mbcset->nranges + 1; 2719 /* Use realloc since mbcset->range_starts and mbcset->range_ends 2720 are NULL if *range_alloc == 0. */ 2721 new_array_start = re_realloc (mbcset->range_starts, wchar_t, 2722 new_nranges); 2723 new_array_end = re_realloc (mbcset->range_ends, wchar_t, 2724 new_nranges); 2725 2726 if (__glibc_unlikely (new_array_start == NULL 2727 || new_array_end == NULL)) 2728 { 2729 re_free (new_array_start); 2730 re_free (new_array_end); 2731 return REG_ESPACE; 2732 } 2733 2734 mbcset->range_starts = new_array_start; 2735 mbcset->range_ends = new_array_end; 2736 *range_alloc = new_nranges; 2737 } 2738 2739 mbcset->range_starts[mbcset->nranges] = start_wc; 2740 mbcset->range_ends[mbcset->nranges++] = end_wc; 2741 } 2742 2743 /* Build the table for single byte characters. */ 2744 for (wchar_t wc = 0; wc < SBC_MAX; ++wc) 2745 { 2746 if (start_wc <= wc && wc <= end_wc) 2747 bitset_set (sbcset, wc); 2748 } 2749 2686 2750 return REG_NOERROR; 2687 2751 } … … 2689 2753 2690 2754 #ifndef _LIBC 2691 /* Helper function for parse_bracket_exp only used in case of NOT _LIBC. .2755 /* Helper function for parse_bracket_exp only used in case of NOT _LIBC. 2692 2756 Build the collating element which is represented by NAME. 2693 2757 The result are written to MBCSET and SBCSET. … … 2696 2760 2697 2761 static reg_errcode_t 2698 internal_function2699 # ifdef RE_ENABLE_I18N2700 2762 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2701 int *coll_sym_alloc, const unsigned char *name) 2702 # else /* not RE_ENABLE_I18N */ 2703 build_collating_symbol (bitset_t sbcset, const unsigned char *name) 2704 # endif /* not RE_ENABLE_I18N */ 2763 Idx *coll_sym_alloc, const unsigned char *name, 2764 uint_fast32_t nrules, int_fast32_t table_size, 2765 const void *symb_table, const unsigned char *extra) 2705 2766 { 2706 2767 size_t name_len = strlen ((const char *) name); 2707 if ( BE (name_len != 1, 0))2768 if (__glibc_unlikely (name_len != 1)) 2708 2769 return REG_ECOLLATE; 2709 2770 else … … 2715 2776 #endif /* not _LIBC */ 2716 2777 2717 /* This function parse bracket expression like "[abc]", "[a-c]",2718 "[[.a-a.]]" etc. */2719 2720 static bin_tree_t *2721 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,2722 reg_syntax_t syntax, reg_errcode_t *err)2723 {2724 2778 #ifdef _LIBC 2725 const unsigned char *collseqmb; 2726 const char *collseqwc; 2727 uint32_t nrules; 2728 int32_t table_size; 2729 const int32_t *symb_table; 2730 const unsigned char *extra; 2731 2732 /* Local function for parse_bracket_exp used in _LIBC environement. 2733 Seek the collating symbol entry correspondings to NAME. 2734 Return the index of the symbol in the SYMB_TABLE. */ 2735 2736 auto inline int32_t 2737 __attribute ((always_inline)) 2738 seek_collating_symbol_entry (name, name_len) 2739 const unsigned char *name; 2740 size_t name_len; 2741 { 2742 int32_t hash = elem_hash ((const char *) name, name_len); 2743 int32_t elem = hash % table_size; 2744 if (symb_table[2 * elem] != 0) 2745 { 2746 int32_t second = hash % (table_size - 2) + 1; 2747 2748 do 2749 { 2750 /* First compare the hashing value. */ 2751 if (symb_table[2 * elem] == hash 2752 /* Compare the length of the name. */ 2753 && name_len == extra[symb_table[2 * elem + 1]] 2754 /* Compare the name. */ 2755 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], 2756 name_len) == 0) 2757 { 2758 /* Yep, this is the entry. */ 2759 break; 2760 } 2761 2762 /* Next entry. */ 2763 elem += second; 2764 } 2765 while (symb_table[2 * elem] != 0); 2766 } 2767 return elem; 2768 } 2769 2770 /* Local function for parse_bracket_exp used in _LIBC environement. 2771 Look up the collation sequence value of BR_ELEM. 2772 Return the value if succeeded, UINT_MAX otherwise. */ 2773 2774 auto inline unsigned int 2775 __attribute ((always_inline)) 2776 lookup_collation_sequence_value (br_elem) 2777 bracket_elem_t *br_elem; 2778 { 2779 if (br_elem->type == SB_CHAR) 2780 { 2781 /* 2782 if (MB_CUR_MAX == 1) 2783 */ 2784 if (nrules == 0) 2785 return collseqmb[br_elem->opr.ch]; 2786 else 2787 { 2788 wint_t wc = __btowc (br_elem->opr.ch); 2789 return __collseq_table_lookup (collseqwc, wc); 2790 } 2791 } 2792 else if (br_elem->type == MB_CHAR) 2793 { 2794 return __collseq_table_lookup (collseqwc, br_elem->opr.wch); 2795 } 2796 else if (br_elem->type == COLL_SYM) 2797 { 2798 size_t sym_name_len = strlen ((char *) br_elem->opr.name); 2799 if (nrules != 0) 2800 { 2801 int32_t elem, idx; 2802 elem = seek_collating_symbol_entry (br_elem->opr.name, 2803 sym_name_len); 2804 if (symb_table[2 * elem] != 0) 2805 { 2806 /* We found the entry. */ 2807 idx = symb_table[2 * elem + 1]; 2808 /* Skip the name of collating element name. */ 2809 idx += 1 + extra[idx]; 2810 /* Skip the byte sequence of the collating element. */ 2811 idx += 1 + extra[idx]; 2812 /* Adjust for the alignment. */ 2813 idx = (idx + 3) & ~3; 2814 /* Skip the multibyte collation sequence value. */ 2815 idx += sizeof (unsigned int); 2816 /* Skip the wide char sequence of the collating element. */ 2817 idx += sizeof (unsigned int) * 2818 (1 + *(unsigned int *) (extra + idx)); 2819 /* Return the collation sequence value. */ 2820 return *(unsigned int *) (extra + idx); 2821 } 2822 else if (symb_table[2 * elem] == 0 && sym_name_len == 1) 2823 { 2824 /* No valid character. Match it as a single byte 2825 character. */ 2826 return collseqmb[br_elem->opr.name[0]]; 2827 } 2828 } 2829 else if (sym_name_len == 1) 2830 return collseqmb[br_elem->opr.name[0]]; 2831 } 2832 return UINT_MAX; 2833 } 2834 2835 /* Local function for parse_bracket_exp used in _LIBC environement. 2836 Build the range expression which starts from START_ELEM, and ends 2837 at END_ELEM. The result are written to MBCSET and SBCSET. 2838 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2839 mbcset->range_ends, is a pointer argument sinse we may 2840 update it. */ 2841 2842 auto inline reg_errcode_t 2843 __attribute ((always_inline)) 2844 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) 2845 re_charset_t *mbcset; 2846 int *range_alloc; 2847 bitset_t sbcset; 2848 bracket_elem_t *start_elem, *end_elem; 2849 { 2850 unsigned int ch; 2851 uint32_t start_collseq; 2852 uint32_t end_collseq; 2853 2854 /* Equivalence Classes and Character Classes can't be a range 2855 start/end. */ 2856 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2857 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2858 0)) 2859 return REG_ERANGE; 2860 2861 start_collseq = lookup_collation_sequence_value (start_elem); 2862 end_collseq = lookup_collation_sequence_value (end_elem); 2863 /* Check start/end collation sequence values. */ 2864 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) 2865 return REG_ECOLLATE; 2866 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) 2867 return REG_ERANGE; 2868 2869 /* Got valid collation sequence values, add them as a new entry. 2870 However, if we have no collation elements, and the character set 2871 is single byte, the single byte character set that we 2872 build below suffices. */ 2873 if (nrules > 0 || dfa->mb_cur_max > 1) 2874 { 2875 /* Check the space of the arrays. */ 2876 if (BE (*range_alloc == mbcset->nranges, 0)) 2877 { 2878 /* There is not enough space, need realloc. */ 2879 uint32_t *new_array_start; 2880 uint32_t *new_array_end; 2881 int new_nranges; 2882 2883 /* +1 in case of mbcset->nranges is 0. */ 2884 new_nranges = 2 * mbcset->nranges + 1; 2885 new_array_start = re_realloc (mbcset->range_starts, uint32_t, 2886 new_nranges); 2887 new_array_end = re_realloc (mbcset->range_ends, uint32_t, 2888 new_nranges); 2889 2890 if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2891 return REG_ESPACE; 2892 2893 mbcset->range_starts = new_array_start; 2894 mbcset->range_ends = new_array_end; 2895 *range_alloc = new_nranges; 2896 } 2897 2898 mbcset->range_starts[mbcset->nranges] = start_collseq; 2899 mbcset->range_ends[mbcset->nranges++] = end_collseq; 2900 } 2901 2902 /* Build the table for single byte characters. */ 2903 for (ch = 0; ch < SBC_MAX; ch++) 2904 { 2905 uint32_t ch_collseq; 2906 /* 2907 if (MB_CUR_MAX == 1) 2908 */ 2909 if (nrules == 0) 2910 ch_collseq = collseqmb[ch]; 2911 else 2912 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); 2913 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) 2914 bitset_set (sbcset, ch); 2915 } 2916 return REG_NOERROR; 2917 } 2918 2919 /* Local function for parse_bracket_exp used in _LIBC environement. 2920 Build the collating element which is represented by NAME. 2921 The result are written to MBCSET and SBCSET. 2922 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2923 pointer argument sinse we may update it. */ 2924 2925 auto inline reg_errcode_t 2926 __attribute ((always_inline)) 2927 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) 2928 re_charset_t *mbcset; 2929 int *coll_sym_alloc; 2930 bitset_t sbcset; 2931 const unsigned char *name; 2932 { 2933 int32_t elem, idx; 2934 size_t name_len = strlen ((const char *) name); 2779 /* Local function for parse_bracket_exp used in _LIBC environment. 2780 Seek the collating symbol entry corresponding to NAME. 2781 Return the index of the symbol in the SYMB_TABLE, 2782 or -1 if not found. */ 2783 2784 static __always_inline int32_t 2785 seek_collating_symbol_entry (const unsigned char *name, size_t name_len, 2786 const int32_t *symb_table, 2787 int_fast32_t table_size, 2788 const unsigned char *extra) 2789 { 2790 int_fast32_t elem; 2791 2792 for (elem = 0; elem < table_size; elem++) 2793 if (symb_table[2 * elem] != 0) 2794 { 2795 int32_t idx = symb_table[2 * elem + 1]; 2796 /* Skip the name of collating element name. */ 2797 idx += 1 + extra[idx]; 2798 if (/* Compare the length of the name. */ 2799 name_len == extra[idx] 2800 /* Compare the name. */ 2801 && memcmp (name, &extra[idx + 1], name_len) == 0) 2802 /* Yep, this is the entry. */ 2803 return elem; 2804 } 2805 return -1; 2806 } 2807 2808 /* Local function for parse_bracket_exp used in _LIBC environment. 2809 Look up the collation sequence value of BR_ELEM. 2810 Return the value if succeeded, UINT_MAX otherwise. */ 2811 2812 static __always_inline unsigned int 2813 lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules, 2814 const unsigned char *collseqmb, 2815 const char *collseqwc, 2816 int_fast32_t table_size, 2817 const int32_t *symb_table, 2818 const unsigned char *extra) 2819 { 2820 if (br_elem->type == SB_CHAR) 2821 { 2822 /* if (MB_CUR_MAX == 1) */ 2823 if (nrules == 0) 2824 return collseqmb[br_elem->opr.ch]; 2825 else 2826 { 2827 wint_t wc = __btowc (br_elem->opr.ch); 2828 return __collseq_table_lookup (collseqwc, wc); 2829 } 2830 } 2831 else if (br_elem->type == MB_CHAR) 2832 { 2935 2833 if (nrules != 0) 2936 { 2937 elem = seek_collating_symbol_entry (name, name_len); 2938 if (symb_table[2 * elem] != 0) 2834 return __collseq_table_lookup (collseqwc, br_elem->opr.wch); 2835 } 2836 else if (br_elem->type == COLL_SYM) 2837 { 2838 size_t sym_name_len = strlen ((char *) br_elem->opr.name); 2839 if (nrules != 0) 2840 { 2841 int32_t elem, idx; 2842 elem = seek_collating_symbol_entry (br_elem->opr.name, 2843 sym_name_len, 2844 symb_table, table_size, 2845 extra); 2846 if (elem != -1) 2939 2847 { 2940 2848 /* We found the entry. */ … … 2942 2850 /* Skip the name of collating element name. */ 2943 2851 idx += 1 + extra[idx]; 2852 /* Skip the byte sequence of the collating element. */ 2853 idx += 1 + extra[idx]; 2854 /* Adjust for the alignment. */ 2855 idx = (idx + 3) & ~3; 2856 /* Skip the multibyte collation sequence value. */ 2857 idx += sizeof (unsigned int); 2858 /* Skip the wide char sequence of the collating element. */ 2859 idx += sizeof (unsigned int) * 2860 (1 + *(unsigned int *) (extra + idx)); 2861 /* Return the collation sequence value. */ 2862 return *(unsigned int *) (extra + idx); 2944 2863 } 2945 else if (sym b_table[2 * elem] == 0 &&name_len == 1)2864 else if (sym_name_len == 1) 2946 2865 { 2947 /* No valid character , treat it as a normal2866 /* No valid character. Match it as a single byte 2948 2867 character. */ 2949 bitset_set (sbcset, name[0]); 2950 return REG_NOERROR; 2868 return collseqmb[br_elem->opr.name[0]]; 2951 2869 } 2952 else 2953 return REG_ECOLLATE; 2954 2955 /* Got valid collation sequence, add it as a new entry. */ 2956 /* Check the space of the arrays. */ 2957 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) 2958 { 2959 /* Not enough, realloc it. */ 2960 /* +1 in case of mbcset->ncoll_syms is 0. */ 2961 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; 2962 /* Use realloc since mbcset->coll_syms is NULL 2963 if *alloc == 0. */ 2964 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, 2965 new_coll_sym_alloc); 2966 if (BE (new_coll_syms == NULL, 0)) 2967 return REG_ESPACE; 2968 mbcset->coll_syms = new_coll_syms; 2969 *coll_sym_alloc = new_coll_sym_alloc; 2970 } 2971 mbcset->coll_syms[mbcset->ncoll_syms++] = idx; 2870 } 2871 else if (sym_name_len == 1) 2872 return collseqmb[br_elem->opr.name[0]]; 2873 } 2874 return UINT_MAX; 2875 } 2876 2877 /* Local function for parse_bracket_exp used in _LIBC environment. 2878 Build the range expression which starts from START_ELEM, and ends 2879 at END_ELEM. The result are written to MBCSET and SBCSET. 2880 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2881 mbcset->range_ends, is a pointer argument since we may 2882 update it. */ 2883 2884 static __always_inline reg_errcode_t 2885 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, 2886 bracket_elem_t *start_elem, bracket_elem_t *end_elem, 2887 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules, 2888 const unsigned char *collseqmb, const char *collseqwc, 2889 int_fast32_t table_size, const int32_t *symb_table, 2890 const unsigned char *extra) 2891 { 2892 unsigned int ch; 2893 uint32_t start_collseq; 2894 uint32_t end_collseq; 2895 2896 /* Equivalence Classes and Character Classes can't be a range 2897 start/end. */ 2898 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS 2899 || start_elem->type == CHAR_CLASS 2900 || end_elem->type == EQUIV_CLASS 2901 || end_elem->type == CHAR_CLASS)) 2902 return REG_ERANGE; 2903 2904 /* FIXME: Implement rational ranges here, too. */ 2905 start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc, 2906 table_size, symb_table, extra); 2907 end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc, 2908 table_size, symb_table, extra); 2909 /* Check start/end collation sequence values. */ 2910 if (__glibc_unlikely (start_collseq == UINT_MAX 2911 || end_collseq == UINT_MAX)) 2912 return REG_ECOLLATE; 2913 if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) 2914 && start_collseq > end_collseq)) 2915 return REG_ERANGE; 2916 2917 /* Got valid collation sequence values, add them as a new entry. 2918 However, if we have no collation elements, and the character set 2919 is single byte, the single byte character set that we 2920 build below suffices. */ 2921 if (nrules > 0 || dfa->mb_cur_max > 1) 2922 { 2923 /* Check the space of the arrays. */ 2924 if (__glibc_unlikely (*range_alloc == mbcset->nranges)) 2925 { 2926 /* There is not enough space, need realloc. */ 2927 uint32_t *new_array_start; 2928 uint32_t *new_array_end; 2929 int new_nranges; 2930 2931 /* +1 in case of mbcset->nranges is 0. */ 2932 new_nranges = 2 * mbcset->nranges + 1; 2933 new_array_start = re_realloc (mbcset->range_starts, uint32_t, 2934 new_nranges); 2935 new_array_end = re_realloc (mbcset->range_ends, uint32_t, 2936 new_nranges); 2937 2938 if (__glibc_unlikely (new_array_start == NULL 2939 || new_array_end == NULL)) 2940 return REG_ESPACE; 2941 2942 mbcset->range_starts = new_array_start; 2943 mbcset->range_ends = new_array_end; 2944 *range_alloc = new_nranges; 2945 } 2946 2947 mbcset->range_starts[mbcset->nranges] = start_collseq; 2948 mbcset->range_ends[mbcset->nranges++] = end_collseq; 2949 } 2950 2951 /* Build the table for single byte characters. */ 2952 for (ch = 0; ch < SBC_MAX; ch++) 2953 { 2954 uint32_t ch_collseq; 2955 /* if (MB_CUR_MAX == 1) */ 2956 if (nrules == 0) 2957 ch_collseq = collseqmb[ch]; 2958 else 2959 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); 2960 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) 2961 bitset_set (sbcset, ch); 2962 } 2963 return REG_NOERROR; 2964 } 2965 2966 /* Local function for parse_bracket_exp used in _LIBC environment. 2967 Build the collating element which is represented by NAME. 2968 The result are written to MBCSET and SBCSET. 2969 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2970 pointer argument since we may update it. */ 2971 2972 static __always_inline reg_errcode_t 2973 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2974 Idx *coll_sym_alloc, const unsigned char *name, 2975 uint_fast32_t nrules, int_fast32_t table_size, 2976 const int32_t *symb_table, const unsigned char *extra) 2977 { 2978 int32_t elem, idx; 2979 size_t name_len = strlen ((const char *) name); 2980 if (nrules != 0) 2981 { 2982 elem = seek_collating_symbol_entry (name, name_len, symb_table, 2983 table_size, extra); 2984 if (elem != -1) 2985 { 2986 /* We found the entry. */ 2987 idx = symb_table[2 * elem + 1]; 2988 /* Skip the name of collating element name. */ 2989 idx += 1 + extra[idx]; 2990 } 2991 else if (name_len == 1) 2992 { 2993 /* No valid character, treat it as a normal 2994 character. */ 2995 bitset_set (sbcset, name[0]); 2972 2996 return REG_NOERROR; 2973 2997 } 2974 2998 else 2975 { 2976 if (BE (name_len != 1, 0)) 2977 return REG_ECOLLATE; 2978 else 2979 { 2980 bitset_set (sbcset, name[0]); 2981 return REG_NOERROR; 2982 } 2983 } 2984 } 2985 #endif 2999 return REG_ECOLLATE; 3000 3001 /* Got valid collation sequence, add it as a new entry. */ 3002 /* Check the space of the arrays. */ 3003 if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms)) 3004 { 3005 /* Not enough, realloc it. */ 3006 /* +1 in case of mbcset->ncoll_syms is 0. */ 3007 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; 3008 /* Use realloc since mbcset->coll_syms is NULL 3009 if *alloc == 0. */ 3010 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, 3011 new_coll_sym_alloc); 3012 if (__glibc_unlikely (new_coll_syms == NULL)) 3013 return REG_ESPACE; 3014 mbcset->coll_syms = new_coll_syms; 3015 *coll_sym_alloc = new_coll_sym_alloc; 3016 } 3017 mbcset->coll_syms[mbcset->ncoll_syms++] = idx; 3018 return REG_NOERROR; 3019 } 3020 else 3021 { 3022 if (__glibc_unlikely (name_len != 1)) 3023 return REG_ECOLLATE; 3024 else 3025 { 3026 bitset_set (sbcset, name[0]); 3027 return REG_NOERROR; 3028 } 3029 } 3030 } 3031 #endif /* _LIBC */ 3032 3033 /* This function parse bracket expression like "[abc]", "[a-c]", 3034 "[[.a-a.]]" etc. */ 3035 3036 static bin_tree_t * 3037 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, 3038 reg_syntax_t syntax, reg_errcode_t *err) 3039 { 3040 const unsigned char *collseqmb = NULL; 3041 const char *collseqwc = NULL; 3042 uint_fast32_t nrules = 0; 3043 int_fast32_t table_size = 0; 3044 const void *symb_table = NULL; 3045 const unsigned char *extra = NULL; 2986 3046 2987 3047 re_token_t br_token; 2988 3048 re_bitset_ptr_t sbcset; 2989 #ifdef RE_ENABLE_I18N2990 3049 re_charset_t *mbcset; 2991 int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; 2992 int equiv_class_alloc = 0, char_class_alloc = 0; 2993 #endif /* not RE_ENABLE_I18N */ 2994 int non_match = 0; 3050 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; 3051 Idx equiv_class_alloc = 0, char_class_alloc = 0; 3052 bool non_match = false; 2995 3053 bin_tree_t *work_tree; 2996 3054 int token_len; 2997 int first_round = 1;3055 bool first_round = true; 2998 3056 #ifdef _LIBC 2999 3057 collseqmb = (const unsigned char *) … … 3007 3065 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); 3008 3066 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); 3009 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3010 _NL_COLLATE_SYMB_TABLEMB); 3067 symb_table = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_TABLEMB); 3011 3068 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 3012 3069 _NL_COLLATE_SYMB_EXTRAMB); … … 3014 3071 #endif 3015 3072 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3016 #ifdef RE_ENABLE_I18N3017 3073 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3018 #endif /* RE_ENABLE_I18N */ 3019 #ifdef RE_ENABLE_I18N 3020 if (BE (sbcset == NULL || mbcset == NULL, 0)) 3021 #else 3022 if (BE (sbcset == NULL, 0)) 3023 #endif /* RE_ENABLE_I18N */ 3024 { 3074 if (__glibc_unlikely (sbcset == NULL || mbcset == NULL)) 3075 { 3076 re_free (sbcset); 3077 re_free (mbcset); 3025 3078 *err = REG_ESPACE; 3026 3079 return NULL; … … 3028 3081 3029 3082 token_len = peek_token_bracket (token, regexp, syntax); 3030 if ( BE (token->type == END_OF_RE, 0))3083 if (__glibc_unlikely (token->type == END_OF_RE)) 3031 3084 { 3032 3085 *err = REG_BADPAT; … … 3035 3088 if (token->type == OP_NON_MATCH_LIST) 3036 3089 { 3037 #ifdef RE_ENABLE_I18N3038 3090 mbcset->non_match = 1; 3039 #endif /* not RE_ENABLE_I18N */ 3040 non_match = 1; 3091 non_match = true; 3041 3092 if (syntax & RE_HAT_LISTS_NOT_NEWLINE) 3042 bitset_set (sbcset, '\ 0');3093 bitset_set (sbcset, '\n'); 3043 3094 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ 3044 3095 token_len = peek_token_bracket (token, regexp, syntax); 3045 if ( BE (token->type == END_OF_RE, 0))3096 if (__glibc_unlikely (token->type == END_OF_RE)) 3046 3097 { 3047 3098 *err = REG_BADPAT; … … 3060 3111 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; 3061 3112 reg_errcode_t ret; 3062 int token_len2 = 0, is_range_exp = 0; 3113 int token_len2 = 0; 3114 bool is_range_exp = false; 3063 3115 re_token_t token2; 3064 3116 3065 3117 start_elem.opr.name = start_name_buf; 3118 start_elem.type = COLL_SYM; 3066 3119 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, 3067 3120 syntax, first_round); 3068 if ( BE (ret != REG_NOERROR, 0))3121 if (__glibc_unlikely (ret != REG_NOERROR)) 3069 3122 { 3070 3123 *err = ret; 3071 3124 goto parse_bracket_exp_free_return; 3072 3125 } 3073 first_round = 0;3126 first_round = false; 3074 3127 3075 3128 /* Get information about the next token. We need it in any case. */ … … 3079 3132 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) 3080 3133 { 3081 if ( BE (token->type == END_OF_RE, 0))3134 if (__glibc_unlikely (token->type == END_OF_RE)) 3082 3135 { 3083 3136 *err = REG_EBRACK; … … 3088 3141 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ 3089 3142 token_len2 = peek_token_bracket (&token2, regexp, syntax); 3090 if ( BE (token2.type == END_OF_RE, 0))3143 if (__glibc_unlikely (token2.type == END_OF_RE)) 3091 3144 { 3092 3145 *err = REG_EBRACK; … … 3100 3153 } 3101 3154 else 3102 is_range_exp = 1;3155 is_range_exp = true; 3103 3156 } 3104 3157 } 3105 3158 3106 if (is_range_exp == 1)3159 if (is_range_exp == true) 3107 3160 { 3108 3161 end_elem.opr.name = end_name_buf; 3162 end_elem.type = COLL_SYM; 3109 3163 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, 3110 dfa, syntax, 1);3111 if ( BE (ret != REG_NOERROR, 0))3164 dfa, syntax, true); 3165 if (__glibc_unlikely (ret != REG_NOERROR)) 3112 3166 { 3113 3167 *err = ret; … … 3117 3171 token_len = peek_token_bracket (token, regexp, syntax); 3118 3172 3119 #ifdef _LIBC3120 3173 *err = build_range_exp (sbcset, mbcset, &range_alloc, 3121 &start_elem, &end_elem); 3122 #else 3123 # ifdef RE_ENABLE_I18N 3124 *err = build_range_exp (sbcset, 3125 dfa->mb_cur_max > 1 ? mbcset : NULL, 3126 &range_alloc, &start_elem, &end_elem); 3127 # else 3128 *err = build_range_exp (sbcset, &start_elem, &end_elem); 3129 # endif 3130 #endif /* RE_ENABLE_I18N */ 3131 if (BE (*err != REG_NOERROR, 0)) 3174 &start_elem, &end_elem, 3175 dfa, syntax, nrules, collseqmb, collseqwc, 3176 table_size, symb_table, extra); 3177 if (__glibc_unlikely (*err != REG_NOERROR)) 3132 3178 goto parse_bracket_exp_free_return; 3133 3179 } … … 3139 3185 bitset_set (sbcset, start_elem.opr.ch); 3140 3186 break; 3141 #ifdef RE_ENABLE_I18N3142 3187 case MB_CHAR: 3143 3188 /* Check whether the array has enough space. */ 3144 if ( BE (mbchar_alloc == mbcset->nmbchars, 0))3189 if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars)) 3145 3190 { 3146 3191 wchar_t *new_mbchars; … … 3151 3196 new_mbchars = re_realloc (mbcset->mbchars, wchar_t, 3152 3197 mbchar_alloc); 3153 if ( BE (new_mbchars == NULL, 0))3198 if (__glibc_unlikely (new_mbchars == NULL)) 3154 3199 goto parse_bracket_exp_espace; 3155 3200 mbcset->mbchars = new_mbchars; … … 3157 3202 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; 3158 3203 break; 3159 #endif /* RE_ENABLE_I18N */3160 3204 case EQUIV_CLASS: 3161 3205 *err = build_equiv_class (sbcset, 3162 #ifdef RE_ENABLE_I18N3163 3206 mbcset, &equiv_class_alloc, 3164 #endif /* RE_ENABLE_I18N */3165 3207 start_elem.opr.name); 3166 if ( BE (*err != REG_NOERROR, 0))3208 if (__glibc_unlikely (*err != REG_NOERROR)) 3167 3209 goto parse_bracket_exp_free_return; 3168 3210 break; 3169 3211 case COLL_SYM: 3170 3212 *err = build_collating_symbol (sbcset, 3171 #ifdef RE_ENABLE_I18N3172 3213 mbcset, &coll_sym_alloc, 3173 #endif /* RE_ENABLE_I18N */ 3174 start_elem.opr.name);3175 if ( BE (*err != REG_NOERROR, 0))3214 start_elem.opr.name, 3215 nrules, table_size, symb_table, extra); 3216 if (__glibc_unlikely (*err != REG_NOERROR)) 3176 3217 goto parse_bracket_exp_free_return; 3177 3218 break; 3178 3219 case CHAR_CLASS: 3179 3220 *err = build_charclass (regexp->trans, sbcset, 3180 #ifdef RE_ENABLE_I18N3181 3221 mbcset, &char_class_alloc, 3182 #endif /* RE_ENABLE_I18N */ 3183 s tart_elem.opr.name, syntax);3184 if ( BE (*err != REG_NOERROR, 0))3222 (const char *) start_elem.opr.name, 3223 syntax); 3224 if (__glibc_unlikely (*err != REG_NOERROR)) 3185 3225 goto parse_bracket_exp_free_return; 3186 3226 break; 3187 3227 default: 3188 assert (0);3228 DEBUG_ASSERT (false); 3189 3229 break; 3190 3230 } 3191 3231 } 3192 if ( BE (token->type == END_OF_RE, 0))3232 if (__glibc_unlikely (token->type == END_OF_RE)) 3193 3233 { 3194 3234 *err = REG_EBRACK; … … 3205 3245 bitset_not (sbcset); 3206 3246 3207 #ifdef RE_ENABLE_I18N3208 3247 /* Ensure only single byte characters are set. */ 3209 3248 if (dfa->mb_cur_max > 1) … … 3221 3260 br_token.opr.mbcset = mbcset; 3222 3261 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3223 if ( BE (mbc_tree == NULL, 0))3262 if (__glibc_unlikely (mbc_tree == NULL)) 3224 3263 goto parse_bracket_exp_espace; 3225 3264 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) … … 3230 3269 if (sbc_idx < BITSET_WORDS) 3231 3270 { 3232 3233 3234 3235 3236 if (BE (work_tree == NULL, 0))3237 3238 3239 3240 3241 if (BE (work_tree == NULL, 0))3242 3271 /* Build a tree for simple bracket. */ 3272 br_token.type = SIMPLE_BRACKET; 3273 br_token.opr.sbcset = sbcset; 3274 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3275 if (__glibc_unlikely (work_tree == NULL)) 3276 goto parse_bracket_exp_espace; 3277 3278 /* Then join them by ALT node. */ 3279 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); 3280 if (__glibc_unlikely (work_tree == NULL)) 3281 goto parse_bracket_exp_espace; 3243 3282 } 3244 3283 else … … 3249 3288 } 3250 3289 else 3251 #endif /* not RE_ENABLE_I18N */ 3252 { 3253 #ifdef RE_ENABLE_I18N 3290 { 3254 3291 free_charset (mbcset); 3255 #endif3256 3292 /* Build a tree for simple bracket. */ 3257 3293 br_token.type = SIMPLE_BRACKET; 3258 3294 br_token.opr.sbcset = sbcset; 3259 3295 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3260 if ( BE (work_tree == NULL, 0))3261 3296 if (__glibc_unlikely (work_tree == NULL)) 3297 goto parse_bracket_exp_espace; 3262 3298 } 3263 3299 return work_tree; … … 3267 3303 parse_bracket_exp_free_return: 3268 3304 re_free (sbcset); 3269 #ifdef RE_ENABLE_I18N3270 3305 free_charset (mbcset); 3271 #endif /* RE_ENABLE_I18N */3272 3306 return NULL; 3273 3307 } … … 3278 3312 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, 3279 3313 re_token_t *token, int token_len, re_dfa_t *dfa, 3280 reg_syntax_t syntax, int accept_hyphen) 3281 { 3282 #ifdef RE_ENABLE_I18N 3314 reg_syntax_t syntax, bool accept_hyphen) 3315 { 3283 3316 int cur_char_size; 3284 3317 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); … … 3290 3323 return REG_NOERROR; 3291 3324 } 3292 #endif /* RE_ENABLE_I18N */3293 3325 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ 3294 3326 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS 3295 3327 || token->type == OP_OPEN_EQUIV_CLASS) 3296 3328 return parse_bracket_symbol (elem, regexp, token); 3297 if ( BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)3329 if (__glibc_unlikely (token->type == OP_CHARSET_RANGE) && !accept_hyphen) 3298 3330 { 3299 3331 /* A '-' must only appear as anything but a range indicator before … … 3360 3392 The result are written to MBCSET and SBCSET. 3361 3393 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, 3362 is a pointer argument sin se we may update it. */3394 is a pointer argument since we may update it. */ 3363 3395 3364 3396 static reg_errcode_t 3365 #ifdef RE_ENABLE_I18N3366 3397 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, 3367 int *equiv_class_alloc, const unsigned char *name) 3368 #else /* not RE_ENABLE_I18N */ 3369 build_equiv_class (bitset_t sbcset, const unsigned char *name) 3370 #endif /* not RE_ENABLE_I18N */ 3398 Idx *equiv_class_alloc, const unsigned char *name) 3371 3399 { 3372 3400 #ifdef _LIBC … … 3380 3408 unsigned int ch; 3381 3409 size_t len; 3382 /* This #include defines a local function! */3383 # include <locale/weight.h>3384 3410 /* Calculate the index for equivalence class. */ 3385 3411 cp = name; … … 3391 3417 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3392 3418 _NL_COLLATE_INDIRECTMB); 3393 idx1 = findidx ( &cp);3394 if ( BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))3419 idx1 = findidx (table, indirect, extra, &cp, -1); 3420 if (__glibc_unlikely (idx1 == 0 || *cp != '\0')) 3395 3421 /* This isn't a valid character. */ 3396 3422 return REG_ECOLLATE; 3397 3423 3398 /* Build single byte matcing table for this equivalence class. */ 3399 char_buf[1] = (unsigned char) '\0'; 3400 len = weights[idx1]; 3424 /* Build single byte matching table for this equivalence class. */ 3425 len = weights[idx1 & 0xffffff]; 3401 3426 for (ch = 0; ch < SBC_MAX; ++ch) 3402 3427 { 3403 3428 char_buf[0] = ch; 3404 3429 cp = char_buf; 3405 idx2 = findidx ( &cp);3430 idx2 = findidx (table, indirect, extra, &cp, 1); 3406 3431 /* 3407 3432 idx2 = table[ch]; … … 3410 3435 /* This isn't a valid character. */ 3411 3436 continue; 3412 if (len == weights[idx2]) 3413 { 3414 int cnt = 0; 3415 while (cnt <= len && 3416 weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) 3417 ++cnt; 3418 3419 if (cnt > len) 3420 bitset_set (sbcset, ch); 3421 } 3437 /* Compare only if the length matches and the collation rule 3438 index is the same. */ 3439 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24) 3440 && memcmp (weights + (idx1 & 0xffffff) + 1, 3441 weights + (idx2 & 0xffffff) + 1, len) == 0) 3442 bitset_set (sbcset, ch); 3422 3443 } 3423 3444 /* Check whether the array has enough space. */ 3424 if ( BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))3445 if (__glibc_unlikely (*equiv_class_alloc == mbcset->nequiv_classes)) 3425 3446 { 3426 3447 /* Not enough, realloc it. */ 3427 3448 /* +1 in case of mbcset->nequiv_classes is 0. */ 3428 intnew_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;3449 Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; 3429 3450 /* Use realloc since the array is NULL if *alloc == 0. */ 3430 3451 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, 3431 3452 int32_t, 3432 3453 new_equiv_class_alloc); 3433 if ( BE (new_equiv_classes == NULL, 0))3454 if (__glibc_unlikely (new_equiv_classes == NULL)) 3434 3455 return REG_ESPACE; 3435 3456 mbcset->equiv_classes = new_equiv_classes; … … 3441 3462 #endif /* _LIBC */ 3442 3463 { 3443 if ( BE (strlen ((const char *) name) != 1, 0))3464 if (__glibc_unlikely (strlen ((const char *) name) != 1)) 3444 3465 return REG_ECOLLATE; 3445 3466 bitset_set (sbcset, *name); … … 3452 3473 The result are written to MBCSET and SBCSET. 3453 3474 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, 3454 is a pointer argument sin se we may update it. */3475 is a pointer argument since we may update it. */ 3455 3476 3456 3477 static reg_errcode_t 3457 #ifdef RE_ENABLE_I18N3458 3478 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3459 re_charset_t *mbcset, int *char_class_alloc, 3460 const unsigned char *class_name, reg_syntax_t syntax) 3461 #else /* not RE_ENABLE_I18N */ 3462 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3463 const unsigned char *class_name, reg_syntax_t syntax) 3464 #endif /* not RE_ENABLE_I18N */ 3479 re_charset_t *mbcset, Idx *char_class_alloc, 3480 const char *class_name, reg_syntax_t syntax) 3465 3481 { 3466 3482 int i; 3467 const char *name = (const char *)class_name;3483 const char *name = class_name; 3468 3484 3469 3485 /* In case of REG_ICASE "upper" and "lower" match the both of … … 3473 3489 name = "alpha"; 3474 3490 3475 #ifdef RE_ENABLE_I18N3476 3491 /* Check the space of the arrays. */ 3477 if ( BE (*char_class_alloc == mbcset->nchar_classes, 0))3492 if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes)) 3478 3493 { 3479 3494 /* Not enough, realloc it. */ 3480 3495 /* +1 in case of mbcset->nchar_classes is 0. */ 3481 intnew_char_class_alloc = 2 * mbcset->nchar_classes + 1;3496 Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1; 3482 3497 /* Use realloc since array is NULL if *alloc == 0. */ 3483 3498 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, 3484 3499 new_char_class_alloc); 3485 if ( BE (new_char_classes == NULL, 0))3500 if (__glibc_unlikely (new_char_classes == NULL)) 3486 3501 return REG_ESPACE; 3487 3502 mbcset->char_classes = new_char_classes; … … 3489 3504 } 3490 3505 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); 3491 #endif /* RE_ENABLE_I18N */3492 3506 3493 3507 #define BUILD_CHARCLASS_LOOP(ctype_func) \ 3494 3508 do { \ 3495 if ( BE (trans != NULL, 0)) \3509 if (__glibc_unlikely (trans != NULL)) \ 3496 3510 { \ 3497 3511 for (i = 0; i < SBC_MAX; ++i) \ 3498 3512 if (ctype_func (i)) \ 3499 3513 bitset_set (sbcset, trans[i]); \ 3500 3514 } \ … … 3502 3516 { \ 3503 3517 for (i = 0; i < SBC_MAX; ++i) \ 3504 3518 if (ctype_func (i)) \ 3505 3519 bitset_set (sbcset, i); \ 3506 3520 } \ … … 3539 3553 static bin_tree_t * 3540 3554 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, 3541 const unsignedchar *class_name,3542 const unsigned char *extra, intnon_match,3555 const char *class_name, 3556 const char *extra, bool non_match, 3543 3557 reg_errcode_t *err) 3544 3558 { 3545 3559 re_bitset_ptr_t sbcset; 3546 #ifdef RE_ENABLE_I18N3547 3560 re_charset_t *mbcset; 3548 int alloc = 0; 3549 #endif /* not RE_ENABLE_I18N */ 3561 Idx alloc = 0; 3550 3562 reg_errcode_t ret; 3551 re_token_t br_token;3552 3563 bin_tree_t *tree; 3553 3564 3554 3565 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3555 #ifdef RE_ENABLE_I18N 3556 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3557 #endif /* RE_ENABLE_I18N */ 3558 3559 #ifdef RE_ENABLE_I18N 3560 if (BE (sbcset == NULL || mbcset == NULL, 0)) 3561 #else /* not RE_ENABLE_I18N */ 3562 if (BE (sbcset == NULL, 0)) 3563 #endif /* not RE_ENABLE_I18N */ 3566 if (__glibc_unlikely (sbcset == NULL)) 3564 3567 { 3565 3568 *err = REG_ESPACE; 3566 3569 return NULL; 3567 3570 } 3568 3569 if (non_match) 3570 { 3571 #ifdef RE_ENABLE_I18N 3572 /* 3573 if (syntax & RE_HAT_LISTS_NOT_NEWLINE) 3574 bitset_set(cset->sbcset, '\0'); 3575 */ 3576 mbcset->non_match = 1; 3577 #endif /* not RE_ENABLE_I18N */ 3578 } 3571 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3572 if (__glibc_unlikely (mbcset == NULL)) 3573 { 3574 re_free (sbcset); 3575 *err = REG_ESPACE; 3576 return NULL; 3577 } 3578 mbcset->non_match = non_match; 3579 3579 3580 3580 /* We don't care the syntax in this case. */ 3581 ret = build_charclass (trans, sbcset, 3582 #ifdef RE_ENABLE_I18N 3583 mbcset, &alloc, 3584 #endif /* RE_ENABLE_I18N */ 3585 class_name, 0); 3586 3587 if (BE (ret != REG_NOERROR, 0)) 3581 ret = build_charclass (trans, sbcset, mbcset, &alloc, class_name, 0); 3582 3583 if (__glibc_unlikely (ret != REG_NOERROR)) 3588 3584 { 3589 3585 re_free (sbcset); 3590 #ifdef RE_ENABLE_I18N3591 3586 free_charset (mbcset); 3592 #endif /* RE_ENABLE_I18N */3593 3587 *err = ret; 3594 3588 return NULL; … … 3602 3596 bitset_not (sbcset); 3603 3597 3604 #ifdef RE_ENABLE_I18N3605 3598 /* Ensure only single byte characters are set. */ 3606 3599 if (dfa->mb_cur_max > 1) 3607 3600 bitset_mask (sbcset, dfa->sb_char); 3608 #endif3609 3601 3610 3602 /* Build a tree for simple bracket. */ 3611 br_token.type = SIMPLE_BRACKET; 3612 br_token.opr.sbcset = sbcset; 3603 re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset }; 3613 3604 tree = create_token_tree (dfa, NULL, NULL, &br_token); 3614 if ( BE (tree == NULL, 0))3605 if (__glibc_unlikely (tree == NULL)) 3615 3606 goto build_word_op_espace; 3616 3607 3617 #ifdef RE_ENABLE_I18N3618 3608 if (dfa->mb_cur_max > 1) 3619 3609 { … … 3624 3614 dfa->has_mb_node = 1; 3625 3615 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3626 if ( BE (mbc_tree == NULL, 0))3616 if (__glibc_unlikely (mbc_tree == NULL)) 3627 3617 goto build_word_op_espace; 3628 3618 /* Then join them by ALT node. */ 3629 3619 tree = create_tree (dfa, tree, mbc_tree, OP_ALT); 3630 if ( BE (mbc_tree != NULL, 1))3620 if (__glibc_likely (mbc_tree != NULL)) 3631 3621 return tree; 3632 3622 } … … 3636 3626 return tree; 3637 3627 } 3638 #else /* not RE_ENABLE_I18N */3639 return tree;3640 #endif /* not RE_ENABLE_I18N */3641 3628 3642 3629 build_word_op_espace: 3643 3630 re_free (sbcset); 3644 #ifdef RE_ENABLE_I18N3645 3631 free_charset (mbcset); 3646 #endif /* RE_ENABLE_I18N */3647 3632 *err = REG_ESPACE; 3648 3633 return NULL; … … 3650 3635 3651 3636 /* This is intended for the expressions like "a{1,3}". 3652 Fetch a number from `input', and return the number. 3653 Return -1, if the number field is empty like "{,1}". 3654 Return -2, If an error is occured. */ 3655 3656 static int 3637 Fetch a number from 'input', and return the number. 3638 Return -1 if the number field is empty like "{,1}". 3639 Return RE_DUP_MAX + 1 if the number field is too large. 3640 Return -2 if an error occurred. */ 3641 3642 static Idx 3657 3643 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) 3658 3644 { 3659 intnum = -1;3645 Idx num = -1; 3660 3646 unsigned char c; 3661 3647 while (1) … … 3663 3649 fetch_token (token, input, syntax); 3664 3650 c = token->opr.c; 3665 if ( BE (token->type == END_OF_RE, 0))3651 if (__glibc_unlikely (token->type == END_OF_RE)) 3666 3652 return -2; 3667 3653 if (token->type == OP_CLOSE_DUP_NUM || c == ',') 3668 3654 break; 3669 3655 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) 3670 ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); 3671 num = (num > RE_DUP_MAX) ? -2 : num; 3656 ? -2 3657 : num == -1 3658 ? c - '0' 3659 : MIN (RE_DUP_MAX + 1, num * 10 + c - '0')); 3672 3660 } 3673 3661 return num; … … 3675 3663 3676 3664 3677 #ifdef RE_ENABLE_I18N3678 3665 static void 3679 3666 free_charset (re_charset_t *cset) 3680 3667 { 3681 3668 re_free (cset->mbchars); 3682 # 3669 #ifdef _LIBC 3683 3670 re_free (cset->coll_syms); 3684 3671 re_free (cset->equiv_classes); 3672 #endif 3685 3673 re_free (cset->range_starts); 3686 3674 re_free (cset->range_ends); 3687 # endif3688 3675 re_free (cset->char_classes); 3689 3676 re_free (cset); 3690 3677 } 3691 #endif /* RE_ENABLE_I18N */3692 3678 3693 3679 … … 3700 3686 re_token_type_t type) 3701 3687 { 3702 re_token_t t; 3703 t.type = type; 3688 re_token_t t = { .type = type }; 3704 3689 return create_token_tree (dfa, left, right, &t); 3705 3690 } … … 3710 3695 { 3711 3696 bin_tree_t *tree; 3712 if ( BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))3697 if (__glibc_unlikely (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE)) 3713 3698 { 3714 3699 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); … … 3745 3730 mark_opt_subexp (void *extra, bin_tree_t *node) 3746 3731 { 3747 int idx = (int) (long) extra;3732 Idx idx = (uintptr_t) extra; 3748 3733 if (node->token.type == SUBEXP && node->token.opr.idx == idx) 3749 3734 node->token.opt_subexp = 1; … … 3757 3742 free_token (re_token_t *node) 3758 3743 { 3759 #ifdef RE_ENABLE_I18N3760 3744 if (node->type == COMPLEX_BRACKET && node->duplicated == 0) 3761 3745 free_charset (node->opr.mbcset); 3762 else 3763 #endif /* RE_ENABLE_I18N */ 3764 if (node->type == SIMPLE_BRACKET && node->duplicated == 0) 3765 re_free (node->opr.sbcset); 3746 else if (node->type == SIMPLE_BRACKET && node->duplicated == 0) 3747 re_free (node->opr.sbcset); 3766 3748 } 3767 3749 … … 3814 3796 dup_node = dup_node->parent; 3815 3797 if (!node) 3816 3798 return dup_root; 3817 3799 } 3818 3800 node = node->right; -
trunk/src/sed/lib/regex.c
r599 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 20 19 21 #ifdef HAVE_CONFIG_H 22 #include "config.h" 20 #define __STDC_WANT_IEC_60559_BFP_EXT__ 21 22 #ifndef _LIBC 23 # include <libc-config.h> 24 25 # if __GNUC_PREREQ (4, 6) 26 # pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" 27 # pragma GCC diagnostic ignored "-Wvla" 28 # endif 29 # if __GNUC_PREREQ (4, 3) 30 # pragma GCC diagnostic ignored "-Wold-style-definition" 31 # pragma GCC diagnostic ignored "-Wtype-limits" 32 # endif 23 33 #endif 24 34 25 /* Make sure no one compiles this code with a C++ compiler. */26 #if def __cplusplus35 /* Make sure no one compiles this code with a C++ compiler. */ 36 #if defined __cplusplus && defined _LIBC 27 37 # error "This is C code, use a C compiler" 28 38 #endif -
trunk/src/sed/lib/regex_internal.c
r2727 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 20 21 static void re_string_construct_common (const char *str, int len, 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 19 20 static void re_string_construct_common (const char *str, Idx len, 22 21 re_string_t *pstr, 23 RE_TRANSLATE_TYPE trans, inticase,24 const re_dfa_t *dfa) internal_function;22 RE_TRANSLATE_TYPE trans, bool icase, 23 const re_dfa_t *dfa); 25 24 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, 26 25 const re_node_set *nodes, 27 unsigned int hash) internal_function;26 re_hashval_t hash); 28 27 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, 29 28 const re_node_set *nodes, 30 29 unsigned int context, 31 unsigned int hash) internal_function; 30 re_hashval_t hash); 31 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, 32 Idx new_buf_len); 33 static void build_wcs_buffer (re_string_t *pstr); 34 static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr); 35 static void build_upper_buffer (re_string_t *pstr); 36 static void re_string_translate_buffer (re_string_t *pstr); 37 static unsigned int re_string_context_at (const re_string_t *input, Idx idx, 38 int eflags) __attribute__ ((pure)); 32 39 33 40 … … 38 45 39 46 static reg_errcode_t 40 internal_function 41 re_string_allocate (re_string_t *pstr, const char *str, int len, intinit_len,42 RE_TRANSLATE_TYPE trans, inticase, const re_dfa_t *dfa)47 __attribute_warn_unused_result__ 48 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len, 49 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) 43 50 { 44 51 reg_errcode_t ret; 45 intinit_buf_len;52 Idx init_buf_len; 46 53 47 54 /* Ensure at least one character fits into the buffers. */ … … 52 59 53 60 ret = re_string_realloc_buffers (pstr, init_buf_len); 54 if ( BE (ret != REG_NOERROR, 0))61 if (__glibc_unlikely (ret != REG_NOERROR)) 55 62 return ret; 56 63 … … 66 73 67 74 static reg_errcode_t 68 internal_function 69 re_string_construct (re_string_t *pstr, const char *str, intlen,70 RE_TRANSLATE_TYPE trans, inticase, const re_dfa_t *dfa)75 __attribute_warn_unused_result__ 76 re_string_construct (re_string_t *pstr, const char *str, Idx len, 77 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) 71 78 { 72 79 reg_errcode_t ret; … … 77 84 { 78 85 ret = re_string_realloc_buffers (pstr, len + 1); 79 if ( BE (ret != REG_NOERROR, 0))86 if (__glibc_unlikely (ret != REG_NOERROR)) 80 87 return ret; 81 88 } … … 84 91 if (icase) 85 92 { 86 #ifdef RE_ENABLE_I18N87 93 if (dfa->mb_cur_max > 1) 88 94 { … … 90 96 { 91 97 ret = build_wcs_upper_buffer (pstr); 92 if ( BE (ret != REG_NOERROR, 0))98 if (__glibc_unlikely (ret != REG_NOERROR)) 93 99 return ret; 94 100 if (pstr->valid_raw_len >= len) … … 97 103 break; 98 104 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); 99 if ( BE (ret != REG_NOERROR, 0))105 if (__glibc_unlikely (ret != REG_NOERROR)) 100 106 return ret; 101 107 } 102 108 } 103 109 else 104 #endif /* RE_ENABLE_I18N */105 110 build_upper_buffer (pstr); 106 111 } 107 112 else 108 113 { 109 #ifdef RE_ENABLE_I18N110 114 if (dfa->mb_cur_max > 1) 111 115 build_wcs_buffer (pstr); 112 116 else 113 #endif /* RE_ENABLE_I18N */114 117 { 115 118 if (trans != NULL) … … 129 132 130 133 static reg_errcode_t 131 internal_function 132 re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) 133 { 134 #ifdef RE_ENABLE_I18N 134 __attribute_warn_unused_result__ 135 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) 136 { 135 137 if (pstr->mb_cur_max > 1) 136 138 { 137 wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); 138 if (BE (new_wcs == NULL, 0)) 139 wint_t *new_wcs; 140 141 /* Avoid overflow in realloc. */ 142 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); 143 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) 144 < new_buf_len)) 145 return REG_ESPACE; 146 147 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); 148 if (__glibc_unlikely (new_wcs == NULL)) 139 149 return REG_ESPACE; 140 150 pstr->wcs = new_wcs; 141 151 if (pstr->offsets != NULL) 142 152 { 143 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);144 if ( BE (new_offsets == NULL, 0))153 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len); 154 if (__glibc_unlikely (new_offsets == NULL)) 145 155 return REG_ESPACE; 146 156 pstr->offsets = new_offsets; 147 157 } 148 158 } 149 #endif /* RE_ENABLE_I18N */150 159 if (pstr->mbs_allocated) 151 160 { 152 161 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, 153 162 new_buf_len); 154 if ( BE (new_mbs == NULL, 0))163 if (__glibc_unlikely (new_mbs == NULL)) 155 164 return REG_ESPACE; 156 165 pstr->mbs = new_mbs; … … 162 171 163 172 static void 164 internal_function 165 re_string_construct_common (const char *str, int len, re_string_t *pstr, 166 RE_TRANSLATE_TYPE trans, int icase, 173 re_string_construct_common (const char *str, Idx len, re_string_t *pstr, 174 RE_TRANSLATE_TYPE trans, bool icase, 167 175 const re_dfa_t *dfa) 168 176 { … … 171 179 pstr->raw_len = len; 172 180 pstr->trans = trans; 173 pstr->icase = icase ? 1 : 0;181 pstr->icase = icase; 174 182 pstr->mbs_allocated = (trans != NULL || icase); 175 183 pstr->mb_cur_max = dfa->mb_cur_max; … … 180 188 } 181 189 182 #ifdef RE_ENABLE_I18N183 190 184 191 /* Build wide character buffer PSTR->WCS. … … 194 201 195 202 static void 196 internal_function197 203 build_wcs_buffer (re_string_t *pstr) 198 204 { 199 205 #ifdef _LIBC 200 206 unsigned char buf[MB_LEN_MAX]; 201 assert(MB_LEN_MAX >= pstr->mb_cur_max);207 DEBUG_ASSERT (MB_LEN_MAX >= pstr->mb_cur_max); 202 208 #else 203 209 unsigned char buf[64]; 204 210 #endif 205 211 mbstate_t prev_st; 206 intbyte_idx, end_idx, remain_len;212 Idx byte_idx, end_idx, remain_len; 207 213 size_t mbclen; 208 214 … … 218 224 prev_st = pstr->cur_state; 219 225 /* Apply the translation if we need. */ 220 if ( BE (pstr->trans != NULL, 0))226 if (__glibc_unlikely (pstr->trans != NULL)) 221 227 { 222 228 int i, ch; … … 231 237 else 232 238 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; 233 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); 234 if (BE (mbclen == (size_t) -2, 0)) 239 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); 240 if (__glibc_unlikely (mbclen == (size_t) -1 || mbclen == 0 241 || (mbclen == (size_t) -2 242 && pstr->bufs_len >= pstr->len))) 243 { 244 /* We treat these cases as a singlebyte character. */ 245 mbclen = 1; 246 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; 247 if (__glibc_unlikely (pstr->trans != NULL)) 248 wc = pstr->trans[wc]; 249 pstr->cur_state = prev_st; 250 } 251 else if (__glibc_unlikely (mbclen == (size_t) -2)) 235 252 { 236 253 /* The buffer doesn't have enough space, finish to build. */ 237 254 pstr->cur_state = prev_st; 238 255 break; 239 }240 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))241 {242 /* We treat these cases as a singlebyte character. */243 mbclen = 1;244 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];245 if (BE (pstr->trans != NULL, 0))246 wc = pstr->trans[wc];247 pstr->cur_state = prev_st;248 256 } 249 257 … … 261 269 but for REG_ICASE. */ 262 270 263 static int264 internal_function 271 static reg_errcode_t 272 __attribute_warn_unused_result__ 265 273 build_wcs_upper_buffer (re_string_t *pstr) 266 274 { 267 275 mbstate_t prev_st; 268 intsrc_idx, byte_idx, end_idx, remain_len;276 Idx src_idx, byte_idx, end_idx, remain_len; 269 277 size_t mbclen; 270 278 #ifdef _LIBC 271 279 char buf[MB_LEN_MAX]; 272 assert (MB_LEN_MAX >= pstr->mb_cur_max);280 DEBUG_ASSERT (pstr->mb_cur_max <= MB_LEN_MAX); 273 281 #else 274 282 char buf[64]; … … 285 293 { 286 294 wchar_t wc; 287 288 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) 289 295 unsigned char ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; 296 297 if (isascii (ch) && mbsinit (&pstr->cur_state)) 290 298 { 291 /* In case of a singlebyte character. */292 pstr->mbs[byte_idx]293 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);294 299 /* The next step uses the assumption that wchar_t is encoded 295 300 ASCII-safe: all ASCII values can be converted like this. */ 296 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; 297 ++byte_idx; 298 continue; 301 wchar_t wcu = __towupper (ch); 302 if (isascii (wcu)) 303 { 304 pstr->mbs[byte_idx] = wcu; 305 pstr->wcs[byte_idx] = wcu; 306 byte_idx++; 307 continue; 308 } 299 309 } 300 310 301 311 remain_len = end_idx - byte_idx; 302 312 prev_st = pstr->cur_state; 303 mbclen = mbrtowc (&wc,304 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx305 + byte_idx), remain_len, &pstr->cur_state);306 if ( BE (mbclen + 2 > 2, 1))313 mbclen = __mbrtowc (&wc, 314 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx 315 + byte_idx), remain_len, &pstr->cur_state); 316 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2)) 307 317 { 308 wchar_t wcu = wc;309 if ( iswlower (wc))318 wchar_t wcu = __towupper (wc); 319 if (wcu != wc) 310 320 { 311 321 size_t mbcdlen; 312 322 313 wcu = towupper (wc); 314 mbcdlen = wcrtomb (buf, wcu, &prev_st); 315 if (BE (mbclen == mbcdlen, 1)) 323 mbcdlen = __wcrtomb (buf, wcu, &prev_st); 324 if (__glibc_likely (mbclen == mbcdlen)) 316 325 memcpy (pstr->mbs + byte_idx, buf, mbclen); 317 326 else … … 329 338 pstr->wcs[byte_idx++] = WEOF; 330 339 } 331 else if (mbclen == (size_t) -1 || mbclen == 0) 340 else if (mbclen == (size_t) -1 || mbclen == 0 341 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) 332 342 { 333 /* It is an invalid character or '\0'. Just use the byte. */334 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];343 /* It is an invalid character, an incomplete character 344 at the end of the string, or '\0'. Just use the byte. */ 335 345 pstr->mbs[byte_idx] = ch; 336 346 /* And also cast it to wide char. */ 337 347 pstr->wcs[byte_idx++] = (wchar_t) ch; 338 if ( BE (mbclen == (size_t) -1, 0))348 if (__glibc_unlikely (mbclen == (size_t) -1)) 339 349 pstr->cur_state = prev_st; 340 350 } … … 358 368 remain_len = end_idx - byte_idx; 359 369 prev_st = pstr->cur_state; 360 if ( BE (pstr->trans != NULL, 0))370 if (__glibc_unlikely (pstr->trans != NULL)) 361 371 { 362 372 int i, ch; … … 371 381 else 372 382 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; 373 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);374 if ( BE (mbclen + 2 > 2, 1))383 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); 384 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2)) 375 385 { 376 wchar_t wcu = wc;377 if ( iswlower (wc))386 wchar_t wcu = __towupper (wc); 387 if (wcu != wc) 378 388 { 379 389 size_t mbcdlen; 380 390 381 wcu = towupper (wc); 382 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); 383 if (BE (mbclen == mbcdlen, 1)) 391 mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st); 392 if (__glibc_likely (mbclen == mbcdlen)) 384 393 memcpy (pstr->mbs + byte_idx, buf, mbclen); 385 394 else if (mbcdlen != (size_t) -1) … … 395 404 if (pstr->offsets == NULL) 396 405 { 397 pstr->offsets = re_malloc ( int, pstr->bufs_len);406 pstr->offsets = re_malloc (Idx, pstr->bufs_len); 398 407 399 408 if (pstr->offsets == NULL) … … 425 434 continue; 426 435 } 427 428 436 else 437 memcpy (pstr->mbs + byte_idx, p, mbclen); 429 438 } 430 439 else 431 440 memcpy (pstr->mbs + byte_idx, p, mbclen); 432 441 433 if ( BE (pstr->offsets_needed != 0,0))442 if (__glibc_unlikely (pstr->offsets_needed != 0)) 434 443 { 435 444 size_t i; … … 444 453 pstr->wcs[byte_idx++] = WEOF; 445 454 } 446 else if (mbclen == (size_t) -1 || mbclen == 0) 455 else if (mbclen == (size_t) -1 || mbclen == 0 456 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) 447 457 { 448 458 /* It is an invalid character or '\0'. Just use the byte. */ 449 459 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; 450 460 451 if ( BE (pstr->trans != NULL, 0))461 if (__glibc_unlikely (pstr->trans != NULL)) 452 462 ch = pstr->trans [ch]; 453 463 pstr->mbs[byte_idx] = ch; 454 464 455 if ( BE (pstr->offsets_needed != 0,0))465 if (__glibc_unlikely (pstr->offsets_needed != 0)) 456 466 pstr->offsets[byte_idx] = src_idx; 457 467 ++src_idx; … … 459 469 /* And also cast it to wide char. */ 460 470 pstr->wcs[byte_idx++] = (wchar_t) ch; 461 if ( BE (mbclen == (size_t) -1, 0))471 if (__glibc_unlikely (mbclen == (size_t) -1)) 462 472 pstr->cur_state = prev_st; 463 473 } … … 477 487 Return the index. */ 478 488 479 static int 480 internal_function 481 re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) 489 static Idx 490 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) 482 491 { 483 492 mbstate_t prev_st; 484 intrawbuf_idx;493 Idx rawbuf_idx; 485 494 size_t mbclen; 486 w char_t wc = 0;495 wint_t wc = WEOF; 487 496 488 497 /* Skip the characters which are not necessary to check. */ … … 490 499 rawbuf_idx < new_raw_idx;) 491 500 { 492 int remain_len;493 remain_len = pstr->len - rawbuf_idx;501 wchar_t wc2; 502 Idx remain_len = pstr->raw_len - rawbuf_idx; 494 503 prev_st = pstr->cur_state; 495 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, 496 remain_len, &pstr->cur_state); 497 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) 498 { 499 /* We treat these cases as a singlebyte character. */ 504 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, 505 remain_len, &pstr->cur_state); 506 if (__glibc_unlikely (mbclen == (size_t) -2 || mbclen == (size_t) -1 507 || mbclen == 0)) 508 { 509 /* We treat these cases as a single byte character. */ 510 if (mbclen == 0 || remain_len == 0) 511 wc = L'\0'; 512 else 513 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); 500 514 mbclen = 1; 501 515 pstr->cur_state = prev_st; 502 516 } 517 else 518 wc = wc2; 503 519 /* Then proceed the next character. */ 504 520 rawbuf_idx += mbclen; 505 521 } 506 *last_wc = (wint_t)wc;522 *last_wc = wc; 507 523 return rawbuf_idx; 508 524 } 509 #endif /* RE_ENABLE_I18N */510 525 511 526 /* Build the buffer PSTR->MBS, and apply the translation if we need. … … 513 528 514 529 static void 515 internal_function516 530 build_upper_buffer (re_string_t *pstr) 517 531 { 518 intchar_idx, end_idx;532 Idx char_idx, end_idx; 519 533 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; 520 534 … … 522 536 { 523 537 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; 524 if ( BE (pstr->trans != NULL, 0))538 if (__glibc_unlikely (pstr->trans != NULL)) 525 539 ch = pstr->trans[ch]; 526 if (islower (ch)) 527 pstr->mbs[char_idx] = toupper (ch); 528 else 529 pstr->mbs[char_idx] = ch; 540 pstr->mbs[char_idx] = toupper (ch); 530 541 } 531 542 pstr->valid_len = char_idx; … … 536 547 537 548 static void 538 internal_function539 549 re_string_translate_buffer (re_string_t *pstr) 540 550 { 541 intbuf_idx, end_idx;551 Idx buf_idx, end_idx; 542 552 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; 543 553 … … 557 567 558 568 static reg_errcode_t 559 internal_function 560 re_string_reconstruct (re_string_t *pstr, int idx, int eflags) 561 { 562 int offset = idx - pstr->raw_mbs_idx; 563 if (BE (offset < 0, 0)) 569 __attribute_warn_unused_result__ 570 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) 571 { 572 Idx offset; 573 574 if (__glibc_unlikely (pstr->raw_mbs_idx <= idx)) 575 offset = idx - pstr->raw_mbs_idx; 576 else 564 577 { 565 578 /* Reset buffer. */ 566 #ifdef RE_ENABLE_I18N567 579 if (pstr->mb_cur_max > 1) 568 580 memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); 569 #endif /* RE_ENABLE_I18N */570 581 pstr->len = pstr->raw_len; 571 582 pstr->stop = pstr->raw_stop; … … 581 592 } 582 593 583 if (BE (offset != 0, 1)) 584 { 585 /* Are the characters which are already checked remain? */ 586 if (BE (offset < pstr->valid_raw_len, 1) 587 #ifdef RE_ENABLE_I18N 588 /* Handling this would enlarge the code too much. 589 Accept a slowdown in that case. */ 590 && pstr->offsets_needed == 0 591 #endif 592 ) 594 if (__glibc_likely (offset != 0)) 595 { 596 /* Should the already checked characters be kept? */ 597 if (__glibc_likely (offset < pstr->valid_raw_len)) 593 598 { 594 599 /* Yes, move them to the front of the buffer. */ 595 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags); 596 #ifdef RE_ENABLE_I18N 597 if (pstr->mb_cur_max > 1) 598 memmove (pstr->wcs, pstr->wcs + offset, 599 (pstr->valid_len - offset) * sizeof (wint_t)); 600 #endif /* RE_ENABLE_I18N */ 601 if (BE (pstr->mbs_allocated, 0)) 602 memmove (pstr->mbs, pstr->mbs + offset, 603 pstr->valid_len - offset); 604 pstr->valid_len -= offset; 605 pstr->valid_raw_len -= offset; 606 #if DEBUG 607 assert (pstr->valid_len > 0); 608 #endif 600 if (__glibc_unlikely (pstr->offsets_needed)) 601 { 602 Idx low = 0, high = pstr->valid_len, mid; 603 do 604 { 605 mid = (high + low) / 2; 606 if (pstr->offsets[mid] > offset) 607 high = mid; 608 else if (pstr->offsets[mid] < offset) 609 low = mid + 1; 610 else 611 break; 612 } 613 while (low < high); 614 if (pstr->offsets[mid] < offset) 615 ++mid; 616 pstr->tip_context = re_string_context_at (pstr, mid - 1, 617 eflags); 618 /* This can be quite complicated, so handle specially 619 only the common and easy case where the character with 620 different length representation of lower and upper 621 case is present at or after offset. */ 622 if (pstr->valid_len > offset 623 && mid == offset && pstr->offsets[mid] == offset) 624 { 625 memmove (pstr->wcs, pstr->wcs + offset, 626 (pstr->valid_len - offset) * sizeof (wint_t)); 627 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); 628 pstr->valid_len -= offset; 629 pstr->valid_raw_len -= offset; 630 for (low = 0; low < pstr->valid_len; low++) 631 pstr->offsets[low] = pstr->offsets[low + offset] - offset; 632 } 633 else 634 { 635 /* Otherwise, just find out how long the partial multibyte 636 character at offset is and fill it with WEOF/255. */ 637 pstr->len = pstr->raw_len - idx + offset; 638 pstr->stop = pstr->raw_stop - idx + offset; 639 pstr->offsets_needed = 0; 640 while (mid > 0 && pstr->offsets[mid - 1] == offset) 641 --mid; 642 while (mid < pstr->valid_len) 643 if (pstr->wcs[mid] != WEOF) 644 break; 645 else 646 ++mid; 647 if (mid == pstr->valid_len) 648 pstr->valid_len = 0; 649 else 650 { 651 pstr->valid_len = pstr->offsets[mid] - offset; 652 if (pstr->valid_len) 653 { 654 for (low = 0; low < pstr->valid_len; ++low) 655 pstr->wcs[low] = WEOF; 656 memset (pstr->mbs, 255, pstr->valid_len); 657 } 658 } 659 pstr->valid_raw_len = pstr->valid_len; 660 } 661 } 662 else 663 { 664 pstr->tip_context = re_string_context_at (pstr, offset - 1, 665 eflags); 666 if (pstr->mb_cur_max > 1) 667 memmove (pstr->wcs, pstr->wcs + offset, 668 (pstr->valid_len - offset) * sizeof (wint_t)); 669 if (__glibc_unlikely (pstr->mbs_allocated)) 670 memmove (pstr->mbs, pstr->mbs + offset, 671 pstr->valid_len - offset); 672 pstr->valid_len -= offset; 673 pstr->valid_raw_len -= offset; 674 DEBUG_ASSERT (pstr->valid_len > 0); 675 } 609 676 } 610 677 else 611 678 { 612 679 /* No, skip all characters until IDX. */ 613 #ifdef RE_ENABLE_I18N 614 if (BE (pstr->offsets_needed, 0)) 680 Idx prev_valid_len = pstr->valid_len; 681 682 if (__glibc_unlikely (pstr->offsets_needed)) 615 683 { 616 684 pstr->len = pstr->raw_len - idx + offset; … … 618 686 pstr->offsets_needed = 0; 619 687 } 620 #endif621 688 pstr->valid_len = 0; 622 pstr->valid_raw_len = 0;623 #ifdef RE_ENABLE_I18N624 689 if (pstr->mb_cur_max > 1) 625 690 { 626 intwcs_idx;691 Idx wcs_idx; 627 692 wint_t wc = WEOF; 628 693 629 694 if (pstr->is_utf8) 630 695 { 631 const unsigned char *raw, *p, * q, *end;696 const unsigned char *raw, *p, *end; 632 697 633 698 /* Special case UTF-8. Multi-byte chars start with any … … 635 700 raw = pstr->raw_mbs + pstr->raw_mbs_idx; 636 701 end = raw + (offset - pstr->mb_cur_max); 702 if (end < pstr->raw_mbs) 703 end = pstr->raw_mbs; 637 704 p = raw + offset - 1; 638 705 #ifdef _LIBC 639 706 /* We know the wchar_t encoding is UCS4, so for the simple 640 707 case, ASCII characters, skip the conversion step. */ 641 if (isascii (*p) && BE (pstr->trans == NULL, 1))708 if (isascii (*p) && __glibc_likely (pstr->trans == NULL)) 642 709 { 643 710 memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); 644 pstr->valid_len = 0;711 /* pstr->valid_len = 0; */ 645 712 wc = (wchar_t) *p; 646 713 } … … 652 719 mbstate_t cur_state; 653 720 wchar_t wc2; 654 intmlen = raw + pstr->len - p;721 Idx mlen = raw + pstr->len - p; 655 722 unsigned char buf[6]; 656 723 size_t mbclen; 657 724 658 q= p;659 if ( BE (pstr->trans != NULL, 0))725 const unsigned char *pp = p; 726 if (__glibc_unlikely (pstr->trans != NULL)) 660 727 { 661 728 int i = mlen < 6 ? mlen : 6; 662 729 while (--i >= 0) 663 730 buf[i] = pstr->trans[p[i]]; 664 q= buf;731 pp = buf; 665 732 } 666 733 /* XXX Don't use mbrtowc, we know which conversion 667 734 to use (UTF-8 -> UCS4). */ 668 735 memset (&cur_state, 0, sizeof (cur_state)); 669 mbclen = mbrtowc (&wc2, (const char *)p, mlen,670 &cur_state);736 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen, 737 &cur_state); 671 738 if (raw + offset - p <= mbclen 672 739 && mbclen < (size_t) -2) … … 683 750 if (wc == WEOF) 684 751 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; 685 if (BE (pstr->valid_len, 0)) 752 if (wc == WEOF) 753 pstr->tip_context 754 = re_string_context_at (pstr, prev_valid_len - 1, eflags); 755 else 756 pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != 0) 757 && IS_WIDE_WORD_CHAR (wc)) 758 ? CONTEXT_WORD 759 : ((IS_WIDE_NEWLINE (wc) 760 && pstr->newline_anchor) 761 ? CONTEXT_NEWLINE : 0)); 762 if (__glibc_unlikely (pstr->valid_len)) 686 763 { 687 764 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) … … 691 768 } 692 769 pstr->valid_raw_len = pstr->valid_len; 693 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)694 && IS_WIDE_WORD_CHAR (wc))695 ? CONTEXT_WORD696 : ((IS_WIDE_NEWLINE (wc)697 && pstr->newline_anchor)698 ? CONTEXT_NEWLINE : 0));699 770 } 700 771 else 701 #endif /* RE_ENABLE_I18N */702 772 { 703 773 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; 774 pstr->valid_raw_len = 0; 704 775 if (pstr->trans) 705 776 c = pstr->trans[c]; … … 710 781 } 711 782 } 712 if (! BE (pstr->mbs_allocated, 0))783 if (!__glibc_unlikely (pstr->mbs_allocated)) 713 784 pstr->mbs += offset; 714 785 } … … 718 789 719 790 /* Then build the buffers. */ 720 #ifdef RE_ENABLE_I18N721 791 if (pstr->mb_cur_max > 1) 722 792 { 723 793 if (pstr->icase) 724 794 { 725 int ret = build_wcs_upper_buffer (pstr);726 if ( BE (ret != REG_NOERROR, 0))795 reg_errcode_t ret = build_wcs_upper_buffer (pstr); 796 if (__glibc_unlikely (ret != REG_NOERROR)) 727 797 return ret; 728 798 } … … 731 801 } 732 802 else 733 #endif /* RE_ENABLE_I18N */ 734 if (BE (pstr->mbs_allocated, 0)) 803 if (__glibc_unlikely (pstr->mbs_allocated)) 735 804 { 736 805 if (pstr->icase) … … 747 816 748 817 static unsigned char 749 internal_function __attribute ((pure)) 750 re_string_peek_byte_case (const re_string_t *pstr, int idx) 751 { 752 int ch, off; 818 __attribute__ ((pure)) 819 re_string_peek_byte_case (const re_string_t *pstr, Idx idx) 820 { 821 int ch; 822 Idx off; 753 823 754 824 /* Handle the common (easiest) cases first. */ 755 if ( BE (!pstr->mbs_allocated, 1))825 if (__glibc_likely (!pstr->mbs_allocated)) 756 826 return re_string_peek_byte (pstr, idx); 757 827 758 #ifdef RE_ENABLE_I18N759 828 if (pstr->mb_cur_max > 1 760 829 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) 761 830 return re_string_peek_byte (pstr, idx); 762 #endif763 831 764 832 off = pstr->cur_idx + idx; 765 #ifdef RE_ENABLE_I18N766 833 if (pstr->offsets_needed) 767 834 off = pstr->offsets[off]; 768 #endif769 835 770 836 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; 771 837 772 #ifdef RE_ENABLE_I18N773 838 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I 774 839 this function returns CAPITAL LETTER I instead of first byte of … … 777 842 if (pstr->offsets_needed && !isascii (ch)) 778 843 return re_string_peek_byte (pstr, idx); 779 #endif780 844 781 845 return ch; … … 783 847 784 848 static unsigned char 785 internal_function786 849 re_string_fetch_byte_case (re_string_t *pstr) 787 850 { 788 if ( BE (!pstr->mbs_allocated, 1))851 if (__glibc_likely (!pstr->mbs_allocated)) 789 852 return re_string_fetch_byte (pstr); 790 853 791 #ifdef RE_ENABLE_I18N792 854 if (pstr->offsets_needed) 793 855 { 794 int off, ch; 856 Idx off; 857 int ch; 795 858 796 859 /* For tr_TR.UTF-8 [[:islower:]] there is … … 814 877 return ch; 815 878 } 816 #endif817 879 818 880 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; … … 820 882 821 883 static void 822 internal_function823 884 re_string_destruct (re_string_t *pstr) 824 885 { 825 #ifdef RE_ENABLE_I18N826 886 re_free (pstr->wcs); 827 887 re_free (pstr->offsets); 828 #endif /* RE_ENABLE_I18N */829 888 if (pstr->mbs_allocated) 830 889 re_free (pstr->mbs); … … 834 893 835 894 static unsigned int 836 internal_function 837 re_string_context_at (const re_string_t *input, int idx, int eflags) 895 re_string_context_at (const re_string_t *input, Idx idx, int eflags) 838 896 { 839 897 int c; 840 if ( BE (idx < 0,0))898 if (__glibc_unlikely (idx < 0)) 841 899 /* In this case, we use the value stored in input->tip_context, 842 900 since we can't know the character in input->mbs[-1] here. */ 843 901 return input->tip_context; 844 if ( BE (idx == input->len, 0))902 if (__glibc_unlikely (idx == input->len)) 845 903 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF 846 904 : CONTEXT_NEWLINE | CONTEXT_ENDBUF); 847 #ifdef RE_ENABLE_I18N848 905 if (input->mb_cur_max > 1) 849 906 { 850 907 wint_t wc; 851 intwc_idx = idx;908 Idx wc_idx = idx; 852 909 while(input->wcs[wc_idx] == WEOF) 853 910 { 854 #ifdef DEBUG 855 /* It must not happen. */ 856 assert (wc_idx >= 0); 857 #endif 911 DEBUG_ASSERT (wc_idx >= 0); 858 912 --wc_idx; 859 913 if (wc_idx < 0) … … 861 915 } 862 916 wc = input->wcs[wc_idx]; 863 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) 917 if (__glibc_unlikely (input->word_ops_used != 0) 918 && IS_WIDE_WORD_CHAR (wc)) 864 919 return CONTEXT_WORD; 865 920 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor … … 867 922 } 868 923 else 869 #endif870 924 { 871 925 c = re_string_byte_at (input, idx); … … 880 934 881 935 static reg_errcode_t 882 internal_function 883 re_node_set_alloc (re_node_set *set, intsize)936 __attribute_warn_unused_result__ 937 re_node_set_alloc (re_node_set *set, Idx size) 884 938 { 885 939 set->alloc = size; 886 940 set->nelem = 0; 887 set->elems = re_malloc (int, size); 888 if (BE (set->elems == NULL, 0)) 941 set->elems = re_malloc (Idx, size); 942 if (__glibc_unlikely (set->elems == NULL) 943 && (MALLOC_0_IS_NONNULL || size != 0)) 889 944 return REG_ESPACE; 890 945 return REG_NOERROR; … … 892 947 893 948 static reg_errcode_t 894 internal_function 895 re_node_set_init_1 (re_node_set *set, intelem)949 __attribute_warn_unused_result__ 950 re_node_set_init_1 (re_node_set *set, Idx elem) 896 951 { 897 952 set->alloc = 1; 898 953 set->nelem = 1; 899 set->elems = re_malloc ( int, 1);900 if ( BE (set->elems == NULL, 0))954 set->elems = re_malloc (Idx, 1); 955 if (__glibc_unlikely (set->elems == NULL)) 901 956 { 902 957 set->alloc = set->nelem = 0; … … 908 963 909 964 static reg_errcode_t 910 internal_function 911 re_node_set_init_2 (re_node_set *set, int elem1, intelem2)965 __attribute_warn_unused_result__ 966 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2) 912 967 { 913 968 set->alloc = 2; 914 set->elems = re_malloc ( int, 2);915 if ( BE (set->elems == NULL, 0))969 set->elems = re_malloc (Idx, 2); 970 if (__glibc_unlikely (set->elems == NULL)) 916 971 return REG_ESPACE; 917 972 if (elem1 == elem2) … … 938 993 939 994 static reg_errcode_t 940 internal_function 995 __attribute_warn_unused_result__ 941 996 re_node_set_init_copy (re_node_set *dest, const re_node_set *src) 942 997 { … … 945 1000 { 946 1001 dest->alloc = dest->nelem; 947 dest->elems = re_malloc ( int, dest->alloc);948 if ( BE (dest->elems == NULL, 0))1002 dest->elems = re_malloc (Idx, dest->alloc); 1003 if (__glibc_unlikely (dest->elems == NULL)) 949 1004 { 950 1005 dest->alloc = dest->nelem = 0; 951 1006 return REG_ESPACE; 952 1007 } 953 memcpy (dest->elems, src->elems, src->nelem * sizeof ( int));1008 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); 954 1009 } 955 1010 else … … 963 1018 964 1019 static reg_errcode_t 965 internal_function 1020 __attribute_warn_unused_result__ 966 1021 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, 967 1022 const re_node_set *src2) 968 1023 { 969 inti1, i2, is, id, delta, sbase;1024 Idx i1, i2, is, id, delta, sbase; 970 1025 if (src1->nelem == 0 || src2->nelem == 0) 971 1026 return REG_NOERROR; … … 975 1030 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) 976 1031 { 977 intnew_alloc = src1->nelem + src2->nelem + dest->alloc;978 int *new_elems = re_realloc (dest->elems, int, new_alloc);979 if ( BE (new_elems == NULL, 0))980 1032 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc; 1033 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc); 1034 if (__glibc_unlikely (new_elems == NULL)) 1035 return REG_ESPACE; 981 1036 dest->elems = new_elems; 982 1037 dest->alloc = new_alloc; … … 997 1052 --id; 998 1053 999 1054 if (id < 0 || dest->elems[id] != src1->elems[i1]) 1000 1055 dest->elems[--sbase] = src1->elems[i1]; 1001 1056 … … 1028 1083 for (;;) 1029 1084 { 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1085 if (dest->elems[is] > dest->elems[id]) 1086 { 1087 /* Copy from the top. */ 1088 dest->elems[id + delta--] = dest->elems[is--]; 1089 if (delta == 0) 1090 break; 1091 } 1092 else 1093 { 1094 /* Slide from the bottom. */ 1095 dest->elems[id + delta] = dest->elems[id]; 1096 if (--id < 0) 1097 break; 1098 } 1044 1099 } 1045 1100 1046 1101 /* Copy remaining SRC elements. */ 1047 memcpy (dest->elems, dest->elems + sbase, delta * sizeof ( int));1102 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx)); 1048 1103 1049 1104 return REG_NOERROR; … … 1054 1109 1055 1110 static reg_errcode_t 1056 internal_function 1111 __attribute_warn_unused_result__ 1057 1112 re_node_set_init_union (re_node_set *dest, const re_node_set *src1, 1058 1113 const re_node_set *src2) 1059 1114 { 1060 inti1, i2, id;1115 Idx i1, i2, id; 1061 1116 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) 1062 1117 { 1063 1118 dest->alloc = src1->nelem + src2->nelem; 1064 dest->elems = re_malloc ( int, dest->alloc);1065 if ( BE (dest->elems == NULL, 0))1119 dest->elems = re_malloc (Idx, dest->alloc); 1120 if (__glibc_unlikely (dest->elems == NULL)) 1066 1121 return REG_ESPACE; 1067 1122 } … … 1090 1145 { 1091 1146 memcpy (dest->elems + id, src1->elems + i1, 1092 (src1->nelem - i1) * sizeof ( int));1147 (src1->nelem - i1) * sizeof (Idx)); 1093 1148 id += src1->nelem - i1; 1094 1149 } … … 1096 1151 { 1097 1152 memcpy (dest->elems + id, src2->elems + i2, 1098 (src2->nelem - i2) * sizeof ( int));1153 (src2->nelem - i2) * sizeof (Idx)); 1099 1154 id += src2->nelem - i2; 1100 1155 } … … 1107 1162 1108 1163 static reg_errcode_t 1109 internal_function 1164 __attribute_warn_unused_result__ 1110 1165 re_node_set_merge (re_node_set *dest, const re_node_set *src) 1111 1166 { 1112 intis, id, sbase, delta;1167 Idx is, id, sbase, delta; 1113 1168 if (src == NULL || src->nelem == 0) 1114 1169 return REG_NOERROR; 1115 1170 if (dest->alloc < 2 * src->nelem + dest->nelem) 1116 1171 { 1117 intnew_alloc = 2 * (src->nelem + dest->alloc);1118 int *new_buffer = re_realloc (dest->elems, int, new_alloc);1119 if ( BE (new_buffer == NULL, 0))1172 Idx new_alloc = 2 * (src->nelem + dest->alloc); 1173 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc); 1174 if (__glibc_unlikely (new_buffer == NULL)) 1120 1175 return REG_ESPACE; 1121 1176 dest->elems = new_buffer; … … 1123 1178 } 1124 1179 1125 if (BE (dest->nelem == 0, 0)) 1126 { 1180 if (__glibc_unlikely (dest->nelem == 0)) 1181 { 1182 /* Although we already guaranteed above that dest->alloc != 0 and 1183 therefore dest->elems != NULL, add a debug assertion to pacify 1184 GCC 11.2.1's -fanalyzer. */ 1185 DEBUG_ASSERT (dest->elems); 1127 1186 dest->nelem = src->nelem; 1128 memcpy (dest->elems, src->elems, src->nelem * sizeof ( int));1187 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); 1129 1188 return REG_NOERROR; 1130 1189 } … … 1136 1195 { 1137 1196 if (dest->elems[id] == src->elems[is]) 1138 1197 is--, id--; 1139 1198 else if (dest->elems[id] < src->elems[is]) 1140 1199 dest->elems[--sbase] = src->elems[is--]; 1141 1200 else /* if (dest->elems[id] > src->elems[is]) */ 1142 1201 --id; 1143 1202 } 1144 1203 … … 1147 1206 /* If DEST is exhausted, the remaining items of SRC must be unique. */ 1148 1207 sbase -= is + 1; 1149 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof ( int));1208 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx)); 1150 1209 } 1151 1210 … … 1162 1221 { 1163 1222 if (dest->elems[is] > dest->elems[id]) 1164 1223 { 1165 1224 /* Copy from the top. */ 1166 1225 dest->elems[id + delta--] = dest->elems[is--]; 1167 1226 if (delta == 0) 1168 1227 break; 1169 1228 } 1170 1229 else 1171 1172 1173 1230 { 1231 /* Slide from the bottom. */ 1232 dest->elems[id + delta] = dest->elems[id]; 1174 1233 if (--id < 0) 1175 1234 { 1176 1235 /* Copy remaining SRC elements. */ 1177 1236 memcpy (dest->elems, dest->elems + sbase, 1178 delta * sizeof (int));1237 delta * sizeof (Idx)); 1179 1238 break; 1180 1239 } … … 1187 1246 /* Insert the new element ELEM to the re_node_set* SET. 1188 1247 SET should not already have ELEM. 1189 return -1 if an error is occured, return 1 otherwise. */1190 1191 static int1192 internal_function 1193 re_node_set_insert (re_node_set *set, intelem)1194 { 1195 intidx;1248 Return true if successful. */ 1249 1250 static bool 1251 __attribute_warn_unused_result__ 1252 re_node_set_insert (re_node_set *set, Idx elem) 1253 { 1254 Idx idx; 1196 1255 /* In case the set is empty. */ 1197 1256 if (set->alloc == 0) 1198 { 1199 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) 1200 return 1; 1201 else 1202 return -1; 1203 } 1204 1205 if (BE (set->nelem, 0) == 0) 1206 { 1207 /* We already guaranteed above that set->alloc != 0. */ 1257 return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR); 1258 1259 if (__glibc_unlikely (set->nelem) == 0) 1260 { 1261 /* Although we already guaranteed above that set->alloc != 0 and 1262 therefore set->elems != NULL, add a debug assertion to pacify 1263 GCC 11.2 -fanalyzer. */ 1264 DEBUG_ASSERT (set->elems); 1208 1265 set->elems[0] = elem; 1209 1266 ++set->nelem; 1210 return 1;1267 return true; 1211 1268 } 1212 1269 … … 1214 1271 if (set->alloc == set->nelem) 1215 1272 { 1216 int*new_elems;1273 Idx *new_elems; 1217 1274 set->alloc = set->alloc * 2; 1218 new_elems = re_realloc (set->elems, int, set->alloc);1219 if ( BE (new_elems == NULL, 0))1220 return -1;1275 new_elems = re_realloc (set->elems, Idx, set->alloc); 1276 if (__glibc_unlikely (new_elems == NULL)) 1277 return false; 1221 1278 set->elems = new_elems; 1222 1279 } … … 1226 1283 if (elem < set->elems[0]) 1227 1284 { 1228 idx = 0;1229 1285 for (idx = set->nelem; idx > 0; idx--) 1230 1286 set->elems[idx] = set->elems[idx - 1]; 1231 1287 } 1232 1288 else 1233 1289 { 1234 1290 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) 1235 set->elems[idx] = set->elems[idx - 1]; 1291 set->elems[idx] = set->elems[idx - 1]; 1292 DEBUG_ASSERT (set->elems[idx - 1] < elem); 1236 1293 } 1237 1294 … … 1239 1296 set->elems[idx] = elem; 1240 1297 ++set->nelem; 1241 return 1;1298 return true; 1242 1299 } 1243 1300 1244 1301 /* Insert the new element ELEM to the re_node_set* SET. 1245 1302 SET should not already have any element greater than or equal to ELEM. 1246 Return -1 if an error is occured, return 1 otherwise. */1247 1248 static int1249 internal_function 1250 re_node_set_insert_last (re_node_set *set, intelem)1303 Return true if successful. */ 1304 1305 static bool 1306 __attribute_warn_unused_result__ 1307 re_node_set_insert_last (re_node_set *set, Idx elem) 1251 1308 { 1252 1309 /* Realloc if we need. */ 1253 1310 if (set->alloc == set->nelem) 1254 1311 { 1255 int*new_elems;1312 Idx *new_elems; 1256 1313 set->alloc = (set->alloc + 1) * 2; 1257 new_elems = re_realloc (set->elems, int, set->alloc);1258 if ( BE (new_elems == NULL, 0))1259 return -1;1314 new_elems = re_realloc (set->elems, Idx, set->alloc); 1315 if (__glibc_unlikely (new_elems == NULL)) 1316 return false; 1260 1317 set->elems = new_elems; 1261 1318 } … … 1263 1320 /* Insert the new element. */ 1264 1321 set->elems[set->nelem++] = elem; 1265 return 1;1322 return true; 1266 1323 } 1267 1324 1268 1325 /* Compare two node sets SET1 and SET2. 1269 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */1270 1271 static int1272 internal_function __attribute((pure))1326 Return true if SET1 and SET2 are equivalent. */ 1327 1328 static bool 1329 __attribute__ ((pure)) 1273 1330 re_node_set_compare (const re_node_set *set1, const re_node_set *set2) 1274 1331 { 1275 inti;1332 Idx i; 1276 1333 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) 1277 return 0;1334 return false; 1278 1335 for (i = set1->nelem ; --i >= 0 ; ) 1279 1336 if (set1->elems[i] != set2->elems[i]) 1280 return 0;1281 return 1;1337 return false; 1338 return true; 1282 1339 } 1283 1340 1284 1341 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ 1285 1342 1286 static int1287 internal_function __attribute((pure))1288 re_node_set_contains (const re_node_set *set, intelem)1289 { 1290 unsigned int idx, right, mid;1343 static Idx 1344 __attribute__ ((pure)) 1345 re_node_set_contains (const re_node_set *set, Idx elem) 1346 { 1347 __re_size_t idx, right, mid; 1291 1348 if (set->nelem <= 0) 1292 1349 return 0; … … 1307 1364 1308 1365 static void 1309 internal_function 1310 re_node_set_remove_at (re_node_set *set, int idx) 1366 re_node_set_remove_at (re_node_set *set, Idx idx) 1311 1367 { 1312 1368 if (idx < 0 || idx >= set->nelem) … … 1320 1376 1321 1377 /* Add the token TOKEN to dfa->nodes, and return the index of the token. 1322 Or return -1, if an error will be occured. */ 1323 1324 static int 1325 internal_function 1378 Or return -1 if an error occurred. */ 1379 1380 static Idx 1326 1381 re_dfa_add_node (re_dfa_t *dfa, re_token_t token) 1327 1382 { 1328 int type = token.type; 1329 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) 1383 if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc)) 1330 1384 { 1331 1385 size_t new_nodes_alloc = dfa->nodes_alloc * 2; 1332 int*new_nexts, *new_indices;1386 Idx *new_nexts, *new_indices; 1333 1387 re_node_set *new_edests, *new_eclosures; 1334 1388 re_token_t *new_nodes; 1335 1389 1336 /* Avoid overflows. */ 1337 if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)) 1390 /* Avoid overflows in realloc. */ 1391 const size_t max_object_size = MAX (sizeof (re_token_t), 1392 MAX (sizeof (re_node_set), 1393 sizeof (Idx))); 1394 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) 1395 < new_nodes_alloc)) 1338 1396 return -1; 1339 1397 1340 1398 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); 1341 if ( BE (new_nodes == NULL, 0))1399 if (__glibc_unlikely (new_nodes == NULL)) 1342 1400 return -1; 1343 1401 dfa->nodes = new_nodes; 1344 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); 1345 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); 1402 dfa->nodes_alloc = new_nodes_alloc; 1403 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); 1404 if (new_nexts != NULL) 1405 dfa->nexts = new_nexts; 1406 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); 1407 if (new_indices != NULL) 1408 dfa->org_indices = new_indices; 1346 1409 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); 1410 if (new_edests != NULL) 1411 dfa->edests = new_edests; 1347 1412 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); 1348 if (BE (new_nexts == NULL || new_indices == NULL 1349 || new_edests == NULL || new_eclosures == NULL, 0)) 1413 if (new_eclosures != NULL) 1414 dfa->eclosures = new_eclosures; 1415 if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL 1416 || new_edests == NULL || new_eclosures == NULL)) 1350 1417 return -1; 1351 dfa->nexts = new_nexts;1352 dfa->org_indices = new_indices;1353 dfa->edests = new_edests;1354 dfa->eclosures = new_eclosures;1355 dfa->nodes_alloc = new_nodes_alloc;1356 1418 } 1357 1419 dfa->nodes[dfa->nodes_len] = token; 1358 1420 dfa->nodes[dfa->nodes_len].constraint = 0; 1359 #ifdef RE_ENABLE_I18N1360 1421 dfa->nodes[dfa->nodes_len].accept_mb = 1361 ( type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;1362 #endif 1422 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) 1423 || token.type == COMPLEX_BRACKET); 1363 1424 dfa->nexts[dfa->nodes_len] = -1; 1364 1425 re_node_set_init_empty (dfa->edests + dfa->nodes_len); … … 1367 1428 } 1368 1429 1369 static inline unsigned int 1370 internal_function 1430 static re_hashval_t 1371 1431 calc_state_hash (const re_node_set *nodes, unsigned int context) 1372 1432 { 1373 unsigned int hash = nodes->nelem + context;1374 inti;1433 re_hashval_t hash = nodes->nelem + context; 1434 Idx i; 1375 1435 for (i = 0 ; i < nodes->nelem ; i++) 1376 1436 hash += nodes->elems[i]; … … 1388 1448 1389 1449 static re_dfastate_t * 1390 internal_function 1450 __attribute_warn_unused_result__ 1391 1451 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, 1392 1452 const re_node_set *nodes) 1393 1453 { 1394 unsigned int hash;1454 re_hashval_t hash; 1395 1455 re_dfastate_t *new_state; 1396 1456 struct re_state_table_entry *spot; 1397 int i; 1398 if (BE (nodes->nelem == 0, 0)) 1457 Idx i; 1458 #if defined GCC_LINT || defined lint 1459 /* Suppress bogus uninitialized-variable warnings. */ 1460 *err = REG_NOERROR; 1461 #endif 1462 if (__glibc_unlikely (nodes->nelem == 0)) 1399 1463 { 1400 1464 *err = REG_NOERROR; … … 1415 1479 /* There are no appropriate state in the dfa, create the new one. */ 1416 1480 new_state = create_ci_newstate (dfa, nodes, hash); 1417 if ( BE (new_state == NULL, 0))1481 if (__glibc_unlikely (new_state == NULL)) 1418 1482 *err = REG_ESPACE; 1419 1483 … … 1432 1496 1433 1497 static re_dfastate_t * 1434 internal_function 1498 __attribute_warn_unused_result__ 1435 1499 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, 1436 1500 const re_node_set *nodes, unsigned int context) 1437 1501 { 1438 unsigned int hash;1502 re_hashval_t hash; 1439 1503 re_dfastate_t *new_state; 1440 1504 struct re_state_table_entry *spot; 1441 int i; 1505 Idx i; 1506 #if defined GCC_LINT || defined lint 1507 /* Suppress bogus uninitialized-variable warnings. */ 1508 *err = REG_NOERROR; 1509 #endif 1442 1510 if (nodes->nelem == 0) 1443 1511 { … … 1456 1524 return state; 1457 1525 } 1458 /* There are no appropriate state in `dfa', create the new one. */1526 /* There are no appropriate state in 'dfa', create the new one. */ 1459 1527 new_state = create_cd_newstate (dfa, nodes, context, hash); 1460 if ( BE (new_state == NULL, 0))1528 if (__glibc_unlikely (new_state == NULL)) 1461 1529 *err = REG_ESPACE; 1462 1530 … … 1469 1537 1470 1538 static reg_errcode_t 1539 __attribute_warn_unused_result__ 1471 1540 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, 1472 unsigned int hash)1541 re_hashval_t hash) 1473 1542 { 1474 1543 struct re_state_table_entry *spot; 1475 1544 reg_errcode_t err; 1476 inti;1545 Idx i; 1477 1546 1478 1547 newstate->hash = hash; 1479 1548 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); 1480 if ( BE (err != REG_NOERROR, 0))1549 if (__glibc_unlikely (err != REG_NOERROR)) 1481 1550 return REG_ESPACE; 1482 1551 for (i = 0; i < newstate->nodes.nelem; i++) 1483 1552 { 1484 intelem = newstate->nodes.elems[i];1553 Idx elem = newstate->nodes.elems[i]; 1485 1554 if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) 1486 re_node_set_insert_last (&newstate->non_eps_nodes, elem); 1555 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem)) 1556 return REG_ESPACE; 1487 1557 } 1488 1558 1489 1559 spot = dfa->state_table + (hash & dfa->state_hash_mask); 1490 if ( BE (spot->alloc <= spot->num, 0))1491 { 1492 intnew_alloc = 2 * spot->num + 2;1560 if (__glibc_unlikely (spot->alloc <= spot->num)) 1561 { 1562 Idx new_alloc = 2 * spot->num + 2; 1493 1563 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, 1494 1564 new_alloc); 1495 if ( BE (new_array == NULL, 0))1565 if (__glibc_unlikely (new_array == NULL)) 1496 1566 return REG_ESPACE; 1497 1567 spot->array = new_array; … … 1518 1588 } 1519 1589 1520 /* Create the new state which is independ of contexts.1590 /* Create the new state which is independent of contexts. 1521 1591 Return the new state if succeeded, otherwise return NULL. */ 1522 1592 1523 1593 static re_dfastate_t * 1524 internal_function 1594 __attribute_warn_unused_result__ 1525 1595 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, 1526 unsigned int hash)1527 { 1528 inti;1596 re_hashval_t hash) 1597 { 1598 Idx i; 1529 1599 reg_errcode_t err; 1530 1600 re_dfastate_t *newstate; 1531 1601 1532 1602 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); 1533 if ( BE (newstate == NULL, 0))1603 if (__glibc_unlikely (newstate == NULL)) 1534 1604 return NULL; 1535 1605 err = re_node_set_init_copy (&newstate->nodes, nodes); 1536 if ( BE (err != REG_NOERROR, 0))1606 if (__glibc_unlikely (err != REG_NOERROR)) 1537 1607 { 1538 1608 re_free (newstate); … … 1547 1617 if (type == CHARACTER && !node->constraint) 1548 1618 continue; 1549 #ifdef RE_ENABLE_I18N1550 1619 newstate->accept_mb |= node->accept_mb; 1551 #endif /* RE_ENABLE_I18N */1552 1620 1553 1621 /* If the state has the halt node, the state is a halt state. */ … … 1560 1628 } 1561 1629 err = register_state (dfa, newstate, hash); 1562 if ( BE (err != REG_NOERROR, 0))1630 if (__glibc_unlikely (err != REG_NOERROR)) 1563 1631 { 1564 1632 free_state (newstate); … … 1572 1640 1573 1641 static re_dfastate_t * 1574 internal_function 1642 __attribute_warn_unused_result__ 1575 1643 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, 1576 unsigned int context, unsigned int hash)1577 { 1578 inti, nctx_nodes = 0;1644 unsigned int context, re_hashval_t hash) 1645 { 1646 Idx i, nctx_nodes = 0; 1579 1647 reg_errcode_t err; 1580 1648 re_dfastate_t *newstate; 1581 1649 1582 1650 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); 1583 if ( BE (newstate == NULL, 0))1651 if (__glibc_unlikely (newstate == NULL)) 1584 1652 return NULL; 1585 1653 err = re_node_set_init_copy (&newstate->nodes, nodes); 1586 if ( BE (err != REG_NOERROR, 0))1654 if (__glibc_unlikely (err != REG_NOERROR)) 1587 1655 { 1588 1656 re_free (newstate); … … 1595 1663 for (i = 0 ; i < nodes->nelem ; i++) 1596 1664 { 1597 unsigned int constraint = 0;1598 1665 re_token_t *node = dfa->nodes + nodes->elems[i]; 1599 1666 re_token_type_t type = node->type; 1600 if (node->constraint) 1601 constraint = node->constraint; 1667 unsigned int constraint = node->constraint; 1602 1668 1603 1669 if (type == CHARACTER && !constraint) 1604 1670 continue; 1605 #ifdef RE_ENABLE_I18N1606 1671 newstate->accept_mb |= node->accept_mb; 1607 #endif /* RE_ENABLE_I18N */1608 1672 1609 1673 /* If the state has the halt node, the state is a halt state. */ … … 1612 1676 else if (type == OP_BACK_REF) 1613 1677 newstate->has_backref = 1; 1614 else if (type == ANCHOR)1615 constraint = node->opr.ctx_type;1616 1678 1617 1679 if (constraint) … … 1619 1681 if (newstate->entrance_nodes == &newstate->nodes) 1620 1682 { 1621 newstate->entrance_nodes = re_malloc (re_node_set, 1);1622 if ( BE (newstate->entrance_nodes == NULL, 0))1683 re_node_set *entrance_nodes = re_malloc (re_node_set, 1); 1684 if (__glibc_unlikely (entrance_nodes == NULL)) 1623 1685 { 1624 1686 free_state (newstate); 1625 1687 return NULL; 1626 1688 } 1627 re_node_set_init_copy (newstate->entrance_nodes, nodes); 1689 newstate->entrance_nodes = entrance_nodes; 1690 if (re_node_set_init_copy (newstate->entrance_nodes, nodes) 1691 != REG_NOERROR) 1692 { 1693 free_state (newstate); 1694 return NULL; 1695 } 1628 1696 nctx_nodes = 0; 1629 1697 newstate->has_constraint = 1; … … 1638 1706 } 1639 1707 err = register_state (dfa, newstate, hash); 1640 if ( BE (err != REG_NOERROR, 0))1708 if (__glibc_unlikely (err != REG_NOERROR)) 1641 1709 { 1642 1710 free_state (newstate); -
trunk/src/sed/lib/regex_internal.h
r2660 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 20 19 21 20 #ifndef _REGEX_INTERNAL_H 22 21 #define _REGEX_INTERNAL_H 1 23 22 24 #include <assert.h>25 23 #include <ctype.h> 26 24 #include <stdio.h> … … 28 26 #include <string.h> 29 27 30 #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC 31 # include <langinfo.h> 32 #endif 33 #if defined HAVE_LOCALE_H || defined _LIBC 34 # include <locale.h> 35 #endif 36 #if defined HAVE_WCHAR_H || defined _LIBC 37 # include <wchar.h> 38 #endif /* HAVE_WCHAR_H || _LIBC */ 39 #if defined HAVE_WCTYPE_H || defined _LIBC 40 # include <wctype.h> 41 #endif /* HAVE_WCTYPE_H || _LIBC */ 42 #if defined HAVE_STDBOOL_H || defined _LIBC || defined(__HAIKU__) /* haiku hack */ 43 # include <stdbool.h> 44 #endif /* HAVE_STDBOOL_H || _LIBC */ 45 #if defined _LIBC 46 # include <bits/libc-lock.h> 28 #include <langinfo.h> 29 #include <locale.h> 30 #include <wchar.h> 31 #include <wctype.h> 32 #include <stdint.h> 33 34 #ifndef _LIBC 35 # include <dynarray.h> 36 #endif 37 38 #include <intprops.h> 39 #include <verify.h> 40 41 #if defined DEBUG && DEBUG != 0 42 # include <assert.h> 43 # define DEBUG_ASSERT(x) assert (x) 47 44 #else 48 # define __libc_lock_define(CLASS,NAME) 49 # define __libc_lock_init(NAME) do { } while (0) 50 # define __libc_lock_lock(NAME) do { } while (0) 51 # define __libc_lock_unlock(NAME) do { } while (0) 45 # define DEBUG_ASSERT(x) assume (x) 46 #endif 47 48 #ifdef _LIBC 49 # include <libc-lock.h> 50 # define lock_define(name) __libc_lock_define (, name) 51 # define lock_init(lock) (__libc_lock_init (lock), 0) 52 # define lock_fini(lock) ((void) 0) 53 # define lock_lock(lock) __libc_lock_lock (lock) 54 # define lock_unlock(lock) __libc_lock_unlock (lock) 55 #elif defined GNULIB_LOCK && !defined GNULIB_REGEX_SINGLE_THREAD 56 # include "glthread/lock.h" 57 # define lock_define(name) gl_lock_define (, name) 58 # define lock_init(lock) glthread_lock_init (&(lock)) 59 # define lock_fini(lock) glthread_lock_destroy (&(lock)) 60 # define lock_lock(lock) glthread_lock_lock (&(lock)) 61 # define lock_unlock(lock) glthread_lock_unlock (&(lock)) 62 #elif defined GNULIB_PTHREAD && !defined GNULIB_REGEX_SINGLE_THREAD 63 # include <pthread.h> 64 # define lock_define(name) pthread_mutex_t name; 65 # define lock_init(lock) pthread_mutex_init (&(lock), 0) 66 # define lock_fini(lock) pthread_mutex_destroy (&(lock)) 67 # define lock_lock(lock) pthread_mutex_lock (&(lock)) 68 # define lock_unlock(lock) pthread_mutex_unlock (&(lock)) 69 #else 70 # define lock_define(name) 71 # define lock_init(lock) 0 72 # define lock_fini(lock) ((void) 0) 73 /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC. */ 74 # define lock_lock(lock) ((void) dfa) 75 # define lock_unlock(lock) ((void) 0) 52 76 #endif 53 77 54 78 /* In case that the system doesn't have isblank(). */ 55 #if !defined _LIBC && ! defined HAVE_ISBLANK && !defined isblank79 #if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK)) 56 80 # define isblank(ch) ((ch) == ' ' || (ch) == '\t') 81 #endif 82 83 /* regex code assumes isascii has its usual numeric meaning, 84 even if the portable character set uses EBCDIC encoding, 85 and even if wint_t is wider than int. */ 86 #ifndef _LIBC 87 # undef isascii 88 # define isascii(c) (((c) & ~0x7f) == 0) 57 89 #endif 58 90 … … 61 93 # define _RE_DEFINE_LOCALE_FUNCTIONS 1 62 94 # include <locale/localeinfo.h> 63 # include <locale/elem-hash.h>64 95 # include <locale/coll-lookup.h> 65 96 # endif … … 72 103 # undef gettext 73 104 # define gettext(msgid) \ 74 INTUSE(__dcgettext)(_libc_intl_domainname, msgid, LC_MESSAGES)105 __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES) 75 106 # endif 76 107 #else 108 # undef gettext 77 109 # define gettext(msgid) (msgid) 78 110 #endif … … 84 116 #endif 85 117 86 #if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC 87 # if defined(__OS2__) /* setlocale() misbehaves in LIBC 0.6.1 and earlier, breaking /[a-z]/. */ 88 # if defined(__KLIBC_VERSION__) 89 # if __KLIBC_VERSION__ >= 0x00060002 90 # define RE_ENABLE_I18N 91 # endif 92 # endif 93 # else 94 # define RE_ENABLE_I18N 95 # endif 96 #endif 97 98 #if __GNUC__ >= 3 99 # define BE(expr, val) __builtin_expect (expr, val) 100 #else 101 # define BE(expr, val) (expr) 102 # ifndef inline /* bird: silly since the rest of sed depends on this working.. */ 103 # define inline 104 # endif 105 #endif 106 107 /* Number of single byte character. */ 108 #define SBC_MAX 256 118 /* Number of ASCII characters. */ 119 #define ASCII_CHARS 0x80 120 121 /* Number of single byte characters. */ 122 #define SBC_MAX (UCHAR_MAX + 1) 109 123 110 124 #define COLL_ELEM_LEN_MAX 8 … … 116 130 /* Rename to standard API for using out of glibc. */ 117 131 #ifndef _LIBC 132 # undef __wctype 133 # undef __iswalnum 134 # undef __iswctype 135 # undef __towlower 136 # undef __towupper 118 137 # define __wctype wctype 138 # define __iswalnum iswalnum 119 139 # define __iswctype iswctype 140 # define __towlower towlower 141 # define __towupper towupper 120 142 # define __btowc btowc 121 # ifndef __mempcpy /* keep quiet if string.h defines it (bird) */ 122 # define __mempcpy mempcpy 123 # endif 143 # define __mbrtowc mbrtowc 124 144 # define __wcrtomb wcrtomb 125 145 # define __regfree regfree 126 # define attribute_hidden127 146 #endif /* not _LIBC */ 128 147 129 #ifdef __GNUC__ 130 # define __attribute(arg) __attribute__ (arg) 148 /* Types related to integers. Unless protected by #ifdef _LIBC, the 149 regex code should avoid exact-width types like int32_t and uint64_t 150 as some non-GCC platforms lack them, an issue when this code is 151 used in Gnulib. */ 152 153 #ifndef SSIZE_MAX 154 # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) 155 #endif 156 #ifndef ULONG_WIDTH 157 # define ULONG_WIDTH REGEX_UINTEGER_WIDTH (ULONG_MAX) 158 /* The number of usable bits in an unsigned integer type with maximum 159 value MAX, as an int expression suitable in #if. Cover all known 160 practical hosts. This implementation exploits the fact that MAX is 161 1 less than a power of 2, and merely counts the number of 1 bits in 162 MAX; "COBn" means "count the number of 1 bits in the low-order n bits". */ 163 # define REGEX_UINTEGER_WIDTH(max) REGEX_COB128 (max) 164 # define REGEX_COB128(n) (REGEX_COB64 ((n) >> 31 >> 31 >> 2) + REGEX_COB64 (n)) 165 # define REGEX_COB64(n) (REGEX_COB32 ((n) >> 31 >> 1) + REGEX_COB32 (n)) 166 # define REGEX_COB32(n) (REGEX_COB16 ((n) >> 16) + REGEX_COB16 (n)) 167 # define REGEX_COB16(n) (REGEX_COB8 ((n) >> 8) + REGEX_COB8 (n)) 168 # define REGEX_COB8(n) (REGEX_COB4 ((n) >> 4) + REGEX_COB4 (n)) 169 # define REGEX_COB4(n) (!!((n) & 8) + !!((n) & 4) + !!((n) & 2) + ((n) & 1)) 170 # if ULONG_MAX / 2 + 1 != 1ul << (ULONG_WIDTH - 1) 171 # error "ULONG_MAX out of range" 172 # endif 173 #endif 174 175 /* The type of indexes into strings. This is signed, not size_t, 176 since the API requires indexes to fit in regoff_t anyway, and using 177 signed integers makes the code a bit smaller and presumably faster. 178 The traditional GNU regex implementation uses int for indexes. 179 The POSIX-compatible implementation uses a possibly-wider type. 180 The name 'Idx' is three letters to minimize the hassle of 181 reindenting a lot of regex code that formerly used 'int'. */ 182 typedef regoff_t Idx; 183 #ifdef _REGEX_LARGE_OFFSETS 184 # define IDX_MAX SSIZE_MAX 131 185 #else 132 # define __attribute(arg) 133 #endif 134 135 #ifndef SIZE_MAX 136 #define SIZE_MAX ((size_t)-1) 137 #endif 138 139 extern const char __re_error_msgid[] attribute_hidden; 140 extern const size_t __re_error_msgid_idx[] attribute_hidden; 186 # define IDX_MAX INT_MAX 187 #endif 188 189 /* A hash value, suitable for computing hash tables. */ 190 typedef __re_size_t re_hashval_t; 141 191 142 192 /* An integer used to represent a set of bits. It must be unsigned, … … 146 196 #define BITSET_WORD_MAX ULONG_MAX 147 197 /* Number of bits in a bitset_word_t. */ 148 #define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) 149 /* Number of bitset_word_t in a bit_set. */ 150 #define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) 198 #define BITSET_WORD_BITS ULONG_WIDTH 199 200 /* Number of bitset_word_t values in a bitset_t. */ 201 #define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS) 202 151 203 typedef bitset_word_t bitset_t[BITSET_WORDS]; 152 204 typedef bitset_word_t *re_bitset_ptr_t; 153 205 typedef const bitset_word_t *re_const_bitset_ptr_t; 154 155 #define bitset_set(set,i) \156 (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)157 #define bitset_clear(set,i) \158 (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))159 #define bitset_contain(set,i) \160 (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))161 #define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))162 #define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))163 #define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))164 206 165 207 #define PREV_WORD_CONSTRAINT 0x0001 … … 190 232 typedef struct 191 233 { 192 intalloc;193 intnelem;194 int*elems;234 Idx alloc; 235 Idx nelem; 236 Idx *elems; 195 237 } re_node_set; 196 238 … … 205 247 OP_BACK_REF = 4, 206 248 OP_PERIOD = 5, 207 #ifdef RE_ENABLE_I18N208 249 COMPLEX_BRACKET = 6, 209 250 OP_UTF8_PERIOD = 7, 210 #endif /* RE_ENABLE_I18N */211 251 212 252 /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used … … 246 286 } re_token_type_t; 247 287 248 #ifdef RE_ENABLE_I18N249 288 typedef struct 250 289 { … … 252 291 wchar_t *mbchars; 253 292 293 #ifdef _LIBC 254 294 /* Collating symbols. */ 255 # ifdef _LIBC256 295 int32_t *coll_syms; 257 # endif 258 296 #endif 297 298 #ifdef _LIBC 259 299 /* Equivalence classes. */ 260 # ifdef _LIBC261 300 int32_t *equiv_classes; 262 # 301 #endif 263 302 264 303 /* Range expressions. */ 265 # 304 #ifdef _LIBC 266 305 uint32_t *range_starts; 267 306 uint32_t *range_ends; 268 # else /* not _LIBC */307 #else 269 308 wchar_t *range_starts; 270 309 wchar_t *range_ends; 271 # endif /* not _LIBC */310 #endif 272 311 273 312 /* Character classes. */ … … 278 317 279 318 /* # of multibyte characters. */ 280 intnmbchars;319 Idx nmbchars; 281 320 282 321 /* # of collating symbols. */ 283 intncoll_syms;322 Idx ncoll_syms; 284 323 285 324 /* # of equivalence classes. */ 286 intnequiv_classes;325 Idx nequiv_classes; 287 326 288 327 /* # of range expressions. */ 289 intnranges;328 Idx nranges; 290 329 291 330 /* # of character classes. */ 292 intnchar_classes;331 Idx nchar_classes; 293 332 } re_charset_t; 294 #endif /* RE_ENABLE_I18N */295 333 296 334 typedef struct … … 300 338 unsigned char c; /* for CHARACTER */ 301 339 re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ 302 #ifdef RE_ENABLE_I18N303 340 re_charset_t *mbcset; /* for COMPLEX_BRACKET */ 304 #endif /* RE_ENABLE_I18N */ 305 int idx; /* for BACK_REF */ 341 Idx idx; /* for BACK_REF */ 306 342 re_context_type ctx_type; /* for ANCHOR */ 307 343 } opr; 308 #if __GNUC__ >= 2344 #if (__GNUC__ >= 2 || defined __clang__) && !defined __STRICT_ANSI__ 309 345 re_token_type_t type : 8; 310 346 #else … … 314 350 unsigned int duplicated : 1; 315 351 unsigned int opt_subexp : 1; 316 #ifdef RE_ENABLE_I18N317 352 unsigned int accept_mb : 1; 318 353 /* These 2 bits can be moved into the union if needed (e.g. if running out 319 354 of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ 320 355 unsigned int mb_partial : 1; 321 #endif322 356 unsigned int word_char : 1; 323 357 } re_token_t; … … 334 368 the same address that RAW_MBS points. */ 335 369 unsigned char *mbs; 336 #ifdef RE_ENABLE_I18N337 370 /* Store the wide character string which is corresponding to MBS. */ 338 371 wint_t *wcs; 339 int*offsets;372 Idx *offsets; 340 373 mbstate_t cur_state; 341 #endif342 374 /* Index in RAW_MBS. Each character mbs[i] corresponds to 343 375 raw_mbs[raw_mbs_idx + i]. */ 344 intraw_mbs_idx;376 Idx raw_mbs_idx; 345 377 /* The length of the valid characters in the buffers. */ 346 intvalid_len;378 Idx valid_len; 347 379 /* The corresponding number of bytes in raw_mbs array. */ 348 intvalid_raw_len;380 Idx valid_raw_len; 349 381 /* The length of the buffers MBS and WCS. */ 350 intbufs_len;382 Idx bufs_len; 351 383 /* The index in MBS, which is updated by re_string_fetch_byte. */ 352 intcur_idx;384 Idx cur_idx; 353 385 /* length of RAW_MBS array. */ 354 intraw_len;386 Idx raw_len; 355 387 /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ 356 intlen;388 Idx len; 357 389 /* End of the buffer may be shorter than its length in the cases such 358 390 as re_match_2, re_search_2. Then, we use STOP for end of the buffer 359 391 instead of LEN. */ 360 intraw_stop;392 Idx raw_stop; 361 393 /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ 362 intstop;394 Idx stop; 363 395 364 396 /* The context of mbs[0]. We store the context independently, since … … 370 402 /* Copy of re_dfa_t's word_char. */ 371 403 re_const_bitset_ptr_t word_char; 372 /* 1if REG_ICASE. */404 /* true if REG_ICASE. */ 373 405 unsigned char icase; 374 406 unsigned char is_utf8; … … 387 419 388 420 #ifndef _LIBC 389 # ifdef __i386__ 390 # ifdef __OS2__ 391 # define internal_function __attribute ((regparm (3))) 392 # else 393 # define internal_function __attribute ((regparm (3), stdcall)) 394 # endif 395 # else 396 # define internal_function 397 # endif 398 #endif 399 400 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, 401 int new_buf_len) 402 internal_function; 403 #ifdef RE_ENABLE_I18N 404 static void build_wcs_buffer (re_string_t *pstr) internal_function; 405 static int build_wcs_upper_buffer (re_string_t *pstr) internal_function; 406 #endif /* RE_ENABLE_I18N */ 407 static void build_upper_buffer (re_string_t *pstr) internal_function; 408 static void re_string_translate_buffer (re_string_t *pstr) internal_function; 409 static unsigned int re_string_context_at (const re_string_t *input, int idx, 410 int eflags) 411 internal_function __attribute ((pure)); 421 # define IS_IN(libc) false 422 #endif 423 412 424 #define re_string_peek_byte(pstr, offset) \ 413 425 ((pstr)->mbs[(pstr)->cur_idx + offset]) … … 427 439 #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) 428 440 429 #if HAVE_ALLOCA_H 430 # include <alloca.h> 431 #elif HAVE_MALLOC_H 432 # include <malloc.h> 433 #endif 434 435 #ifndef _LIBC 436 # if HAVE_ALLOCA 437 /* The OS usually guarantees only one guard page at the bottom of the stack, 438 and a page size can be as small as 4096 bytes. So we cannot safely 439 allocate anything larger than 4096 bytes. Also care for the possibility 440 of a few compiler-allocated temporary stack slots. */ 441 # define __libc_use_alloca(n) ((n) < 4032) 442 # else 443 /* alloca is implemented with malloc, so just use malloc. */ 444 # define __libc_use_alloca(n) 0 445 # endif 441 #ifdef _LIBC 442 # define MALLOC_0_IS_NONNULL 1 443 #elif !defined MALLOC_0_IS_NONNULL 444 # define MALLOC_0_IS_NONNULL 0 445 #endif 446 447 #ifndef MAX 448 # define MAX(a,b) ((a) < (b) ? (b) : (a)) 449 #endif 450 #ifndef MIN 451 # define MIN(a,b) ((a) < (b) ? (a) : (b)) 446 452 #endif 447 453 … … 460 466 re_token_t token; 461 467 462 /* `node_idx' is the index in dfa->nodes, if `type' == 0.463 Otherwise `type' indicate the type of this node. */464 intnode_idx;468 /* 'node_idx' is the index in dfa->nodes, if 'type' == 0. 469 Otherwise 'type' indicate the type of this node. */ 470 Idx node_idx; 465 471 }; 466 472 typedef struct bin_tree_t bin_tree_t; … … 489 495 #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') 490 496 #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) 491 #define IS_WIDE_WORD_CHAR(ch) ( iswalnum (ch) || (ch) == L'_')497 #define IS_WIDE_WORD_CHAR(ch) (__iswalnum (ch) || (ch) == L'_') 492 498 #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) 493 499 … … 506 512 struct re_dfastate_t 507 513 { 508 unsigned int hash;514 re_hashval_t hash; 509 515 re_node_set nodes; 510 516 re_node_set non_eps_nodes; … … 514 520 unsigned int context : 4; 515 521 unsigned int halt : 1; 516 /* If this state can accept `multi byte'.522 /* If this state can accept "multi byte". 517 523 Note that we refer to multibyte characters, and multi character 518 collating elements as `multi byte'. */524 collating elements as "multi byte". */ 519 525 unsigned int accept_mb : 1; 520 526 /* If this state has backreference node(s). */ … … 526 532 struct re_state_table_entry 527 533 { 528 intnum;529 intalloc;534 Idx num; 535 Idx alloc; 530 536 re_dfastate_t **array; 531 537 }; … … 535 541 typedef struct 536 542 { 537 intnext_idx;538 intalloc;543 Idx next_idx; 544 Idx alloc; 539 545 re_dfastate_t **array; 540 546 } state_array_t; … … 544 550 typedef struct 545 551 { 546 intnode;547 intstr_idx; /* The position NODE match at. */552 Idx node; 553 Idx str_idx; /* The position NODE match at. */ 548 554 state_array_t path; 549 555 } re_sub_match_last_t; … … 555 561 typedef struct 556 562 { 557 intstr_idx;558 intnode;563 Idx str_idx; 564 Idx node; 559 565 state_array_t *path; 560 intalasts; /* Allocation size of LASTS. */561 intnlasts; /* The number of LASTS. */566 Idx alasts; /* Allocation size of LASTS. */ 567 Idx nlasts; /* The number of LASTS. */ 562 568 re_sub_match_last_t **lasts; 563 569 } re_sub_match_top_t; … … 565 571 struct re_backref_cache_entry 566 572 { 567 int node; 568 int str_idx; 569 int subexp_from; 570 int subexp_to; 573 Idx node; 574 Idx str_idx; 575 Idx subexp_from; 576 Idx subexp_to; 577 bitset_word_t eps_reachable_subexps_map; 571 578 char more; 572 char unused;573 unsigned short int eps_reachable_subexps_map;574 579 }; 575 580 … … 578 583 /* The string object corresponding to the input string. */ 579 584 re_string_t input; 580 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)581 585 const re_dfa_t *const dfa; 582 #else583 const re_dfa_t *dfa;584 #endif585 586 /* EFLAGS of the argument of regexec. */ 586 587 int eflags; 587 588 /* Where the matching ends. */ 588 intmatch_last;589 intlast_node;589 Idx match_last; 590 Idx last_node; 590 591 /* The state log used by the matcher. */ 591 592 re_dfastate_t **state_log; 592 intstate_log_top;593 Idx state_log_top; 593 594 /* Back reference cache. */ 594 intnbkref_ents;595 intabkref_ents;595 Idx nbkref_ents; 596 Idx abkref_ents; 596 597 struct re_backref_cache_entry *bkref_ents; 597 598 int max_mb_elem_len; 598 intnsub_tops;599 intasub_tops;599 Idx nsub_tops; 600 Idx asub_tops; 600 601 re_sub_match_top_t **sub_tops; 601 602 } re_match_context_t; … … 605 606 re_dfastate_t **sifted_states; 606 607 re_dfastate_t **limited_states; 607 intlast_node;608 intlast_str_idx;608 Idx last_node; 609 Idx last_str_idx; 609 610 re_node_set limits; 610 611 } re_sift_context_t; … … 612 613 struct re_fail_stack_ent_t 613 614 { 614 intidx;615 intnode;615 Idx idx; 616 Idx node; 616 617 regmatch_t *regs; 617 618 re_node_set eps_via_nodes; … … 620 621 struct re_fail_stack_t 621 622 { 622 intnum;623 intalloc;623 Idx num; 624 Idx alloc; 624 625 struct re_fail_stack_ent_t *stack; 625 626 }; … … 630 631 size_t nodes_alloc; 631 632 size_t nodes_len; 632 int*nexts;633 int*org_indices;633 Idx *nexts; 634 Idx *org_indices; 634 635 re_node_set *edests; 635 636 re_node_set *eclosures; … … 645 646 int str_tree_storage_idx; 646 647 647 /* number of subexpressions `re_nsub' is in regex_t. */648 unsigned int state_hash_mask;649 intinit_node;650 intnbackref; /* The number of backreference in this dfa. */648 /* number of subexpressions 're_nsub' is in regex_t. */ 649 re_hashval_t state_hash_mask; 650 Idx init_node; 651 Idx nbackref; /* The number of backreference in this dfa. */ 651 652 652 653 /* Bitmap expressing which backreference is used. */ … … 665 666 bitset_t word_char; 666 667 reg_syntax_t syntax; 667 int*subexp_map;668 Idx *subexp_map; 668 669 #ifdef DEBUG 669 670 char* re_str; 670 671 #endif 671 __libc_lock_define (,lock)672 lock_define (lock) 672 673 }; 673 674 … … 701 702 702 703 703 /* Inline functions for bitset operation. */ 704 /* Functions for bitset_t operation. */ 705 706 static inline void 707 bitset_set (bitset_t set, Idx i) 708 { 709 set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; 710 } 711 712 static inline void 713 bitset_clear (bitset_t set, Idx i) 714 { 715 set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); 716 } 717 718 static inline bool 719 bitset_contain (const bitset_t set, Idx i) 720 { 721 return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; 722 } 723 724 static inline void 725 bitset_empty (bitset_t set) 726 { 727 memset (set, '\0', sizeof (bitset_t)); 728 } 729 730 static inline void 731 bitset_set_all (bitset_t set) 732 { 733 memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); 734 if (SBC_MAX % BITSET_WORD_BITS != 0) 735 set[BITSET_WORDS - 1] = 736 ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; 737 } 738 739 static inline void 740 bitset_copy (bitset_t dest, const bitset_t src) 741 { 742 memcpy (dest, src, sizeof (bitset_t)); 743 } 744 704 745 static inline void 705 746 bitset_not (bitset_t set) 706 747 { 707 748 int bitset_i; 708 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)749 for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i) 709 750 set[bitset_i] = ~set[bitset_i]; 751 if (SBC_MAX % BITSET_WORD_BITS != 0) 752 set[BITSET_WORDS - 1] = 753 ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1) 754 & ~set[BITSET_WORDS - 1]); 710 755 } 711 756 … … 726 771 } 727 772 728 #ifdef RE_ENABLE_I18N 729 /* Inline functions for re_string. */ 730 static inline int 731 internal_function __attribute ((pure)) 732 re_string_char_size_at (const re_string_t *pstr, int idx) 773 /* Functions for re_string. */ 774 static int 775 __attribute__ ((pure, unused)) 776 re_string_char_size_at (const re_string_t *pstr, Idx idx) 733 777 { 734 778 int byte_idx; … … 741 785 } 742 786 743 static inlinewint_t744 internal_function __attribute ((pure))745 re_string_wchar_at (const re_string_t *pstr, intidx)787 static wint_t 788 __attribute__ ((pure, unused)) 789 re_string_wchar_at (const re_string_t *pstr, Idx idx) 746 790 { 747 791 if (pstr->mb_cur_max == 1) … … 750 794 } 751 795 796 #ifdef _LIBC 797 # include <locale/weight.h> 798 #endif 799 752 800 static int 753 internal_function __attribute ((pure))754 re_string_elem_size_at (const re_string_t *pstr, intidx)755 { 756 # 801 __attribute__ ((pure, unused)) 802 re_string_elem_size_at (const re_string_t *pstr, Idx idx) 803 { 804 #ifdef _LIBC 757 805 const unsigned char *p, *extra; 758 806 const int32_t *table, *indirect; 759 int32_t tmp;760 # include <locale/weight.h>761 807 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 762 808 … … 769 815 _NL_COLLATE_INDIRECTMB); 770 816 p = pstr->mbs + idx; 771 tmp = findidx (&p);817 findidx (table, indirect, extra, &p, pstr->len - idx); 772 818 return p - pstr->mbs - idx; 773 819 } 774 else 775 # endif /* _LIBC */ 776 return 1; 777 } 778 #endif /* RE_ENABLE_I18N */ 820 #endif /* _LIBC */ 821 822 return 1; 823 } 824 825 #ifdef _LIBC 826 # if __GNUC__ >= 7 827 # define FALLTHROUGH __attribute__ ((__fallthrough__)) 828 # else 829 # define FALLTHROUGH ((void) 0) 830 # endif 831 #else 832 # include "attribute.h" 833 #endif 779 834 780 835 #endif /* _REGEX_INTERNAL_H */ -
trunk/src/sed/lib/regexec.c
r606 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 20 19 21 20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, 22 int n) internal_function; 23 static void match_ctx_clean (re_match_context_t *mctx) internal_function; 24 static void match_ctx_free (re_match_context_t *cache) internal_function; 25 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, 26 int str_idx, int from, int to) 27 internal_function; 28 static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) 29 internal_function; 30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, 31 int str_idx) internal_function; 21 Idx n); 22 static void match_ctx_clean (re_match_context_t *mctx); 23 static void match_ctx_free (re_match_context_t *cache); 24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, 25 Idx str_idx, Idx from, Idx to); 26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx); 27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, 28 Idx str_idx); 32 29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, 33 int node, int str_idx) 34 internal_function; 30 Idx node, Idx str_idx); 35 31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 36 re_dfastate_t **limited_sts, int last_node, 37 int last_str_idx) 38 internal_function; 32 re_dfastate_t **limited_sts, Idx last_node, 33 Idx last_str_idx); 39 34 static reg_errcode_t re_search_internal (const regex_t *preg, 40 const char *string, intlength,41 int start, int range, intstop,35 const char *string, Idx length, 36 Idx start, Idx last_start, Idx stop, 42 37 size_t nmatch, regmatch_t pmatch[], 43 int eflags) internal_function; 44 static int re_search_2_stub (struct re_pattern_buffer *bufp, 45 const char *string1, int length1, 46 const char *string2, int length2, 47 int start, int range, struct re_registers *regs, 48 int stop, int ret_len) internal_function; 49 static int re_search_stub (struct re_pattern_buffer *bufp, 50 const char *string, int length, int start, 51 int range, int stop, struct re_registers *regs, 52 int ret_len) internal_function; 38 int eflags); 39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, 40 const char *string1, Idx length1, 41 const char *string2, Idx length2, 42 Idx start, regoff_t range, 43 struct re_registers *regs, 44 Idx stop, bool ret_len); 45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp, 46 const char *string, Idx length, Idx start, 47 regoff_t range, Idx stop, 48 struct re_registers *regs, 49 bool ret_len); 53 50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, 54 int nregs, int regs_allocated) internal_function; 55 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) 56 internal_function; 57 static int check_matching (re_match_context_t *mctx, int fl_longest_match, 58 int *p_match_first) internal_function; 59 static int check_halt_state_context (const re_match_context_t *mctx, 60 const re_dfastate_t *state, int idx) 61 internal_function; 51 Idx nregs, int regs_allocated); 52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); 53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, 54 Idx *p_match_first); 55 static Idx check_halt_state_context (const re_match_context_t *mctx, 56 const re_dfastate_t *state, Idx idx); 62 57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 63 regmatch_t *prev_idx_match, intcur_node,64 int cur_idx, int nmatch) internal_function;58 regmatch_t *prev_idx_match, Idx cur_node, 59 Idx cur_idx, Idx nmatch); 65 60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, 66 int str_idx, int dest_node, int nregs, 67 regmatch_t *regs, 68 re_node_set *eps_via_nodes) 69 internal_function; 61 Idx str_idx, Idx dest_node, Idx nregs, 62 regmatch_t *regs, regmatch_t *prevregs, 63 re_node_set *eps_via_nodes); 70 64 static reg_errcode_t set_regs (const regex_t *preg, 71 65 const re_match_context_t *mctx, 72 66 size_t nmatch, regmatch_t *pmatch, 73 int fl_backtrack) internal_function; 74 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) 75 internal_function; 76 77 #ifdef RE_ENABLE_I18N 67 bool fl_backtrack); 68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); 69 78 70 static int sift_states_iter_mb (const re_match_context_t *mctx, 79 71 re_sift_context_t *sctx, 80 int node_idx, int str_idx, int max_str_idx) 81 internal_function; 82 #endif /* RE_ENABLE_I18N */ 72 Idx node_idx, Idx str_idx, Idx max_str_idx); 83 73 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, 84 re_sift_context_t *sctx) 85 internal_function; 74 re_sift_context_t *sctx); 86 75 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, 87 re_sift_context_t *sctx, int str_idx, 88 re_node_set *cur_dest) 89 internal_function; 76 re_sift_context_t *sctx, Idx str_idx, 77 re_node_set *cur_dest); 90 78 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, 91 79 re_sift_context_t *sctx, 92 int str_idx, 93 re_node_set *dest_nodes) 94 internal_function; 80 Idx str_idx, 81 re_node_set *dest_nodes); 95 82 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, 96 83 re_node_set *dest_nodes, 97 const re_node_set *candidates) 98 internal_function; 99 static int check_dst_limits (const re_match_context_t *mctx, 100 re_node_set *limits, 101 int dst_node, int dst_idx, int src_node, 102 int src_idx) internal_function; 84 const re_node_set *candidates); 85 static bool check_dst_limits (const re_match_context_t *mctx, 86 const re_node_set *limits, 87 Idx dst_node, Idx dst_idx, Idx src_node, 88 Idx src_idx); 103 89 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, 104 int boundaries, int subexp_idx, 105 int from_node, int bkref_idx) 106 internal_function; 90 int boundaries, Idx subexp_idx, 91 Idx from_node, Idx bkref_idx); 107 92 static int check_dst_limits_calc_pos (const re_match_context_t *mctx, 108 int limit, intsubexp_idx,109 int node, intstr_idx,110 int bkref_idx) internal_function;93 Idx limit, Idx subexp_idx, 94 Idx node, Idx str_idx, 95 Idx bkref_idx); 111 96 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, 112 97 re_node_set *dest_nodes, … … 114 99 re_node_set *limits, 115 100 struct re_backref_cache_entry *bkref_ents, 116 int str_idx) internal_function;101 Idx str_idx); 117 102 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, 118 103 re_sift_context_t *sctx, 119 int str_idx, const re_node_set *candidates) 120 internal_function; 104 Idx str_idx, const re_node_set *candidates); 121 105 static reg_errcode_t merge_state_array (const re_dfa_t *dfa, 122 106 re_dfastate_t **dst, 123 re_dfastate_t **src, int num) 124 internal_function; 107 re_dfastate_t **src, Idx num); 125 108 static re_dfastate_t *find_recover_state (reg_errcode_t *err, 126 re_match_context_t *mctx) internal_function;109 re_match_context_t *mctx); 127 110 static re_dfastate_t *transit_state (reg_errcode_t *err, 128 111 re_match_context_t *mctx, 129 re_dfastate_t *state) internal_function;112 re_dfastate_t *state); 130 113 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, 131 114 re_match_context_t *mctx, 132 re_dfastate_t *next_state) 133 internal_function; 115 re_dfastate_t *next_state); 134 116 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, 135 117 re_node_set *cur_nodes, 136 int str_idx) internal_function;118 Idx str_idx); 137 119 #if 0 138 120 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, 139 121 re_match_context_t *mctx, 140 re_dfastate_t *pstate) 141 internal_function; 122 re_dfastate_t *pstate); 142 123 #endif 143 #ifdef RE_ENABLE_I18N144 124 static reg_errcode_t transit_state_mb (re_match_context_t *mctx, 145 re_dfastate_t *pstate) 146 internal_function; 147 #endif /* RE_ENABLE_I18N */ 125 re_dfastate_t *pstate); 148 126 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, 149 const re_node_set *nodes) 150 internal_function; 127 const re_node_set *nodes); 151 128 static reg_errcode_t get_subexp (re_match_context_t *mctx, 152 int bkref_node, int bkref_str_idx) 153 internal_function; 129 Idx bkref_node, Idx bkref_str_idx); 154 130 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, 155 131 const re_sub_match_top_t *sub_top, 156 132 re_sub_match_last_t *sub_last, 157 int bkref_node, int bkref_str) 158 internal_function; 159 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 160 int subexp_idx, int type) internal_function; 133 Idx bkref_node, Idx bkref_str); 134 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 135 Idx subexp_idx, int type); 161 136 static reg_errcode_t check_arrival (re_match_context_t *mctx, 162 state_array_t *path, inttop_node,163 int top_str, int last_node, intlast_str,164 int type) internal_function;137 state_array_t *path, Idx top_node, 138 Idx top_str, Idx last_node, Idx last_str, 139 int type); 165 140 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, 166 intstr_idx,141 Idx str_idx, 167 142 re_node_set *cur_nodes, 168 re_node_set *next_nodes) 169 internal_function; 143 re_node_set *next_nodes); 170 144 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, 171 145 re_node_set *cur_nodes, 172 int ex_subexp, int type) 173 internal_function; 146 Idx ex_subexp, int type); 174 147 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, 175 148 re_node_set *dst_nodes, 176 int target, intex_subexp,177 int type) internal_function;149 Idx target, Idx ex_subexp, 150 int type); 178 151 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, 179 re_node_set *cur_nodes, int cur_str, 180 int subexp_num, int type) 181 internal_function; 182 static int build_trtable (const re_dfa_t *dfa, 183 re_dfastate_t *state) internal_function; 184 #ifdef RE_ENABLE_I18N 185 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, 186 const re_string_t *input, int idx) 187 internal_function; 188 # ifdef _LIBC 152 re_node_set *cur_nodes, Idx cur_str, 153 Idx subexp_num, int type); 154 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); 155 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, 156 const re_string_t *input, Idx idx); 157 #ifdef _LIBC 189 158 static unsigned int find_collation_sequence_value (const unsigned char *mbs, 190 size_t name_len) 191 internal_function; 192 # endif /* _LIBC */ 193 #endif /* RE_ENABLE_I18N */ 194 static int group_nodes_into_DFAstates (const re_dfa_t *dfa, 159 size_t name_len); 160 #endif 161 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, 195 162 const re_dfastate_t *state, 196 163 re_node_set *states_node, 197 bitset_t *states_ch) internal_function; 198 static int check_node_accept (const re_match_context_t *mctx, 199 const re_token_t *node, int idx) 200 internal_function; 201 static reg_errcode_t extend_buffers (re_match_context_t *mctx) 202 internal_function; 164 bitset_t *states_ch); 165 static bool check_node_accept (const re_match_context_t *mctx, 166 const re_token_t *node, Idx idx); 167 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len); 203 168 204 169 … … 209 174 210 175 If NMATCH is zero or REG_NOSUB was set in the cflags argument to 211 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at176 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 212 177 least NMATCH elements, and we set them to the offsets of the 213 178 corresponding matched substrings. 214 179 215 EFLAGS specifies `execution flags'which affect matching: if180 EFLAGS specifies "execution flags" which affect matching: if 216 181 REG_NOTBOL is set, then ^ does not match at the beginning of the 217 182 string; if REG_NOTEOL is set, then $ does not match at the end. 218 183 219 We return 0 if we find a match and REG_NOMATCH if not. */ 184 Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if 185 EFLAGS is invalid. */ 220 186 221 187 int 222 regexec (preg, string, nmatch, pmatch, eflags) 223 const regex_t *__restrict preg; 224 const char *__restrict string; 225 size_t nmatch; 226 regmatch_t pmatch[]; 227 int eflags; 188 regexec (const regex_t *__restrict preg, const char *__restrict string, 189 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) 228 190 { 229 191 reg_errcode_t err; 230 intstart, length;231 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;192 Idx start, length; 193 re_dfa_t *dfa = preg->buffer; 232 194 233 195 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) … … 245 207 } 246 208 247 __libc_lock_lock (dfa->lock);209 lock_lock (dfa->lock); 248 210 if (preg->no_sub) 249 err = re_search_internal (preg, string, length, start, length - start,211 err = re_search_internal (preg, string, length, start, length, 250 212 length, 0, NULL, eflags); 251 213 else 252 err = re_search_internal (preg, string, length, start, length - start,214 err = re_search_internal (preg, string, length, start, length, 253 215 length, nmatch, pmatch, eflags); 254 __libc_lock_unlock (dfa->lock);216 lock_unlock (dfa->lock); 255 217 return err != REG_NOERROR; 256 218 } 257 219 258 220 #ifdef _LIBC 221 libc_hidden_def (__regexec) 222 259 223 # include <shlib-compat.h> 260 224 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); … … 267 231 __compat_regexec (const regex_t *__restrict preg, 268 232 const char *__restrict string, size_t nmatch, 269 regmatch_t pmatch[ ], int eflags)233 regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) 270 234 { 271 235 return regexec (preg, string, nmatch, pmatch, … … 297 261 298 262 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match 299 and all groups is st roed in REGS. (For the "_2" variants, the offsets are263 and all groups is stored in REGS. (For the "_2" variants, the offsets are 300 264 computed relative to the concatenation, not relative to the individual 301 265 strings.) 302 266 303 267 On success, re_match* functions return the length of the match, re_search* 304 return the position of the start of the match. Return value -1 means no 305 match was found and -2 indicates an internal error. */ 306 307 int 308 re_match (bufp, string, length, start, regs) 309 struct re_pattern_buffer *bufp; 310 const char *string; 311 int length, start; 312 struct re_registers *regs; 313 { 314 return re_search_stub (bufp, string, length, start, 0, length, regs, 1); 268 return the position of the start of the match. They return -1 on 269 match failure, -2 on error. */ 270 271 regoff_t 272 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, 273 Idx start, struct re_registers *regs) 274 { 275 return re_search_stub (bufp, string, length, start, 0, length, regs, true); 315 276 } 316 277 #ifdef _LIBC … … 318 279 #endif 319 280 320 int 321 re_search (bufp, string, length, start, range, regs) 322 struct re_pattern_buffer *bufp; 323 const char *string; 324 int length, start, range; 325 struct re_registers *regs; 326 { 327 return re_search_stub (bufp, string, length, start, range, length, regs, 0); 281 regoff_t 282 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length, 283 Idx start, regoff_t range, struct re_registers *regs) 284 { 285 return re_search_stub (bufp, string, length, start, range, length, regs, 286 false); 328 287 } 329 288 #ifdef _LIBC … … 331 290 #endif 332 291 333 int 334 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) 335 struct re_pattern_buffer *bufp; 336 const char *string1, *string2; 337 int length1, length2, start, stop; 338 struct re_registers *regs; 292 regoff_t 293 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, 294 const char *string2, Idx length2, Idx start, 295 struct re_registers *regs, Idx stop) 339 296 { 340 297 return re_search_2_stub (bufp, string1, length1, string2, length2, 341 start, 0, regs, stop, 1);298 start, 0, regs, stop, true); 342 299 } 343 300 #ifdef _LIBC … … 345 302 #endif 346 303 347 int 348 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) 349 struct re_pattern_buffer *bufp; 350 const char *string1, *string2; 351 int length1, length2, start, range, stop; 352 struct re_registers *regs; 304 regoff_t 305 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, 306 const char *string2, Idx length2, Idx start, regoff_t range, 307 struct re_registers *regs, Idx stop) 353 308 { 354 309 return re_search_2_stub (bufp, string1, length1, string2, length2, 355 start, range, regs, stop, 0);310 start, range, regs, stop, false); 356 311 } 357 312 #ifdef _LIBC … … 359 314 #endif 360 315 361 static int 362 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, 363 stop, ret_len) 364 struct re_pattern_buffer *bufp; 365 const char *string1, *string2; 366 int length1, length2, start, range, stop, ret_len; 367 struct re_registers *regs; 316 static regoff_t 317 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, 318 Idx length1, const char *string2, Idx length2, Idx start, 319 regoff_t range, struct re_registers *regs, 320 Idx stop, bool ret_len) 368 321 { 369 322 const char *str; 370 int rval; 371 int len = length1 + length2; 372 int free_str = 0; 373 374 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) 323 regoff_t rval; 324 Idx len; 325 char *s = NULL; 326 327 if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0 328 || INT_ADD_WRAPV (length1, length2, &len)))) 375 329 return -2; 376 330 … … 379 333 if (length1 > 0) 380 334 { 381 char *s = re_malloc (char, len);382 383 if ( BE (s == NULL, 0))335 s = re_malloc (char, len); 336 337 if (__glibc_unlikely (s == NULL)) 384 338 return -2; 385 339 #ifdef _LIBC … … 390 344 #endif 391 345 str = s; 392 free_str = 1;393 346 } 394 347 else … … 399 352 rval = re_search_stub (bufp, str, len, start, range, stop, regs, 400 353 ret_len); 401 if (free_str) 402 re_free ((char *) str); 354 re_free (s); 403 355 return rval; 404 356 } … … 406 358 /* The parameters have the same meaning as those of re_search. 407 359 Additional parameters: 408 If RET_LEN is nonzerothe length of the match is returned (re_match style);360 If RET_LEN is true the length of the match is returned (re_match style); 409 361 otherwise the position of the match is returned. */ 410 362 411 static int 412 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) 413 struct re_pattern_buffer *bufp; 414 const char *string; 415 int length, start, range, stop, ret_len; 416 struct re_registers *regs; 363 static regoff_t 364 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, 365 Idx start, regoff_t range, Idx stop, struct re_registers *regs, 366 bool ret_len) 417 367 { 418 368 reg_errcode_t result; 419 369 regmatch_t *pmatch; 420 int nregs, rval; 370 Idx nregs; 371 regoff_t rval; 421 372 int eflags = 0; 422 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; 373 re_dfa_t *dfa = bufp->buffer; 374 Idx last_start = start + range; 423 375 424 376 /* Check for out-of-range. */ 425 if ( BE (start < 0 || start > length, 0))377 if (__glibc_unlikely (start < 0 || start > length)) 426 378 return -1; 427 if (BE (start + range > length, 0)) 428 range = length - start; 429 else if (BE (start + range < 0, 0)) 430 range = -start; 431 432 __libc_lock_lock (dfa->lock); 379 if (__glibc_unlikely (length < last_start 380 || (0 <= range && last_start < start))) 381 last_start = length; 382 else if (__glibc_unlikely (last_start < 0 383 || (range < 0 && start <= last_start))) 384 last_start = 0; 385 386 lock_lock (dfa->lock); 433 387 434 388 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; … … 436 390 437 391 /* Compile fastmap if we haven't yet. */ 438 if ( range > 0&& bufp->fastmap != NULL && !bufp->fastmap_accurate)392 if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) 439 393 re_compile_fastmap (bufp); 440 394 441 if ( BE (bufp->no_sub, 0))395 if (__glibc_unlikely (bufp->no_sub)) 442 396 regs = NULL; 443 397 … … 445 399 if (regs == NULL) 446 400 nregs = 1; 447 else if ( BE (bufp->regs_allocated == REGS_FIXED &&448 regs->num_regs < bufp->re_nsub + 1, 0))401 else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED 402 && regs->num_regs <= bufp->re_nsub)) 449 403 { 450 404 nregs = regs->num_regs; 451 if ( BE (nregs < 1, 0))405 if (__glibc_unlikely (nregs < 1)) 452 406 { 453 407 /* Nothing can be copied to regs. */ … … 459 413 nregs = bufp->re_nsub + 1; 460 414 pmatch = re_malloc (regmatch_t, nregs); 461 if ( BE (pmatch == NULL, 0))415 if (__glibc_unlikely (pmatch == NULL)) 462 416 { 463 417 rval = -2; … … 465 419 } 466 420 467 result = re_search_internal (bufp, string, length, start, range, stop,421 result = re_search_internal (bufp, string, length, start, last_start, stop, 468 422 nregs, pmatch, eflags); 469 423 470 424 rval = 0; 471 425 472 /* I hope we needn't fill the r regs with -1's when no match was found. */426 /* I hope we needn't fill their regs with -1's when no match was found. */ 473 427 if (result != REG_NOERROR) 474 rval = -1;428 rval = result == REG_NOMATCH ? -1 : -2; 475 429 else if (regs != NULL) 476 430 { … … 478 432 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, 479 433 bufp->regs_allocated); 480 if ( BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))434 if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED)) 481 435 rval = -2; 482 436 } 483 437 484 if ( BE (rval == 0, 1))438 if (__glibc_likely (rval == 0)) 485 439 { 486 440 if (ret_len) 487 441 { 488 assert(pmatch[0].rm_so == start);442 DEBUG_ASSERT (pmatch[0].rm_so == start); 489 443 rval = pmatch[0].rm_eo - start; 490 444 } … … 494 448 re_free (pmatch); 495 449 out: 496 __libc_lock_unlock (dfa->lock);450 lock_unlock (dfa->lock); 497 451 return rval; 498 452 } 499 453 500 454 static unsigned 501 re_copy_regs (regs, pmatch, nregs, regs_allocated) 502 struct re_registers *regs; 503 regmatch_t *pmatch; 504 int nregs, regs_allocated; 455 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, 456 int regs_allocated) 505 457 { 506 458 int rval = REGS_REALLOCATE; 507 inti;508 intneed_regs = nregs + 1;509 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code459 Idx i; 460 Idx need_regs = nregs + 1; 461 /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code 510 462 uses. */ 511 463 … … 514 466 { /* No. So allocate them with malloc. */ 515 467 regs->start = re_malloc (regoff_t, need_regs); 468 if (__glibc_unlikely (regs->start == NULL)) 469 return REGS_UNALLOCATED; 516 470 regs->end = re_malloc (regoff_t, need_regs); 517 if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0)) 518 return REGS_UNALLOCATED; 471 if (__glibc_unlikely (regs->end == NULL)) 472 { 473 re_free (regs->start); 474 return REGS_UNALLOCATED; 475 } 519 476 regs->num_regs = need_regs; 520 477 } … … 523 480 allocated, reallocate them. If we need fewer, just 524 481 leave it alone. */ 525 if ( BE (need_regs > regs->num_regs, 0))482 if (__glibc_unlikely (need_regs > regs->num_regs)) 526 483 { 527 484 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); 528 regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);529 if ( BE (new_start == NULL, 0) || BE (new_end == NULL, 0))485 regoff_t *new_end; 486 if (__glibc_unlikely (new_start == NULL)) 530 487 return REGS_UNALLOCATED; 488 new_end = re_realloc (regs->end, regoff_t, need_regs); 489 if (__glibc_unlikely (new_end == NULL)) 490 { 491 re_free (new_start); 492 return REGS_UNALLOCATED; 493 } 531 494 regs->start = new_start; 532 495 regs->end = new_end; … … 536 499 else 537 500 { 538 assert(regs_allocated == REGS_FIXED);501 DEBUG_ASSERT (regs_allocated == REGS_FIXED); 539 502 /* This function may not be called with REGS_FIXED and nregs too big. */ 540 assert (regs->num_regs >= nregs);503 DEBUG_ASSERT (nregs <= regs->num_regs); 541 504 rval = REGS_FIXED; 542 505 } … … 568 531 569 532 void 570 re_set_registers (bufp, regs, num_regs, starts, ends) 571 struct re_pattern_buffer *bufp; 572 struct re_registers *regs; 573 unsigned num_regs; 574 regoff_t *starts, *ends; 533 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, 534 __re_size_t num_regs, regoff_t *starts, regoff_t *ends) 575 535 { 576 536 if (num_regs) … … 585 545 bufp->regs_allocated = REGS_UNALLOCATED; 586 546 regs->num_regs = 0; 587 regs->start = regs->end = (regoff_t *) 0;547 regs->start = regs->end = NULL; 588 548 } 589 549 } … … 601 561 weak_function 602 562 # endif 603 re_exec (s) 604 const char *s; 563 re_exec (const char *s) 605 564 { 606 565 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); … … 613 572 /* Searches for a compiled pattern PREG in the string STRING, whose 614 573 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same 615 m ingings with regexec. START, and RANGE have the same meanings616 with re_search.574 meaning as with regexec. LAST_START is START + RANGE, where 575 START and RANGE have the same meaning as with re_search. 617 576 Return REG_NOERROR if we find a match, and REG_NOMATCH if not, 618 577 otherwise return the error code. 619 578 Note: We assume front end functions already check ranges. 620 ( START + RANGE >= 0 && START + RANGE<= LENGTH) */579 (0 <= LAST_START && LAST_START <= LENGTH) */ 621 580 622 581 static reg_errcode_t 623 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, 624 eflags) 625 const regex_t *preg; 626 const char *string; 627 int length, start, range, stop, eflags; 628 size_t nmatch; 629 regmatch_t pmatch[]; 582 __attribute_warn_unused_result__ 583 re_search_internal (const regex_t *preg, const char *string, Idx length, 584 Idx start, Idx last_start, Idx stop, size_t nmatch, 585 regmatch_t pmatch[], int eflags) 630 586 { 631 587 reg_errcode_t err; 632 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; 633 int left_lim, right_lim, incr; 634 int fl_longest_match, match_first, match_kind, match_last = -1; 635 int extra_nmatch; 636 int sb, ch; 637 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) 588 const re_dfa_t *dfa = preg->buffer; 589 Idx left_lim, right_lim; 590 int incr; 591 bool fl_longest_match; 592 int match_kind; 593 Idx match_first; 594 Idx match_last = -1; 595 Idx extra_nmatch; 596 bool sb; 597 int ch; 638 598 re_match_context_t mctx = { .dfa = dfa }; 639 #else 640 re_match_context_t mctx; 641 #endif 642 char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate 643 && range && !preg->can_be_null) ? preg->fastmap : NULL; 599 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate 600 && start != last_start && !preg->can_be_null) 601 ? preg->fastmap : NULL); 644 602 RE_TRANSLATE_TYPE t = preg->translate; 645 646 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))647 memset (&mctx, '\0', sizeof (re_match_context_t));648 mctx.dfa = dfa;649 #endif650 603 651 604 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; … … 653 606 654 607 /* Check if the DFA haven't been compiled. */ 655 if (BE (preg->used == 0 || dfa->init_state == NULL 656 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL 657 || dfa->init_state_begbuf == NULL, 0)) 608 if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL 609 || dfa->init_state_word == NULL 610 || dfa->init_state_nl == NULL 611 || dfa->init_state_begbuf == NULL)) 658 612 return REG_NOMATCH; 659 613 660 #ifdef DEBUG661 614 /* We assume front-end functions already check them. */ 662 assert (start + range >= 0 && start + range <= length); 663 #endif 615 DEBUG_ASSERT (0 <= last_start && last_start <= length); 664 616 665 617 /* If initial states with non-begbuf contexts have no elements, … … 671 623 || !preg->newline_anchor)) 672 624 { 673 if (start != 0 && start + range!= 0)625 if (start != 0 && last_start != 0) 674 626 return REG_NOMATCH; 675 start = range= 0;627 start = last_start = 0; 676 628 } 677 629 … … 680 632 681 633 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, 682 preg->translate, preg->syntax & RE_ICASE, dfa); 683 if (BE (err != REG_NOERROR, 0)) 634 preg->translate, (preg->syntax & RE_ICASE) != 0, 635 dfa); 636 if (__glibc_unlikely (err != REG_NOERROR)) 684 637 goto free_return; 685 638 mctx.input.stop = stop; … … 688 641 689 642 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); 690 if ( BE (err != REG_NOERROR, 0))643 if (__glibc_unlikely (err != REG_NOERROR)) 691 644 goto free_return; 692 645 … … 697 650 if (nmatch > 1 || dfa->has_mb_node) 698 651 { 699 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); 700 if (BE (mctx.state_log == NULL, 0)) 652 /* Avoid overflow. */ 653 if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) 654 <= mctx.input.bufs_len))) 701 655 { 702 656 err = REG_ESPACE; 703 657 goto free_return; 704 658 } 705 } 706 else 707 mctx.state_log = NULL; 659 660 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); 661 if (__glibc_unlikely (mctx.state_log == NULL)) 662 { 663 err = REG_ESPACE; 664 goto free_return; 665 } 666 } 708 667 709 668 match_first = start; … … 711 670 : CONTEXT_NEWLINE | CONTEXT_BEGBUF; 712 671 713 /* Check incrementally whether of not the input string match. */714 incr = ( range < 0) ? -1 : 1;715 left_lim = ( range < 0) ? start + range: start;716 right_lim = ( range < 0) ? start : start + range;672 /* Check incrementally whether the input string matches. */ 673 incr = (last_start < start) ? -1 : 1; 674 left_lim = (last_start < start) ? last_start : start; 675 right_lim = (last_start < start) ? start : last_start; 717 676 sb = dfa->mb_cur_max == 1; 718 677 match_kind = 719 678 (fastmap 720 679 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) 721 | ( range >= 0? 2 : 0)680 | (start <= last_start ? 2 : 0) 722 681 | (t != NULL ? 1 : 0)) 723 682 : 8); … … 742 701 case 7: 743 702 /* Fastmap with single-byte translation, match forward. */ 744 while ( BE (match_first < right_lim, 1)703 while (__glibc_likely (match_first < right_lim) 745 704 && !fastmap[t[(unsigned char) string[match_first]]]) 746 705 ++match_first; … … 749 708 case 6: 750 709 /* Fastmap without translation, match forward. */ 751 while ( BE (match_first < right_lim, 1)710 while (__glibc_likely (match_first < right_lim) 752 711 && !fastmap[(unsigned char) string[match_first]]) 753 712 ++match_first; 754 713 755 714 forward_match_found_start_or_reached_end: 756 if ( BE (match_first == right_lim, 0))715 if (__glibc_unlikely (match_first == right_lim)) 757 716 { 758 717 ch = match_first >= length … … 786 745 /* If MATCH_FIRST is out of the valid range, reconstruct the 787 746 buffers. */ 788 unsigned int offset = match_first - mctx.input.raw_mbs_idx; 789 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) 747 __re_size_t offset = match_first - mctx.input.raw_mbs_idx; 748 if (__glibc_unlikely (offset 749 >= (__re_size_t) mctx.input.valid_raw_len)) 790 750 { 791 751 err = re_string_reconstruct (&mctx.input, match_first, 792 752 eflags); 793 if ( BE (err != REG_NOERROR, 0))753 if (__glibc_unlikely (err != REG_NOERROR)) 794 754 goto free_return; 795 755 796 756 offset = match_first - mctx.input.raw_mbs_idx; 797 757 } 798 /* If MATCH_FIRST is out of the buffer, leave it as '\0'. 799 Note that MATCH_FIRST must not be smaller than 0. */ 800 ch = (match_first >= length 801 ? 0 : re_string_byte_at (&mctx.input, offset)); 758 /* Use buffer byte if OFFSET is in buffer, otherwise '\0'. */ 759 ch = (offset < mctx.input.valid_len 760 ? re_string_byte_at (&mctx.input, offset) : 0); 802 761 if (fastmap[ch]) 803 762 break; 804 763 match_first += incr; 805 764 if (match_first < left_lim || match_first > right_lim) 806 807 808 809 765 { 766 err = REG_NOMATCH; 767 goto free_return; 768 } 810 769 } 811 770 break; … … 815 774 the matching starts from the beginning of the buffer. */ 816 775 err = re_string_reconstruct (&mctx.input, match_first, eflags); 817 if ( BE (err != REG_NOERROR, 0))776 if (__glibc_unlikely (err != REG_NOERROR)) 818 777 goto free_return; 819 778 820 #ifdef RE_ENABLE_I18N 821 /* Don't consider this char as a possible match start if it part, 822 yet isn't the head, of a multibyte character. */ 779 /* Don't consider this char as a possible match start if it part, 780 yet isn't the head, of a multibyte character. */ 823 781 if (!sb && !re_string_first_byte (&mctx.input, 0)) 824 782 continue; 825 #endif826 783 827 784 /* It seems to be appropriate one, then use the matcher. */ … … 829 786 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; 830 787 match_last = check_matching (&mctx, fl_longest_match, 831 range >= 0? &match_first : NULL);788 start <= last_start ? &match_first : NULL); 832 789 if (match_last != -1) 833 790 { 834 if ( BE (match_last == -2, 0))791 if (__glibc_unlikely (match_last == -2)) 835 792 { 836 793 err = REG_ESPACE; … … 852 809 if (err == REG_NOERROR) 853 810 break; 854 if ( BE (err != REG_NOMATCH, 0))811 if (__glibc_unlikely (err != REG_NOMATCH)) 855 812 goto free_return; 856 813 match_last = -1; … … 864 821 } 865 822 866 #ifdef DEBUG 867 assert (match_last != -1); 868 assert (err == REG_NOERROR); 869 #endif 823 DEBUG_ASSERT (match_last != -1); 824 DEBUG_ASSERT (err == REG_NOERROR); 870 825 871 826 /* Set pmatch[] if we need. */ 872 827 if (nmatch > 0) 873 828 { 874 intreg_idx;829 Idx reg_idx; 875 830 876 831 /* Initialize registers. */ … … 881 836 pmatch[0].rm_so = 0; 882 837 pmatch[0].rm_eo = mctx.match_last; 838 /* FIXME: This function should fail if mctx.match_last exceeds 839 the maximum possible regoff_t value. We need a new error 840 code REG_OVERFLOW. */ 883 841 884 842 if (!preg->no_sub && nmatch > 1) … … 886 844 err = set_regs (preg, &mctx, nmatch, pmatch, 887 845 dfa->has_plural_match && dfa->nbackref > 0); 888 if ( BE (err != REG_NOERROR, 0))846 if (__glibc_unlikely (err != REG_NOERROR)) 889 847 goto free_return; 890 848 } 891 849 892 /* At last, add the offset to the each registers, since we slided850 /* At last, add the offset to each register, since we slid 893 851 the buffers so that we could assume that the matching starts 894 852 from 0. */ … … 896 854 if (pmatch[reg_idx].rm_so != -1) 897 855 { 898 #ifdef RE_ENABLE_I18N 899 if (BE (mctx.input.offsets_needed != 0, 0)) 856 if (__glibc_unlikely (mctx.input.offsets_needed != 0)) 900 857 { 901 858 pmatch[reg_idx].rm_so = … … 908 865 : mctx.input.offsets[pmatch[reg_idx].rm_eo]); 909 866 } 910 #else911 assert (mctx.input.offsets_needed == 0);912 #endif913 867 pmatch[reg_idx].rm_so += match_first; 914 868 pmatch[reg_idx].rm_eo += match_first; … … 921 875 922 876 if (dfa->subexp_map) 923 924 925 926 927 928 929 930 877 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) 878 if (dfa->subexp_map[reg_idx] != reg_idx) 879 { 880 pmatch[reg_idx + 1].rm_so 881 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; 882 pmatch[reg_idx + 1].rm_eo 883 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; 884 } 931 885 } 932 886 … … 940 894 941 895 static reg_errcode_t 942 prune_impossible_nodes (mctx) 943 re_match_context_t *mctx; 896 __attribute_warn_unused_result__ 897 prune_impossible_nodes (re_match_context_t *mctx) 944 898 { 945 899 const re_dfa_t *const dfa = mctx->dfa; 946 inthalt_node, match_last;900 Idx halt_node, match_last; 947 901 reg_errcode_t ret; 948 902 re_dfastate_t **sifted_states; 949 903 re_dfastate_t **lim_states = NULL; 950 904 re_sift_context_t sctx; 951 #ifdef DEBUG 952 assert (mctx->state_log != NULL); 953 #endif 905 DEBUG_ASSERT (mctx->state_log != NULL); 954 906 match_last = mctx->match_last; 955 907 halt_node = mctx->last_node; 908 909 /* Avoid overflow. */ 910 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) 911 <= match_last)) 912 return REG_ESPACE; 913 956 914 sifted_states = re_malloc (re_dfastate_t *, match_last + 1); 957 if ( BE (sifted_states == NULL, 0))915 if (__glibc_unlikely (sifted_states == NULL)) 958 916 { 959 917 ret = REG_ESPACE; … … 963 921 { 964 922 lim_states = re_malloc (re_dfastate_t *, match_last + 1); 965 if ( BE (lim_states == NULL, 0))923 if (__glibc_unlikely (lim_states == NULL)) 966 924 { 967 925 ret = REG_ESPACE; … … 976 934 ret = sift_states_backward (mctx, &sctx); 977 935 re_node_set_free (&sctx.limits); 978 if ( BE (ret != REG_NOERROR, 0))936 if (__glibc_unlikely (ret != REG_NOERROR)) 979 937 goto free_return; 980 938 if (sifted_states[0] != NULL || lim_states[0] != NULL) … … 998 956 re_free (lim_states); 999 957 lim_states = NULL; 1000 if ( BE (ret != REG_NOERROR, 0))958 if (__glibc_unlikely (ret != REG_NOERROR)) 1001 959 goto free_return; 1002 960 } … … 1006 964 ret = sift_states_backward (mctx, &sctx); 1007 965 re_node_set_free (&sctx.limits); 1008 if ( BE (ret != REG_NOERROR, 0))966 if (__glibc_unlikely (ret != REG_NOERROR)) 1009 967 goto free_return; 968 if (sifted_states[0] == NULL) 969 { 970 ret = REG_NOMATCH; 971 goto free_return; 972 } 1010 973 } 1011 974 re_free (mctx->state_log); … … 1025 988 since initial states may have constraints like "\<", "^", etc.. */ 1026 989 1027 static inline re_dfastate_t * 1028 __attribute ((always_inline)) internal_function 990 static __always_inline re_dfastate_t * 1029 991 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, 1030 intidx)992 Idx idx) 1031 993 { 1032 994 const re_dfa_t *const dfa = mctx->dfa; … … 1059 1021 1060 1022 /* Check whether the regular expression match input string INPUT or not, 1061 and return the index where the matching end , return -1 if not match,1062 orreturn -2 in case of an error.1023 and return the index where the matching end. Return -1 if 1024 there is no match, and return -2 in case of an error. 1063 1025 FL_LONGEST_MATCH means we want the POSIX longest matching. 1064 1026 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the 1065 1027 next place where we may want to try matching. 1066 Note that the matcher assume that the maching starts from the current1028 Note that the matcher assumes that the matching starts from the current 1067 1029 index of the buffer. */ 1068 1030 1069 static int1070 internal_function 1071 check_matching (re_match_context_t *mctx, intfl_longest_match,1072 int*p_match_first)1031 static Idx 1032 __attribute_warn_unused_result__ 1033 check_matching (re_match_context_t *mctx, bool fl_longest_match, 1034 Idx *p_match_first) 1073 1035 { 1074 1036 const re_dfa_t *const dfa = mctx->dfa; 1075 1037 reg_errcode_t err; 1076 intmatch = 0;1077 intmatch_last = -1;1078 intcur_str_idx = re_string_cur_idx (&mctx->input);1038 Idx match = 0; 1039 Idx match_last = -1; 1040 Idx cur_str_idx = re_string_cur_idx (&mctx->input); 1079 1041 re_dfastate_t *cur_state; 1080 intat_init_state = p_match_first != NULL;1081 intnext_start_idx = cur_str_idx;1042 bool at_init_state = p_match_first != NULL; 1043 Idx next_start_idx = cur_str_idx; 1082 1044 1083 1045 err = REG_NOERROR; 1084 1046 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); 1085 1047 /* An initial state must not be NULL (invalid). */ 1086 if ( BE (cur_state == NULL, 0))1087 { 1088 assert(err == REG_ESPACE);1048 if (__glibc_unlikely (cur_state == NULL)) 1049 { 1050 DEBUG_ASSERT (err == REG_ESPACE); 1089 1051 return -2; 1090 1052 } … … 1096 1058 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them 1097 1059 later. E.g. Processing back references. */ 1098 if ( BE (dfa->nbackref, 0))1099 { 1100 at_init_state = 0;1060 if (__glibc_unlikely (dfa->nbackref)) 1061 { 1062 at_init_state = false; 1101 1063 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); 1102 if ( BE (err != REG_NOERROR, 0))1064 if (__glibc_unlikely (err != REG_NOERROR)) 1103 1065 return err; 1104 1066 … … 1106 1068 { 1107 1069 err = transit_state_bkref (mctx, &cur_state->nodes); 1108 if ( BE (err != REG_NOERROR, 0))1109 1070 if (__glibc_unlikely (err != REG_NOERROR)) 1071 return err; 1110 1072 } 1111 1073 } … … 1113 1075 1114 1076 /* If the RE accepts NULL string. */ 1115 if ( BE (cur_state->halt, 0))1077 if (__glibc_unlikely (cur_state->halt)) 1116 1078 { 1117 1079 if (!cur_state->has_constraint … … 1131 1093 { 1132 1094 re_dfastate_t *old_state = cur_state; 1133 int next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1134 1135 if (BE (next_char_idx >= mctx->input.bufs_len, 0) 1136 || (BE (next_char_idx >= mctx->input.valid_len, 0) 1137 && mctx->input.valid_len < mctx->input.len)) 1138 { 1139 err = extend_buffers (mctx); 1140 if (BE (err != REG_NOERROR, 0)) 1141 { 1142 assert (err == REG_ESPACE); 1095 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1096 1097 if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len) 1098 && mctx->input.bufs_len < mctx->input.len) 1099 || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len) 1100 && mctx->input.valid_len < mctx->input.len)) 1101 { 1102 err = extend_buffers (mctx, next_char_idx + 1); 1103 if (__glibc_unlikely (err != REG_NOERROR)) 1104 { 1105 DEBUG_ASSERT (err == REG_ESPACE); 1143 1106 return -2; 1144 1107 } 1145 1108 } 1146 1109 1147 1110 cur_state = transit_state (&err, mctx, cur_state); … … 1154 1117 state using the state log, if available and if we have not 1155 1118 already found a valid (even if not the longest) match. */ 1156 if ( BE (err != REG_NOERROR, 0))1119 if (__glibc_unlikely (err != REG_NOERROR)) 1157 1120 return -2; 1158 1121 … … 1163 1126 } 1164 1127 1165 if ( BE (at_init_state, 0))1128 if (__glibc_unlikely (at_init_state)) 1166 1129 { 1167 1130 if (old_state == cur_state) 1168 1131 next_start_idx = next_char_idx; 1169 1132 else 1170 at_init_state = 0;1133 at_init_state = false; 1171 1134 } 1172 1135 … … 1199 1162 /* Check NODE match the current context. */ 1200 1163 1201 static int 1202 internal_function 1203 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) 1164 static bool 1165 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) 1204 1166 { 1205 1167 re_token_type_t type = dfa->nodes[node].type; 1206 1168 unsigned int constraint = dfa->nodes[node].constraint; 1207 1169 if (type != END_OF_RE) 1208 return 0;1170 return false; 1209 1171 if (!constraint) 1210 return 1;1172 return true; 1211 1173 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) 1212 return 0;1213 return 1;1174 return false; 1175 return true; 1214 1176 } 1215 1177 … … 1218 1180 match the context, return the node. */ 1219 1181 1220 static int 1221 internal_function 1182 static Idx 1222 1183 check_halt_state_context (const re_match_context_t *mctx, 1223 const re_dfastate_t *state, intidx)1224 { 1225 inti;1184 const re_dfastate_t *state, Idx idx) 1185 { 1186 Idx i; 1226 1187 unsigned int context; 1227 #ifdef DEBUG 1228 assert (state->halt); 1229 #endif 1188 DEBUG_ASSERT (state->halt); 1230 1189 context = re_string_context_at (&mctx->input, idx, mctx->eflags); 1231 1190 for (i = 0; i < state->nodes.nelem; ++i) … … 1237 1196 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA 1238 1197 corresponding to the DFA). 1239 Return the destination node, and update EPS_VIA_NODES , return -1 in case1240 of errors. */1241 1242 static int1243 internal_function 1244 proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,1245 int *pidx, intnode, re_node_set *eps_via_nodes,1198 Return the destination node, and update EPS_VIA_NODES; 1199 return -1 on match failure, -2 on error. */ 1200 1201 static Idx 1202 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, 1203 regmatch_t *prevregs, 1204 Idx *pidx, Idx node, re_node_set *eps_via_nodes, 1246 1205 struct re_fail_stack_t *fs) 1247 1206 { 1248 1207 const re_dfa_t *const dfa = mctx->dfa; 1249 int i, err;1250 1208 if (IS_EPSILON_NODE (dfa->nodes[node].type)) 1251 1209 { 1252 1210 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; 1253 1211 re_node_set *edests = &dfa->edests[node]; 1254 int dest_node; 1255 err = re_node_set_insert (eps_via_nodes, node); 1256 if (BE (err < 0, 0)) 1257 return -2; 1258 /* Pick up a valid destination, or return -1 if none is found. */ 1259 for (dest_node = -1, i = 0; i < edests->nelem; ++i) 1260 { 1261 int candidate = edests->elems[i]; 1212 1213 if (! re_node_set_contains (eps_via_nodes, node)) 1214 { 1215 bool ok = re_node_set_insert (eps_via_nodes, node); 1216 if (__glibc_unlikely (! ok)) 1217 return -2; 1218 } 1219 1220 /* Pick a valid destination, or return -1 if none is found. */ 1221 Idx dest_node = -1; 1222 for (Idx i = 0; i < edests->nelem; i++) 1223 { 1224 Idx candidate = edests->elems[i]; 1262 1225 if (!re_node_set_contains (cur_nodes, candidate)) 1263 1226 continue; … … 1265 1228 dest_node = candidate; 1266 1229 1267 1230 else 1268 1231 { 1269 1232 /* In order to avoid infinite loop like "(a*)*", return the second 1270 1233 epsilon-transition if the first was already considered. */ 1271 1234 if (re_node_set_contains (eps_via_nodes, dest_node)) 1272 1235 return candidate; 1273 1236 1274 1237 /* Otherwise, push the second epsilon-transition on the fail stack. */ 1275 1238 else if (fs != NULL 1276 1239 && push_fail_stack (fs, *pidx, candidate, nregs, regs, 1277 1240 prevregs, eps_via_nodes)) 1278 1241 return -2; 1279 1242 … … 1286 1249 else 1287 1250 { 1288 intnaccepted = 0;1251 Idx naccepted = 0; 1289 1252 re_token_type_t type = dfa->nodes[node].type; 1290 1253 1291 #ifdef RE_ENABLE_I18N1292 1254 if (dfa->nodes[node].accept_mb) 1293 1255 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); 1294 else 1295 #endif /* RE_ENABLE_I18N */ 1296 if (type == OP_BACK_REF) 1297 { 1298 int subexp_idx = dfa->nodes[node].opr.idx + 1; 1299 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; 1256 else if (type == OP_BACK_REF) 1257 { 1258 Idx subexp_idx = dfa->nodes[node].opr.idx + 1; 1259 if (subexp_idx < nregs) 1260 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; 1300 1261 if (fs != NULL) 1301 1262 { 1302 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) 1263 if (subexp_idx >= nregs 1264 || regs[subexp_idx].rm_so == -1 1265 || regs[subexp_idx].rm_eo == -1) 1303 1266 return -1; 1304 1267 else if (naccepted) 1305 1268 { 1306 1269 char *buf = (char *) re_string_get_buffer (&mctx->input); 1307 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, 1308 naccepted) != 0) 1270 if (mctx->input.valid_len - *pidx < naccepted 1271 || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, 1272 naccepted) 1273 != 0)) 1309 1274 return -1; 1310 1275 } … … 1313 1278 if (naccepted == 0) 1314 1279 { 1315 intdest_node;1316 err= re_node_set_insert (eps_via_nodes, node);1317 if ( BE (err < 0, 0))1280 Idx dest_node; 1281 bool ok = re_node_set_insert (eps_via_nodes, node); 1282 if (__glibc_unlikely (! ok)) 1318 1283 return -2; 1319 1284 dest_node = dfa->edests[node].elems[0]; … … 1327 1292 || check_node_accept (mctx, dfa->nodes + node, *pidx)) 1328 1293 { 1329 intdest_node = dfa->nexts[node];1294 Idx dest_node = dfa->nexts[node]; 1330 1295 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; 1331 1296 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL … … 1341 1306 1342 1307 static reg_errcode_t 1343 internal_function 1344 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, 1345 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) 1308 __attribute_warn_unused_result__ 1309 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, 1310 Idx nregs, regmatch_t *regs, regmatch_t *prevregs, 1311 re_node_set *eps_via_nodes) 1346 1312 { 1347 1313 reg_errcode_t err; 1348 int num = fs->num++;1349 if ( fs->num == fs->alloc)1314 Idx num = fs->num; 1315 if (num == fs->alloc) 1350 1316 { 1351 1317 struct re_fail_stack_ent_t *new_array; 1352 new_array = re alloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)1353 * fs->alloc * 2));1318 new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t, 1319 fs->alloc * 2); 1354 1320 if (new_array == NULL) 1355 1321 return REG_ESPACE; … … 1359 1325 fs->stack[num].idx = str_idx; 1360 1326 fs->stack[num].node = dest_node; 1361 fs->stack[num].regs = re_malloc (regmatch_t, nregs);1327 fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs); 1362 1328 if (fs->stack[num].regs == NULL) 1363 1329 return REG_ESPACE; 1330 fs->num = num + 1; 1364 1331 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); 1332 memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs); 1365 1333 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); 1366 1334 return err; 1367 1335 } 1368 1336 1369 static int 1370 internal_function 1371 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, 1372 regmatch_t *regs, re_node_set *eps_via_nodes) 1373 { 1374 int num = --fs->num; 1375 assert (num >= 0); 1337 static Idx 1338 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, 1339 regmatch_t *regs, regmatch_t *prevregs, 1340 re_node_set *eps_via_nodes) 1341 { 1342 if (fs == NULL || fs->num == 0) 1343 return -1; 1344 Idx num = --fs->num; 1376 1345 *pidx = fs->stack[num].idx; 1377 1346 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); 1347 memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs); 1378 1348 re_node_set_free (eps_via_nodes); 1379 1349 re_free (fs->stack[num].regs); 1380 1350 *eps_via_nodes = fs->stack[num].eps_via_nodes; 1351 DEBUG_ASSERT (0 <= fs->stack[num].node); 1381 1352 return fs->stack[num].node; 1382 1353 } 1354 1355 1356 #define DYNARRAY_STRUCT regmatch_list 1357 #define DYNARRAY_ELEMENT regmatch_t 1358 #define DYNARRAY_PREFIX regmatch_list_ 1359 #include <malloc/dynarray-skeleton.c> 1383 1360 1384 1361 /* Set the positions where the subexpressions are starts/ends to registers … … 1388 1365 1389 1366 static reg_errcode_t 1390 internal_function 1367 __attribute_warn_unused_result__ 1391 1368 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, 1392 regmatch_t *pmatch, intfl_backtrack)1393 { 1394 const re_dfa_t *dfa = (const re_dfa_t *)preg->buffer;1395 intidx, cur_node;1369 regmatch_t *pmatch, bool fl_backtrack) 1370 { 1371 const re_dfa_t *dfa = preg->buffer; 1372 Idx idx, cur_node; 1396 1373 re_node_set eps_via_nodes; 1397 1374 struct re_fail_stack_t *fs; 1398 1375 struct re_fail_stack_t fs_body = { 0, 2, NULL }; 1399 regmatch_t *prev_idx_match; 1400 int prev_idx_match_malloced = 0; 1401 1402 #ifdef DEBUG 1403 assert (nmatch > 1); 1404 assert (mctx->state_log != NULL); 1405 #endif 1376 struct regmatch_list prev_match; 1377 regmatch_list_init (&prev_match); 1378 1379 DEBUG_ASSERT (nmatch > 1); 1380 DEBUG_ASSERT (mctx->state_log != NULL); 1406 1381 if (fl_backtrack) 1407 1382 { … … 1417 1392 re_node_set_init_empty (&eps_via_nodes); 1418 1393 1419 if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) 1420 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); 1421 else 1422 { 1423 prev_idx_match = re_malloc (regmatch_t, nmatch); 1424 if (prev_idx_match == NULL) 1425 { 1426 free_fail_stack_return (fs); 1427 return REG_ESPACE; 1428 } 1429 prev_idx_match_malloced = 1; 1430 } 1394 if (!regmatch_list_resize (&prev_match, nmatch)) 1395 { 1396 regmatch_list_free (&prev_match); 1397 free_fail_stack_return (fs); 1398 return REG_ESPACE; 1399 } 1400 regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match); 1431 1401 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); 1432 1402 … … 1435 1405 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); 1436 1406 1437 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1438 { 1439 int reg_idx; 1407 if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1408 || (fs && re_node_set_contains (&eps_via_nodes, cur_node))) 1409 { 1410 Idx reg_idx; 1411 cur_node = -1; 1440 1412 if (fs) 1441 1413 { 1442 1414 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 1443 1415 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) 1444 break; 1445 if (reg_idx == nmatch) 1446 { 1447 re_node_set_free (&eps_via_nodes); 1448 if (prev_idx_match_malloced) 1449 re_free (prev_idx_match); 1450 return free_fail_stack_return (fs); 1451 } 1452 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1453 &eps_via_nodes); 1454 } 1455 else 1416 { 1417 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1418 prev_idx_match, &eps_via_nodes); 1419 break; 1420 } 1421 } 1422 if (cur_node < 0) 1456 1423 { 1457 1424 re_node_set_free (&eps_via_nodes); 1458 if (prev_idx_match_malloced) 1459 re_free (prev_idx_match); 1460 return REG_NOERROR; 1425 regmatch_list_free (&prev_match); 1426 return free_fail_stack_return (fs); 1461 1427 } 1462 1428 } 1463 1429 1464 1430 /* Proceed to next node. */ 1465 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, 1431 cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match, 1432 &idx, cur_node, 1466 1433 &eps_via_nodes, fs); 1467 1434 1468 if ( BE (cur_node < 0,0))1469 { 1470 if ( BE (cur_node == -2, 0))1435 if (__glibc_unlikely (cur_node < 0)) 1436 { 1437 if (__glibc_unlikely (cur_node == -2)) 1471 1438 { 1472 1439 re_node_set_free (&eps_via_nodes); 1473 if (prev_idx_match_malloced) 1474 re_free (prev_idx_match); 1440 regmatch_list_free (&prev_match); 1475 1441 free_fail_stack_return (fs); 1476 1442 return REG_ESPACE; 1477 1443 } 1478 if (fs) 1479 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1480 &eps_via_nodes); 1481 else 1444 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1445 prev_idx_match, &eps_via_nodes); 1446 if (cur_node < 0) 1482 1447 { 1483 1448 re_node_set_free (&eps_via_nodes); 1484 if (prev_idx_match_malloced)1485 re_free (prev_idx_match);1449 regmatch_list_free (&prev_match); 1450 free_fail_stack_return (fs); 1486 1451 return REG_NOMATCH; 1487 1452 } … … 1489 1454 } 1490 1455 re_node_set_free (&eps_via_nodes); 1491 if (prev_idx_match_malloced) 1492 re_free (prev_idx_match); 1456 regmatch_list_free (&prev_match); 1493 1457 return free_fail_stack_return (fs); 1494 1458 } 1495 1459 1496 1460 static reg_errcode_t 1497 internal_function1498 1461 free_fail_stack_return (struct re_fail_stack_t *fs) 1499 1462 { 1500 1463 if (fs) 1501 1464 { 1502 intfs_idx;1465 Idx fs_idx; 1503 1466 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) 1504 1467 { … … 1512 1475 1513 1476 static void 1514 internal_function1515 1477 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 1516 regmatch_t *prev_idx_match, int cur_node, int cur_idx, intnmatch)1478 regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) 1517 1479 { 1518 1480 int type = dfa->nodes[cur_node].type; 1519 1481 if (type == OP_OPEN_SUBEXP) 1520 1482 { 1521 intreg_num = dfa->nodes[cur_node].opr.idx + 1;1483 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1522 1484 1523 1485 /* We are at the first node of this sub expression. */ … … 1530 1492 else if (type == OP_CLOSE_SUBEXP) 1531 1493 { 1532 int reg_num = dfa->nodes[cur_node].opr.idx + 1; 1494 /* We are at the last node of this sub expression. */ 1495 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1533 1496 if (reg_num < nmatch) 1534 1497 { 1535 /* We are at the last node of this sub expression. */1536 1498 if (pmatch[reg_num].rm_so < cur_idx) 1537 1499 { … … 1564 1526 Updated state_log will be wrote to STATE_LOG. 1565 1527 1566 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...1528 Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... 1567 1529 1. When STR_IDX == MATCH_LAST(the last index in the state_log): 1568 If `a' isn't the LAST_NODE and `a' can't epsilon transit to1569 the LAST_NODE, we throw away the node `a'.1570 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts1571 string `s' and transit to `b':1530 If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to 1531 the LAST_NODE, we throw away the node 'a'. 1532 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts 1533 string 's' and transit to 'b': 1572 1534 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw 1573 away the node `a'.1535 away the node 'a'. 1574 1536 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is 1575 thrown away, we throw away the node `a'.1537 thrown away, we throw away the node 'a'. 1576 1538 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': 1577 1539 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the 1578 node `a'.1540 node 'a'. 1579 1541 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, 1580 we throw away the node `a'. */1542 we throw away the node 'a'. */ 1581 1543 1582 1544 #define STATE_NODE_CONTAINS(state,node) \ … … 1584 1546 1585 1547 static reg_errcode_t 1586 internal_function1587 1548 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) 1588 1549 { 1589 1550 reg_errcode_t err; 1590 1551 int null_cnt = 0; 1591 intstr_idx = sctx->last_str_idx;1552 Idx str_idx = sctx->last_str_idx; 1592 1553 re_node_set cur_dest; 1593 1554 1594 #ifdef DEBUG 1595 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); 1596 #endif 1555 DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); 1597 1556 1598 1557 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon 1599 1558 transit to the last_node and the last_node itself. */ 1600 1559 err = re_node_set_init_1 (&cur_dest, sctx->last_node); 1601 if ( BE (err != REG_NOERROR, 0))1560 if (__glibc_unlikely (err != REG_NOERROR)) 1602 1561 return err; 1603 1562 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1604 if ( BE (err != REG_NOERROR, 0))1563 if (__glibc_unlikely (err != REG_NOERROR)) 1605 1564 goto free_return; 1606 1565 … … 1623 1582 { 1624 1583 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); 1625 if (BE (err != REG_NOERROR, 0))1584 if (__glibc_unlikely (err != REG_NOERROR)) 1626 1585 goto free_return; 1627 1586 } … … 1632 1591 And update state_log. */ 1633 1592 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1634 if ( BE (err != REG_NOERROR, 0))1593 if (__glibc_unlikely (err != REG_NOERROR)) 1635 1594 goto free_return; 1636 1595 } … … 1642 1601 1643 1602 static reg_errcode_t 1644 internal_function 1603 __attribute_warn_unused_result__ 1645 1604 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, 1646 intstr_idx, re_node_set *cur_dest)1605 Idx str_idx, re_node_set *cur_dest) 1647 1606 { 1648 1607 const re_dfa_t *const dfa = mctx->dfa; 1649 1608 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; 1650 inti;1609 Idx i; 1651 1610 1652 1611 /* Then build the next sifted state. 1653 We build the next sifted state on `cur_dest', and update1654 `sifted_states[str_idx]' with `cur_dest'.1612 We build the next sifted state on 'cur_dest', and update 1613 'sifted_states[str_idx]' with 'cur_dest'. 1655 1614 Note: 1656 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.1657 `cur_src' points the node_set of the old `state_log[str_idx]'1615 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. 1616 'cur_src' points the node_set of the old 'state_log[str_idx]' 1658 1617 (with the epsilon nodes pre-filtered out). */ 1659 1618 for (i = 0; i < cur_src->nelem; i++) 1660 1619 { 1661 intprev_node = cur_src->elems[i];1620 Idx prev_node = cur_src->elems[i]; 1662 1621 int naccepted = 0; 1663 int ret; 1664 1665 #ifdef DEBUG 1666 re_token_type_t type = dfa->nodes[prev_node].type; 1667 assert (!IS_EPSILON_NODE (type)); 1668 #endif 1669 #ifdef RE_ENABLE_I18N 1670 /* If the node may accept `multi byte'. */ 1622 bool ok; 1623 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); 1624 1625 /* If the node may accept "multi byte". */ 1671 1626 if (dfa->nodes[prev_node].accept_mb) 1672 1627 naccepted = sift_states_iter_mb (mctx, sctx, prev_node, 1673 1628 str_idx, sctx->last_str_idx); 1674 #endif /* RE_ENABLE_I18N */1675 1629 1676 1630 /* We don't check backreferences here. … … 1687 1641 if (sctx->limits.nelem) 1688 1642 { 1689 intto_idx = str_idx + naccepted;1643 Idx to_idx = str_idx + naccepted; 1690 1644 if (check_dst_limits (mctx, &sctx->limits, 1691 1645 dfa->nexts[prev_node], to_idx, … … 1693 1647 continue; 1694 1648 } 1695 ret= re_node_set_insert (cur_dest, prev_node);1696 if ( BE (ret == -1, 0))1649 ok = re_node_set_insert (cur_dest, prev_node); 1650 if (__glibc_unlikely (! ok)) 1697 1651 return REG_ESPACE; 1698 1652 } … … 1704 1658 1705 1659 static reg_errcode_t 1706 internal_function 1707 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) 1708 { 1709 int top = mctx->state_log_top; 1710 1711 if (next_state_log_idx >= mctx->input.bufs_len1660 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) 1661 { 1662 Idx top = mctx->state_log_top; 1663 1664 if ((next_state_log_idx >= mctx->input.bufs_len 1665 && mctx->input.bufs_len < mctx->input.len) 1712 1666 || (next_state_log_idx >= mctx->input.valid_len 1713 1667 && mctx->input.valid_len < mctx->input.len)) 1714 1668 { 1715 1669 reg_errcode_t err; 1716 err = extend_buffers (mctx );1717 if ( BE (err != REG_NOERROR, 0))1670 err = extend_buffers (mctx, next_state_log_idx + 1); 1671 if (__glibc_unlikely (err != REG_NOERROR)) 1718 1672 return err; 1719 1673 } … … 1721 1675 if (top < next_state_log_idx) 1722 1676 { 1677 DEBUG_ASSERT (mctx->state_log != NULL); 1723 1678 memset (mctx->state_log + top + 1, '\0', 1724 1679 sizeof (re_dfastate_t *) * (next_state_log_idx - top)); … … 1729 1684 1730 1685 static reg_errcode_t 1731 internal_function1732 1686 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, 1733 re_dfastate_t **src, intnum)1734 { 1735 intst_idx;1687 re_dfastate_t **src, Idx num) 1688 { 1689 Idx st_idx; 1736 1690 reg_errcode_t err; 1737 1691 for (st_idx = 0; st_idx < num; ++st_idx) … … 1744 1698 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, 1745 1699 &src[st_idx]->nodes); 1746 if ( BE (err != REG_NOERROR, 0))1700 if (__glibc_unlikely (err != REG_NOERROR)) 1747 1701 return err; 1748 1702 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); 1749 1703 re_node_set_free (&merged_set); 1750 if ( BE (err != REG_NOERROR, 0))1704 if (__glibc_unlikely (err != REG_NOERROR)) 1751 1705 return err; 1752 1706 } … … 1756 1710 1757 1711 static reg_errcode_t 1758 internal_function1759 1712 update_cur_sifted_state (const re_match_context_t *mctx, 1760 re_sift_context_t *sctx, intstr_idx,1713 re_sift_context_t *sctx, Idx str_idx, 1761 1714 re_node_set *dest_nodes) 1762 1715 { … … 1776 1729 DEST_NODE. */ 1777 1730 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); 1778 if ( BE (err != REG_NOERROR, 0))1731 if (__glibc_unlikely (err != REG_NOERROR)) 1779 1732 return err; 1780 1733 … … 1784 1737 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, 1785 1738 mctx->bkref_ents, str_idx); 1786 if ( BE (err != REG_NOERROR, 0))1739 if (__glibc_unlikely (err != REG_NOERROR)) 1787 1740 return err; 1788 1741 } … … 1790 1743 1791 1744 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); 1792 if ( BE (err != REG_NOERROR, 0))1745 if (__glibc_unlikely (err != REG_NOERROR)) 1793 1746 return err; 1794 1747 } … … 1797 1750 { 1798 1751 err = sift_states_bkref (mctx, sctx, str_idx, candidates); 1799 if ( BE (err != REG_NOERROR, 0))1752 if (__glibc_unlikely (err != REG_NOERROR)) 1800 1753 return err; 1801 1754 } … … 1804 1757 1805 1758 static reg_errcode_t 1806 internal_function 1759 __attribute_warn_unused_result__ 1807 1760 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, 1808 1761 const re_node_set *candidates) 1809 1762 { 1810 1763 reg_errcode_t err = REG_NOERROR; 1811 inti;1764 Idx i; 1812 1765 1813 1766 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); 1814 if ( BE (err != REG_NOERROR, 0))1767 if (__glibc_unlikely (err != REG_NOERROR)) 1815 1768 return err; 1816 1769 … … 1818 1771 { 1819 1772 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); 1820 if ( BE (err != REG_NOERROR, 0))1821 1773 if (__glibc_unlikely (err != REG_NOERROR)) 1774 return REG_ESPACE; 1822 1775 for (i = 0; i < dest_nodes->nelem; i++) 1823 re_node_set_merge (&state->inveclosure, 1824 dfa->inveclosures + dest_nodes->elems[i]); 1776 { 1777 err = re_node_set_merge (&state->inveclosure, 1778 dfa->inveclosures + dest_nodes->elems[i]); 1779 if (__glibc_unlikely (err != REG_NOERROR)) 1780 return REG_ESPACE; 1781 } 1825 1782 } 1826 1783 return re_node_set_add_intersect (dest_nodes, candidates, … … 1829 1786 1830 1787 static reg_errcode_t 1831 internal_function 1832 sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, 1788 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, 1833 1789 const re_node_set *candidates) 1834 1790 { 1835 intecl_idx;1791 Idx ecl_idx; 1836 1792 reg_errcode_t err; 1837 1793 re_node_set *inv_eclosure = dfa->inveclosures + node; … … 1840 1796 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1841 1797 { 1842 intcur_node = inv_eclosure->elems[ecl_idx];1798 Idx cur_node = inv_eclosure->elems[ecl_idx]; 1843 1799 if (cur_node == node) 1844 1800 continue; 1845 1801 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) 1846 1802 { 1847 intedst1 = dfa->edests[cur_node].elems[0];1848 intedst2 = ((dfa->edests[cur_node].nelem > 1)1803 Idx edst1 = dfa->edests[cur_node].elems[0]; 1804 Idx edst2 = ((dfa->edests[cur_node].nelem > 1) 1849 1805 ? dfa->edests[cur_node].elems[1] : -1); 1850 1806 if ((!re_node_set_contains (inv_eclosure, edst1) … … 1856 1812 err = re_node_set_add_intersect (&except_nodes, candidates, 1857 1813 dfa->inveclosures + cur_node); 1858 if ( BE (err != REG_NOERROR, 0))1814 if (__glibc_unlikely (err != REG_NOERROR)) 1859 1815 { 1860 1816 re_node_set_free (&except_nodes); … … 1866 1822 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1867 1823 { 1868 intcur_node = inv_eclosure->elems[ecl_idx];1824 Idx cur_node = inv_eclosure->elems[ecl_idx]; 1869 1825 if (!re_node_set_contains (&except_nodes, cur_node)) 1870 1826 { 1871 intidx = re_node_set_contains (dest_nodes, cur_node) - 1;1827 Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1; 1872 1828 re_node_set_remove_at (dest_nodes, idx); 1873 1829 } … … 1877 1833 } 1878 1834 1879 static int 1880 internal_function 1881 check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, 1882 int dst_node, int dst_idx, int src_node, int src_idx) 1835 static bool 1836 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, 1837 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) 1883 1838 { 1884 1839 const re_dfa_t *const dfa = mctx->dfa; 1885 intlim_idx, src_pos, dst_pos;1886 1887 intdst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);1888 intsrc_bkref_idx = search_cur_bkref_entry (mctx, src_idx);1840 Idx lim_idx, src_pos, dst_pos; 1841 1842 Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); 1843 Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); 1889 1844 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 1890 1845 { 1891 intsubexp_idx;1846 Idx subexp_idx; 1892 1847 struct re_backref_cache_entry *ent; 1893 1848 ent = mctx->bkref_ents + limits->elems[lim_idx]; … … 1908 1863 continue; /* This is unrelated limitation. */ 1909 1864 else 1910 return 1;1911 } 1912 return 0;1865 return true; 1866 } 1867 return false; 1913 1868 } 1914 1869 1915 1870 static int 1916 internal_function1917 1871 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, 1918 int subexp_idx, int from_node, intbkref_idx)1872 Idx subexp_idx, Idx from_node, Idx bkref_idx) 1919 1873 { 1920 1874 const re_dfa_t *const dfa = mctx->dfa; 1921 1875 const re_node_set *eclosures = dfa->eclosures + from_node; 1922 intnode_idx;1876 Idx node_idx; 1923 1877 1924 1878 /* Else, we are on the boundary: examine the nodes on the epsilon … … 1926 1880 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) 1927 1881 { 1928 intnode = eclosures->elems[node_idx];1882 Idx node = eclosures->elems[node_idx]; 1929 1883 switch (dfa->nodes[node].type) 1930 1884 { … … 1934 1888 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; 1935 1889 do 1936 { 1937 int dst, cpos; 1890 { 1891 Idx dst; 1892 int cpos; 1938 1893 1939 1894 if (ent->node != node) … … 1955 1910 { 1956 1911 if (boundaries & 1) 1957 1912 return -1; 1958 1913 else /* if (boundaries & 2) */ 1959 1914 return 0; 1960 1915 } 1961 1916 … … 1971 1926 ent->eps_reachable_subexps_map 1972 1927 &= ~((bitset_word_t) 1 << subexp_idx); 1973 1928 } 1974 1929 while (ent++->more); 1975 1930 } … … 1995 1950 1996 1951 static int 1997 internal_function 1998 check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, 1999 int subexp_idx, int from_node, int str_idx, 2000 int bkref_idx) 1952 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, 1953 Idx subexp_idx, Idx from_node, Idx str_idx, 1954 Idx bkref_idx) 2001 1955 { 2002 1956 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; … … 2025 1979 2026 1980 static reg_errcode_t 2027 internal_function2028 1981 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, 2029 1982 const re_node_set *candidates, re_node_set *limits, 2030 struct re_backref_cache_entry *bkref_ents, intstr_idx)1983 struct re_backref_cache_entry *bkref_ents, Idx str_idx) 2031 1984 { 2032 1985 reg_errcode_t err; 2033 intnode_idx, lim_idx;1986 Idx node_idx, lim_idx; 2034 1987 2035 1988 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 2036 1989 { 2037 intsubexp_idx;1990 Idx subexp_idx; 2038 1991 struct re_backref_cache_entry *ent; 2039 1992 ent = bkref_ents + limits->elems[lim_idx]; … … 2045 1998 if (ent->subexp_to == str_idx) 2046 1999 { 2047 intops_node = -1;2048 intcls_node = -1;2000 Idx ops_node = -1; 2001 Idx cls_node = -1; 2049 2002 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2050 2003 { 2051 intnode = dest_nodes->elems[node_idx];2004 Idx node = dest_nodes->elems[node_idx]; 2052 2005 re_token_type_t type = dfa->nodes[node].type; 2053 2006 if (type == OP_OPEN_SUBEXP … … 2065 2018 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, 2066 2019 candidates); 2067 if ( BE (err != REG_NOERROR, 0))2020 if (__glibc_unlikely (err != REG_NOERROR)) 2068 2021 return err; 2069 2022 } … … 2073 2026 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2074 2027 { 2075 intnode = dest_nodes->elems[node_idx];2028 Idx node = dest_nodes->elems[node_idx]; 2076 2029 if (!re_node_set_contains (dfa->inveclosures + node, 2077 2030 cls_node) … … 2083 2036 err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2084 2037 candidates); 2085 if ( BE (err != REG_NOERROR, 0))2038 if (__glibc_unlikely (err != REG_NOERROR)) 2086 2039 return err; 2087 2040 --node_idx; … … 2093 2046 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2094 2047 { 2095 intnode = dest_nodes->elems[node_idx];2048 Idx node = dest_nodes->elems[node_idx]; 2096 2049 re_token_type_t type = dfa->nodes[node].type; 2097 2050 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) … … 2103 2056 err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2104 2057 candidates); 2105 if ( BE (err != REG_NOERROR, 0))2058 if (__glibc_unlikely (err != REG_NOERROR)) 2106 2059 return err; 2107 2060 } … … 2113 2066 2114 2067 static reg_errcode_t 2115 internal_function 2068 __attribute_warn_unused_result__ 2116 2069 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, 2117 intstr_idx, const re_node_set *candidates)2070 Idx str_idx, const re_node_set *candidates) 2118 2071 { 2119 2072 const re_dfa_t *const dfa = mctx->dfa; 2120 2073 reg_errcode_t err; 2121 intnode_idx, node;2074 Idx node_idx, node; 2122 2075 re_sift_context_t local_sctx; 2123 intfirst_idx = search_cur_bkref_entry (mctx, str_idx);2076 Idx first_idx = search_cur_bkref_entry (mctx, str_idx); 2124 2077 2125 2078 if (first_idx == -1) … … 2130 2083 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) 2131 2084 { 2132 intenabled_idx;2085 Idx enabled_idx; 2133 2086 re_token_type_t type; 2134 2087 struct re_backref_cache_entry *entry; … … 2145 2098 do 2146 2099 { 2147 intsubexp_len;2148 intto_idx;2149 intdst_node;2150 int ret;2100 Idx subexp_len; 2101 Idx to_idx; 2102 Idx dst_node; 2103 bool ok; 2151 2104 re_dfastate_t *cur_state; 2152 2105 … … 2169 2122 local_sctx = *sctx; 2170 2123 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); 2171 if ( BE (err != REG_NOERROR, 0))2124 if (__glibc_unlikely (err != REG_NOERROR)) 2172 2125 goto free_return; 2173 2126 } 2174 2127 local_sctx.last_node = node; 2175 2128 local_sctx.last_str_idx = str_idx; 2176 ret= re_node_set_insert (&local_sctx.limits, enabled_idx);2177 if ( BE (ret < 0, 0))2129 ok = re_node_set_insert (&local_sctx.limits, enabled_idx); 2130 if (__glibc_unlikely (! ok)) 2178 2131 { 2179 2132 err = REG_ESPACE; … … 2182 2135 cur_state = local_sctx.sifted_states[str_idx]; 2183 2136 err = sift_states_backward (mctx, &local_sctx); 2184 if ( BE (err != REG_NOERROR, 0))2137 if (__glibc_unlikely (err != REG_NOERROR)) 2185 2138 goto free_return; 2186 2139 if (sctx->limited_states != NULL) … … 2189 2142 local_sctx.sifted_states, 2190 2143 str_idx + 1); 2191 if ( BE (err != REG_NOERROR, 0))2144 if (__glibc_unlikely (err != REG_NOERROR)) 2192 2145 goto free_return; 2193 2146 } … … 2196 2149 2197 2150 /* mctx->bkref_ents may have changed, reload the pointer. */ 2198 2151 entry = mctx->bkref_ents + enabled_idx; 2199 2152 } 2200 2153 while (enabled_idx++, entry++->more); … … 2211 2164 2212 2165 2213 #ifdef RE_ENABLE_I18N2214 2166 static int 2215 internal_function2216 2167 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, 2217 int node_idx, int str_idx, intmax_str_idx)2168 Idx node_idx, Idx str_idx, Idx max_str_idx) 2218 2169 { 2219 2170 const re_dfa_t *const dfa = mctx->dfa; 2220 2171 int naccepted; 2221 /* Check the node can accept `multi byte'. */2172 /* Check the node can accept "multi byte". */ 2222 2173 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); 2223 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&2224 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],2225 dfa->nexts[node_idx]))2226 /* The node can't accept the `multi byte', or the2174 if (naccepted > 0 && str_idx + naccepted <= max_str_idx 2175 && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], 2176 dfa->nexts[node_idx])) 2177 /* The node can't accept the "multi byte", or the 2227 2178 destination was already thrown away, then the node 2228 could 't accept the current input `multi byte'. */2179 couldn't accept the current input "multi byte". */ 2229 2180 naccepted = 0; 2230 2181 /* Otherwise, it is sure that the node could accept 2231 `naccepted' bytes input. */2182 'naccepted' bytes input. */ 2232 2183 return naccepted; 2233 2184 } 2234 #endif /* RE_ENABLE_I18N */2235 2236 2185 2237 2186 … … 2240 2189 /* Return the next state to which the current state STATE will transit by 2241 2190 accepting the current input byte, and update STATE_LOG if necessary. 2191 Return NULL on failure. 2242 2192 If STATE can accept a multibyte char/collating element/back reference 2243 2193 update the destination of STATE_LOG. */ 2244 2194 2245 2195 static re_dfastate_t * 2246 internal_function 2196 __attribute_warn_unused_result__ 2247 2197 transit_state (reg_errcode_t *err, re_match_context_t *mctx, 2248 2198 re_dfastate_t *state) … … 2251 2201 unsigned char ch; 2252 2202 2253 #ifdef RE_ENABLE_I18N2254 2203 /* If the current state can accept multibyte. */ 2255 if ( BE (state->accept_mb, 0))2204 if (__glibc_unlikely (state->accept_mb)) 2256 2205 { 2257 2206 *err = transit_state_mb (mctx, state); 2258 if ( BE (*err != REG_NOERROR, 0))2207 if (__glibc_unlikely (*err != REG_NOERROR)) 2259 2208 return NULL; 2260 2209 } 2261 #endif /* RE_ENABLE_I18N */2262 2210 2263 2211 /* Then decide the next state with the single byte. */ … … 2273 2221 { 2274 2222 trtable = state->trtable; 2275 if ( BE (trtable != NULL, 1))2223 if (__glibc_likely (trtable != NULL)) 2276 2224 return trtable[ch]; 2277 2225 2278 2226 trtable = state->word_trtable; 2279 if ( BE (trtable != NULL, 1))2280 2227 if (__glibc_likely (trtable != NULL)) 2228 { 2281 2229 unsigned int context; 2282 2230 context … … 2301 2249 2302 2250 /* Update the state_log if we need */ 2303 re_dfastate_t * 2304 internal_function 2251 static re_dfastate_t * 2305 2252 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, 2306 2253 re_dfastate_t *next_state) 2307 2254 { 2308 2255 const re_dfa_t *const dfa = mctx->dfa; 2309 intcur_idx = re_string_cur_idx (&mctx->input);2256 Idx cur_idx = re_string_cur_idx (&mctx->input); 2310 2257 2311 2258 if (cur_idx > mctx->state_log_top) … … 2324 2271 re_node_set next_nodes, *log_nodes, *table_nodes = NULL; 2325 2272 /* If (state_log[cur_idx] != 0), it implies that cur_idx is 2326 2327 2328 2273 the destination of a multibyte char/collating element/ 2274 back reference. Then the next state is the union set of 2275 these destinations and the results of the transition table. */ 2329 2276 pstate = mctx->state_log[cur_idx]; 2330 2277 log_nodes = pstate->entrance_nodes; 2331 2278 if (next_state != NULL) 2332 2333 2334 2279 { 2280 table_nodes = next_state->entrance_nodes; 2281 *err = re_node_set_init_union (&next_nodes, table_nodes, 2335 2282 log_nodes); 2336 if (BE (*err != REG_NOERROR, 0))2283 if (__glibc_unlikely (*err != REG_NOERROR)) 2337 2284 return NULL; 2338 2285 } 2339 2286 else 2340 2287 next_nodes = *log_nodes; 2341 2288 /* Note: We already add the nodes of the initial state, 2342 2289 then we don't need to add them here. */ … … 2346 2293 mctx->eflags); 2347 2294 next_state = mctx->state_log[cur_idx] 2348 2295 = re_acquire_state_context (err, dfa, &next_nodes, context); 2349 2296 /* We don't need to check errors here, since the return value of 2350 2297 this function is next_state and ERR is already set. */ 2351 2298 2352 2299 if (table_nodes != NULL) 2353 2354 } 2355 2356 if ( BE (dfa->nbackref, 0) && next_state != NULL)2300 re_node_set_free (&next_nodes); 2301 } 2302 2303 if (__glibc_unlikely (dfa->nbackref) && next_state != NULL) 2357 2304 { 2358 2305 /* Check OP_OPEN_SUBEXP in the current state in case that we use them … … 2361 2308 *err = check_subexp_matching_top (mctx, &next_state->nodes, 2362 2309 cur_idx); 2363 if ( BE (*err != REG_NOERROR, 0))2310 if (__glibc_unlikely (*err != REG_NOERROR)) 2364 2311 return NULL; 2365 2312 … … 2368 2315 { 2369 2316 *err = transit_state_bkref (mctx, &next_state->nodes); 2370 if ( BE (*err != REG_NOERROR, 0))2317 if (__glibc_unlikely (*err != REG_NOERROR)) 2371 2318 return NULL; 2372 2319 next_state = mctx->state_log[cur_idx]; … … 2380 2327 multi-byte match, then look in the log for a state 2381 2328 from which to restart matching. */ 2382 re_dfastate_t * 2383 internal_function 2329 static re_dfastate_t * 2384 2330 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) 2385 2331 { … … 2387 2333 do 2388 2334 { 2389 intmax = mctx->state_log_top;2390 intcur_str_idx = re_string_cur_idx (&mctx->input);2335 Idx max = mctx->state_log_top; 2336 Idx cur_str_idx = re_string_cur_idx (&mctx->input); 2391 2337 2392 2338 do 2393 2339 { 2394 2395 2396 2340 if (++cur_str_idx > max) 2341 return NULL; 2342 re_string_skip_bytes (&mctx->input, 1); 2397 2343 } 2398 2344 while (mctx->state_log[cur_str_idx] == NULL); … … 2409 2355 OP_OPEN_SUBEXP and which have corresponding back references in the regular 2410 2356 expression. And register them to use them later for evaluating the 2411 correspo ding back references. */2357 corresponding back references. */ 2412 2358 2413 2359 static reg_errcode_t 2414 internal_function2415 2360 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, 2416 intstr_idx)2361 Idx str_idx) 2417 2362 { 2418 2363 const re_dfa_t *const dfa = mctx->dfa; 2419 intnode_idx;2364 Idx node_idx; 2420 2365 reg_errcode_t err; 2421 2366 … … 2427 2372 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) 2428 2373 { 2429 intnode = cur_nodes->elems[node_idx];2374 Idx node = cur_nodes->elems[node_idx]; 2430 2375 if (dfa->nodes[node].type == OP_OPEN_SUBEXP 2431 2376 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS … … 2434 2379 { 2435 2380 err = match_ctx_add_subtop (mctx, node, str_idx); 2436 if ( BE (err != REG_NOERROR, 0))2381 if (__glibc_unlikely (err != REG_NOERROR)) 2437 2382 return err; 2438 2383 } … … 2443 2388 #if 0 2444 2389 /* Return the next state to which the current state STATE will transit by 2445 accepting the current input byte. */2390 accepting the current input byte. Return NULL on failure. */ 2446 2391 2447 2392 static re_dfastate_t * … … 2452 2397 re_node_set next_nodes; 2453 2398 re_dfastate_t *next_state; 2454 intnode_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);2399 Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); 2455 2400 unsigned int context; 2456 2401 2457 2402 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); 2458 if ( BE (*err != REG_NOERROR, 0))2403 if (__glibc_unlikely (*err != REG_NOERROR)) 2459 2404 return NULL; 2460 2405 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) 2461 2406 { 2462 intcur_node = state->nodes.elems[node_cnt];2407 Idx cur_node = state->nodes.elems[node_cnt]; 2463 2408 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) 2464 2409 { 2465 2410 *err = re_node_set_merge (&next_nodes, 2466 2411 dfa->eclosures + dfa->nexts[cur_node]); 2467 if ( BE (*err != REG_NOERROR, 0))2412 if (__glibc_unlikely (*err != REG_NOERROR)) 2468 2413 { 2469 2414 re_node_set_free (&next_nodes); … … 2483 2428 #endif 2484 2429 2485 #ifdef RE_ENABLE_I18N2486 2430 static reg_errcode_t 2487 internal_function2488 2431 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) 2489 2432 { 2490 2433 const re_dfa_t *const dfa = mctx->dfa; 2491 2434 reg_errcode_t err; 2492 inti;2435 Idx i; 2493 2436 2494 2437 for (i = 0; i < pstate->nodes.nelem; ++i) 2495 2438 { 2496 2439 re_node_set dest_nodes, *new_nodes; 2497 int cur_node_idx = pstate->nodes.elems[i]; 2498 int naccepted, dest_idx; 2440 Idx cur_node_idx = pstate->nodes.elems[i]; 2441 int naccepted; 2442 Idx dest_idx; 2499 2443 unsigned int context; 2500 2444 re_dfastate_t *dest_state; 2501 2445 2502 2446 if (!dfa->nodes[cur_node_idx].accept_mb) 2503 2447 continue; 2504 2448 2505 2449 if (dfa->nodes[cur_node_idx].constraint) … … 2519 2463 continue; 2520 2464 2521 /* The node can accepts `naccepted' bytes. */2465 /* The node can accepts 'naccepted' bytes. */ 2522 2466 dest_idx = re_string_cur_idx (&mctx->input) + naccepted; 2523 2467 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted 2524 2468 : mctx->max_mb_elem_len); 2525 2469 err = clean_state_log_if_needed (mctx, dest_idx); 2526 if ( BE (err != REG_NOERROR, 0))2470 if (__glibc_unlikely (err != REG_NOERROR)) 2527 2471 return err; 2528 #ifdef DEBUG 2529 assert (dfa->nexts[cur_node_idx] != -1); 2530 #endif 2472 DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1); 2531 2473 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; 2532 2474 … … 2538 2480 err = re_node_set_init_union (&dest_nodes, 2539 2481 dest_state->entrance_nodes, new_nodes); 2540 if ( BE (err != REG_NOERROR, 0))2482 if (__glibc_unlikely (err != REG_NOERROR)) 2541 2483 return err; 2542 2484 } … … 2547 2489 if (dest_state != NULL) 2548 2490 re_node_set_free (&dest_nodes); 2549 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) 2491 if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL 2492 && err != REG_NOERROR)) 2550 2493 return err; 2551 2494 } 2552 2495 return REG_NOERROR; 2553 2496 } 2554 #endif /* RE_ENABLE_I18N */2555 2497 2556 2498 static reg_errcode_t 2557 internal_function2558 2499 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) 2559 2500 { 2560 2501 const re_dfa_t *const dfa = mctx->dfa; 2561 2502 reg_errcode_t err; 2562 inti;2563 intcur_str_idx = re_string_cur_idx (&mctx->input);2503 Idx i; 2504 Idx cur_str_idx = re_string_cur_idx (&mctx->input); 2564 2505 2565 2506 for (i = 0; i < nodes->nelem; ++i) 2566 2507 { 2567 intdest_str_idx, prev_nelem, bkc_idx;2568 intnode_idx = nodes->elems[i];2508 Idx dest_str_idx, prev_nelem, bkc_idx; 2509 Idx node_idx = nodes->elems[i]; 2569 2510 unsigned int context; 2570 2511 const re_token_t *node = dfa->nodes + node_idx; 2571 2512 re_node_set *new_dest_nodes; 2572 2513 2573 /* Check whether `node' is a backreference or not. */2514 /* Check whether 'node' is a backreference or not. */ 2574 2515 if (node->type != OP_BACK_REF) 2575 2516 continue; … … 2583 2524 } 2584 2525 2585 /* `node' is a backreference.2526 /* 'node' is a backreference. 2586 2527 Check the substring which the substring matched. */ 2587 2528 bkc_idx = mctx->nbkref_ents; 2588 2529 err = get_subexp (mctx, node_idx, cur_str_idx); 2589 if ( BE (err != REG_NOERROR, 0))2530 if (__glibc_unlikely (err != REG_NOERROR)) 2590 2531 goto free_return; 2591 2532 2592 /* And add the epsilon closures (which is `new_dest_nodes') of2533 /* And add the epsilon closures (which is 'new_dest_nodes') of 2593 2534 the backreference to appropriate state_log. */ 2594 #ifdef DEBUG 2595 assert (dfa->nexts[node_idx] != -1); 2596 #endif 2535 DEBUG_ASSERT (dfa->nexts[node_idx] != -1); 2597 2536 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) 2598 2537 { 2599 intsubexp_len;2538 Idx subexp_len; 2600 2539 re_dfastate_t *dest_state; 2601 2540 struct re_backref_cache_entry *bkref_ent; … … 2614 2553 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 2615 2554 : mctx->state_log[cur_str_idx]->nodes.nelem); 2616 /* Add `new_dest_node' to state_log. */2555 /* Add 'new_dest_node' to state_log. */ 2617 2556 if (dest_state == NULL) 2618 2557 { … … 2620 2559 = re_acquire_state_context (&err, dfa, new_dest_nodes, 2621 2560 context); 2622 if ( BE(mctx->state_log[dest_str_idx] == NULL2623 && err != REG_NOERROR, 0))2561 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL 2562 && err != REG_NOERROR)) 2624 2563 goto free_return; 2625 2564 } … … 2630 2569 dest_state->entrance_nodes, 2631 2570 new_dest_nodes); 2632 if ( BE (err != REG_NOERROR, 0))2571 if (__glibc_unlikely (err != REG_NOERROR)) 2633 2572 { 2634 2573 re_node_set_free (&dest_nodes); … … 2638 2577 = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2639 2578 re_node_set_free (&dest_nodes); 2640 if ( BE(mctx->state_log[dest_str_idx] == NULL2641 && err != REG_NOERROR, 0))2579 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL 2580 && err != REG_NOERROR)) 2642 2581 goto free_return; 2643 2582 } … … 2649 2588 err = check_subexp_matching_top (mctx, new_dest_nodes, 2650 2589 cur_str_idx); 2651 if ( BE (err != REG_NOERROR, 0))2590 if (__glibc_unlikely (err != REG_NOERROR)) 2652 2591 goto free_return; 2653 2592 err = transit_state_bkref (mctx, new_dest_nodes); 2654 if ( BE (err != REG_NOERROR, 0))2593 if (__glibc_unlikely (err != REG_NOERROR)) 2655 2594 goto free_return; 2656 2595 } … … 2669 2608 2670 2609 static reg_errcode_t 2671 internal_function 2672 get_subexp (re_match_context_t *mctx, int bkref_node, intbkref_str_idx)2610 __attribute_warn_unused_result__ 2611 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) 2673 2612 { 2674 2613 const re_dfa_t *const dfa = mctx->dfa; 2675 intsubexp_num, sub_top_idx;2614 Idx subexp_num, sub_top_idx; 2676 2615 const char *buf = (const char *) re_string_get_buffer (&mctx->input); 2677 2616 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ 2678 intcache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);2617 Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); 2679 2618 if (cache_idx != -1) 2680 2619 { … … 2682 2621 = mctx->bkref_ents + cache_idx; 2683 2622 do 2684 2623 if (entry->node == bkref_node) 2685 2624 return REG_NOERROR; /* We already checked it. */ 2686 2625 while (entry++->more); … … 2695 2634 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; 2696 2635 re_sub_match_last_t *sub_last; 2697 intsub_last_idx, sl_str, bkref_str_off;2636 Idx sub_last_idx, sl_str, bkref_str_off; 2698 2637 2699 2638 if (dfa->nodes[sub_top->node].opr.idx != subexp_num) … … 2706 2645 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) 2707 2646 { 2708 int sl_str_diff;2647 regoff_t sl_str_diff; 2709 2648 sub_last = sub_top->lasts[sub_last_idx]; 2710 2649 sl_str_diff = sub_last->str_idx - sl_str; … … 2713 2652 if (sl_str_diff > 0) 2714 2653 { 2715 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) 2654 if (__glibc_unlikely (bkref_str_off + sl_str_diff 2655 > mctx->input.valid_len)) 2716 2656 { 2717 2657 /* Not enough chars for a successful match. */ … … 2722 2662 bkref_str_off 2723 2663 + sl_str_diff); 2724 if ( BE (err != REG_NOERROR, 0))2664 if (__glibc_unlikely (err != REG_NOERROR)) 2725 2665 return err; 2726 2666 buf = (const char *) re_string_get_buffer (&mctx->input); … … 2741 2681 if (err == REG_NOMATCH) 2742 2682 continue; 2743 if ( BE (err != REG_NOERROR, 0))2683 if (__glibc_unlikely (err != REG_NOERROR)) 2744 2684 return err; 2745 2685 } … … 2752 2692 for (; sl_str <= bkref_str_idx; ++sl_str) 2753 2693 { 2754 int cls_node, sl_str_off; 2694 Idx cls_node; 2695 regoff_t sl_str_off; 2755 2696 const re_node_set *nodes; 2756 2697 sl_str_off = sl_str - sub_top->str_idx; … … 2759 2700 if (sl_str_off > 0) 2760 2701 { 2761 if ( BE (bkref_str_off >= mctx->input.valid_len, 0))2702 if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len)) 2762 2703 { 2763 2704 /* If we are at the end of the input, we cannot match. */ … … 2765 2706 break; 2766 2707 2767 err = extend_buffers (mctx );2768 if ( BE (err != REG_NOERROR, 0))2708 err = extend_buffers (mctx, bkref_str_off + 1); 2709 if (__glibc_unlikely (err != REG_NOERROR)) 2769 2710 return err; 2770 2711 … … 2797 2738 if (err == REG_NOMATCH) 2798 2739 continue; 2799 if ( BE (err != REG_NOERROR, 0))2740 if (__glibc_unlikely (err != REG_NOERROR)) 2800 2741 return err; 2801 2742 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); 2802 if ( BE (sub_last == NULL, 0))2743 if (__glibc_unlikely (sub_last == NULL)) 2803 2744 return REG_ESPACE; 2804 2745 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, 2805 2746 bkref_str_idx); 2747 buf = (const char *) re_string_get_buffer (&mctx->input); 2806 2748 if (err == REG_NOMATCH) 2807 2749 continue; 2750 if (__glibc_unlikely (err != REG_NOERROR)) 2751 return err; 2808 2752 } 2809 2753 } … … 2818 2762 2819 2763 static reg_errcode_t 2820 internal_function2821 2764 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, 2822 re_sub_match_last_t *sub_last, int bkref_node, intbkref_str)2765 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) 2823 2766 { 2824 2767 reg_errcode_t err; 2825 intto_idx;2768 Idx to_idx; 2826 2769 /* Can the subexpression arrive the back reference? */ 2827 2770 err = check_arrival (mctx, &sub_last->path, sub_last->node, … … 2832 2775 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, 2833 2776 sub_last->str_idx); 2834 if ( BE (err != REG_NOERROR, 0))2777 if (__glibc_unlikely (err != REG_NOERROR)) 2835 2778 return err; 2836 2779 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; … … 2846 2789 E.g. RE: (a){2} */ 2847 2790 2848 static int 2849 internal_function 2791 static Idx 2850 2792 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 2851 intsubexp_idx, int type)2852 { 2853 intcls_idx;2793 Idx subexp_idx, int type) 2794 { 2795 Idx cls_idx; 2854 2796 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) 2855 2797 { 2856 intcls_node = nodes->elems[cls_idx];2798 Idx cls_node = nodes->elems[cls_idx]; 2857 2799 const re_token_t *node = dfa->nodes + cls_node; 2858 2800 if (node->type == type … … 2866 2808 LAST_NODE at LAST_STR. We record the path onto PATH since it will be 2867 2809 heavily reused. 2868 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ 2810 Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, 2811 REG_ESPACE if memory is exhausted. */ 2869 2812 2870 2813 static reg_errcode_t 2871 internal_function 2872 check_arrival (re_match_context_t *mctx, state_array_t *path, inttop_node,2873 int top_str, int last_node, intlast_str, int type)2814 __attribute_warn_unused_result__ 2815 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, 2816 Idx top_str, Idx last_node, Idx last_str, int type) 2874 2817 { 2875 2818 const re_dfa_t *const dfa = mctx->dfa; 2876 2819 reg_errcode_t err = REG_NOERROR; 2877 intsubexp_num, backup_cur_idx, str_idx, null_cnt;2820 Idx subexp_num, backup_cur_idx, str_idx, null_cnt; 2878 2821 re_dfastate_t *cur_state = NULL; 2879 2822 re_node_set *cur_nodes, next_nodes; … … 2883 2826 subexp_num = dfa->nodes[top_node].opr.idx; 2884 2827 /* Extend the buffer if we need. */ 2885 if ( BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))2828 if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1)) 2886 2829 { 2887 2830 re_dfastate_t **new_array; 2888 int old_alloc = path->alloc; 2889 path->alloc += last_str + mctx->max_mb_elem_len + 1; 2890 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); 2891 if (BE (new_array == NULL, 0)) 2892 { 2893 path->alloc = old_alloc; 2894 return REG_ESPACE; 2895 } 2831 Idx old_alloc = path->alloc; 2832 Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; 2833 Idx new_alloc; 2834 if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc)) 2835 return REG_ESPACE; 2836 new_alloc = old_alloc + incr_alloc; 2837 if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc)) 2838 return REG_ESPACE; 2839 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); 2840 if (__glibc_unlikely (new_array == NULL)) 2841 return REG_ESPACE; 2896 2842 path->array = new_array; 2843 path->alloc = new_alloc; 2897 2844 memset (new_array + old_alloc, '\0', 2898 2845 sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); 2899 2846 } 2900 2847 2901 #ifdef __GNUC__ /* silly buggers. */2902 str_idx = path->next_idx ?: top_str;2903 #else2904 2848 str_idx = path->next_idx ? path->next_idx : top_str; 2905 #endif2906 2849 2907 2850 /* Temporary modify MCTX. */ … … 2916 2859 { 2917 2860 err = re_node_set_init_1 (&next_nodes, top_node); 2918 if ( BE (err != REG_NOERROR, 0))2861 if (__glibc_unlikely (err != REG_NOERROR)) 2919 2862 return err; 2920 2863 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2921 if ( BE (err != REG_NOERROR, 0))2864 if (__glibc_unlikely (err != REG_NOERROR)) 2922 2865 { 2923 2866 re_node_set_free (&next_nodes); … … 2931 2874 { 2932 2875 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); 2933 if ( BE (err != REG_NOERROR, 0))2876 if (__glibc_unlikely (err != REG_NOERROR)) 2934 2877 return err; 2935 2878 } … … 2943 2886 err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2944 2887 subexp_num, type); 2945 if ( BE (err != REG_NOERROR, 0))2888 if (__glibc_unlikely (err != REG_NOERROR)) 2946 2889 { 2947 2890 re_node_set_free (&next_nodes); … … 2950 2893 } 2951 2894 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 2952 if ( BE (cur_state == NULL && err != REG_NOERROR, 0))2895 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) 2953 2896 { 2954 2897 re_node_set_free (&next_nodes); … … 2965 2908 err = re_node_set_merge (&next_nodes, 2966 2909 &mctx->state_log[str_idx + 1]->nodes); 2967 if ( BE (err != REG_NOERROR, 0))2910 if (__glibc_unlikely (err != REG_NOERROR)) 2968 2911 { 2969 2912 re_node_set_free (&next_nodes); … … 2976 2919 &cur_state->non_eps_nodes, 2977 2920 &next_nodes); 2978 if ( BE (err != REG_NOERROR, 0))2921 if (__glibc_unlikely (err != REG_NOERROR)) 2979 2922 { 2980 2923 re_node_set_free (&next_nodes); … … 2986 2929 { 2987 2930 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2988 if ( BE (err != REG_NOERROR, 0))2931 if (__glibc_unlikely (err != REG_NOERROR)) 2989 2932 { 2990 2933 re_node_set_free (&next_nodes); … … 2993 2936 err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2994 2937 subexp_num, type); 2995 if ( BE (err != REG_NOERROR, 0))2938 if (__glibc_unlikely (err != REG_NOERROR)) 2996 2939 { 2997 2940 re_node_set_free (&next_nodes); … … 3001 2944 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); 3002 2945 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 3003 if ( BE (cur_state == NULL && err != REG_NOERROR, 0))2946 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) 3004 2947 { 3005 2948 re_node_set_free (&next_nodes); … … 3034 2977 3035 2978 static reg_errcode_t 3036 internal_function 3037 check_arrival_add_next_nodes (re_match_context_t *mctx, intstr_idx,2979 __attribute_warn_unused_result__ 2980 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, 3038 2981 re_node_set *cur_nodes, re_node_set *next_nodes) 3039 2982 { 3040 2983 const re_dfa_t *const dfa = mctx->dfa; 3041 int result;3042 intcur_idx;2984 bool ok; 2985 Idx cur_idx; 3043 2986 reg_errcode_t err = REG_NOERROR; 3044 2987 re_node_set union_set; … … 3047 2990 { 3048 2991 int naccepted = 0; 3049 int cur_node = cur_nodes->elems[cur_idx]; 3050 #ifdef DEBUG 3051 re_token_type_t type = dfa->nodes[cur_node].type; 3052 assert (!IS_EPSILON_NODE (type)); 3053 #endif 3054 #ifdef RE_ENABLE_I18N 3055 /* If the node may accept `multi byte'. */ 2992 Idx cur_node = cur_nodes->elems[cur_idx]; 2993 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); 2994 2995 /* If the node may accept "multi byte". */ 3056 2996 if (dfa->nodes[cur_node].accept_mb) 3057 2997 { … … 3061 3001 { 3062 3002 re_dfastate_t *dest_state; 3063 intnext_node = dfa->nexts[cur_node];3064 intnext_idx = str_idx + naccepted;3003 Idx next_node = dfa->nexts[cur_node]; 3004 Idx next_idx = str_idx + naccepted; 3065 3005 dest_state = mctx->state_log[next_idx]; 3066 3006 re_node_set_empty (&union_set); … … 3068 3008 { 3069 3009 err = re_node_set_merge (&union_set, &dest_state->nodes); 3070 if ( BE (err != REG_NOERROR, 0))3010 if (__glibc_unlikely (err != REG_NOERROR)) 3071 3011 { 3072 3012 re_node_set_free (&union_set); … … 3074 3014 } 3075 3015 } 3076 result= re_node_set_insert (&union_set, next_node);3077 if ( BE (result < 0, 0))3016 ok = re_node_set_insert (&union_set, next_node); 3017 if (__glibc_unlikely (! ok)) 3078 3018 { 3079 3019 re_node_set_free (&union_set); … … 3082 3022 mctx->state_log[next_idx] = re_acquire_state (&err, dfa, 3083 3023 &union_set); 3084 if ( BE(mctx->state_log[next_idx] == NULL3085 && err != REG_NOERROR, 0))3024 if (__glibc_unlikely (mctx->state_log[next_idx] == NULL 3025 && err != REG_NOERROR)) 3086 3026 { 3087 3027 re_node_set_free (&union_set); … … 3090 3030 } 3091 3031 } 3092 #endif /* RE_ENABLE_I18N */ 3032 3093 3033 if (naccepted 3094 3034 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) 3095 3035 { 3096 result= re_node_set_insert (next_nodes, dfa->nexts[cur_node]);3097 if ( BE (result < 0, 0))3036 ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); 3037 if (__glibc_unlikely (! ok)) 3098 3038 { 3099 3039 re_node_set_free (&union_set); … … 3113 3053 3114 3054 static reg_errcode_t 3115 internal_function3116 3055 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, 3117 intex_subexp, int type)3056 Idx ex_subexp, int type) 3118 3057 { 3119 3058 reg_errcode_t err; 3120 intidx, outside_node;3059 Idx idx, outside_node; 3121 3060 re_node_set new_nodes; 3122 #ifdef DEBUG 3123 assert (cur_nodes->nelem); 3124 #endif 3061 DEBUG_ASSERT (cur_nodes->nelem); 3125 3062 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); 3126 if ( BE (err != REG_NOERROR, 0))3063 if (__glibc_unlikely (err != REG_NOERROR)) 3127 3064 return err; 3128 3065 /* Create a new node set NEW_NODES with the nodes which are epsilon … … 3131 3068 for (idx = 0; idx < cur_nodes->nelem; ++idx) 3132 3069 { 3133 intcur_node = cur_nodes->elems[idx];3070 Idx cur_node = cur_nodes->elems[idx]; 3134 3071 const re_node_set *eclosure = dfa->eclosures + cur_node; 3135 3072 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); … … 3138 3075 /* There are no problematic nodes, just merge them. */ 3139 3076 err = re_node_set_merge (&new_nodes, eclosure); 3140 if ( BE (err != REG_NOERROR, 0))3077 if (__glibc_unlikely (err != REG_NOERROR)) 3141 3078 { 3142 3079 re_node_set_free (&new_nodes); … … 3149 3086 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, 3150 3087 ex_subexp, type); 3151 if ( BE (err != REG_NOERROR, 0))3088 if (__glibc_unlikely (err != REG_NOERROR)) 3152 3089 { 3153 3090 re_node_set_free (&new_nodes); … … 3166 3103 3167 3104 static reg_errcode_t 3168 internal_function 3105 __attribute_warn_unused_result__ 3169 3106 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, 3170 int target, intex_subexp, int type)3171 { 3172 intcur_node;3107 Idx target, Idx ex_subexp, int type) 3108 { 3109 Idx cur_node; 3173 3110 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) 3174 3111 { 3175 int err;3112 bool ok; 3176 3113 3177 3114 if (dfa->nodes[cur_node].type == type … … 3180 3117 if (type == OP_CLOSE_SUBEXP) 3181 3118 { 3182 err= re_node_set_insert (dst_nodes, cur_node);3183 if ( BE (err == -1, 0))3119 ok = re_node_set_insert (dst_nodes, cur_node); 3120 if (__glibc_unlikely (! ok)) 3184 3121 return REG_ESPACE; 3185 3122 } 3186 3123 break; 3187 3124 } 3188 err= re_node_set_insert (dst_nodes, cur_node);3189 if ( BE (err == -1, 0))3125 ok = re_node_set_insert (dst_nodes, cur_node); 3126 if (__glibc_unlikely (! ok)) 3190 3127 return REG_ESPACE; 3191 3128 if (dfa->edests[cur_node].nelem == 0) … … 3193 3130 if (dfa->edests[cur_node].nelem == 2) 3194 3131 { 3132 reg_errcode_t err; 3195 3133 err = check_arrival_expand_ecl_sub (dfa, dst_nodes, 3196 3134 dfa->edests[cur_node].elems[1], 3197 3135 ex_subexp, type); 3198 if ( BE (err != REG_NOERROR, 0))3136 if (__glibc_unlikely (err != REG_NOERROR)) 3199 3137 return err; 3200 3138 } … … 3210 3148 3211 3149 static reg_errcode_t 3212 internal_function 3150 __attribute_warn_unused_result__ 3213 3151 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, 3214 int cur_str, intsubexp_num, int type)3152 Idx cur_str, Idx subexp_num, int type) 3215 3153 { 3216 3154 const re_dfa_t *const dfa = mctx->dfa; 3217 3155 reg_errcode_t err; 3218 intcache_idx_start = search_cur_bkref_entry (mctx, cur_str);3156 Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); 3219 3157 struct re_backref_cache_entry *ent; 3220 3158 … … 3226 3164 do 3227 3165 { 3228 intto_idx, next_node;3166 Idx to_idx, next_node; 3229 3167 3230 3168 /* Is this entry ENT is appropriate? */ … … 3248 3186 err3 = re_node_set_merge (cur_nodes, &new_dests); 3249 3187 re_node_set_free (&new_dests); 3250 if ( BE(err != REG_NOERROR || err2 != REG_NOERROR3251 || err3 != REG_NOERROR, 0))3188 if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR 3189 || err3 != REG_NOERROR)) 3252 3190 { 3253 3191 err = (err != REG_NOERROR ? err … … 3264 3202 if (mctx->state_log[to_idx]) 3265 3203 { 3266 int ret;3204 bool ok; 3267 3205 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, 3268 3206 next_node)) … … 3270 3208 err = re_node_set_init_copy (&union_set, 3271 3209 &mctx->state_log[to_idx]->nodes); 3272 ret= re_node_set_insert (&union_set, next_node);3273 if ( BE (err != REG_NOERROR || ret < 0, 0))3210 ok = re_node_set_insert (&union_set, next_node); 3211 if (__glibc_unlikely (err != REG_NOERROR || ! ok)) 3274 3212 { 3275 3213 re_node_set_free (&union_set); … … 3281 3219 { 3282 3220 err = re_node_set_init_1 (&union_set, next_node); 3283 if ( BE (err != REG_NOERROR, 0))3221 if (__glibc_unlikely (err != REG_NOERROR)) 3284 3222 return err; 3285 3223 } 3286 3224 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); 3287 3225 re_node_set_free (&union_set); 3288 if ( BE(mctx->state_log[to_idx] == NULL3289 && err != REG_NOERROR, 0))3226 if (__glibc_unlikely (mctx->state_log[to_idx] == NULL 3227 && err != REG_NOERROR)) 3290 3228 return err; 3291 3229 } … … 3296 3234 3297 3235 /* Build transition table for the state. 3298 Return 1 if succeeded, otherwise return NULL. */ 3299 3300 static int 3301 internal_function 3236 Return true if successful. */ 3237 3238 static bool __attribute_noinline__ 3302 3239 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) 3303 3240 { 3304 3241 reg_errcode_t err; 3305 int i, j, ch, need_word_trtable = 0; 3242 Idx i, j; 3243 int ch; 3244 bool need_word_trtable = false; 3306 3245 bitset_word_t elem, mask; 3307 bool dests_node_malloced = false; 3308 bool dest_states_malloced = false; 3309 int ndests; /* Number of the destination states from `state'. */ 3246 Idx ndests; /* Number of the destination states from 'state'. */ 3310 3247 re_dfastate_t **trtable; 3311 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; 3312 re_node_set follows, *dests_node; 3313 bitset_t *dests_ch; 3248 re_dfastate_t *dest_states[SBC_MAX]; 3249 re_dfastate_t *dest_states_word[SBC_MAX]; 3250 re_dfastate_t *dest_states_nl[SBC_MAX]; 3251 re_node_set follows; 3314 3252 bitset_t acceptable; 3315 3253 3316 struct dests_alloc3317 {3318 re_node_set dests_node[SBC_MAX];3319 bitset_t dests_ch[SBC_MAX];3320 } *dests_alloc;3321 3322 3254 /* We build DFA states which corresponds to the destination nodes 3323 from `state'. `dests_node[i]' represents the nodes which i-th3324 destination state contains, and `dests_ch[i]' represents the3255 from 'state'. 'dests_node[i]' represents the nodes which i-th 3256 destination state contains, and 'dests_ch[i]' represents the 3325 3257 characters which i-th destination state accepts. */ 3326 if (__libc_use_alloca (sizeof (struct dests_alloc))) 3327 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); 3328 else 3329 { 3330 dests_alloc = re_malloc (struct dests_alloc, 1); 3331 if (BE (dests_alloc == NULL, 0)) 3332 return 0; 3333 dests_node_malloced = true; 3334 } 3335 dests_node = dests_alloc->dests_node; 3336 dests_ch = dests_alloc->dests_ch; 3337 3338 /* Initialize transiton table. */ 3258 re_node_set dests_node[SBC_MAX]; 3259 bitset_t dests_ch[SBC_MAX]; 3260 3261 /* Initialize transition table. */ 3339 3262 state->word_trtable = state->trtable = NULL; 3340 3263 3341 /* At first, group all nodes belonging to `state' into several3264 /* At first, group all nodes belonging to 'state' into several 3342 3265 destinations. */ 3343 3266 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); 3344 if (BE (ndests <= 0, 0)) 3345 { 3346 if (dests_node_malloced) 3347 free (dests_alloc); 3348 /* Return 0 in case of an error, 1 otherwise. */ 3267 if (__glibc_unlikely (ndests <= 0)) 3268 { 3269 /* Return false in case of an error, true otherwise. */ 3349 3270 if (ndests == 0) 3350 3271 { 3351 3272 state->trtable = (re_dfastate_t **) 3352 3273 calloc (sizeof (re_dfastate_t *), SBC_MAX); 3353 return 1; 3354 } 3355 return 0; 3274 if (__glibc_unlikely (state->trtable == NULL)) 3275 return false; 3276 return true; 3277 } 3278 return false; 3356 3279 } 3357 3280 3358 3281 err = re_node_set_alloc (&follows, ndests + 1); 3359 if (BE (err != REG_NOERROR, 0)) 3360 goto out_free; 3361 3362 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX 3363 + ndests * 3 * sizeof (re_dfastate_t *))) 3364 dest_states = (re_dfastate_t **) 3365 alloca (ndests * 3 * sizeof (re_dfastate_t *)); 3366 else 3367 { 3368 dest_states = (re_dfastate_t **) 3369 malloc (ndests * 3 * sizeof (re_dfastate_t *)); 3370 if (BE (dest_states == NULL, 0)) 3371 { 3372 out_free: 3373 if (dest_states_malloced) 3374 free (dest_states); 3375 re_node_set_free (&follows); 3376 for (i = 0; i < ndests; ++i) 3377 re_node_set_free (dests_node + i); 3378 if (dests_node_malloced) 3379 free (dests_alloc); 3380 return 0; 3381 } 3382 dest_states_malloced = true; 3383 } 3384 dest_states_word = dest_states + ndests; 3385 dest_states_nl = dest_states_word + ndests; 3282 if (__glibc_unlikely (err != REG_NOERROR)) 3283 { 3284 out_free: 3285 re_node_set_free (&follows); 3286 for (i = 0; i < ndests; ++i) 3287 re_node_set_free (dests_node + i); 3288 return false; 3289 } 3290 3386 3291 bitset_empty (acceptable); 3387 3292 … … 3389 3294 for (i = 0; i < ndests; ++i) 3390 3295 { 3391 intnext_node;3296 Idx next_node; 3392 3297 re_node_set_empty (&follows); 3393 3298 /* Merge the follows of this destination states. */ … … 3398 3303 { 3399 3304 err = re_node_set_merge (&follows, dfa->eclosures + next_node); 3400 if ( BE (err != REG_NOERROR, 0))3305 if (__glibc_unlikely (err != REG_NOERROR)) 3401 3306 goto out_free; 3402 3307 } 3403 3308 } 3404 3309 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); 3405 if ( BE (dest_states[i] == NULL && err != REG_NOERROR, 0))3310 if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR)) 3406 3311 goto out_free; 3407 3312 /* If the new state has context constraint, … … 3411 3316 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, 3412 3317 CONTEXT_WORD); 3413 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) 3318 if (__glibc_unlikely (dest_states_word[i] == NULL 3319 && err != REG_NOERROR)) 3414 3320 goto out_free; 3415 3321 3416 3322 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) 3417 need_word_trtable = 1;3323 need_word_trtable = true; 3418 3324 3419 3325 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, 3420 3326 CONTEXT_NEWLINE); 3421 if ( BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))3327 if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR)) 3422 3328 goto out_free; 3423 3329 } 3424 3330 else 3425 3331 { … … 3430 3336 } 3431 3337 3432 if (! BE (need_word_trtable, 0))3338 if (!__glibc_unlikely (need_word_trtable)) 3433 3339 { 3434 3340 /* We don't care about whether the following character is a word … … 3438 3344 trtable = state->trtable = 3439 3345 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); 3440 if ( BE (trtable == NULL, 0))3346 if (__glibc_unlikely (trtable == NULL)) 3441 3347 goto out_free; 3442 3348 … … 3446 3352 elem; 3447 3353 mask <<= 1, elem >>= 1, ++ch) 3448 if ( BE (elem & 1, 0))3354 if (__glibc_unlikely (elem & 1)) 3449 3355 { 3450 3356 /* There must be exactly one destination which accepts … … 3469 3375 trtable = state->word_trtable = 3470 3376 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); 3471 if ( BE (trtable == NULL, 0))3377 if (__glibc_unlikely (trtable == NULL)) 3472 3378 goto out_free; 3473 3379 … … 3477 3383 elem; 3478 3384 mask <<= 1, elem >>= 1, ++ch) 3479 if ( BE (elem & 1, 0))3385 if (__glibc_unlikely (elem & 1)) 3480 3386 { 3481 3387 /* There must be exactly one destination which accepts … … 3507 3413 } 3508 3414 3509 if (dest_states_malloced)3510 free (dest_states);3511 3512 3415 re_node_set_free (&follows); 3513 3416 for (i = 0; i < ndests; ++i) 3514 3417 re_node_set_free (dests_node + i); 3515 3516 if (dests_node_malloced) 3517 free (dests_alloc); 3518 3519 return 1; 3418 return true; 3520 3419 } 3521 3420 … … 3523 3422 Then for all destinations, set the nodes belonging to the destination 3524 3423 to DESTS_NODE[i] and set the characters accepted by the destination 3525 to DEST_CH[i]. This function return the number of destinations. */3526 3527 static int 3528 internal_function 3424 to DEST_CH[i]. Return the number of destinations if successful, 3425 -1 on internal error. */ 3426 3427 static Idx 3529 3428 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, 3530 3429 re_node_set *dests_node, bitset_t *dests_ch) 3531 3430 { 3532 3431 reg_errcode_t err; 3533 int result;3534 inti, j, k;3535 int ndests; /* Number of the destinations from `state'. */3432 bool ok; 3433 Idx i, j, k; 3434 Idx ndests; /* Number of the destinations from 'state'. */ 3536 3435 bitset_t accepts; /* Characters a node can accept. */ 3537 3436 const re_node_set *cur_nodes = &state->nodes; … … 3539 3438 ndests = 0; 3540 3439 3541 /* For all the nodes belonging to `state', */3440 /* For all the nodes belonging to 'state', */ 3542 3441 for (i = 0; i < cur_nodes->nelem; ++i) 3543 3442 { … … 3555 3454 else if (type == OP_PERIOD) 3556 3455 { 3557 #ifdef RE_ENABLE_I18N3558 3456 if (dfa->mb_cur_max > 1) 3559 3457 bitset_merge (accepts, dfa->sb_char); 3560 3458 else 3561 #endif3562 3459 bitset_set_all (accepts); 3563 3460 if (!(dfa->syntax & RE_DOT_NEWLINE)) … … 3566 3463 bitset_clear (accepts, '\0'); 3567 3464 } 3568 #ifdef RE_ENABLE_I18N3569 3465 else if (type == OP_UTF8_PERIOD) 3570 { 3571 memset (accepts, '\xff', sizeof (bitset_t) / 2); 3466 { 3467 if (ASCII_CHARS % BITSET_WORD_BITS == 0) 3468 memset (accepts, -1, ASCII_CHARS / CHAR_BIT); 3469 else 3470 bitset_merge (accepts, utf8_sb_map); 3572 3471 if (!(dfa->syntax & RE_DOT_NEWLINE)) 3573 3472 bitset_clear (accepts, '\n'); 3574 3473 if (dfa->syntax & RE_DOT_NOT_NULL) 3575 3474 bitset_clear (accepts, '\0'); 3576 } 3577 #endif 3475 } 3578 3476 else 3579 3477 continue; 3580 3478 3581 /* Check the `accepts' and sift the characters which are not3479 /* Check the 'accepts' and sift the characters which are not 3582 3480 match it the context. */ 3583 3481 if (constraint) … … 3606 3504 continue; 3607 3505 } 3608 #ifdef RE_ENABLE_I18N3609 3506 if (dfa->mb_cur_max > 1) 3610 3507 for (j = 0; j < BITSET_WORDS; ++j) 3611 3508 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); 3612 3509 else 3613 #endif3614 3510 for (j = 0; j < BITSET_WORDS; ++j) 3615 3511 any_set |= (accepts[j] &= dfa->word_char[j]); … … 3625 3521 continue; 3626 3522 } 3627 #ifdef RE_ENABLE_I18N3628 3523 if (dfa->mb_cur_max > 1) 3629 3524 for (j = 0; j < BITSET_WORDS; ++j) 3630 3525 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); 3631 3526 else 3632 #endif3633 3527 for (j = 0; j < BITSET_WORDS; ++j) 3634 3528 any_set |= (accepts[j] &= ~dfa->word_char[j]); … … 3638 3532 } 3639 3533 3640 /* Then divide `accepts' into DFA states, or create a new3534 /* Then divide 'accepts' into DFA states, or create a new 3641 3535 state. Above, we make sure that accepts is not empty. */ 3642 3536 for (j = 0; j < ndests; ++j) … … 3651 3545 continue; 3652 3546 3653 /* Enumerate the intersection set of this state and `accepts'. */3547 /* Enumerate the intersection set of this state and 'accepts'. */ 3654 3548 has_intersec = 0; 3655 3549 for (k = 0; k < BITSET_WORDS; ++k) … … 3659 3553 continue; 3660 3554 3661 /* Then check if this state is a subset of `accepts'. */3555 /* Then check if this state is a subset of 'accepts'. */ 3662 3556 not_subset = not_consumed = 0; 3663 3557 for (k = 0; k < BITSET_WORDS; ++k) … … 3667 3561 } 3668 3562 3669 /* If this state isn't a subset of `accepts', create a3670 new group state, which has the `remains'. */3563 /* If this state isn't a subset of 'accepts', create a 3564 new group state, which has the 'remains'. */ 3671 3565 if (not_subset) 3672 3566 { … … 3674 3568 bitset_copy (dests_ch[j], intersec); 3675 3569 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); 3676 if ( BE (err != REG_NOERROR, 0))3570 if (__glibc_unlikely (err != REG_NOERROR)) 3677 3571 goto error_return; 3678 3572 ++ndests; … … 3680 3574 3681 3575 /* Put the position in the current group. */ 3682 result= re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);3683 if ( BE (result < 0, 0))3576 ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); 3577 if (__glibc_unlikely (! ok)) 3684 3578 goto error_return; 3685 3579 … … 3693 3587 bitset_copy (dests_ch[ndests], accepts); 3694 3588 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); 3695 if ( BE (err != REG_NOERROR, 0))3589 if (__glibc_unlikely (err != REG_NOERROR)) 3696 3590 goto error_return; 3697 3591 ++ndests; … … 3699 3593 } 3700 3594 } 3595 assume (ndests <= SBC_MAX); 3701 3596 return ndests; 3702 3597 error_return: … … 3706 3601 } 3707 3602 3708 #ifdef RE_ENABLE_I18N 3709 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts. 3603 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. 3710 3604 Return the number of the bytes the node accepts. 3711 3605 STR_IDX is the current index of the input string. … … 3715 3609 can only accept one byte. */ 3716 3610 3611 #ifdef _LIBC 3612 # include <locale/weight.h> 3613 #endif 3614 3717 3615 static int 3718 internal_function 3719 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, 3720 const re_string_t *input, int str_idx) 3616 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, 3617 const re_string_t *input, Idx str_idx) 3721 3618 { 3722 3619 const re_token_t *node = dfa->nodes + node_idx; 3723 3620 int char_len, elem_len; 3724 inti;3725 3726 if ( BE (node->type == OP_UTF8_PERIOD, 0))3621 Idx i; 3622 3623 if (__glibc_unlikely (node->type == OP_UTF8_PERIOD)) 3727 3624 { 3728 3625 unsigned char c = re_string_byte_at (input, str_idx), d; 3729 if ( BE (c < 0xc2, 1))3626 if (__glibc_likely (c < 0xc2)) 3730 3627 return 0; 3731 3628 … … 3779 3676 { 3780 3677 if (char_len <= 1) 3781 3678 return 0; 3782 3679 /* FIXME: I don't think this if is needed, as both '\n' 3783 3680 and '\0' are char_len == 1. */ 3784 3681 /* '.' accepts any one character except the following two cases. */ 3785 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&3786 re_string_byte_at (input, str_idx) == '\n') ||3787 ((dfa->syntax & RE_DOT_NOT_NULL) &&3788 re_string_byte_at (input, str_idx) == '\0'))3682 if ((!(dfa->syntax & RE_DOT_NEWLINE) 3683 && re_string_byte_at (input, str_idx) == '\n') 3684 || ((dfa->syntax & RE_DOT_NOT_NULL) 3685 && re_string_byte_at (input, str_idx) == '\0')) 3789 3686 return 0; 3790 3687 return char_len; … … 3798 3695 { 3799 3696 const re_charset_t *cset = node->opr.mbcset; 3800 # 3697 #ifdef _LIBC 3801 3698 const unsigned char *pin 3802 3699 = ((const unsigned char *) re_string_get_buffer (input) + str_idx); 3803 intj;3700 Idx j; 3804 3701 uint32_t nrules; 3805 # endif /* _LIBC */3702 #endif 3806 3703 int match_len = 0; 3807 3704 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) … … 3826 3723 } 3827 3724 3828 # 3725 #ifdef _LIBC 3829 3726 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3830 3727 if (nrules != 0) … … 3834 3731 const unsigned char *weights, *extra; 3835 3732 const char *collseqwc; 3836 int32_t idx;3837 /* This #include defines a local function! */3838 # include <locale/weight.h>3839 3733 3840 3734 /* match with collating_symbol? */ … … 3872 3766 } 3873 3767 /* match with range expression? */ 3768 /* FIXME: Implement rational ranges here, too. */ 3874 3769 for (i = 0; i < cset->nranges; ++i) 3875 3770 if (cset->range_starts[i] <= in_collseq … … 3892 3787 indirect = (const int32_t *) 3893 3788 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3894 idx = findidx (&cp); 3789 int32_t idx = findidx (table, indirect, extra, &cp, elem_len); 3790 int32_t rule = idx >> 24; 3791 idx &= 0xffffff; 3895 3792 if (idx > 0) 3896 for (i = 0; i < cset->nequiv_classes; ++i)3897 {3898 int32_t equiv_class_idx = cset->equiv_classes[i];3899 size_t weight_len = weights[idx];3900 if (weight_len == weights[equiv_class_idx])3901 {3902 int cnt = 0;3903 while (cnt <= weight_len3904 && (weights[equiv_class_idx + 1 + cnt]3905 == weights[idx + 1 + cnt]))3906 ++cnt;3907 if (cnt > weight_len)3908 3909 3910 3911 3912 3913 3793 { 3794 size_t weight_len = weights[idx]; 3795 for (i = 0; i < cset->nequiv_classes; ++i) 3796 { 3797 int32_t equiv_class_idx = cset->equiv_classes[i]; 3798 int32_t equiv_class_rule = equiv_class_idx >> 24; 3799 equiv_class_idx &= 0xffffff; 3800 if (weights[equiv_class_idx] == weight_len 3801 && equiv_class_rule == rule 3802 && memcmp (weights + idx + 1, 3803 weights + equiv_class_idx + 1, 3804 weight_len) == 0) 3805 { 3806 match_len = elem_len; 3807 goto check_node_accept_bytes_match; 3808 } 3809 } 3810 } 3914 3811 } 3915 3812 } 3916 3813 else 3917 # 3814 #endif /* _LIBC */ 3918 3815 { 3919 3816 /* match with range expression? */ 3920 #if __GNUC__ >= 23921 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};3922 #else3923 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};3924 cmp_buf[2] = wc;3925 #endif3926 3817 for (i = 0; i < cset->nranges; ++i) 3927 3818 { 3928 cmp_buf[0] = cset->range_starts[i]; 3929 cmp_buf[4] = cset->range_ends[i]; 3930 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 3931 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) 3819 if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) 3932 3820 { 3933 3821 match_len = char_len; … … 3950 3838 } 3951 3839 3952 # 3840 #ifdef _LIBC 3953 3841 static unsigned int 3954 internal_function3955 3842 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) 3956 3843 { … … 3977 3864 for (idx = 0; idx < extrasize;) 3978 3865 { 3979 int mbs_cnt, found = 0; 3866 int mbs_cnt; 3867 bool found = false; 3980 3868 int32_t elem_mbs_len; 3981 3869 /* Skip the name of collating element name. */ … … 3989 3877 if (mbs_cnt == elem_mbs_len) 3990 3878 /* Found the entry. */ 3991 found = 1;3879 found = true; 3992 3880 } 3993 3881 /* Skip the byte sequence of the collating element. */ … … 3998 3886 idx += sizeof (uint32_t); 3999 3887 /* Skip the wide char sequence of the collating element. */ 4000 idx = idx + sizeof (uint32_t) * ( extra[idx]+ 1);3888 idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); 4001 3889 /* If we found the entry, return the sequence value. */ 4002 3890 if (found) … … 4008 3896 } 4009 3897 } 4010 # endif /* _LIBC */ 4011 #endif /* RE_ENABLE_I18N */ 3898 #endif /* _LIBC */ 4012 3899 4013 3900 /* Check whether the node accepts the byte which is IDX-th 4014 3901 byte of the INPUT. */ 4015 3902 4016 static int 4017 internal_function 3903 static bool 4018 3904 check_node_accept (const re_match_context_t *mctx, const re_token_t *node, 4019 intidx)3905 Idx idx) 4020 3906 { 4021 3907 unsigned char ch; … … 4025 3911 case CHARACTER: 4026 3912 if (node->opr.c != ch) 4027 return 0;3913 return false; 4028 3914 break; 4029 3915 4030 3916 case SIMPLE_BRACKET: 4031 3917 if (!bitset_contain (node->opr.sbcset, ch)) 4032 return 0;3918 return false; 4033 3919 break; 4034 3920 4035 #ifdef RE_ENABLE_I18N4036 3921 case OP_UTF8_PERIOD: 4037 if (ch >= 0x80) 4038 return 0; 4039 /* FALLTHROUGH */ 4040 #endif 3922 if (ch >= ASCII_CHARS) 3923 return false; 3924 FALLTHROUGH; 4041 3925 case OP_PERIOD: 4042 3926 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) 4043 3927 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) 4044 return 0;3928 return false; 4045 3929 break; 4046 3930 4047 3931 default: 4048 return 0;3932 return false; 4049 3933 } 4050 3934 … … 4056 3940 mctx->eflags); 4057 3941 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) 4058 return 0;4059 } 4060 4061 return 1;3942 return false; 3943 } 3944 3945 return true; 4062 3946 } 4063 3947 … … 4065 3949 4066 3950 static reg_errcode_t 4067 internal_function 4068 extend_buffers (re_match_context_t *mctx )3951 __attribute_warn_unused_result__ 3952 extend_buffers (re_match_context_t *mctx, int min_len) 4069 3953 { 4070 3954 reg_errcode_t ret; 4071 3955 re_string_t *pstr = &mctx->input; 4072 3956 4073 /* Double the lengthes of the buffers. */ 4074 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); 4075 if (BE (ret != REG_NOERROR, 0)) 3957 /* Avoid overflow. */ 3958 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 3959 <= pstr->bufs_len)) 3960 return REG_ESPACE; 3961 3962 /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ 3963 ret = re_string_realloc_buffers (pstr, 3964 MAX (min_len, 3965 MIN (pstr->len, pstr->bufs_len * 2))); 3966 if (__glibc_unlikely (ret != REG_NOERROR)) 4076 3967 return ret; 4077 3968 … … 4084 3975 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, 4085 3976 pstr->bufs_len + 1); 4086 if ( BE (new_array == NULL, 0))3977 if (__glibc_unlikely (new_array == NULL)) 4087 3978 return REG_ESPACE; 4088 3979 mctx->state_log = new_array; … … 4092 3983 if (pstr->icase) 4093 3984 { 4094 #ifdef RE_ENABLE_I18N4095 3985 if (pstr->mb_cur_max > 1) 4096 3986 { 4097 3987 ret = build_wcs_upper_buffer (pstr); 4098 if ( BE (ret != REG_NOERROR, 0))3988 if (__glibc_unlikely (ret != REG_NOERROR)) 4099 3989 return ret; 4100 3990 } 4101 3991 else 4102 #endif /* RE_ENABLE_I18N */4103 3992 build_upper_buffer (pstr); 4104 3993 } 4105 3994 else 4106 3995 { 4107 #ifdef RE_ENABLE_I18N4108 3996 if (pstr->mb_cur_max > 1) 4109 3997 build_wcs_buffer (pstr); 4110 3998 else 4111 #endif /* RE_ENABLE_I18N */4112 3999 { 4113 4000 if (pstr->trans != NULL) … … 4125 4012 4126 4013 static reg_errcode_t 4127 internal_function 4128 match_ctx_init (re_match_context_t *mctx, int eflags, intn)4014 __attribute_warn_unused_result__ 4015 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) 4129 4016 { 4130 4017 mctx->eflags = eflags; … … 4132 4019 if (n > 0) 4133 4020 { 4021 /* Avoid overflow. */ 4022 size_t max_object_size = 4023 MAX (sizeof (struct re_backref_cache_entry), 4024 sizeof (re_sub_match_top_t *)); 4025 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n)) 4026 return REG_ESPACE; 4027 4134 4028 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); 4135 4029 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); 4136 if ( BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))4030 if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL)) 4137 4031 return REG_ESPACE; 4138 4032 } … … 4153 4047 4154 4048 static void 4155 internal_function4156 4049 match_ctx_clean (re_match_context_t *mctx) 4157 4050 { 4158 intst_idx;4051 Idx st_idx; 4159 4052 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) 4160 4053 { 4161 intsl_idx;4054 Idx sl_idx; 4162 4055 re_sub_match_top_t *top = mctx->sub_tops[st_idx]; 4163 4056 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) … … 4173 4066 re_free (top->path); 4174 4067 } 4175 free (top);4068 re_free (top); 4176 4069 } 4177 4070 … … 4183 4076 4184 4077 static void 4185 internal_function4186 4078 match_ctx_free (re_match_context_t *mctx) 4187 4079 { … … 4198 4090 4199 4091 static reg_errcode_t 4200 internal_function 4201 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, intfrom,4202 intto)4092 __attribute_warn_unused_result__ 4093 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, 4094 Idx to) 4203 4095 { 4204 4096 if (mctx->nbkref_ents >= mctx->abkref_ents) … … 4207 4099 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, 4208 4100 mctx->abkref_ents * 2); 4209 if ( BE (new_entry == NULL, 0))4101 if (__glibc_unlikely (new_entry == NULL)) 4210 4102 { 4211 4103 re_free (mctx->bkref_ents); … … 4235 4127 to all zeros if FROM != TO. */ 4236 4128 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map 4237 = (from == to ? ~0: 0);4129 = (from == to ? -1 : 0); 4238 4130 4239 4131 mctx->bkref_ents[mctx->nbkref_ents++].more = 0; … … 4243 4135 } 4244 4136 4245 /* Search for the first entry which hasthe same str_idx, or -1 if none is4137 /* Return the first entry with the same str_idx, or -1 if none is 4246 4138 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ 4247 4139 4248 static int 4249 internal_function 4250 search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) 4251 { 4252 int left, right, mid, last; 4140 static Idx 4141 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) 4142 { 4143 Idx left, right, mid, last; 4253 4144 last = right = mctx->nbkref_ents; 4254 4145 for (left = 0; left < right;) … … 4270 4161 4271 4162 static reg_errcode_t 4272 internal_function 4273 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) 4274 { 4275 #ifdef DEBUG 4276 assert (mctx->sub_tops != NULL); 4277 assert (mctx->asub_tops > 0); 4278 #endif 4279 if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) 4280 { 4281 int new_asub_tops = mctx->asub_tops * 2; 4163 __attribute_warn_unused_result__ 4164 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) 4165 { 4166 DEBUG_ASSERT (mctx->sub_tops != NULL); 4167 DEBUG_ASSERT (mctx->asub_tops > 0); 4168 if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops)) 4169 { 4170 Idx new_asub_tops = mctx->asub_tops * 2; 4282 4171 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, 4283 4172 re_sub_match_top_t *, 4284 4173 new_asub_tops); 4285 if ( BE (new_array == NULL, 0))4174 if (__glibc_unlikely (new_array == NULL)) 4286 4175 return REG_ESPACE; 4287 4176 mctx->sub_tops = new_array; … … 4289 4178 } 4290 4179 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); 4291 if ( BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))4180 if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL)) 4292 4181 return REG_ESPACE; 4293 4182 mctx->sub_tops[mctx->nsub_tops]->node = node; … … 4297 4186 4298 4187 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches 4299 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ 4188 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 4189 Return the new entry if successful, NULL if memory is exhausted. */ 4300 4190 4301 4191 static re_sub_match_last_t * 4302 internal_function 4303 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) 4192 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) 4304 4193 { 4305 4194 re_sub_match_last_t *new_entry; 4306 if ( BE (subtop->nlasts == subtop->alasts, 0))4307 { 4308 intnew_alasts = 2 * subtop->alasts + 1;4195 if (__glibc_unlikely (subtop->nlasts == subtop->alasts)) 4196 { 4197 Idx new_alasts = 2 * subtop->alasts + 1; 4309 4198 re_sub_match_last_t **new_array = re_realloc (subtop->lasts, 4310 4199 re_sub_match_last_t *, 4311 4200 new_alasts); 4312 if ( BE (new_array == NULL, 0))4201 if (__glibc_unlikely (new_array == NULL)) 4313 4202 return NULL; 4314 4203 subtop->lasts = new_array; … … 4316 4205 } 4317 4206 new_entry = calloc (1, sizeof (re_sub_match_last_t)); 4318 if ( BE (new_entry != NULL, 1))4207 if (__glibc_likely (new_entry != NULL)) 4319 4208 { 4320 4209 subtop->lasts[subtop->nlasts] = new_entry; … … 4327 4216 4328 4217 static void 4329 internal_function4330 4218 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 4331 re_dfastate_t **limited_sts, int last_node, intlast_str_idx)4219 re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) 4332 4220 { 4333 4221 sctx->sifted_states = sifted_sts; -
trunk/src/sed/lib/strerror.c
r599 r3613 1 /* strerror -- return a string corresponding to an error number. 2 This is a quickie version only intended as compatability glue 3 for systems which predate the ANSI C definition of the function; 4 the glibc version is recommended for more general use. 1 /* strerror.c --- POSIX compatible system error routine 5 2 6 Copyright (C) 1998Free Software Foundation, Inc.3 Copyright (C) 2007-2022 Free Software Foundation, Inc. 7 4 8 This program is free software; you can redistribute it and/or modify it9 under the terms of the GNU General Public License as published by the10 Free Software Foundation; either version 2, or (at your option) any11 later version.5 This file is free software: you can redistribute it and/or modify 6 it under the terms of the GNU Lesser General Public License as 7 published by the Free Software Foundation; either version 2.1 of the 8 License, or (at your option) any later version. 12 9 13 This programis distributed in the hope that it will be useful,10 This file is distributed in the hope that it will be useful, 14 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details.13 GNU Lesser General Public License for more details. 17 14 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software 20 Foundation, 51 Franklin Street, Fifth Floor, 21 Boston, MA 02110-1301, USA. */ 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 22 17 23 #include "config.h"18 #include <config.h> 24 19 25 #ifndef HAVE_STRERROR 20 /* Specification. */ 21 #include <string.h> 26 22 27 # ifndef BOOTSTRAP 28 # include <stdio.h> 29 # endif 30 # ifdef HAVE_STRING_H 31 # include <string.h> 32 # endif 33 # include <errno.h> 34 # undef strerror 23 #include <errno.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 35 27 36 extern int sys_nerr; 37 extern char *sys_errlist[]; 28 #include "intprops.h" 29 #include "strerror-override.h" 30 31 /* Use the system functions, not the gnulib overrides in this file. */ 32 #undef sprintf 38 33 39 34 char * 40 strerror (e)41 int e; 35 strerror (int n) 36 #undef strerror 42 37 { 43 static char unknown_string[] =44 "Unknown error code #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";38 static char buf[STACKBUF_LEN]; 39 size_t len; 45 40 46 if (0<=e && e<sys_nerr) 47 return sys_errlist[e]; 48 sprintf(unknown_string+20, "%d", e); 49 return unknown_string; 41 /* Cast away const, due to the historical signature of strerror; 42 callers should not be modifying the string. */ 43 const char *msg = strerror_override (n); 44 if (msg) 45 return (char *) msg; 46 47 msg = strerror (n); 48 49 /* Our strerror_r implementation might use the system's strerror 50 buffer, so all other clients of strerror have to see the error 51 copied into a buffer that we manage. This is not thread-safe, 52 even if the system strerror is, but portable programs shouldn't 53 be using strerror if they care about thread-safety. */ 54 if (!msg || !*msg) 55 { 56 static char const fmt[] = "Unknown error %d"; 57 static_assert (sizeof buf >= sizeof (fmt) + INT_STRLEN_BOUND (n)); 58 sprintf (buf, fmt, n); 59 errno = EINVAL; 60 return buf; 61 } 62 63 /* Fix STACKBUF_LEN if this ever aborts. */ 64 len = strlen (msg); 65 if (sizeof buf <= len) 66 abort (); 67 68 memcpy (buf, msg, len + 1); 69 return buf; 50 70 } 51 52 #endif /* !HAVE_STRERROR */ -
trunk/src/sed/lib/strverscmp.c
r599 r3613 1 1 /* Compare strings while treating digits characters numerically. 2 Copyright (C) 1997 , 2000, 2002 Free Software Foundation, Inc.2 Copyright (C) 1997-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 Contributed by Jean-Fran çois Bignolles <bignolle@ecoledoc.ibp.fr>, 1997.4 Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997. 5 5 6 6 The GNU C Library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU L ibrary General Public License as8 published by the Free Software Foundation; either version 2 of the9 License, or (at your option) any later version.7 modify it under the terms of the GNU Lesser General Public 8 License as published by the Free Software Foundation; either 9 version 2.1 of the License, or (at your option) any later version. 10 10 11 11 The GNU C Library is distributed in the hope that it will be useful, 12 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 L ibraryGeneral Public License for more details.14 Lesser General Public License for more details. 15 15 16 You should have received a copy of the GNU Library General Public 17 License along with the GNU C Library; see the file COPYING.LIB. If not, 18 write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 Boston, MA 02110-1301, USA. */ 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 20 19 21 #if HAVE_CONFIG_H 22 # include <config.h> 20 #ifndef _LIBC 21 # include <libc-config.h> 22 # define __strverscmp strverscmp 23 23 #endif 24 24 25 #include <stdint.h> 25 26 #include <string.h> 26 27 #include <ctype.h> … … 28 29 /* states: S_N: normal, S_I: comparing integral part, S_F: comparing 29 30 fractional parts, S_Z: idem but with leading Zeroes only */ 30 #define S_N 0x031 #define S_I 0x432 #define S_F 0x833 #define S_Z 0xC31 #define S_N 0x0 32 #define S_I 0x3 33 #define S_F 0x6 34 #define S_Z 0x9 34 35 35 36 /* result_type: CMP: return diff; LEN: compare using len_diff/diff */ 36 #define CMP 237 #define LEN 337 #define CMP 2 38 #define LEN 3 38 39 39 40 /* ISDIGIT differs from isdigit, as follows:41 - Its arg may be any int or unsigned int; it need not be an unsigned char.42 - It's guaranteed to evaluate its argument exactly once.43 - It's typically faster.44 POSIX says that only '0' through '9' are digits. Prefer ISDIGIT to45 ISDIGIT_LOCALE unless it's important to use the locale's definition46 of `digit' even when the host does not conform to POSIX. */47 #define ISDIGIT(c) ((unsigned) (c) - '0' <= 9)48 49 #undef __strverscmp50 #undef strverscmp51 52 #ifndef weak_alias53 # define __strverscmp strverscmp54 #endif55 40 56 41 /* Compare S1 and S2 as strings holding indices/version numbers, … … 64 49 const unsigned char *p1 = (const unsigned char *) s1; 65 50 const unsigned char *p2 = (const unsigned char *) s2; 66 unsigned char c1, c2;67 int state;68 int diff;69 51 70 /* Symbol(s) 0 [1-9] others (padding)71 Transition (10) 0 (01) d (00) x (11) -*/72 static const u nsigned int next_state[] =52 /* Symbol(s) 0 [1-9] others 53 Transition (10) 0 (01) d (00) x */ 54 static const uint_least8_t next_state[] = 73 55 { 74 /* state x d 0 -*/75 /* S_N */ S_N, S_I, S_Z, S_N,76 /* S_I */ S_N, S_I, S_I, S_I,77 /* S_F */ S_N, S_F, S_F, S_F,78 /* S_Z */ S_N, S_F, S_Z , S_Z56 /* state x d 0 */ 57 /* S_N */ S_N, S_I, S_Z, 58 /* S_I */ S_N, S_I, S_I, 59 /* S_F */ S_N, S_F, S_F, 60 /* S_Z */ S_N, S_F, S_Z 79 61 }; 80 62 81 static const int result_type[] =63 static const int_least8_t result_type[] = 82 64 { 83 /* state x/x x/d x/0 x/- d/x d/d d/0 d/- 84 0/x 0/d 0/0 0/- -/x -/d -/0 -/- */ 65 /* state x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0 */ 85 66 86 /* S_N */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, 87 CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, 88 /* S_I */ CMP, -1, -1, CMP, 1, LEN, LEN, CMP, 89 1, LEN, LEN, CMP, CMP, CMP, CMP, CMP, 90 /* S_F */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, 91 CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, 92 /* S_Z */ CMP, 1, 1, CMP, -1, CMP, CMP, CMP, 93 -1, CMP, CMP, CMP 67 /* S_N */ CMP, CMP, CMP, CMP, LEN, CMP, CMP, CMP, CMP, 68 /* S_I */ CMP, -1, -1, +1, LEN, LEN, +1, LEN, LEN, 69 /* S_F */ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, 70 /* S_Z */ CMP, +1, +1, -1, CMP, CMP, -1, CMP, CMP 94 71 }; 95 72 … … 97 74 return 0; 98 75 99 c1 = *p1++;100 c2 = *p2++;76 unsigned char c1 = *p1++; 77 unsigned char c2 = *p2++; 101 78 /* Hint: '0' is a digit too. */ 102 state = S_N | ((c1 == '0') + (ISDIGIT(c1) != 0));79 int state = S_N + ((c1 == '0') + (isdigit (c1) != 0)); 103 80 104 while ((diff = c1 - c2) == 0 && c1 != '\0') 81 int diff; 82 while ((diff = c1 - c2) == 0) 105 83 { 84 if (c1 == '\0') 85 return diff; 86 106 87 state = next_state[state]; 107 88 c1 = *p1++; 108 89 c2 = *p2++; 109 state |= (c1 == '0') + (ISDIGIT(c1) != 0);90 state += (c1 == '0') + (isdigit (c1) != 0); 110 91 } 111 92 112 state = result_type[state << 2 | ((c2 == '0') + (ISDIGIT (c2) != 0))];93 state = result_type[state * 3 + (((c2 == '0') + (isdigit (c2) != 0)))]; 113 94 114 95 switch (state) 115 96 { 116 97 case CMP: 117 98 return diff; 118 99 119 100 case LEN: 120 while ( ISDIGIT(*p1++))121 if (! ISDIGIT(*p2++))101 while (isdigit (*p1++)) 102 if (!isdigit (*p2++)) 122 103 return 1; 123 104 124 return ISDIGIT(*p2) ? -1 : diff;105 return isdigit (*p2) ? -1 : diff; 125 106 126 107 default: 127 108 return state; 128 109 } 129 110 } 130 #ifdef weak_alias 111 libc_hidden_def (__strverscmp) 131 112 weak_alias (__strverscmp, strverscmp) 132 #endif
Note:
See TracChangeset
for help on using the changeset viewer.