Changeset 3613 for trunk/src/sed/lib/regex_internal.c
- Timestamp:
- Sep 19, 2024, 2:34:43 AM (10 months ago)
- Location:
- trunk/src/sed
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/sed
-
Property svn:mergeinfo
set to
/vendor/sed/current merged eligible
-
Property svn:mergeinfo
set to
-
trunk/src/sed/lib/regex_internal.c
r2727 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 20 21 static void re_string_construct_common (const char *str, int len, 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 19 20 static void re_string_construct_common (const char *str, Idx len, 22 21 re_string_t *pstr, 23 RE_TRANSLATE_TYPE trans, inticase,24 const re_dfa_t *dfa) internal_function;22 RE_TRANSLATE_TYPE trans, bool icase, 23 const re_dfa_t *dfa); 25 24 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, 26 25 const re_node_set *nodes, 27 unsigned int hash) internal_function;26 re_hashval_t hash); 28 27 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, 29 28 const re_node_set *nodes, 30 29 unsigned int context, 31 unsigned int hash) internal_function; 30 re_hashval_t hash); 31 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, 32 Idx new_buf_len); 33 static void build_wcs_buffer (re_string_t *pstr); 34 static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr); 35 static void build_upper_buffer (re_string_t *pstr); 36 static void re_string_translate_buffer (re_string_t *pstr); 37 static unsigned int re_string_context_at (const re_string_t *input, Idx idx, 38 int eflags) __attribute__ ((pure)); 32 39 33 40 … … 38 45 39 46 static reg_errcode_t 40 internal_function 41 re_string_allocate (re_string_t *pstr, const char *str, int len, intinit_len,42 RE_TRANSLATE_TYPE trans, inticase, const re_dfa_t *dfa)47 __attribute_warn_unused_result__ 48 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len, 49 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) 43 50 { 44 51 reg_errcode_t ret; 45 intinit_buf_len;52 Idx init_buf_len; 46 53 47 54 /* Ensure at least one character fits into the buffers. */ … … 52 59 53 60 ret = re_string_realloc_buffers (pstr, init_buf_len); 54 if ( BE (ret != REG_NOERROR, 0))61 if (__glibc_unlikely (ret != REG_NOERROR)) 55 62 return ret; 56 63 … … 66 73 67 74 static reg_errcode_t 68 internal_function 69 re_string_construct (re_string_t *pstr, const char *str, intlen,70 RE_TRANSLATE_TYPE trans, inticase, const re_dfa_t *dfa)75 __attribute_warn_unused_result__ 76 re_string_construct (re_string_t *pstr, const char *str, Idx len, 77 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) 71 78 { 72 79 reg_errcode_t ret; … … 77 84 { 78 85 ret = re_string_realloc_buffers (pstr, len + 1); 79 if ( BE (ret != REG_NOERROR, 0))86 if (__glibc_unlikely (ret != REG_NOERROR)) 80 87 return ret; 81 88 } … … 84 91 if (icase) 85 92 { 86 #ifdef RE_ENABLE_I18N87 93 if (dfa->mb_cur_max > 1) 88 94 { … … 90 96 { 91 97 ret = build_wcs_upper_buffer (pstr); 92 if ( BE (ret != REG_NOERROR, 0))98 if (__glibc_unlikely (ret != REG_NOERROR)) 93 99 return ret; 94 100 if (pstr->valid_raw_len >= len) … … 97 103 break; 98 104 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); 99 if ( BE (ret != REG_NOERROR, 0))105 if (__glibc_unlikely (ret != REG_NOERROR)) 100 106 return ret; 101 107 } 102 108 } 103 109 else 104 #endif /* RE_ENABLE_I18N */105 110 build_upper_buffer (pstr); 106 111 } 107 112 else 108 113 { 109 #ifdef RE_ENABLE_I18N110 114 if (dfa->mb_cur_max > 1) 111 115 build_wcs_buffer (pstr); 112 116 else 113 #endif /* RE_ENABLE_I18N */114 117 { 115 118 if (trans != NULL) … … 129 132 130 133 static reg_errcode_t 131 internal_function 132 re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) 133 { 134 #ifdef RE_ENABLE_I18N 134 __attribute_warn_unused_result__ 135 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) 136 { 135 137 if (pstr->mb_cur_max > 1) 136 138 { 137 wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); 138 if (BE (new_wcs == NULL, 0)) 139 wint_t *new_wcs; 140 141 /* Avoid overflow in realloc. */ 142 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); 143 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) 144 < new_buf_len)) 145 return REG_ESPACE; 146 147 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); 148 if (__glibc_unlikely (new_wcs == NULL)) 139 149 return REG_ESPACE; 140 150 pstr->wcs = new_wcs; 141 151 if (pstr->offsets != NULL) 142 152 { 143 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);144 if ( BE (new_offsets == NULL, 0))153 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len); 154 if (__glibc_unlikely (new_offsets == NULL)) 145 155 return REG_ESPACE; 146 156 pstr->offsets = new_offsets; 147 157 } 148 158 } 149 #endif /* RE_ENABLE_I18N */150 159 if (pstr->mbs_allocated) 151 160 { 152 161 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, 153 162 new_buf_len); 154 if ( BE (new_mbs == NULL, 0))163 if (__glibc_unlikely (new_mbs == NULL)) 155 164 return REG_ESPACE; 156 165 pstr->mbs = new_mbs; … … 162 171 163 172 static void 164 internal_function 165 re_string_construct_common (const char *str, int len, re_string_t *pstr, 166 RE_TRANSLATE_TYPE trans, int icase, 173 re_string_construct_common (const char *str, Idx len, re_string_t *pstr, 174 RE_TRANSLATE_TYPE trans, bool icase, 167 175 const re_dfa_t *dfa) 168 176 { … … 171 179 pstr->raw_len = len; 172 180 pstr->trans = trans; 173 pstr->icase = icase ? 1 : 0;181 pstr->icase = icase; 174 182 pstr->mbs_allocated = (trans != NULL || icase); 175 183 pstr->mb_cur_max = dfa->mb_cur_max; … … 180 188 } 181 189 182 #ifdef RE_ENABLE_I18N183 190 184 191 /* Build wide character buffer PSTR->WCS. … … 194 201 195 202 static void 196 internal_function197 203 build_wcs_buffer (re_string_t *pstr) 198 204 { 199 205 #ifdef _LIBC 200 206 unsigned char buf[MB_LEN_MAX]; 201 assert(MB_LEN_MAX >= pstr->mb_cur_max);207 DEBUG_ASSERT (MB_LEN_MAX >= pstr->mb_cur_max); 202 208 #else 203 209 unsigned char buf[64]; 204 210 #endif 205 211 mbstate_t prev_st; 206 intbyte_idx, end_idx, remain_len;212 Idx byte_idx, end_idx, remain_len; 207 213 size_t mbclen; 208 214 … … 218 224 prev_st = pstr->cur_state; 219 225 /* Apply the translation if we need. */ 220 if ( BE (pstr->trans != NULL, 0))226 if (__glibc_unlikely (pstr->trans != NULL)) 221 227 { 222 228 int i, ch; … … 231 237 else 232 238 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; 233 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); 234 if (BE (mbclen == (size_t) -2, 0)) 239 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); 240 if (__glibc_unlikely (mbclen == (size_t) -1 || mbclen == 0 241 || (mbclen == (size_t) -2 242 && pstr->bufs_len >= pstr->len))) 243 { 244 /* We treat these cases as a singlebyte character. */ 245 mbclen = 1; 246 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; 247 if (__glibc_unlikely (pstr->trans != NULL)) 248 wc = pstr->trans[wc]; 249 pstr->cur_state = prev_st; 250 } 251 else if (__glibc_unlikely (mbclen == (size_t) -2)) 235 252 { 236 253 /* The buffer doesn't have enough space, finish to build. */ 237 254 pstr->cur_state = prev_st; 238 255 break; 239 }240 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))241 {242 /* We treat these cases as a singlebyte character. */243 mbclen = 1;244 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];245 if (BE (pstr->trans != NULL, 0))246 wc = pstr->trans[wc];247 pstr->cur_state = prev_st;248 256 } 249 257 … … 261 269 but for REG_ICASE. */ 262 270 263 static int264 internal_function 271 static reg_errcode_t 272 __attribute_warn_unused_result__ 265 273 build_wcs_upper_buffer (re_string_t *pstr) 266 274 { 267 275 mbstate_t prev_st; 268 intsrc_idx, byte_idx, end_idx, remain_len;276 Idx src_idx, byte_idx, end_idx, remain_len; 269 277 size_t mbclen; 270 278 #ifdef _LIBC 271 279 char buf[MB_LEN_MAX]; 272 assert (MB_LEN_MAX >= pstr->mb_cur_max);280 DEBUG_ASSERT (pstr->mb_cur_max <= MB_LEN_MAX); 273 281 #else 274 282 char buf[64]; … … 285 293 { 286 294 wchar_t wc; 287 288 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) 289 295 unsigned char ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; 296 297 if (isascii (ch) && mbsinit (&pstr->cur_state)) 290 298 { 291 /* In case of a singlebyte character. */292 pstr->mbs[byte_idx]293 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);294 299 /* The next step uses the assumption that wchar_t is encoded 295 300 ASCII-safe: all ASCII values can be converted like this. */ 296 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; 297 ++byte_idx; 298 continue; 301 wchar_t wcu = __towupper (ch); 302 if (isascii (wcu)) 303 { 304 pstr->mbs[byte_idx] = wcu; 305 pstr->wcs[byte_idx] = wcu; 306 byte_idx++; 307 continue; 308 } 299 309 } 300 310 301 311 remain_len = end_idx - byte_idx; 302 312 prev_st = pstr->cur_state; 303 mbclen = mbrtowc (&wc,304 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx305 + byte_idx), remain_len, &pstr->cur_state);306 if ( BE (mbclen + 2 > 2, 1))313 mbclen = __mbrtowc (&wc, 314 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx 315 + byte_idx), remain_len, &pstr->cur_state); 316 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2)) 307 317 { 308 wchar_t wcu = wc;309 if ( iswlower (wc))318 wchar_t wcu = __towupper (wc); 319 if (wcu != wc) 310 320 { 311 321 size_t mbcdlen; 312 322 313 wcu = towupper (wc); 314 mbcdlen = wcrtomb (buf, wcu, &prev_st); 315 if (BE (mbclen == mbcdlen, 1)) 323 mbcdlen = __wcrtomb (buf, wcu, &prev_st); 324 if (__glibc_likely (mbclen == mbcdlen)) 316 325 memcpy (pstr->mbs + byte_idx, buf, mbclen); 317 326 else … … 329 338 pstr->wcs[byte_idx++] = WEOF; 330 339 } 331 else if (mbclen == (size_t) -1 || mbclen == 0) 340 else if (mbclen == (size_t) -1 || mbclen == 0 341 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) 332 342 { 333 /* It is an invalid character or '\0'. Just use the byte. */334 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];343 /* It is an invalid character, an incomplete character 344 at the end of the string, or '\0'. Just use the byte. */ 335 345 pstr->mbs[byte_idx] = ch; 336 346 /* And also cast it to wide char. */ 337 347 pstr->wcs[byte_idx++] = (wchar_t) ch; 338 if ( BE (mbclen == (size_t) -1, 0))348 if (__glibc_unlikely (mbclen == (size_t) -1)) 339 349 pstr->cur_state = prev_st; 340 350 } … … 358 368 remain_len = end_idx - byte_idx; 359 369 prev_st = pstr->cur_state; 360 if ( BE (pstr->trans != NULL, 0))370 if (__glibc_unlikely (pstr->trans != NULL)) 361 371 { 362 372 int i, ch; … … 371 381 else 372 382 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; 373 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);374 if ( BE (mbclen + 2 > 2, 1))383 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); 384 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2)) 375 385 { 376 wchar_t wcu = wc;377 if ( iswlower (wc))386 wchar_t wcu = __towupper (wc); 387 if (wcu != wc) 378 388 { 379 389 size_t mbcdlen; 380 390 381 wcu = towupper (wc); 382 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); 383 if (BE (mbclen == mbcdlen, 1)) 391 mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st); 392 if (__glibc_likely (mbclen == mbcdlen)) 384 393 memcpy (pstr->mbs + byte_idx, buf, mbclen); 385 394 else if (mbcdlen != (size_t) -1) … … 395 404 if (pstr->offsets == NULL) 396 405 { 397 pstr->offsets = re_malloc ( int, pstr->bufs_len);406 pstr->offsets = re_malloc (Idx, pstr->bufs_len); 398 407 399 408 if (pstr->offsets == NULL) … … 425 434 continue; 426 435 } 427 428 436 else 437 memcpy (pstr->mbs + byte_idx, p, mbclen); 429 438 } 430 439 else 431 440 memcpy (pstr->mbs + byte_idx, p, mbclen); 432 441 433 if ( BE (pstr->offsets_needed != 0,0))442 if (__glibc_unlikely (pstr->offsets_needed != 0)) 434 443 { 435 444 size_t i; … … 444 453 pstr->wcs[byte_idx++] = WEOF; 445 454 } 446 else if (mbclen == (size_t) -1 || mbclen == 0) 455 else if (mbclen == (size_t) -1 || mbclen == 0 456 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) 447 457 { 448 458 /* It is an invalid character or '\0'. Just use the byte. */ 449 459 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; 450 460 451 if ( BE (pstr->trans != NULL, 0))461 if (__glibc_unlikely (pstr->trans != NULL)) 452 462 ch = pstr->trans [ch]; 453 463 pstr->mbs[byte_idx] = ch; 454 464 455 if ( BE (pstr->offsets_needed != 0,0))465 if (__glibc_unlikely (pstr->offsets_needed != 0)) 456 466 pstr->offsets[byte_idx] = src_idx; 457 467 ++src_idx; … … 459 469 /* And also cast it to wide char. */ 460 470 pstr->wcs[byte_idx++] = (wchar_t) ch; 461 if ( BE (mbclen == (size_t) -1, 0))471 if (__glibc_unlikely (mbclen == (size_t) -1)) 462 472 pstr->cur_state = prev_st; 463 473 } … … 477 487 Return the index. */ 478 488 479 static int 480 internal_function 481 re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) 489 static Idx 490 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) 482 491 { 483 492 mbstate_t prev_st; 484 intrawbuf_idx;493 Idx rawbuf_idx; 485 494 size_t mbclen; 486 w char_t wc = 0;495 wint_t wc = WEOF; 487 496 488 497 /* Skip the characters which are not necessary to check. */ … … 490 499 rawbuf_idx < new_raw_idx;) 491 500 { 492 int remain_len;493 remain_len = pstr->len - rawbuf_idx;501 wchar_t wc2; 502 Idx remain_len = pstr->raw_len - rawbuf_idx; 494 503 prev_st = pstr->cur_state; 495 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, 496 remain_len, &pstr->cur_state); 497 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) 498 { 499 /* We treat these cases as a singlebyte character. */ 504 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, 505 remain_len, &pstr->cur_state); 506 if (__glibc_unlikely (mbclen == (size_t) -2 || mbclen == (size_t) -1 507 || mbclen == 0)) 508 { 509 /* We treat these cases as a single byte character. */ 510 if (mbclen == 0 || remain_len == 0) 511 wc = L'\0'; 512 else 513 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); 500 514 mbclen = 1; 501 515 pstr->cur_state = prev_st; 502 516 } 517 else 518 wc = wc2; 503 519 /* Then proceed the next character. */ 504 520 rawbuf_idx += mbclen; 505 521 } 506 *last_wc = (wint_t)wc;522 *last_wc = wc; 507 523 return rawbuf_idx; 508 524 } 509 #endif /* RE_ENABLE_I18N */510 525 511 526 /* Build the buffer PSTR->MBS, and apply the translation if we need. … … 513 528 514 529 static void 515 internal_function516 530 build_upper_buffer (re_string_t *pstr) 517 531 { 518 intchar_idx, end_idx;532 Idx char_idx, end_idx; 519 533 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; 520 534 … … 522 536 { 523 537 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; 524 if ( BE (pstr->trans != NULL, 0))538 if (__glibc_unlikely (pstr->trans != NULL)) 525 539 ch = pstr->trans[ch]; 526 if (islower (ch)) 527 pstr->mbs[char_idx] = toupper (ch); 528 else 529 pstr->mbs[char_idx] = ch; 540 pstr->mbs[char_idx] = toupper (ch); 530 541 } 531 542 pstr->valid_len = char_idx; … … 536 547 537 548 static void 538 internal_function539 549 re_string_translate_buffer (re_string_t *pstr) 540 550 { 541 intbuf_idx, end_idx;551 Idx buf_idx, end_idx; 542 552 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; 543 553 … … 557 567 558 568 static reg_errcode_t 559 internal_function 560 re_string_reconstruct (re_string_t *pstr, int idx, int eflags) 561 { 562 int offset = idx - pstr->raw_mbs_idx; 563 if (BE (offset < 0, 0)) 569 __attribute_warn_unused_result__ 570 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) 571 { 572 Idx offset; 573 574 if (__glibc_unlikely (pstr->raw_mbs_idx <= idx)) 575 offset = idx - pstr->raw_mbs_idx; 576 else 564 577 { 565 578 /* Reset buffer. */ 566 #ifdef RE_ENABLE_I18N567 579 if (pstr->mb_cur_max > 1) 568 580 memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); 569 #endif /* RE_ENABLE_I18N */570 581 pstr->len = pstr->raw_len; 571 582 pstr->stop = pstr->raw_stop; … … 581 592 } 582 593 583 if (BE (offset != 0, 1)) 584 { 585 /* Are the characters which are already checked remain? */ 586 if (BE (offset < pstr->valid_raw_len, 1) 587 #ifdef RE_ENABLE_I18N 588 /* Handling this would enlarge the code too much. 589 Accept a slowdown in that case. */ 590 && pstr->offsets_needed == 0 591 #endif 592 ) 594 if (__glibc_likely (offset != 0)) 595 { 596 /* Should the already checked characters be kept? */ 597 if (__glibc_likely (offset < pstr->valid_raw_len)) 593 598 { 594 599 /* Yes, move them to the front of the buffer. */ 595 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags); 596 #ifdef RE_ENABLE_I18N 597 if (pstr->mb_cur_max > 1) 598 memmove (pstr->wcs, pstr->wcs + offset, 599 (pstr->valid_len - offset) * sizeof (wint_t)); 600 #endif /* RE_ENABLE_I18N */ 601 if (BE (pstr->mbs_allocated, 0)) 602 memmove (pstr->mbs, pstr->mbs + offset, 603 pstr->valid_len - offset); 604 pstr->valid_len -= offset; 605 pstr->valid_raw_len -= offset; 606 #if DEBUG 607 assert (pstr->valid_len > 0); 608 #endif 600 if (__glibc_unlikely (pstr->offsets_needed)) 601 { 602 Idx low = 0, high = pstr->valid_len, mid; 603 do 604 { 605 mid = (high + low) / 2; 606 if (pstr->offsets[mid] > offset) 607 high = mid; 608 else if (pstr->offsets[mid] < offset) 609 low = mid + 1; 610 else 611 break; 612 } 613 while (low < high); 614 if (pstr->offsets[mid] < offset) 615 ++mid; 616 pstr->tip_context = re_string_context_at (pstr, mid - 1, 617 eflags); 618 /* This can be quite complicated, so handle specially 619 only the common and easy case where the character with 620 different length representation of lower and upper 621 case is present at or after offset. */ 622 if (pstr->valid_len > offset 623 && mid == offset && pstr->offsets[mid] == offset) 624 { 625 memmove (pstr->wcs, pstr->wcs + offset, 626 (pstr->valid_len - offset) * sizeof (wint_t)); 627 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); 628 pstr->valid_len -= offset; 629 pstr->valid_raw_len -= offset; 630 for (low = 0; low < pstr->valid_len; low++) 631 pstr->offsets[low] = pstr->offsets[low + offset] - offset; 632 } 633 else 634 { 635 /* Otherwise, just find out how long the partial multibyte 636 character at offset is and fill it with WEOF/255. */ 637 pstr->len = pstr->raw_len - idx + offset; 638 pstr->stop = pstr->raw_stop - idx + offset; 639 pstr->offsets_needed = 0; 640 while (mid > 0 && pstr->offsets[mid - 1] == offset) 641 --mid; 642 while (mid < pstr->valid_len) 643 if (pstr->wcs[mid] != WEOF) 644 break; 645 else 646 ++mid; 647 if (mid == pstr->valid_len) 648 pstr->valid_len = 0; 649 else 650 { 651 pstr->valid_len = pstr->offsets[mid] - offset; 652 if (pstr->valid_len) 653 { 654 for (low = 0; low < pstr->valid_len; ++low) 655 pstr->wcs[low] = WEOF; 656 memset (pstr->mbs, 255, pstr->valid_len); 657 } 658 } 659 pstr->valid_raw_len = pstr->valid_len; 660 } 661 } 662 else 663 { 664 pstr->tip_context = re_string_context_at (pstr, offset - 1, 665 eflags); 666 if (pstr->mb_cur_max > 1) 667 memmove (pstr->wcs, pstr->wcs + offset, 668 (pstr->valid_len - offset) * sizeof (wint_t)); 669 if (__glibc_unlikely (pstr->mbs_allocated)) 670 memmove (pstr->mbs, pstr->mbs + offset, 671 pstr->valid_len - offset); 672 pstr->valid_len -= offset; 673 pstr->valid_raw_len -= offset; 674 DEBUG_ASSERT (pstr->valid_len > 0); 675 } 609 676 } 610 677 else 611 678 { 612 679 /* No, skip all characters until IDX. */ 613 #ifdef RE_ENABLE_I18N 614 if (BE (pstr->offsets_needed, 0)) 680 Idx prev_valid_len = pstr->valid_len; 681 682 if (__glibc_unlikely (pstr->offsets_needed)) 615 683 { 616 684 pstr->len = pstr->raw_len - idx + offset; … … 618 686 pstr->offsets_needed = 0; 619 687 } 620 #endif621 688 pstr->valid_len = 0; 622 pstr->valid_raw_len = 0;623 #ifdef RE_ENABLE_I18N624 689 if (pstr->mb_cur_max > 1) 625 690 { 626 intwcs_idx;691 Idx wcs_idx; 627 692 wint_t wc = WEOF; 628 693 629 694 if (pstr->is_utf8) 630 695 { 631 const unsigned char *raw, *p, * q, *end;696 const unsigned char *raw, *p, *end; 632 697 633 698 /* Special case UTF-8. Multi-byte chars start with any … … 635 700 raw = pstr->raw_mbs + pstr->raw_mbs_idx; 636 701 end = raw + (offset - pstr->mb_cur_max); 702 if (end < pstr->raw_mbs) 703 end = pstr->raw_mbs; 637 704 p = raw + offset - 1; 638 705 #ifdef _LIBC 639 706 /* We know the wchar_t encoding is UCS4, so for the simple 640 707 case, ASCII characters, skip the conversion step. */ 641 if (isascii (*p) && BE (pstr->trans == NULL, 1))708 if (isascii (*p) && __glibc_likely (pstr->trans == NULL)) 642 709 { 643 710 memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); 644 pstr->valid_len = 0;711 /* pstr->valid_len = 0; */ 645 712 wc = (wchar_t) *p; 646 713 } … … 652 719 mbstate_t cur_state; 653 720 wchar_t wc2; 654 intmlen = raw + pstr->len - p;721 Idx mlen = raw + pstr->len - p; 655 722 unsigned char buf[6]; 656 723 size_t mbclen; 657 724 658 q= p;659 if ( BE (pstr->trans != NULL, 0))725 const unsigned char *pp = p; 726 if (__glibc_unlikely (pstr->trans != NULL)) 660 727 { 661 728 int i = mlen < 6 ? mlen : 6; 662 729 while (--i >= 0) 663 730 buf[i] = pstr->trans[p[i]]; 664 q= buf;731 pp = buf; 665 732 } 666 733 /* XXX Don't use mbrtowc, we know which conversion 667 734 to use (UTF-8 -> UCS4). */ 668 735 memset (&cur_state, 0, sizeof (cur_state)); 669 mbclen = mbrtowc (&wc2, (const char *)p, mlen,670 &cur_state);736 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen, 737 &cur_state); 671 738 if (raw + offset - p <= mbclen 672 739 && mbclen < (size_t) -2) … … 683 750 if (wc == WEOF) 684 751 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; 685 if (BE (pstr->valid_len, 0)) 752 if (wc == WEOF) 753 pstr->tip_context 754 = re_string_context_at (pstr, prev_valid_len - 1, eflags); 755 else 756 pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != 0) 757 && IS_WIDE_WORD_CHAR (wc)) 758 ? CONTEXT_WORD 759 : ((IS_WIDE_NEWLINE (wc) 760 && pstr->newline_anchor) 761 ? CONTEXT_NEWLINE : 0)); 762 if (__glibc_unlikely (pstr->valid_len)) 686 763 { 687 764 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) … … 691 768 } 692 769 pstr->valid_raw_len = pstr->valid_len; 693 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)694 && IS_WIDE_WORD_CHAR (wc))695 ? CONTEXT_WORD696 : ((IS_WIDE_NEWLINE (wc)697 && pstr->newline_anchor)698 ? CONTEXT_NEWLINE : 0));699 770 } 700 771 else 701 #endif /* RE_ENABLE_I18N */702 772 { 703 773 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; 774 pstr->valid_raw_len = 0; 704 775 if (pstr->trans) 705 776 c = pstr->trans[c]; … … 710 781 } 711 782 } 712 if (! BE (pstr->mbs_allocated, 0))783 if (!__glibc_unlikely (pstr->mbs_allocated)) 713 784 pstr->mbs += offset; 714 785 } … … 718 789 719 790 /* Then build the buffers. */ 720 #ifdef RE_ENABLE_I18N721 791 if (pstr->mb_cur_max > 1) 722 792 { 723 793 if (pstr->icase) 724 794 { 725 int ret = build_wcs_upper_buffer (pstr);726 if ( BE (ret != REG_NOERROR, 0))795 reg_errcode_t ret = build_wcs_upper_buffer (pstr); 796 if (__glibc_unlikely (ret != REG_NOERROR)) 727 797 return ret; 728 798 } … … 731 801 } 732 802 else 733 #endif /* RE_ENABLE_I18N */ 734 if (BE (pstr->mbs_allocated, 0)) 803 if (__glibc_unlikely (pstr->mbs_allocated)) 735 804 { 736 805 if (pstr->icase) … … 747 816 748 817 static unsigned char 749 internal_function __attribute ((pure)) 750 re_string_peek_byte_case (const re_string_t *pstr, int idx) 751 { 752 int ch, off; 818 __attribute__ ((pure)) 819 re_string_peek_byte_case (const re_string_t *pstr, Idx idx) 820 { 821 int ch; 822 Idx off; 753 823 754 824 /* Handle the common (easiest) cases first. */ 755 if ( BE (!pstr->mbs_allocated, 1))825 if (__glibc_likely (!pstr->mbs_allocated)) 756 826 return re_string_peek_byte (pstr, idx); 757 827 758 #ifdef RE_ENABLE_I18N759 828 if (pstr->mb_cur_max > 1 760 829 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) 761 830 return re_string_peek_byte (pstr, idx); 762 #endif763 831 764 832 off = pstr->cur_idx + idx; 765 #ifdef RE_ENABLE_I18N766 833 if (pstr->offsets_needed) 767 834 off = pstr->offsets[off]; 768 #endif769 835 770 836 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; 771 837 772 #ifdef RE_ENABLE_I18N773 838 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I 774 839 this function returns CAPITAL LETTER I instead of first byte of … … 777 842 if (pstr->offsets_needed && !isascii (ch)) 778 843 return re_string_peek_byte (pstr, idx); 779 #endif780 844 781 845 return ch; … … 783 847 784 848 static unsigned char 785 internal_function786 849 re_string_fetch_byte_case (re_string_t *pstr) 787 850 { 788 if ( BE (!pstr->mbs_allocated, 1))851 if (__glibc_likely (!pstr->mbs_allocated)) 789 852 return re_string_fetch_byte (pstr); 790 853 791 #ifdef RE_ENABLE_I18N792 854 if (pstr->offsets_needed) 793 855 { 794 int off, ch; 856 Idx off; 857 int ch; 795 858 796 859 /* For tr_TR.UTF-8 [[:islower:]] there is … … 814 877 return ch; 815 878 } 816 #endif817 879 818 880 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; … … 820 882 821 883 static void 822 internal_function823 884 re_string_destruct (re_string_t *pstr) 824 885 { 825 #ifdef RE_ENABLE_I18N826 886 re_free (pstr->wcs); 827 887 re_free (pstr->offsets); 828 #endif /* RE_ENABLE_I18N */829 888 if (pstr->mbs_allocated) 830 889 re_free (pstr->mbs); … … 834 893 835 894 static unsigned int 836 internal_function 837 re_string_context_at (const re_string_t *input, int idx, int eflags) 895 re_string_context_at (const re_string_t *input, Idx idx, int eflags) 838 896 { 839 897 int c; 840 if ( BE (idx < 0,0))898 if (__glibc_unlikely (idx < 0)) 841 899 /* In this case, we use the value stored in input->tip_context, 842 900 since we can't know the character in input->mbs[-1] here. */ 843 901 return input->tip_context; 844 if ( BE (idx == input->len, 0))902 if (__glibc_unlikely (idx == input->len)) 845 903 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF 846 904 : CONTEXT_NEWLINE | CONTEXT_ENDBUF); 847 #ifdef RE_ENABLE_I18N848 905 if (input->mb_cur_max > 1) 849 906 { 850 907 wint_t wc; 851 intwc_idx = idx;908 Idx wc_idx = idx; 852 909 while(input->wcs[wc_idx] == WEOF) 853 910 { 854 #ifdef DEBUG 855 /* It must not happen. */ 856 assert (wc_idx >= 0); 857 #endif 911 DEBUG_ASSERT (wc_idx >= 0); 858 912 --wc_idx; 859 913 if (wc_idx < 0) … … 861 915 } 862 916 wc = input->wcs[wc_idx]; 863 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) 917 if (__glibc_unlikely (input->word_ops_used != 0) 918 && IS_WIDE_WORD_CHAR (wc)) 864 919 return CONTEXT_WORD; 865 920 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor … … 867 922 } 868 923 else 869 #endif870 924 { 871 925 c = re_string_byte_at (input, idx); … … 880 934 881 935 static reg_errcode_t 882 internal_function 883 re_node_set_alloc (re_node_set *set, intsize)936 __attribute_warn_unused_result__ 937 re_node_set_alloc (re_node_set *set, Idx size) 884 938 { 885 939 set->alloc = size; 886 940 set->nelem = 0; 887 set->elems = re_malloc (int, size); 888 if (BE (set->elems == NULL, 0)) 941 set->elems = re_malloc (Idx, size); 942 if (__glibc_unlikely (set->elems == NULL) 943 && (MALLOC_0_IS_NONNULL || size != 0)) 889 944 return REG_ESPACE; 890 945 return REG_NOERROR; … … 892 947 893 948 static reg_errcode_t 894 internal_function 895 re_node_set_init_1 (re_node_set *set, intelem)949 __attribute_warn_unused_result__ 950 re_node_set_init_1 (re_node_set *set, Idx elem) 896 951 { 897 952 set->alloc = 1; 898 953 set->nelem = 1; 899 set->elems = re_malloc ( int, 1);900 if ( BE (set->elems == NULL, 0))954 set->elems = re_malloc (Idx, 1); 955 if (__glibc_unlikely (set->elems == NULL)) 901 956 { 902 957 set->alloc = set->nelem = 0; … … 908 963 909 964 static reg_errcode_t 910 internal_function 911 re_node_set_init_2 (re_node_set *set, int elem1, intelem2)965 __attribute_warn_unused_result__ 966 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2) 912 967 { 913 968 set->alloc = 2; 914 set->elems = re_malloc ( int, 2);915 if ( BE (set->elems == NULL, 0))969 set->elems = re_malloc (Idx, 2); 970 if (__glibc_unlikely (set->elems == NULL)) 916 971 return REG_ESPACE; 917 972 if (elem1 == elem2) … … 938 993 939 994 static reg_errcode_t 940 internal_function 995 __attribute_warn_unused_result__ 941 996 re_node_set_init_copy (re_node_set *dest, const re_node_set *src) 942 997 { … … 945 1000 { 946 1001 dest->alloc = dest->nelem; 947 dest->elems = re_malloc ( int, dest->alloc);948 if ( BE (dest->elems == NULL, 0))1002 dest->elems = re_malloc (Idx, dest->alloc); 1003 if (__glibc_unlikely (dest->elems == NULL)) 949 1004 { 950 1005 dest->alloc = dest->nelem = 0; 951 1006 return REG_ESPACE; 952 1007 } 953 memcpy (dest->elems, src->elems, src->nelem * sizeof ( int));1008 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); 954 1009 } 955 1010 else … … 963 1018 964 1019 static reg_errcode_t 965 internal_function 1020 __attribute_warn_unused_result__ 966 1021 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, 967 1022 const re_node_set *src2) 968 1023 { 969 inti1, i2, is, id, delta, sbase;1024 Idx i1, i2, is, id, delta, sbase; 970 1025 if (src1->nelem == 0 || src2->nelem == 0) 971 1026 return REG_NOERROR; … … 975 1030 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) 976 1031 { 977 intnew_alloc = src1->nelem + src2->nelem + dest->alloc;978 int *new_elems = re_realloc (dest->elems, int, new_alloc);979 if ( BE (new_elems == NULL, 0))980 1032 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc; 1033 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc); 1034 if (__glibc_unlikely (new_elems == NULL)) 1035 return REG_ESPACE; 981 1036 dest->elems = new_elems; 982 1037 dest->alloc = new_alloc; … … 997 1052 --id; 998 1053 999 1054 if (id < 0 || dest->elems[id] != src1->elems[i1]) 1000 1055 dest->elems[--sbase] = src1->elems[i1]; 1001 1056 … … 1028 1083 for (;;) 1029 1084 { 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1085 if (dest->elems[is] > dest->elems[id]) 1086 { 1087 /* Copy from the top. */ 1088 dest->elems[id + delta--] = dest->elems[is--]; 1089 if (delta == 0) 1090 break; 1091 } 1092 else 1093 { 1094 /* Slide from the bottom. */ 1095 dest->elems[id + delta] = dest->elems[id]; 1096 if (--id < 0) 1097 break; 1098 } 1044 1099 } 1045 1100 1046 1101 /* Copy remaining SRC elements. */ 1047 memcpy (dest->elems, dest->elems + sbase, delta * sizeof ( int));1102 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx)); 1048 1103 1049 1104 return REG_NOERROR; … … 1054 1109 1055 1110 static reg_errcode_t 1056 internal_function 1111 __attribute_warn_unused_result__ 1057 1112 re_node_set_init_union (re_node_set *dest, const re_node_set *src1, 1058 1113 const re_node_set *src2) 1059 1114 { 1060 inti1, i2, id;1115 Idx i1, i2, id; 1061 1116 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) 1062 1117 { 1063 1118 dest->alloc = src1->nelem + src2->nelem; 1064 dest->elems = re_malloc ( int, dest->alloc);1065 if ( BE (dest->elems == NULL, 0))1119 dest->elems = re_malloc (Idx, dest->alloc); 1120 if (__glibc_unlikely (dest->elems == NULL)) 1066 1121 return REG_ESPACE; 1067 1122 } … … 1090 1145 { 1091 1146 memcpy (dest->elems + id, src1->elems + i1, 1092 (src1->nelem - i1) * sizeof ( int));1147 (src1->nelem - i1) * sizeof (Idx)); 1093 1148 id += src1->nelem - i1; 1094 1149 } … … 1096 1151 { 1097 1152 memcpy (dest->elems + id, src2->elems + i2, 1098 (src2->nelem - i2) * sizeof ( int));1153 (src2->nelem - i2) * sizeof (Idx)); 1099 1154 id += src2->nelem - i2; 1100 1155 } … … 1107 1162 1108 1163 static reg_errcode_t 1109 internal_function 1164 __attribute_warn_unused_result__ 1110 1165 re_node_set_merge (re_node_set *dest, const re_node_set *src) 1111 1166 { 1112 intis, id, sbase, delta;1167 Idx is, id, sbase, delta; 1113 1168 if (src == NULL || src->nelem == 0) 1114 1169 return REG_NOERROR; 1115 1170 if (dest->alloc < 2 * src->nelem + dest->nelem) 1116 1171 { 1117 intnew_alloc = 2 * (src->nelem + dest->alloc);1118 int *new_buffer = re_realloc (dest->elems, int, new_alloc);1119 if ( BE (new_buffer == NULL, 0))1172 Idx new_alloc = 2 * (src->nelem + dest->alloc); 1173 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc); 1174 if (__glibc_unlikely (new_buffer == NULL)) 1120 1175 return REG_ESPACE; 1121 1176 dest->elems = new_buffer; … … 1123 1178 } 1124 1179 1125 if (BE (dest->nelem == 0, 0)) 1126 { 1180 if (__glibc_unlikely (dest->nelem == 0)) 1181 { 1182 /* Although we already guaranteed above that dest->alloc != 0 and 1183 therefore dest->elems != NULL, add a debug assertion to pacify 1184 GCC 11.2.1's -fanalyzer. */ 1185 DEBUG_ASSERT (dest->elems); 1127 1186 dest->nelem = src->nelem; 1128 memcpy (dest->elems, src->elems, src->nelem * sizeof ( int));1187 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); 1129 1188 return REG_NOERROR; 1130 1189 } … … 1136 1195 { 1137 1196 if (dest->elems[id] == src->elems[is]) 1138 1197 is--, id--; 1139 1198 else if (dest->elems[id] < src->elems[is]) 1140 1199 dest->elems[--sbase] = src->elems[is--]; 1141 1200 else /* if (dest->elems[id] > src->elems[is]) */ 1142 1201 --id; 1143 1202 } 1144 1203 … … 1147 1206 /* If DEST is exhausted, the remaining items of SRC must be unique. */ 1148 1207 sbase -= is + 1; 1149 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof ( int));1208 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx)); 1150 1209 } 1151 1210 … … 1162 1221 { 1163 1222 if (dest->elems[is] > dest->elems[id]) 1164 1223 { 1165 1224 /* Copy from the top. */ 1166 1225 dest->elems[id + delta--] = dest->elems[is--]; 1167 1226 if (delta == 0) 1168 1227 break; 1169 1228 } 1170 1229 else 1171 1172 1173 1230 { 1231 /* Slide from the bottom. */ 1232 dest->elems[id + delta] = dest->elems[id]; 1174 1233 if (--id < 0) 1175 1234 { 1176 1235 /* Copy remaining SRC elements. */ 1177 1236 memcpy (dest->elems, dest->elems + sbase, 1178 delta * sizeof (int));1237 delta * sizeof (Idx)); 1179 1238 break; 1180 1239 } … … 1187 1246 /* Insert the new element ELEM to the re_node_set* SET. 1188 1247 SET should not already have ELEM. 1189 return -1 if an error is occured, return 1 otherwise. */1190 1191 static int1192 internal_function 1193 re_node_set_insert (re_node_set *set, intelem)1194 { 1195 intidx;1248 Return true if successful. */ 1249 1250 static bool 1251 __attribute_warn_unused_result__ 1252 re_node_set_insert (re_node_set *set, Idx elem) 1253 { 1254 Idx idx; 1196 1255 /* In case the set is empty. */ 1197 1256 if (set->alloc == 0) 1198 { 1199 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) 1200 return 1; 1201 else 1202 return -1; 1203 } 1204 1205 if (BE (set->nelem, 0) == 0) 1206 { 1207 /* We already guaranteed above that set->alloc != 0. */ 1257 return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR); 1258 1259 if (__glibc_unlikely (set->nelem) == 0) 1260 { 1261 /* Although we already guaranteed above that set->alloc != 0 and 1262 therefore set->elems != NULL, add a debug assertion to pacify 1263 GCC 11.2 -fanalyzer. */ 1264 DEBUG_ASSERT (set->elems); 1208 1265 set->elems[0] = elem; 1209 1266 ++set->nelem; 1210 return 1;1267 return true; 1211 1268 } 1212 1269 … … 1214 1271 if (set->alloc == set->nelem) 1215 1272 { 1216 int*new_elems;1273 Idx *new_elems; 1217 1274 set->alloc = set->alloc * 2; 1218 new_elems = re_realloc (set->elems, int, set->alloc);1219 if ( BE (new_elems == NULL, 0))1220 return -1;1275 new_elems = re_realloc (set->elems, Idx, set->alloc); 1276 if (__glibc_unlikely (new_elems == NULL)) 1277 return false; 1221 1278 set->elems = new_elems; 1222 1279 } … … 1226 1283 if (elem < set->elems[0]) 1227 1284 { 1228 idx = 0;1229 1285 for (idx = set->nelem; idx > 0; idx--) 1230 1286 set->elems[idx] = set->elems[idx - 1]; 1231 1287 } 1232 1288 else 1233 1289 { 1234 1290 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) 1235 set->elems[idx] = set->elems[idx - 1]; 1291 set->elems[idx] = set->elems[idx - 1]; 1292 DEBUG_ASSERT (set->elems[idx - 1] < elem); 1236 1293 } 1237 1294 … … 1239 1296 set->elems[idx] = elem; 1240 1297 ++set->nelem; 1241 return 1;1298 return true; 1242 1299 } 1243 1300 1244 1301 /* Insert the new element ELEM to the re_node_set* SET. 1245 1302 SET should not already have any element greater than or equal to ELEM. 1246 Return -1 if an error is occured, return 1 otherwise. */1247 1248 static int1249 internal_function 1250 re_node_set_insert_last (re_node_set *set, intelem)1303 Return true if successful. */ 1304 1305 static bool 1306 __attribute_warn_unused_result__ 1307 re_node_set_insert_last (re_node_set *set, Idx elem) 1251 1308 { 1252 1309 /* Realloc if we need. */ 1253 1310 if (set->alloc == set->nelem) 1254 1311 { 1255 int*new_elems;1312 Idx *new_elems; 1256 1313 set->alloc = (set->alloc + 1) * 2; 1257 new_elems = re_realloc (set->elems, int, set->alloc);1258 if ( BE (new_elems == NULL, 0))1259 return -1;1314 new_elems = re_realloc (set->elems, Idx, set->alloc); 1315 if (__glibc_unlikely (new_elems == NULL)) 1316 return false; 1260 1317 set->elems = new_elems; 1261 1318 } … … 1263 1320 /* Insert the new element. */ 1264 1321 set->elems[set->nelem++] = elem; 1265 return 1;1322 return true; 1266 1323 } 1267 1324 1268 1325 /* Compare two node sets SET1 and SET2. 1269 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */1270 1271 static int1272 internal_function __attribute((pure))1326 Return true if SET1 and SET2 are equivalent. */ 1327 1328 static bool 1329 __attribute__ ((pure)) 1273 1330 re_node_set_compare (const re_node_set *set1, const re_node_set *set2) 1274 1331 { 1275 inti;1332 Idx i; 1276 1333 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) 1277 return 0;1334 return false; 1278 1335 for (i = set1->nelem ; --i >= 0 ; ) 1279 1336 if (set1->elems[i] != set2->elems[i]) 1280 return 0;1281 return 1;1337 return false; 1338 return true; 1282 1339 } 1283 1340 1284 1341 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ 1285 1342 1286 static int1287 internal_function __attribute((pure))1288 re_node_set_contains (const re_node_set *set, intelem)1289 { 1290 unsigned int idx, right, mid;1343 static Idx 1344 __attribute__ ((pure)) 1345 re_node_set_contains (const re_node_set *set, Idx elem) 1346 { 1347 __re_size_t idx, right, mid; 1291 1348 if (set->nelem <= 0) 1292 1349 return 0; … … 1307 1364 1308 1365 static void 1309 internal_function 1310 re_node_set_remove_at (re_node_set *set, int idx) 1366 re_node_set_remove_at (re_node_set *set, Idx idx) 1311 1367 { 1312 1368 if (idx < 0 || idx >= set->nelem) … … 1320 1376 1321 1377 /* Add the token TOKEN to dfa->nodes, and return the index of the token. 1322 Or return -1, if an error will be occured. */ 1323 1324 static int 1325 internal_function 1378 Or return -1 if an error occurred. */ 1379 1380 static Idx 1326 1381 re_dfa_add_node (re_dfa_t *dfa, re_token_t token) 1327 1382 { 1328 int type = token.type; 1329 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) 1383 if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc)) 1330 1384 { 1331 1385 size_t new_nodes_alloc = dfa->nodes_alloc * 2; 1332 int*new_nexts, *new_indices;1386 Idx *new_nexts, *new_indices; 1333 1387 re_node_set *new_edests, *new_eclosures; 1334 1388 re_token_t *new_nodes; 1335 1389 1336 /* Avoid overflows. */ 1337 if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)) 1390 /* Avoid overflows in realloc. */ 1391 const size_t max_object_size = MAX (sizeof (re_token_t), 1392 MAX (sizeof (re_node_set), 1393 sizeof (Idx))); 1394 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) 1395 < new_nodes_alloc)) 1338 1396 return -1; 1339 1397 1340 1398 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); 1341 if ( BE (new_nodes == NULL, 0))1399 if (__glibc_unlikely (new_nodes == NULL)) 1342 1400 return -1; 1343 1401 dfa->nodes = new_nodes; 1344 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); 1345 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); 1402 dfa->nodes_alloc = new_nodes_alloc; 1403 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); 1404 if (new_nexts != NULL) 1405 dfa->nexts = new_nexts; 1406 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); 1407 if (new_indices != NULL) 1408 dfa->org_indices = new_indices; 1346 1409 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); 1410 if (new_edests != NULL) 1411 dfa->edests = new_edests; 1347 1412 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); 1348 if (BE (new_nexts == NULL || new_indices == NULL 1349 || new_edests == NULL || new_eclosures == NULL, 0)) 1413 if (new_eclosures != NULL) 1414 dfa->eclosures = new_eclosures; 1415 if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL 1416 || new_edests == NULL || new_eclosures == NULL)) 1350 1417 return -1; 1351 dfa->nexts = new_nexts;1352 dfa->org_indices = new_indices;1353 dfa->edests = new_edests;1354 dfa->eclosures = new_eclosures;1355 dfa->nodes_alloc = new_nodes_alloc;1356 1418 } 1357 1419 dfa->nodes[dfa->nodes_len] = token; 1358 1420 dfa->nodes[dfa->nodes_len].constraint = 0; 1359 #ifdef RE_ENABLE_I18N1360 1421 dfa->nodes[dfa->nodes_len].accept_mb = 1361 ( type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;1362 #endif 1422 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) 1423 || token.type == COMPLEX_BRACKET); 1363 1424 dfa->nexts[dfa->nodes_len] = -1; 1364 1425 re_node_set_init_empty (dfa->edests + dfa->nodes_len); … … 1367 1428 } 1368 1429 1369 static inline unsigned int 1370 internal_function 1430 static re_hashval_t 1371 1431 calc_state_hash (const re_node_set *nodes, unsigned int context) 1372 1432 { 1373 unsigned int hash = nodes->nelem + context;1374 inti;1433 re_hashval_t hash = nodes->nelem + context; 1434 Idx i; 1375 1435 for (i = 0 ; i < nodes->nelem ; i++) 1376 1436 hash += nodes->elems[i]; … … 1388 1448 1389 1449 static re_dfastate_t * 1390 internal_function 1450 __attribute_warn_unused_result__ 1391 1451 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, 1392 1452 const re_node_set *nodes) 1393 1453 { 1394 unsigned int hash;1454 re_hashval_t hash; 1395 1455 re_dfastate_t *new_state; 1396 1456 struct re_state_table_entry *spot; 1397 int i; 1398 if (BE (nodes->nelem == 0, 0)) 1457 Idx i; 1458 #if defined GCC_LINT || defined lint 1459 /* Suppress bogus uninitialized-variable warnings. */ 1460 *err = REG_NOERROR; 1461 #endif 1462 if (__glibc_unlikely (nodes->nelem == 0)) 1399 1463 { 1400 1464 *err = REG_NOERROR; … … 1415 1479 /* There are no appropriate state in the dfa, create the new one. */ 1416 1480 new_state = create_ci_newstate (dfa, nodes, hash); 1417 if ( BE (new_state == NULL, 0))1481 if (__glibc_unlikely (new_state == NULL)) 1418 1482 *err = REG_ESPACE; 1419 1483 … … 1432 1496 1433 1497 static re_dfastate_t * 1434 internal_function 1498 __attribute_warn_unused_result__ 1435 1499 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, 1436 1500 const re_node_set *nodes, unsigned int context) 1437 1501 { 1438 unsigned int hash;1502 re_hashval_t hash; 1439 1503 re_dfastate_t *new_state; 1440 1504 struct re_state_table_entry *spot; 1441 int i; 1505 Idx i; 1506 #if defined GCC_LINT || defined lint 1507 /* Suppress bogus uninitialized-variable warnings. */ 1508 *err = REG_NOERROR; 1509 #endif 1442 1510 if (nodes->nelem == 0) 1443 1511 { … … 1456 1524 return state; 1457 1525 } 1458 /* There are no appropriate state in `dfa', create the new one. */1526 /* There are no appropriate state in 'dfa', create the new one. */ 1459 1527 new_state = create_cd_newstate (dfa, nodes, context, hash); 1460 if ( BE (new_state == NULL, 0))1528 if (__glibc_unlikely (new_state == NULL)) 1461 1529 *err = REG_ESPACE; 1462 1530 … … 1469 1537 1470 1538 static reg_errcode_t 1539 __attribute_warn_unused_result__ 1471 1540 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, 1472 unsigned int hash)1541 re_hashval_t hash) 1473 1542 { 1474 1543 struct re_state_table_entry *spot; 1475 1544 reg_errcode_t err; 1476 inti;1545 Idx i; 1477 1546 1478 1547 newstate->hash = hash; 1479 1548 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); 1480 if ( BE (err != REG_NOERROR, 0))1549 if (__glibc_unlikely (err != REG_NOERROR)) 1481 1550 return REG_ESPACE; 1482 1551 for (i = 0; i < newstate->nodes.nelem; i++) 1483 1552 { 1484 intelem = newstate->nodes.elems[i];1553 Idx elem = newstate->nodes.elems[i]; 1485 1554 if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) 1486 re_node_set_insert_last (&newstate->non_eps_nodes, elem); 1555 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem)) 1556 return REG_ESPACE; 1487 1557 } 1488 1558 1489 1559 spot = dfa->state_table + (hash & dfa->state_hash_mask); 1490 if ( BE (spot->alloc <= spot->num, 0))1491 { 1492 intnew_alloc = 2 * spot->num + 2;1560 if (__glibc_unlikely (spot->alloc <= spot->num)) 1561 { 1562 Idx new_alloc = 2 * spot->num + 2; 1493 1563 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, 1494 1564 new_alloc); 1495 if ( BE (new_array == NULL, 0))1565 if (__glibc_unlikely (new_array == NULL)) 1496 1566 return REG_ESPACE; 1497 1567 spot->array = new_array; … … 1518 1588 } 1519 1589 1520 /* Create the new state which is independ of contexts.1590 /* Create the new state which is independent of contexts. 1521 1591 Return the new state if succeeded, otherwise return NULL. */ 1522 1592 1523 1593 static re_dfastate_t * 1524 internal_function 1594 __attribute_warn_unused_result__ 1525 1595 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, 1526 unsigned int hash)1527 { 1528 inti;1596 re_hashval_t hash) 1597 { 1598 Idx i; 1529 1599 reg_errcode_t err; 1530 1600 re_dfastate_t *newstate; 1531 1601 1532 1602 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); 1533 if ( BE (newstate == NULL, 0))1603 if (__glibc_unlikely (newstate == NULL)) 1534 1604 return NULL; 1535 1605 err = re_node_set_init_copy (&newstate->nodes, nodes); 1536 if ( BE (err != REG_NOERROR, 0))1606 if (__glibc_unlikely (err != REG_NOERROR)) 1537 1607 { 1538 1608 re_free (newstate); … … 1547 1617 if (type == CHARACTER && !node->constraint) 1548 1618 continue; 1549 #ifdef RE_ENABLE_I18N1550 1619 newstate->accept_mb |= node->accept_mb; 1551 #endif /* RE_ENABLE_I18N */1552 1620 1553 1621 /* If the state has the halt node, the state is a halt state. */ … … 1560 1628 } 1561 1629 err = register_state (dfa, newstate, hash); 1562 if ( BE (err != REG_NOERROR, 0))1630 if (__glibc_unlikely (err != REG_NOERROR)) 1563 1631 { 1564 1632 free_state (newstate); … … 1572 1640 1573 1641 static re_dfastate_t * 1574 internal_function 1642 __attribute_warn_unused_result__ 1575 1643 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, 1576 unsigned int context, unsigned int hash)1577 { 1578 inti, nctx_nodes = 0;1644 unsigned int context, re_hashval_t hash) 1645 { 1646 Idx i, nctx_nodes = 0; 1579 1647 reg_errcode_t err; 1580 1648 re_dfastate_t *newstate; 1581 1649 1582 1650 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); 1583 if ( BE (newstate == NULL, 0))1651 if (__glibc_unlikely (newstate == NULL)) 1584 1652 return NULL; 1585 1653 err = re_node_set_init_copy (&newstate->nodes, nodes); 1586 if ( BE (err != REG_NOERROR, 0))1654 if (__glibc_unlikely (err != REG_NOERROR)) 1587 1655 { 1588 1656 re_free (newstate); … … 1595 1663 for (i = 0 ; i < nodes->nelem ; i++) 1596 1664 { 1597 unsigned int constraint = 0;1598 1665 re_token_t *node = dfa->nodes + nodes->elems[i]; 1599 1666 re_token_type_t type = node->type; 1600 if (node->constraint) 1601 constraint = node->constraint; 1667 unsigned int constraint = node->constraint; 1602 1668 1603 1669 if (type == CHARACTER && !constraint) 1604 1670 continue; 1605 #ifdef RE_ENABLE_I18N1606 1671 newstate->accept_mb |= node->accept_mb; 1607 #endif /* RE_ENABLE_I18N */1608 1672 1609 1673 /* If the state has the halt node, the state is a halt state. */ … … 1612 1676 else if (type == OP_BACK_REF) 1613 1677 newstate->has_backref = 1; 1614 else if (type == ANCHOR)1615 constraint = node->opr.ctx_type;1616 1678 1617 1679 if (constraint) … … 1619 1681 if (newstate->entrance_nodes == &newstate->nodes) 1620 1682 { 1621 newstate->entrance_nodes = re_malloc (re_node_set, 1);1622 if ( BE (newstate->entrance_nodes == NULL, 0))1683 re_node_set *entrance_nodes = re_malloc (re_node_set, 1); 1684 if (__glibc_unlikely (entrance_nodes == NULL)) 1623 1685 { 1624 1686 free_state (newstate); 1625 1687 return NULL; 1626 1688 } 1627 re_node_set_init_copy (newstate->entrance_nodes, nodes); 1689 newstate->entrance_nodes = entrance_nodes; 1690 if (re_node_set_init_copy (newstate->entrance_nodes, nodes) 1691 != REG_NOERROR) 1692 { 1693 free_state (newstate); 1694 return NULL; 1695 } 1628 1696 nctx_nodes = 0; 1629 1697 newstate->has_constraint = 1; … … 1638 1706 } 1639 1707 err = register_state (dfa, newstate, hash); 1640 if ( BE (err != REG_NOERROR, 0))1708 if (__glibc_unlikely (err != REG_NOERROR)) 1641 1709 { 1642 1710 free_state (newstate);
Note:
See TracChangeset
for help on using the changeset viewer.