Changeset 3613 for trunk/src/sed/lib/regexec.c
- Timestamp:
- Sep 19, 2024, 2:34:43 AM (10 months ago)
- Location:
- trunk/src/sed
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/sed
-
Property svn:mergeinfo
set to
/vendor/sed/current merged eligible
-
Property svn:mergeinfo
set to
-
trunk/src/sed/lib/regexec.c
r606 r3613 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002 , 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 15 15 16 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 20 19 21 20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, 22 int n) internal_function; 23 static void match_ctx_clean (re_match_context_t *mctx) internal_function; 24 static void match_ctx_free (re_match_context_t *cache) internal_function; 25 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, 26 int str_idx, int from, int to) 27 internal_function; 28 static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) 29 internal_function; 30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, 31 int str_idx) internal_function; 21 Idx n); 22 static void match_ctx_clean (re_match_context_t *mctx); 23 static void match_ctx_free (re_match_context_t *cache); 24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, 25 Idx str_idx, Idx from, Idx to); 26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx); 27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, 28 Idx str_idx); 32 29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, 33 int node, int str_idx) 34 internal_function; 30 Idx node, Idx str_idx); 35 31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 36 re_dfastate_t **limited_sts, int last_node, 37 int last_str_idx) 38 internal_function; 32 re_dfastate_t **limited_sts, Idx last_node, 33 Idx last_str_idx); 39 34 static reg_errcode_t re_search_internal (const regex_t *preg, 40 const char *string, intlength,41 int start, int range, intstop,35 const char *string, Idx length, 36 Idx start, Idx last_start, Idx stop, 42 37 size_t nmatch, regmatch_t pmatch[], 43 int eflags) internal_function; 44 static int re_search_2_stub (struct re_pattern_buffer *bufp, 45 const char *string1, int length1, 46 const char *string2, int length2, 47 int start, int range, struct re_registers *regs, 48 int stop, int ret_len) internal_function; 49 static int re_search_stub (struct re_pattern_buffer *bufp, 50 const char *string, int length, int start, 51 int range, int stop, struct re_registers *regs, 52 int ret_len) internal_function; 38 int eflags); 39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, 40 const char *string1, Idx length1, 41 const char *string2, Idx length2, 42 Idx start, regoff_t range, 43 struct re_registers *regs, 44 Idx stop, bool ret_len); 45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp, 46 const char *string, Idx length, Idx start, 47 regoff_t range, Idx stop, 48 struct re_registers *regs, 49 bool ret_len); 53 50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, 54 int nregs, int regs_allocated) internal_function; 55 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) 56 internal_function; 57 static int check_matching (re_match_context_t *mctx, int fl_longest_match, 58 int *p_match_first) internal_function; 59 static int check_halt_state_context (const re_match_context_t *mctx, 60 const re_dfastate_t *state, int idx) 61 internal_function; 51 Idx nregs, int regs_allocated); 52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); 53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, 54 Idx *p_match_first); 55 static Idx check_halt_state_context (const re_match_context_t *mctx, 56 const re_dfastate_t *state, Idx idx); 62 57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 63 regmatch_t *prev_idx_match, intcur_node,64 int cur_idx, int nmatch) internal_function;58 regmatch_t *prev_idx_match, Idx cur_node, 59 Idx cur_idx, Idx nmatch); 65 60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, 66 int str_idx, int dest_node, int nregs, 67 regmatch_t *regs, 68 re_node_set *eps_via_nodes) 69 internal_function; 61 Idx str_idx, Idx dest_node, Idx nregs, 62 regmatch_t *regs, regmatch_t *prevregs, 63 re_node_set *eps_via_nodes); 70 64 static reg_errcode_t set_regs (const regex_t *preg, 71 65 const re_match_context_t *mctx, 72 66 size_t nmatch, regmatch_t *pmatch, 73 int fl_backtrack) internal_function; 74 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) 75 internal_function; 76 77 #ifdef RE_ENABLE_I18N 67 bool fl_backtrack); 68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); 69 78 70 static int sift_states_iter_mb (const re_match_context_t *mctx, 79 71 re_sift_context_t *sctx, 80 int node_idx, int str_idx, int max_str_idx) 81 internal_function; 82 #endif /* RE_ENABLE_I18N */ 72 Idx node_idx, Idx str_idx, Idx max_str_idx); 83 73 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, 84 re_sift_context_t *sctx) 85 internal_function; 74 re_sift_context_t *sctx); 86 75 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, 87 re_sift_context_t *sctx, int str_idx, 88 re_node_set *cur_dest) 89 internal_function; 76 re_sift_context_t *sctx, Idx str_idx, 77 re_node_set *cur_dest); 90 78 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, 91 79 re_sift_context_t *sctx, 92 int str_idx, 93 re_node_set *dest_nodes) 94 internal_function; 80 Idx str_idx, 81 re_node_set *dest_nodes); 95 82 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, 96 83 re_node_set *dest_nodes, 97 const re_node_set *candidates) 98 internal_function; 99 static int check_dst_limits (const re_match_context_t *mctx, 100 re_node_set *limits, 101 int dst_node, int dst_idx, int src_node, 102 int src_idx) internal_function; 84 const re_node_set *candidates); 85 static bool check_dst_limits (const re_match_context_t *mctx, 86 const re_node_set *limits, 87 Idx dst_node, Idx dst_idx, Idx src_node, 88 Idx src_idx); 103 89 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, 104 int boundaries, int subexp_idx, 105 int from_node, int bkref_idx) 106 internal_function; 90 int boundaries, Idx subexp_idx, 91 Idx from_node, Idx bkref_idx); 107 92 static int check_dst_limits_calc_pos (const re_match_context_t *mctx, 108 int limit, intsubexp_idx,109 int node, intstr_idx,110 int bkref_idx) internal_function;93 Idx limit, Idx subexp_idx, 94 Idx node, Idx str_idx, 95 Idx bkref_idx); 111 96 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, 112 97 re_node_set *dest_nodes, … … 114 99 re_node_set *limits, 115 100 struct re_backref_cache_entry *bkref_ents, 116 int str_idx) internal_function;101 Idx str_idx); 117 102 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, 118 103 re_sift_context_t *sctx, 119 int str_idx, const re_node_set *candidates) 120 internal_function; 104 Idx str_idx, const re_node_set *candidates); 121 105 static reg_errcode_t merge_state_array (const re_dfa_t *dfa, 122 106 re_dfastate_t **dst, 123 re_dfastate_t **src, int num) 124 internal_function; 107 re_dfastate_t **src, Idx num); 125 108 static re_dfastate_t *find_recover_state (reg_errcode_t *err, 126 re_match_context_t *mctx) internal_function;109 re_match_context_t *mctx); 127 110 static re_dfastate_t *transit_state (reg_errcode_t *err, 128 111 re_match_context_t *mctx, 129 re_dfastate_t *state) internal_function;112 re_dfastate_t *state); 130 113 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, 131 114 re_match_context_t *mctx, 132 re_dfastate_t *next_state) 133 internal_function; 115 re_dfastate_t *next_state); 134 116 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, 135 117 re_node_set *cur_nodes, 136 int str_idx) internal_function;118 Idx str_idx); 137 119 #if 0 138 120 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, 139 121 re_match_context_t *mctx, 140 re_dfastate_t *pstate) 141 internal_function; 122 re_dfastate_t *pstate); 142 123 #endif 143 #ifdef RE_ENABLE_I18N144 124 static reg_errcode_t transit_state_mb (re_match_context_t *mctx, 145 re_dfastate_t *pstate) 146 internal_function; 147 #endif /* RE_ENABLE_I18N */ 125 re_dfastate_t *pstate); 148 126 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, 149 const re_node_set *nodes) 150 internal_function; 127 const re_node_set *nodes); 151 128 static reg_errcode_t get_subexp (re_match_context_t *mctx, 152 int bkref_node, int bkref_str_idx) 153 internal_function; 129 Idx bkref_node, Idx bkref_str_idx); 154 130 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, 155 131 const re_sub_match_top_t *sub_top, 156 132 re_sub_match_last_t *sub_last, 157 int bkref_node, int bkref_str) 158 internal_function; 159 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 160 int subexp_idx, int type) internal_function; 133 Idx bkref_node, Idx bkref_str); 134 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 135 Idx subexp_idx, int type); 161 136 static reg_errcode_t check_arrival (re_match_context_t *mctx, 162 state_array_t *path, inttop_node,163 int top_str, int last_node, intlast_str,164 int type) internal_function;137 state_array_t *path, Idx top_node, 138 Idx top_str, Idx last_node, Idx last_str, 139 int type); 165 140 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, 166 intstr_idx,141 Idx str_idx, 167 142 re_node_set *cur_nodes, 168 re_node_set *next_nodes) 169 internal_function; 143 re_node_set *next_nodes); 170 144 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, 171 145 re_node_set *cur_nodes, 172 int ex_subexp, int type) 173 internal_function; 146 Idx ex_subexp, int type); 174 147 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, 175 148 re_node_set *dst_nodes, 176 int target, intex_subexp,177 int type) internal_function;149 Idx target, Idx ex_subexp, 150 int type); 178 151 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, 179 re_node_set *cur_nodes, int cur_str, 180 int subexp_num, int type) 181 internal_function; 182 static int build_trtable (const re_dfa_t *dfa, 183 re_dfastate_t *state) internal_function; 184 #ifdef RE_ENABLE_I18N 185 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, 186 const re_string_t *input, int idx) 187 internal_function; 188 # ifdef _LIBC 152 re_node_set *cur_nodes, Idx cur_str, 153 Idx subexp_num, int type); 154 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); 155 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, 156 const re_string_t *input, Idx idx); 157 #ifdef _LIBC 189 158 static unsigned int find_collation_sequence_value (const unsigned char *mbs, 190 size_t name_len) 191 internal_function; 192 # endif /* _LIBC */ 193 #endif /* RE_ENABLE_I18N */ 194 static int group_nodes_into_DFAstates (const re_dfa_t *dfa, 159 size_t name_len); 160 #endif 161 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, 195 162 const re_dfastate_t *state, 196 163 re_node_set *states_node, 197 bitset_t *states_ch) internal_function; 198 static int check_node_accept (const re_match_context_t *mctx, 199 const re_token_t *node, int idx) 200 internal_function; 201 static reg_errcode_t extend_buffers (re_match_context_t *mctx) 202 internal_function; 164 bitset_t *states_ch); 165 static bool check_node_accept (const re_match_context_t *mctx, 166 const re_token_t *node, Idx idx); 167 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len); 203 168 204 169 … … 209 174 210 175 If NMATCH is zero or REG_NOSUB was set in the cflags argument to 211 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at176 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 212 177 least NMATCH elements, and we set them to the offsets of the 213 178 corresponding matched substrings. 214 179 215 EFLAGS specifies `execution flags'which affect matching: if180 EFLAGS specifies "execution flags" which affect matching: if 216 181 REG_NOTBOL is set, then ^ does not match at the beginning of the 217 182 string; if REG_NOTEOL is set, then $ does not match at the end. 218 183 219 We return 0 if we find a match and REG_NOMATCH if not. */ 184 Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if 185 EFLAGS is invalid. */ 220 186 221 187 int 222 regexec (preg, string, nmatch, pmatch, eflags) 223 const regex_t *__restrict preg; 224 const char *__restrict string; 225 size_t nmatch; 226 regmatch_t pmatch[]; 227 int eflags; 188 regexec (const regex_t *__restrict preg, const char *__restrict string, 189 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) 228 190 { 229 191 reg_errcode_t err; 230 intstart, length;231 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;192 Idx start, length; 193 re_dfa_t *dfa = preg->buffer; 232 194 233 195 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) … … 245 207 } 246 208 247 __libc_lock_lock (dfa->lock);209 lock_lock (dfa->lock); 248 210 if (preg->no_sub) 249 err = re_search_internal (preg, string, length, start, length - start,211 err = re_search_internal (preg, string, length, start, length, 250 212 length, 0, NULL, eflags); 251 213 else 252 err = re_search_internal (preg, string, length, start, length - start,214 err = re_search_internal (preg, string, length, start, length, 253 215 length, nmatch, pmatch, eflags); 254 __libc_lock_unlock (dfa->lock);216 lock_unlock (dfa->lock); 255 217 return err != REG_NOERROR; 256 218 } 257 219 258 220 #ifdef _LIBC 221 libc_hidden_def (__regexec) 222 259 223 # include <shlib-compat.h> 260 224 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); … … 267 231 __compat_regexec (const regex_t *__restrict preg, 268 232 const char *__restrict string, size_t nmatch, 269 regmatch_t pmatch[ ], int eflags)233 regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) 270 234 { 271 235 return regexec (preg, string, nmatch, pmatch, … … 297 261 298 262 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match 299 and all groups is st roed in REGS. (For the "_2" variants, the offsets are263 and all groups is stored in REGS. (For the "_2" variants, the offsets are 300 264 computed relative to the concatenation, not relative to the individual 301 265 strings.) 302 266 303 267 On success, re_match* functions return the length of the match, re_search* 304 return the position of the start of the match. Return value -1 means no 305 match was found and -2 indicates an internal error. */ 306 307 int 308 re_match (bufp, string, length, start, regs) 309 struct re_pattern_buffer *bufp; 310 const char *string; 311 int length, start; 312 struct re_registers *regs; 313 { 314 return re_search_stub (bufp, string, length, start, 0, length, regs, 1); 268 return the position of the start of the match. They return -1 on 269 match failure, -2 on error. */ 270 271 regoff_t 272 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, 273 Idx start, struct re_registers *regs) 274 { 275 return re_search_stub (bufp, string, length, start, 0, length, regs, true); 315 276 } 316 277 #ifdef _LIBC … … 318 279 #endif 319 280 320 int 321 re_search (bufp, string, length, start, range, regs) 322 struct re_pattern_buffer *bufp; 323 const char *string; 324 int length, start, range; 325 struct re_registers *regs; 326 { 327 return re_search_stub (bufp, string, length, start, range, length, regs, 0); 281 regoff_t 282 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length, 283 Idx start, regoff_t range, struct re_registers *regs) 284 { 285 return re_search_stub (bufp, string, length, start, range, length, regs, 286 false); 328 287 } 329 288 #ifdef _LIBC … … 331 290 #endif 332 291 333 int 334 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) 335 struct re_pattern_buffer *bufp; 336 const char *string1, *string2; 337 int length1, length2, start, stop; 338 struct re_registers *regs; 292 regoff_t 293 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, 294 const char *string2, Idx length2, Idx start, 295 struct re_registers *regs, Idx stop) 339 296 { 340 297 return re_search_2_stub (bufp, string1, length1, string2, length2, 341 start, 0, regs, stop, 1);298 start, 0, regs, stop, true); 342 299 } 343 300 #ifdef _LIBC … … 345 302 #endif 346 303 347 int 348 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) 349 struct re_pattern_buffer *bufp; 350 const char *string1, *string2; 351 int length1, length2, start, range, stop; 352 struct re_registers *regs; 304 regoff_t 305 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, 306 const char *string2, Idx length2, Idx start, regoff_t range, 307 struct re_registers *regs, Idx stop) 353 308 { 354 309 return re_search_2_stub (bufp, string1, length1, string2, length2, 355 start, range, regs, stop, 0);310 start, range, regs, stop, false); 356 311 } 357 312 #ifdef _LIBC … … 359 314 #endif 360 315 361 static int 362 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, 363 stop, ret_len) 364 struct re_pattern_buffer *bufp; 365 const char *string1, *string2; 366 int length1, length2, start, range, stop, ret_len; 367 struct re_registers *regs; 316 static regoff_t 317 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, 318 Idx length1, const char *string2, Idx length2, Idx start, 319 regoff_t range, struct re_registers *regs, 320 Idx stop, bool ret_len) 368 321 { 369 322 const char *str; 370 int rval; 371 int len = length1 + length2; 372 int free_str = 0; 373 374 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) 323 regoff_t rval; 324 Idx len; 325 char *s = NULL; 326 327 if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0 328 || INT_ADD_WRAPV (length1, length2, &len)))) 375 329 return -2; 376 330 … … 379 333 if (length1 > 0) 380 334 { 381 char *s = re_malloc (char, len);382 383 if ( BE (s == NULL, 0))335 s = re_malloc (char, len); 336 337 if (__glibc_unlikely (s == NULL)) 384 338 return -2; 385 339 #ifdef _LIBC … … 390 344 #endif 391 345 str = s; 392 free_str = 1;393 346 } 394 347 else … … 399 352 rval = re_search_stub (bufp, str, len, start, range, stop, regs, 400 353 ret_len); 401 if (free_str) 402 re_free ((char *) str); 354 re_free (s); 403 355 return rval; 404 356 } … … 406 358 /* The parameters have the same meaning as those of re_search. 407 359 Additional parameters: 408 If RET_LEN is nonzerothe length of the match is returned (re_match style);360 If RET_LEN is true the length of the match is returned (re_match style); 409 361 otherwise the position of the match is returned. */ 410 362 411 static int 412 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) 413 struct re_pattern_buffer *bufp; 414 const char *string; 415 int length, start, range, stop, ret_len; 416 struct re_registers *regs; 363 static regoff_t 364 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, 365 Idx start, regoff_t range, Idx stop, struct re_registers *regs, 366 bool ret_len) 417 367 { 418 368 reg_errcode_t result; 419 369 regmatch_t *pmatch; 420 int nregs, rval; 370 Idx nregs; 371 regoff_t rval; 421 372 int eflags = 0; 422 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; 373 re_dfa_t *dfa = bufp->buffer; 374 Idx last_start = start + range; 423 375 424 376 /* Check for out-of-range. */ 425 if ( BE (start < 0 || start > length, 0))377 if (__glibc_unlikely (start < 0 || start > length)) 426 378 return -1; 427 if (BE (start + range > length, 0)) 428 range = length - start; 429 else if (BE (start + range < 0, 0)) 430 range = -start; 431 432 __libc_lock_lock (dfa->lock); 379 if (__glibc_unlikely (length < last_start 380 || (0 <= range && last_start < start))) 381 last_start = length; 382 else if (__glibc_unlikely (last_start < 0 383 || (range < 0 && start <= last_start))) 384 last_start = 0; 385 386 lock_lock (dfa->lock); 433 387 434 388 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; … … 436 390 437 391 /* Compile fastmap if we haven't yet. */ 438 if ( range > 0&& bufp->fastmap != NULL && !bufp->fastmap_accurate)392 if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) 439 393 re_compile_fastmap (bufp); 440 394 441 if ( BE (bufp->no_sub, 0))395 if (__glibc_unlikely (bufp->no_sub)) 442 396 regs = NULL; 443 397 … … 445 399 if (regs == NULL) 446 400 nregs = 1; 447 else if ( BE (bufp->regs_allocated == REGS_FIXED &&448 regs->num_regs < bufp->re_nsub + 1, 0))401 else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED 402 && regs->num_regs <= bufp->re_nsub)) 449 403 { 450 404 nregs = regs->num_regs; 451 if ( BE (nregs < 1, 0))405 if (__glibc_unlikely (nregs < 1)) 452 406 { 453 407 /* Nothing can be copied to regs. */ … … 459 413 nregs = bufp->re_nsub + 1; 460 414 pmatch = re_malloc (regmatch_t, nregs); 461 if ( BE (pmatch == NULL, 0))415 if (__glibc_unlikely (pmatch == NULL)) 462 416 { 463 417 rval = -2; … … 465 419 } 466 420 467 result = re_search_internal (bufp, string, length, start, range, stop,421 result = re_search_internal (bufp, string, length, start, last_start, stop, 468 422 nregs, pmatch, eflags); 469 423 470 424 rval = 0; 471 425 472 /* I hope we needn't fill the r regs with -1's when no match was found. */426 /* I hope we needn't fill their regs with -1's when no match was found. */ 473 427 if (result != REG_NOERROR) 474 rval = -1;428 rval = result == REG_NOMATCH ? -1 : -2; 475 429 else if (regs != NULL) 476 430 { … … 478 432 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, 479 433 bufp->regs_allocated); 480 if ( BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))434 if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED)) 481 435 rval = -2; 482 436 } 483 437 484 if ( BE (rval == 0, 1))438 if (__glibc_likely (rval == 0)) 485 439 { 486 440 if (ret_len) 487 441 { 488 assert(pmatch[0].rm_so == start);442 DEBUG_ASSERT (pmatch[0].rm_so == start); 489 443 rval = pmatch[0].rm_eo - start; 490 444 } … … 494 448 re_free (pmatch); 495 449 out: 496 __libc_lock_unlock (dfa->lock);450 lock_unlock (dfa->lock); 497 451 return rval; 498 452 } 499 453 500 454 static unsigned 501 re_copy_regs (regs, pmatch, nregs, regs_allocated) 502 struct re_registers *regs; 503 regmatch_t *pmatch; 504 int nregs, regs_allocated; 455 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, 456 int regs_allocated) 505 457 { 506 458 int rval = REGS_REALLOCATE; 507 inti;508 intneed_regs = nregs + 1;509 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code459 Idx i; 460 Idx need_regs = nregs + 1; 461 /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code 510 462 uses. */ 511 463 … … 514 466 { /* No. So allocate them with malloc. */ 515 467 regs->start = re_malloc (regoff_t, need_regs); 468 if (__glibc_unlikely (regs->start == NULL)) 469 return REGS_UNALLOCATED; 516 470 regs->end = re_malloc (regoff_t, need_regs); 517 if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0)) 518 return REGS_UNALLOCATED; 471 if (__glibc_unlikely (regs->end == NULL)) 472 { 473 re_free (regs->start); 474 return REGS_UNALLOCATED; 475 } 519 476 regs->num_regs = need_regs; 520 477 } … … 523 480 allocated, reallocate them. If we need fewer, just 524 481 leave it alone. */ 525 if ( BE (need_regs > regs->num_regs, 0))482 if (__glibc_unlikely (need_regs > regs->num_regs)) 526 483 { 527 484 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); 528 regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);529 if ( BE (new_start == NULL, 0) || BE (new_end == NULL, 0))485 regoff_t *new_end; 486 if (__glibc_unlikely (new_start == NULL)) 530 487 return REGS_UNALLOCATED; 488 new_end = re_realloc (regs->end, regoff_t, need_regs); 489 if (__glibc_unlikely (new_end == NULL)) 490 { 491 re_free (new_start); 492 return REGS_UNALLOCATED; 493 } 531 494 regs->start = new_start; 532 495 regs->end = new_end; … … 536 499 else 537 500 { 538 assert(regs_allocated == REGS_FIXED);501 DEBUG_ASSERT (regs_allocated == REGS_FIXED); 539 502 /* This function may not be called with REGS_FIXED and nregs too big. */ 540 assert (regs->num_regs >= nregs);503 DEBUG_ASSERT (nregs <= regs->num_regs); 541 504 rval = REGS_FIXED; 542 505 } … … 568 531 569 532 void 570 re_set_registers (bufp, regs, num_regs, starts, ends) 571 struct re_pattern_buffer *bufp; 572 struct re_registers *regs; 573 unsigned num_regs; 574 regoff_t *starts, *ends; 533 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, 534 __re_size_t num_regs, regoff_t *starts, regoff_t *ends) 575 535 { 576 536 if (num_regs) … … 585 545 bufp->regs_allocated = REGS_UNALLOCATED; 586 546 regs->num_regs = 0; 587 regs->start = regs->end = (regoff_t *) 0;547 regs->start = regs->end = NULL; 588 548 } 589 549 } … … 601 561 weak_function 602 562 # endif 603 re_exec (s) 604 const char *s; 563 re_exec (const char *s) 605 564 { 606 565 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); … … 613 572 /* Searches for a compiled pattern PREG in the string STRING, whose 614 573 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same 615 m ingings with regexec. START, and RANGE have the same meanings616 with re_search.574 meaning as with regexec. LAST_START is START + RANGE, where 575 START and RANGE have the same meaning as with re_search. 617 576 Return REG_NOERROR if we find a match, and REG_NOMATCH if not, 618 577 otherwise return the error code. 619 578 Note: We assume front end functions already check ranges. 620 ( START + RANGE >= 0 && START + RANGE<= LENGTH) */579 (0 <= LAST_START && LAST_START <= LENGTH) */ 621 580 622 581 static reg_errcode_t 623 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, 624 eflags) 625 const regex_t *preg; 626 const char *string; 627 int length, start, range, stop, eflags; 628 size_t nmatch; 629 regmatch_t pmatch[]; 582 __attribute_warn_unused_result__ 583 re_search_internal (const regex_t *preg, const char *string, Idx length, 584 Idx start, Idx last_start, Idx stop, size_t nmatch, 585 regmatch_t pmatch[], int eflags) 630 586 { 631 587 reg_errcode_t err; 632 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; 633 int left_lim, right_lim, incr; 634 int fl_longest_match, match_first, match_kind, match_last = -1; 635 int extra_nmatch; 636 int sb, ch; 637 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) 588 const re_dfa_t *dfa = preg->buffer; 589 Idx left_lim, right_lim; 590 int incr; 591 bool fl_longest_match; 592 int match_kind; 593 Idx match_first; 594 Idx match_last = -1; 595 Idx extra_nmatch; 596 bool sb; 597 int ch; 638 598 re_match_context_t mctx = { .dfa = dfa }; 639 #else 640 re_match_context_t mctx; 641 #endif 642 char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate 643 && range && !preg->can_be_null) ? preg->fastmap : NULL; 599 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate 600 && start != last_start && !preg->can_be_null) 601 ? preg->fastmap : NULL); 644 602 RE_TRANSLATE_TYPE t = preg->translate; 645 646 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))647 memset (&mctx, '\0', sizeof (re_match_context_t));648 mctx.dfa = dfa;649 #endif650 603 651 604 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; … … 653 606 654 607 /* Check if the DFA haven't been compiled. */ 655 if (BE (preg->used == 0 || dfa->init_state == NULL 656 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL 657 || dfa->init_state_begbuf == NULL, 0)) 608 if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL 609 || dfa->init_state_word == NULL 610 || dfa->init_state_nl == NULL 611 || dfa->init_state_begbuf == NULL)) 658 612 return REG_NOMATCH; 659 613 660 #ifdef DEBUG661 614 /* We assume front-end functions already check them. */ 662 assert (start + range >= 0 && start + range <= length); 663 #endif 615 DEBUG_ASSERT (0 <= last_start && last_start <= length); 664 616 665 617 /* If initial states with non-begbuf contexts have no elements, … … 671 623 || !preg->newline_anchor)) 672 624 { 673 if (start != 0 && start + range!= 0)625 if (start != 0 && last_start != 0) 674 626 return REG_NOMATCH; 675 start = range= 0;627 start = last_start = 0; 676 628 } 677 629 … … 680 632 681 633 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, 682 preg->translate, preg->syntax & RE_ICASE, dfa); 683 if (BE (err != REG_NOERROR, 0)) 634 preg->translate, (preg->syntax & RE_ICASE) != 0, 635 dfa); 636 if (__glibc_unlikely (err != REG_NOERROR)) 684 637 goto free_return; 685 638 mctx.input.stop = stop; … … 688 641 689 642 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); 690 if ( BE (err != REG_NOERROR, 0))643 if (__glibc_unlikely (err != REG_NOERROR)) 691 644 goto free_return; 692 645 … … 697 650 if (nmatch > 1 || dfa->has_mb_node) 698 651 { 699 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); 700 if (BE (mctx.state_log == NULL, 0)) 652 /* Avoid overflow. */ 653 if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) 654 <= mctx.input.bufs_len))) 701 655 { 702 656 err = REG_ESPACE; 703 657 goto free_return; 704 658 } 705 } 706 else 707 mctx.state_log = NULL; 659 660 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); 661 if (__glibc_unlikely (mctx.state_log == NULL)) 662 { 663 err = REG_ESPACE; 664 goto free_return; 665 } 666 } 708 667 709 668 match_first = start; … … 711 670 : CONTEXT_NEWLINE | CONTEXT_BEGBUF; 712 671 713 /* Check incrementally whether of not the input string match. */714 incr = ( range < 0) ? -1 : 1;715 left_lim = ( range < 0) ? start + range: start;716 right_lim = ( range < 0) ? start : start + range;672 /* Check incrementally whether the input string matches. */ 673 incr = (last_start < start) ? -1 : 1; 674 left_lim = (last_start < start) ? last_start : start; 675 right_lim = (last_start < start) ? start : last_start; 717 676 sb = dfa->mb_cur_max == 1; 718 677 match_kind = 719 678 (fastmap 720 679 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) 721 | ( range >= 0? 2 : 0)680 | (start <= last_start ? 2 : 0) 722 681 | (t != NULL ? 1 : 0)) 723 682 : 8); … … 742 701 case 7: 743 702 /* Fastmap with single-byte translation, match forward. */ 744 while ( BE (match_first < right_lim, 1)703 while (__glibc_likely (match_first < right_lim) 745 704 && !fastmap[t[(unsigned char) string[match_first]]]) 746 705 ++match_first; … … 749 708 case 6: 750 709 /* Fastmap without translation, match forward. */ 751 while ( BE (match_first < right_lim, 1)710 while (__glibc_likely (match_first < right_lim) 752 711 && !fastmap[(unsigned char) string[match_first]]) 753 712 ++match_first; 754 713 755 714 forward_match_found_start_or_reached_end: 756 if ( BE (match_first == right_lim, 0))715 if (__glibc_unlikely (match_first == right_lim)) 757 716 { 758 717 ch = match_first >= length … … 786 745 /* If MATCH_FIRST is out of the valid range, reconstruct the 787 746 buffers. */ 788 unsigned int offset = match_first - mctx.input.raw_mbs_idx; 789 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) 747 __re_size_t offset = match_first - mctx.input.raw_mbs_idx; 748 if (__glibc_unlikely (offset 749 >= (__re_size_t) mctx.input.valid_raw_len)) 790 750 { 791 751 err = re_string_reconstruct (&mctx.input, match_first, 792 752 eflags); 793 if ( BE (err != REG_NOERROR, 0))753 if (__glibc_unlikely (err != REG_NOERROR)) 794 754 goto free_return; 795 755 796 756 offset = match_first - mctx.input.raw_mbs_idx; 797 757 } 798 /* If MATCH_FIRST is out of the buffer, leave it as '\0'. 799 Note that MATCH_FIRST must not be smaller than 0. */ 800 ch = (match_first >= length 801 ? 0 : re_string_byte_at (&mctx.input, offset)); 758 /* Use buffer byte if OFFSET is in buffer, otherwise '\0'. */ 759 ch = (offset < mctx.input.valid_len 760 ? re_string_byte_at (&mctx.input, offset) : 0); 802 761 if (fastmap[ch]) 803 762 break; 804 763 match_first += incr; 805 764 if (match_first < left_lim || match_first > right_lim) 806 807 808 809 765 { 766 err = REG_NOMATCH; 767 goto free_return; 768 } 810 769 } 811 770 break; … … 815 774 the matching starts from the beginning of the buffer. */ 816 775 err = re_string_reconstruct (&mctx.input, match_first, eflags); 817 if ( BE (err != REG_NOERROR, 0))776 if (__glibc_unlikely (err != REG_NOERROR)) 818 777 goto free_return; 819 778 820 #ifdef RE_ENABLE_I18N 821 /* Don't consider this char as a possible match start if it part, 822 yet isn't the head, of a multibyte character. */ 779 /* Don't consider this char as a possible match start if it part, 780 yet isn't the head, of a multibyte character. */ 823 781 if (!sb && !re_string_first_byte (&mctx.input, 0)) 824 782 continue; 825 #endif826 783 827 784 /* It seems to be appropriate one, then use the matcher. */ … … 829 786 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; 830 787 match_last = check_matching (&mctx, fl_longest_match, 831 range >= 0? &match_first : NULL);788 start <= last_start ? &match_first : NULL); 832 789 if (match_last != -1) 833 790 { 834 if ( BE (match_last == -2, 0))791 if (__glibc_unlikely (match_last == -2)) 835 792 { 836 793 err = REG_ESPACE; … … 852 809 if (err == REG_NOERROR) 853 810 break; 854 if ( BE (err != REG_NOMATCH, 0))811 if (__glibc_unlikely (err != REG_NOMATCH)) 855 812 goto free_return; 856 813 match_last = -1; … … 864 821 } 865 822 866 #ifdef DEBUG 867 assert (match_last != -1); 868 assert (err == REG_NOERROR); 869 #endif 823 DEBUG_ASSERT (match_last != -1); 824 DEBUG_ASSERT (err == REG_NOERROR); 870 825 871 826 /* Set pmatch[] if we need. */ 872 827 if (nmatch > 0) 873 828 { 874 intreg_idx;829 Idx reg_idx; 875 830 876 831 /* Initialize registers. */ … … 881 836 pmatch[0].rm_so = 0; 882 837 pmatch[0].rm_eo = mctx.match_last; 838 /* FIXME: This function should fail if mctx.match_last exceeds 839 the maximum possible regoff_t value. We need a new error 840 code REG_OVERFLOW. */ 883 841 884 842 if (!preg->no_sub && nmatch > 1) … … 886 844 err = set_regs (preg, &mctx, nmatch, pmatch, 887 845 dfa->has_plural_match && dfa->nbackref > 0); 888 if ( BE (err != REG_NOERROR, 0))846 if (__glibc_unlikely (err != REG_NOERROR)) 889 847 goto free_return; 890 848 } 891 849 892 /* At last, add the offset to the each registers, since we slided850 /* At last, add the offset to each register, since we slid 893 851 the buffers so that we could assume that the matching starts 894 852 from 0. */ … … 896 854 if (pmatch[reg_idx].rm_so != -1) 897 855 { 898 #ifdef RE_ENABLE_I18N 899 if (BE (mctx.input.offsets_needed != 0, 0)) 856 if (__glibc_unlikely (mctx.input.offsets_needed != 0)) 900 857 { 901 858 pmatch[reg_idx].rm_so = … … 908 865 : mctx.input.offsets[pmatch[reg_idx].rm_eo]); 909 866 } 910 #else911 assert (mctx.input.offsets_needed == 0);912 #endif913 867 pmatch[reg_idx].rm_so += match_first; 914 868 pmatch[reg_idx].rm_eo += match_first; … … 921 875 922 876 if (dfa->subexp_map) 923 924 925 926 927 928 929 930 877 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) 878 if (dfa->subexp_map[reg_idx] != reg_idx) 879 { 880 pmatch[reg_idx + 1].rm_so 881 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; 882 pmatch[reg_idx + 1].rm_eo 883 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; 884 } 931 885 } 932 886 … … 940 894 941 895 static reg_errcode_t 942 prune_impossible_nodes (mctx) 943 re_match_context_t *mctx; 896 __attribute_warn_unused_result__ 897 prune_impossible_nodes (re_match_context_t *mctx) 944 898 { 945 899 const re_dfa_t *const dfa = mctx->dfa; 946 inthalt_node, match_last;900 Idx halt_node, match_last; 947 901 reg_errcode_t ret; 948 902 re_dfastate_t **sifted_states; 949 903 re_dfastate_t **lim_states = NULL; 950 904 re_sift_context_t sctx; 951 #ifdef DEBUG 952 assert (mctx->state_log != NULL); 953 #endif 905 DEBUG_ASSERT (mctx->state_log != NULL); 954 906 match_last = mctx->match_last; 955 907 halt_node = mctx->last_node; 908 909 /* Avoid overflow. */ 910 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) 911 <= match_last)) 912 return REG_ESPACE; 913 956 914 sifted_states = re_malloc (re_dfastate_t *, match_last + 1); 957 if ( BE (sifted_states == NULL, 0))915 if (__glibc_unlikely (sifted_states == NULL)) 958 916 { 959 917 ret = REG_ESPACE; … … 963 921 { 964 922 lim_states = re_malloc (re_dfastate_t *, match_last + 1); 965 if ( BE (lim_states == NULL, 0))923 if (__glibc_unlikely (lim_states == NULL)) 966 924 { 967 925 ret = REG_ESPACE; … … 976 934 ret = sift_states_backward (mctx, &sctx); 977 935 re_node_set_free (&sctx.limits); 978 if ( BE (ret != REG_NOERROR, 0))936 if (__glibc_unlikely (ret != REG_NOERROR)) 979 937 goto free_return; 980 938 if (sifted_states[0] != NULL || lim_states[0] != NULL) … … 998 956 re_free (lim_states); 999 957 lim_states = NULL; 1000 if ( BE (ret != REG_NOERROR, 0))958 if (__glibc_unlikely (ret != REG_NOERROR)) 1001 959 goto free_return; 1002 960 } … … 1006 964 ret = sift_states_backward (mctx, &sctx); 1007 965 re_node_set_free (&sctx.limits); 1008 if ( BE (ret != REG_NOERROR, 0))966 if (__glibc_unlikely (ret != REG_NOERROR)) 1009 967 goto free_return; 968 if (sifted_states[0] == NULL) 969 { 970 ret = REG_NOMATCH; 971 goto free_return; 972 } 1010 973 } 1011 974 re_free (mctx->state_log); … … 1025 988 since initial states may have constraints like "\<", "^", etc.. */ 1026 989 1027 static inline re_dfastate_t * 1028 __attribute ((always_inline)) internal_function 990 static __always_inline re_dfastate_t * 1029 991 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, 1030 intidx)992 Idx idx) 1031 993 { 1032 994 const re_dfa_t *const dfa = mctx->dfa; … … 1059 1021 1060 1022 /* Check whether the regular expression match input string INPUT or not, 1061 and return the index where the matching end , return -1 if not match,1062 orreturn -2 in case of an error.1023 and return the index where the matching end. Return -1 if 1024 there is no match, and return -2 in case of an error. 1063 1025 FL_LONGEST_MATCH means we want the POSIX longest matching. 1064 1026 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the 1065 1027 next place where we may want to try matching. 1066 Note that the matcher assume that the maching starts from the current1028 Note that the matcher assumes that the matching starts from the current 1067 1029 index of the buffer. */ 1068 1030 1069 static int1070 internal_function 1071 check_matching (re_match_context_t *mctx, intfl_longest_match,1072 int*p_match_first)1031 static Idx 1032 __attribute_warn_unused_result__ 1033 check_matching (re_match_context_t *mctx, bool fl_longest_match, 1034 Idx *p_match_first) 1073 1035 { 1074 1036 const re_dfa_t *const dfa = mctx->dfa; 1075 1037 reg_errcode_t err; 1076 intmatch = 0;1077 intmatch_last = -1;1078 intcur_str_idx = re_string_cur_idx (&mctx->input);1038 Idx match = 0; 1039 Idx match_last = -1; 1040 Idx cur_str_idx = re_string_cur_idx (&mctx->input); 1079 1041 re_dfastate_t *cur_state; 1080 intat_init_state = p_match_first != NULL;1081 intnext_start_idx = cur_str_idx;1042 bool at_init_state = p_match_first != NULL; 1043 Idx next_start_idx = cur_str_idx; 1082 1044 1083 1045 err = REG_NOERROR; 1084 1046 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); 1085 1047 /* An initial state must not be NULL (invalid). */ 1086 if ( BE (cur_state == NULL, 0))1087 { 1088 assert(err == REG_ESPACE);1048 if (__glibc_unlikely (cur_state == NULL)) 1049 { 1050 DEBUG_ASSERT (err == REG_ESPACE); 1089 1051 return -2; 1090 1052 } … … 1096 1058 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them 1097 1059 later. E.g. Processing back references. */ 1098 if ( BE (dfa->nbackref, 0))1099 { 1100 at_init_state = 0;1060 if (__glibc_unlikely (dfa->nbackref)) 1061 { 1062 at_init_state = false; 1101 1063 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); 1102 if ( BE (err != REG_NOERROR, 0))1064 if (__glibc_unlikely (err != REG_NOERROR)) 1103 1065 return err; 1104 1066 … … 1106 1068 { 1107 1069 err = transit_state_bkref (mctx, &cur_state->nodes); 1108 if ( BE (err != REG_NOERROR, 0))1109 1070 if (__glibc_unlikely (err != REG_NOERROR)) 1071 return err; 1110 1072 } 1111 1073 } … … 1113 1075 1114 1076 /* If the RE accepts NULL string. */ 1115 if ( BE (cur_state->halt, 0))1077 if (__glibc_unlikely (cur_state->halt)) 1116 1078 { 1117 1079 if (!cur_state->has_constraint … … 1131 1093 { 1132 1094 re_dfastate_t *old_state = cur_state; 1133 int next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1134 1135 if (BE (next_char_idx >= mctx->input.bufs_len, 0) 1136 || (BE (next_char_idx >= mctx->input.valid_len, 0) 1137 && mctx->input.valid_len < mctx->input.len)) 1138 { 1139 err = extend_buffers (mctx); 1140 if (BE (err != REG_NOERROR, 0)) 1141 { 1142 assert (err == REG_ESPACE); 1095 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1096 1097 if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len) 1098 && mctx->input.bufs_len < mctx->input.len) 1099 || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len) 1100 && mctx->input.valid_len < mctx->input.len)) 1101 { 1102 err = extend_buffers (mctx, next_char_idx + 1); 1103 if (__glibc_unlikely (err != REG_NOERROR)) 1104 { 1105 DEBUG_ASSERT (err == REG_ESPACE); 1143 1106 return -2; 1144 1107 } 1145 1108 } 1146 1109 1147 1110 cur_state = transit_state (&err, mctx, cur_state); … … 1154 1117 state using the state log, if available and if we have not 1155 1118 already found a valid (even if not the longest) match. */ 1156 if ( BE (err != REG_NOERROR, 0))1119 if (__glibc_unlikely (err != REG_NOERROR)) 1157 1120 return -2; 1158 1121 … … 1163 1126 } 1164 1127 1165 if ( BE (at_init_state, 0))1128 if (__glibc_unlikely (at_init_state)) 1166 1129 { 1167 1130 if (old_state == cur_state) 1168 1131 next_start_idx = next_char_idx; 1169 1132 else 1170 at_init_state = 0;1133 at_init_state = false; 1171 1134 } 1172 1135 … … 1199 1162 /* Check NODE match the current context. */ 1200 1163 1201 static int 1202 internal_function 1203 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) 1164 static bool 1165 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) 1204 1166 { 1205 1167 re_token_type_t type = dfa->nodes[node].type; 1206 1168 unsigned int constraint = dfa->nodes[node].constraint; 1207 1169 if (type != END_OF_RE) 1208 return 0;1170 return false; 1209 1171 if (!constraint) 1210 return 1;1172 return true; 1211 1173 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) 1212 return 0;1213 return 1;1174 return false; 1175 return true; 1214 1176 } 1215 1177 … … 1218 1180 match the context, return the node. */ 1219 1181 1220 static int 1221 internal_function 1182 static Idx 1222 1183 check_halt_state_context (const re_match_context_t *mctx, 1223 const re_dfastate_t *state, intidx)1224 { 1225 inti;1184 const re_dfastate_t *state, Idx idx) 1185 { 1186 Idx i; 1226 1187 unsigned int context; 1227 #ifdef DEBUG 1228 assert (state->halt); 1229 #endif 1188 DEBUG_ASSERT (state->halt); 1230 1189 context = re_string_context_at (&mctx->input, idx, mctx->eflags); 1231 1190 for (i = 0; i < state->nodes.nelem; ++i) … … 1237 1196 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA 1238 1197 corresponding to the DFA). 1239 Return the destination node, and update EPS_VIA_NODES , return -1 in case1240 of errors. */1241 1242 static int1243 internal_function 1244 proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,1245 int *pidx, intnode, re_node_set *eps_via_nodes,1198 Return the destination node, and update EPS_VIA_NODES; 1199 return -1 on match failure, -2 on error. */ 1200 1201 static Idx 1202 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, 1203 regmatch_t *prevregs, 1204 Idx *pidx, Idx node, re_node_set *eps_via_nodes, 1246 1205 struct re_fail_stack_t *fs) 1247 1206 { 1248 1207 const re_dfa_t *const dfa = mctx->dfa; 1249 int i, err;1250 1208 if (IS_EPSILON_NODE (dfa->nodes[node].type)) 1251 1209 { 1252 1210 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; 1253 1211 re_node_set *edests = &dfa->edests[node]; 1254 int dest_node; 1255 err = re_node_set_insert (eps_via_nodes, node); 1256 if (BE (err < 0, 0)) 1257 return -2; 1258 /* Pick up a valid destination, or return -1 if none is found. */ 1259 for (dest_node = -1, i = 0; i < edests->nelem; ++i) 1260 { 1261 int candidate = edests->elems[i]; 1212 1213 if (! re_node_set_contains (eps_via_nodes, node)) 1214 { 1215 bool ok = re_node_set_insert (eps_via_nodes, node); 1216 if (__glibc_unlikely (! ok)) 1217 return -2; 1218 } 1219 1220 /* Pick a valid destination, or return -1 if none is found. */ 1221 Idx dest_node = -1; 1222 for (Idx i = 0; i < edests->nelem; i++) 1223 { 1224 Idx candidate = edests->elems[i]; 1262 1225 if (!re_node_set_contains (cur_nodes, candidate)) 1263 1226 continue; … … 1265 1228 dest_node = candidate; 1266 1229 1267 1230 else 1268 1231 { 1269 1232 /* In order to avoid infinite loop like "(a*)*", return the second 1270 1233 epsilon-transition if the first was already considered. */ 1271 1234 if (re_node_set_contains (eps_via_nodes, dest_node)) 1272 1235 return candidate; 1273 1236 1274 1237 /* Otherwise, push the second epsilon-transition on the fail stack. */ 1275 1238 else if (fs != NULL 1276 1239 && push_fail_stack (fs, *pidx, candidate, nregs, regs, 1277 1240 prevregs, eps_via_nodes)) 1278 1241 return -2; 1279 1242 … … 1286 1249 else 1287 1250 { 1288 intnaccepted = 0;1251 Idx naccepted = 0; 1289 1252 re_token_type_t type = dfa->nodes[node].type; 1290 1253 1291 #ifdef RE_ENABLE_I18N1292 1254 if (dfa->nodes[node].accept_mb) 1293 1255 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); 1294 else 1295 #endif /* RE_ENABLE_I18N */ 1296 if (type == OP_BACK_REF) 1297 { 1298 int subexp_idx = dfa->nodes[node].opr.idx + 1; 1299 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; 1256 else if (type == OP_BACK_REF) 1257 { 1258 Idx subexp_idx = dfa->nodes[node].opr.idx + 1; 1259 if (subexp_idx < nregs) 1260 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; 1300 1261 if (fs != NULL) 1301 1262 { 1302 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) 1263 if (subexp_idx >= nregs 1264 || regs[subexp_idx].rm_so == -1 1265 || regs[subexp_idx].rm_eo == -1) 1303 1266 return -1; 1304 1267 else if (naccepted) 1305 1268 { 1306 1269 char *buf = (char *) re_string_get_buffer (&mctx->input); 1307 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, 1308 naccepted) != 0) 1270 if (mctx->input.valid_len - *pidx < naccepted 1271 || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, 1272 naccepted) 1273 != 0)) 1309 1274 return -1; 1310 1275 } … … 1313 1278 if (naccepted == 0) 1314 1279 { 1315 intdest_node;1316 err= re_node_set_insert (eps_via_nodes, node);1317 if ( BE (err < 0, 0))1280 Idx dest_node; 1281 bool ok = re_node_set_insert (eps_via_nodes, node); 1282 if (__glibc_unlikely (! ok)) 1318 1283 return -2; 1319 1284 dest_node = dfa->edests[node].elems[0]; … … 1327 1292 || check_node_accept (mctx, dfa->nodes + node, *pidx)) 1328 1293 { 1329 intdest_node = dfa->nexts[node];1294 Idx dest_node = dfa->nexts[node]; 1330 1295 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; 1331 1296 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL … … 1341 1306 1342 1307 static reg_errcode_t 1343 internal_function 1344 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, 1345 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) 1308 __attribute_warn_unused_result__ 1309 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, 1310 Idx nregs, regmatch_t *regs, regmatch_t *prevregs, 1311 re_node_set *eps_via_nodes) 1346 1312 { 1347 1313 reg_errcode_t err; 1348 int num = fs->num++;1349 if ( fs->num == fs->alloc)1314 Idx num = fs->num; 1315 if (num == fs->alloc) 1350 1316 { 1351 1317 struct re_fail_stack_ent_t *new_array; 1352 new_array = re alloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)1353 * fs->alloc * 2));1318 new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t, 1319 fs->alloc * 2); 1354 1320 if (new_array == NULL) 1355 1321 return REG_ESPACE; … … 1359 1325 fs->stack[num].idx = str_idx; 1360 1326 fs->stack[num].node = dest_node; 1361 fs->stack[num].regs = re_malloc (regmatch_t, nregs);1327 fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs); 1362 1328 if (fs->stack[num].regs == NULL) 1363 1329 return REG_ESPACE; 1330 fs->num = num + 1; 1364 1331 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); 1332 memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs); 1365 1333 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); 1366 1334 return err; 1367 1335 } 1368 1336 1369 static int 1370 internal_function 1371 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, 1372 regmatch_t *regs, re_node_set *eps_via_nodes) 1373 { 1374 int num = --fs->num; 1375 assert (num >= 0); 1337 static Idx 1338 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, 1339 regmatch_t *regs, regmatch_t *prevregs, 1340 re_node_set *eps_via_nodes) 1341 { 1342 if (fs == NULL || fs->num == 0) 1343 return -1; 1344 Idx num = --fs->num; 1376 1345 *pidx = fs->stack[num].idx; 1377 1346 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); 1347 memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs); 1378 1348 re_node_set_free (eps_via_nodes); 1379 1349 re_free (fs->stack[num].regs); 1380 1350 *eps_via_nodes = fs->stack[num].eps_via_nodes; 1351 DEBUG_ASSERT (0 <= fs->stack[num].node); 1381 1352 return fs->stack[num].node; 1382 1353 } 1354 1355 1356 #define DYNARRAY_STRUCT regmatch_list 1357 #define DYNARRAY_ELEMENT regmatch_t 1358 #define DYNARRAY_PREFIX regmatch_list_ 1359 #include <malloc/dynarray-skeleton.c> 1383 1360 1384 1361 /* Set the positions where the subexpressions are starts/ends to registers … … 1388 1365 1389 1366 static reg_errcode_t 1390 internal_function 1367 __attribute_warn_unused_result__ 1391 1368 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, 1392 regmatch_t *pmatch, intfl_backtrack)1393 { 1394 const re_dfa_t *dfa = (const re_dfa_t *)preg->buffer;1395 intidx, cur_node;1369 regmatch_t *pmatch, bool fl_backtrack) 1370 { 1371 const re_dfa_t *dfa = preg->buffer; 1372 Idx idx, cur_node; 1396 1373 re_node_set eps_via_nodes; 1397 1374 struct re_fail_stack_t *fs; 1398 1375 struct re_fail_stack_t fs_body = { 0, 2, NULL }; 1399 regmatch_t *prev_idx_match; 1400 int prev_idx_match_malloced = 0; 1401 1402 #ifdef DEBUG 1403 assert (nmatch > 1); 1404 assert (mctx->state_log != NULL); 1405 #endif 1376 struct regmatch_list prev_match; 1377 regmatch_list_init (&prev_match); 1378 1379 DEBUG_ASSERT (nmatch > 1); 1380 DEBUG_ASSERT (mctx->state_log != NULL); 1406 1381 if (fl_backtrack) 1407 1382 { … … 1417 1392 re_node_set_init_empty (&eps_via_nodes); 1418 1393 1419 if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) 1420 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); 1421 else 1422 { 1423 prev_idx_match = re_malloc (regmatch_t, nmatch); 1424 if (prev_idx_match == NULL) 1425 { 1426 free_fail_stack_return (fs); 1427 return REG_ESPACE; 1428 } 1429 prev_idx_match_malloced = 1; 1430 } 1394 if (!regmatch_list_resize (&prev_match, nmatch)) 1395 { 1396 regmatch_list_free (&prev_match); 1397 free_fail_stack_return (fs); 1398 return REG_ESPACE; 1399 } 1400 regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match); 1431 1401 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); 1432 1402 … … 1435 1405 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); 1436 1406 1437 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1438 { 1439 int reg_idx; 1407 if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1408 || (fs && re_node_set_contains (&eps_via_nodes, cur_node))) 1409 { 1410 Idx reg_idx; 1411 cur_node = -1; 1440 1412 if (fs) 1441 1413 { 1442 1414 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 1443 1415 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) 1444 break; 1445 if (reg_idx == nmatch) 1446 { 1447 re_node_set_free (&eps_via_nodes); 1448 if (prev_idx_match_malloced) 1449 re_free (prev_idx_match); 1450 return free_fail_stack_return (fs); 1451 } 1452 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1453 &eps_via_nodes); 1454 } 1455 else 1416 { 1417 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1418 prev_idx_match, &eps_via_nodes); 1419 break; 1420 } 1421 } 1422 if (cur_node < 0) 1456 1423 { 1457 1424 re_node_set_free (&eps_via_nodes); 1458 if (prev_idx_match_malloced) 1459 re_free (prev_idx_match); 1460 return REG_NOERROR; 1425 regmatch_list_free (&prev_match); 1426 return free_fail_stack_return (fs); 1461 1427 } 1462 1428 } 1463 1429 1464 1430 /* Proceed to next node. */ 1465 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, 1431 cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match, 1432 &idx, cur_node, 1466 1433 &eps_via_nodes, fs); 1467 1434 1468 if ( BE (cur_node < 0,0))1469 { 1470 if ( BE (cur_node == -2, 0))1435 if (__glibc_unlikely (cur_node < 0)) 1436 { 1437 if (__glibc_unlikely (cur_node == -2)) 1471 1438 { 1472 1439 re_node_set_free (&eps_via_nodes); 1473 if (prev_idx_match_malloced) 1474 re_free (prev_idx_match); 1440 regmatch_list_free (&prev_match); 1475 1441 free_fail_stack_return (fs); 1476 1442 return REG_ESPACE; 1477 1443 } 1478 if (fs) 1479 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1480 &eps_via_nodes); 1481 else 1444 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1445 prev_idx_match, &eps_via_nodes); 1446 if (cur_node < 0) 1482 1447 { 1483 1448 re_node_set_free (&eps_via_nodes); 1484 if (prev_idx_match_malloced)1485 re_free (prev_idx_match);1449 regmatch_list_free (&prev_match); 1450 free_fail_stack_return (fs); 1486 1451 return REG_NOMATCH; 1487 1452 } … … 1489 1454 } 1490 1455 re_node_set_free (&eps_via_nodes); 1491 if (prev_idx_match_malloced) 1492 re_free (prev_idx_match); 1456 regmatch_list_free (&prev_match); 1493 1457 return free_fail_stack_return (fs); 1494 1458 } 1495 1459 1496 1460 static reg_errcode_t 1497 internal_function1498 1461 free_fail_stack_return (struct re_fail_stack_t *fs) 1499 1462 { 1500 1463 if (fs) 1501 1464 { 1502 intfs_idx;1465 Idx fs_idx; 1503 1466 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) 1504 1467 { … … 1512 1475 1513 1476 static void 1514 internal_function1515 1477 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 1516 regmatch_t *prev_idx_match, int cur_node, int cur_idx, intnmatch)1478 regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) 1517 1479 { 1518 1480 int type = dfa->nodes[cur_node].type; 1519 1481 if (type == OP_OPEN_SUBEXP) 1520 1482 { 1521 intreg_num = dfa->nodes[cur_node].opr.idx + 1;1483 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1522 1484 1523 1485 /* We are at the first node of this sub expression. */ … … 1530 1492 else if (type == OP_CLOSE_SUBEXP) 1531 1493 { 1532 int reg_num = dfa->nodes[cur_node].opr.idx + 1; 1494 /* We are at the last node of this sub expression. */ 1495 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1533 1496 if (reg_num < nmatch) 1534 1497 { 1535 /* We are at the last node of this sub expression. */1536 1498 if (pmatch[reg_num].rm_so < cur_idx) 1537 1499 { … … 1564 1526 Updated state_log will be wrote to STATE_LOG. 1565 1527 1566 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...1528 Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... 1567 1529 1. When STR_IDX == MATCH_LAST(the last index in the state_log): 1568 If `a' isn't the LAST_NODE and `a' can't epsilon transit to1569 the LAST_NODE, we throw away the node `a'.1570 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts1571 string `s' and transit to `b':1530 If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to 1531 the LAST_NODE, we throw away the node 'a'. 1532 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts 1533 string 's' and transit to 'b': 1572 1534 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw 1573 away the node `a'.1535 away the node 'a'. 1574 1536 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is 1575 thrown away, we throw away the node `a'.1537 thrown away, we throw away the node 'a'. 1576 1538 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': 1577 1539 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the 1578 node `a'.1540 node 'a'. 1579 1541 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, 1580 we throw away the node `a'. */1542 we throw away the node 'a'. */ 1581 1543 1582 1544 #define STATE_NODE_CONTAINS(state,node) \ … … 1584 1546 1585 1547 static reg_errcode_t 1586 internal_function1587 1548 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) 1588 1549 { 1589 1550 reg_errcode_t err; 1590 1551 int null_cnt = 0; 1591 intstr_idx = sctx->last_str_idx;1552 Idx str_idx = sctx->last_str_idx; 1592 1553 re_node_set cur_dest; 1593 1554 1594 #ifdef DEBUG 1595 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); 1596 #endif 1555 DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); 1597 1556 1598 1557 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon 1599 1558 transit to the last_node and the last_node itself. */ 1600 1559 err = re_node_set_init_1 (&cur_dest, sctx->last_node); 1601 if ( BE (err != REG_NOERROR, 0))1560 if (__glibc_unlikely (err != REG_NOERROR)) 1602 1561 return err; 1603 1562 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1604 if ( BE (err != REG_NOERROR, 0))1563 if (__glibc_unlikely (err != REG_NOERROR)) 1605 1564 goto free_return; 1606 1565 … … 1623 1582 { 1624 1583 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); 1625 if (BE (err != REG_NOERROR, 0))1584 if (__glibc_unlikely (err != REG_NOERROR)) 1626 1585 goto free_return; 1627 1586 } … … 1632 1591 And update state_log. */ 1633 1592 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1634 if ( BE (err != REG_NOERROR, 0))1593 if (__glibc_unlikely (err != REG_NOERROR)) 1635 1594 goto free_return; 1636 1595 } … … 1642 1601 1643 1602 static reg_errcode_t 1644 internal_function 1603 __attribute_warn_unused_result__ 1645 1604 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, 1646 intstr_idx, re_node_set *cur_dest)1605 Idx str_idx, re_node_set *cur_dest) 1647 1606 { 1648 1607 const re_dfa_t *const dfa = mctx->dfa; 1649 1608 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; 1650 inti;1609 Idx i; 1651 1610 1652 1611 /* Then build the next sifted state. 1653 We build the next sifted state on `cur_dest', and update1654 `sifted_states[str_idx]' with `cur_dest'.1612 We build the next sifted state on 'cur_dest', and update 1613 'sifted_states[str_idx]' with 'cur_dest'. 1655 1614 Note: 1656 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.1657 `cur_src' points the node_set of the old `state_log[str_idx]'1615 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. 1616 'cur_src' points the node_set of the old 'state_log[str_idx]' 1658 1617 (with the epsilon nodes pre-filtered out). */ 1659 1618 for (i = 0; i < cur_src->nelem; i++) 1660 1619 { 1661 intprev_node = cur_src->elems[i];1620 Idx prev_node = cur_src->elems[i]; 1662 1621 int naccepted = 0; 1663 int ret; 1664 1665 #ifdef DEBUG 1666 re_token_type_t type = dfa->nodes[prev_node].type; 1667 assert (!IS_EPSILON_NODE (type)); 1668 #endif 1669 #ifdef RE_ENABLE_I18N 1670 /* If the node may accept `multi byte'. */ 1622 bool ok; 1623 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); 1624 1625 /* If the node may accept "multi byte". */ 1671 1626 if (dfa->nodes[prev_node].accept_mb) 1672 1627 naccepted = sift_states_iter_mb (mctx, sctx, prev_node, 1673 1628 str_idx, sctx->last_str_idx); 1674 #endif /* RE_ENABLE_I18N */1675 1629 1676 1630 /* We don't check backreferences here. … … 1687 1641 if (sctx->limits.nelem) 1688 1642 { 1689 intto_idx = str_idx + naccepted;1643 Idx to_idx = str_idx + naccepted; 1690 1644 if (check_dst_limits (mctx, &sctx->limits, 1691 1645 dfa->nexts[prev_node], to_idx, … … 1693 1647 continue; 1694 1648 } 1695 ret= re_node_set_insert (cur_dest, prev_node);1696 if ( BE (ret == -1, 0))1649 ok = re_node_set_insert (cur_dest, prev_node); 1650 if (__glibc_unlikely (! ok)) 1697 1651 return REG_ESPACE; 1698 1652 } … … 1704 1658 1705 1659 static reg_errcode_t 1706 internal_function 1707 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) 1708 { 1709 int top = mctx->state_log_top; 1710 1711 if (next_state_log_idx >= mctx->input.bufs_len1660 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) 1661 { 1662 Idx top = mctx->state_log_top; 1663 1664 if ((next_state_log_idx >= mctx->input.bufs_len 1665 && mctx->input.bufs_len < mctx->input.len) 1712 1666 || (next_state_log_idx >= mctx->input.valid_len 1713 1667 && mctx->input.valid_len < mctx->input.len)) 1714 1668 { 1715 1669 reg_errcode_t err; 1716 err = extend_buffers (mctx );1717 if ( BE (err != REG_NOERROR, 0))1670 err = extend_buffers (mctx, next_state_log_idx + 1); 1671 if (__glibc_unlikely (err != REG_NOERROR)) 1718 1672 return err; 1719 1673 } … … 1721 1675 if (top < next_state_log_idx) 1722 1676 { 1677 DEBUG_ASSERT (mctx->state_log != NULL); 1723 1678 memset (mctx->state_log + top + 1, '\0', 1724 1679 sizeof (re_dfastate_t *) * (next_state_log_idx - top)); … … 1729 1684 1730 1685 static reg_errcode_t 1731 internal_function1732 1686 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, 1733 re_dfastate_t **src, intnum)1734 { 1735 intst_idx;1687 re_dfastate_t **src, Idx num) 1688 { 1689 Idx st_idx; 1736 1690 reg_errcode_t err; 1737 1691 for (st_idx = 0; st_idx < num; ++st_idx) … … 1744 1698 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, 1745 1699 &src[st_idx]->nodes); 1746 if ( BE (err != REG_NOERROR, 0))1700 if (__glibc_unlikely (err != REG_NOERROR)) 1747 1701 return err; 1748 1702 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); 1749 1703 re_node_set_free (&merged_set); 1750 if ( BE (err != REG_NOERROR, 0))1704 if (__glibc_unlikely (err != REG_NOERROR)) 1751 1705 return err; 1752 1706 } … … 1756 1710 1757 1711 static reg_errcode_t 1758 internal_function1759 1712 update_cur_sifted_state (const re_match_context_t *mctx, 1760 re_sift_context_t *sctx, intstr_idx,1713 re_sift_context_t *sctx, Idx str_idx, 1761 1714 re_node_set *dest_nodes) 1762 1715 { … … 1776 1729 DEST_NODE. */ 1777 1730 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); 1778 if ( BE (err != REG_NOERROR, 0))1731 if (__glibc_unlikely (err != REG_NOERROR)) 1779 1732 return err; 1780 1733 … … 1784 1737 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, 1785 1738 mctx->bkref_ents, str_idx); 1786 if ( BE (err != REG_NOERROR, 0))1739 if (__glibc_unlikely (err != REG_NOERROR)) 1787 1740 return err; 1788 1741 } … … 1790 1743 1791 1744 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); 1792 if ( BE (err != REG_NOERROR, 0))1745 if (__glibc_unlikely (err != REG_NOERROR)) 1793 1746 return err; 1794 1747 } … … 1797 1750 { 1798 1751 err = sift_states_bkref (mctx, sctx, str_idx, candidates); 1799 if ( BE (err != REG_NOERROR, 0))1752 if (__glibc_unlikely (err != REG_NOERROR)) 1800 1753 return err; 1801 1754 } … … 1804 1757 1805 1758 static reg_errcode_t 1806 internal_function 1759 __attribute_warn_unused_result__ 1807 1760 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, 1808 1761 const re_node_set *candidates) 1809 1762 { 1810 1763 reg_errcode_t err = REG_NOERROR; 1811 inti;1764 Idx i; 1812 1765 1813 1766 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); 1814 if ( BE (err != REG_NOERROR, 0))1767 if (__glibc_unlikely (err != REG_NOERROR)) 1815 1768 return err; 1816 1769 … … 1818 1771 { 1819 1772 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); 1820 if ( BE (err != REG_NOERROR, 0))1821 1773 if (__glibc_unlikely (err != REG_NOERROR)) 1774 return REG_ESPACE; 1822 1775 for (i = 0; i < dest_nodes->nelem; i++) 1823 re_node_set_merge (&state->inveclosure, 1824 dfa->inveclosures + dest_nodes->elems[i]); 1776 { 1777 err = re_node_set_merge (&state->inveclosure, 1778 dfa->inveclosures + dest_nodes->elems[i]); 1779 if (__glibc_unlikely (err != REG_NOERROR)) 1780 return REG_ESPACE; 1781 } 1825 1782 } 1826 1783 return re_node_set_add_intersect (dest_nodes, candidates, … … 1829 1786 1830 1787 static reg_errcode_t 1831 internal_function 1832 sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, 1788 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, 1833 1789 const re_node_set *candidates) 1834 1790 { 1835 intecl_idx;1791 Idx ecl_idx; 1836 1792 reg_errcode_t err; 1837 1793 re_node_set *inv_eclosure = dfa->inveclosures + node; … … 1840 1796 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1841 1797 { 1842 intcur_node = inv_eclosure->elems[ecl_idx];1798 Idx cur_node = inv_eclosure->elems[ecl_idx]; 1843 1799 if (cur_node == node) 1844 1800 continue; 1845 1801 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) 1846 1802 { 1847 intedst1 = dfa->edests[cur_node].elems[0];1848 intedst2 = ((dfa->edests[cur_node].nelem > 1)1803 Idx edst1 = dfa->edests[cur_node].elems[0]; 1804 Idx edst2 = ((dfa->edests[cur_node].nelem > 1) 1849 1805 ? dfa->edests[cur_node].elems[1] : -1); 1850 1806 if ((!re_node_set_contains (inv_eclosure, edst1) … … 1856 1812 err = re_node_set_add_intersect (&except_nodes, candidates, 1857 1813 dfa->inveclosures + cur_node); 1858 if ( BE (err != REG_NOERROR, 0))1814 if (__glibc_unlikely (err != REG_NOERROR)) 1859 1815 { 1860 1816 re_node_set_free (&except_nodes); … … 1866 1822 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1867 1823 { 1868 intcur_node = inv_eclosure->elems[ecl_idx];1824 Idx cur_node = inv_eclosure->elems[ecl_idx]; 1869 1825 if (!re_node_set_contains (&except_nodes, cur_node)) 1870 1826 { 1871 intidx = re_node_set_contains (dest_nodes, cur_node) - 1;1827 Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1; 1872 1828 re_node_set_remove_at (dest_nodes, idx); 1873 1829 } … … 1877 1833 } 1878 1834 1879 static int 1880 internal_function 1881 check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, 1882 int dst_node, int dst_idx, int src_node, int src_idx) 1835 static bool 1836 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, 1837 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) 1883 1838 { 1884 1839 const re_dfa_t *const dfa = mctx->dfa; 1885 intlim_idx, src_pos, dst_pos;1886 1887 intdst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);1888 intsrc_bkref_idx = search_cur_bkref_entry (mctx, src_idx);1840 Idx lim_idx, src_pos, dst_pos; 1841 1842 Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); 1843 Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); 1889 1844 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 1890 1845 { 1891 intsubexp_idx;1846 Idx subexp_idx; 1892 1847 struct re_backref_cache_entry *ent; 1893 1848 ent = mctx->bkref_ents + limits->elems[lim_idx]; … … 1908 1863 continue; /* This is unrelated limitation. */ 1909 1864 else 1910 return 1;1911 } 1912 return 0;1865 return true; 1866 } 1867 return false; 1913 1868 } 1914 1869 1915 1870 static int 1916 internal_function1917 1871 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, 1918 int subexp_idx, int from_node, intbkref_idx)1872 Idx subexp_idx, Idx from_node, Idx bkref_idx) 1919 1873 { 1920 1874 const re_dfa_t *const dfa = mctx->dfa; 1921 1875 const re_node_set *eclosures = dfa->eclosures + from_node; 1922 intnode_idx;1876 Idx node_idx; 1923 1877 1924 1878 /* Else, we are on the boundary: examine the nodes on the epsilon … … 1926 1880 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) 1927 1881 { 1928 intnode = eclosures->elems[node_idx];1882 Idx node = eclosures->elems[node_idx]; 1929 1883 switch (dfa->nodes[node].type) 1930 1884 { … … 1934 1888 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; 1935 1889 do 1936 { 1937 int dst, cpos; 1890 { 1891 Idx dst; 1892 int cpos; 1938 1893 1939 1894 if (ent->node != node) … … 1955 1910 { 1956 1911 if (boundaries & 1) 1957 1912 return -1; 1958 1913 else /* if (boundaries & 2) */ 1959 1914 return 0; 1960 1915 } 1961 1916 … … 1971 1926 ent->eps_reachable_subexps_map 1972 1927 &= ~((bitset_word_t) 1 << subexp_idx); 1973 1928 } 1974 1929 while (ent++->more); 1975 1930 } … … 1995 1950 1996 1951 static int 1997 internal_function 1998 check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, 1999 int subexp_idx, int from_node, int str_idx, 2000 int bkref_idx) 1952 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, 1953 Idx subexp_idx, Idx from_node, Idx str_idx, 1954 Idx bkref_idx) 2001 1955 { 2002 1956 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; … … 2025 1979 2026 1980 static reg_errcode_t 2027 internal_function2028 1981 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, 2029 1982 const re_node_set *candidates, re_node_set *limits, 2030 struct re_backref_cache_entry *bkref_ents, intstr_idx)1983 struct re_backref_cache_entry *bkref_ents, Idx str_idx) 2031 1984 { 2032 1985 reg_errcode_t err; 2033 intnode_idx, lim_idx;1986 Idx node_idx, lim_idx; 2034 1987 2035 1988 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 2036 1989 { 2037 intsubexp_idx;1990 Idx subexp_idx; 2038 1991 struct re_backref_cache_entry *ent; 2039 1992 ent = bkref_ents + limits->elems[lim_idx]; … … 2045 1998 if (ent->subexp_to == str_idx) 2046 1999 { 2047 intops_node = -1;2048 intcls_node = -1;2000 Idx ops_node = -1; 2001 Idx cls_node = -1; 2049 2002 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2050 2003 { 2051 intnode = dest_nodes->elems[node_idx];2004 Idx node = dest_nodes->elems[node_idx]; 2052 2005 re_token_type_t type = dfa->nodes[node].type; 2053 2006 if (type == OP_OPEN_SUBEXP … … 2065 2018 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, 2066 2019 candidates); 2067 if ( BE (err != REG_NOERROR, 0))2020 if (__glibc_unlikely (err != REG_NOERROR)) 2068 2021 return err; 2069 2022 } … … 2073 2026 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2074 2027 { 2075 intnode = dest_nodes->elems[node_idx];2028 Idx node = dest_nodes->elems[node_idx]; 2076 2029 if (!re_node_set_contains (dfa->inveclosures + node, 2077 2030 cls_node) … … 2083 2036 err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2084 2037 candidates); 2085 if ( BE (err != REG_NOERROR, 0))2038 if (__glibc_unlikely (err != REG_NOERROR)) 2086 2039 return err; 2087 2040 --node_idx; … … 2093 2046 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2094 2047 { 2095 intnode = dest_nodes->elems[node_idx];2048 Idx node = dest_nodes->elems[node_idx]; 2096 2049 re_token_type_t type = dfa->nodes[node].type; 2097 2050 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) … … 2103 2056 err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2104 2057 candidates); 2105 if ( BE (err != REG_NOERROR, 0))2058 if (__glibc_unlikely (err != REG_NOERROR)) 2106 2059 return err; 2107 2060 } … … 2113 2066 2114 2067 static reg_errcode_t 2115 internal_function 2068 __attribute_warn_unused_result__ 2116 2069 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, 2117 intstr_idx, const re_node_set *candidates)2070 Idx str_idx, const re_node_set *candidates) 2118 2071 { 2119 2072 const re_dfa_t *const dfa = mctx->dfa; 2120 2073 reg_errcode_t err; 2121 intnode_idx, node;2074 Idx node_idx, node; 2122 2075 re_sift_context_t local_sctx; 2123 intfirst_idx = search_cur_bkref_entry (mctx, str_idx);2076 Idx first_idx = search_cur_bkref_entry (mctx, str_idx); 2124 2077 2125 2078 if (first_idx == -1) … … 2130 2083 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) 2131 2084 { 2132 intenabled_idx;2085 Idx enabled_idx; 2133 2086 re_token_type_t type; 2134 2087 struct re_backref_cache_entry *entry; … … 2145 2098 do 2146 2099 { 2147 intsubexp_len;2148 intto_idx;2149 intdst_node;2150 int ret;2100 Idx subexp_len; 2101 Idx to_idx; 2102 Idx dst_node; 2103 bool ok; 2151 2104 re_dfastate_t *cur_state; 2152 2105 … … 2169 2122 local_sctx = *sctx; 2170 2123 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); 2171 if ( BE (err != REG_NOERROR, 0))2124 if (__glibc_unlikely (err != REG_NOERROR)) 2172 2125 goto free_return; 2173 2126 } 2174 2127 local_sctx.last_node = node; 2175 2128 local_sctx.last_str_idx = str_idx; 2176 ret= re_node_set_insert (&local_sctx.limits, enabled_idx);2177 if ( BE (ret < 0, 0))2129 ok = re_node_set_insert (&local_sctx.limits, enabled_idx); 2130 if (__glibc_unlikely (! ok)) 2178 2131 { 2179 2132 err = REG_ESPACE; … … 2182 2135 cur_state = local_sctx.sifted_states[str_idx]; 2183 2136 err = sift_states_backward (mctx, &local_sctx); 2184 if ( BE (err != REG_NOERROR, 0))2137 if (__glibc_unlikely (err != REG_NOERROR)) 2185 2138 goto free_return; 2186 2139 if (sctx->limited_states != NULL) … … 2189 2142 local_sctx.sifted_states, 2190 2143 str_idx + 1); 2191 if ( BE (err != REG_NOERROR, 0))2144 if (__glibc_unlikely (err != REG_NOERROR)) 2192 2145 goto free_return; 2193 2146 } … … 2196 2149 2197 2150 /* mctx->bkref_ents may have changed, reload the pointer. */ 2198 2151 entry = mctx->bkref_ents + enabled_idx; 2199 2152 } 2200 2153 while (enabled_idx++, entry++->more); … … 2211 2164 2212 2165 2213 #ifdef RE_ENABLE_I18N2214 2166 static int 2215 internal_function2216 2167 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, 2217 int node_idx, int str_idx, intmax_str_idx)2168 Idx node_idx, Idx str_idx, Idx max_str_idx) 2218 2169 { 2219 2170 const re_dfa_t *const dfa = mctx->dfa; 2220 2171 int naccepted; 2221 /* Check the node can accept `multi byte'. */2172 /* Check the node can accept "multi byte". */ 2222 2173 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); 2223 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&2224 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],2225 dfa->nexts[node_idx]))2226 /* The node can't accept the `multi byte', or the2174 if (naccepted > 0 && str_idx + naccepted <= max_str_idx 2175 && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], 2176 dfa->nexts[node_idx])) 2177 /* The node can't accept the "multi byte", or the 2227 2178 destination was already thrown away, then the node 2228 could 't accept the current input `multi byte'. */2179 couldn't accept the current input "multi byte". */ 2229 2180 naccepted = 0; 2230 2181 /* Otherwise, it is sure that the node could accept 2231 `naccepted' bytes input. */2182 'naccepted' bytes input. */ 2232 2183 return naccepted; 2233 2184 } 2234 #endif /* RE_ENABLE_I18N */2235 2236 2185 2237 2186 … … 2240 2189 /* Return the next state to which the current state STATE will transit by 2241 2190 accepting the current input byte, and update STATE_LOG if necessary. 2191 Return NULL on failure. 2242 2192 If STATE can accept a multibyte char/collating element/back reference 2243 2193 update the destination of STATE_LOG. */ 2244 2194 2245 2195 static re_dfastate_t * 2246 internal_function 2196 __attribute_warn_unused_result__ 2247 2197 transit_state (reg_errcode_t *err, re_match_context_t *mctx, 2248 2198 re_dfastate_t *state) … … 2251 2201 unsigned char ch; 2252 2202 2253 #ifdef RE_ENABLE_I18N2254 2203 /* If the current state can accept multibyte. */ 2255 if ( BE (state->accept_mb, 0))2204 if (__glibc_unlikely (state->accept_mb)) 2256 2205 { 2257 2206 *err = transit_state_mb (mctx, state); 2258 if ( BE (*err != REG_NOERROR, 0))2207 if (__glibc_unlikely (*err != REG_NOERROR)) 2259 2208 return NULL; 2260 2209 } 2261 #endif /* RE_ENABLE_I18N */2262 2210 2263 2211 /* Then decide the next state with the single byte. */ … … 2273 2221 { 2274 2222 trtable = state->trtable; 2275 if ( BE (trtable != NULL, 1))2223 if (__glibc_likely (trtable != NULL)) 2276 2224 return trtable[ch]; 2277 2225 2278 2226 trtable = state->word_trtable; 2279 if ( BE (trtable != NULL, 1))2280 2227 if (__glibc_likely (trtable != NULL)) 2228 { 2281 2229 unsigned int context; 2282 2230 context … … 2301 2249 2302 2250 /* Update the state_log if we need */ 2303 re_dfastate_t * 2304 internal_function 2251 static re_dfastate_t * 2305 2252 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, 2306 2253 re_dfastate_t *next_state) 2307 2254 { 2308 2255 const re_dfa_t *const dfa = mctx->dfa; 2309 intcur_idx = re_string_cur_idx (&mctx->input);2256 Idx cur_idx = re_string_cur_idx (&mctx->input); 2310 2257 2311 2258 if (cur_idx > mctx->state_log_top) … … 2324 2271 re_node_set next_nodes, *log_nodes, *table_nodes = NULL; 2325 2272 /* If (state_log[cur_idx] != 0), it implies that cur_idx is 2326 2327 2328 2273 the destination of a multibyte char/collating element/ 2274 back reference. Then the next state is the union set of 2275 these destinations and the results of the transition table. */ 2329 2276 pstate = mctx->state_log[cur_idx]; 2330 2277 log_nodes = pstate->entrance_nodes; 2331 2278 if (next_state != NULL) 2332 2333 2334 2279 { 2280 table_nodes = next_state->entrance_nodes; 2281 *err = re_node_set_init_union (&next_nodes, table_nodes, 2335 2282 log_nodes); 2336 if (BE (*err != REG_NOERROR, 0))2283 if (__glibc_unlikely (*err != REG_NOERROR)) 2337 2284 return NULL; 2338 2285 } 2339 2286 else 2340 2287 next_nodes = *log_nodes; 2341 2288 /* Note: We already add the nodes of the initial state, 2342 2289 then we don't need to add them here. */ … … 2346 2293 mctx->eflags); 2347 2294 next_state = mctx->state_log[cur_idx] 2348 2295 = re_acquire_state_context (err, dfa, &next_nodes, context); 2349 2296 /* We don't need to check errors here, since the return value of 2350 2297 this function is next_state and ERR is already set. */ 2351 2298 2352 2299 if (table_nodes != NULL) 2353 2354 } 2355 2356 if ( BE (dfa->nbackref, 0) && next_state != NULL)2300 re_node_set_free (&next_nodes); 2301 } 2302 2303 if (__glibc_unlikely (dfa->nbackref) && next_state != NULL) 2357 2304 { 2358 2305 /* Check OP_OPEN_SUBEXP in the current state in case that we use them … … 2361 2308 *err = check_subexp_matching_top (mctx, &next_state->nodes, 2362 2309 cur_idx); 2363 if ( BE (*err != REG_NOERROR, 0))2310 if (__glibc_unlikely (*err != REG_NOERROR)) 2364 2311 return NULL; 2365 2312 … … 2368 2315 { 2369 2316 *err = transit_state_bkref (mctx, &next_state->nodes); 2370 if ( BE (*err != REG_NOERROR, 0))2317 if (__glibc_unlikely (*err != REG_NOERROR)) 2371 2318 return NULL; 2372 2319 next_state = mctx->state_log[cur_idx]; … … 2380 2327 multi-byte match, then look in the log for a state 2381 2328 from which to restart matching. */ 2382 re_dfastate_t * 2383 internal_function 2329 static re_dfastate_t * 2384 2330 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) 2385 2331 { … … 2387 2333 do 2388 2334 { 2389 intmax = mctx->state_log_top;2390 intcur_str_idx = re_string_cur_idx (&mctx->input);2335 Idx max = mctx->state_log_top; 2336 Idx cur_str_idx = re_string_cur_idx (&mctx->input); 2391 2337 2392 2338 do 2393 2339 { 2394 2395 2396 2340 if (++cur_str_idx > max) 2341 return NULL; 2342 re_string_skip_bytes (&mctx->input, 1); 2397 2343 } 2398 2344 while (mctx->state_log[cur_str_idx] == NULL); … … 2409 2355 OP_OPEN_SUBEXP and which have corresponding back references in the regular 2410 2356 expression. And register them to use them later for evaluating the 2411 correspo ding back references. */2357 corresponding back references. */ 2412 2358 2413 2359 static reg_errcode_t 2414 internal_function2415 2360 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, 2416 intstr_idx)2361 Idx str_idx) 2417 2362 { 2418 2363 const re_dfa_t *const dfa = mctx->dfa; 2419 intnode_idx;2364 Idx node_idx; 2420 2365 reg_errcode_t err; 2421 2366 … … 2427 2372 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) 2428 2373 { 2429 intnode = cur_nodes->elems[node_idx];2374 Idx node = cur_nodes->elems[node_idx]; 2430 2375 if (dfa->nodes[node].type == OP_OPEN_SUBEXP 2431 2376 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS … … 2434 2379 { 2435 2380 err = match_ctx_add_subtop (mctx, node, str_idx); 2436 if ( BE (err != REG_NOERROR, 0))2381 if (__glibc_unlikely (err != REG_NOERROR)) 2437 2382 return err; 2438 2383 } … … 2443 2388 #if 0 2444 2389 /* Return the next state to which the current state STATE will transit by 2445 accepting the current input byte. */2390 accepting the current input byte. Return NULL on failure. */ 2446 2391 2447 2392 static re_dfastate_t * … … 2452 2397 re_node_set next_nodes; 2453 2398 re_dfastate_t *next_state; 2454 intnode_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);2399 Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); 2455 2400 unsigned int context; 2456 2401 2457 2402 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); 2458 if ( BE (*err != REG_NOERROR, 0))2403 if (__glibc_unlikely (*err != REG_NOERROR)) 2459 2404 return NULL; 2460 2405 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) 2461 2406 { 2462 intcur_node = state->nodes.elems[node_cnt];2407 Idx cur_node = state->nodes.elems[node_cnt]; 2463 2408 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) 2464 2409 { 2465 2410 *err = re_node_set_merge (&next_nodes, 2466 2411 dfa->eclosures + dfa->nexts[cur_node]); 2467 if ( BE (*err != REG_NOERROR, 0))2412 if (__glibc_unlikely (*err != REG_NOERROR)) 2468 2413 { 2469 2414 re_node_set_free (&next_nodes); … … 2483 2428 #endif 2484 2429 2485 #ifdef RE_ENABLE_I18N2486 2430 static reg_errcode_t 2487 internal_function2488 2431 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) 2489 2432 { 2490 2433 const re_dfa_t *const dfa = mctx->dfa; 2491 2434 reg_errcode_t err; 2492 inti;2435 Idx i; 2493 2436 2494 2437 for (i = 0; i < pstate->nodes.nelem; ++i) 2495 2438 { 2496 2439 re_node_set dest_nodes, *new_nodes; 2497 int cur_node_idx = pstate->nodes.elems[i]; 2498 int naccepted, dest_idx; 2440 Idx cur_node_idx = pstate->nodes.elems[i]; 2441 int naccepted; 2442 Idx dest_idx; 2499 2443 unsigned int context; 2500 2444 re_dfastate_t *dest_state; 2501 2445 2502 2446 if (!dfa->nodes[cur_node_idx].accept_mb) 2503 2447 continue; 2504 2448 2505 2449 if (dfa->nodes[cur_node_idx].constraint) … … 2519 2463 continue; 2520 2464 2521 /* The node can accepts `naccepted' bytes. */2465 /* The node can accepts 'naccepted' bytes. */ 2522 2466 dest_idx = re_string_cur_idx (&mctx->input) + naccepted; 2523 2467 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted 2524 2468 : mctx->max_mb_elem_len); 2525 2469 err = clean_state_log_if_needed (mctx, dest_idx); 2526 if ( BE (err != REG_NOERROR, 0))2470 if (__glibc_unlikely (err != REG_NOERROR)) 2527 2471 return err; 2528 #ifdef DEBUG 2529 assert (dfa->nexts[cur_node_idx] != -1); 2530 #endif 2472 DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1); 2531 2473 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; 2532 2474 … … 2538 2480 err = re_node_set_init_union (&dest_nodes, 2539 2481 dest_state->entrance_nodes, new_nodes); 2540 if ( BE (err != REG_NOERROR, 0))2482 if (__glibc_unlikely (err != REG_NOERROR)) 2541 2483 return err; 2542 2484 } … … 2547 2489 if (dest_state != NULL) 2548 2490 re_node_set_free (&dest_nodes); 2549 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) 2491 if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL 2492 && err != REG_NOERROR)) 2550 2493 return err; 2551 2494 } 2552 2495 return REG_NOERROR; 2553 2496 } 2554 #endif /* RE_ENABLE_I18N */2555 2497 2556 2498 static reg_errcode_t 2557 internal_function2558 2499 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) 2559 2500 { 2560 2501 const re_dfa_t *const dfa = mctx->dfa; 2561 2502 reg_errcode_t err; 2562 inti;2563 intcur_str_idx = re_string_cur_idx (&mctx->input);2503 Idx i; 2504 Idx cur_str_idx = re_string_cur_idx (&mctx->input); 2564 2505 2565 2506 for (i = 0; i < nodes->nelem; ++i) 2566 2507 { 2567 intdest_str_idx, prev_nelem, bkc_idx;2568 intnode_idx = nodes->elems[i];2508 Idx dest_str_idx, prev_nelem, bkc_idx; 2509 Idx node_idx = nodes->elems[i]; 2569 2510 unsigned int context; 2570 2511 const re_token_t *node = dfa->nodes + node_idx; 2571 2512 re_node_set *new_dest_nodes; 2572 2513 2573 /* Check whether `node' is a backreference or not. */2514 /* Check whether 'node' is a backreference or not. */ 2574 2515 if (node->type != OP_BACK_REF) 2575 2516 continue; … … 2583 2524 } 2584 2525 2585 /* `node' is a backreference.2526 /* 'node' is a backreference. 2586 2527 Check the substring which the substring matched. */ 2587 2528 bkc_idx = mctx->nbkref_ents; 2588 2529 err = get_subexp (mctx, node_idx, cur_str_idx); 2589 if ( BE (err != REG_NOERROR, 0))2530 if (__glibc_unlikely (err != REG_NOERROR)) 2590 2531 goto free_return; 2591 2532 2592 /* And add the epsilon closures (which is `new_dest_nodes') of2533 /* And add the epsilon closures (which is 'new_dest_nodes') of 2593 2534 the backreference to appropriate state_log. */ 2594 #ifdef DEBUG 2595 assert (dfa->nexts[node_idx] != -1); 2596 #endif 2535 DEBUG_ASSERT (dfa->nexts[node_idx] != -1); 2597 2536 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) 2598 2537 { 2599 intsubexp_len;2538 Idx subexp_len; 2600 2539 re_dfastate_t *dest_state; 2601 2540 struct re_backref_cache_entry *bkref_ent; … … 2614 2553 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 2615 2554 : mctx->state_log[cur_str_idx]->nodes.nelem); 2616 /* Add `new_dest_node' to state_log. */2555 /* Add 'new_dest_node' to state_log. */ 2617 2556 if (dest_state == NULL) 2618 2557 { … … 2620 2559 = re_acquire_state_context (&err, dfa, new_dest_nodes, 2621 2560 context); 2622 if ( BE(mctx->state_log[dest_str_idx] == NULL2623 && err != REG_NOERROR, 0))2561 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL 2562 && err != REG_NOERROR)) 2624 2563 goto free_return; 2625 2564 } … … 2630 2569 dest_state->entrance_nodes, 2631 2570 new_dest_nodes); 2632 if ( BE (err != REG_NOERROR, 0))2571 if (__glibc_unlikely (err != REG_NOERROR)) 2633 2572 { 2634 2573 re_node_set_free (&dest_nodes); … … 2638 2577 = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2639 2578 re_node_set_free (&dest_nodes); 2640 if ( BE(mctx->state_log[dest_str_idx] == NULL2641 && err != REG_NOERROR, 0))2579 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL 2580 && err != REG_NOERROR)) 2642 2581 goto free_return; 2643 2582 } … … 2649 2588 err = check_subexp_matching_top (mctx, new_dest_nodes, 2650 2589 cur_str_idx); 2651 if ( BE (err != REG_NOERROR, 0))2590 if (__glibc_unlikely (err != REG_NOERROR)) 2652 2591 goto free_return; 2653 2592 err = transit_state_bkref (mctx, new_dest_nodes); 2654 if ( BE (err != REG_NOERROR, 0))2593 if (__glibc_unlikely (err != REG_NOERROR)) 2655 2594 goto free_return; 2656 2595 } … … 2669 2608 2670 2609 static reg_errcode_t 2671 internal_function 2672 get_subexp (re_match_context_t *mctx, int bkref_node, intbkref_str_idx)2610 __attribute_warn_unused_result__ 2611 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) 2673 2612 { 2674 2613 const re_dfa_t *const dfa = mctx->dfa; 2675 intsubexp_num, sub_top_idx;2614 Idx subexp_num, sub_top_idx; 2676 2615 const char *buf = (const char *) re_string_get_buffer (&mctx->input); 2677 2616 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ 2678 intcache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);2617 Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); 2679 2618 if (cache_idx != -1) 2680 2619 { … … 2682 2621 = mctx->bkref_ents + cache_idx; 2683 2622 do 2684 2623 if (entry->node == bkref_node) 2685 2624 return REG_NOERROR; /* We already checked it. */ 2686 2625 while (entry++->more); … … 2695 2634 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; 2696 2635 re_sub_match_last_t *sub_last; 2697 intsub_last_idx, sl_str, bkref_str_off;2636 Idx sub_last_idx, sl_str, bkref_str_off; 2698 2637 2699 2638 if (dfa->nodes[sub_top->node].opr.idx != subexp_num) … … 2706 2645 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) 2707 2646 { 2708 int sl_str_diff;2647 regoff_t sl_str_diff; 2709 2648 sub_last = sub_top->lasts[sub_last_idx]; 2710 2649 sl_str_diff = sub_last->str_idx - sl_str; … … 2713 2652 if (sl_str_diff > 0) 2714 2653 { 2715 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) 2654 if (__glibc_unlikely (bkref_str_off + sl_str_diff 2655 > mctx->input.valid_len)) 2716 2656 { 2717 2657 /* Not enough chars for a successful match. */ … … 2722 2662 bkref_str_off 2723 2663 + sl_str_diff); 2724 if ( BE (err != REG_NOERROR, 0))2664 if (__glibc_unlikely (err != REG_NOERROR)) 2725 2665 return err; 2726 2666 buf = (const char *) re_string_get_buffer (&mctx->input); … … 2741 2681 if (err == REG_NOMATCH) 2742 2682 continue; 2743 if ( BE (err != REG_NOERROR, 0))2683 if (__glibc_unlikely (err != REG_NOERROR)) 2744 2684 return err; 2745 2685 } … … 2752 2692 for (; sl_str <= bkref_str_idx; ++sl_str) 2753 2693 { 2754 int cls_node, sl_str_off; 2694 Idx cls_node; 2695 regoff_t sl_str_off; 2755 2696 const re_node_set *nodes; 2756 2697 sl_str_off = sl_str - sub_top->str_idx; … … 2759 2700 if (sl_str_off > 0) 2760 2701 { 2761 if ( BE (bkref_str_off >= mctx->input.valid_len, 0))2702 if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len)) 2762 2703 { 2763 2704 /* If we are at the end of the input, we cannot match. */ … … 2765 2706 break; 2766 2707 2767 err = extend_buffers (mctx );2768 if ( BE (err != REG_NOERROR, 0))2708 err = extend_buffers (mctx, bkref_str_off + 1); 2709 if (__glibc_unlikely (err != REG_NOERROR)) 2769 2710 return err; 2770 2711 … … 2797 2738 if (err == REG_NOMATCH) 2798 2739 continue; 2799 if ( BE (err != REG_NOERROR, 0))2740 if (__glibc_unlikely (err != REG_NOERROR)) 2800 2741 return err; 2801 2742 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); 2802 if ( BE (sub_last == NULL, 0))2743 if (__glibc_unlikely (sub_last == NULL)) 2803 2744 return REG_ESPACE; 2804 2745 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, 2805 2746 bkref_str_idx); 2747 buf = (const char *) re_string_get_buffer (&mctx->input); 2806 2748 if (err == REG_NOMATCH) 2807 2749 continue; 2750 if (__glibc_unlikely (err != REG_NOERROR)) 2751 return err; 2808 2752 } 2809 2753 } … … 2818 2762 2819 2763 static reg_errcode_t 2820 internal_function2821 2764 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, 2822 re_sub_match_last_t *sub_last, int bkref_node, intbkref_str)2765 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) 2823 2766 { 2824 2767 reg_errcode_t err; 2825 intto_idx;2768 Idx to_idx; 2826 2769 /* Can the subexpression arrive the back reference? */ 2827 2770 err = check_arrival (mctx, &sub_last->path, sub_last->node, … … 2832 2775 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, 2833 2776 sub_last->str_idx); 2834 if ( BE (err != REG_NOERROR, 0))2777 if (__glibc_unlikely (err != REG_NOERROR)) 2835 2778 return err; 2836 2779 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; … … 2846 2789 E.g. RE: (a){2} */ 2847 2790 2848 static int 2849 internal_function 2791 static Idx 2850 2792 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 2851 intsubexp_idx, int type)2852 { 2853 intcls_idx;2793 Idx subexp_idx, int type) 2794 { 2795 Idx cls_idx; 2854 2796 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) 2855 2797 { 2856 intcls_node = nodes->elems[cls_idx];2798 Idx cls_node = nodes->elems[cls_idx]; 2857 2799 const re_token_t *node = dfa->nodes + cls_node; 2858 2800 if (node->type == type … … 2866 2808 LAST_NODE at LAST_STR. We record the path onto PATH since it will be 2867 2809 heavily reused. 2868 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ 2810 Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, 2811 REG_ESPACE if memory is exhausted. */ 2869 2812 2870 2813 static reg_errcode_t 2871 internal_function 2872 check_arrival (re_match_context_t *mctx, state_array_t *path, inttop_node,2873 int top_str, int last_node, intlast_str, int type)2814 __attribute_warn_unused_result__ 2815 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, 2816 Idx top_str, Idx last_node, Idx last_str, int type) 2874 2817 { 2875 2818 const re_dfa_t *const dfa = mctx->dfa; 2876 2819 reg_errcode_t err = REG_NOERROR; 2877 intsubexp_num, backup_cur_idx, str_idx, null_cnt;2820 Idx subexp_num, backup_cur_idx, str_idx, null_cnt; 2878 2821 re_dfastate_t *cur_state = NULL; 2879 2822 re_node_set *cur_nodes, next_nodes; … … 2883 2826 subexp_num = dfa->nodes[top_node].opr.idx; 2884 2827 /* Extend the buffer if we need. */ 2885 if ( BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))2828 if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1)) 2886 2829 { 2887 2830 re_dfastate_t **new_array; 2888 int old_alloc = path->alloc; 2889 path->alloc += last_str + mctx->max_mb_elem_len + 1; 2890 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); 2891 if (BE (new_array == NULL, 0)) 2892 { 2893 path->alloc = old_alloc; 2894 return REG_ESPACE; 2895 } 2831 Idx old_alloc = path->alloc; 2832 Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; 2833 Idx new_alloc; 2834 if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc)) 2835 return REG_ESPACE; 2836 new_alloc = old_alloc + incr_alloc; 2837 if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc)) 2838 return REG_ESPACE; 2839 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); 2840 if (__glibc_unlikely (new_array == NULL)) 2841 return REG_ESPACE; 2896 2842 path->array = new_array; 2843 path->alloc = new_alloc; 2897 2844 memset (new_array + old_alloc, '\0', 2898 2845 sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); 2899 2846 } 2900 2847 2901 #ifdef __GNUC__ /* silly buggers. */2902 str_idx = path->next_idx ?: top_str;2903 #else2904 2848 str_idx = path->next_idx ? path->next_idx : top_str; 2905 #endif2906 2849 2907 2850 /* Temporary modify MCTX. */ … … 2916 2859 { 2917 2860 err = re_node_set_init_1 (&next_nodes, top_node); 2918 if ( BE (err != REG_NOERROR, 0))2861 if (__glibc_unlikely (err != REG_NOERROR)) 2919 2862 return err; 2920 2863 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2921 if ( BE (err != REG_NOERROR, 0))2864 if (__glibc_unlikely (err != REG_NOERROR)) 2922 2865 { 2923 2866 re_node_set_free (&next_nodes); … … 2931 2874 { 2932 2875 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); 2933 if ( BE (err != REG_NOERROR, 0))2876 if (__glibc_unlikely (err != REG_NOERROR)) 2934 2877 return err; 2935 2878 } … … 2943 2886 err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2944 2887 subexp_num, type); 2945 if ( BE (err != REG_NOERROR, 0))2888 if (__glibc_unlikely (err != REG_NOERROR)) 2946 2889 { 2947 2890 re_node_set_free (&next_nodes); … … 2950 2893 } 2951 2894 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 2952 if ( BE (cur_state == NULL && err != REG_NOERROR, 0))2895 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) 2953 2896 { 2954 2897 re_node_set_free (&next_nodes); … … 2965 2908 err = re_node_set_merge (&next_nodes, 2966 2909 &mctx->state_log[str_idx + 1]->nodes); 2967 if ( BE (err != REG_NOERROR, 0))2910 if (__glibc_unlikely (err != REG_NOERROR)) 2968 2911 { 2969 2912 re_node_set_free (&next_nodes); … … 2976 2919 &cur_state->non_eps_nodes, 2977 2920 &next_nodes); 2978 if ( BE (err != REG_NOERROR, 0))2921 if (__glibc_unlikely (err != REG_NOERROR)) 2979 2922 { 2980 2923 re_node_set_free (&next_nodes); … … 2986 2929 { 2987 2930 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2988 if ( BE (err != REG_NOERROR, 0))2931 if (__glibc_unlikely (err != REG_NOERROR)) 2989 2932 { 2990 2933 re_node_set_free (&next_nodes); … … 2993 2936 err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2994 2937 subexp_num, type); 2995 if ( BE (err != REG_NOERROR, 0))2938 if (__glibc_unlikely (err != REG_NOERROR)) 2996 2939 { 2997 2940 re_node_set_free (&next_nodes); … … 3001 2944 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); 3002 2945 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 3003 if ( BE (cur_state == NULL && err != REG_NOERROR, 0))2946 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) 3004 2947 { 3005 2948 re_node_set_free (&next_nodes); … … 3034 2977 3035 2978 static reg_errcode_t 3036 internal_function 3037 check_arrival_add_next_nodes (re_match_context_t *mctx, intstr_idx,2979 __attribute_warn_unused_result__ 2980 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, 3038 2981 re_node_set *cur_nodes, re_node_set *next_nodes) 3039 2982 { 3040 2983 const re_dfa_t *const dfa = mctx->dfa; 3041 int result;3042 intcur_idx;2984 bool ok; 2985 Idx cur_idx; 3043 2986 reg_errcode_t err = REG_NOERROR; 3044 2987 re_node_set union_set; … … 3047 2990 { 3048 2991 int naccepted = 0; 3049 int cur_node = cur_nodes->elems[cur_idx]; 3050 #ifdef DEBUG 3051 re_token_type_t type = dfa->nodes[cur_node].type; 3052 assert (!IS_EPSILON_NODE (type)); 3053 #endif 3054 #ifdef RE_ENABLE_I18N 3055 /* If the node may accept `multi byte'. */ 2992 Idx cur_node = cur_nodes->elems[cur_idx]; 2993 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); 2994 2995 /* If the node may accept "multi byte". */ 3056 2996 if (dfa->nodes[cur_node].accept_mb) 3057 2997 { … … 3061 3001 { 3062 3002 re_dfastate_t *dest_state; 3063 intnext_node = dfa->nexts[cur_node];3064 intnext_idx = str_idx + naccepted;3003 Idx next_node = dfa->nexts[cur_node]; 3004 Idx next_idx = str_idx + naccepted; 3065 3005 dest_state = mctx->state_log[next_idx]; 3066 3006 re_node_set_empty (&union_set); … … 3068 3008 { 3069 3009 err = re_node_set_merge (&union_set, &dest_state->nodes); 3070 if ( BE (err != REG_NOERROR, 0))3010 if (__glibc_unlikely (err != REG_NOERROR)) 3071 3011 { 3072 3012 re_node_set_free (&union_set); … … 3074 3014 } 3075 3015 } 3076 result= re_node_set_insert (&union_set, next_node);3077 if ( BE (result < 0, 0))3016 ok = re_node_set_insert (&union_set, next_node); 3017 if (__glibc_unlikely (! ok)) 3078 3018 { 3079 3019 re_node_set_free (&union_set); … … 3082 3022 mctx->state_log[next_idx] = re_acquire_state (&err, dfa, 3083 3023 &union_set); 3084 if ( BE(mctx->state_log[next_idx] == NULL3085 && err != REG_NOERROR, 0))3024 if (__glibc_unlikely (mctx->state_log[next_idx] == NULL 3025 && err != REG_NOERROR)) 3086 3026 { 3087 3027 re_node_set_free (&union_set); … … 3090 3030 } 3091 3031 } 3092 #endif /* RE_ENABLE_I18N */ 3032 3093 3033 if (naccepted 3094 3034 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) 3095 3035 { 3096 result= re_node_set_insert (next_nodes, dfa->nexts[cur_node]);3097 if ( BE (result < 0, 0))3036 ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); 3037 if (__glibc_unlikely (! ok)) 3098 3038 { 3099 3039 re_node_set_free (&union_set); … … 3113 3053 3114 3054 static reg_errcode_t 3115 internal_function3116 3055 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, 3117 intex_subexp, int type)3056 Idx ex_subexp, int type) 3118 3057 { 3119 3058 reg_errcode_t err; 3120 intidx, outside_node;3059 Idx idx, outside_node; 3121 3060 re_node_set new_nodes; 3122 #ifdef DEBUG 3123 assert (cur_nodes->nelem); 3124 #endif 3061 DEBUG_ASSERT (cur_nodes->nelem); 3125 3062 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); 3126 if ( BE (err != REG_NOERROR, 0))3063 if (__glibc_unlikely (err != REG_NOERROR)) 3127 3064 return err; 3128 3065 /* Create a new node set NEW_NODES with the nodes which are epsilon … … 3131 3068 for (idx = 0; idx < cur_nodes->nelem; ++idx) 3132 3069 { 3133 intcur_node = cur_nodes->elems[idx];3070 Idx cur_node = cur_nodes->elems[idx]; 3134 3071 const re_node_set *eclosure = dfa->eclosures + cur_node; 3135 3072 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); … … 3138 3075 /* There are no problematic nodes, just merge them. */ 3139 3076 err = re_node_set_merge (&new_nodes, eclosure); 3140 if ( BE (err != REG_NOERROR, 0))3077 if (__glibc_unlikely (err != REG_NOERROR)) 3141 3078 { 3142 3079 re_node_set_free (&new_nodes); … … 3149 3086 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, 3150 3087 ex_subexp, type); 3151 if ( BE (err != REG_NOERROR, 0))3088 if (__glibc_unlikely (err != REG_NOERROR)) 3152 3089 { 3153 3090 re_node_set_free (&new_nodes); … … 3166 3103 3167 3104 static reg_errcode_t 3168 internal_function 3105 __attribute_warn_unused_result__ 3169 3106 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, 3170 int target, intex_subexp, int type)3171 { 3172 intcur_node;3107 Idx target, Idx ex_subexp, int type) 3108 { 3109 Idx cur_node; 3173 3110 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) 3174 3111 { 3175 int err;3112 bool ok; 3176 3113 3177 3114 if (dfa->nodes[cur_node].type == type … … 3180 3117 if (type == OP_CLOSE_SUBEXP) 3181 3118 { 3182 err= re_node_set_insert (dst_nodes, cur_node);3183 if ( BE (err == -1, 0))3119 ok = re_node_set_insert (dst_nodes, cur_node); 3120 if (__glibc_unlikely (! ok)) 3184 3121 return REG_ESPACE; 3185 3122 } 3186 3123 break; 3187 3124 } 3188 err= re_node_set_insert (dst_nodes, cur_node);3189 if ( BE (err == -1, 0))3125 ok = re_node_set_insert (dst_nodes, cur_node); 3126 if (__glibc_unlikely (! ok)) 3190 3127 return REG_ESPACE; 3191 3128 if (dfa->edests[cur_node].nelem == 0) … … 3193 3130 if (dfa->edests[cur_node].nelem == 2) 3194 3131 { 3132 reg_errcode_t err; 3195 3133 err = check_arrival_expand_ecl_sub (dfa, dst_nodes, 3196 3134 dfa->edests[cur_node].elems[1], 3197 3135 ex_subexp, type); 3198 if ( BE (err != REG_NOERROR, 0))3136 if (__glibc_unlikely (err != REG_NOERROR)) 3199 3137 return err; 3200 3138 } … … 3210 3148 3211 3149 static reg_errcode_t 3212 internal_function 3150 __attribute_warn_unused_result__ 3213 3151 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, 3214 int cur_str, intsubexp_num, int type)3152 Idx cur_str, Idx subexp_num, int type) 3215 3153 { 3216 3154 const re_dfa_t *const dfa = mctx->dfa; 3217 3155 reg_errcode_t err; 3218 intcache_idx_start = search_cur_bkref_entry (mctx, cur_str);3156 Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); 3219 3157 struct re_backref_cache_entry *ent; 3220 3158 … … 3226 3164 do 3227 3165 { 3228 intto_idx, next_node;3166 Idx to_idx, next_node; 3229 3167 3230 3168 /* Is this entry ENT is appropriate? */ … … 3248 3186 err3 = re_node_set_merge (cur_nodes, &new_dests); 3249 3187 re_node_set_free (&new_dests); 3250 if ( BE(err != REG_NOERROR || err2 != REG_NOERROR3251 || err3 != REG_NOERROR, 0))3188 if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR 3189 || err3 != REG_NOERROR)) 3252 3190 { 3253 3191 err = (err != REG_NOERROR ? err … … 3264 3202 if (mctx->state_log[to_idx]) 3265 3203 { 3266 int ret;3204 bool ok; 3267 3205 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, 3268 3206 next_node)) … … 3270 3208 err = re_node_set_init_copy (&union_set, 3271 3209 &mctx->state_log[to_idx]->nodes); 3272 ret= re_node_set_insert (&union_set, next_node);3273 if ( BE (err != REG_NOERROR || ret < 0, 0))3210 ok = re_node_set_insert (&union_set, next_node); 3211 if (__glibc_unlikely (err != REG_NOERROR || ! ok)) 3274 3212 { 3275 3213 re_node_set_free (&union_set); … … 3281 3219 { 3282 3220 err = re_node_set_init_1 (&union_set, next_node); 3283 if ( BE (err != REG_NOERROR, 0))3221 if (__glibc_unlikely (err != REG_NOERROR)) 3284 3222 return err; 3285 3223 } 3286 3224 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); 3287 3225 re_node_set_free (&union_set); 3288 if ( BE(mctx->state_log[to_idx] == NULL3289 && err != REG_NOERROR, 0))3226 if (__glibc_unlikely (mctx->state_log[to_idx] == NULL 3227 && err != REG_NOERROR)) 3290 3228 return err; 3291 3229 } … … 3296 3234 3297 3235 /* Build transition table for the state. 3298 Return 1 if succeeded, otherwise return NULL. */ 3299 3300 static int 3301 internal_function 3236 Return true if successful. */ 3237 3238 static bool __attribute_noinline__ 3302 3239 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) 3303 3240 { 3304 3241 reg_errcode_t err; 3305 int i, j, ch, need_word_trtable = 0; 3242 Idx i, j; 3243 int ch; 3244 bool need_word_trtable = false; 3306 3245 bitset_word_t elem, mask; 3307 bool dests_node_malloced = false; 3308 bool dest_states_malloced = false; 3309 int ndests; /* Number of the destination states from `state'. */ 3246 Idx ndests; /* Number of the destination states from 'state'. */ 3310 3247 re_dfastate_t **trtable; 3311 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; 3312 re_node_set follows, *dests_node; 3313 bitset_t *dests_ch; 3248 re_dfastate_t *dest_states[SBC_MAX]; 3249 re_dfastate_t *dest_states_word[SBC_MAX]; 3250 re_dfastate_t *dest_states_nl[SBC_MAX]; 3251 re_node_set follows; 3314 3252 bitset_t acceptable; 3315 3253 3316 struct dests_alloc3317 {3318 re_node_set dests_node[SBC_MAX];3319 bitset_t dests_ch[SBC_MAX];3320 } *dests_alloc;3321 3322 3254 /* We build DFA states which corresponds to the destination nodes 3323 from `state'. `dests_node[i]' represents the nodes which i-th3324 destination state contains, and `dests_ch[i]' represents the3255 from 'state'. 'dests_node[i]' represents the nodes which i-th 3256 destination state contains, and 'dests_ch[i]' represents the 3325 3257 characters which i-th destination state accepts. */ 3326 if (__libc_use_alloca (sizeof (struct dests_alloc))) 3327 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); 3328 else 3329 { 3330 dests_alloc = re_malloc (struct dests_alloc, 1); 3331 if (BE (dests_alloc == NULL, 0)) 3332 return 0; 3333 dests_node_malloced = true; 3334 } 3335 dests_node = dests_alloc->dests_node; 3336 dests_ch = dests_alloc->dests_ch; 3337 3338 /* Initialize transiton table. */ 3258 re_node_set dests_node[SBC_MAX]; 3259 bitset_t dests_ch[SBC_MAX]; 3260 3261 /* Initialize transition table. */ 3339 3262 state->word_trtable = state->trtable = NULL; 3340 3263 3341 /* At first, group all nodes belonging to `state' into several3264 /* At first, group all nodes belonging to 'state' into several 3342 3265 destinations. */ 3343 3266 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); 3344 if (BE (ndests <= 0, 0)) 3345 { 3346 if (dests_node_malloced) 3347 free (dests_alloc); 3348 /* Return 0 in case of an error, 1 otherwise. */ 3267 if (__glibc_unlikely (ndests <= 0)) 3268 { 3269 /* Return false in case of an error, true otherwise. */ 3349 3270 if (ndests == 0) 3350 3271 { 3351 3272 state->trtable = (re_dfastate_t **) 3352 3273 calloc (sizeof (re_dfastate_t *), SBC_MAX); 3353 return 1; 3354 } 3355 return 0; 3274 if (__glibc_unlikely (state->trtable == NULL)) 3275 return false; 3276 return true; 3277 } 3278 return false; 3356 3279 } 3357 3280 3358 3281 err = re_node_set_alloc (&follows, ndests + 1); 3359 if (BE (err != REG_NOERROR, 0)) 3360 goto out_free; 3361 3362 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX 3363 + ndests * 3 * sizeof (re_dfastate_t *))) 3364 dest_states = (re_dfastate_t **) 3365 alloca (ndests * 3 * sizeof (re_dfastate_t *)); 3366 else 3367 { 3368 dest_states = (re_dfastate_t **) 3369 malloc (ndests * 3 * sizeof (re_dfastate_t *)); 3370 if (BE (dest_states == NULL, 0)) 3371 { 3372 out_free: 3373 if (dest_states_malloced) 3374 free (dest_states); 3375 re_node_set_free (&follows); 3376 for (i = 0; i < ndests; ++i) 3377 re_node_set_free (dests_node + i); 3378 if (dests_node_malloced) 3379 free (dests_alloc); 3380 return 0; 3381 } 3382 dest_states_malloced = true; 3383 } 3384 dest_states_word = dest_states + ndests; 3385 dest_states_nl = dest_states_word + ndests; 3282 if (__glibc_unlikely (err != REG_NOERROR)) 3283 { 3284 out_free: 3285 re_node_set_free (&follows); 3286 for (i = 0; i < ndests; ++i) 3287 re_node_set_free (dests_node + i); 3288 return false; 3289 } 3290 3386 3291 bitset_empty (acceptable); 3387 3292 … … 3389 3294 for (i = 0; i < ndests; ++i) 3390 3295 { 3391 intnext_node;3296 Idx next_node; 3392 3297 re_node_set_empty (&follows); 3393 3298 /* Merge the follows of this destination states. */ … … 3398 3303 { 3399 3304 err = re_node_set_merge (&follows, dfa->eclosures + next_node); 3400 if ( BE (err != REG_NOERROR, 0))3305 if (__glibc_unlikely (err != REG_NOERROR)) 3401 3306 goto out_free; 3402 3307 } 3403 3308 } 3404 3309 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); 3405 if ( BE (dest_states[i] == NULL && err != REG_NOERROR, 0))3310 if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR)) 3406 3311 goto out_free; 3407 3312 /* If the new state has context constraint, … … 3411 3316 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, 3412 3317 CONTEXT_WORD); 3413 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) 3318 if (__glibc_unlikely (dest_states_word[i] == NULL 3319 && err != REG_NOERROR)) 3414 3320 goto out_free; 3415 3321 3416 3322 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) 3417 need_word_trtable = 1;3323 need_word_trtable = true; 3418 3324 3419 3325 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, 3420 3326 CONTEXT_NEWLINE); 3421 if ( BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))3327 if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR)) 3422 3328 goto out_free; 3423 3329 } 3424 3330 else 3425 3331 { … … 3430 3336 } 3431 3337 3432 if (! BE (need_word_trtable, 0))3338 if (!__glibc_unlikely (need_word_trtable)) 3433 3339 { 3434 3340 /* We don't care about whether the following character is a word … … 3438 3344 trtable = state->trtable = 3439 3345 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); 3440 if ( BE (trtable == NULL, 0))3346 if (__glibc_unlikely (trtable == NULL)) 3441 3347 goto out_free; 3442 3348 … … 3446 3352 elem; 3447 3353 mask <<= 1, elem >>= 1, ++ch) 3448 if ( BE (elem & 1, 0))3354 if (__glibc_unlikely (elem & 1)) 3449 3355 { 3450 3356 /* There must be exactly one destination which accepts … … 3469 3375 trtable = state->word_trtable = 3470 3376 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); 3471 if ( BE (trtable == NULL, 0))3377 if (__glibc_unlikely (trtable == NULL)) 3472 3378 goto out_free; 3473 3379 … … 3477 3383 elem; 3478 3384 mask <<= 1, elem >>= 1, ++ch) 3479 if ( BE (elem & 1, 0))3385 if (__glibc_unlikely (elem & 1)) 3480 3386 { 3481 3387 /* There must be exactly one destination which accepts … … 3507 3413 } 3508 3414 3509 if (dest_states_malloced)3510 free (dest_states);3511 3512 3415 re_node_set_free (&follows); 3513 3416 for (i = 0; i < ndests; ++i) 3514 3417 re_node_set_free (dests_node + i); 3515 3516 if (dests_node_malloced) 3517 free (dests_alloc); 3518 3519 return 1; 3418 return true; 3520 3419 } 3521 3420 … … 3523 3422 Then for all destinations, set the nodes belonging to the destination 3524 3423 to DESTS_NODE[i] and set the characters accepted by the destination 3525 to DEST_CH[i]. This function return the number of destinations. */3526 3527 static int 3528 internal_function 3424 to DEST_CH[i]. Return the number of destinations if successful, 3425 -1 on internal error. */ 3426 3427 static Idx 3529 3428 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, 3530 3429 re_node_set *dests_node, bitset_t *dests_ch) 3531 3430 { 3532 3431 reg_errcode_t err; 3533 int result;3534 inti, j, k;3535 int ndests; /* Number of the destinations from `state'. */3432 bool ok; 3433 Idx i, j, k; 3434 Idx ndests; /* Number of the destinations from 'state'. */ 3536 3435 bitset_t accepts; /* Characters a node can accept. */ 3537 3436 const re_node_set *cur_nodes = &state->nodes; … … 3539 3438 ndests = 0; 3540 3439 3541 /* For all the nodes belonging to `state', */3440 /* For all the nodes belonging to 'state', */ 3542 3441 for (i = 0; i < cur_nodes->nelem; ++i) 3543 3442 { … … 3555 3454 else if (type == OP_PERIOD) 3556 3455 { 3557 #ifdef RE_ENABLE_I18N3558 3456 if (dfa->mb_cur_max > 1) 3559 3457 bitset_merge (accepts, dfa->sb_char); 3560 3458 else 3561 #endif3562 3459 bitset_set_all (accepts); 3563 3460 if (!(dfa->syntax & RE_DOT_NEWLINE)) … … 3566 3463 bitset_clear (accepts, '\0'); 3567 3464 } 3568 #ifdef RE_ENABLE_I18N3569 3465 else if (type == OP_UTF8_PERIOD) 3570 { 3571 memset (accepts, '\xff', sizeof (bitset_t) / 2); 3466 { 3467 if (ASCII_CHARS % BITSET_WORD_BITS == 0) 3468 memset (accepts, -1, ASCII_CHARS / CHAR_BIT); 3469 else 3470 bitset_merge (accepts, utf8_sb_map); 3572 3471 if (!(dfa->syntax & RE_DOT_NEWLINE)) 3573 3472 bitset_clear (accepts, '\n'); 3574 3473 if (dfa->syntax & RE_DOT_NOT_NULL) 3575 3474 bitset_clear (accepts, '\0'); 3576 } 3577 #endif 3475 } 3578 3476 else 3579 3477 continue; 3580 3478 3581 /* Check the `accepts' and sift the characters which are not3479 /* Check the 'accepts' and sift the characters which are not 3582 3480 match it the context. */ 3583 3481 if (constraint) … … 3606 3504 continue; 3607 3505 } 3608 #ifdef RE_ENABLE_I18N3609 3506 if (dfa->mb_cur_max > 1) 3610 3507 for (j = 0; j < BITSET_WORDS; ++j) 3611 3508 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); 3612 3509 else 3613 #endif3614 3510 for (j = 0; j < BITSET_WORDS; ++j) 3615 3511 any_set |= (accepts[j] &= dfa->word_char[j]); … … 3625 3521 continue; 3626 3522 } 3627 #ifdef RE_ENABLE_I18N3628 3523 if (dfa->mb_cur_max > 1) 3629 3524 for (j = 0; j < BITSET_WORDS; ++j) 3630 3525 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); 3631 3526 else 3632 #endif3633 3527 for (j = 0; j < BITSET_WORDS; ++j) 3634 3528 any_set |= (accepts[j] &= ~dfa->word_char[j]); … … 3638 3532 } 3639 3533 3640 /* Then divide `accepts' into DFA states, or create a new3534 /* Then divide 'accepts' into DFA states, or create a new 3641 3535 state. Above, we make sure that accepts is not empty. */ 3642 3536 for (j = 0; j < ndests; ++j) … … 3651 3545 continue; 3652 3546 3653 /* Enumerate the intersection set of this state and `accepts'. */3547 /* Enumerate the intersection set of this state and 'accepts'. */ 3654 3548 has_intersec = 0; 3655 3549 for (k = 0; k < BITSET_WORDS; ++k) … … 3659 3553 continue; 3660 3554 3661 /* Then check if this state is a subset of `accepts'. */3555 /* Then check if this state is a subset of 'accepts'. */ 3662 3556 not_subset = not_consumed = 0; 3663 3557 for (k = 0; k < BITSET_WORDS; ++k) … … 3667 3561 } 3668 3562 3669 /* If this state isn't a subset of `accepts', create a3670 new group state, which has the `remains'. */3563 /* If this state isn't a subset of 'accepts', create a 3564 new group state, which has the 'remains'. */ 3671 3565 if (not_subset) 3672 3566 { … … 3674 3568 bitset_copy (dests_ch[j], intersec); 3675 3569 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); 3676 if ( BE (err != REG_NOERROR, 0))3570 if (__glibc_unlikely (err != REG_NOERROR)) 3677 3571 goto error_return; 3678 3572 ++ndests; … … 3680 3574 3681 3575 /* Put the position in the current group. */ 3682 result= re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);3683 if ( BE (result < 0, 0))3576 ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); 3577 if (__glibc_unlikely (! ok)) 3684 3578 goto error_return; 3685 3579 … … 3693 3587 bitset_copy (dests_ch[ndests], accepts); 3694 3588 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); 3695 if ( BE (err != REG_NOERROR, 0))3589 if (__glibc_unlikely (err != REG_NOERROR)) 3696 3590 goto error_return; 3697 3591 ++ndests; … … 3699 3593 } 3700 3594 } 3595 assume (ndests <= SBC_MAX); 3701 3596 return ndests; 3702 3597 error_return: … … 3706 3601 } 3707 3602 3708 #ifdef RE_ENABLE_I18N 3709 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts. 3603 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. 3710 3604 Return the number of the bytes the node accepts. 3711 3605 STR_IDX is the current index of the input string. … … 3715 3609 can only accept one byte. */ 3716 3610 3611 #ifdef _LIBC 3612 # include <locale/weight.h> 3613 #endif 3614 3717 3615 static int 3718 internal_function 3719 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, 3720 const re_string_t *input, int str_idx) 3616 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, 3617 const re_string_t *input, Idx str_idx) 3721 3618 { 3722 3619 const re_token_t *node = dfa->nodes + node_idx; 3723 3620 int char_len, elem_len; 3724 inti;3725 3726 if ( BE (node->type == OP_UTF8_PERIOD, 0))3621 Idx i; 3622 3623 if (__glibc_unlikely (node->type == OP_UTF8_PERIOD)) 3727 3624 { 3728 3625 unsigned char c = re_string_byte_at (input, str_idx), d; 3729 if ( BE (c < 0xc2, 1))3626 if (__glibc_likely (c < 0xc2)) 3730 3627 return 0; 3731 3628 … … 3779 3676 { 3780 3677 if (char_len <= 1) 3781 3678 return 0; 3782 3679 /* FIXME: I don't think this if is needed, as both '\n' 3783 3680 and '\0' are char_len == 1. */ 3784 3681 /* '.' accepts any one character except the following two cases. */ 3785 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&3786 re_string_byte_at (input, str_idx) == '\n') ||3787 ((dfa->syntax & RE_DOT_NOT_NULL) &&3788 re_string_byte_at (input, str_idx) == '\0'))3682 if ((!(dfa->syntax & RE_DOT_NEWLINE) 3683 && re_string_byte_at (input, str_idx) == '\n') 3684 || ((dfa->syntax & RE_DOT_NOT_NULL) 3685 && re_string_byte_at (input, str_idx) == '\0')) 3789 3686 return 0; 3790 3687 return char_len; … … 3798 3695 { 3799 3696 const re_charset_t *cset = node->opr.mbcset; 3800 # 3697 #ifdef _LIBC 3801 3698 const unsigned char *pin 3802 3699 = ((const unsigned char *) re_string_get_buffer (input) + str_idx); 3803 intj;3700 Idx j; 3804 3701 uint32_t nrules; 3805 # endif /* _LIBC */3702 #endif 3806 3703 int match_len = 0; 3807 3704 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) … … 3826 3723 } 3827 3724 3828 # 3725 #ifdef _LIBC 3829 3726 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3830 3727 if (nrules != 0) … … 3834 3731 const unsigned char *weights, *extra; 3835 3732 const char *collseqwc; 3836 int32_t idx;3837 /* This #include defines a local function! */3838 # include <locale/weight.h>3839 3733 3840 3734 /* match with collating_symbol? */ … … 3872 3766 } 3873 3767 /* match with range expression? */ 3768 /* FIXME: Implement rational ranges here, too. */ 3874 3769 for (i = 0; i < cset->nranges; ++i) 3875 3770 if (cset->range_starts[i] <= in_collseq … … 3892 3787 indirect = (const int32_t *) 3893 3788 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3894 idx = findidx (&cp); 3789 int32_t idx = findidx (table, indirect, extra, &cp, elem_len); 3790 int32_t rule = idx >> 24; 3791 idx &= 0xffffff; 3895 3792 if (idx > 0) 3896 for (i = 0; i < cset->nequiv_classes; ++i)3897 {3898 int32_t equiv_class_idx = cset->equiv_classes[i];3899 size_t weight_len = weights[idx];3900 if (weight_len == weights[equiv_class_idx])3901 {3902 int cnt = 0;3903 while (cnt <= weight_len3904 && (weights[equiv_class_idx + 1 + cnt]3905 == weights[idx + 1 + cnt]))3906 ++cnt;3907 if (cnt > weight_len)3908 3909 3910 3911 3912 3913 3793 { 3794 size_t weight_len = weights[idx]; 3795 for (i = 0; i < cset->nequiv_classes; ++i) 3796 { 3797 int32_t equiv_class_idx = cset->equiv_classes[i]; 3798 int32_t equiv_class_rule = equiv_class_idx >> 24; 3799 equiv_class_idx &= 0xffffff; 3800 if (weights[equiv_class_idx] == weight_len 3801 && equiv_class_rule == rule 3802 && memcmp (weights + idx + 1, 3803 weights + equiv_class_idx + 1, 3804 weight_len) == 0) 3805 { 3806 match_len = elem_len; 3807 goto check_node_accept_bytes_match; 3808 } 3809 } 3810 } 3914 3811 } 3915 3812 } 3916 3813 else 3917 # 3814 #endif /* _LIBC */ 3918 3815 { 3919 3816 /* match with range expression? */ 3920 #if __GNUC__ >= 23921 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};3922 #else3923 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};3924 cmp_buf[2] = wc;3925 #endif3926 3817 for (i = 0; i < cset->nranges; ++i) 3927 3818 { 3928 cmp_buf[0] = cset->range_starts[i]; 3929 cmp_buf[4] = cset->range_ends[i]; 3930 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 3931 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) 3819 if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) 3932 3820 { 3933 3821 match_len = char_len; … … 3950 3838 } 3951 3839 3952 # 3840 #ifdef _LIBC 3953 3841 static unsigned int 3954 internal_function3955 3842 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) 3956 3843 { … … 3977 3864 for (idx = 0; idx < extrasize;) 3978 3865 { 3979 int mbs_cnt, found = 0; 3866 int mbs_cnt; 3867 bool found = false; 3980 3868 int32_t elem_mbs_len; 3981 3869 /* Skip the name of collating element name. */ … … 3989 3877 if (mbs_cnt == elem_mbs_len) 3990 3878 /* Found the entry. */ 3991 found = 1;3879 found = true; 3992 3880 } 3993 3881 /* Skip the byte sequence of the collating element. */ … … 3998 3886 idx += sizeof (uint32_t); 3999 3887 /* Skip the wide char sequence of the collating element. */ 4000 idx = idx + sizeof (uint32_t) * ( extra[idx]+ 1);3888 idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); 4001 3889 /* If we found the entry, return the sequence value. */ 4002 3890 if (found) … … 4008 3896 } 4009 3897 } 4010 # endif /* _LIBC */ 4011 #endif /* RE_ENABLE_I18N */ 3898 #endif /* _LIBC */ 4012 3899 4013 3900 /* Check whether the node accepts the byte which is IDX-th 4014 3901 byte of the INPUT. */ 4015 3902 4016 static int 4017 internal_function 3903 static bool 4018 3904 check_node_accept (const re_match_context_t *mctx, const re_token_t *node, 4019 intidx)3905 Idx idx) 4020 3906 { 4021 3907 unsigned char ch; … … 4025 3911 case CHARACTER: 4026 3912 if (node->opr.c != ch) 4027 return 0;3913 return false; 4028 3914 break; 4029 3915 4030 3916 case SIMPLE_BRACKET: 4031 3917 if (!bitset_contain (node->opr.sbcset, ch)) 4032 return 0;3918 return false; 4033 3919 break; 4034 3920 4035 #ifdef RE_ENABLE_I18N4036 3921 case OP_UTF8_PERIOD: 4037 if (ch >= 0x80) 4038 return 0; 4039 /* FALLTHROUGH */ 4040 #endif 3922 if (ch >= ASCII_CHARS) 3923 return false; 3924 FALLTHROUGH; 4041 3925 case OP_PERIOD: 4042 3926 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) 4043 3927 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) 4044 return 0;3928 return false; 4045 3929 break; 4046 3930 4047 3931 default: 4048 return 0;3932 return false; 4049 3933 } 4050 3934 … … 4056 3940 mctx->eflags); 4057 3941 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) 4058 return 0;4059 } 4060 4061 return 1;3942 return false; 3943 } 3944 3945 return true; 4062 3946 } 4063 3947 … … 4065 3949 4066 3950 static reg_errcode_t 4067 internal_function 4068 extend_buffers (re_match_context_t *mctx )3951 __attribute_warn_unused_result__ 3952 extend_buffers (re_match_context_t *mctx, int min_len) 4069 3953 { 4070 3954 reg_errcode_t ret; 4071 3955 re_string_t *pstr = &mctx->input; 4072 3956 4073 /* Double the lengthes of the buffers. */ 4074 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); 4075 if (BE (ret != REG_NOERROR, 0)) 3957 /* Avoid overflow. */ 3958 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 3959 <= pstr->bufs_len)) 3960 return REG_ESPACE; 3961 3962 /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ 3963 ret = re_string_realloc_buffers (pstr, 3964 MAX (min_len, 3965 MIN (pstr->len, pstr->bufs_len * 2))); 3966 if (__glibc_unlikely (ret != REG_NOERROR)) 4076 3967 return ret; 4077 3968 … … 4084 3975 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, 4085 3976 pstr->bufs_len + 1); 4086 if ( BE (new_array == NULL, 0))3977 if (__glibc_unlikely (new_array == NULL)) 4087 3978 return REG_ESPACE; 4088 3979 mctx->state_log = new_array; … … 4092 3983 if (pstr->icase) 4093 3984 { 4094 #ifdef RE_ENABLE_I18N4095 3985 if (pstr->mb_cur_max > 1) 4096 3986 { 4097 3987 ret = build_wcs_upper_buffer (pstr); 4098 if ( BE (ret != REG_NOERROR, 0))3988 if (__glibc_unlikely (ret != REG_NOERROR)) 4099 3989 return ret; 4100 3990 } 4101 3991 else 4102 #endif /* RE_ENABLE_I18N */4103 3992 build_upper_buffer (pstr); 4104 3993 } 4105 3994 else 4106 3995 { 4107 #ifdef RE_ENABLE_I18N4108 3996 if (pstr->mb_cur_max > 1) 4109 3997 build_wcs_buffer (pstr); 4110 3998 else 4111 #endif /* RE_ENABLE_I18N */4112 3999 { 4113 4000 if (pstr->trans != NULL) … … 4125 4012 4126 4013 static reg_errcode_t 4127 internal_function 4128 match_ctx_init (re_match_context_t *mctx, int eflags, intn)4014 __attribute_warn_unused_result__ 4015 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) 4129 4016 { 4130 4017 mctx->eflags = eflags; … … 4132 4019 if (n > 0) 4133 4020 { 4021 /* Avoid overflow. */ 4022 size_t max_object_size = 4023 MAX (sizeof (struct re_backref_cache_entry), 4024 sizeof (re_sub_match_top_t *)); 4025 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n)) 4026 return REG_ESPACE; 4027 4134 4028 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); 4135 4029 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); 4136 if ( BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))4030 if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL)) 4137 4031 return REG_ESPACE; 4138 4032 } … … 4153 4047 4154 4048 static void 4155 internal_function4156 4049 match_ctx_clean (re_match_context_t *mctx) 4157 4050 { 4158 intst_idx;4051 Idx st_idx; 4159 4052 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) 4160 4053 { 4161 intsl_idx;4054 Idx sl_idx; 4162 4055 re_sub_match_top_t *top = mctx->sub_tops[st_idx]; 4163 4056 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) … … 4173 4066 re_free (top->path); 4174 4067 } 4175 free (top);4068 re_free (top); 4176 4069 } 4177 4070 … … 4183 4076 4184 4077 static void 4185 internal_function4186 4078 match_ctx_free (re_match_context_t *mctx) 4187 4079 { … … 4198 4090 4199 4091 static reg_errcode_t 4200 internal_function 4201 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, intfrom,4202 intto)4092 __attribute_warn_unused_result__ 4093 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, 4094 Idx to) 4203 4095 { 4204 4096 if (mctx->nbkref_ents >= mctx->abkref_ents) … … 4207 4099 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, 4208 4100 mctx->abkref_ents * 2); 4209 if ( BE (new_entry == NULL, 0))4101 if (__glibc_unlikely (new_entry == NULL)) 4210 4102 { 4211 4103 re_free (mctx->bkref_ents); … … 4235 4127 to all zeros if FROM != TO. */ 4236 4128 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map 4237 = (from == to ? ~0: 0);4129 = (from == to ? -1 : 0); 4238 4130 4239 4131 mctx->bkref_ents[mctx->nbkref_ents++].more = 0; … … 4243 4135 } 4244 4136 4245 /* Search for the first entry which hasthe same str_idx, or -1 if none is4137 /* Return the first entry with the same str_idx, or -1 if none is 4246 4138 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ 4247 4139 4248 static int 4249 internal_function 4250 search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) 4251 { 4252 int left, right, mid, last; 4140 static Idx 4141 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) 4142 { 4143 Idx left, right, mid, last; 4253 4144 last = right = mctx->nbkref_ents; 4254 4145 for (left = 0; left < right;) … … 4270 4161 4271 4162 static reg_errcode_t 4272 internal_function 4273 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) 4274 { 4275 #ifdef DEBUG 4276 assert (mctx->sub_tops != NULL); 4277 assert (mctx->asub_tops > 0); 4278 #endif 4279 if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) 4280 { 4281 int new_asub_tops = mctx->asub_tops * 2; 4163 __attribute_warn_unused_result__ 4164 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) 4165 { 4166 DEBUG_ASSERT (mctx->sub_tops != NULL); 4167 DEBUG_ASSERT (mctx->asub_tops > 0); 4168 if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops)) 4169 { 4170 Idx new_asub_tops = mctx->asub_tops * 2; 4282 4171 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, 4283 4172 re_sub_match_top_t *, 4284 4173 new_asub_tops); 4285 if ( BE (new_array == NULL, 0))4174 if (__glibc_unlikely (new_array == NULL)) 4286 4175 return REG_ESPACE; 4287 4176 mctx->sub_tops = new_array; … … 4289 4178 } 4290 4179 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); 4291 if ( BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))4180 if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL)) 4292 4181 return REG_ESPACE; 4293 4182 mctx->sub_tops[mctx->nsub_tops]->node = node; … … 4297 4186 4298 4187 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches 4299 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ 4188 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 4189 Return the new entry if successful, NULL if memory is exhausted. */ 4300 4190 4301 4191 static re_sub_match_last_t * 4302 internal_function 4303 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) 4192 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) 4304 4193 { 4305 4194 re_sub_match_last_t *new_entry; 4306 if ( BE (subtop->nlasts == subtop->alasts, 0))4307 { 4308 intnew_alasts = 2 * subtop->alasts + 1;4195 if (__glibc_unlikely (subtop->nlasts == subtop->alasts)) 4196 { 4197 Idx new_alasts = 2 * subtop->alasts + 1; 4309 4198 re_sub_match_last_t **new_array = re_realloc (subtop->lasts, 4310 4199 re_sub_match_last_t *, 4311 4200 new_alasts); 4312 if ( BE (new_array == NULL, 0))4201 if (__glibc_unlikely (new_array == NULL)) 4313 4202 return NULL; 4314 4203 subtop->lasts = new_array; … … 4316 4205 } 4317 4206 new_entry = calloc (1, sizeof (re_sub_match_last_t)); 4318 if ( BE (new_entry != NULL, 1))4207 if (__glibc_likely (new_entry != NULL)) 4319 4208 { 4320 4209 subtop->lasts[subtop->nlasts] = new_entry; … … 4327 4216 4328 4217 static void 4329 internal_function4330 4218 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 4331 re_dfastate_t **limited_sts, int last_node, intlast_str_idx)4219 re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) 4332 4220 { 4333 4221 sctx->sifted_states = sifted_sts;
Note:
See TracChangeset
for help on using the changeset viewer.