Changeset 3059 for trunk/libc/src/glibc/posix/regcomp.c
- Timestamp:
- Apr 8, 2007, 8:58:56 PM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/libc/src/glibc/posix/regcomp.c
r2260 r3059 1 1 /* Extended regular expression matching and search library. 2 Copyright (C) 2002, 2003, 2004, 2005Free Software Foundation, Inc.2 Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc. 3 3 This file is part of the GNU C Library. 4 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. … … 20 20 21 21 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, 22 int length, reg_syntax_t syntax);22 size_t length, reg_syntax_t syntax); 23 23 static void re_compile_fastmap_iter (regex_t *bufp, 24 24 const re_dfastate_t *init_state, 25 25 char *fastmap); 26 static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); 27 static void init_word_char (re_dfa_t *dfa); 26 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); 28 27 #ifdef RE_ENABLE_I18N 29 28 static void free_charset (re_charset_t *cset); … … 35 34 #endif 36 35 static reg_errcode_t analyze (regex_t *preg); 37 static reg_errcode_t create_initial_state (re_dfa_t *dfa);38 36 static reg_errcode_t preorder (bin_tree_t *root, 39 37 reg_errcode_t (fn (void *, bin_tree_t *)), … … 49 47 static reg_errcode_t calc_next (void *extra, bin_tree_t *node); 50 48 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); 51 static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, 52 int top_clone_node, int root_node, 53 unsigned int constraint); 54 static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, 55 unsigned int constraint); 56 static int search_duplicated_node (re_dfa_t *dfa, int org_node, 49 static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); 50 static int search_duplicated_node (const re_dfa_t *dfa, int org_node, 57 51 unsigned int constraint); 58 52 static reg_errcode_t calc_eclosure (re_dfa_t *dfa); … … 62 56 static int fetch_number (re_string_t *input, re_token_t *token, 63 57 reg_syntax_t syntax); 64 static void fetch_token (re_token_t *result, re_string_t *input,65 reg_syntax_t syntax);66 58 static int peek_token (re_token_t *token, re_string_t *input, 67 reg_syntax_t syntax); 68 static int peek_token_bracket (re_token_t *token, re_string_t *input, 69 reg_syntax_t syntax); 59 reg_syntax_t syntax) internal_function; 70 60 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, 71 61 reg_syntax_t syntax, reg_errcode_t *err); … … 97 87 re_string_t *regexp, 98 88 re_token_t *token); 99 #ifndef _LIBC 100 # ifdef RE_ENABLE_I18N 101 static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, 102 re_charset_t *mbcset, int *range_alloc, 103 bracket_elem_t *start_elem, 104 bracket_elem_t *end_elem); 105 static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, 106 re_charset_t *mbcset, 107 int *coll_sym_alloc, 108 const unsigned char *name); 109 # else /* not RE_ENABLE_I18N */ 110 static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, 111 bracket_elem_t *start_elem, 112 bracket_elem_t *end_elem); 113 static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, 114 const unsigned char *name); 115 # endif /* not RE_ENABLE_I18N */ 116 #endif /* not _LIBC */ 117 #ifdef RE_ENABLE_I18N 118 static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, 89 #ifdef RE_ENABLE_I18N 90 static reg_errcode_t build_equiv_class (bitset_t sbcset, 119 91 re_charset_t *mbcset, 120 92 int *equiv_class_alloc, 121 93 const unsigned char *name); 122 static reg_errcode_t build_charclass ( unsignedRE_TRANSLATE_TYPE trans,123 re_bitset_ptr_t sbcset,94 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, 95 bitset_t sbcset, 124 96 re_charset_t *mbcset, 125 97 int *char_class_alloc, … … 127 99 reg_syntax_t syntax); 128 100 #else /* not RE_ENABLE_I18N */ 129 static reg_errcode_t build_equiv_class ( re_bitset_ptr_t sbcset,101 static reg_errcode_t build_equiv_class (bitset_t sbcset, 130 102 const unsigned char *name); 131 static reg_errcode_t build_charclass ( unsignedRE_TRANSLATE_TYPE trans,132 re_bitset_ptr_t sbcset,103 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, 104 bitset_t sbcset, 133 105 const unsigned char *class_name, 134 106 reg_syntax_t syntax); 135 107 #endif /* not RE_ENABLE_I18N */ 136 108 static bin_tree_t *build_charclass_op (re_dfa_t *dfa, 137 unsignedRE_TRANSLATE_TYPE trans,109 RE_TRANSLATE_TYPE trans, 138 110 const unsigned char *class_name, 139 111 const unsigned char *extra, … … 330 302 331 303 static void 332 re_compile_fastmap_iter (bufp, init_state, fastmap) 333 regex_t *bufp; 334 const re_dfastate_t *init_state; 335 char *fastmap; 304 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, 305 char *fastmap) 336 306 { 337 307 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; … … 359 329 && dfa->nodes[node].mb_partial) 360 330 *p++ = dfa->nodes[node].opr.c; 361 memset (&state, 0, sizeof (state));331 memset (&state, '\0', sizeof (state)); 362 332 if (mbrtowc (&wc, (const char *) buf, p - buf, 363 333 &state) == p - buf … … 370 340 else if (type == SIMPLE_BRACKET) 371 341 { 372 int i, j, ch; 373 for (i = 0, ch = 0; i < BITSET_UINTS; ++i) 374 for (j = 0; j < UINT_BITS; ++j, ++ch) 375 if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) 376 re_set_fastmap (fastmap, icase, ch); 342 int i, ch; 343 for (i = 0, ch = 0; i < BITSET_WORDS; ++i) 344 { 345 int j; 346 bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; 347 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) 348 if (w & ((bitset_word_t) 1 << j)) 349 re_set_fastmap (fastmap, icase, ch); 350 } 377 351 } 378 352 #ifdef RE_ENABLE_I18N … … 393 367 'b' since 'b' is the only collation element 394 368 which starts from 'b'. */ 395 int j, ch;396 369 const int32_t *table = (const int32_t *) 397 370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 398 for (i = 0, ch = 0; i < BITSET_UINTS; ++i) 399 for (j = 0; j < UINT_BITS; ++j, ++ch) 400 if (table[ch] < 0) 401 re_set_fastmap (fastmap, icase, ch); 371 for (i = 0; i < SBC_MAX; ++i) 372 if (table[i] < 0) 373 re_set_fastmap (fastmap, icase, i); 402 374 } 403 375 # else … … 419 391 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) 420 392 != (size_t) -1) 421 re_set_fastmap (fastmap, 0, *(unsigned char *) buf);393 re_set_fastmap (fastmap, 0, *(unsigned char *) buf); 422 394 } 423 395 } … … 540 512 _STD(regerror) (errcode, preg, errbuf, errbuf_size) 541 513 int errcode; 542 const regex_t * preg;543 char * errbuf;514 const regex_t *__restrict preg; 515 char *__restrict errbuf; 544 516 size_t errbuf_size; 545 517 { … … 587 559 it the same all the time. UTF-8 is the preferred encoding so this is 588 560 a worthwhile optimization. */ 589 static const bitset utf8_sb_map =561 static const bitset_t utf8_sb_map = 590 562 { 591 563 /* Set the first 128 bits. */ 592 # if UINT_MAX == 0xffffffff 593 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff 594 # else 595 # error "Add case for new unsigned int size" 596 # endif 564 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX 597 565 }; 598 566 #endif … … 745 713 746 714 static reg_errcode_t 747 re_compile_internal (preg, pattern, length, syntax) 748 regex_t *preg; 749 const char * pattern; 750 int length; 751 reg_syntax_t syntax; 715 re_compile_internal (regex_t *preg, const char * pattern, size_t length, 716 reg_syntax_t syntax) 752 717 { 753 718 reg_errcode_t err = REG_NOERROR; … … 780 745 preg->used = sizeof (re_dfa_t); 781 746 782 /* __libc_lock_init (dfa->lock); - why init before memsetting the whole thing? */783 784 747 err = init_dfa (dfa, length); 785 748 if (BE (err != REG_NOERROR, 0)) … … 790 753 return err; 791 754 } 792 __libc_lock_init (dfa->lock); /* bird */793 794 755 #ifdef DEBUG 756 /* Note: length+1 will not overflow since it is checked in init_dfa. */ 795 757 dfa->re_str = re_malloc (char, length + 1); 796 758 strncpy (dfa->re_str, pattern, length + 1); 797 759 #endif 760 761 __libc_lock_init (dfa->lock); 798 762 799 763 err = re_string_construct (®exp, pattern, length, preg->translate, … … 848 812 849 813 static reg_errcode_t 850 init_dfa (dfa, pat_len) 851 re_dfa_t *dfa; 852 int pat_len; 853 { 854 int table_size; 814 init_dfa (re_dfa_t *dfa, size_t pat_len) 815 { 816 unsigned int table_size; 855 817 #ifndef _LIBC 856 818 char *codeset_name; … … 862 824 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; 863 825 826 /* Avoid overflows. */ 827 if (pat_len == SIZE_MAX) 828 return REG_ESPACE; 829 864 830 dfa->nodes_alloc = pat_len + 1; 865 831 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); 866 832 867 dfa->states_alloc = pat_len + 1;868 869 833 /* table_size = 2 ^ ceil(log pat_len) */ 870 for (table_size = 1; table_size > 0; table_size <<= 1)834 for (table_size = 1; ; table_size <<= 1) 871 835 if (table_size > pat_len) 872 836 break; … … 915 879 int i, j, ch; 916 880 917 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset ), 1);881 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 918 882 if (BE (dfa->sb_char == NULL, 0)) 919 883 return REG_ESPACE; 920 884 921 /* Clear all bits by, then set those corresponding to single 922 byte chars. */ 923 bitset_empty (dfa->sb_char); 924 925 for (i = 0, ch = 0; i < BITSET_UINTS; ++i) 926 for (j = 0; j < UINT_BITS; ++j, ++ch) 885 /* Set the bits corresponding to single byte chars. */ 886 for (i = 0, ch = 0; i < BITSET_WORDS; ++i) 887 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) 927 888 { 928 889 wint_t wch = __btowc (ch); 929 890 if (wch != WEOF) 930 dfa->sb_char[i] |= 1 << j;931 # if !defined (_LIBC) && !defined (__INNOTEK_LIBC__)932 if (isascii (ch) && wch != (wchar_t)ch)891 dfa->sb_char[i] |= (bitset_word_t) 1 << j; 892 # if !defined (_LIBC) 893 if (isascii (ch) && wch != ch) 933 894 dfa->map_notascii = 1; 934 895 # endif … … 948 909 949 910 static void 950 in it_word_char (dfa)951 re_dfa_t *dfa; 911 internal_function 912 init_word_char (re_dfa_t *dfa) 952 913 { 953 914 int i, j, ch; 954 915 dfa->word_ops_used = 1; 955 for (i = 0, ch = 0; i < BITSET_ UINTS; ++i)956 for (j = 0; j < UINT_BITS; ++j, ++ch)916 for (i = 0, ch = 0; i < BITSET_WORDS; ++i) 917 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) 957 918 if (isalnum (ch) || ch == '_') 958 dfa->word_char[i] |= 1 << j;919 dfa->word_char[i] |= (bitset_word_t) 1 << j; 959 920 } 960 921 … … 962 923 963 924 static void 964 free_workarea_compile (preg) 965 regex_t *preg; 925 free_workarea_compile (regex_t *preg) 966 926 { 967 927 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; … … 982 942 983 943 static reg_errcode_t 984 create_initial_state (dfa) 985 re_dfa_t *dfa; 944 create_initial_state (re_dfa_t *dfa) 986 945 { 987 946 int first, i; … … 1066 1025 1067 1026 static void 1068 optimize_utf8 (dfa) 1069 re_dfa_t *dfa; 1027 optimize_utf8 (re_dfa_t *dfa) 1070 1028 { 1071 1029 int node, i, mb_chars = 0, has_period = 0; … … 1104 1062 return; 1105 1063 case SIMPLE_BRACKET: 1106 /* Just double check. */ 1107 for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i) 1064 /* Just double check. The non-ASCII range starts at 0x80. */ 1065 assert (0x80 % BITSET_WORD_BITS == 0); 1066 for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) 1108 1067 if (dfa->nodes[node].opr.sbcset[i]) 1109 1068 return; … … 1135 1094 1136 1095 static reg_errcode_t 1137 analyze (preg) 1138 regex_t *preg; 1096 analyze (regex_t *preg) 1139 1097 { 1140 1098 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; … … 1199 1157 some hairy code in these two functions. */ 1200 1158 static reg_errcode_t 1201 postorder (root, fn, extra) 1202 bin_tree_t *root; 1203 reg_errcode_t (fn (void *, bin_tree_t *)); 1204 void *extra; 1159 postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), 1160 void *extra) 1205 1161 { 1206 1162 bin_tree_t *node, *prev; … … 1233 1189 1234 1190 static reg_errcode_t 1235 preorder (root, fn, extra) 1236 bin_tree_t *root; 1237 reg_errcode_t (fn (void *, bin_tree_t *)); 1238 void *extra; 1191 preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), 1192 void *extra) 1239 1193 { 1240 1194 bin_tree_t *node; … … 1268 1222 backreferences as well. Requires a preorder visit. */ 1269 1223 static reg_errcode_t 1270 optimize_subexps (extra, node) 1271 void *extra; 1272 bin_tree_t *node; 1224 optimize_subexps (void *extra, bin_tree_t *node) 1273 1225 { 1274 1226 re_dfa_t *dfa = (re_dfa_t *) extra; … … 1291 1243 1292 1244 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; 1293 if (other_idx < 8 * sizeof (dfa->used_bkref_map))1294 dfa->used_bkref_map &= ~(1 << other_idx);1245 if (other_idx < BITSET_WORD_BITS) 1246 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); 1295 1247 } 1296 1248 … … 1301 1253 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */ 1302 1254 static reg_errcode_t 1303 lower_subexps (extra, node) 1304 void *extra; 1305 bin_tree_t *node; 1255 lower_subexps (void *extra, bin_tree_t *node) 1306 1256 { 1307 1257 regex_t *preg = (regex_t *) extra; … … 1325 1275 1326 1276 static bin_tree_t * 1327 lower_subexp (err, preg, node) 1328 reg_errcode_t *err; 1329 regex_t *preg; 1330 bin_tree_t *node; 1277 lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) 1331 1278 { 1332 1279 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; … … 1340 1287 this case is the sed "script" /\(\)/x. */ 1341 1288 && node->left != NULL 1342 && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map) 1343 || !(dfa->used_bkref_map & (1 << node->token.opr.idx)))) 1289 && (node->token.opr.idx >= BITSET_WORD_BITS 1290 || !(dfa->used_bkref_map 1291 & ((bitset_word_t) 1 << node->token.opr.idx)))) 1344 1292 return node->left; 1345 1293 … … 1364 1312 nodes. Requires a postorder visit. */ 1365 1313 static reg_errcode_t 1366 calc_first (extra, node) 1367 void *extra; 1368 bin_tree_t *node; 1314 calc_first (void *extra, bin_tree_t *node) 1369 1315 { 1370 1316 re_dfa_t *dfa = (re_dfa_t *) extra; … … 1386 1332 /* Pass 2: compute NEXT on the tree. Preorder visit. */ 1387 1333 static reg_errcode_t 1388 calc_next (extra, node) 1389 void *extra; 1390 bin_tree_t *node; 1334 calc_next (void *extra, bin_tree_t *node) 1391 1335 { 1392 1336 switch (node->token.type) … … 1411 1355 /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ 1412 1356 static reg_errcode_t 1413 link_nfa_nodes (extra, node) 1414 void *extra; 1415 bin_tree_t *node; 1357 link_nfa_nodes (void *extra, bin_tree_t *node) 1416 1358 { 1417 1359 re_dfa_t *dfa = (re_dfa_t *) extra; … … 1473 1415 1474 1416 static reg_errcode_t 1475 duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, 1476 init_constraint) 1477 re_dfa_t *dfa; 1478 int top_org_node, top_clone_node, root_node; 1479 unsigned int init_constraint; 1480 { 1481 reg_errcode_t err; 1417 internal_function 1418 duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, 1419 int root_node, unsigned int init_constraint) 1420 { 1482 1421 int org_node, clone_node, ret; 1483 1422 unsigned int constraint = init_constraint; … … 1493 1432 org_dest = dfa->nexts[org_node]; 1494 1433 re_node_set_empty (dfa->edests + clone_node); 1495 err = duplicate_node (&clone_dest,dfa, org_dest, constraint);1496 if (BE ( err != REG_NOERROR, 0))1497 return err;1434 clone_dest = duplicate_node (dfa, org_dest, constraint); 1435 if (BE (clone_dest == -1, 0)) 1436 return REG_ESPACE; 1498 1437 dfa->nexts[clone_node] = dfa->nexts[org_node]; 1499 1438 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); … … 1531 1470 constraint |= dfa->nodes[org_node].opr.ctx_type; 1532 1471 } 1533 err = duplicate_node (&clone_dest,dfa, org_dest, constraint);1534 if (BE ( err != REG_NOERROR, 0))1535 return err;1472 clone_dest = duplicate_node (dfa, org_dest, constraint); 1473 if (BE (clone_dest == -1, 0)) 1474 return REG_ESPACE; 1536 1475 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1537 1476 if (BE (ret < 0, 0)) … … 1549 1488 { 1550 1489 /* There are no such a duplicated node, create a new one. */ 1551 err = duplicate_node (&clone_dest, dfa, org_dest, constraint); 1552 if (BE (err != REG_NOERROR, 0)) 1553 return err; 1490 reg_errcode_t err; 1491 clone_dest = duplicate_node (dfa, org_dest, constraint); 1492 if (BE (clone_dest == -1, 0)) 1493 return REG_ESPACE; 1554 1494 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1555 1495 if (BE (ret < 0, 0)) … … 1570 1510 1571 1511 org_dest = dfa->edests[org_node].elems[1]; 1572 err = duplicate_node (&clone_dest,dfa, org_dest, constraint);1573 if (BE ( err != REG_NOERROR, 0))1574 return err;1512 clone_dest = duplicate_node (dfa, org_dest, constraint); 1513 if (BE (clone_dest == -1, 0)) 1514 return REG_ESPACE; 1575 1515 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1576 1516 if (BE (ret < 0, 0)) … … 1587 1527 1588 1528 static int 1589 search_duplicated_node (dfa, org_node, constraint) 1590 re_dfa_t *dfa; 1591 int org_node; 1592 unsigned int constraint; 1529 search_duplicated_node (const re_dfa_t *dfa, int org_node, 1530 unsigned int constraint) 1593 1531 { 1594 1532 int idx; … … 1603 1541 1604 1542 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. 1605 The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, 1606 otherwise return the error code. */ 1543 Return the index of the new node, or -1 if insufficient storage is 1544 available. */ 1545 1546 static int 1547 duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) 1548 { 1549 int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); 1550 if (BE (dup_idx != -1, 1)) 1551 { 1552 dfa->nodes[dup_idx].constraint = constraint; 1553 if (dfa->nodes[org_idx].type == ANCHOR) 1554 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; 1555 dfa->nodes[dup_idx].duplicated = 1; 1556 1557 /* Store the index of the original node. */ 1558 dfa->org_indices[dup_idx] = org_idx; 1559 } 1560 return dup_idx; 1561 } 1607 1562 1608 1563 static reg_errcode_t 1609 duplicate_node (new_idx, dfa, org_idx, constraint) 1610 re_dfa_t *dfa; 1611 int *new_idx, org_idx; 1612 unsigned int constraint; 1613 { 1614 int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); 1615 if (BE (dup_idx == -1, 0)) 1616 return REG_ESPACE; 1617 dfa->nodes[dup_idx].constraint = constraint; 1618 if (dfa->nodes[org_idx].type == ANCHOR) 1619 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; 1620 dfa->nodes[dup_idx].duplicated = 1; 1621 1622 /* Store the index of the original node. */ 1623 dfa->org_indices[dup_idx] = org_idx; 1624 *new_idx = dup_idx; 1625 return REG_NOERROR; 1626 } 1627 1628 static reg_errcode_t 1629 calc_inveclosure (dfa) 1630 re_dfa_t *dfa; 1564 calc_inveclosure (re_dfa_t *dfa) 1631 1565 { 1632 1566 int src, idx, ret; … … 1651 1585 1652 1586 static reg_errcode_t 1653 calc_eclosure (dfa) 1654 re_dfa_t *dfa; 1587 calc_eclosure (re_dfa_t *dfa) 1655 1588 { 1656 1589 int node_idx, incomplete; … … 1696 1629 1697 1630 static reg_errcode_t 1698 calc_eclosure_iter (new_set, dfa, node, root) 1699 re_node_set *new_set; 1700 re_dfa_t *dfa; 1701 int node, root; 1631 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) 1702 1632 { 1703 1633 reg_errcode_t err; … … 1722 1652 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) 1723 1653 { 1724 int org_node, cur_node;1725 org_node = cur_node = node;1726 1654 err = duplicate_node_closure (dfa, node, node, node, constraint); 1727 1655 if (BE (err != REG_NOERROR, 0)) … … 1780 1708 1781 1709 static void 1782 fetch_token (result, input, syntax) 1783 re_token_t *result; 1784 re_string_t *input; 1785 reg_syntax_t syntax; 1710 internal_function 1711 fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) 1786 1712 { 1787 1713 re_string_skip_bytes (input, peek_token (result, input, syntax)); … … 1792 1718 1793 1719 static int 1794 peek_token (token, input, syntax) 1795 re_token_t *token; 1796 re_string_t *input; 1797 reg_syntax_t syntax; 1720 internal_function 1721 peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 1798 1722 { 1799 1723 unsigned char c; … … 2033 1957 2034 1958 static int 2035 peek_token_bracket (token, input, syntax) 2036 re_token_t *token; 2037 re_string_t *input; 2038 reg_syntax_t syntax; 1959 internal_function 1960 peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 2039 1961 { 2040 1962 unsigned char c; … … 2133 2055 2134 2056 static bin_tree_t * 2135 parse (regexp, preg, syntax, err) 2136 re_string_t *regexp; 2137 regex_t *preg; 2138 reg_syntax_t syntax; 2139 reg_errcode_t *err; 2057 parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, 2058 reg_errcode_t *err) 2140 2059 { 2141 2060 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; … … 2170 2089 2171 2090 static bin_tree_t * 2172 parse_reg_exp (regexp, preg, token, syntax, nest, err) 2173 re_string_t *regexp; 2174 regex_t *preg; 2175 re_token_t *token; 2176 reg_syntax_t syntax; 2177 int nest; 2178 reg_errcode_t *err; 2091 parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, 2092 reg_syntax_t syntax, int nest, reg_errcode_t *err) 2179 2093 { 2180 2094 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; … … 2216 2130 2217 2131 static bin_tree_t * 2218 parse_branch (regexp, preg, token, syntax, nest, err) 2219 re_string_t *regexp; 2220 regex_t *preg; 2221 re_token_t *token; 2222 reg_syntax_t syntax; 2223 int nest; 2224 reg_errcode_t *err; 2132 parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, 2133 reg_syntax_t syntax, int nest, reg_errcode_t *err) 2225 2134 { 2226 2135 bin_tree_t *tree, *exp; … … 2261 2170 2262 2171 static bin_tree_t * 2263 parse_expression (regexp, preg, token, syntax, nest, err) 2264 re_string_t *regexp; 2265 regex_t *preg; 2266 re_token_t *token; 2267 reg_syntax_t syntax; 2268 int nest; 2269 reg_errcode_t *err; 2172 parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, 2173 reg_syntax_t syntax, int nest, reg_errcode_t *err) 2270 2174 { 2271 2175 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; … … 2482 2386 2483 2387 static bin_tree_t * 2484 parse_sub_exp (regexp, preg, token, syntax, nest, err) 2485 re_string_t *regexp; 2486 regex_t *preg; 2487 re_token_t *token; 2488 reg_syntax_t syntax; 2489 int nest; 2490 reg_errcode_t *err; 2388 parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, 2389 reg_syntax_t syntax, int nest, reg_errcode_t *err) 2491 2390 { 2492 2391 re_dfa_t *dfa = (re_dfa_t *) preg->buffer; … … 2508 2407 return NULL; 2509 2408 } 2510 dfa->completed_bkref_map |= 1 << cur_nsub; 2409 2410 if (cur_nsub <= '9' - '1') 2411 dfa->completed_bkref_map |= 1 << cur_nsub; 2511 2412 2512 2413 tree = create_tree (dfa, tree, NULL, SUBEXP); … … 2523 2424 2524 2425 static bin_tree_t * 2525 parse_dup_op (elem, regexp, dfa, token, syntax, err) 2526 bin_tree_t *elem; 2527 re_string_t *regexp; 2528 re_dfa_t *dfa; 2529 re_token_t *token; 2530 reg_syntax_t syntax; 2531 reg_errcode_t *err; 2426 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, 2427 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) 2532 2428 { 2533 2429 bin_tree_t *tree = NULL, *old_tree = NULL; … … 2668 2564 2669 2565 static reg_errcode_t 2566 internal_function 2670 2567 # ifdef RE_ENABLE_I18N 2671 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) 2672 re_charset_t *mbcset; 2673 int *range_alloc; 2568 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, 2569 bracket_elem_t *start_elem, bracket_elem_t *end_elem, reg_syntax_t syntax) /* bird: syntax */ 2674 2570 # else /* not RE_ENABLE_I18N */ 2675 build_range_exp (sbcset, start_elem, end_elem) 2571 build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, 2572 bracket_elem_t *end_elem, reg_syntax_t syntax) /* bird: syntax */ 2676 2573 # endif /* not RE_ENABLE_I18N */ 2677 re_bitset_ptr_t sbcset;2678 bracket_elem_t *start_elem, *end_elem;2679 2574 { 2680 2575 unsigned int start_ch, end_ch; … … 2695 2590 # ifdef RE_ENABLE_I18N 2696 2591 { 2697 wint_t wc, start_wc, end_wc; 2592 wchar_t wc; 2593 wint_t start_wc; 2594 wint_t end_wc; 2698 2595 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; 2699 2596 … … 2712 2609 cmp_buf[0] = start_wc; 2713 2610 cmp_buf[4] = end_wc; 2714 if ( wcscoll (cmp_buf, cmp_buf + 4) > 0)2611 if ((syntax & RE_NO_EMPTY_RANGES) && wcscoll (cmp_buf, cmp_buf + 4) > 0) /* bird: RE_NO_EMPTY_RANGES */ 2715 2612 return REG_ERANGE; 2716 2613 … … 2769 2666 : 0)); 2770 2667 if (start_ch > end_ch) 2771 return REG_ERANGE; 2668 return (syntax & RE_NO_EMPTY_RANGES) ? REG_ERANGE : REG_NOERROR; /* bird: added RE_NO_EMPTY_RANGES check. */ 2669 2772 2670 /* Build the table for single byte characters. */ 2773 2671 for (ch = 0; ch < SBC_MAX; ++ch) … … 2788 2686 2789 2687 static reg_errcode_t 2688 internal_function 2790 2689 # ifdef RE_ENABLE_I18N 2791 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) 2792 re_charset_t *mbcset; 2793 int *coll_sym_alloc; 2690 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2691 int *coll_sym_alloc, const unsigned char *name) 2794 2692 # else /* not RE_ENABLE_I18N */ 2795 build_collating_symbol ( sbcset,name)2693 build_collating_symbol (bitset_t sbcset, const unsigned char *name) 2796 2694 # endif /* not RE_ENABLE_I18N */ 2797 re_bitset_ptr_t sbcset;2798 const unsigned char *name;2799 2695 { 2800 2696 size_t name_len = strlen ((const char *) name); … … 2813 2709 2814 2710 static bin_tree_t * 2815 parse_bracket_exp (regexp, dfa, token, syntax, err) 2816 re_string_t *regexp; 2817 re_dfa_t *dfa; 2818 re_token_t *token; 2819 reg_syntax_t syntax; 2820 reg_errcode_t *err; 2711 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, 2712 reg_syntax_t syntax, reg_errcode_t *err) 2821 2713 { 2822 2714 #ifdef _LIBC … … 2840 2732 int32_t hash = elem_hash ((const char *) name, name_len); 2841 2733 int32_t elem = hash % table_size; 2842 int32_t second = hash % (table_size - 2); 2843 while (symb_table[2 * elem] != 0) 2844 { 2845 /* First compare the hashing value. */ 2846 if (symb_table[2 * elem] == hash 2847 /* Compare the length of the name. */ 2848 && name_len == extra[symb_table[2 * elem + 1]] 2849 /* Compare the name. */ 2850 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], 2851 name_len) == 0) 2734 if (symb_table[2 * elem] != 0) 2735 { 2736 int32_t second = hash % (table_size - 2) + 1; 2737 2738 do 2852 2739 { 2853 /* Yep, this is the entry. */ 2854 break; 2740 /* First compare the hashing value. */ 2741 if (symb_table[2 * elem] == hash 2742 /* Compare the length of the name. */ 2743 && name_len == extra[symb_table[2 * elem + 1]] 2744 /* Compare the name. */ 2745 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], 2746 name_len) == 0) 2747 { 2748 /* Yep, this is the entry. */ 2749 break; 2750 } 2751 2752 /* Next entry. */ 2753 elem += second; 2855 2754 } 2856 2857 /* Next entry. */ 2858 elem += second; 2755 while (symb_table[2 * elem] != 0); 2859 2756 } 2860 2757 return elem; … … 2938 2835 re_charset_t *mbcset; 2939 2836 int *range_alloc; 2940 re_bitset_ptr_t sbcset;2837 bitset_t sbcset; 2941 2838 bracket_elem_t *start_elem, *end_elem; 2942 2839 { … … 3021 2918 re_charset_t *mbcset; 3022 2919 int *coll_sym_alloc; 3023 re_bitset_ptr_t sbcset;2920 bitset_t sbcset; 3024 2921 const unsigned char *name; 3025 2922 { … … 3098 2995 if (MB_CUR_MAX > 1) 3099 2996 */ 3100 2997 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); 3101 2998 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); 3102 2999 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, … … 3106 3003 } 3107 3004 #endif 3108 sbcset = (re_bitset_ptr_t) calloc (sizeof ( unsigned int), BITSET_UINTS);3005 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3109 3006 #ifdef RE_ENABLE_I18N 3110 3007 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); … … 3217 3114 *err = build_range_exp (sbcset, 3218 3115 dfa->mb_cur_max > 1 ? mbcset : NULL, 3219 &range_alloc, &start_elem, &end_elem );3116 &range_alloc, &start_elem, &end_elem, syntax); 3220 3117 # else 3221 *err = build_range_exp (sbcset, &start_elem, &end_elem );3118 *err = build_range_exp (sbcset, &start_elem, &end_elem, syntax); 3222 3119 # endif 3223 3120 #endif /* RE_ENABLE_I18N */ … … 3316 3213 if (BE (mbc_tree == NULL, 0)) 3317 3214 goto parse_bracket_exp_espace; 3318 for (sbc_idx = 0; sbc_idx < BITSET_ UINTS; ++sbc_idx)3215 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) 3319 3216 if (sbcset[sbc_idx]) 3320 3217 break; 3321 3218 /* If there are no bits set in sbcset, there is no point 3322 3219 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ 3323 if (sbc_idx < BITSET_ UINTS)3220 if (sbc_idx < BITSET_WORDS) 3324 3221 { 3325 3222 /* Build a tree for simple bracket. */ … … 3369 3266 3370 3267 static reg_errcode_t 3371 parse_bracket_element (elem, regexp, token, token_len, dfa, syntax, 3372 accept_hyphen) 3373 bracket_elem_t *elem; 3374 re_string_t *regexp; 3375 re_token_t *token; 3376 int token_len; 3377 re_dfa_t *dfa; 3378 reg_syntax_t syntax; 3379 int accept_hyphen; 3268 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, 3269 re_token_t *token, int token_len, re_dfa_t *dfa, 3270 reg_syntax_t syntax, int accept_hyphen) 3380 3271 { 3381 3272 #ifdef RE_ENABLE_I18N … … 3415 3306 3416 3307 static reg_errcode_t 3417 parse_bracket_symbol (elem, regexp, token) 3418 bracket_elem_t *elem; 3419 re_string_t *regexp; 3420 re_token_t *token; 3308 parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, 3309 re_token_t *token) 3421 3310 { 3422 3311 unsigned char ch, delim = token->opr.c; … … 3465 3354 static reg_errcode_t 3466 3355 #ifdef RE_ENABLE_I18N 3467 build_equiv_class (sbcset, mbcset, equiv_class_alloc, name) 3468 re_charset_t *mbcset; 3469 int *equiv_class_alloc; 3356 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, 3357 int *equiv_class_alloc, const unsigned char *name) 3470 3358 #else /* not RE_ENABLE_I18N */ 3471 build_equiv_class ( sbcset,name)3359 build_equiv_class (bitset_t sbcset, const unsigned char *name) 3472 3360 #endif /* not RE_ENABLE_I18N */ 3473 re_bitset_ptr_t sbcset; 3474 const unsigned char *name; 3475 { 3476 #if defined _LIBC 3361 { 3362 #ifdef _LIBC 3477 3363 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3478 3364 if (nrules != 0) … … 3560 3446 static reg_errcode_t 3561 3447 #ifdef RE_ENABLE_I18N 3562 build_charclass ( trans, sbcset, mbcset, char_class_alloc, class_name, syntax)3563 re_charset_t *mbcset; 3564 int *char_class_alloc; 3448 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3449 re_charset_t *mbcset, int *char_class_alloc, 3450 const unsigned char *class_name, reg_syntax_t syntax) 3565 3451 #else /* not RE_ENABLE_I18N */ 3566 build_charclass (trans, sbcset, class_name, syntax) 3452 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3453 const unsigned char *class_name, reg_syntax_t syntax) 3567 3454 #endif /* not RE_ENABLE_I18N */ 3568 unsigned RE_TRANSLATE_TYPE trans;3569 re_bitset_ptr_t sbcset;3570 const unsigned char *class_name;3571 reg_syntax_t syntax;3572 3455 { 3573 3456 int i; … … 3599 3482 3600 3483 #define BUILD_CHARCLASS_LOOP(ctype_func) \ 3601 for (i = 0; i < SBC_MAX; ++i) \ 3484 do { \ 3485 if (BE (trans != NULL, 0)) \ 3602 3486 { \ 3603 if (ctype_func (i)) \ 3604 { \ 3605 int ch = trans ? trans[i] : i; \ 3606 bitset_set (sbcset, ch); \ 3607 } \ 3608 } 3487 for (i = 0; i < SBC_MAX; ++i) \ 3488 if (ctype_func (i)) \ 3489 bitset_set (sbcset, trans[i]); \ 3490 } \ 3491 else \ 3492 { \ 3493 for (i = 0; i < SBC_MAX; ++i) \ 3494 if (ctype_func (i)) \ 3495 bitset_set (sbcset, i); \ 3496 } \ 3497 } while (0) 3609 3498 3610 3499 if (strcmp (name, "alnum") == 0) 3611 BUILD_CHARCLASS_LOOP (isalnum) 3500 BUILD_CHARCLASS_LOOP (isalnum); 3612 3501 else if (strcmp (name, "cntrl") == 0) 3613 BUILD_CHARCLASS_LOOP (iscntrl) 3502 BUILD_CHARCLASS_LOOP (iscntrl); 3614 3503 else if (strcmp (name, "lower") == 0) 3615 BUILD_CHARCLASS_LOOP (islower) 3504 BUILD_CHARCLASS_LOOP (islower); 3616 3505 else if (strcmp (name, "space") == 0) 3617 BUILD_CHARCLASS_LOOP (isspace) 3506 BUILD_CHARCLASS_LOOP (isspace); 3618 3507 else if (strcmp (name, "alpha") == 0) 3619 BUILD_CHARCLASS_LOOP (isalpha) 3508 BUILD_CHARCLASS_LOOP (isalpha); 3620 3509 else if (strcmp (name, "digit") == 0) 3621 BUILD_CHARCLASS_LOOP (isdigit) 3510 BUILD_CHARCLASS_LOOP (isdigit); 3622 3511 else if (strcmp (name, "print") == 0) 3623 BUILD_CHARCLASS_LOOP (isprint) 3512 BUILD_CHARCLASS_LOOP (isprint); 3624 3513 else if (strcmp (name, "upper") == 0) 3625 BUILD_CHARCLASS_LOOP (isupper) 3514 BUILD_CHARCLASS_LOOP (isupper); 3626 3515 else if (strcmp (name, "blank") == 0) 3627 BUILD_CHARCLASS_LOOP (isblank) 3516 BUILD_CHARCLASS_LOOP (isblank); 3628 3517 else if (strcmp (name, "graph") == 0) 3629 BUILD_CHARCLASS_LOOP (isgraph) 3518 BUILD_CHARCLASS_LOOP (isgraph); 3630 3519 else if (strcmp (name, "punct") == 0) 3631 BUILD_CHARCLASS_LOOP (ispunct) 3520 BUILD_CHARCLASS_LOOP (ispunct); 3632 3521 else if (strcmp (name, "xdigit") == 0) 3633 BUILD_CHARCLASS_LOOP (isxdigit) 3522 BUILD_CHARCLASS_LOOP (isxdigit); 3634 3523 else 3635 3524 return REG_ECTYPE; … … 3639 3528 3640 3529 static bin_tree_t * 3641 build_charclass_op (dfa, trans, class_name, extra, non_match, err) 3642 re_dfa_t *dfa; 3643 unsigned RE_TRANSLATE_TYPE trans; 3644 const unsigned char *class_name; 3645 const unsigned char *extra; 3646 int non_match; 3647 reg_errcode_t *err; 3530 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, 3531 const unsigned char *class_name, 3532 const unsigned char *extra, int non_match, 3533 reg_errcode_t *err) 3648 3534 { 3649 3535 re_bitset_ptr_t sbcset; … … 3656 3542 bin_tree_t *tree; 3657 3543 3658 sbcset = (re_bitset_ptr_t) calloc (sizeof ( unsigned int), BITSET_UINTS);3544 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3659 3545 #ifdef RE_ENABLE_I18N 3660 3546 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); … … 3759 3645 3760 3646 static int 3761 fetch_number (input, token, syntax) 3762 re_string_t *input; 3763 re_token_t *token; 3764 reg_syntax_t syntax; 3647 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) 3765 3648 { 3766 3649 int num = -1; … … 3804 3687 3805 3688 static bin_tree_t * 3806 create_tree (dfa, left, right, type) 3807 re_dfa_t *dfa; 3808 bin_tree_t *left; 3809 bin_tree_t *right; 3810 re_token_type_t type; 3689 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, 3690 re_token_type_t type) 3811 3691 { 3812 3692 re_token_t t; … … 3816 3696 3817 3697 static bin_tree_t * 3818 create_token_tree (dfa, left, right, token) 3819 re_dfa_t *dfa; 3820 bin_tree_t *left; 3821 bin_tree_t *right; 3822 const re_token_t *token; 3698 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, 3699 const re_token_t *token) 3823 3700 { 3824 3701 bin_tree_t *tree; … … 3856 3733 3857 3734 static reg_errcode_t 3858 mark_opt_subexp (extra, node) 3859 void *extra; 3860 bin_tree_t *node; 3735 mark_opt_subexp (void *extra, bin_tree_t *node) 3861 3736 { 3862 3737 int idx = (int) (long) extra; … … 3898 3773 3899 3774 static bin_tree_t * 3900 duplicate_tree (root, dfa) 3901 const bin_tree_t *root; 3902 re_dfa_t *dfa; 3775 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) 3903 3776 { 3904 3777 const bin_tree_t *node;
Note:
See TracChangeset
for help on using the changeset viewer.