Ignore:
Timestamp:
Apr 8, 2007, 8:58:56 PM (18 years ago)
Author:
bird
Message:

Merged in regex from glibc 2.5. Fixes #147.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/libc/src/glibc/posix/regcomp.c

    r2260 r3059  
    11/* Extended regular expression matching and search library.
    2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
     2   Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
    33   This file is part of the GNU C Library.
    44   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
     
    2020
    2121static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
    22                                           int length, reg_syntax_t syntax);
     22                                          size_t length, reg_syntax_t syntax);
    2323static void re_compile_fastmap_iter (regex_t *bufp,
    2424                                     const re_dfastate_t *init_state,
    2525                                     char *fastmap);
    26 static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
    27 static void init_word_char (re_dfa_t *dfa);
     26static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
    2827#ifdef RE_ENABLE_I18N
    2928static void free_charset (re_charset_t *cset);
     
    3534#endif
    3635static reg_errcode_t analyze (regex_t *preg);
    37 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
    3836static reg_errcode_t preorder (bin_tree_t *root,
    3937                               reg_errcode_t (fn (void *, bin_tree_t *)),
     
    4947static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
    5048static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
    51 static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
    52                                              int top_clone_node, int root_node,
    53                                              unsigned int constraint);
    54 static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
    55                                      unsigned int constraint);
    56 static int search_duplicated_node (re_dfa_t *dfa, int org_node,
     49static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
     50static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
    5751                                   unsigned int constraint);
    5852static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
     
    6256static int fetch_number (re_string_t *input, re_token_t *token,
    6357                         reg_syntax_t syntax);
    64 static void fetch_token (re_token_t *result, re_string_t *input,
    65                          reg_syntax_t syntax);
    6658static int peek_token (re_token_t *token, re_string_t *input,
    67                         reg_syntax_t syntax);
    68 static int peek_token_bracket (re_token_t *token, re_string_t *input,
    69                                reg_syntax_t syntax);
     59                        reg_syntax_t syntax) internal_function;
    7060static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
    7161                          reg_syntax_t syntax, reg_errcode_t *err);
     
    9787                                          re_string_t *regexp,
    9888                                          re_token_t *token);
    99 #ifndef _LIBC
    100 # ifdef RE_ENABLE_I18N
    101 static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
    102                                       re_charset_t *mbcset, int *range_alloc,
    103                                       bracket_elem_t *start_elem,
    104                                       bracket_elem_t *end_elem);
    105 static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
    106                                              re_charset_t *mbcset,
    107                                              int *coll_sym_alloc,
    108                                              const unsigned char *name);
    109 # else /* not RE_ENABLE_I18N */
    110 static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
    111                                       bracket_elem_t *start_elem,
    112                                       bracket_elem_t *end_elem);
    113 static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
    114                                              const unsigned char *name);
    115 # endif /* not RE_ENABLE_I18N */
    116 #endif /* not _LIBC */
    117 #ifdef RE_ENABLE_I18N
    118 static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
     89#ifdef RE_ENABLE_I18N
     90static reg_errcode_t build_equiv_class (bitset_t sbcset,
    11991                                        re_charset_t *mbcset,
    12092                                        int *equiv_class_alloc,
    12193                                        const unsigned char *name);
    122 static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans,
    123                                       re_bitset_ptr_t sbcset,
     94static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
     95                                      bitset_t sbcset,
    12496                                      re_charset_t *mbcset,
    12597                                      int *char_class_alloc,
     
    12799                                      reg_syntax_t syntax);
    128100#else  /* not RE_ENABLE_I18N */
    129 static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
     101static reg_errcode_t build_equiv_class (bitset_t sbcset,
    130102                                        const unsigned char *name);
    131 static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans,
    132                                       re_bitset_ptr_t sbcset,
     103static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
     104                                      bitset_t sbcset,
    133105                                      const unsigned char *class_name,
    134106                                      reg_syntax_t syntax);
    135107#endif /* not RE_ENABLE_I18N */
    136108static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
    137                                        unsigned RE_TRANSLATE_TYPE trans,
     109                                       RE_TRANSLATE_TYPE trans,
    138110                                       const unsigned char *class_name,
    139111                                       const unsigned char *extra,
     
    330302
    331303static void
    332 re_compile_fastmap_iter (bufp, init_state, fastmap)
    333      regex_t *bufp;
    334      const re_dfastate_t *init_state;
    335      char *fastmap;
     304re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
     305                         char *fastmap)
    336306{
    337307  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
     
    359329                     && dfa->nodes[node].mb_partial)
    360330                *p++ = dfa->nodes[node].opr.c;
    361               memset (&state, 0, sizeof (state));
     331              memset (&state, '\0', sizeof (state));
    362332              if (mbrtowc (&wc, (const char *) buf, p - buf,
    363333                           &state) == p - buf
     
    370340      else if (type == SIMPLE_BRACKET)
    371341        {
    372           int i, j, ch;
    373           for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
    374             for (j = 0; j < UINT_BITS; ++j, ++ch)
    375               if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
    376                 re_set_fastmap (fastmap, icase, ch);
     342          int i, ch;
     343          for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
     344            {
     345              int j;
     346              bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
     347              for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
     348                if (w & ((bitset_word_t) 1 << j))
     349                  re_set_fastmap (fastmap, icase, ch);
     350            }
    377351        }
    378352#ifdef RE_ENABLE_I18N
     
    393367                          'b' since 'b' is the only collation element
    394368                          which starts from 'b'.  */
    395                   int j, ch;
    396369                  const int32_t *table = (const int32_t *)
    397370                    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
    398                   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
    399                     for (j = 0; j < UINT_BITS; ++j, ++ch)
    400                       if (table[ch] < 0)
    401                         re_set_fastmap (fastmap, icase, ch);
     371                  for (i = 0; i < SBC_MAX; ++i)
     372                    if (table[i] < 0)
     373                      re_set_fastmap (fastmap, icase, i);
    402374                }
    403375# else
     
    419391                  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
    420392                      != (size_t) -1)
    421                   re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
     393                    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
    422394                }
    423395            }
     
    540512_STD(regerror) (errcode, preg, errbuf, errbuf_size)
    541513    int errcode;
    542     const regex_t *preg;
    543     char *errbuf;
     514    const regex_t *__restrict preg;
     515    char *__restrict errbuf;
    544516    size_t errbuf_size;
    545517{
     
    587559   it the same all the time.  UTF-8 is the preferred encoding so this is
    588560   a worthwhile optimization.  */
    589 static const bitset utf8_sb_map =
     561static const bitset_t utf8_sb_map =
    590562{
    591563  /* Set the first 128 bits.  */
    592 # if UINT_MAX == 0xffffffff
    593   0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
    594 # else
    595 #  error "Add case for new unsigned int size"
    596 # endif
     564  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
    597565};
    598566#endif
     
    745713
    746714static reg_errcode_t
    747 re_compile_internal (preg, pattern, length, syntax)
    748      regex_t *preg;
    749      const char * pattern;
    750      int length;
    751      reg_syntax_t syntax;
     715re_compile_internal (regex_t *preg, const char * pattern, size_t length,
     716                     reg_syntax_t syntax)
    752717{
    753718  reg_errcode_t err = REG_NOERROR;
     
    780745  preg->used = sizeof (re_dfa_t);
    781746
    782   /* __libc_lock_init (dfa->lock); - why init before memsetting the whole thing? */
    783 
    784747  err = init_dfa (dfa, length);
    785748  if (BE (err != REG_NOERROR, 0))
     
    790753      return err;
    791754    }
    792   __libc_lock_init (dfa->lock); /* bird */
    793 
    794755#ifdef DEBUG
     756  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
    795757  dfa->re_str = re_malloc (char, length + 1);
    796758  strncpy (dfa->re_str, pattern, length + 1);
    797759#endif
     760
     761  __libc_lock_init (dfa->lock);
    798762
    799763  err = re_string_construct (&regexp, pattern, length, preg->translate,
     
    848812
    849813static reg_errcode_t
    850 init_dfa (dfa, pat_len)
    851      re_dfa_t *dfa;
    852      int pat_len;
    853 {
    854   int table_size;
     814init_dfa (re_dfa_t *dfa, size_t pat_len)
     815{
     816  unsigned int table_size;
    855817#ifndef _LIBC
    856818  char *codeset_name;
     
    862824  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
    863825
     826  /* Avoid overflows.  */
     827  if (pat_len == SIZE_MAX)
     828    return REG_ESPACE;
     829
    864830  dfa->nodes_alloc = pat_len + 1;
    865831  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
    866832
    867   dfa->states_alloc = pat_len + 1;
    868 
    869833  /*  table_size = 2 ^ ceil(log pat_len) */
    870   for (table_size = 1; table_size > 0; table_size <<= 1)
     834  for (table_size = 1; ; table_size <<= 1)
    871835    if (table_size > pat_len)
    872836      break;
     
    915879          int i, j, ch;
    916880
    917           dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
     881          dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
    918882          if (BE (dfa->sb_char == NULL, 0))
    919883            return REG_ESPACE;
    920884
    921           /* Clear all bits by, then set those corresponding to single
    922              byte chars.  */
    923           bitset_empty (dfa->sb_char);
    924 
    925           for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
    926             for (j = 0; j < UINT_BITS; ++j, ++ch)
     885          /* Set the bits corresponding to single byte chars.  */
     886          for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
     887            for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
    927888              {
    928889                wint_t wch = __btowc (ch);
    929890                if (wch != WEOF)
    930                   dfa->sb_char[i] |= 1 << j;
    931 # if !defined (_LIBC) && !defined (__INNOTEK_LIBC__)
    932                 if (isascii (ch) && wch != (wchar_t) ch)
     891                  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
     892# if !defined (_LIBC)
     893                if (isascii (ch) && wch != ch)
    933894                  dfa->map_notascii = 1;
    934895# endif
     
    948909
    949910static void
    950 init_word_char (dfa)
    951      re_dfa_t *dfa;
     911internal_function
     912init_word_char (re_dfa_t *dfa)
    952913{
    953914  int i, j, ch;
    954915  dfa->word_ops_used = 1;
    955   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
    956     for (j = 0; j < UINT_BITS; ++j, ++ch)
     916  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
     917    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
    957918      if (isalnum (ch) || ch == '_')
    958         dfa->word_char[i] |= 1 << j;
     919        dfa->word_char[i] |= (bitset_word_t) 1 << j;
    959920}
    960921
     
    962923
    963924static void
    964 free_workarea_compile (preg)
    965      regex_t *preg;
     925free_workarea_compile (regex_t *preg)
    966926{
    967927  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     
    982942
    983943static reg_errcode_t
    984 create_initial_state (dfa)
    985      re_dfa_t *dfa;
     944create_initial_state (re_dfa_t *dfa)
    986945{
    987946  int first, i;
     
    10661025
    10671026static void
    1068 optimize_utf8 (dfa)
    1069      re_dfa_t *dfa;
     1027optimize_utf8 (re_dfa_t *dfa)
    10701028{
    10711029  int node, i, mb_chars = 0, has_period = 0;
     
    11041062        return;
    11051063      case SIMPLE_BRACKET:
    1106         /* Just double check.  */
    1107         for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
     1064        /* Just double check.  The non-ASCII range starts at 0x80.  */
     1065        assert (0x80 % BITSET_WORD_BITS == 0);
     1066        for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
    11081067          if (dfa->nodes[node].opr.sbcset[i])
    11091068            return;
     
    11351094
    11361095static reg_errcode_t
    1137 analyze (preg)
    1138      regex_t *preg;
     1096analyze (regex_t *preg)
    11391097{
    11401098  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     
    11991157   some hairy code in these two functions.  */
    12001158static reg_errcode_t
    1201 postorder (root, fn, extra)
    1202      bin_tree_t *root;
    1203      reg_errcode_t (fn (void *, bin_tree_t *));
    1204      void *extra;
     1159postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
     1160           void *extra)
    12051161{
    12061162  bin_tree_t *node, *prev;
     
    12331189
    12341190static reg_errcode_t
    1235 preorder (root, fn, extra)
    1236      bin_tree_t *root;
    1237      reg_errcode_t (fn (void *, bin_tree_t *));
    1238      void *extra;
     1191preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
     1192          void *extra)
    12391193{
    12401194  bin_tree_t *node;
     
    12681222   backreferences as well.  Requires a preorder visit.  */
    12691223static reg_errcode_t
    1270 optimize_subexps (extra, node)
    1271      void *extra;
    1272      bin_tree_t *node;
     1224optimize_subexps (void *extra, bin_tree_t *node)
    12731225{
    12741226  re_dfa_t *dfa = (re_dfa_t *) extra;
     
    12911243
    12921244      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
    1293       if (other_idx < 8 * sizeof (dfa->used_bkref_map))
    1294         dfa->used_bkref_map &= ~(1 << other_idx);
     1245      if (other_idx < BITSET_WORD_BITS)
     1246          dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
    12951247    }
    12961248
     
    13011253   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */
    13021254static reg_errcode_t
    1303 lower_subexps (extra, node)
    1304      void *extra;
    1305      bin_tree_t *node;
     1255lower_subexps (void *extra, bin_tree_t *node)
    13061256{
    13071257  regex_t *preg = (regex_t *) extra;
     
    13251275
    13261276static bin_tree_t *
    1327 lower_subexp (err, preg, node)
    1328      reg_errcode_t *err;
    1329      regex_t *preg;
    1330      bin_tree_t *node;
     1277lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
    13311278{
    13321279  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     
    13401287         this case is the sed "script" /\(\)/x.  */
    13411288      && node->left != NULL
    1342       && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
    1343           || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
     1289      && (node->token.opr.idx >= BITSET_WORD_BITS
     1290          || !(dfa->used_bkref_map
     1291               & ((bitset_word_t) 1 << node->token.opr.idx))))
    13441292    return node->left;
    13451293
     
    13641312   nodes.  Requires a postorder visit.  */
    13651313static reg_errcode_t
    1366 calc_first (extra, node)
    1367      void *extra;
    1368      bin_tree_t *node;
     1314calc_first (void *extra, bin_tree_t *node)
    13691315{
    13701316  re_dfa_t *dfa = (re_dfa_t *) extra;
     
    13861332/* Pass 2: compute NEXT on the tree.  Preorder visit.  */
    13871333static reg_errcode_t
    1388 calc_next (extra, node)
    1389      void *extra;
    1390      bin_tree_t *node;
     1334calc_next (void *extra, bin_tree_t *node)
    13911335{
    13921336  switch (node->token.type)
     
    14111355/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */
    14121356static reg_errcode_t
    1413 link_nfa_nodes (extra, node)
    1414      void *extra;
    1415      bin_tree_t *node;
     1357link_nfa_nodes (void *extra, bin_tree_t *node)
    14161358{
    14171359  re_dfa_t *dfa = (re_dfa_t *) extra;
     
    14731415
    14741416static reg_errcode_t
    1475 duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
    1476                         init_constraint)
    1477      re_dfa_t *dfa;
    1478      int top_org_node, top_clone_node, root_node;
    1479      unsigned int init_constraint;
    1480 {
    1481   reg_errcode_t err;
     1417internal_function
     1418duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
     1419                        int root_node, unsigned int init_constraint)
     1420{
    14821421  int org_node, clone_node, ret;
    14831422  unsigned int constraint = init_constraint;
     
    14931432          org_dest = dfa->nexts[org_node];
    14941433          re_node_set_empty (dfa->edests + clone_node);
    1495           err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
    1496           if (BE (err != REG_NOERROR, 0))
    1497             return err;
     1434          clone_dest = duplicate_node (dfa, org_dest, constraint);
     1435          if (BE (clone_dest == -1, 0))
     1436            return REG_ESPACE;
    14981437          dfa->nexts[clone_node] = dfa->nexts[org_node];
    14991438          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
     
    15311470              constraint |= dfa->nodes[org_node].opr.ctx_type;
    15321471            }
    1533           err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
    1534           if (BE (err != REG_NOERROR, 0))
    1535             return err;
     1472          clone_dest = duplicate_node (dfa, org_dest, constraint);
     1473          if (BE (clone_dest == -1, 0))
     1474            return REG_ESPACE;
    15361475          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
    15371476          if (BE (ret < 0, 0))
     
    15491488            {
    15501489              /* There are no such a duplicated node, create a new one.  */
    1551               err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
    1552               if (BE (err != REG_NOERROR, 0))
    1553                 return err;
     1490              reg_errcode_t err;
     1491              clone_dest = duplicate_node (dfa, org_dest, constraint);
     1492              if (BE (clone_dest == -1, 0))
     1493                return REG_ESPACE;
    15541494              ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
    15551495              if (BE (ret < 0, 0))
     
    15701510
    15711511          org_dest = dfa->edests[org_node].elems[1];
    1572           err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
    1573           if (BE (err != REG_NOERROR, 0))
    1574             return err;
     1512          clone_dest = duplicate_node (dfa, org_dest, constraint);
     1513          if (BE (clone_dest == -1, 0))
     1514            return REG_ESPACE;
    15751515          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
    15761516          if (BE (ret < 0, 0))
     
    15871527
    15881528static int
    1589 search_duplicated_node (dfa, org_node, constraint)
    1590      re_dfa_t *dfa;
    1591      int org_node;
    1592      unsigned int constraint;
     1529search_duplicated_node (const re_dfa_t *dfa, int org_node,
     1530                        unsigned int constraint)
    15931531{
    15941532  int idx;
     
    16031541
    16041542/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
    1605    The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
    1606    otherwise return the error code.  */
     1543   Return the index of the new node, or -1 if insufficient storage is
     1544   available.  */
     1545
     1546static int
     1547duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
     1548{
     1549  int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
     1550  if (BE (dup_idx != -1, 1))
     1551    {
     1552      dfa->nodes[dup_idx].constraint = constraint;
     1553      if (dfa->nodes[org_idx].type == ANCHOR)
     1554        dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
     1555      dfa->nodes[dup_idx].duplicated = 1;
     1556
     1557      /* Store the index of the original node.  */
     1558      dfa->org_indices[dup_idx] = org_idx;
     1559    }
     1560  return dup_idx;
     1561}
    16071562
    16081563static reg_errcode_t
    1609 duplicate_node (new_idx, dfa, org_idx, constraint)
    1610      re_dfa_t *dfa;
    1611      int *new_idx, org_idx;
    1612      unsigned int constraint;
    1613 {
    1614   int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
    1615   if (BE (dup_idx == -1, 0))
    1616     return REG_ESPACE;
    1617   dfa->nodes[dup_idx].constraint = constraint;
    1618   if (dfa->nodes[org_idx].type == ANCHOR)
    1619     dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
    1620   dfa->nodes[dup_idx].duplicated = 1;
    1621 
    1622   /* Store the index of the original node.  */
    1623   dfa->org_indices[dup_idx] = org_idx;
    1624   *new_idx = dup_idx;
    1625   return REG_NOERROR;
    1626 }
    1627 
    1628 static reg_errcode_t
    1629 calc_inveclosure (dfa)
    1630      re_dfa_t *dfa;
     1564calc_inveclosure (re_dfa_t *dfa)
    16311565{
    16321566  int src, idx, ret;
     
    16511585
    16521586static reg_errcode_t
    1653 calc_eclosure (dfa)
    1654      re_dfa_t *dfa;
     1587calc_eclosure (re_dfa_t *dfa)
    16551588{
    16561589  int node_idx, incomplete;
     
    16961629
    16971630static reg_errcode_t
    1698 calc_eclosure_iter (new_set, dfa, node, root)
    1699      re_node_set *new_set;
    1700      re_dfa_t *dfa;
    1701      int node, root;
     1631calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
    17021632{
    17031633  reg_errcode_t err;
     
    17221652      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
    17231653    {
    1724       int org_node, cur_node;
    1725       org_node = cur_node = node;
    17261654      err = duplicate_node_closure (dfa, node, node, node, constraint);
    17271655      if (BE (err != REG_NOERROR, 0))
     
    17801708
    17811709static void
    1782 fetch_token (result, input, syntax)
    1783      re_token_t *result;
    1784      re_string_t *input;
    1785      reg_syntax_t syntax;
     1710internal_function
     1711fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
    17861712{
    17871713  re_string_skip_bytes (input, peek_token (result, input, syntax));
     
    17921718
    17931719static int
    1794 peek_token (token, input, syntax)
    1795      re_token_t *token;
    1796      re_string_t *input;
    1797      reg_syntax_t syntax;
     1720internal_function
     1721peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
    17981722{
    17991723  unsigned char c;
     
    20331957
    20341958static int
    2035 peek_token_bracket (token, input, syntax)
    2036      re_token_t *token;
    2037      re_string_t *input;
    2038      reg_syntax_t syntax;
     1959internal_function
     1960peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
    20391961{
    20401962  unsigned char c;
     
    21332055
    21342056static bin_tree_t *
    2135 parse (regexp, preg, syntax, err)
    2136      re_string_t *regexp;
    2137      regex_t *preg;
    2138      reg_syntax_t syntax;
    2139      reg_errcode_t *err;
     2057parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
     2058       reg_errcode_t *err)
    21402059{
    21412060  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     
    21702089
    21712090static bin_tree_t *
    2172 parse_reg_exp (regexp, preg, token, syntax, nest, err)
    2173      re_string_t *regexp;
    2174      regex_t *preg;
    2175      re_token_t *token;
    2176      reg_syntax_t syntax;
    2177      int nest;
    2178      reg_errcode_t *err;
     2091parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
     2092               reg_syntax_t syntax, int nest, reg_errcode_t *err)
    21792093{
    21802094  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     
    22162130
    22172131static bin_tree_t *
    2218 parse_branch (regexp, preg, token, syntax, nest, err)
    2219      re_string_t *regexp;
    2220      regex_t *preg;
    2221      re_token_t *token;
    2222      reg_syntax_t syntax;
    2223      int nest;
    2224      reg_errcode_t *err;
     2132parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
     2133              reg_syntax_t syntax, int nest, reg_errcode_t *err)
    22252134{
    22262135  bin_tree_t *tree, *exp;
     
    22612170
    22622171static bin_tree_t *
    2263 parse_expression (regexp, preg, token, syntax, nest, err)
    2264      re_string_t *regexp;
    2265      regex_t *preg;
    2266      re_token_t *token;
    2267      reg_syntax_t syntax;
    2268      int nest;
    2269      reg_errcode_t *err;
     2172parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
     2173                  reg_syntax_t syntax, int nest, reg_errcode_t *err)
    22702174{
    22712175  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     
    24822386
    24832387static bin_tree_t *
    2484 parse_sub_exp (regexp, preg, token, syntax, nest, err)
    2485      re_string_t *regexp;
    2486      regex_t *preg;
    2487      re_token_t *token;
    2488      reg_syntax_t syntax;
    2489      int nest;
    2490      reg_errcode_t *err;
     2388parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
     2389               reg_syntax_t syntax, int nest, reg_errcode_t *err)
    24912390{
    24922391  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     
    25082407        return NULL;
    25092408    }
    2510   dfa->completed_bkref_map |= 1 << cur_nsub;
     2409
     2410  if (cur_nsub <= '9' - '1')
     2411    dfa->completed_bkref_map |= 1 << cur_nsub;
    25112412
    25122413  tree = create_tree (dfa, tree, NULL, SUBEXP);
     
    25232424
    25242425static bin_tree_t *
    2525 parse_dup_op (elem, regexp, dfa, token, syntax, err)
    2526      bin_tree_t *elem;
    2527      re_string_t *regexp;
    2528      re_dfa_t *dfa;
    2529      re_token_t *token;
    2530      reg_syntax_t syntax;
    2531      reg_errcode_t *err;
     2426parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
     2427              re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
    25322428{
    25332429  bin_tree_t *tree = NULL, *old_tree = NULL;
     
    26682564
    26692565static reg_errcode_t
     2566internal_function
    26702567# ifdef RE_ENABLE_I18N
    2671 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
    2672      re_charset_t *mbcset;
    2673      int *range_alloc;
     2568build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
     2569                 bracket_elem_t *start_elem, bracket_elem_t *end_elem, reg_syntax_t syntax) /* bird: syntax */
    26742570# else /* not RE_ENABLE_I18N */
    2675 build_range_exp (sbcset, start_elem, end_elem)
     2571build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
     2572                 bracket_elem_t *end_elem, reg_syntax_t syntax)                             /* bird: syntax */
    26762573# endif /* not RE_ENABLE_I18N */
    2677      re_bitset_ptr_t sbcset;
    2678      bracket_elem_t *start_elem, *end_elem;
    26792574{
    26802575  unsigned int start_ch, end_ch;
     
    26952590# ifdef RE_ENABLE_I18N
    26962591  {
    2697     wint_t wc, start_wc, end_wc;
     2592    wchar_t wc;
     2593    wint_t start_wc;
     2594    wint_t end_wc;
    26982595    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
    26992596
     
    27122609    cmp_buf[0] = start_wc;
    27132610    cmp_buf[4] = end_wc;
    2714     if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
     2611    if ((syntax & RE_NO_EMPTY_RANGES) && wcscoll (cmp_buf, cmp_buf + 4) > 0) /* bird: RE_NO_EMPTY_RANGES */
    27152612      return REG_ERANGE;
    27162613
     
    27692666                 : 0));
    27702667    if (start_ch > end_ch)
    2771       return REG_ERANGE;
     2668      return (syntax & RE_NO_EMPTY_RANGES) ? REG_ERANGE : REG_NOERROR; /* bird: added RE_NO_EMPTY_RANGES check. */
     2669
    27722670    /* Build the table for single byte characters.  */
    27732671    for (ch = 0; ch < SBC_MAX; ++ch)
     
    27882686
    27892687static reg_errcode_t
     2688internal_function
    27902689# ifdef RE_ENABLE_I18N
    2791 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
    2792      re_charset_t *mbcset;
    2793      int *coll_sym_alloc;
     2690build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
     2691                        int *coll_sym_alloc, const unsigned char *name)
    27942692# else /* not RE_ENABLE_I18N */
    2795 build_collating_symbol (sbcset, name)
     2693build_collating_symbol (bitset_t sbcset, const unsigned char *name)
    27962694# endif /* not RE_ENABLE_I18N */
    2797      re_bitset_ptr_t sbcset;
    2798      const unsigned char *name;
    27992695{
    28002696  size_t name_len = strlen ((const char *) name);
     
    28132709
    28142710static bin_tree_t *
    2815 parse_bracket_exp (regexp, dfa, token, syntax, err)
    2816      re_string_t *regexp;
    2817      re_dfa_t *dfa;
    2818      re_token_t *token;
    2819      reg_syntax_t syntax;
    2820      reg_errcode_t *err;
     2711parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
     2712                   reg_syntax_t syntax, reg_errcode_t *err)
    28212713{
    28222714#ifdef _LIBC
     
    28402732      int32_t hash = elem_hash ((const char *) name, name_len);
    28412733      int32_t elem = hash % table_size;
    2842       int32_t second = hash % (table_size - 2);
    2843       while (symb_table[2 * elem] != 0)
    2844         {
    2845           /* First compare the hashing value.  */
    2846           if (symb_table[2 * elem] == hash
    2847               /* Compare the length of the name.  */
    2848               && name_len == extra[symb_table[2 * elem + 1]]
    2849               /* Compare the name.  */
    2850               && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
    2851                          name_len) == 0)
     2734      if (symb_table[2 * elem] != 0)
     2735        {
     2736          int32_t second = hash % (table_size - 2) + 1;
     2737
     2738          do
    28522739            {
    2853               /* Yep, this is the entry.  */
    2854               break;
     2740              /* First compare the hashing value.  */
     2741              if (symb_table[2 * elem] == hash
     2742                  /* Compare the length of the name.  */
     2743                  && name_len == extra[symb_table[2 * elem + 1]]
     2744                  /* Compare the name.  */
     2745                  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
     2746                             name_len) == 0)
     2747                {
     2748                  /* Yep, this is the entry.  */
     2749                  break;
     2750                }
     2751
     2752              /* Next entry.  */
     2753              elem += second;
    28552754            }
    2856 
    2857           /* Next entry.  */
    2858           elem += second;
     2755          while (symb_table[2 * elem] != 0);
    28592756        }
    28602757      return elem;
     
    29382835         re_charset_t *mbcset;
    29392836         int *range_alloc;
    2940          re_bitset_ptr_t sbcset;
     2837         bitset_t sbcset;
    29412838         bracket_elem_t *start_elem, *end_elem;
    29422839    {
     
    30212918         re_charset_t *mbcset;
    30222919         int *coll_sym_alloc;
    3023          re_bitset_ptr_t sbcset;
     2920         bitset_t sbcset;
    30242921         const unsigned char *name;
    30252922    {
     
    30982995      if (MB_CUR_MAX > 1)
    30992996      */
    3100         collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
     2997      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
    31012998      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
    31022999      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
     
    31063003    }
    31073004#endif
    3108   sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
     3005  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
    31093006#ifdef RE_ENABLE_I18N
    31103007  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
     
    32173114          *err = build_range_exp (sbcset,
    32183115                                  dfa->mb_cur_max > 1 ? mbcset : NULL,
    3219                                   &range_alloc, &start_elem, &end_elem);
     3116                                  &range_alloc, &start_elem, &end_elem, syntax);
    32203117# else
    3221           *err = build_range_exp (sbcset, &start_elem, &end_elem);
     3118          *err = build_range_exp (sbcset, &start_elem, &end_elem, syntax);
    32223119# endif
    32233120#endif /* RE_ENABLE_I18N */
     
    33163213      if (BE (mbc_tree == NULL, 0))
    33173214        goto parse_bracket_exp_espace;
    3318       for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
     3215      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
    33193216        if (sbcset[sbc_idx])
    33203217          break;
    33213218      /* If there are no bits set in sbcset, there is no point
    33223219         of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
    3323       if (sbc_idx < BITSET_UINTS)
     3220      if (sbc_idx < BITSET_WORDS)
    33243221        {
    33253222          /* Build a tree for simple bracket.  */
     
    33693266
    33703267static reg_errcode_t
    3371 parse_bracket_element (elem, regexp, token, token_len, dfa, syntax,
    3372                        accept_hyphen)
    3373      bracket_elem_t *elem;
    3374      re_string_t *regexp;
    3375      re_token_t *token;
    3376      int token_len;
    3377      re_dfa_t *dfa;
    3378      reg_syntax_t syntax;
    3379      int accept_hyphen;
     3268parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
     3269                       re_token_t *token, int token_len, re_dfa_t *dfa,
     3270                       reg_syntax_t syntax, int accept_hyphen)
    33803271{
    33813272#ifdef RE_ENABLE_I18N
     
    34153306
    34163307static reg_errcode_t
    3417 parse_bracket_symbol (elem, regexp, token)
    3418      bracket_elem_t *elem;
    3419      re_string_t *regexp;
    3420      re_token_t *token;
     3308parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
     3309                      re_token_t *token)
    34213310{
    34223311  unsigned char ch, delim = token->opr.c;
     
    34653354static reg_errcode_t
    34663355#ifdef RE_ENABLE_I18N
    3467 build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
    3468      re_charset_t *mbcset;
    3469      int *equiv_class_alloc;
     3356build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
     3357                   int *equiv_class_alloc, const unsigned char *name)
    34703358#else /* not RE_ENABLE_I18N */
    3471 build_equiv_class (sbcset, name)
     3359build_equiv_class (bitset_t sbcset, const unsigned char *name)
    34723360#endif /* not RE_ENABLE_I18N */
    3473      re_bitset_ptr_t sbcset;
    3474      const unsigned char *name;
    3475 {
    3476 #if defined _LIBC
     3361{
     3362#ifdef _LIBC
    34773363  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
    34783364  if (nrules != 0)
     
    35603446static reg_errcode_t
    35613447#ifdef RE_ENABLE_I18N
    3562 build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax)
    3563      re_charset_t *mbcset;
    3564      int *char_class_alloc;
     3448build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
     3449                 re_charset_t *mbcset, int *char_class_alloc,
     3450                 const unsigned char *class_name, reg_syntax_t syntax)
    35653451#else /* not RE_ENABLE_I18N */
    3566 build_charclass (trans, sbcset, class_name, syntax)
     3452build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
     3453                 const unsigned char *class_name, reg_syntax_t syntax)
    35673454#endif /* not RE_ENABLE_I18N */
    3568      unsigned RE_TRANSLATE_TYPE trans;
    3569      re_bitset_ptr_t sbcset;
    3570      const unsigned char *class_name;
    3571      reg_syntax_t syntax;
    35723455{
    35733456  int i;
     
    35993482
    36003483#define BUILD_CHARCLASS_LOOP(ctype_func)        \
    3601     for (i = 0; i < SBC_MAX; ++i)               \
     3484  do {                                          \
     3485    if (BE (trans != NULL, 0))                  \
    36023486      {                                         \
    3603         if (ctype_func (i))                     \
    3604           {                                     \
    3605             int ch = trans ? trans[i] : i;      \
    3606             bitset_set (sbcset, ch);            \
    3607           }                                     \
    3608       }
     3487        for (i = 0; i < SBC_MAX; ++i)           \
     3488          if (ctype_func (i))                   \
     3489            bitset_set (sbcset, trans[i]);      \
     3490      }                                         \
     3491    else                                        \
     3492      {                                         \
     3493        for (i = 0; i < SBC_MAX; ++i)           \
     3494          if (ctype_func (i))                   \
     3495            bitset_set (sbcset, i);             \
     3496      }                                         \
     3497  } while (0)
    36093498
    36103499  if (strcmp (name, "alnum") == 0)
    3611     BUILD_CHARCLASS_LOOP (isalnum)
     3500    BUILD_CHARCLASS_LOOP (isalnum);
    36123501  else if (strcmp (name, "cntrl") == 0)
    3613     BUILD_CHARCLASS_LOOP (iscntrl)
     3502    BUILD_CHARCLASS_LOOP (iscntrl);
    36143503  else if (strcmp (name, "lower") == 0)
    3615     BUILD_CHARCLASS_LOOP (islower)
     3504    BUILD_CHARCLASS_LOOP (islower);
    36163505  else if (strcmp (name, "space") == 0)
    3617     BUILD_CHARCLASS_LOOP (isspace)
     3506    BUILD_CHARCLASS_LOOP (isspace);
    36183507  else if (strcmp (name, "alpha") == 0)
    3619     BUILD_CHARCLASS_LOOP (isalpha)
     3508    BUILD_CHARCLASS_LOOP (isalpha);
    36203509  else if (strcmp (name, "digit") == 0)
    3621     BUILD_CHARCLASS_LOOP (isdigit)
     3510    BUILD_CHARCLASS_LOOP (isdigit);
    36223511  else if (strcmp (name, "print") == 0)
    3623     BUILD_CHARCLASS_LOOP (isprint)
     3512    BUILD_CHARCLASS_LOOP (isprint);
    36243513  else if (strcmp (name, "upper") == 0)
    3625     BUILD_CHARCLASS_LOOP (isupper)
     3514    BUILD_CHARCLASS_LOOP (isupper);
    36263515  else if (strcmp (name, "blank") == 0)
    3627     BUILD_CHARCLASS_LOOP (isblank)
     3516    BUILD_CHARCLASS_LOOP (isblank);
    36283517  else if (strcmp (name, "graph") == 0)
    3629     BUILD_CHARCLASS_LOOP (isgraph)
     3518    BUILD_CHARCLASS_LOOP (isgraph);
    36303519  else if (strcmp (name, "punct") == 0)
    3631     BUILD_CHARCLASS_LOOP (ispunct)
     3520    BUILD_CHARCLASS_LOOP (ispunct);
    36323521  else if (strcmp (name, "xdigit") == 0)
    3633     BUILD_CHARCLASS_LOOP (isxdigit)
     3522    BUILD_CHARCLASS_LOOP (isxdigit);
    36343523  else
    36353524    return REG_ECTYPE;
     
    36393528
    36403529static bin_tree_t *
    3641 build_charclass_op (dfa, trans, class_name, extra, non_match, err)
    3642      re_dfa_t *dfa;
    3643      unsigned RE_TRANSLATE_TYPE trans;
    3644      const unsigned char *class_name;
    3645      const unsigned char *extra;
    3646      int non_match;
    3647      reg_errcode_t *err;
     3530build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
     3531                    const unsigned char *class_name,
     3532                    const unsigned char *extra, int non_match,
     3533                    reg_errcode_t *err)
    36483534{
    36493535  re_bitset_ptr_t sbcset;
     
    36563542  bin_tree_t *tree;
    36573543
    3658   sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
     3544  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
    36593545#ifdef RE_ENABLE_I18N
    36603546  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
     
    37593645
    37603646static int
    3761 fetch_number (input, token, syntax)
    3762      re_string_t *input;
    3763      re_token_t *token;
    3764      reg_syntax_t syntax;
     3647fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
    37653648{
    37663649  int num = -1;
     
    38043687
    38053688static bin_tree_t *
    3806 create_tree (dfa, left, right, type)
    3807      re_dfa_t *dfa;
    3808      bin_tree_t *left;
    3809      bin_tree_t *right;
    3810      re_token_type_t type;
     3689create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
     3690             re_token_type_t type)
    38113691{
    38123692  re_token_t t;
     
    38163696
    38173697static bin_tree_t *
    3818 create_token_tree (dfa, left, right, token)
    3819      re_dfa_t *dfa;
    3820      bin_tree_t *left;
    3821      bin_tree_t *right;
    3822      const re_token_t *token;
     3698create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
     3699                   const re_token_t *token)
    38233700{
    38243701  bin_tree_t *tree;
     
    38563733
    38573734static reg_errcode_t
    3858 mark_opt_subexp (extra, node)
    3859      void *extra;
    3860      bin_tree_t *node;
     3735mark_opt_subexp (void *extra, bin_tree_t *node)
    38613736{
    38623737  int idx = (int) (long) extra;
     
    38983773
    38993774static bin_tree_t *
    3900 duplicate_tree (root, dfa)
    3901      const bin_tree_t *root;
    3902      re_dfa_t *dfa;
     3775duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
    39033776{
    39043777  const bin_tree_t *node;
Note: See TracChangeset for help on using the changeset viewer.