[3076] | 1 | /* dfa.h - declarations for GNU deterministic regexp compiler
|
---|
| 2 | Copyright (C) 1988, 1998, 2002, 2004 Free Software Foundation, Inc.
|
---|
| 3 |
|
---|
| 4 | This program is free software; you can redistribute it and/or modify
|
---|
| 5 | it under the terms of the GNU General Public License as published by
|
---|
| 6 | the Free Software Foundation; either version 2, or (at your option)
|
---|
| 7 | any later version.
|
---|
| 8 |
|
---|
| 9 | This program is distributed in the hope that it will be useful,
|
---|
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 12 | GNU General Public License for more details.
|
---|
| 13 |
|
---|
| 14 | You should have received a copy of the GNU General Public License
|
---|
| 15 | along with this program; if not, write to the Free Software
|
---|
| 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
|
---|
| 17 |
|
---|
| 18 | /* Written June, 1988 by Mike Haertel */
|
---|
| 19 |
|
---|
| 20 | /* FIXME:
|
---|
| 21 | 2. We should not export so much of the DFA internals.
|
---|
| 22 | In addition to clobbering modularity, we eat up valuable
|
---|
| 23 | name space. */
|
---|
| 24 |
|
---|
| 25 | #ifdef __STDC__
|
---|
| 26 | # ifndef _PTR_T
|
---|
| 27 | # define _PTR_T
|
---|
| 28 | typedef void * ptr_t;
|
---|
| 29 | # endif
|
---|
| 30 | #else
|
---|
| 31 | # ifndef _PTR_T
|
---|
| 32 | # define _PTR_T
|
---|
| 33 | typedef char * ptr_t;
|
---|
| 34 | # endif
|
---|
| 35 | #endif
|
---|
| 36 |
|
---|
| 37 | #ifdef PARAMS
|
---|
| 38 | # undef PARAMS
|
---|
| 39 | #endif
|
---|
| 40 | #if PROTOTYPES
|
---|
| 41 | # define PARAMS(x) x
|
---|
| 42 | #else
|
---|
| 43 | # define PARAMS(x) ()
|
---|
| 44 | #endif
|
---|
| 45 |
|
---|
| 46 | /* Number of bits in an unsigned char. */
|
---|
| 47 | #ifndef CHARBITS
|
---|
| 48 | #define CHARBITS 8
|
---|
| 49 | #endif
|
---|
| 50 |
|
---|
| 51 | /* First integer value that is greater than any character code. */
|
---|
| 52 | #define NOTCHAR (1 << CHARBITS)
|
---|
| 53 |
|
---|
| 54 | /* INTBITS need not be exact, just a lower bound. */
|
---|
| 55 | #ifndef INTBITS
|
---|
| 56 | #define INTBITS (CHARBITS * sizeof (int))
|
---|
| 57 | #endif
|
---|
| 58 |
|
---|
| 59 | /* Number of ints required to hold a bit for every character. */
|
---|
| 60 | #define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
|
---|
| 61 |
|
---|
| 62 | /* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
|
---|
| 63 | typedef int charclass[CHARCLASS_INTS];
|
---|
| 64 |
|
---|
| 65 | /* The regexp is parsed into an array of tokens in postfix form. Some tokens
|
---|
| 66 | are operators and others are terminal symbols. Most (but not all) of these
|
---|
| 67 | codes are returned by the lexical analyzer. */
|
---|
| 68 |
|
---|
| 69 | typedef enum
|
---|
| 70 | {
|
---|
| 71 | END = -1, /* END is a terminal symbol that matches the
|
---|
| 72 | end of input; any value of END or less in
|
---|
| 73 | the parse tree is such a symbol. Accepting
|
---|
| 74 | states of the DFA are those that would have
|
---|
| 75 | a transition on END. */
|
---|
| 76 |
|
---|
| 77 | /* Ordinary character values are terminal symbols that match themselves. */
|
---|
| 78 |
|
---|
| 79 | EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
|
---|
| 80 | the empty string. */
|
---|
| 81 |
|
---|
| 82 | BACKREF, /* BACKREF is generated by \<digit>; it
|
---|
| 83 | it not completely handled. If the scanner
|
---|
| 84 | detects a transition on backref, it returns
|
---|
| 85 | a kind of "semi-success" indicating that
|
---|
| 86 | the match will have to be verified with
|
---|
| 87 | a backtracking matcher. */
|
---|
| 88 |
|
---|
| 89 | BEGLINE, /* BEGLINE is a terminal symbol that matches
|
---|
| 90 | the empty string if it is at the beginning
|
---|
| 91 | of a line. */
|
---|
| 92 |
|
---|
| 93 | ENDLINE, /* ENDLINE is a terminal symbol that matches
|
---|
| 94 | the empty string if it is at the end of
|
---|
| 95 | a line. */
|
---|
| 96 |
|
---|
| 97 | BEGWORD, /* BEGWORD is a terminal symbol that matches
|
---|
| 98 | the empty string if it is at the beginning
|
---|
| 99 | of a word. */
|
---|
| 100 |
|
---|
| 101 | ENDWORD, /* ENDWORD is a terminal symbol that matches
|
---|
| 102 | the empty string if it is at the end of
|
---|
| 103 | a word. */
|
---|
| 104 |
|
---|
| 105 | LIMWORD, /* LIMWORD is a terminal symbol that matches
|
---|
| 106 | the empty string if it is at the beginning
|
---|
| 107 | or the end of a word. */
|
---|
| 108 |
|
---|
| 109 | NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
|
---|
| 110 | matches the empty string if it is not at
|
---|
| 111 | the beginning or end of a word. */
|
---|
| 112 |
|
---|
| 113 | QMARK, /* QMARK is an operator of one argument that
|
---|
| 114 | matches zero or one occurences of its
|
---|
| 115 | argument. */
|
---|
| 116 |
|
---|
| 117 | STAR, /* STAR is an operator of one argument that
|
---|
| 118 | matches the Kleene closure (zero or more
|
---|
| 119 | occurrences) of its argument. */
|
---|
| 120 |
|
---|
| 121 | PLUS, /* PLUS is an operator of one argument that
|
---|
| 122 | matches the positive closure (one or more
|
---|
| 123 | occurrences) of its argument. */
|
---|
| 124 |
|
---|
| 125 | REPMN, /* REPMN is a lexical token corresponding
|
---|
| 126 | to the {m,n} construct. REPMN never
|
---|
| 127 | appears in the compiled token vector. */
|
---|
| 128 |
|
---|
| 129 | CAT, /* CAT is an operator of two arguments that
|
---|
| 130 | matches the concatenation of its
|
---|
| 131 | arguments. CAT is never returned by the
|
---|
| 132 | lexical analyzer. */
|
---|
| 133 |
|
---|
| 134 | OR, /* OR is an operator of two arguments that
|
---|
| 135 | matches either of its arguments. */
|
---|
| 136 |
|
---|
| 137 | ORTOP, /* OR at the toplevel in the parse tree.
|
---|
| 138 | This is used for a boyer-moore heuristic. */
|
---|
| 139 |
|
---|
| 140 | LPAREN, /* LPAREN never appears in the parse tree,
|
---|
| 141 | it is only a lexeme. */
|
---|
| 142 |
|
---|
| 143 | RPAREN, /* RPAREN never appears in the parse tree. */
|
---|
| 144 |
|
---|
| 145 | CRANGE, /* CRANGE never appears in the parse tree.
|
---|
| 146 | It stands for a character range that can
|
---|
| 147 | match a string of one or more characters.
|
---|
| 148 | For example, [a-z] can match "ch" in
|
---|
| 149 | a Spanish locale. */
|
---|
| 150 |
|
---|
| 151 | #ifdef MBS_SUPPORT
|
---|
| 152 | ANYCHAR, /* ANYCHAR is a terminal symbol that matches
|
---|
| 153 | any multibyte (or single byte) characters.
|
---|
| 154 | It is used only if MB_CUR_MAX > 1. */
|
---|
| 155 |
|
---|
| 156 | MBCSET, /* MBCSET is similar to CSET, but for
|
---|
| 157 | multibyte characters. */
|
---|
| 158 | #endif /* MBS_SUPPORT */
|
---|
| 159 |
|
---|
| 160 | CSET /* CSET and (and any value greater) is a
|
---|
| 161 | terminal symbol that matches any of a
|
---|
| 162 | class of characters. */
|
---|
| 163 | } token;
|
---|
| 164 |
|
---|
| 165 | /* Sets are stored in an array in the compiled dfa; the index of the
|
---|
| 166 | array corresponding to a given set token is given by SET_INDEX(t). */
|
---|
| 167 | #define SET_INDEX(t) ((t) - CSET)
|
---|
| 168 |
|
---|
| 169 | /* Sometimes characters can only be matched depending on the surrounding
|
---|
| 170 | context. Such context decisions depend on what the previous character
|
---|
| 171 | was, and the value of the current (lookahead) character. Context
|
---|
| 172 | dependent constraints are encoded as 8 bit integers. Each bit that
|
---|
| 173 | is set indicates that the constraint succeeds in the corresponding
|
---|
| 174 | context.
|
---|
| 175 |
|
---|
| 176 | bit 7 - previous and current are newlines
|
---|
| 177 | bit 6 - previous was newline, current isn't
|
---|
| 178 | bit 5 - previous wasn't newline, current is
|
---|
| 179 | bit 4 - neither previous nor current is a newline
|
---|
| 180 | bit 3 - previous and current are word-constituents
|
---|
| 181 | bit 2 - previous was word-constituent, current isn't
|
---|
| 182 | bit 1 - previous wasn't word-constituent, current is
|
---|
| 183 | bit 0 - neither previous nor current is word-constituent
|
---|
| 184 |
|
---|
| 185 | Word-constituent characters are those that satisfy isalnum().
|
---|
| 186 |
|
---|
| 187 | The macro SUCCEEDS_IN_CONTEXT determines whether a a given constraint
|
---|
| 188 | succeeds in a particular context. Prevn is true if the previous character
|
---|
| 189 | was a newline, currn is true if the lookahead character is a newline.
|
---|
| 190 | Prevl and currl similarly depend upon whether the previous and current
|
---|
| 191 | characters are word-constituent letters. */
|
---|
| 192 | #define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
---|
| 193 | ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
|
---|
| 194 | #define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
|
---|
| 195 | ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
|
---|
| 196 | #define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
|
---|
| 197 | (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
---|
| 198 | && MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
|
---|
| 199 |
|
---|
| 200 | /* The following macros give information about what a constraint depends on. */
|
---|
| 201 | #define PREV_NEWLINE_DEPENDENT(constraint) \
|
---|
| 202 | (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
|
---|
| 203 | #define PREV_LETTER_DEPENDENT(constraint) \
|
---|
| 204 | (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
|
---|
| 205 |
|
---|
| 206 | /* Tokens that match the empty string subject to some constraint actually
|
---|
| 207 | work by applying that constraint to determine what may follow them,
|
---|
| 208 | taking into account what has gone before. The following values are
|
---|
| 209 | the constraints corresponding to the special tokens previously defined. */
|
---|
| 210 | #define NO_CONSTRAINT 0xff
|
---|
| 211 | #define BEGLINE_CONSTRAINT 0xcf
|
---|
| 212 | #define ENDLINE_CONSTRAINT 0xaf
|
---|
| 213 | #define BEGWORD_CONSTRAINT 0xf2
|
---|
| 214 | #define ENDWORD_CONSTRAINT 0xf4
|
---|
| 215 | #define LIMWORD_CONSTRAINT 0xf6
|
---|
| 216 | #define NOTLIMWORD_CONSTRAINT 0xf9
|
---|
| 217 |
|
---|
| 218 | /* States of the recognizer correspond to sets of positions in the parse
|
---|
| 219 | tree, together with the constraints under which they may be matched.
|
---|
| 220 | So a position is encoded as an index into the parse tree together with
|
---|
| 221 | a constraint. */
|
---|
| 222 | typedef struct
|
---|
| 223 | {
|
---|
| 224 | unsigned index; /* Index into the parse array. */
|
---|
| 225 | unsigned constraint; /* Constraint for matching this position. */
|
---|
| 226 | } position;
|
---|
| 227 |
|
---|
| 228 | /* Sets of positions are stored as arrays. */
|
---|
| 229 | typedef struct
|
---|
| 230 | {
|
---|
| 231 | position *elems; /* Elements of this position set. */
|
---|
| 232 | int nelem; /* Number of elements in this set. */
|
---|
| 233 | } position_set;
|
---|
| 234 |
|
---|
| 235 | /* A state of the dfa consists of a set of positions, some flags,
|
---|
| 236 | and the token value of the lowest-numbered position of the state that
|
---|
| 237 | contains an END token. */
|
---|
| 238 | typedef struct
|
---|
| 239 | {
|
---|
| 240 | int hash; /* Hash of the positions of this state. */
|
---|
| 241 | position_set elems; /* Positions this state could match. */
|
---|
| 242 | char newline; /* True if previous state matched newline. */
|
---|
| 243 | char letter; /* True if previous state matched a letter. */
|
---|
| 244 | char backref; /* True if this state matches a \<digit>. */
|
---|
| 245 | unsigned char constraint; /* Constraint for this state to accept. */
|
---|
| 246 | int first_end; /* Token value of the first END in elems. */
|
---|
| 247 | #ifdef MBS_SUPPORT
|
---|
| 248 | position_set mbps; /* Positions which can match multibyte
|
---|
| 249 | characters. e.g. period.
|
---|
| 250 | These staff are used only if
|
---|
| 251 | MB_CUR_MAX > 1. */
|
---|
| 252 | #endif
|
---|
| 253 | } dfa_state;
|
---|
| 254 |
|
---|
| 255 | /* Element of a list of strings, at least one of which is known to
|
---|
| 256 | appear in any R.E. matching the DFA. */
|
---|
| 257 | struct dfamust
|
---|
| 258 | {
|
---|
| 259 | int exact;
|
---|
| 260 | char *must;
|
---|
| 261 | struct dfamust *next;
|
---|
| 262 | };
|
---|
| 263 |
|
---|
| 264 | #ifdef MBS_SUPPORT
|
---|
| 265 | /* A bracket operator.
|
---|
| 266 | e.g. [a-c], [[:alpha:]], etc. */
|
---|
| 267 | struct mb_char_classes
|
---|
| 268 | {
|
---|
| 269 | int invert;
|
---|
| 270 | wchar_t *chars; /* Normal characters. */
|
---|
| 271 | int nchars;
|
---|
| 272 | wctype_t *ch_classes; /* Character classes. */
|
---|
| 273 | int nch_classes;
|
---|
| 274 | wchar_t *range_sts; /* Range characters (start of the range). */
|
---|
| 275 | wchar_t *range_ends; /* Range characters (end of the range). */
|
---|
| 276 | int nranges;
|
---|
| 277 | char **equivs; /* Equivalent classes. */
|
---|
| 278 | int nequivs;
|
---|
| 279 | char **coll_elems;
|
---|
| 280 | int ncoll_elems; /* Collating elements. */
|
---|
| 281 | };
|
---|
| 282 | #endif
|
---|
| 283 |
|
---|
| 284 | /* A compiled regular expression. */
|
---|
| 285 | struct dfa
|
---|
| 286 | {
|
---|
| 287 | /* Stuff built by the scanner. */
|
---|
| 288 | charclass *charclasses; /* Array of character sets for CSET tokens. */
|
---|
| 289 | int cindex; /* Index for adding new charclasses. */
|
---|
| 290 | int calloc; /* Number of charclasses currently allocated. */
|
---|
| 291 |
|
---|
| 292 | /* Stuff built by the parser. */
|
---|
| 293 | token *tokens; /* Postfix parse array. */
|
---|
| 294 | int tindex; /* Index for adding new tokens. */
|
---|
| 295 | int talloc; /* Number of tokens currently allocated. */
|
---|
| 296 | int depth; /* Depth required of an evaluation stack
|
---|
| 297 | used for depth-first traversal of the
|
---|
| 298 | parse tree. */
|
---|
| 299 | int nleaves; /* Number of leaves on the parse tree. */
|
---|
| 300 | int nregexps; /* Count of parallel regexps being built
|
---|
| 301 | with dfaparse(). */
|
---|
| 302 | #ifdef MBS_SUPPORT
|
---|
| 303 | /* These stuff are used only if MB_CUR_MAX > 1 or multibyte environments. */
|
---|
| 304 | int nmultibyte_prop;
|
---|
| 305 | int *multibyte_prop;
|
---|
| 306 | /* The value of multibyte_prop[i] is defined by following rule.
|
---|
| 307 | if tokens[i] < NOTCHAR
|
---|
| 308 | bit 1 : tokens[i] is a single byte character, or the last-byte of
|
---|
| 309 | a multibyte character.
|
---|
| 310 | bit 0 : tokens[i] is a single byte character, or the 1st-byte of
|
---|
| 311 | a multibyte character.
|
---|
| 312 | if tokens[i] = MBCSET
|
---|
| 313 | ("the index of mbcsets correspnd to this operator" << 2) + 3
|
---|
| 314 |
|
---|
| 315 | e.g.
|
---|
| 316 | tokens
|
---|
| 317 | = 'single_byte_a', 'multi_byte_A', single_byte_b'
|
---|
| 318 | = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
|
---|
| 319 | multibyte_prop
|
---|
| 320 | = 3 , 1 , 0 , 2 , 3
|
---|
| 321 | */
|
---|
| 322 |
|
---|
| 323 | /* Array of the bracket expressoin in the DFA. */
|
---|
| 324 | struct mb_char_classes *mbcsets;
|
---|
| 325 | int nmbcsets;
|
---|
| 326 | int mbcsets_alloc;
|
---|
| 327 | #endif
|
---|
| 328 |
|
---|
| 329 | /* Stuff owned by the state builder. */
|
---|
| 330 | dfa_state *states; /* States of the dfa. */
|
---|
| 331 | int sindex; /* Index for adding new states. */
|
---|
| 332 | int salloc; /* Number of states currently allocated. */
|
---|
| 333 |
|
---|
| 334 | /* Stuff built by the structure analyzer. */
|
---|
| 335 | position_set *follows; /* Array of follow sets, indexed by position
|
---|
| 336 | index. The follow of a position is the set
|
---|
| 337 | of positions containing characters that
|
---|
| 338 | could conceivably follow a character
|
---|
| 339 | matching the given position in a string
|
---|
| 340 | matching the regexp. Allocated to the
|
---|
| 341 | maximum possible position index. */
|
---|
| 342 | int searchflag; /* True if we are supposed to build a searching
|
---|
| 343 | as opposed to an exact matcher. A searching
|
---|
| 344 | matcher finds the first and shortest string
|
---|
| 345 | matching a regexp anywhere in the buffer,
|
---|
| 346 | whereas an exact matcher finds the longest
|
---|
| 347 | string matching, but anchored to the
|
---|
| 348 | beginning of the buffer. */
|
---|
| 349 |
|
---|
| 350 | /* Stuff owned by the executor. */
|
---|
| 351 | int tralloc; /* Number of transition tables that have
|
---|
| 352 | slots so far. */
|
---|
| 353 | int trcount; /* Number of transition tables that have
|
---|
| 354 | actually been built. */
|
---|
| 355 | int **trans; /* Transition tables for states that can
|
---|
| 356 | never accept. If the transitions for a
|
---|
| 357 | state have not yet been computed, or the
|
---|
| 358 | state could possibly accept, its entry in
|
---|
| 359 | this table is NULL. */
|
---|
| 360 | int **realtrans; /* Trans always points to realtrans + 1; this
|
---|
| 361 | is so trans[-1] can contain NULL. */
|
---|
| 362 | int **fails; /* Transition tables after failing to accept
|
---|
| 363 | on a state that potentially could do so. */
|
---|
| 364 | int *success; /* Table of acceptance conditions used in
|
---|
| 365 | dfaexec and computed in build_state. */
|
---|
| 366 | int *newlines; /* Transitions on newlines. The entry for a
|
---|
| 367 | newline in any transition table is always
|
---|
| 368 | -1 so we can count lines without wasting
|
---|
| 369 | too many cycles. The transition for a
|
---|
| 370 | newline is stored separately and handled
|
---|
| 371 | as a special case. Newline is also used
|
---|
| 372 | as a sentinel at the end of the buffer. */
|
---|
| 373 | struct dfamust *musts; /* List of strings, at least one of which
|
---|
| 374 | is known to appear in any r.e. matching
|
---|
| 375 | the dfa. */
|
---|
| 376 | };
|
---|
| 377 |
|
---|
| 378 | /* Some macros for user access to dfa internals. */
|
---|
| 379 |
|
---|
| 380 | /* ACCEPTING returns true if s could possibly be an accepting state of r. */
|
---|
| 381 | #define ACCEPTING(s, r) ((r).states[s].constraint)
|
---|
| 382 |
|
---|
| 383 | /* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
|
---|
| 384 | specified context. */
|
---|
| 385 | #define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
|
---|
| 386 | SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \
|
---|
| 387 | prevn, currn, prevl, currl)
|
---|
| 388 |
|
---|
| 389 | /* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
|
---|
| 390 | regexps that a given state could accept. Parallel regexps are numbered
|
---|
| 391 | starting at 1. */
|
---|
| 392 | #define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)
|
---|
| 393 |
|
---|
| 394 | /* Entry points. */
|
---|
| 395 |
|
---|
| 396 | /* dfasyntax() takes three arguments; the first sets the syntax bits described
|
---|
| 397 | earlier in this file, the second sets the case-folding flag, and the
|
---|
| 398 | third specifies the line terminator. */
|
---|
| 399 | extern void dfasyntax PARAMS ((reg_syntax_t, int, unsigned char));
|
---|
| 400 |
|
---|
| 401 | /* Compile the given string of the given length into the given struct dfa.
|
---|
| 402 | Final argument is a flag specifying whether to build a searching or an
|
---|
| 403 | exact matcher. */
|
---|
| 404 | extern void dfacomp PARAMS ((char const *, size_t, struct dfa *, int));
|
---|
| 405 |
|
---|
| 406 | /* Execute the given struct dfa on the buffer of characters. The
|
---|
| 407 | first char * points to the beginning, and the second points to the
|
---|
| 408 | first character after the end of the buffer, which must be a writable
|
---|
| 409 | place so a sentinel end-of-buffer marker can be stored there. The
|
---|
| 410 | second-to-last argument is a flag telling whether to allow newlines to
|
---|
| 411 | be part of a string matching the regexp. The next-to-last argument,
|
---|
| 412 | if non-NULL, points to a place to increment every time we see a
|
---|
| 413 | newline. The final argument, if non-NULL, points to a flag that will
|
---|
| 414 | be set if further examination by a backtracking matcher is needed in
|
---|
| 415 | order to verify backreferencing; otherwise the flag will be cleared.
|
---|
| 416 | Returns NULL if no match is found, or a pointer to the first
|
---|
| 417 | character after the first & shortest matching string in the buffer. */
|
---|
| 418 | extern char *dfaexec PARAMS ((struct dfa *, char const *, char *, int, int *, int *));
|
---|
| 419 |
|
---|
| 420 | /* Free the storage held by the components of a struct dfa. */
|
---|
| 421 | extern void dfafree PARAMS ((struct dfa *));
|
---|
| 422 |
|
---|
| 423 | /* Entry points for people who know what they're doing. */
|
---|
| 424 |
|
---|
| 425 | /* Initialize the components of a struct dfa. */
|
---|
| 426 | extern void dfainit PARAMS ((struct dfa *));
|
---|
| 427 |
|
---|
| 428 | /* Incrementally parse a string of given length into a struct dfa. */
|
---|
| 429 | extern void dfaparse PARAMS ((char const *, size_t, struct dfa *));
|
---|
| 430 |
|
---|
| 431 | /* Analyze a parsed regexp; second argument tells whether to build a searching
|
---|
| 432 | or an exact matcher. */
|
---|
| 433 | extern void dfaanalyze PARAMS ((struct dfa *, int));
|
---|
| 434 |
|
---|
| 435 | /* Compute, for each possible character, the transitions out of a given
|
---|
| 436 | state, storing them in an array of integers. */
|
---|
| 437 | extern void dfastate PARAMS ((int, struct dfa *, int []));
|
---|
| 438 |
|
---|
| 439 | /* Error handling. */
|
---|
| 440 |
|
---|
| 441 | /* dfaerror() is called by the regexp routines whenever an error occurs. It
|
---|
| 442 | takes a single argument, a NUL-terminated string describing the error.
|
---|
| 443 | The user must supply a dfaerror. */
|
---|
| 444 | extern void dfaerror PARAMS ((const char *));
|
---|