| 1 | /* dfa.h - declarations for GNU deterministic regexp compiler | 
|---|
| 2 | Copyright (C) 1988, 1998, 2002, 2004 Free Software Foundation, Inc. | 
|---|
| 3 |  | 
|---|
| 4 | This program is free software; you can redistribute it and/or modify | 
|---|
| 5 | it under the terms of the GNU General Public License as published by | 
|---|
| 6 | the Free Software Foundation; either version 2, or (at your option) | 
|---|
| 7 | any later version. | 
|---|
| 8 |  | 
|---|
| 9 | This program is distributed in the hope that it will be useful, | 
|---|
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 12 | GNU General Public License for more details. | 
|---|
| 13 |  | 
|---|
| 14 | You should have received a copy of the GNU General Public License | 
|---|
| 15 | along with this program; if not, write to the Free Software | 
|---|
| 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA */ | 
|---|
| 17 |  | 
|---|
| 18 | /* Written June, 1988 by Mike Haertel */ | 
|---|
| 19 |  | 
|---|
| 20 | /* FIXME: | 
|---|
| 21 | 2.  We should not export so much of the DFA internals. | 
|---|
| 22 | In addition to clobbering modularity, we eat up valuable | 
|---|
| 23 | name space. */ | 
|---|
| 24 |  | 
|---|
| 25 | #ifdef __STDC__ | 
|---|
| 26 | # ifndef _PTR_T | 
|---|
| 27 | # define _PTR_T | 
|---|
| 28 | typedef void * ptr_t; | 
|---|
| 29 | # endif | 
|---|
| 30 | #else | 
|---|
| 31 | # ifndef _PTR_T | 
|---|
| 32 | # define _PTR_T | 
|---|
| 33 | typedef char * ptr_t; | 
|---|
| 34 | # endif | 
|---|
| 35 | #endif | 
|---|
| 36 |  | 
|---|
| 37 | #ifdef PARAMS | 
|---|
| 38 | # undef PARAMS | 
|---|
| 39 | #endif | 
|---|
| 40 | #if PROTOTYPES | 
|---|
| 41 | # define PARAMS(x) x | 
|---|
| 42 | #else | 
|---|
| 43 | # define PARAMS(x) () | 
|---|
| 44 | #endif | 
|---|
| 45 |  | 
|---|
| 46 | /* Number of bits in an unsigned char. */ | 
|---|
| 47 | #ifndef CHARBITS | 
|---|
| 48 | #define CHARBITS 8 | 
|---|
| 49 | #endif | 
|---|
| 50 |  | 
|---|
| 51 | /* First integer value that is greater than any character code. */ | 
|---|
| 52 | #define NOTCHAR (1 << CHARBITS) | 
|---|
| 53 |  | 
|---|
| 54 | /* INTBITS need not be exact, just a lower bound. */ | 
|---|
| 55 | #ifndef INTBITS | 
|---|
| 56 | #define INTBITS (CHARBITS * sizeof (int)) | 
|---|
| 57 | #endif | 
|---|
| 58 |  | 
|---|
| 59 | /* Number of ints required to hold a bit for every character. */ | 
|---|
| 60 | #define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS) | 
|---|
| 61 |  | 
|---|
| 62 | /* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ | 
|---|
| 63 | typedef int charclass[CHARCLASS_INTS]; | 
|---|
| 64 |  | 
|---|
| 65 | /* The regexp is parsed into an array of tokens in postfix form.  Some tokens | 
|---|
| 66 | are operators and others are terminal symbols.  Most (but not all) of these | 
|---|
| 67 | codes are returned by the lexical analyzer. */ | 
|---|
| 68 |  | 
|---|
| 69 | typedef enum | 
|---|
| 70 | { | 
|---|
| 71 | END = -1,                     /* END is a terminal symbol that matches the | 
|---|
| 72 | end of input; any value of END or less in | 
|---|
| 73 | the parse tree is such a symbol.  Accepting | 
|---|
| 74 | states of the DFA are those that would have | 
|---|
| 75 | a transition on END. */ | 
|---|
| 76 |  | 
|---|
| 77 | /* Ordinary character values are terminal symbols that match themselves. */ | 
|---|
| 78 |  | 
|---|
| 79 | EMPTY = NOTCHAR,              /* EMPTY is a terminal symbol that matches | 
|---|
| 80 | the empty string. */ | 
|---|
| 81 |  | 
|---|
| 82 | BACKREF,                      /* BACKREF is generated by \<digit>; it | 
|---|
| 83 | it not completely handled.  If the scanner | 
|---|
| 84 | detects a transition on backref, it returns | 
|---|
| 85 | a kind of "semi-success" indicating that | 
|---|
| 86 | the match will have to be verified with | 
|---|
| 87 | a backtracking matcher. */ | 
|---|
| 88 |  | 
|---|
| 89 | BEGLINE,                      /* BEGLINE is a terminal symbol that matches | 
|---|
| 90 | the empty string if it is at the beginning | 
|---|
| 91 | of a line. */ | 
|---|
| 92 |  | 
|---|
| 93 | ENDLINE,                      /* ENDLINE is a terminal symbol that matches | 
|---|
| 94 | the empty string if it is at the end of | 
|---|
| 95 | a line. */ | 
|---|
| 96 |  | 
|---|
| 97 | BEGWORD,                      /* BEGWORD is a terminal symbol that matches | 
|---|
| 98 | the empty string if it is at the beginning | 
|---|
| 99 | of a word. */ | 
|---|
| 100 |  | 
|---|
| 101 | ENDWORD,                      /* ENDWORD is a terminal symbol that matches | 
|---|
| 102 | the empty string if it is at the end of | 
|---|
| 103 | a word. */ | 
|---|
| 104 |  | 
|---|
| 105 | LIMWORD,                      /* LIMWORD is a terminal symbol that matches | 
|---|
| 106 | the empty string if it is at the beginning | 
|---|
| 107 | or the end of a word. */ | 
|---|
| 108 |  | 
|---|
| 109 | NOTLIMWORD,                   /* NOTLIMWORD is a terminal symbol that | 
|---|
| 110 | matches the empty string if it is not at | 
|---|
| 111 | the beginning or end of a word. */ | 
|---|
| 112 |  | 
|---|
| 113 | QMARK,                        /* QMARK is an operator of one argument that | 
|---|
| 114 | matches zero or one occurences of its | 
|---|
| 115 | argument. */ | 
|---|
| 116 |  | 
|---|
| 117 | STAR,                         /* STAR is an operator of one argument that | 
|---|
| 118 | matches the Kleene closure (zero or more | 
|---|
| 119 | occurrences) of its argument. */ | 
|---|
| 120 |  | 
|---|
| 121 | PLUS,                         /* PLUS is an operator of one argument that | 
|---|
| 122 | matches the positive closure (one or more | 
|---|
| 123 | occurrences) of its argument. */ | 
|---|
| 124 |  | 
|---|
| 125 | REPMN,                        /* REPMN is a lexical token corresponding | 
|---|
| 126 | to the {m,n} construct.  REPMN never | 
|---|
| 127 | appears in the compiled token vector. */ | 
|---|
| 128 |  | 
|---|
| 129 | CAT,                          /* CAT is an operator of two arguments that | 
|---|
| 130 | matches the concatenation of its | 
|---|
| 131 | arguments.  CAT is never returned by the | 
|---|
| 132 | lexical analyzer. */ | 
|---|
| 133 |  | 
|---|
| 134 | OR,                           /* OR is an operator of two arguments that | 
|---|
| 135 | matches either of its arguments. */ | 
|---|
| 136 |  | 
|---|
| 137 | ORTOP,                        /* OR at the toplevel in the parse tree. | 
|---|
| 138 | This is used for a boyer-moore heuristic. */ | 
|---|
| 139 |  | 
|---|
| 140 | LPAREN,                       /* LPAREN never appears in the parse tree, | 
|---|
| 141 | it is only a lexeme. */ | 
|---|
| 142 |  | 
|---|
| 143 | RPAREN,                       /* RPAREN never appears in the parse tree. */ | 
|---|
| 144 |  | 
|---|
| 145 | CRANGE,                       /* CRANGE never appears in the parse tree. | 
|---|
| 146 | It stands for a character range that can | 
|---|
| 147 | match a string of one or more characters. | 
|---|
| 148 | For example, [a-z] can match "ch" in | 
|---|
| 149 | a Spanish locale.  */ | 
|---|
| 150 |  | 
|---|
| 151 | #ifdef MBS_SUPPORT | 
|---|
| 152 | ANYCHAR,                     /* ANYCHAR is a terminal symbol that matches | 
|---|
| 153 | any multibyte (or single byte) characters. | 
|---|
| 154 | It is used only if MB_CUR_MAX > 1.  */ | 
|---|
| 155 |  | 
|---|
| 156 | MBCSET,                       /* MBCSET is similar to CSET, but for | 
|---|
| 157 | multibyte characters.  */ | 
|---|
| 158 | #endif /* MBS_SUPPORT */ | 
|---|
| 159 |  | 
|---|
| 160 | CSET                          /* CSET and (and any value greater) is a | 
|---|
| 161 | terminal symbol that matches any of a | 
|---|
| 162 | class of characters. */ | 
|---|
| 163 | } token; | 
|---|
| 164 |  | 
|---|
| 165 | /* Sets are stored in an array in the compiled dfa; the index of the | 
|---|
| 166 | array corresponding to a given set token is given by SET_INDEX(t). */ | 
|---|
| 167 | #define SET_INDEX(t) ((t) - CSET) | 
|---|
| 168 |  | 
|---|
| 169 | /* Sometimes characters can only be matched depending on the surrounding | 
|---|
| 170 | context.  Such context decisions depend on what the previous character | 
|---|
| 171 | was, and the value of the current (lookahead) character.  Context | 
|---|
| 172 | dependent constraints are encoded as 8 bit integers.  Each bit that | 
|---|
| 173 | is set indicates that the constraint succeeds in the corresponding | 
|---|
| 174 | context. | 
|---|
| 175 |  | 
|---|
| 176 | bit 7 - previous and current are newlines | 
|---|
| 177 | bit 6 - previous was newline, current isn't | 
|---|
| 178 | bit 5 - previous wasn't newline, current is | 
|---|
| 179 | bit 4 - neither previous nor current is a newline | 
|---|
| 180 | bit 3 - previous and current are word-constituents | 
|---|
| 181 | bit 2 - previous was word-constituent, current isn't | 
|---|
| 182 | bit 1 - previous wasn't word-constituent, current is | 
|---|
| 183 | bit 0 - neither previous nor current is word-constituent | 
|---|
| 184 |  | 
|---|
| 185 | Word-constituent characters are those that satisfy isalnum(). | 
|---|
| 186 |  | 
|---|
| 187 | The macro SUCCEEDS_IN_CONTEXT determines whether a a given constraint | 
|---|
| 188 | succeeds in a particular context.  Prevn is true if the previous character | 
|---|
| 189 | was a newline, currn is true if the lookahead character is a newline. | 
|---|
| 190 | Prevl and currl similarly depend upon whether the previous and current | 
|---|
| 191 | characters are word-constituent letters. */ | 
|---|
| 192 | #define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \ | 
|---|
| 193 | ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)) | 
|---|
| 194 | #define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \ | 
|---|
| 195 | ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0))) | 
|---|
| 196 | #define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \ | 
|---|
| 197 | (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn)                 \ | 
|---|
| 198 | && MATCHES_LETTER_CONTEXT(constraint, prevl, currl)) | 
|---|
| 199 |  | 
|---|
| 200 | /* The following macros give information about what a constraint depends on. */ | 
|---|
| 201 | #define PREV_NEWLINE_DEPENDENT(constraint) \ | 
|---|
| 202 | (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30)) | 
|---|
| 203 | #define PREV_LETTER_DEPENDENT(constraint) \ | 
|---|
| 204 | (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03)) | 
|---|
| 205 |  | 
|---|
| 206 | /* Tokens that match the empty string subject to some constraint actually | 
|---|
| 207 | work by applying that constraint to determine what may follow them, | 
|---|
| 208 | taking into account what has gone before.  The following values are | 
|---|
| 209 | the constraints corresponding to the special tokens previously defined. */ | 
|---|
| 210 | #define NO_CONSTRAINT 0xff | 
|---|
| 211 | #define BEGLINE_CONSTRAINT 0xcf | 
|---|
| 212 | #define ENDLINE_CONSTRAINT 0xaf | 
|---|
| 213 | #define BEGWORD_CONSTRAINT 0xf2 | 
|---|
| 214 | #define ENDWORD_CONSTRAINT 0xf4 | 
|---|
| 215 | #define LIMWORD_CONSTRAINT 0xf6 | 
|---|
| 216 | #define NOTLIMWORD_CONSTRAINT 0xf9 | 
|---|
| 217 |  | 
|---|
| 218 | /* States of the recognizer correspond to sets of positions in the parse | 
|---|
| 219 | tree, together with the constraints under which they may be matched. | 
|---|
| 220 | So a position is encoded as an index into the parse tree together with | 
|---|
| 221 | a constraint. */ | 
|---|
| 222 | typedef struct | 
|---|
| 223 | { | 
|---|
| 224 | unsigned index;               /* Index into the parse array. */ | 
|---|
| 225 | unsigned constraint;          /* Constraint for matching this position. */ | 
|---|
| 226 | } position; | 
|---|
| 227 |  | 
|---|
| 228 | /* Sets of positions are stored as arrays. */ | 
|---|
| 229 | typedef struct | 
|---|
| 230 | { | 
|---|
| 231 | position *elems;              /* Elements of this position set. */ | 
|---|
| 232 | int nelem;                    /* Number of elements in this set. */ | 
|---|
| 233 | } position_set; | 
|---|
| 234 |  | 
|---|
| 235 | /* A state of the dfa consists of a set of positions, some flags, | 
|---|
| 236 | and the token value of the lowest-numbered position of the state that | 
|---|
| 237 | contains an END token. */ | 
|---|
| 238 | typedef struct | 
|---|
| 239 | { | 
|---|
| 240 | int hash;                     /* Hash of the positions of this state. */ | 
|---|
| 241 | position_set elems;           /* Positions this state could match. */ | 
|---|
| 242 | char newline;                 /* True if previous state matched newline. */ | 
|---|
| 243 | char letter;                  /* True if previous state matched a letter. */ | 
|---|
| 244 | char backref;                 /* True if this state matches a \<digit>. */ | 
|---|
| 245 | unsigned char constraint;     /* Constraint for this state to accept. */ | 
|---|
| 246 | int first_end;                /* Token value of the first END in elems. */ | 
|---|
| 247 | #ifdef MBS_SUPPORT | 
|---|
| 248 | position_set mbps;           /* Positions which can match multibyte | 
|---|
| 249 | characters.  e.g. period. | 
|---|
| 250 | These staff are used only if | 
|---|
| 251 | MB_CUR_MAX > 1.  */ | 
|---|
| 252 | #endif | 
|---|
| 253 | } dfa_state; | 
|---|
| 254 |  | 
|---|
| 255 | /* Element of a list of strings, at least one of which is known to | 
|---|
| 256 | appear in any R.E. matching the DFA. */ | 
|---|
| 257 | struct dfamust | 
|---|
| 258 | { | 
|---|
| 259 | int exact; | 
|---|
| 260 | char *must; | 
|---|
| 261 | struct dfamust *next; | 
|---|
| 262 | }; | 
|---|
| 263 |  | 
|---|
| 264 | #ifdef MBS_SUPPORT | 
|---|
| 265 | /* A bracket operator. | 
|---|
| 266 | e.g. [a-c], [[:alpha:]], etc.  */ | 
|---|
| 267 | struct mb_char_classes | 
|---|
| 268 | { | 
|---|
| 269 | int invert; | 
|---|
| 270 | wchar_t *chars;               /* Normal characters.  */ | 
|---|
| 271 | int nchars; | 
|---|
| 272 | wctype_t *ch_classes;         /* Character classes.  */ | 
|---|
| 273 | int nch_classes; | 
|---|
| 274 | wchar_t *range_sts;           /* Range characters (start of the range).  */ | 
|---|
| 275 | wchar_t *range_ends;          /* Range characters (end of the range).  */ | 
|---|
| 276 | int nranges; | 
|---|
| 277 | char **equivs;                /* Equivalent classes.  */ | 
|---|
| 278 | int nequivs; | 
|---|
| 279 | char **coll_elems; | 
|---|
| 280 | int ncoll_elems;              /* Collating elements.  */ | 
|---|
| 281 | }; | 
|---|
| 282 | #endif | 
|---|
| 283 |  | 
|---|
| 284 | /* A compiled regular expression. */ | 
|---|
| 285 | struct dfa | 
|---|
| 286 | { | 
|---|
| 287 | /* Stuff built by the scanner. */ | 
|---|
| 288 | charclass *charclasses;       /* Array of character sets for CSET tokens. */ | 
|---|
| 289 | int cindex;                   /* Index for adding new charclasses. */ | 
|---|
| 290 | int calloc;                   /* Number of charclasses currently allocated. */ | 
|---|
| 291 |  | 
|---|
| 292 | /* Stuff built by the parser. */ | 
|---|
| 293 | token *tokens;                /* Postfix parse array. */ | 
|---|
| 294 | int tindex;                   /* Index for adding new tokens. */ | 
|---|
| 295 | int talloc;                   /* Number of tokens currently allocated. */ | 
|---|
| 296 | int depth;                    /* Depth required of an evaluation stack | 
|---|
| 297 | used for depth-first traversal of the | 
|---|
| 298 | parse tree. */ | 
|---|
| 299 | int nleaves;                  /* Number of leaves on the parse tree. */ | 
|---|
| 300 | int nregexps;                 /* Count of parallel regexps being built | 
|---|
| 301 | with dfaparse(). */ | 
|---|
| 302 | #ifdef MBS_SUPPORT | 
|---|
| 303 | /* These stuff are used only if MB_CUR_MAX > 1 or multibyte environments.  */ | 
|---|
| 304 | int nmultibyte_prop; | 
|---|
| 305 | int *multibyte_prop; | 
|---|
| 306 | /* The value of multibyte_prop[i] is defined by following rule. | 
|---|
| 307 | if tokens[i] < NOTCHAR | 
|---|
| 308 | bit 1 : tokens[i] is a single byte character, or the last-byte of | 
|---|
| 309 | a multibyte character. | 
|---|
| 310 | bit 0 : tokens[i] is a single byte character, or the 1st-byte of | 
|---|
| 311 | a multibyte character. | 
|---|
| 312 | if tokens[i] = MBCSET | 
|---|
| 313 | ("the index of mbcsets correspnd to this operator" << 2) + 3 | 
|---|
| 314 |  | 
|---|
| 315 | e.g. | 
|---|
| 316 | tokens | 
|---|
| 317 | = 'single_byte_a', 'multi_byte_A', single_byte_b' | 
|---|
| 318 | = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b' | 
|---|
| 319 | multibyte_prop | 
|---|
| 320 | = 3     , 1               ,  0              ,  2              , 3 | 
|---|
| 321 | */ | 
|---|
| 322 |  | 
|---|
| 323 | /* Array of the bracket expressoin in the DFA.  */ | 
|---|
| 324 | struct mb_char_classes *mbcsets; | 
|---|
| 325 | int nmbcsets; | 
|---|
| 326 | int mbcsets_alloc; | 
|---|
| 327 | #endif | 
|---|
| 328 |  | 
|---|
| 329 | /* Stuff owned by the state builder. */ | 
|---|
| 330 | dfa_state *states;            /* States of the dfa. */ | 
|---|
| 331 | int sindex;                   /* Index for adding new states. */ | 
|---|
| 332 | int salloc;                   /* Number of states currently allocated. */ | 
|---|
| 333 |  | 
|---|
| 334 | /* Stuff built by the structure analyzer. */ | 
|---|
| 335 | position_set *follows;        /* Array of follow sets, indexed by position | 
|---|
| 336 | index.  The follow of a position is the set | 
|---|
| 337 | of positions containing characters that | 
|---|
| 338 | could conceivably follow a character | 
|---|
| 339 | matching the given position in a string | 
|---|
| 340 | matching the regexp.  Allocated to the | 
|---|
| 341 | maximum possible position index. */ | 
|---|
| 342 | int searchflag;               /* True if we are supposed to build a searching | 
|---|
| 343 | as opposed to an exact matcher.  A searching | 
|---|
| 344 | matcher finds the first and shortest string | 
|---|
| 345 | matching a regexp anywhere in the buffer, | 
|---|
| 346 | whereas an exact matcher finds the longest | 
|---|
| 347 | string matching, but anchored to the | 
|---|
| 348 | beginning of the buffer. */ | 
|---|
| 349 |  | 
|---|
| 350 | /* Stuff owned by the executor. */ | 
|---|
| 351 | int tralloc;                  /* Number of transition tables that have | 
|---|
| 352 | slots so far. */ | 
|---|
| 353 | int trcount;                  /* Number of transition tables that have | 
|---|
| 354 | actually been built. */ | 
|---|
| 355 | int **trans;                  /* Transition tables for states that can | 
|---|
| 356 | never accept.  If the transitions for a | 
|---|
| 357 | state have not yet been computed, or the | 
|---|
| 358 | state could possibly accept, its entry in | 
|---|
| 359 | this table is NULL. */ | 
|---|
| 360 | int **realtrans;              /* Trans always points to realtrans + 1; this | 
|---|
| 361 | is so trans[-1] can contain NULL. */ | 
|---|
| 362 | int **fails;                  /* Transition tables after failing to accept | 
|---|
| 363 | on a state that potentially could do so. */ | 
|---|
| 364 | int *success;                 /* Table of acceptance conditions used in | 
|---|
| 365 | dfaexec and computed in build_state. */ | 
|---|
| 366 | int *newlines;                /* Transitions on newlines.  The entry for a | 
|---|
| 367 | newline in any transition table is always | 
|---|
| 368 | -1 so we can count lines without wasting | 
|---|
| 369 | too many cycles.  The transition for a | 
|---|
| 370 | newline is stored separately and handled | 
|---|
| 371 | as a special case.  Newline is also used | 
|---|
| 372 | as a sentinel at the end of the buffer. */ | 
|---|
| 373 | struct dfamust *musts;        /* List of strings, at least one of which | 
|---|
| 374 | is known to appear in any r.e. matching | 
|---|
| 375 | the dfa. */ | 
|---|
| 376 | }; | 
|---|
| 377 |  | 
|---|
| 378 | /* Some macros for user access to dfa internals. */ | 
|---|
| 379 |  | 
|---|
| 380 | /* ACCEPTING returns true if s could possibly be an accepting state of r. */ | 
|---|
| 381 | #define ACCEPTING(s, r) ((r).states[s].constraint) | 
|---|
| 382 |  | 
|---|
| 383 | /* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the | 
|---|
| 384 | specified context. */ | 
|---|
| 385 | #define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \ | 
|---|
| 386 | SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint,              \ | 
|---|
| 387 | prevn, currn, prevl, currl) | 
|---|
| 388 |  | 
|---|
| 389 | /* FIRST_MATCHING_REGEXP returns the index number of the first of parallel | 
|---|
| 390 | regexps that a given state could accept.  Parallel regexps are numbered | 
|---|
| 391 | starting at 1. */ | 
|---|
| 392 | #define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end) | 
|---|
| 393 |  | 
|---|
| 394 | /* Entry points. */ | 
|---|
| 395 |  | 
|---|
| 396 | /* dfasyntax() takes three arguments; the first sets the syntax bits described | 
|---|
| 397 | earlier in this file, the second sets the case-folding flag, and the | 
|---|
| 398 | third specifies the line terminator. */ | 
|---|
| 399 | extern void dfasyntax PARAMS ((reg_syntax_t, int, unsigned char)); | 
|---|
| 400 |  | 
|---|
| 401 | /* Compile the given string of the given length into the given struct dfa. | 
|---|
| 402 | Final argument is a flag specifying whether to build a searching or an | 
|---|
| 403 | exact matcher. */ | 
|---|
| 404 | extern void dfacomp PARAMS ((char const *, size_t, struct dfa *, int)); | 
|---|
| 405 |  | 
|---|
| 406 | /* Execute the given struct dfa on the buffer of characters.  The | 
|---|
| 407 | first char * points to the beginning, and the second points to the | 
|---|
| 408 | first character after the end of the buffer, which must be a writable | 
|---|
| 409 | place so a sentinel end-of-buffer marker can be stored there.  The | 
|---|
| 410 | second-to-last argument is a flag telling whether to allow newlines to | 
|---|
| 411 | be part of a string matching the regexp.  The next-to-last argument, | 
|---|
| 412 | if non-NULL, points to a place to increment every time we see a | 
|---|
| 413 | newline.  The final argument, if non-NULL, points to a flag that will | 
|---|
| 414 | be set if further examination by a backtracking matcher is needed in | 
|---|
| 415 | order to verify backreferencing; otherwise the flag will be cleared. | 
|---|
| 416 | Returns NULL if no match is found, or a pointer to the first | 
|---|
| 417 | character after the first & shortest matching string in the buffer. */ | 
|---|
| 418 | extern char *dfaexec PARAMS ((struct dfa *, char const *, char *, int, int *, int *)); | 
|---|
| 419 |  | 
|---|
| 420 | /* Free the storage held by the components of a struct dfa. */ | 
|---|
| 421 | extern void dfafree PARAMS ((struct dfa *)); | 
|---|
| 422 |  | 
|---|
| 423 | /* Entry points for people who know what they're doing. */ | 
|---|
| 424 |  | 
|---|
| 425 | /* Initialize the components of a struct dfa. */ | 
|---|
| 426 | extern void dfainit PARAMS ((struct dfa *)); | 
|---|
| 427 |  | 
|---|
| 428 | /* Incrementally parse a string of given length into a struct dfa. */ | 
|---|
| 429 | extern void dfaparse PARAMS ((char const *, size_t, struct dfa *)); | 
|---|
| 430 |  | 
|---|
| 431 | /* Analyze a parsed regexp; second argument tells whether to build a searching | 
|---|
| 432 | or an exact matcher. */ | 
|---|
| 433 | extern void dfaanalyze PARAMS ((struct dfa *, int)); | 
|---|
| 434 |  | 
|---|
| 435 | /* Compute, for each possible character, the transitions out of a given | 
|---|
| 436 | state, storing them in an array of integers. */ | 
|---|
| 437 | extern void dfastate PARAMS ((int, struct dfa *, int [])); | 
|---|
| 438 |  | 
|---|
| 439 | /* Error handling. */ | 
|---|
| 440 |  | 
|---|
| 441 | /* dfaerror() is called by the regexp routines whenever an error occurs.  It | 
|---|
| 442 | takes a single argument, a NUL-terminated string describing the error. | 
|---|
| 443 | The user must supply a dfaerror.  */ | 
|---|
| 444 | extern void dfaerror PARAMS ((const char *)); | 
|---|