/* dfa.h - declarations for GNU deterministic regexp compiler
   Copyright (C) 1988, 1998, 2007, 2009-2022 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc.,
   51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */

/* Written June, 1988 by Mike Haertel */
| 20 |
|
---|
| 21 | #ifndef DFA_H_
|
---|
| 22 | #define DFA_H_
|
---|
| 23 |
|
---|
| 24 | #include "idx.h"
|
---|
| 25 | #include <regex.h>
|
---|
| 26 | #include <stddef.h>
|
---|
| 27 | #include <stdlib.h>
|
---|
| 28 |
|
---|
| 29 | #ifdef __cplusplus
|
---|
| 30 | extern "C" {
|
---|
| 31 | #endif
|
---|
| 32 |
|
---|
/* Forward declaration only; this header uses the type solely through
   pointers.  See localeinfo.h.  */
struct localeinfo;
| 34 |
|
---|
| 35 | /* Element of a list of strings, at least one of which is known to
|
---|
| 36 | appear in any R.E. matching the DFA. */
|
---|
| 37 | struct dfamust
|
---|
| 38 | {
|
---|
| 39 | bool exact;
|
---|
| 40 | bool begline;
|
---|
| 41 | bool endline;
|
---|
| 42 | char must[FLEXIBLE_ARRAY_MEMBER];
|
---|
| 43 | };
|
---|
| 44 |
|
---|
/* The dfa structure.  It is completely opaque: this header declares
   only the tag, so callers handle it exclusively through pointers.  */
struct dfa;
| 47 |
|
---|
/* Needed when Gnulib is not used: give every attribute macro that the
   declarations in this header rely on an empty fallback definition.  */
#ifndef _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_DEALLOC(f, i)
# define _GL_ATTRIBUTE_DEALLOC_FREE
# define _GL_ATTRIBUTE_RETURNS_NONNULL
#endif

/* dfasuperset, dfaisfast and dfasupported are declared with
   _GL_ATTRIBUTE_PURE.  It is guarded separately so that a fallback is
   supplied even if the includer defines _GL_ATTRIBUTE_MALLOC but not
   this macro.  */
#ifndef _GL_ATTRIBUTE_PURE
# define _GL_ATTRIBUTE_PURE
#endif
| 55 |
|
---|
| 56 | /* Entry points. */
|
---|
| 57 |
|
---|
| 58 | /* Allocate a struct dfa. The struct dfa is completely opaque.
|
---|
| 59 | It should be initialized via dfasyntax or dfacopysyntax before other use.
|
---|
| 60 | The returned pointer should be passed directly to free() after
|
---|
| 61 | calling dfafree() on it. */
|
---|
| 62 | extern struct dfa *dfaalloc (void)
|
---|
| 63 | _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE
|
---|
| 64 | _GL_ATTRIBUTE_RETURNS_NONNULL;
|
---|
| 65 |
|
---|
/* DFA options that can be ORed together, for dfasyntax's 4th arg.
   Each option occupies its own bit.  */
enum
  {
    /* ^ and $ match only the start and end of data, and do not match
       end-of-line within data.  This is always false for grep, but
       possibly true for other apps.  */
    DFA_ANCHOR = 1 << 0,

    /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
    DFA_EOL_NUL = 1 << 1,

    /* Treat [:alpha:] etc. as an error at the top level, instead of
       merely a warning.  */
    DFA_CONFUSING_BRACKETS_ERROR = 1 << 2,

    /* Warn about stray backslashes before ordinary characters other
       than ] and } which are special because even though POSIX
       says \] and \} have undefined interpretation, platforms
       reliably ignore those stray backslashes and warning about them
       would likely cause more trouble than it's worth.  */
    DFA_STRAY_BACKSLASH_WARN = 1 << 3,

    /* Warn about * appearing out of context at the start of an
       expression or subexpression.  */
    DFA_STAR_WARN = 1 << 4,

    /* Warn about +, ?, {...} appearing out of context at the start of
       an expression or subexpression.  */
    DFA_PLUS_WARN = 1 << 5,
  };
| 96 |
|
---|
/* Initialize or reinitialize a DFA.  The arguments are:
   D       - the DFA to operate on.
   LINFO   - information about the current locale.
   BITS    - syntax bits described in regex.h.
   DFAOPTS - additional DFA options (the DFA_* flags declared in this
             header), ORed together.  */
extern void dfasyntax (struct dfa *d, struct localeinfo const *linfo,
                       reg_syntax_t bits, int dfaopts);
| 104 |
|
---|
/* Initialize or reinitialize the DFA TO from the already-initialized
   DFA FROM.  */
extern void dfacopysyntax (struct dfa *to, struct dfa const *from);
| 107 |
|
---|
| 108 | /* Parse the given string of given length into the given struct dfa. */
|
---|
| 109 | extern void dfaparse (char const *, idx_t, struct dfa *);
|
---|
| 110 |
|
---|
| 111 | struct dfamust;
|
---|
| 112 |
|
---|
| 113 | /* Free the storage held by the components of a struct dfamust. */
|
---|
| 114 | extern void dfamustfree (struct dfamust *);
|
---|
| 115 |
|
---|
| 116 | /* Allocate and return a struct dfamust from a struct dfa that was
|
---|
| 117 | initialized by dfaparse and not yet given to dfacomp. */
|
---|
| 118 | extern struct dfamust *dfamust (struct dfa const *)
|
---|
| 119 | _GL_ATTRIBUTE_DEALLOC (dfamustfree, 1);
|
---|
| 120 |
|
---|
| 121 | /* Compile the given string of the given length into the given struct dfa.
|
---|
| 122 | The last argument says whether to build a searching or an exact matcher.
|
---|
| 123 | A null first argument means the struct dfa has already been
|
---|
| 124 | initialized by dfaparse; the second argument is ignored. */
|
---|
| 125 | extern void dfacomp (char const *, idx_t, struct dfa *, bool);
|
---|
| 126 |
|
---|
| 127 | /* Search through a buffer looking for a match to the given struct dfa.
|
---|
| 128 | Find the first occurrence of a string matching the regexp in the
|
---|
| 129 | buffer, and the shortest possible version thereof. Return a pointer to
|
---|
| 130 | the first character after the match, or NULL if none is found. BEGIN
|
---|
| 131 | points to the beginning of the buffer, and END points to the first byte
|
---|
| 132 | after its end. Note however that we store a sentinel byte (usually
|
---|
| 133 | newline) in *END, so the actual buffer must be one byte longer.
|
---|
| 134 | When ALLOW_NL is true, newlines may appear in the matching string.
|
---|
| 135 | If COUNT is non-NULL, increment *COUNT once for each newline processed.
|
---|
| 136 | Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
|
---|
| 137 | encountered a back-reference. The caller can use this to decide
|
---|
| 138 | whether to fall back on a backtracking matcher. */
|
---|
| 139 | extern char *dfaexec (struct dfa *d, char const *begin, char *end,
|
---|
| 140 | bool allow_nl, idx_t *count, bool *backref);
|
---|
| 141 |
|
---|
| 142 | /* Return a superset for D. The superset matches everything that D
|
---|
| 143 | matches, along with some other strings (though the latter should be
|
---|
| 144 | rare, for efficiency reasons). Return a null pointer if no useful
|
---|
| 145 | superset is available. */
|
---|
| 146 | extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
|
---|
| 147 |
|
---|
| 148 | /* The DFA is likely to be fast. */
|
---|
| 149 | extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
|
---|
| 150 |
|
---|
| 151 | /* Return true if every construct in D is supported by this DFA matcher. */
|
---|
| 152 | extern bool dfasupported (struct dfa const *) _GL_ATTRIBUTE_PURE;
|
---|
| 153 |
|
---|
/* Free the storage held by the components of the struct dfa D.  */
extern void dfafree (struct dfa *d);
| 156 |
|
---|
| 157 | /* Error handling. */
|
---|
| 158 |
|
---|
/* dfawarn() is called by the regexp routines whenever a regex is
   compiled that likely doesn't do what the user wanted.  Its single
   argument MESG is a NUL-terminated string describing the situation.
   This header only declares the function; the user must supply a
   dfawarn.  */
extern void dfawarn (const char *mesg);
| 164 |
|
---|
/* dfaerror() is called by the regexp routines whenever an error
   occurs.  Its single argument MESG is a NUL-terminated string
   describing the error.  The function is declared _Noreturn, so an
   implementation must not return to its caller.  This header only
   declares the function; the user must supply a dfaerror.  */
extern _Noreturn void dfaerror (const char *mesg);
| 169 |
|
---|
| 170 | #ifdef __cplusplus
|
---|
| 171 | }
|
---|
| 172 | #endif
|
---|
| 173 |
|
---|
| 174 | #endif /* dfa.h */
|
---|