| 1 | /* dfa.h - declarations for GNU deterministic regexp compiler
|
|---|
| 2 | Copyright (C) 1988, 1998, 2007, 2009-2021 Free Software Foundation, Inc.
|
|---|
| 3 |
|
|---|
| 4 | This program is free software; you can redistribute it and/or modify
|
|---|
| 5 | it under the terms of the GNU General Public License as published by
|
|---|
| 6 | the Free Software Foundation; either version 3, or (at your option)
|
|---|
| 7 | any later version.
|
|---|
| 8 |
|
|---|
| 9 | This program is distributed in the hope that it will be useful,
|
|---|
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 12 | GNU General Public License for more details.
|
|---|
| 13 |
|
|---|
| 14 | You should have received a copy of the GNU General Public License
|
|---|
| 15 | along with this program; if not, write to the Free Software
|
|---|
| 16 | Foundation, Inc.,
|
|---|
| 17 | 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */
|
|---|
| 18 |
|
|---|
| 19 | /* Written June, 1988 by Mike Haertel */
|
|---|
| 20 |
|
|---|
| 21 | #ifndef DFA_H_
|
|---|
| 22 | #define DFA_H_
|
|---|
| 23 |
|
|---|
| 24 | #include <regex.h>
|
|---|
| 25 | #include <stdbool.h>
|
|---|
| 26 | #include <stddef.h>
|
|---|
| 27 | #include <stdlib.h>
|
|---|
| 28 |
|
|---|
| 29 | #ifdef __cplusplus
|
|---|
| 30 | extern "C" {
|
|---|
| 31 | #endif
|
|---|
| 32 |
|
|---|
/* Forward declaration only; see localeinfo.h for the definition.  */
struct localeinfo;
| 34 |
|
|---|
/* Fallback for includers that have not defined FLEXIBLE_ARRAY_MEMBER:
   expand to nothing, which yields the C99 flexible array member
   syntax 'char must[];'.  An existing definition is left untouched.  */
#ifndef FLEXIBLE_ARRAY_MEMBER
# define FLEXIBLE_ARRAY_MEMBER
#endif

/* Element of a list of strings, at least one of which is known to
   appear in any R.E. matching the DFA.  */
struct dfamust
{
  bool exact;
  bool begline;
  bool endline;
  /* Trailing variable-length storage; presumably holds the string
     itself, so the struct must be allocated with extra room.  */
  char must[FLEXIBLE_ARRAY_MEMBER];
};
| 44 |
|
|---|
/* The DFA itself.  Its layout is deliberately hidden: this header
   exposes it only as an incomplete (opaque) type.  */
struct dfa;
| 47 |
|
|---|
/* Needed when Gnulib is not used.  Every attribute macro that the
   declarations in this header rely on gets an empty fallback here.
   Each macro is guarded on its own, so a definition already supplied
   by the includer is kept and never redefined.  */
#ifndef _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_MALLOC
#endif
#ifndef _GL_ATTRIBUTE_DEALLOC
# define _GL_ATTRIBUTE_DEALLOC(f, i)
#endif
#ifndef _GL_ATTRIBUTE_DEALLOC_FREE
# define _GL_ATTRIBUTE_DEALLOC_FREE
#endif
#ifndef _GL_ATTRIBUTE_RETURNS_NONNULL
# define _GL_ATTRIBUTE_RETURNS_NONNULL
#endif
#ifndef _GL_ATTRIBUTE_PURE
# define _GL_ATTRIBUTE_PURE
#endif
| 54 |
|
|---|
| 55 | /* Entry points. */
|
|---|
| 56 |
|
|---|
| 57 | /* Allocate a struct dfa. The struct dfa is completely opaque.
|
|---|
| 58 | It should be initialized via dfasyntax or dfacopysyntax before other use.
|
|---|
| 59 | The returned pointer should be passed directly to free() after
|
|---|
| 60 | calling dfafree() on it. */
|
|---|
| 61 | extern struct dfa *dfaalloc (void)
|
|---|
| 62 | _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE
|
|---|
| 63 | _GL_ATTRIBUTE_RETURNS_NONNULL;
|
|---|
| 64 |
|
|---|
/* Option bits for dfasyntax's 4th argument; OR together as needed.  */
enum
{
  /* ^ and $ match only at the start and end of the data, and never
     at an end-of-line within the data.  grep always leaves this
     clear, but other applications may set it.  */
  DFA_ANCHOR = 0x1,

  /* End-of-line in the data is '\0' rather than the traditional '\n'.  */
  DFA_EOL_NUL = 0x2
};
| 76 |
|
|---|
/* Initialize or reinitialize a DFA.
   DFA is the DFA to operate on.
   LINFO is information about the current locale.
   BITS holds the syntax bits described in regex.h.
   DFAOPTS holds additional DFA options (the DFA_* flags above).  */
extern void dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
                       reg_syntax_t bits, int dfaopts);
| 84 |
|
|---|
/* Initialize or reinitialize the DFA TO, taking the settings from
   FROM, which must already have been initialized.  */
extern void dfacopysyntax (struct dfa *to, struct dfa const *from);
| 87 |
|
|---|
/* Parse the string S, of length LEN, into the struct dfa D.  */
extern void dfaparse (char const *s, ptrdiff_t len, struct dfa *d);
| 90 |
|
|---|
struct dfamust;

/* Release the storage held by the components of the struct dfamust DM.  */
extern void dfamustfree (struct dfamust *dm);
| 95 |
|
|---|
| 96 | /* Allocate and return a struct dfamust from a struct dfa that was
|
|---|
| 97 | initialized by dfaparse and not yet given to dfacomp. */
|
|---|
| 98 | extern struct dfamust *dfamust (struct dfa const *)
|
|---|
| 99 | _GL_ATTRIBUTE_DEALLOC (dfamustfree, 1);
|
|---|
| 100 |
|
|---|
/* Compile the string S, of length LEN, into the struct dfa D.
   SEARCHFLAG says whether to build a searching or an exact matcher.
   If S is null, D has already been initialized by dfaparse, and LEN
   is ignored.  */
extern void dfacomp (char const *s, ptrdiff_t len, struct dfa *d,
                     bool searchflag);
| 106 |
|
|---|
/* Search a buffer for a match to the struct dfa D.

   Find the first occurrence of a string matching the regexp in the
   buffer, and the shortest possible version thereof.  Return a
   pointer to the first character after the match, or NULL if none
   is found.

   BEGIN points to the beginning of the buffer and END to the first
   byte after its end.  A sentinel byte (usually newline) is stored
   in *END, so the actual buffer must be one byte longer.

   When ALLOW_NL is true, newlines may appear in the matching string.
   If COUNT is non-NULL, *COUNT is incremented once for each newline
   processed.  If BACKREF is non-NULL, *BACKREF is set to indicate
   whether a back-reference was encountered; the caller can use this
   to decide whether to fall back on a backtracking matcher.  */
extern char *dfaexec (struct dfa *d, char const *begin, char *end,
                      bool allow_nl, ptrdiff_t *count, bool *backref);
| 121 |
|
|---|
| 122 | /* Return a superset for D. The superset matches everything that D
|
|---|
| 123 | matches, along with some other strings (though the latter should be
|
|---|
| 124 | rare, for efficiency reasons). Return a null pointer if no useful
|
|---|
| 125 | superset is available. */
|
|---|
| 126 | extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
|
|---|
| 127 |
|
|---|
| 128 | /* The DFA is likely to be fast. */
|
|---|
| 129 | extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
|
|---|
| 130 |
|
|---|
| 131 | /* Return true if every construct in D is supported by this DFA matcher. */
|
|---|
| 132 | extern bool dfasupported (struct dfa const *) _GL_ATTRIBUTE_PURE;
|
|---|
| 133 |
|
|---|
/* Release the storage held by the components of the struct dfa D.  */
extern void dfafree (struct dfa *d);
| 136 |
|
|---|
| 137 | /* Error handling. */
|
|---|
| 138 |
|
|---|
/* The regexp routines call dfawarn() whenever a regex is compiled
   that likely doesn't do what the user wanted.  MESG is a
   NUL-terminated string describing the situation.  This function is
   not provided here: the user must supply a dfawarn.  */
extern void dfawarn (char const *mesg);
| 144 |
|
|---|
/* The regexp routines call dfaerror() whenever an error occurs.
   MESG is a NUL-terminated string describing the error.  The
   function is declared _Noreturn, so it must not return to its
   caller.  It is not provided here: the user must supply a dfaerror.  */
extern _Noreturn void dfaerror (char const *mesg);
| 149 |
|
|---|
| 150 | #ifdef __cplusplus
|
|---|
| 151 | }
|
|---|
| 152 | #endif
|
|---|
| 153 |
|
|---|
| 154 | #endif /* dfa.h */
|
|---|