source: vendor/grep/current/src/pcresearch.c

Last change on this file was 3529, checked in by bird, 4 years ago

Imported grep 3.7 from grep-3.7.tar.gz (sha256: c22b0cf2d4f6bbe599c902387e8058990e1eee99aef333a203829e5fd3dbb342), applying minimal auto-props.

  • Property svn:eol-style set to native
File size: 10.6 KB
Line 
1/* pcresearch.c - searching subroutines using PCRE for grep.
2 Copyright 2000, 2007, 2009-2021 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19/* Written August 1992 by Mike Haertel. */
20
21#include <config.h>
22#include "search.h"
23#include "die.h"
24
25#include <pcre.h>
26
/* Number of ints in the offset vector handed to pcre_exec.  Only the
   first two entries (start and end of the overall match) are needed
   by this file, so the value must be at least 2; anything larger
   merely helps pcre_exec's performance.  */
enum { NSUB = 300 };

/* Option bits that the installed <pcre.h> may not provide.  Each
   missing one is defined as 0, so it can both be tested with #if and
   be OR-ed harmlessly into a flag word.  */
#if !defined PCRE_EXTRA_MATCH_LIMIT_RECURSION
# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
#endif
#if !defined PCRE_STUDY_JIT_COMPILE
# define PCRE_STUDY_JIT_COMPILE 0
#endif
#if !defined PCRE_STUDY_EXTRA_NEEDED
# define PCRE_STUDY_EXTRA_NEEDED 0
#endif
40
41struct pcre_comp
42{
43 /* Compiled internal form of a Perl regular expression. */
44 pcre *cre;
45
46 /* Additional information about the pattern. */
47 pcre_extra *extra;
48
49#if PCRE_STUDY_JIT_COMPILE
50 /* The JIT stack and its maximum size. */
51 pcre_jit_stack *jit_stack;
52 int jit_stack_size;
53#endif
54
55 /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
56 string matches when that flag is used. */
57 int empty_match[2];
58};
59
60
61/* Match the already-compiled PCRE pattern against the data in SUBJECT,
62 of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
63 options OPTIONS, and storing resulting matches into SUB. Return
64 the (nonnegative) match location or a (negative) error number. */
65static int
66jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
67 int search_offset, int options, int *sub)
68{
69 while (true)
70 {
71 int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes,
72 search_offset, options, sub, NSUB);
73
74#if PCRE_STUDY_JIT_COMPILE
75 if (e == PCRE_ERROR_JIT_STACKLIMIT
76 && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
77 {
78 int old_size = pc->jit_stack_size;
79 int new_size = pc->jit_stack_size = old_size * 2;
80 if (pc->jit_stack)
81 pcre_jit_stack_free (pc->jit_stack);
82 pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size);
83 if (!pc->jit_stack)
84 die (EXIT_TROUBLE, 0,
85 _("failed to allocate memory for the PCRE JIT stack"));
86 pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack);
87 continue;
88 }
89#endif
90
91#if PCRE_EXTRA_MATCH_LIMIT_RECURSION
92 if (e == PCRE_ERROR_RECURSIONLIMIT
93 && (PCRE_STUDY_EXTRA_NEEDED || pc->extra))
94 {
95 unsigned long lim
96 = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION
97 ? pc->extra->match_limit_recursion
98 : 0);
99 if (lim <= ULONG_MAX / 2)
100 {
101 pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1;
102 pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
103 continue;
104 }
105 }
106#endif
107
108 return e;
109 }
110}
111
112/* Compile the -P style PATTERN, containing SIZE bytes that are
113 followed by '\n'. Return a description of the compiled pattern. */
114
115void *
116Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
117{
118 int e;
119 char const *ep;
120 static char const wprefix[] = "(?<!\\w)(?:";
121 static char const wsuffix[] = ")(?!\\w)";
122 static char const xprefix[] = "^(?:";
123 static char const xsuffix[] = ")$";
124 int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
125 sizeof xprefix - 1 + sizeof xsuffix - 1);
126 char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
127 int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
128 char *patlim = pattern + size;
129 char *n = re;
130 char const *p;
131 char const *pnul;
132 struct pcre_comp *pc = xcalloc (1, sizeof (*pc));
133
134 if (localeinfo.multibyte)
135 {
136 if (! localeinfo.using_utf8)
137 die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
138 flags |= PCRE_UTF8;
139 }
140
141 /* FIXME: Remove this restriction. */
142 if (rawmemchr (pattern, '\n') != patlim)
143 die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
144
145 *n = '\0';
146 if (match_words)
147 strcpy (n, wprefix);
148 if (match_lines)
149 strcpy (n, xprefix);
150 n += strlen (n);
151
152 /* The PCRE interface doesn't allow NUL bytes in the pattern, so
153 replace each NUL byte in the pattern with the four characters
154 "\000", removing a preceding backslash if there are an odd
155 number of backslashes before the NUL. */
156 *patlim = '\0';
157 for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
158 {
159 memcpy (n, p, pnul - p);
160 n += pnul - p;
161 for (p = pnul; pattern < p && p[-1] == '\\'; p--)
162 continue;
163 n -= (pnul - p) & 1;
164 strcpy (n, "\\000");
165 n += 4;
166 }
167 memcpy (n, p, patlim - p + 1);
168 n += patlim - p;
169 *patlim = '\n';
170
171 if (match_words)
172 strcpy (n, wsuffix);
173 if (match_lines)
174 strcpy (n, xsuffix);
175
176 pc->cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
177 if (!pc->cre)
178 die (EXIT_TROUBLE, 0, "%s", ep);
179
180 int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE;
181 pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep);
182 if (ep)
183 die (EXIT_TROUBLE, 0, "%s", ep);
184
185#if PCRE_STUDY_JIT_COMPILE
186 if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e))
187 die (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
188
189 /* The PCRE documentation says that a 32 KiB stack is the default. */
190 if (e)
191 pc->jit_stack_size = 32 << 10;
192#endif
193
194 free (re);
195
196 int sub[NSUB];
197 pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0,
198 PCRE_NOTBOL, sub, NSUB);
199 pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub,
200 NSUB);
201
202 return pc;
203}
204
205size_t
206Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
207 char const *start_ptr)
208{
209 int sub[NSUB];
210 char const *p = start_ptr ? start_ptr : buf;
211 bool bol = p[-1] == eolbyte;
212 char const *line_start = buf;
213 int e = PCRE_ERROR_NOMATCH;
214 char const *line_end;
215 struct pcre_comp *pc = vcp;
216
217 /* The search address to pass to pcre_exec. This is the start of
218 the buffer, or just past the most-recently discovered encoding
219 error or line end. */
220 char const *subject = buf;
221
222 do
223 {
224 /* Search line by line. Although this code formerly used
225 PCRE_MULTILINE for performance, the performance wasn't always
226 better and the correctness issues were too puzzling. See
227 Bug#22655. */
228 line_end = rawmemchr (p, eolbyte);
229 if (INT_MAX < line_end - p)
230 die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
231
232 for (;;)
233 {
234 /* Skip past bytes that are easily determined to be encoding
235 errors, treating them as data that cannot match. This is
236 faster than having pcre_exec check them. */
237 while (localeinfo.sbclen[to_uchar (*p)] == -1)
238 {
239 p++;
240 subject = p;
241 bol = false;
242 }
243
244 int search_offset = p - subject;
245
246 /* Check for an empty match; this is faster than letting
247 pcre_exec do it. */
248 if (p == line_end)
249 {
250 sub[0] = sub[1] = search_offset;
251 e = pc->empty_match[bol];
252 break;
253 }
254
255 int options = 0;
256 if (!bol)
257 options |= PCRE_NOTBOL;
258
259 e = jit_exec (pc, subject, line_end - subject, search_offset,
260 options, sub);
261 if (e != PCRE_ERROR_BADUTF8)
262 break;
263 int valid_bytes = sub[0];
264
265 if (search_offset <= valid_bytes)
266 {
267 /* Try to match the string before the encoding error. */
268 if (valid_bytes == 0)
269 {
270 /* Handle the empty-match case specially, for speed.
271 This optimization is valid if VALID_BYTES is zero,
272 which means SEARCH_OFFSET is also zero. */
273 sub[1] = 0;
274 e = pc->empty_match[bol];
275 }
276 else
277 e = jit_exec (pc, subject, valid_bytes, search_offset,
278 options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
279
280 if (e != PCRE_ERROR_NOMATCH)
281 break;
282
283 /* Treat the encoding error as data that cannot match. */
284 p = subject + valid_bytes + 1;
285 bol = false;
286 }
287
288 subject += valid_bytes + 1;
289 }
290
291 if (e != PCRE_ERROR_NOMATCH)
292 break;
293 bol = true;
294 p = subject = line_start = line_end + 1;
295 }
296 while (p < buf + size);
297
298 if (e <= 0)
299 {
300 switch (e)
301 {
302 case PCRE_ERROR_NOMATCH:
303 break;
304
305 case PCRE_ERROR_NOMEMORY:
306 die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ());
307
308#if PCRE_STUDY_JIT_COMPILE
309 case PCRE_ERROR_JIT_STACKLIMIT:
310 die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"),
311 input_filename ());
312#endif
313
314 case PCRE_ERROR_MATCHLIMIT:
315 die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"),
316 input_filename ());
317
318 case PCRE_ERROR_RECURSIONLIMIT:
319 die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"),
320 input_filename ());
321
322 default:
323 /* For now, we lump all remaining PCRE failures into this basket.
324 If anyone cares to provide sample grep usage that can trigger
325 particular PCRE errors, we can add to the list (above) of more
326 detailed diagnostics. */
327 die (EXIT_TROUBLE, 0, _("%s: internal PCRE error: %d"),
328 input_filename (), e);
329 }
330
331 return -1;
332 }
333 else
334 {
335 char const *matchbeg = subject + sub[0];
336 char const *matchend = subject + sub[1];
337 char const *beg;
338 char const *end;
339 if (start_ptr)
340 {
341 beg = matchbeg;
342 end = matchend;
343 }
344 else
345 {
346 beg = line_start;
347 end = line_end + 1;
348 }
349 *match_size = end - beg;
350 return beg - buf;
351 }
352}
Note: See TracBrowser for help on using the repository browser.