Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

regexec.c@ 3930

Visit:

Last change on this file since 3930 was 3076, checked in by bird, 18 years ago
gawk 3.1.5
File size: 125.7 KB

Line
1	/* Extended regular expression matching and search library.
2	Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, write to the Free
18	Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19	02110-1301 USA. */
20
21	static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22	int n) internal_function;
23	static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24	static void match_ctx_free (re_match_context_t *cache) internal_function;
25	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26	int str_idx, int from, int to)
27	internal_function;
28	static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
29	internal_function;
30	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31	int str_idx) internal_function;
32	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33	int node, int str_idx)
34	internal_function;
35	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
36	re_dfastate_t **limited_sts, int last_node,
37	int last_str_idx)
38	internal_function;
39	static reg_errcode_t re_search_internal (const regex_t *preg,
40	const char *string, int length,
41	int start, int range, int stop,
42	size_t nmatch, regmatch_t pmatch[],
43	int eflags) internal_function;
44	static int re_search_2_stub (struct re_pattern_buffer *bufp,
45	const char *string1, int length1,
46	const char *string2, int length2,
47	int start, int range, struct re_registers *regs,
48	int stop, int ret_len) internal_function;
49	static int re_search_stub (struct re_pattern_buffer *bufp,
50	const char *string, int length, int start,
51	int range, int stop, struct re_registers *regs,
52	int ret_len) internal_function;
53	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
54	int nregs, int regs_allocated) internal_function;
55	static inline re_dfastate_t *acquire_init_state_context
56	(reg_errcode_t err, const re_match_context_t mctx, int idx)
57	__attribute ((always_inline)) internal_function;
58	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
59	internal_function;
60	static int check_matching (re_match_context_t *mctx, int fl_longest_match,
61	int *p_match_first)
62	internal_function;
63	static int check_halt_node_context (const re_dfa_t *dfa, int node,
64	unsigned int context) internal_function;
65	static int check_halt_state_context (const re_match_context_t *mctx,
66	const re_dfastate_t *state, int idx)
67	internal_function;
68	static void update_regs (re_dfa_t dfa, regmatch_t pmatch,
69	regmatch_t *prev_idx_match, int cur_node,
70	int cur_idx, int nmatch) internal_function;
71	static int proceed_next_node (const re_match_context_t *mctx,
72	int nregs, regmatch_t *regs,
73	int pidx, int node, re_node_set eps_via_nodes,
74	struct re_fail_stack_t *fs) internal_function;
75	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
76	int str_idx, int dest_node, int nregs,
77	regmatch_t *regs,
78	re_node_set *eps_via_nodes) internal_function;
79	static int pop_fail_stack (struct re_fail_stack_t fs, int pidx, int nregs,
80	regmatch_t regs, re_node_set eps_via_nodes) internal_function;
81	static reg_errcode_t set_regs (const regex_t *preg,
82	const re_match_context_t *mctx,
83	size_t nmatch, regmatch_t *pmatch,
84	int fl_backtrack) internal_function;
85	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
86
87	#ifdef RE_ENABLE_I18N
88	static int sift_states_iter_mb (const re_match_context_t *mctx,
89	re_sift_context_t *sctx,
90	int node_idx, int str_idx, int max_str_idx) internal_function;
91	#endif /* RE_ENABLE_I18N */
92	static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
93	re_sift_context_t *sctx) internal_function;
94	static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
95	re_sift_context_t *sctx, int str_idx,
96	re_node_set *cur_dest) internal_function;
97	static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
98	re_sift_context_t *sctx,
99	int str_idx,
100	re_node_set *dest_nodes) internal_function;
101	static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
102	re_node_set *dest_nodes,
103	const re_node_set *candidates) internal_function;
104	static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
105	re_node_set *dest_nodes,
106	const re_node_set *and_nodes) internal_function;
107	static int check_dst_limits (re_match_context_t mctx, re_node_set limits,
108	int dst_node, int dst_idx, int src_node,
109	int src_idx) internal_function;
110	static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
111	int boundaries, int subexp_idx,
112	int from_node, int bkref_idx) internal_function;
113	static int check_dst_limits_calc_pos (re_match_context_t *mctx,
114	int limit, int subexp_idx,
115	int node, int str_idx,
116	int bkref_idx) internal_function;
117	static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
118	re_node_set *dest_nodes,
119	const re_node_set *candidates,
120	re_node_set *limits,
121	struct re_backref_cache_entry *bkref_ents,
122	int str_idx) internal_function;
123	static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
124	re_sift_context_t *sctx,
125	int str_idx, const re_node_set *candidates) internal_function;
126	static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
127	int next_state_log_idx) internal_function;
128	static reg_errcode_t merge_state_array (re_dfa_t dfa, re_dfastate_t *dst,
129	re_dfastate_t **src, int num) internal_function;
130	static re_dfastate_t find_recover_state (reg_errcode_t err,
131	re_match_context_t *mctx) internal_function;
132	static re_dfastate_t transit_state (reg_errcode_t err,
133	re_match_context_t *mctx,
134	re_dfastate_t *state) internal_function;
135	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
136	re_match_context_t *mctx,
137	re_dfastate_t *next_state) internal_function;
138	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
139	re_node_set *cur_nodes,
140	int str_idx) internal_function;
141	#if 0
142	static re_dfastate_t transit_state_sb (reg_errcode_t err,
143	re_match_context_t *mctx,
144	re_dfastate_t *pstate) internal_function;
145	#endif
146	#ifdef RE_ENABLE_I18N
147	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
148	re_dfastate_t *pstate) internal_function;
149	#endif /* RE_ENABLE_I18N */
150	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
151	const re_node_set *nodes) internal_function;
152	static reg_errcode_t get_subexp (re_match_context_t *mctx,
153	int bkref_node, int bkref_str_idx) internal_function;
154	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
155	const re_sub_match_top_t *sub_top,
156	re_sub_match_last_t *sub_last,
157	int bkref_node, int bkref_str) internal_function;
158	static int find_subexp_node (const re_dfa_t dfa, const re_node_set nodes,
159	int subexp_idx, int type) internal_function;
160	static reg_errcode_t check_arrival (re_match_context_t *mctx,
161	state_array_t *path, int top_node,
162	int top_str, int last_node, int last_str,
163	int type) internal_function;
164	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165	int str_idx,
166	re_node_set *cur_nodes,
167	re_node_set *next_nodes) internal_function;
168	static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
169	re_node_set *cur_nodes,
170	int ex_subexp, int type) internal_function;
171	static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
172	re_node_set *dst_nodes,
173	int target, int ex_subexp,
174	int type) internal_function;
175	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
176	re_node_set *cur_nodes, int cur_str,
177	int subexp_num, int type) internal_function;
178	static int build_trtable (re_dfa_t *dfa,
179	re_dfastate_t *state) internal_function;
180	#ifdef RE_ENABLE_I18N
181	static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
182	const re_string_t *input, int idx) internal_function;
183	# ifdef _LIBC
184	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
185	size_t name_len) internal_function;
186	# endif /* _LIBC */
187	#endif /* RE_ENABLE_I18N */
188	static int group_nodes_into_DFAstates (re_dfa_t *dfa,
189	const re_dfastate_t *state,
190	re_node_set *states_node,
191	bitset *states_ch) internal_function;
192	static int check_node_accept (const re_match_context_t *mctx,
193	const re_token_t *node, int idx) internal_function;
194	static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
195
196
197	/* Entry point for POSIX code. */
198
199	/* regexec searches for a given pattern, specified by PREG, in the
200	string STRING.
201
202	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
203	`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
204	least NMATCH elements, and we set them to the offsets of the
205	corresponding matched substrings.
206
207	EFLAGS specifies `execution flags' which affect matching: if
208	REG_NOTBOL is set, then ^ does not match at the beginning of the
209	string; if REG_NOTEOL is set, then $ does not match at the end.
210
211	We return 0 if we find a match and REG_NOMATCH if not. */
212
213	int
214	regexec (preg, string, nmatch, pmatch, eflags)
215	const regex_t *__restrict preg;
216	const char *__restrict string;
217	size_t nmatch;
218	regmatch_t pmatch[];
219	int eflags;
220	{
221	reg_errcode_t err;
222	int start, length;
223
224	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
225	return REG_BADPAT;
226
227	if (eflags & REG_STARTEND)
228	{
229	start = pmatch[0].rm_so;
230	length = pmatch[0].rm_eo;
231	}
232	else
233	{
234	start = 0;
235	length = strlen (string);
236	}
237	if (preg->no_sub)
238	err = re_search_internal (preg, string, length, start, length - start,
239	length, 0, NULL, eflags);
240	else
241	err = re_search_internal (preg, string, length, start, length - start,
242	length, nmatch, pmatch, eflags);
243	return err != REG_NOERROR;
244	}
245
246	#ifdef _LIBC
247	# include <shlib-compat.h>
248	versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
249
250	# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
251	__typeof__ (__regexec) __compat_regexec;
252
253	int
254	attribute_compat_text_section
255	__compat_regexec (const regex_t *__restrict preg,
256	const char *__restrict string, size_t nmatch,
257	regmatch_t pmatch[], int eflags)
258	{
259	return regexec (preg, string, nmatch, pmatch,
260	eflags & (REG_NOTBOL \| REG_NOTEOL));
261	}
262	compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
263	# endif
264	#endif
265
266	/* Entry points for GNU code. */
267
268	/* re_match, re_search, re_match_2, re_search_2
269
270	The former two functions operate on STRING with length LENGTH,
271	while the later two operate on concatenation of STRING1 and STRING2
272	with lengths LENGTH1 and LENGTH2, respectively.
273
274	re_match() matches the compiled pattern in BUFP against the string,
275	starting at index START.
276
277	re_search() first tries matching at index START, then it tries to match
278	starting from index START + 1, and so on. The last start position tried
279	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
280	way as re_match().)
281
282	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
283	the first STOP characters of the concatenation of the strings should be
284	concerned.
285
286	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
287	and all groups is stroed in REGS. (For the "_2" variants, the offsets are
288	computed relative to the concatenation, not relative to the individual
289	strings.)
290
291	On success, re_match* functions return the length of the match, re_search*
292	return the position of the start of the match. Return value -1 means no
293	match was found and -2 indicates an internal error. */
294
295	int
296	re_match (bufp, string, length, start, regs)
297	struct re_pattern_buffer *bufp;
298	const char *string;
299	int length, start;
300	struct re_registers *regs;
301	{
302	return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
303	}
304	#ifdef _LIBC
305	weak_alias (__re_match, re_match)
306	#endif
307
308	int
309	re_search (bufp, string, length, start, range, regs)
310	struct re_pattern_buffer *bufp;
311	const char *string;
312	int length, start, range;
313	struct re_registers *regs;
314	{
315	return re_search_stub (bufp, string, length, start, range, length, regs, 0);
316	}
317	#ifdef _LIBC
318	weak_alias (__re_search, re_search)
319	#endif
320
321	int
322	re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
323	struct re_pattern_buffer *bufp;
324	const char string1, string2;
325	int length1, length2, start, stop;
326	struct re_registers *regs;
327	{
328	return re_search_2_stub (bufp, string1, length1, string2, length2,
329	start, 0, regs, stop, 1);
330	}
331	#ifdef _LIBC
332	weak_alias (__re_match_2, re_match_2)
333	#endif
334
335	int
336	re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
337	struct re_pattern_buffer *bufp;
338	const char string1, string2;
339	int length1, length2, start, range, stop;
340	struct re_registers *regs;
341	{
342	return re_search_2_stub (bufp, string1, length1, string2, length2,
343	start, range, regs, stop, 0);
344	}
345	#ifdef _LIBC
346	weak_alias (__re_search_2, re_search_2)
347	#endif
348
349	static int
350	re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
351	stop, ret_len)
352	struct re_pattern_buffer *bufp;
353	const char string1, string2;
354	int length1, length2, start, range, stop, ret_len;
355	struct re_registers *regs;
356	{
357	const char *str;
358	int rval;
359	int len = length1 + length2;
360	int free_str = 0;
361
362	if (BE (length1 < 0 \|\| length2 < 0 \|\| stop < 0, 0))
363	return -2;
364
365	/* Concatenate the strings. */
366	if (length2 > 0)
367	if (length1 > 0)
368	{
369	char *s = re_malloc (char, len);
370
371	if (BE (s == NULL, 0))
372	return -2;
373	memcpy (s, string1, length1);
374	memcpy (s + length1, string2, length2);
375	str = s;
376	free_str = 1;
377	}
378	else
379	str = string2;
380	else
381	str = string1;
382
383	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
384	ret_len);
385	if (free_str)
386	re_free ((char *) str);
387	return rval;
388	}
389
390	/* The parameters have the same meaning as those of re_search.
391	Additional parameters:
392	If RET_LEN is nonzero the length of the match is returned (re_match style);
393	otherwise the position of the match is returned. */
394
395	static int
396	re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
397	struct re_pattern_buffer *bufp;
398	const char *string;
399	int length, start, range, stop, ret_len;
400	struct re_registers *regs;
401	{
402	reg_errcode_t result;
403	regmatch_t *pmatch;
404	int nregs, rval;
405	int eflags = 0;
406
407	/* Check for out-of-range. */
408	if (BE (start < 0 \|\| start > length, 0))
409	return -1;
410	if (BE (start + range > length, 0))
411	range = length - start;
412	else if (BE (start + range < 0, 0))
413	range = -start;
414
415	eflags \|= (bufp->not_bol) ? REG_NOTBOL : 0;
416	eflags \|= (bufp->not_eol) ? REG_NOTEOL : 0;
417
418	/* Compile fastmap if we haven't yet. */
419	if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
420	re_compile_fastmap (bufp);
421
422	if (BE (bufp->no_sub, 0))
423	regs = NULL;
424
425	/* We need at least 1 register. */
426	if (regs == NULL)
427	nregs = 1;
428	else if (BE (bufp->regs_allocated == REGS_FIXED &&
429	regs->num_regs < bufp->re_nsub + 1, 0))
430	{
431	nregs = regs->num_regs;
432	if (BE (nregs < 1, 0))
433	{
434	/* Nothing can be copied to regs. */
435	regs = NULL;
436	nregs = 1;
437	}
438	}
439	else
440	nregs = bufp->re_nsub + 1;
441	pmatch = re_malloc (regmatch_t, nregs);
442	if (BE (pmatch == NULL, 0))
443	return -2;
444
445	result = re_search_internal (bufp, string, length, start, range, stop,
446	nregs, pmatch, eflags);
447
448	rval = 0;
449
450	/* I hope we needn't fill ther regs with -1's when no match was found. */
451	if (result != REG_NOERROR)
452	rval = -1;
453	else if (regs != NULL)
454	{
455	/* If caller wants register contents data back, copy them. */
456	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
457	bufp->regs_allocated);
458	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
459	rval = -2;
460	}
461
462	if (BE (rval == 0, 1))
463	{
464	if (ret_len)
465	{
466	assert (pmatch[0].rm_so == start);
467	rval = pmatch[0].rm_eo - start;
468	}
469	else
470	rval = pmatch[0].rm_so;
471	}
472	re_free (pmatch);
473	return rval;
474	}
475
476	static unsigned
477	re_copy_regs (regs, pmatch, nregs, regs_allocated)
478	struct re_registers *regs;
479	regmatch_t *pmatch;
480	int nregs, regs_allocated;
481	{
482	int rval = REGS_REALLOCATE;
483	int i;
484	int need_regs = nregs + 1;
485	/* We need one extra element beyond `num_regs' for the `-1' marker GNU code
486	uses. */
487
488	/* Have the register data arrays been allocated? */
489	if (regs_allocated == REGS_UNALLOCATED)
490	{ /* No. So allocate them with malloc. */
491	regs->start = re_malloc (regoff_t, need_regs);
492	regs->end = re_malloc (regoff_t, need_regs);
493	if (BE (regs->start == NULL, 0) \|\| BE (regs->end == NULL, 0))
494	return REGS_UNALLOCATED;
495	regs->num_regs = need_regs;
496	}
497	else if (regs_allocated == REGS_REALLOCATE)
498	{ /* Yes. If we need more elements than were already
499	allocated, reallocate them. If we need fewer, just
500	leave it alone. */
501	if (BE (need_regs > regs->num_regs, 0))
502	{
503	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
504	regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
505	if (BE (new_start == NULL, 0) \|\| BE (new_end == NULL, 0))
506	return REGS_UNALLOCATED;
507	regs->start = new_start;
508	regs->end = new_end;
509	regs->num_regs = need_regs;
510	}
511	}
512	else
513	{
514	assert (regs_allocated == REGS_FIXED);
515	/* This function may not be called with REGS_FIXED and nregs too big. */
516	assert (regs->num_regs >= nregs);
517	rval = REGS_FIXED;
518	}
519
520	/* Copy the regs. */
521	for (i = 0; i < nregs; ++i)
522	{
523	regs->start[i] = pmatch[i].rm_so;
524	regs->end[i] = pmatch[i].rm_eo;
525	}
526	for ( ; i < regs->num_regs; ++i)
527	regs->start[i] = regs->end[i] = -1;
528
529	return rval;
530	}
531
532	/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
533	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
534	this memory for recording register information. STARTS and ENDS
535	must be allocated using the malloc library routine, and must each
536	be at least NUM_REGS * sizeof (regoff_t) bytes long.
537
538	If NUM_REGS == 0, then subsequent matches should allocate their own
539	register data.
540
541	Unless this function is called, the first search or match using
542	PATTERN_BUFFER will allocate its own register data, without
543	freeing the old data. */
544
545	void
546	re_set_registers (bufp, regs, num_regs, starts, ends)
547	struct re_pattern_buffer *bufp;
548	struct re_registers *regs;
549	unsigned num_regs;
550	regoff_t starts, ends;
551	{
552	if (num_regs)
553	{
554	bufp->regs_allocated = REGS_REALLOCATE;
555	regs->num_regs = num_regs;
556	regs->start = starts;
557	regs->end = ends;
558	}
559	else
560	{
561	bufp->regs_allocated = REGS_UNALLOCATED;
562	regs->num_regs = 0;
563	regs->start = regs->end = (regoff_t *) 0;
564	}
565	}
566	#ifdef _LIBC
567	weak_alias (__re_set_registers, re_set_registers)
568	#endif
569
570
571	/* Entry points compatible with 4.2 BSD regex library. We don't define
572	them unless specifically requested. */
573
574	#if defined _REGEX_RE_COMP \|\| defined _LIBC
575	int
576	# ifdef _LIBC
577	weak_function
578	# endif
579	re_exec (s)
580	const char *s;
581	{
582	return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
583	}
584	#endif /* _REGEX_RE_COMP */
585
586
587	/* Internal entry point. */
588
589	/* Searches for a compiled pattern PREG in the string STRING, whose
590	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
591	mingings with regexec. START, and RANGE have the same meanings
592	with re_search.
593	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
594	otherwise return the error code.
595	Note: We assume front end functions already check ranges.
596	(START + RANGE >= 0 && START + RANGE <= LENGTH) */
597
598	static reg_errcode_t
599	re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
600	eflags)
601	const regex_t *preg;
602	const char *string;
603	int length, start, range, stop, eflags;
604	size_t nmatch;
605	regmatch_t pmatch[];
606	{
607	reg_errcode_t err;
608	re_dfa_t dfa = (re_dfa_t )preg->buffer;
609	int left_lim, right_lim, incr;
610	int fl_longest_match, match_first, match_kind, match_last = -1;
611	int extra_nmatch;
612	int sb, ch;
613	#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
614	re_match_context_t mctx = { .dfa = dfa };
615	#else
616	re_match_context_t mctx;
617	#endif
618	char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
619	&& range && !preg->can_be_null) ? preg->fastmap : NULL;
620	unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
621
622	#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
623	memset (&mctx, '\0', sizeof (re_match_context_t));
624	mctx.dfa = dfa;
625	#endif
626
627	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
628	nmatch -= extra_nmatch;
629
630	/* Check if the DFA haven't been compiled. */
631	if (BE (preg->used == 0 \|\| dfa->init_state == NULL
632	\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
633	\|\| dfa->init_state_begbuf == NULL, 0))
634	return REG_NOMATCH;
635
636	#ifdef DEBUG
637	/* We assume front-end functions already check them. */
638	assert (start + range >= 0 && start + range <= length);
639	#endif
640
641	/* If initial states with non-begbuf contexts have no elements,
642	the regex must be anchored. If preg->newline_anchor is set,
643	we'll never use init_state_nl, so do not check it. */
644	if (dfa->init_state->nodes.nelem == 0
645	&& dfa->init_state_word->nodes.nelem == 0
646	&& (dfa->init_state_nl->nodes.nelem == 0
647	\|\| !preg->newline_anchor))
648	{
649	if (start != 0 && start + range != 0)
650	return REG_NOMATCH;
651	start = range = 0;
652	}
653
654	/* We must check the longest matching, if nmatch > 0. */
655	fl_longest_match = (nmatch != 0 \|\| dfa->nbackref);
656
657	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
658	preg->translate, preg->syntax & RE_ICASE, dfa);
659	if (BE (err != REG_NOERROR, 0))
660	goto free_return;
661	mctx.input.stop = stop;
662	mctx.input.raw_stop = stop;
663	mctx.input.newline_anchor = preg->newline_anchor;
664
665	err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
666	if (BE (err != REG_NOERROR, 0))
667	goto free_return;
668
669	/* We will log all the DFA states through which the dfa pass,
670	if nmatch > 1, or this dfa has "multibyte node", which is a
671	back-reference or a node which can accept multibyte character or
672	multi character collating element. */
673	if (nmatch > 1 \|\| dfa->has_mb_node)
674	{
675	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
676	if (BE (mctx.state_log == NULL, 0))
677	{
678	err = REG_ESPACE;
679	goto free_return;
680	}
681	}
682	else
683	mctx.state_log = NULL;
684
685	match_first = start;
686	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
687	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
688
689	/* Check incrementally whether of not the input string match. */
690	incr = (range < 0) ? -1 : 1;
691	left_lim = (range < 0) ? start + range : start;
692	right_lim = (range < 0) ? start : start + range;
693	sb = dfa->mb_cur_max == 1;
694	match_kind =
695	(fastmap
696	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? 4 : 0)
697	\| (range >= 0 ? 2 : 0)
698	\| (t != NULL ? 1 : 0))
699	: 8);
700
701	for (;; match_first += incr)
702	{
703	err = REG_NOMATCH;
704	if (match_first < left_lim \|\| right_lim < match_first)
705	goto free_return;
706
707	/* Advance as rapidly as possible through the string, until we
708	find a plausible place to start matching. This may be done
709	with varying efficiency, so there are various possibilities:
710	only the most common of them are specialized, in order to
711	save on code size. We use a switch statement for speed. */
712	switch (match_kind)
713	{
714	case 8:
715	/* No fastmap. */
716	break;
717
718	case 7:
719	/* Fastmap with single-byte translation, match forward. */
720	while (BE (match_first < right_lim, 1)
721	&& !fastmap[t[(unsigned char) string[match_first]]])
722	++match_first;
723	goto forward_match_found_start_or_reached_end;
724
725	case 6:
726	/* Fastmap without translation, match forward. */
727	while (BE (match_first < right_lim, 1)
728	&& !fastmap[(unsigned char) string[match_first]])
729	++match_first;
730
731	forward_match_found_start_or_reached_end:
732	if (BE (match_first == right_lim, 0))
733	{
734	ch = match_first >= length
735	? 0 : (unsigned char) string[match_first];
736	if (!fastmap[t ? t[ch] : ch])
737	goto free_return;
738	}
739	break;
740
741	case 4:
742	case 5:
743	/* Fastmap without multi-byte translation, match backwards. */
744	while (match_first >= left_lim)
745	{
746	ch = match_first >= length
747	? 0 : (unsigned char) string[match_first];
748	if (fastmap[t ? t[ch] : ch])
749	break;
750	--match_first;
751	}
752	if (match_first < left_lim)
753	goto free_return;
754	break;
755
756	default:
757	/* In this case, we can't determine easily the current byte,
758	since it might be a component byte of a multibyte
759	character. Then we use the constructed buffer instead. */
760	for (;;)
761	{
762	/* If MATCH_FIRST is out of the valid range, reconstruct the
763	buffers. */
764	unsigned int offset = match_first - mctx.input.raw_mbs_idx;
765	if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
766	{
767	err = re_string_reconstruct (&mctx.input, match_first,
768	eflags);
769	if (BE (err != REG_NOERROR, 0))
770	goto free_return;
771
772	offset = match_first - mctx.input.raw_mbs_idx;
773	}
774	/* If MATCH_FIRST is out of the buffer, leave it as '\0'.
775	Note that MATCH_FIRST must not be smaller than 0. */
776	ch = (match_first >= length
777	? 0 : re_string_byte_at (&mctx.input, offset));
778	if (fastmap[ch])
779	break;
780	match_first += incr;
781	if (match_first < left_lim \|\| match_first > right_lim)
782	{
783	err = REG_NOMATCH;
784	goto free_return;
785	}
786	}
787	break;
788	}
789
790	/* Reconstruct the buffers so that the matcher can assume that
791	the matching starts from the beginning of the buffer. */
792	err = re_string_reconstruct (&mctx.input, match_first, eflags);
793	if (BE (err != REG_NOERROR, 0))
794	goto free_return;
795
796	#ifdef RE_ENABLE_I18N
797	/* Don't consider this char as a possible match start if it part,
798	yet isn't the head, of a multibyte character. */
799	if (!sb && !re_string_first_byte (&mctx.input, 0))
800	continue;
801	#endif
802
803	/* It seems to be appropriate one, then use the matcher. */
804	/* We assume that the matching starts from 0. */
805	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
806	match_last = check_matching (&mctx, fl_longest_match,
807	range >= 0 ? &match_first : NULL);
808	if (match_last != -1)
809	{
810	if (BE (match_last == -2, 0))
811	{
812	err = REG_ESPACE;
813	goto free_return;
814	}
815	else
816	{
817	mctx.match_last = match_last;
818	if ((!preg->no_sub && nmatch > 1) \|\| dfa->nbackref)
819	{
820	re_dfastate_t *pstate = mctx.state_log[match_last];
821	mctx.last_node = check_halt_state_context (&mctx, pstate,
822	match_last);
823	}
824	if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
825	\|\| dfa->nbackref)
826	{
827	err = prune_impossible_nodes (&mctx);
828	if (err == REG_NOERROR)
829	break;
830	if (BE (err != REG_NOMATCH, 0))
831	goto free_return;
832	match_last = -1;
833	}
834	else
835	break; /* We found a match. */
836	}
837	}
838
839	match_ctx_clean (&mctx);
840	}
841
842	#ifdef DEBUG
843	assert (match_last != -1);
844	assert (err == REG_NOERROR);
845	#endif
846
847	/* Set pmatch[] if we need. */
848	if (nmatch > 0)
849	{
850	int reg_idx;
851
852	/* Initialize registers. */
853	for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
854	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
855
856	/* Set the points where matching start/end. */
857	pmatch[0].rm_so = 0;
858	pmatch[0].rm_eo = mctx.match_last;
859
860	if (!preg->no_sub && nmatch > 1)
861	{
862	err = set_regs (preg, &mctx, nmatch, pmatch,
863	dfa->has_plural_match && dfa->nbackref > 0);
864	if (BE (err != REG_NOERROR, 0))
865	goto free_return;
866	}
867
868	/* At last, add the offset to the each registers, since we slided
869	the buffers so that we could assume that the matching starts
870	from 0. */
871	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
872	if (pmatch[reg_idx].rm_so != -1)
873	{
874	#ifdef RE_ENABLE_I18N
875	if (BE (mctx.input.offsets_needed != 0, 0))
876	{
877	if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
878	pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
879	else
880	pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
881	if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
882	pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
883	else
884	pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
885	}
886	#else
887	assert (mctx.input.offsets_needed == 0);
888	#endif
889	pmatch[reg_idx].rm_so += match_first;
890	pmatch[reg_idx].rm_eo += match_first;
891	}
892	for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
893	{
894	pmatch[nmatch + reg_idx].rm_so = -1;
895	pmatch[nmatch + reg_idx].rm_eo = -1;
896	}
897
898	if (dfa->subexp_map)
899	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
900	if (dfa->subexp_map[reg_idx] != reg_idx)
901	{
902	pmatch[reg_idx + 1].rm_so
903	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
904	pmatch[reg_idx + 1].rm_eo
905	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
906	}
907	}
908
909	free_return:
910	re_free (mctx.state_log);
911	if (dfa->nbackref)
912	match_ctx_free (&mctx);
913	re_string_destruct (&mctx.input);
914	return err;
915	}
916
917	static reg_errcode_t
918	prune_impossible_nodes (mctx)
919	re_match_context_t *mctx;
920	{
921	re_dfa_t *const dfa = mctx->dfa;
922	int halt_node, match_last;
923	reg_errcode_t ret;
924	re_dfastate_t **sifted_states;
925	re_dfastate_t **lim_states = NULL;
926	re_sift_context_t sctx;
927	#ifdef DEBUG
928	assert (mctx->state_log != NULL);
929	#endif
930	match_last = mctx->match_last;
931	halt_node = mctx->last_node;
932	sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
933	if (BE (sifted_states == NULL, 0))
934	{
935	ret = REG_ESPACE;
936	goto free_return;
937	}
938	if (dfa->nbackref)
939	{
940	lim_states = re_malloc (re_dfastate_t *, match_last + 1);
941	if (BE (lim_states == NULL, 0))
942	{
943	ret = REG_ESPACE;
944	goto free_return;
945	}
946	while (1)
947	{
948	memset (lim_states, '\0',
949	sizeof (re_dfastate_t ) (match_last + 1));
950	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
951	match_last);
952	ret = sift_states_backward (mctx, &sctx);
953	re_node_set_free (&sctx.limits);
954	if (BE (ret != REG_NOERROR, 0))
955	goto free_return;
956	if (sifted_states[0] != NULL \|\| lim_states[0] != NULL)
957	break;
958	do
959	{
960	--match_last;
961	if (match_last < 0)
962	{
963	ret = REG_NOMATCH;
964	goto free_return;
965	}
966	} while (mctx->state_log[match_last] == NULL
967	\|\| !mctx->state_log[match_last]->halt);
968	halt_node = check_halt_state_context (mctx,
969	mctx->state_log[match_last],
970	match_last);
971	}
972	ret = merge_state_array (dfa, sifted_states, lim_states,
973	match_last + 1);
974	re_free (lim_states);
975	lim_states = NULL;
976	if (BE (ret != REG_NOERROR, 0))
977	goto free_return;
978	}
979	else
980	{
981	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
982	ret = sift_states_backward (mctx, &sctx);
983	re_node_set_free (&sctx.limits);
984	if (BE (ret != REG_NOERROR, 0))
985	goto free_return;
986	}
987	re_free (mctx->state_log);
988	mctx->state_log = sifted_states;
989	sifted_states = NULL;
990	mctx->last_node = halt_node;
991	mctx->match_last = match_last;
992	ret = REG_NOERROR;
993	free_return:
994	re_free (sifted_states);
995	re_free (lim_states);
996	return ret;
997	}
998
999	/* Acquire an initial state and return it.
1000	We must select appropriate initial state depending on the context,
1001	since initial states may have constraints like "\<", "^", etc.. */
1002
1003	static inline re_dfastate_t *
1004	acquire_init_state_context (err, mctx, idx)
1005	reg_errcode_t *err;
1006	const re_match_context_t *mctx;
1007	int idx;
1008	{
1009	re_dfa_t *const dfa = mctx->dfa;
1010	if (dfa->init_state->has_constraint)
1011	{
1012	unsigned int context;
1013	context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1014	if (IS_WORD_CONTEXT (context))
1015	return dfa->init_state_word;
1016	else if (IS_ORDINARY_CONTEXT (context))
1017	return dfa->init_state;
1018	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1019	return dfa->init_state_begbuf;
1020	else if (IS_NEWLINE_CONTEXT (context))
1021	return dfa->init_state_nl;
1022	else if (IS_BEGBUF_CONTEXT (context))
1023	{
1024	/* It is relatively rare case, then calculate on demand. */
1025	return re_acquire_state_context (err, dfa,
1026	dfa->init_state->entrance_nodes,
1027	context);
1028	}
1029	else
1030	/* Must not happen? */
1031	return dfa->init_state;
1032	}
1033	else
1034	return dfa->init_state;
1035	}
1036
1037	/* Check whether the regular expression match input string INPUT or not,
1038	and return the index where the matching end, return -1 if not match,
1039	or return -2 in case of an error.
1040	FL_LONGEST_MATCH means we want the POSIX longest matching.
1041	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1042	next place where we may want to try matching.
1043	Note that the matcher assume that the maching starts from the current
1044	index of the buffer. */
1045
1046	static int
1047	check_matching (mctx, fl_longest_match, p_match_first)
1048	re_match_context_t *mctx;
1049	int fl_longest_match;
1050	int *p_match_first;
1051	{
1052	re_dfa_t *const dfa = mctx->dfa;
1053	reg_errcode_t err;
1054	int match = 0;
1055	int match_last = -1;
1056	int cur_str_idx = re_string_cur_idx (&mctx->input);
1057	re_dfastate_t *cur_state;
1058	int at_init_state = p_match_first != NULL;
1059	int next_start_idx = cur_str_idx;
1060
1061	err = REG_NOERROR;
1062	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1063	/* An initial state must not be NULL (invalid). */
1064	if (BE (cur_state == NULL, 0))
1065	{
1066	assert (err == REG_ESPACE);
1067	return -2;
1068	}
1069
1070	if (mctx->state_log != NULL)
1071	{
1072	mctx->state_log[cur_str_idx] = cur_state;
1073
1074	/* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1075	later. E.g. Processing back references. */
1076	if (BE (dfa->nbackref, 0))
1077	{
1078	at_init_state = 0;
1079	err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1080	if (BE (err != REG_NOERROR, 0))
1081	return err;
1082
1083	if (cur_state->has_backref)
1084	{
1085	err = transit_state_bkref (mctx, &cur_state->nodes);
1086	if (BE (err != REG_NOERROR, 0))
1087	return err;
1088	}
1089	}
1090	}
1091
1092	/* If the RE accepts NULL string. */
1093	if (BE (cur_state->halt, 0))
1094	{
1095	if (!cur_state->has_constraint
1096	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1097	{
1098	if (!fl_longest_match)
1099	return cur_str_idx;
1100	else
1101	{
1102	match_last = cur_str_idx;
1103	match = 1;
1104	}
1105	}
1106	}
1107
1108	while (!re_string_eoi (&mctx->input))
1109	{
1110	re_dfastate_t *old_state = cur_state;
1111	int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1112
1113	if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1114	\|\| (BE (next_char_idx >= mctx->input.valid_len, 0)
1115	&& mctx->input.valid_len < mctx->input.len))
1116	{
1117	err = extend_buffers (mctx);
1118	if (BE (err != REG_NOERROR, 0))
1119	{
1120	assert (err == REG_ESPACE);
1121	return -2;
1122	}
1123	}
1124
1125	cur_state = transit_state (&err, mctx, cur_state);
1126	if (mctx->state_log != NULL)
1127	cur_state = merge_state_with_log (&err, mctx, cur_state);
1128
1129	if (cur_state == NULL)
1130	{
1131	/* Reached the invalid state or an error. Try to recover a valid
1132	state using the state log, if available and if we have not
1133	already found a valid (even if not the longest) match. */
1134	if (BE (err != REG_NOERROR, 0))
1135	return -2;
1136
1137	if (mctx->state_log == NULL
1138	\|\| (match && !fl_longest_match)
1139	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1140	break;
1141	}
1142
1143	if (BE (at_init_state, 0))
1144	{
1145	if (old_state == cur_state)
1146	next_start_idx = next_char_idx;
1147	else
1148	at_init_state = 0;
1149	}
1150
1151	if (cur_state->halt)
1152	{
1153	/* Reached a halt state.
1154	Check the halt state can satisfy the current context. */
1155	if (!cur_state->has_constraint
1156	\|\| check_halt_state_context (mctx, cur_state,
1157	re_string_cur_idx (&mctx->input)))
1158	{
1159	/* We found an appropriate halt state. */
1160	match_last = re_string_cur_idx (&mctx->input);
1161	match = 1;
1162
1163	/* We found a match, do not modify match_first below. */
1164	p_match_first = NULL;
1165	if (!fl_longest_match)
1166	break;
1167	}
1168	}
1169	}
1170
1171	if (p_match_first)
1172	*p_match_first += next_start_idx;
1173
1174	return match_last;
1175	}
1176
1177	/* Check NODE match the current context. */
1178
1179	static int check_halt_node_context (dfa, node, context)
1180	const re_dfa_t *dfa;
1181	int node;
1182	unsigned int context;
1183	{
1184	re_token_type_t type = dfa->nodes[node].type;
1185	unsigned int constraint = dfa->nodes[node].constraint;
1186	if (type != END_OF_RE)
1187	return 0;
1188	if (!constraint)
1189	return 1;
1190	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1191	return 0;
1192	return 1;
1193	}
1194
1195	/* Check the halt state STATE match the current context.
1196	Return 0 if not match, if the node, STATE has, is a halt node and
1197	match the context, return the node. */
1198
1199	static int
1200	check_halt_state_context (mctx, state, idx)
1201	const re_match_context_t *mctx;
1202	const re_dfastate_t *state;
1203	int idx;
1204	{
1205	int i;
1206	unsigned int context;
1207	#ifdef DEBUG
1208	assert (state->halt);
1209	#endif
1210	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1211	for (i = 0; i < state->nodes.nelem; ++i)
1212	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1213	return state->nodes.elems[i];
1214	return 0;
1215	}
1216
1217	/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1218	corresponding to the DFA).
1219	Return the destination node, and update EPS_VIA_NODES, return -1 in case
1220	of errors. */
1221
1222	static int
1223	proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
1224	const re_match_context_t *mctx;
1225	regmatch_t *regs;
1226	int nregs, *pidx, node;
1227	re_node_set *eps_via_nodes;
1228	struct re_fail_stack_t *fs;
1229	{
1230	re_dfa_t *const dfa = mctx->dfa;
1231	int i, err, dest_node;
1232	dest_node = -1;
1233	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1234	{
1235	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1236	re_node_set *edests = &dfa->edests[node];
1237	int dest_node;
1238	err = re_node_set_insert (eps_via_nodes, node);
1239	if (BE (err < 0, 0))
1240	return -2;
1241	/* Pick up a valid destination, or return -1 if none is found. */
1242	for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1243	{
1244	int candidate = edests->elems[i];
1245	if (!re_node_set_contains (cur_nodes, candidate))
1246	continue;
1247	if (dest_node == -1)
1248	dest_node = candidate;
1249
1250	else
1251	{
1252	/* In order to avoid infinite loop like "(a)", return the second
1253	epsilon-transition if the first was already considered. */
1254	if (re_node_set_contains (eps_via_nodes, dest_node))
1255	return candidate;
1256
1257	/* Otherwise, push the second epsilon-transition on the fail stack. */
1258	else if (fs != NULL
1259	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1260	eps_via_nodes) != REG_NOERROR)
1261	return -2;
1262
1263	/* We know we are going to exit. */
1264	break;
1265	}
1266	}
1267	return dest_node;
1268	}
1269	else
1270	{
1271	int naccepted = 0;
1272	re_token_type_t type = dfa->nodes[node].type;
1273
1274	#ifdef RE_ENABLE_I18N
1275	if (dfa->nodes[node].accept_mb)
1276	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1277	else
1278	#endif /* RE_ENABLE_I18N */
1279	if (type == OP_BACK_REF)
1280	{
1281	int subexp_idx = dfa->nodes[node].opr.idx + 1;
1282	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1283	if (fs != NULL)
1284	{
1285	if (regs[subexp_idx].rm_so == -1 \|\| regs[subexp_idx].rm_eo == -1)
1286	return -1;
1287	else if (naccepted)
1288	{
1289	char buf = (char ) re_string_get_buffer (&mctx->input);
1290	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1291	naccepted) != 0)
1292	return -1;
1293	}
1294	}
1295
1296	if (naccepted == 0)
1297	{
1298	err = re_node_set_insert (eps_via_nodes, node);
1299	if (BE (err < 0, 0))
1300	return -2;
1301	dest_node = dfa->edests[node].elems[0];
1302	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1303	dest_node))
1304	return dest_node;
1305	}
1306	}
1307
1308	if (naccepted != 0
1309	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1310	{
1311	dest_node = dfa->nexts[node];
1312	pidx = (naccepted == 0) ? pidx + 1 : *pidx + naccepted;
1313	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1314	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1315	dest_node)))
1316	return -1;
1317	re_node_set_empty (eps_via_nodes);
1318	return dest_node;
1319	}
1320	}
1321	return -1;
1322	}
1323
1324	static reg_errcode_t
1325	push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
1326	struct re_fail_stack_t *fs;
1327	int str_idx, dest_node, nregs;
1328	regmatch_t *regs;
1329	re_node_set *eps_via_nodes;
1330	{
1331	reg_errcode_t err;
1332	int num = fs->num++;
1333	if (fs->num == fs->alloc)
1334	{
1335	struct re_fail_stack_ent_t *new_array;
1336	new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1337	* fs->alloc * 2));
1338	if (new_array == NULL)
1339	return REG_ESPACE;
1340	fs->alloc *= 2;
1341	fs->stack = new_array;
1342	}
1343	fs->stack[num].idx = str_idx;
1344	fs->stack[num].node = dest_node;
1345	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1346	if (fs->stack[num].regs == NULL)
1347	return REG_ESPACE;
1348	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1349	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1350	return err;
1351	}
1352
1353	static int
1354	pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1355	struct re_fail_stack_t *fs;
1356	int *pidx, nregs;
1357	regmatch_t *regs;
1358	re_node_set *eps_via_nodes;
1359	{
1360	int num = --fs->num;
1361	assert (num >= 0);
1362	*pidx = fs->stack[num].idx;
1363	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1364	re_node_set_free (eps_via_nodes);
1365	re_free (fs->stack[num].regs);
1366	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1367	return fs->stack[num].node;
1368	}
1369
1370	/* Set the positions where the subexpressions are starts/ends to registers
1371	PMATCH.
1372	Note: We assume that pmatch[0] is already set, and
1373	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1374
1375	static reg_errcode_t
1376	set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1377	const regex_t *preg;
1378	const re_match_context_t *mctx;
1379	size_t nmatch;
1380	regmatch_t *pmatch;
1381	int fl_backtrack;
1382	{
1383	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1384	int idx, cur_node;
1385	re_node_set eps_via_nodes;
1386	struct re_fail_stack_t *fs;
1387	struct re_fail_stack_t fs_body = { 0, 2, NULL };
1388	regmatch_t *prev_idx_match;
1389
1390	#ifdef DEBUG
1391	assert (nmatch > 1);
1392	assert (mctx->state_log != NULL);
1393	#endif
1394	if (fl_backtrack)
1395	{
1396	fs = &fs_body;
1397	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1398	if (fs->stack == NULL)
1399	return REG_ESPACE;
1400	}
1401	else
1402	fs = NULL;
1403
1404	cur_node = dfa->init_node;
1405	re_node_set_init_empty (&eps_via_nodes);
1406
1407	prev_idx_match = re_malloc (regmatch_t, nmatch);
1408	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1409
1410	for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1411	{
1412	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1413
1414	if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1415	{
1416	int reg_idx;
1417	if (fs)
1418	{
1419	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1420	if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1421	break;
1422	if (reg_idx == nmatch)
1423	{
1424	re_node_set_free (&eps_via_nodes);
1425	re_free (prev_idx_match);
1426	return free_fail_stack_return (fs);
1427	}
1428	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1429	&eps_via_nodes);
1430	}
1431	else
1432	{
1433	re_node_set_free (&eps_via_nodes);
1434	re_free (prev_idx_match);
1435	return REG_NOERROR;
1436	}
1437	}
1438
1439	/* Proceed to next node. */
1440	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1441	&eps_via_nodes, fs);
1442
1443	if (BE (cur_node < 0, 0))
1444	{
1445	if (BE (cur_node == -2, 0))
1446	{
1447	re_node_set_free (&eps_via_nodes);
1448	free_fail_stack_return (fs);
1449	re_free (prev_idx_match);
1450	return REG_ESPACE;
1451	}
1452	if (fs)
1453	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1454	&eps_via_nodes);
1455	else
1456	{
1457	re_node_set_free (&eps_via_nodes);
1458	re_free (prev_idx_match);
1459	return REG_NOMATCH;
1460	}
1461	}
1462	}
1463	re_node_set_free (&eps_via_nodes);
1464	re_free (prev_idx_match);
1465	return free_fail_stack_return (fs);
1466	}
1467
1468	static reg_errcode_t
1469	free_fail_stack_return (fs)
1470	struct re_fail_stack_t *fs;
1471	{
1472	if (fs)
1473	{
1474	int fs_idx;
1475	for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1476	{
1477	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1478	re_free (fs->stack[fs_idx].regs);
1479	}
1480	re_free (fs->stack);
1481	}
1482	return REG_NOERROR;
1483	}
1484
1485	static void
1486	update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
1487	re_dfa_t *dfa;
1488	regmatch_t pmatch, prev_idx_match;
1489	int cur_node, cur_idx, nmatch;
1490	{
1491	int type = dfa->nodes[cur_node].type;
1492	if (type == OP_OPEN_SUBEXP)
1493	{
1494	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1495
1496	/* We are at the first node of this sub expression. */
1497	if (reg_num < nmatch)
1498	{
1499	pmatch[reg_num].rm_so = cur_idx;
1500	pmatch[reg_num].rm_eo = -1;
1501	}
1502	}
1503	else if (type == OP_CLOSE_SUBEXP)
1504	{
1505	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1506	if (reg_num < nmatch)
1507	{
1508	/* We are at the last node of this sub expression. */
1509	if (pmatch[reg_num].rm_so < cur_idx)
1510	{
1511	pmatch[reg_num].rm_eo = cur_idx;
1512	/* This is a non-empty match or we are not inside an optional
1513	subexpression. Accept this right away. */
1514	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1515	}
1516	else
1517	{
1518	if (dfa->nodes[cur_node].opt_subexp
1519	&& prev_idx_match[reg_num].rm_so != -1)
1520	/* We transited through an empty match for an optional
1521	subexpression, like (a?)*, and this is not the subexp's
1522	first match. Copy back the old content of the registers
1523	so that matches of an inner subexpression are undone as
1524	well, like in ((a?)). /
1525	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1526	else
1527	/* We completed a subexpression, but it may be part of
1528	an optional one, so do not update PREV_IDX_MATCH. */
1529	pmatch[reg_num].rm_eo = cur_idx;
1530	}
1531	}
1532	}
1533	}
1534
1535	/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1536	and sift the nodes in each states according to the following rules.
1537	Updated state_log will be wrote to STATE_LOG.
1538
1539	Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1540	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1541	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1542	the LAST_NODE, we throw away the node `a'.
1543	2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1544	string `s' and transit to `b':
1545	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1546	away the node `a'.
1547	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1548	thrown away, we throw away the node `a'.
1549	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1550	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1551	node `a'.
1552	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1553	we throw away the node `a'. */
1554
1555	#define STATE_NODE_CONTAINS(state,node) \
1556	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1557
1558	static reg_errcode_t
1559	sift_states_backward (mctx, sctx)
1560	re_match_context_t *mctx;
1561	re_sift_context_t *sctx;
1562	{
1563	reg_errcode_t err;
1564	int null_cnt = 0;
1565	int str_idx = sctx->last_str_idx;
1566	re_node_set cur_dest;
1567
1568	#ifdef DEBUG
1569	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1570	#endif
1571
1572	/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1573	transit to the last_node and the last_node itself. */
1574	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1575	if (BE (err != REG_NOERROR, 0))
1576	return err;
1577	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1578	if (BE (err != REG_NOERROR, 0))
1579	goto free_return;
1580
1581	/* Then check each states in the state_log. */
1582	while (str_idx > 0)
1583	{
1584	/* Update counters. */
1585	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1586	if (null_cnt > mctx->max_mb_elem_len)
1587	{
1588	memset (sctx->sifted_states, '\0',
1589	sizeof (re_dfastate_t ) str_idx);
1590	re_node_set_free (&cur_dest);
1591	return REG_NOERROR;
1592	}
1593	re_node_set_empty (&cur_dest);
1594	--str_idx;
1595
1596	if (mctx->state_log[str_idx])
1597	{
1598	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1599	if (BE (err != REG_NOERROR, 0))
1600	goto free_return;
1601	}
1602
1603	/* Add all the nodes which satisfy the following conditions:
1604	- It can epsilon transit to a node in CUR_DEST.
1605	- It is in CUR_SRC.
1606	And update state_log. */
1607	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1608	if (BE (err != REG_NOERROR, 0))
1609	goto free_return;
1610	}
1611	err = REG_NOERROR;
1612	free_return:
1613	re_node_set_free (&cur_dest);
1614	return err;
1615	}
1616
1617	static reg_errcode_t
1618	build_sifted_states (mctx, sctx, str_idx, cur_dest)
1619	re_match_context_t *mctx;
1620	re_sift_context_t *sctx;
1621	int str_idx;
1622	re_node_set *cur_dest;
1623	{
1624	re_dfa_t *const dfa = mctx->dfa;
1625	re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1626	int i;
1627
1628	/* Then build the next sifted state.
1629	We build the next sifted state on `cur_dest', and update
1630	`sifted_states[str_idx]' with `cur_dest'.
1631	Note:
1632	`cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1633	`cur_src' points the node_set of the old `state_log[str_idx]'
1634	(with the epsilon nodes pre-filtered out). */
1635	for (i = 0; i < cur_src->nelem; i++)
1636	{
1637	int prev_node = cur_src->elems[i];
1638	int naccepted = 0;
1639	int ret;
1640
1641	#ifdef DEBUG
1642	re_token_type_t type = dfa->nodes[prev_node].type;
1643	assert (!IS_EPSILON_NODE (type));
1644	#endif
1645	#ifdef RE_ENABLE_I18N
1646	/* If the node may accept `multi byte'. */
1647	if (dfa->nodes[prev_node].accept_mb)
1648	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1649	str_idx, sctx->last_str_idx);
1650	#endif /* RE_ENABLE_I18N */
1651
1652	/* We don't check backreferences here.
1653	See update_cur_sifted_state(). */
1654	if (!naccepted
1655	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1656	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1657	dfa->nexts[prev_node]))
1658	naccepted = 1;
1659
1660	if (naccepted == 0)
1661	continue;
1662
1663	if (sctx->limits.nelem)
1664	{
1665	int to_idx = str_idx + naccepted;
1666	if (check_dst_limits (mctx, &sctx->limits,
1667	dfa->nexts[prev_node], to_idx,
1668	prev_node, str_idx))
1669	continue;
1670	}
1671	ret = re_node_set_insert (cur_dest, prev_node);
1672	if (BE (ret == -1, 0))
1673	return REG_ESPACE;
1674	}
1675
1676	return REG_NOERROR;
1677	}
1678
1679	/* Helper functions. */
1680
1681	static reg_errcode_t
1682	clean_state_log_if_needed (mctx, next_state_log_idx)
1683	re_match_context_t *mctx;
1684	int next_state_log_idx;
1685	{
1686	int top = mctx->state_log_top;
1687
1688	if (next_state_log_idx >= mctx->input.bufs_len
1689	\|\| (next_state_log_idx >= mctx->input.valid_len
1690	&& mctx->input.valid_len < mctx->input.len))
1691	{
1692	reg_errcode_t err;
1693	err = extend_buffers (mctx);
1694	if (BE (err != REG_NOERROR, 0))
1695	return err;
1696	}
1697
1698	if (top < next_state_log_idx)
1699	{
1700	memset (mctx->state_log + top + 1, '\0',
1701	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1702	mctx->state_log_top = next_state_log_idx;
1703	}
1704	return REG_NOERROR;
1705	}
1706
1707	static reg_errcode_t
1708	merge_state_array (dfa, dst, src, num)
1709	re_dfa_t *dfa;
1710	re_dfastate_t **dst;
1711	re_dfastate_t **src;
1712	int num;
1713	{
1714	int st_idx;
1715	reg_errcode_t err;
1716	for (st_idx = 0; st_idx < num; ++st_idx)
1717	{
1718	if (dst[st_idx] == NULL)
1719	dst[st_idx] = src[st_idx];
1720	else if (src[st_idx] != NULL)
1721	{
1722	re_node_set merged_set;
1723	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1724	&src[st_idx]->nodes);
1725	if (BE (err != REG_NOERROR, 0))
1726	return err;
1727	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1728	re_node_set_free (&merged_set);
1729	if (BE (err != REG_NOERROR, 0))
1730	return err;
1731	}
1732	}
1733	return REG_NOERROR;
1734	}
1735
1736	static reg_errcode_t
1737	update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
1738	re_match_context_t *mctx;
1739	re_sift_context_t *sctx;
1740	int str_idx;
1741	re_node_set *dest_nodes;
1742	{
1743	re_dfa_t *const dfa = mctx->dfa;
1744	reg_errcode_t err;
1745	const re_node_set *candidates;
1746	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1747	: &mctx->state_log[str_idx]->nodes);
1748
1749	if (dest_nodes->nelem == 0)
1750	sctx->sifted_states[str_idx] = NULL;
1751	else
1752	{
1753	if (candidates)
1754	{
1755	/* At first, add the nodes which can epsilon transit to a node in
1756	DEST_NODE. */
1757	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1758	if (BE (err != REG_NOERROR, 0))
1759	return err;
1760
1761	/* Then, check the limitations in the current sift_context. */
1762	if (sctx->limits.nelem)
1763	{
1764	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1765	mctx->bkref_ents, str_idx);
1766	if (BE (err != REG_NOERROR, 0))
1767	return err;
1768	}
1769	}
1770
1771	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1772	if (BE (err != REG_NOERROR, 0))
1773	return err;
1774	}
1775
1776	if (candidates && mctx->state_log[str_idx]->has_backref)
1777	{
1778	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1779	if (BE (err != REG_NOERROR, 0))
1780	return err;
1781	}
1782	return REG_NOERROR;
1783	}
1784
1785	static reg_errcode_t
1786	add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1787	re_dfa_t *dfa;
1788	re_node_set *dest_nodes;
1789	const re_node_set *candidates;
1790	{
1791	reg_errcode_t err = REG_NOERROR;
1792	int i;
1793
1794	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1795	if (BE (err != REG_NOERROR, 0))
1796	return err;
1797
1798	if (!state->inveclosure.alloc)
1799	{
1800	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1801	if (BE (err != REG_NOERROR, 0))
1802	return REG_ESPACE;
1803	for (i = 0; i < dest_nodes->nelem; i++)
1804	re_node_set_merge (&state->inveclosure,
1805	dfa->inveclosures + dest_nodes->elems[i]);
1806	}
1807	return re_node_set_add_intersect (dest_nodes, candidates,
1808	&state->inveclosure);
1809	}
1810
1811	static reg_errcode_t
1812	sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1813	re_dfa_t *dfa;
1814	int node;
1815	re_node_set *dest_nodes;
1816	const re_node_set *candidates;
1817	{
1818	int ecl_idx;
1819	reg_errcode_t err;
1820	re_node_set *inv_eclosure = dfa->inveclosures + node;
1821	re_node_set except_nodes;
1822	re_node_set_init_empty (&except_nodes);
1823	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1824	{
1825	int cur_node = inv_eclosure->elems[ecl_idx];
1826	if (cur_node == node)
1827	continue;
1828	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1829	{
1830	int edst1 = dfa->edests[cur_node].elems[0];
1831	int edst2 = ((dfa->edests[cur_node].nelem > 1)
1832	? dfa->edests[cur_node].elems[1] : -1);
1833	if ((!re_node_set_contains (inv_eclosure, edst1)
1834	&& re_node_set_contains (dest_nodes, edst1))
1835	\|\| (edst2 > 0
1836	&& !re_node_set_contains (inv_eclosure, edst2)
1837	&& re_node_set_contains (dest_nodes, edst2)))
1838	{
1839	err = re_node_set_add_intersect (&except_nodes, candidates,
1840	dfa->inveclosures + cur_node);
1841	if (BE (err != REG_NOERROR, 0))
1842	{
1843	re_node_set_free (&except_nodes);
1844	return err;
1845	}
1846	}
1847	}
1848	}
1849	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1850	{
1851	int cur_node = inv_eclosure->elems[ecl_idx];
1852	if (!re_node_set_contains (&except_nodes, cur_node))
1853	{
1854	int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1855	re_node_set_remove_at (dest_nodes, idx);
1856	}
1857	}
1858	re_node_set_free (&except_nodes);
1859	return REG_NOERROR;
1860	}
1861
1862	static int
1863	check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
1864	re_match_context_t *mctx;
1865	re_node_set *limits;
1866	int dst_node, dst_idx, src_node, src_idx;
1867	{
1868	re_dfa_t *const dfa = mctx->dfa;
1869	int lim_idx, src_pos, dst_pos;
1870
1871	int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1872	int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1873	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1874	{
1875	int subexp_idx;
1876	struct re_backref_cache_entry *ent;
1877	ent = mctx->bkref_ents + limits->elems[lim_idx];
1878	subexp_idx = dfa->nodes[ent->node].opr.idx;
1879
1880	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1881	subexp_idx, dst_node, dst_idx,
1882	dst_bkref_idx);
1883	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1884	subexp_idx, src_node, src_idx,
1885	src_bkref_idx);
1886
1887	/* In case of:
1888	<src> <dst> ( <subexp> )
1889	( <subexp> ) <src> <dst>
1890	( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1891	if (src_pos == dst_pos)
1892	continue; /* This is unrelated limitation. */
1893	else
1894	return 1;
1895	}
1896	return 0;
1897	}
1898
1899	static int
1900	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
1901	re_match_context_t *mctx;
1902	int boundaries, subexp_idx, from_node, bkref_idx;
1903	{
1904	re_dfa_t *const dfa = mctx->dfa;
1905	re_node_set *eclosures = dfa->eclosures + from_node;
1906	int node_idx;
1907
1908	/* Else, we are on the boundary: examine the nodes on the epsilon
1909	closure. */
1910	for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1911	{
1912	int node = eclosures->elems[node_idx];
1913	switch (dfa->nodes[node].type)
1914	{
1915	case OP_BACK_REF:
1916	if (bkref_idx != -1)
1917	{
1918	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1919	do
1920	{
1921	int dst, cpos;
1922
1923	if (ent->node != node)
1924	continue;
1925
1926	if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
1927	&& !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
1928	continue;
1929
1930	/* Recurse trying to reach the OP_OPEN_SUBEXP and
1931	OP_CLOSE_SUBEXP cases below. But, if the
1932	destination node is the same node as the source
1933	node, don't recurse because it would cause an
1934	infinite loop: a regex that exhibits this behavior
1935	is ()\1\1 */
1936	dst = dfa->edests[node].elems[0];
1937	if (dst == from_node)
1938	{
1939	if (boundaries & 1)
1940	return -1;
1941	else /* if (boundaries & 2) */
1942	return 0;
1943	}
1944
1945	cpos =
1946	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1947	dst, bkref_idx);
1948	if (cpos == -1 /* && (boundaries & 1) */)
1949	return -1;
1950	if (cpos == 0 && (boundaries & 2))
1951	return 0;
1952
1953	ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
1954	}
1955	while (ent++->more);
1956	}
1957	break;
1958
1959	case OP_OPEN_SUBEXP:
1960	if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1961	return -1;
1962	break;
1963
1964	case OP_CLOSE_SUBEXP:
1965	if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1966	return 0;
1967	break;
1968
1969	default:
1970	break;
1971	}
1972	}
1973
1974	return (boundaries & 2) ? 1 : 0;
1975	}
1976
1977	static int
1978	check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
1979	re_match_context_t *mctx;
1980	int limit, subexp_idx, from_node, str_idx, bkref_idx;
1981	{
1982	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1983	int boundaries;
1984
1985	/* If we are outside the range of the subexpression, return -1 or 1. */
1986	if (str_idx < lim->subexp_from)
1987	return -1;
1988
1989	if (lim->subexp_to < str_idx)
1990	return 1;
1991
1992	/* If we are within the subexpression, return 0. */
1993	boundaries = (str_idx == lim->subexp_from);
1994	boundaries \|= (str_idx == lim->subexp_to) << 1;
1995	if (boundaries == 0)
1996	return 0;
1997
1998	/* Else, examine epsilon closure. */
1999	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2000	from_node, bkref_idx);
2001	}
2002
2003	/* Check the limitations of sub expressions LIMITS, and remove the nodes
2004	which are against limitations from DEST_NODES. */
2005
2006	static reg_errcode_t
2007	check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
2008	re_dfa_t *dfa;
2009	re_node_set *dest_nodes;
2010	const re_node_set *candidates;
2011	re_node_set *limits;
2012	struct re_backref_cache_entry *bkref_ents;
2013	int str_idx;
2014	{
2015	reg_errcode_t err;
2016	int node_idx, lim_idx;
2017
2018	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2019	{
2020	int subexp_idx;
2021	struct re_backref_cache_entry *ent;
2022	ent = bkref_ents + limits->elems[lim_idx];
2023
2024	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2025	continue; /* This is unrelated limitation. */
2026
2027	subexp_idx = dfa->nodes[ent->node].opr.idx;
2028	if (ent->subexp_to == str_idx)
2029	{
2030	int ops_node = -1;
2031	int cls_node = -1;
2032	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2033	{
2034	int node = dest_nodes->elems[node_idx];
2035	re_token_type_t type = dfa->nodes[node].type;
2036	if (type == OP_OPEN_SUBEXP
2037	&& subexp_idx == dfa->nodes[node].opr.idx)
2038	ops_node = node;
2039	else if (type == OP_CLOSE_SUBEXP
2040	&& subexp_idx == dfa->nodes[node].opr.idx)
2041	cls_node = node;
2042	}
2043
2044	/* Check the limitation of the open subexpression. */
2045	/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2046	if (ops_node >= 0)
2047	{
2048	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2049	candidates);
2050	if (BE (err != REG_NOERROR, 0))
2051	return err;
2052	}
2053
2054	/* Check the limitation of the close subexpression. */
2055	if (cls_node >= 0)
2056	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2057	{
2058	int node = dest_nodes->elems[node_idx];
2059	if (!re_node_set_contains (dfa->inveclosures + node,
2060	cls_node)
2061	&& !re_node_set_contains (dfa->eclosures + node,
2062	cls_node))
2063	{
2064	/* It is against this limitation.
2065	Remove it form the current sifted state. */
2066	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2067	candidates);
2068	if (BE (err != REG_NOERROR, 0))
2069	return err;
2070	--node_idx;
2071	}
2072	}
2073	}
2074	else /* (ent->subexp_to != str_idx) */
2075	{
2076	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2077	{
2078	int node = dest_nodes->elems[node_idx];
2079	re_token_type_t type = dfa->nodes[node].type;
2080	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2081	{
2082	if (subexp_idx != dfa->nodes[node].opr.idx)
2083	continue;
2084	/* It is against this limitation.
2085	Remove it form the current sifted state. */
2086	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2087	candidates);
2088	if (BE (err != REG_NOERROR, 0))
2089	return err;
2090	}
2091	}
2092	}
2093	}
2094	return REG_NOERROR;
2095	}
2096
2097	static reg_errcode_t
2098	sift_states_bkref (mctx, sctx, str_idx, candidates)
2099	re_match_context_t *mctx;
2100	re_sift_context_t *sctx;
2101	int str_idx;
2102	const re_node_set *candidates;
2103	{
2104	re_dfa_t *const dfa = mctx->dfa;
2105	reg_errcode_t err;
2106	int node_idx, node;
2107	re_sift_context_t local_sctx;
2108	int first_idx = search_cur_bkref_entry (mctx, str_idx);
2109
2110	if (first_idx == -1)
2111	return REG_NOERROR;
2112
2113	local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2114
2115	for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2116	{
2117	int enabled_idx;
2118	re_token_type_t type;
2119	struct re_backref_cache_entry *entry;
2120	node = candidates->elems[node_idx];
2121	type = dfa->nodes[node].type;
2122	/* Avoid infinite loop for the REs like "()\1+". */
2123	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2124	continue;
2125	if (type != OP_BACK_REF)
2126	continue;
2127
2128	entry = mctx->bkref_ents + first_idx;
2129	enabled_idx = first_idx;
2130	do
2131	{
2132	int subexp_len, to_idx, dst_node;
2133	re_dfastate_t *cur_state;
2134
2135	if (entry->node != node)
2136	continue;
2137	subexp_len = entry->subexp_to - entry->subexp_from;
2138	to_idx = str_idx + subexp_len;
2139	dst_node = (subexp_len ? dfa->nexts[node]
2140	: dfa->edests[node].elems[0]);
2141
2142	if (to_idx > sctx->last_str_idx
2143	\|\| sctx->sifted_states[to_idx] == NULL
2144	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2145	\|\| check_dst_limits (mctx, &sctx->limits, node,
2146	str_idx, dst_node, to_idx))
2147	continue;
2148
2149	if (local_sctx.sifted_states == NULL)
2150	{
2151	local_sctx = *sctx;
2152	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2153	if (BE (err != REG_NOERROR, 0))
2154	goto free_return;
2155	}
2156	local_sctx.last_node = node;
2157	local_sctx.last_str_idx = str_idx;
2158	err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2159	if (BE (err < 0, 0))
2160	{
2161	err = REG_ESPACE;
2162	goto free_return;
2163	}
2164	cur_state = local_sctx.sifted_states[str_idx];
2165	err = sift_states_backward (mctx, &local_sctx);
2166	if (BE (err != REG_NOERROR, 0))
2167	goto free_return;
2168	if (sctx->limited_states != NULL)
2169	{
2170	err = merge_state_array (dfa, sctx->limited_states,
2171	local_sctx.sifted_states,
2172	str_idx + 1);
2173	if (BE (err != REG_NOERROR, 0))
2174	goto free_return;
2175	}
2176	local_sctx.sifted_states[str_idx] = cur_state;
2177	re_node_set_remove (&local_sctx.limits, enabled_idx);
2178
2179	/* mctx->bkref_ents may have changed, reload the pointer. */
2180	entry = mctx->bkref_ents + enabled_idx;
2181	}
2182	while (enabled_idx++, entry++->more);
2183	}
2184	err = REG_NOERROR;
2185	free_return:
2186	if (local_sctx.sifted_states != NULL)
2187	{
2188	re_node_set_free (&local_sctx.limits);
2189	}
2190
2191	return err;
2192	}
2193
2194
2195	#ifdef RE_ENABLE_I18N
2196	static int
2197	sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
2198	const re_match_context_t *mctx;
2199	re_sift_context_t *sctx;
2200	int node_idx, str_idx, max_str_idx;
2201	{
2202	re_dfa_t *const dfa = mctx->dfa;
2203	int naccepted;
2204	/* Check the node can accept `multi byte'. */
2205	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2206	if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2207	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2208	dfa->nexts[node_idx]))
2209	/* The node can't accept the `multi byte', or the
2210	destination was already thrown away, then the node
2211	could't accept the current input `multi byte'. */
2212	naccepted = 0;
2213	/* Otherwise, it is sure that the node could accept
2214	`naccepted' bytes input. */
2215	return naccepted;
2216	}
2217	#endif /* RE_ENABLE_I18N */
2218
2219
2220
2221	/* Functions for state transition. */
2222
2223	/* Return the next state to which the current state STATE will transit by
2224	accepting the current input byte, and update STATE_LOG if necessary.
2225	If STATE can accept a multibyte char/collating element/back reference
2226	update the destination of STATE_LOG. */
2227
2228	static re_dfastate_t *
2229	transit_state (err, mctx, state)
2230	reg_errcode_t *err;
2231	re_match_context_t *mctx;
2232	re_dfastate_t *state;
2233	{
2234	re_dfastate_t **trtable;
2235	unsigned char ch;
2236
2237	#ifdef RE_ENABLE_I18N
2238	/* If the current state can accept multibyte. */
2239	if (BE (state->accept_mb, 0))
2240	{
2241	*err = transit_state_mb (mctx, state);
2242	if (BE (*err != REG_NOERROR, 0))
2243	return NULL;
2244	}
2245	#endif /* RE_ENABLE_I18N */
2246
2247	/* Then decide the next state with the single byte. */
2248	#if 0
2249	if (0)
2250	/* don't use transition table */
2251	return transit_state_sb (err, mctx, state);
2252	#endif
2253
2254	/* Use transition table */
2255	ch = re_string_fetch_byte (&mctx->input);
2256	for (;;)
2257	{
2258	trtable = state->trtable;
2259	if (BE (trtable != NULL, 1))
2260	return trtable[ch];
2261
2262	trtable = state->word_trtable;
2263	if (BE (trtable != NULL, 1))
2264	{
2265	unsigned int context;
2266	context
2267	= re_string_context_at (&mctx->input,
2268	re_string_cur_idx (&mctx->input) - 1,
2269	mctx->eflags);
2270	if (IS_WORD_CONTEXT (context))
2271	return trtable[ch + SBC_MAX];
2272	else
2273	return trtable[ch];
2274	}
2275
2276	if (!build_trtable (mctx->dfa, state))
2277	{
2278	*err = REG_ESPACE;
2279	return NULL;
2280	}
2281
2282	/* Retry, we now have a transition table. */
2283	}
2284	}
2285
2286	/* Update the state_log if we need */
2287	re_dfastate_t *
2288	merge_state_with_log (err, mctx, next_state)
2289	reg_errcode_t *err;
2290	re_match_context_t *mctx;
2291	re_dfastate_t *next_state;
2292	{
2293	re_dfa_t *const dfa = mctx->dfa;
2294	int cur_idx = re_string_cur_idx (&mctx->input);
2295
2296	if (cur_idx > mctx->state_log_top)
2297	{
2298	mctx->state_log[cur_idx] = next_state;
2299	mctx->state_log_top = cur_idx;
2300	}
2301	else if (mctx->state_log[cur_idx] == 0)
2302	{
2303	mctx->state_log[cur_idx] = next_state;
2304	}
2305	else
2306	{
2307	re_dfastate_t *pstate;
2308	unsigned int context;
2309	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2310	/* If (state_log[cur_idx] != 0), it implies that cur_idx is
2311	the destination of a multibyte char/collating element/
2312	back reference. Then the next state is the union set of
2313	these destinations and the results of the transition table. */
2314	pstate = mctx->state_log[cur_idx];
2315	log_nodes = pstate->entrance_nodes;
2316	if (next_state != NULL)
2317	{
2318	table_nodes = next_state->entrance_nodes;
2319	*err = re_node_set_init_union (&next_nodes, table_nodes,
2320	log_nodes);
2321	if (BE (*err != REG_NOERROR, 0))
2322	return NULL;
2323	}
2324	else
2325	next_nodes = *log_nodes;
2326	/* Note: We already add the nodes of the initial state,
2327	then we don't need to add them here. */
2328
2329	context = re_string_context_at (&mctx->input,
2330	re_string_cur_idx (&mctx->input) - 1,
2331	mctx->eflags);
2332	next_state = mctx->state_log[cur_idx]
2333	= re_acquire_state_context (err, dfa, &next_nodes, context);
2334	/* We don't need to check errors here, since the return value of
2335	this function is next_state and ERR is already set. */
2336
2337	if (table_nodes != NULL)
2338	re_node_set_free (&next_nodes);
2339	}
2340
2341	if (BE (dfa->nbackref, 0) && next_state != NULL)
2342	{
2343	/* Check OP_OPEN_SUBEXP in the current state in case that we use them
2344	later. We must check them here, since the back references in the
2345	next state might use them. */
2346	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2347	cur_idx);
2348	if (BE (*err != REG_NOERROR, 0))
2349	return NULL;
2350
2351	/* If the next state has back references. */
2352	if (next_state->has_backref)
2353	{
2354	*err = transit_state_bkref (mctx, &next_state->nodes);
2355	if (BE (*err != REG_NOERROR, 0))
2356	return NULL;
2357	next_state = mctx->state_log[cur_idx];
2358	}
2359	}
2360
2361	return next_state;
2362	}
2363
2364	/* Skip bytes in the input that correspond to part of a
2365	multi-byte match, then look in the log for a state
2366	from which to restart matching. */
2367	re_dfastate_t *
2368	find_recover_state (err, mctx)
2369	reg_errcode_t *err;
2370	re_match_context_t *mctx;
2371	{
2372	re_dfastate_t *cur_state = NULL;
2373	do
2374	{
2375	int max = mctx->state_log_top;
2376	int cur_str_idx = re_string_cur_idx (&mctx->input);
2377
2378	do
2379	{
2380	if (++cur_str_idx > max)
2381	return NULL;
2382	re_string_skip_bytes (&mctx->input, 1);
2383	}
2384	while (mctx->state_log[cur_str_idx] == NULL);
2385
2386	cur_state = merge_state_with_log (err, mctx, NULL);
2387	}
2388	while (err == REG_NOERROR && cur_state == NULL);
2389	return cur_state;
2390	}
2391
2392	/* Helper functions for transit_state. */
2393
2394	/* From the node set CUR_NODES, pick up the nodes whose types are
2395	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2396	expression. And register them to use them later for evaluating the
2397	correspoding back references. */
2398
2399	static reg_errcode_t
2400	check_subexp_matching_top (mctx, cur_nodes, str_idx)
2401	re_match_context_t *mctx;
2402	re_node_set *cur_nodes;
2403	int str_idx;
2404	{
2405	re_dfa_t *const dfa = mctx->dfa;
2406	int node_idx;
2407	reg_errcode_t err;
2408
2409	/* TODO: This isn't efficient.
2410	Because there might be more than one nodes whose types are
2411	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2412	nodes.
2413	E.g. RE: (a){2} */
2414	for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2415	{
2416	int node = cur_nodes->elems[node_idx];
2417	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2418	&& dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2419	&& dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2420	{
2421	err = match_ctx_add_subtop (mctx, node, str_idx);
2422	if (BE (err != REG_NOERROR, 0))
2423	return err;
2424	}
2425	}
2426	return REG_NOERROR;
2427	}
2428
2429	#if 0
2430	/* Return the next state to which the current state STATE will transit by
2431	accepting the current input byte. */
2432
2433	static re_dfastate_t *
2434	transit_state_sb (err, mctx, state)
2435	reg_errcode_t *err;
2436	re_match_context_t *mctx;
2437	re_dfastate_t *state;
2438	{
2439	re_dfa_t *const dfa = mctx->dfa;
2440	re_node_set next_nodes;
2441	re_dfastate_t *next_state;
2442	int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2443	unsigned int context;
2444
2445	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2446	if (BE (*err != REG_NOERROR, 0))
2447	return NULL;
2448	for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2449	{
2450	int cur_node = state->nodes.elems[node_cnt];
2451	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2452	{
2453	*err = re_node_set_merge (&next_nodes,
2454	dfa->eclosures + dfa->nexts[cur_node]);
2455	if (BE (*err != REG_NOERROR, 0))
2456	{
2457	re_node_set_free (&next_nodes);
2458	return NULL;
2459	}
2460	}
2461	}
2462	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2463	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2464	/* We don't need to check errors here, since the return value of
2465	this function is next_state and ERR is already set. */
2466
2467	re_node_set_free (&next_nodes);
2468	re_string_skip_bytes (&mctx->input, 1);
2469	return next_state;
2470	}
2471	#endif
2472
2473	#ifdef RE_ENABLE_I18N
2474	static reg_errcode_t
2475	transit_state_mb (mctx, pstate)
2476	re_match_context_t *mctx;
2477	re_dfastate_t *pstate;
2478	{
2479	re_dfa_t *const dfa = mctx->dfa;
2480	reg_errcode_t err;
2481	int i;
2482
2483	for (i = 0; i < pstate->nodes.nelem; ++i)
2484	{
2485	re_node_set dest_nodes, *new_nodes;
2486	int cur_node_idx = pstate->nodes.elems[i];
2487	int naccepted, dest_idx;
2488	unsigned int context;
2489	re_dfastate_t *dest_state;
2490
2491	if (!dfa->nodes[cur_node_idx].accept_mb)
2492	continue;
2493
2494	if (dfa->nodes[cur_node_idx].constraint)
2495	{
2496	context = re_string_context_at (&mctx->input,
2497	re_string_cur_idx (&mctx->input),
2498	mctx->eflags);
2499	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2500	context))
2501	continue;
2502	}
2503
2504	/* How many bytes the node can accept? */
2505	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2506	re_string_cur_idx (&mctx->input));
2507	if (naccepted == 0)
2508	continue;
2509
2510	/* The node can accepts `naccepted' bytes. */
2511	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2512	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2513	: mctx->max_mb_elem_len);
2514	err = clean_state_log_if_needed (mctx, dest_idx);
2515	if (BE (err != REG_NOERROR, 0))
2516	return err;
2517	#ifdef DEBUG
2518	assert (dfa->nexts[cur_node_idx] != -1);
2519	#endif
2520	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2521
2522	dest_state = mctx->state_log[dest_idx];
2523	if (dest_state == NULL)
2524	dest_nodes = *new_nodes;
2525	else
2526	{
2527	err = re_node_set_init_union (&dest_nodes,
2528	dest_state->entrance_nodes, new_nodes);
2529	if (BE (err != REG_NOERROR, 0))
2530	return err;
2531	}
2532	context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2533	mctx->state_log[dest_idx]
2534	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2535	if (dest_state != NULL)
2536	re_node_set_free (&dest_nodes);
2537	if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2538	return err;
2539	}
2540	return REG_NOERROR;
2541	}
2542	#endif /* RE_ENABLE_I18N */
2543
2544	static reg_errcode_t
2545	transit_state_bkref (mctx, nodes)
2546	re_match_context_t *mctx;
2547	const re_node_set *nodes;
2548	{
2549	re_dfa_t *const dfa = mctx->dfa;
2550	reg_errcode_t err;
2551	int i;
2552	int cur_str_idx = re_string_cur_idx (&mctx->input);
2553
2554	for (i = 0; i < nodes->nelem; ++i)
2555	{
2556	int dest_str_idx, prev_nelem, bkc_idx;
2557	int node_idx = nodes->elems[i];
2558	unsigned int context;
2559	const re_token_t *node = dfa->nodes + node_idx;
2560	re_node_set *new_dest_nodes;
2561
2562	/* Check whether `node' is a backreference or not. */
2563	if (node->type != OP_BACK_REF)
2564	continue;
2565
2566	if (node->constraint)
2567	{
2568	context = re_string_context_at (&mctx->input, cur_str_idx,
2569	mctx->eflags);
2570	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2571	continue;
2572	}
2573
2574	/* `node' is a backreference.
2575	Check the substring which the substring matched. */
2576	bkc_idx = mctx->nbkref_ents;
2577	err = get_subexp (mctx, node_idx, cur_str_idx);
2578	if (BE (err != REG_NOERROR, 0))
2579	goto free_return;
2580
2581	/* And add the epsilon closures (which is `new_dest_nodes') of
2582	the backreference to appropriate state_log. */
2583	#ifdef DEBUG
2584	assert (dfa->nexts[node_idx] != -1);
2585	#endif
2586	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2587	{
2588	int subexp_len;
2589	re_dfastate_t *dest_state;
2590	struct re_backref_cache_entry *bkref_ent;
2591	bkref_ent = mctx->bkref_ents + bkc_idx;
2592	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2593	continue;
2594	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2595	new_dest_nodes = (subexp_len == 0
2596	? dfa->eclosures + dfa->edests[node_idx].elems[0]
2597	: dfa->eclosures + dfa->nexts[node_idx]);
2598	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2599	- bkref_ent->subexp_from);
2600	context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2601	mctx->eflags);
2602	dest_state = mctx->state_log[dest_str_idx];
2603	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2604	: mctx->state_log[cur_str_idx]->nodes.nelem);
2605	/* Add `new_dest_node' to state_log. */
2606	if (dest_state == NULL)
2607	{
2608	mctx->state_log[dest_str_idx]
2609	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2610	context);
2611	if (BE (mctx->state_log[dest_str_idx] == NULL
2612	&& err != REG_NOERROR, 0))
2613	goto free_return;
2614	}
2615	else
2616	{
2617	re_node_set dest_nodes;
2618	err = re_node_set_init_union (&dest_nodes,
2619	dest_state->entrance_nodes,
2620	new_dest_nodes);
2621	if (BE (err != REG_NOERROR, 0))
2622	{
2623	re_node_set_free (&dest_nodes);
2624	goto free_return;
2625	}
2626	mctx->state_log[dest_str_idx]
2627	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2628	re_node_set_free (&dest_nodes);
2629	if (BE (mctx->state_log[dest_str_idx] == NULL
2630	&& err != REG_NOERROR, 0))
2631	goto free_return;
2632	}
2633	/* We need to check recursively if the backreference can epsilon
2634	transit. */
2635	if (subexp_len == 0
2636	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2637	{
2638	err = check_subexp_matching_top (mctx, new_dest_nodes,
2639	cur_str_idx);
2640	if (BE (err != REG_NOERROR, 0))
2641	goto free_return;
2642	err = transit_state_bkref (mctx, new_dest_nodes);
2643	if (BE (err != REG_NOERROR, 0))
2644	goto free_return;
2645	}
2646	}
2647	}
2648	err = REG_NOERROR;
2649	free_return:
2650	return err;
2651	}
2652
2653	/* Enumerate all the candidates which the backreference BKREF_NODE can match
2654	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2655	Note that we might collect inappropriate candidates here.
2656	However, the cost of checking them strictly here is too high, then we
2657	delay these checking for prune_impossible_nodes(). */
2658
2659	static reg_errcode_t
2660	get_subexp (mctx, bkref_node, bkref_str_idx)
2661	re_match_context_t *mctx;
2662	int bkref_node, bkref_str_idx;
2663	{
2664	re_dfa_t *const dfa = mctx->dfa;
2665	int subexp_num, sub_top_idx;
2666	const char buf = (const char ) re_string_get_buffer (&mctx->input);
2667	/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2668	int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2669	if (cache_idx != -1)
2670	{
2671	const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2672	do
2673	if (entry->node == bkref_node)
2674	return REG_NOERROR; /* We already checked it. */
2675	while (entry++->more);
2676	}
2677
2678	subexp_num = dfa->nodes[bkref_node].opr.idx;
2679
2680	/* For each sub expression */
2681	for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2682	{
2683	reg_errcode_t err;
2684	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2685	re_sub_match_last_t *sub_last;
2686	int sub_last_idx, sl_str, bkref_str_off;
2687
2688	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2689	continue; /* It isn't related. */
2690
2691	sl_str = sub_top->str_idx;
2692	bkref_str_off = bkref_str_idx;
2693	/* At first, check the last node of sub expressions we already
2694	evaluated. */
2695	for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2696	{
2697	int sl_str_diff;
2698	sub_last = sub_top->lasts[sub_last_idx];
2699	sl_str_diff = sub_last->str_idx - sl_str;
2700	/* The matched string by the sub expression match with the substring
2701	at the back reference? */
2702	if (sl_str_diff > 0)
2703	{
2704	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2705	{
2706	/* Not enough chars for a successful match. */
2707	if (bkref_str_off + sl_str_diff > mctx->input.len)
2708	break;
2709
2710	err = clean_state_log_if_needed (mctx,
2711	bkref_str_off
2712	+ sl_str_diff);
2713	if (BE (err != REG_NOERROR, 0))
2714	return err;
2715	buf = (const char *) re_string_get_buffer (&mctx->input);
2716	}
2717	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2718	break; /* We don't need to search this sub expression any more. */
2719	}
2720	bkref_str_off += sl_str_diff;
2721	sl_str += sl_str_diff;
2722	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2723	bkref_str_idx);
2724
2725	/* Reload buf, since the preceding call might have reallocated
2726	the buffer. */
2727	buf = (const char *) re_string_get_buffer (&mctx->input);
2728
2729	if (err == REG_NOMATCH)
2730	continue;
2731	if (BE (err != REG_NOERROR, 0))
2732	return err;
2733	}
2734
2735	if (sub_last_idx < sub_top->nlasts)
2736	continue;
2737	if (sub_last_idx > 0)
2738	++sl_str;
2739	/* Then, search for the other last nodes of the sub expression. */
2740	for (; sl_str <= bkref_str_idx; ++sl_str)
2741	{
2742	int cls_node, sl_str_off;
2743	const re_node_set *nodes;
2744	sl_str_off = sl_str - sub_top->str_idx;
2745	/* The matched string by the sub expression match with the substring
2746	at the back reference? */
2747	if (sl_str_off > 0)
2748	{
2749	if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2750	{
2751	/* If we are at the end of the input, we cannot match. */
2752	if (bkref_str_off >= mctx->input.len)
2753	break;
2754
2755	err = extend_buffers (mctx);
2756	if (BE (err != REG_NOERROR, 0))
2757	return err;
2758
2759	buf = (const char *) re_string_get_buffer (&mctx->input);
2760	}
2761	if (buf [bkref_str_off++] != buf[sl_str - 1])
2762	break; /* We don't need to search this sub expression
2763	any more. */
2764	}
2765	if (mctx->state_log[sl_str] == NULL)
2766	continue;
2767	/* Does this state have a ')' of the sub expression? */
2768	nodes = &mctx->state_log[sl_str]->nodes;
2769	cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2770	if (cls_node == -1)
2771	continue; /* No. */
2772	if (sub_top->path == NULL)
2773	{
2774	sub_top->path = calloc (sizeof (state_array_t),
2775	sl_str - sub_top->str_idx + 1);
2776	if (sub_top->path == NULL)
2777	return REG_ESPACE;
2778	}
2779	/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2780	in the current context? */
2781	err = check_arrival (mctx, sub_top->path, sub_top->node,
2782	sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2783	if (err == REG_NOMATCH)
2784	continue;
2785	if (BE (err != REG_NOERROR, 0))
2786	return err;
2787	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2788	if (BE (sub_last == NULL, 0))
2789	return REG_ESPACE;
2790	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2791	bkref_str_idx);
2792	if (err == REG_NOMATCH)
2793	continue;
2794	}
2795	}
2796	return REG_NOERROR;
2797	}
2798
2799	/* Helper functions for get_subexp(). */
2800
2801	/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2802	If it can arrive, register the sub expression expressed with SUB_TOP
2803	and SUB_LAST. */
2804
2805	static reg_errcode_t
2806	get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
2807	re_match_context_t *mctx;
2808	const re_sub_match_top_t *sub_top;
2809	re_sub_match_last_t *sub_last;
2810	int bkref_node, bkref_str;
2811	{
2812	reg_errcode_t err;
2813	int to_idx;
2814	/* Can the subexpression arrive the back reference? */
2815	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2816	sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2817	if (err != REG_NOERROR)
2818	return err;
2819	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2820	sub_last->str_idx);
2821	if (BE (err != REG_NOERROR, 0))
2822	return err;
2823	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2824	return clean_state_log_if_needed (mctx, to_idx);
2825	}
2826
2827	/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2828	Search '(' if FL_OPEN, or search ')' otherwise.
2829	TODO: This function isn't efficient...
2830	Because there might be more than one nodes whose types are
2831	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2832	nodes.
2833	E.g. RE: (a){2} */
2834
2835	static int
2836	find_subexp_node (dfa, nodes, subexp_idx, type)
2837	const re_dfa_t *dfa;
2838	const re_node_set *nodes;
2839	int subexp_idx, type;
2840	{
2841	int cls_idx;
2842	for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2843	{
2844	int cls_node = nodes->elems[cls_idx];
2845	const re_token_t *node = dfa->nodes + cls_node;
2846	if (node->type == type
2847	&& node->opr.idx == subexp_idx)
2848	return cls_node;
2849	}
2850	return -1;
2851	}
2852
2853	/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2854	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2855	heavily reused.
2856	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2857
2858	static reg_errcode_t
2859	check_arrival (mctx, path, top_node, top_str, last_node, last_str,
2860	type)
2861	re_match_context_t *mctx;
2862	state_array_t *path;
2863	int top_node, top_str, last_node, last_str, type;
2864	{
2865	re_dfa_t *const dfa = mctx->dfa;
2866	reg_errcode_t err;
2867	int subexp_num, backup_cur_idx, str_idx, null_cnt;
2868	re_dfastate_t *cur_state = NULL;
2869	re_node_set *cur_nodes, next_nodes;
2870	re_dfastate_t **backup_state_log;
2871	unsigned int context;
2872
2873	subexp_num = dfa->nodes[top_node].opr.idx;
2874	/* Extend the buffer if we need. */
2875	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2876	{
2877	re_dfastate_t **new_array;
2878	int old_alloc = path->alloc;
2879	path->alloc += last_str + mctx->max_mb_elem_len + 1;
2880	new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2881	if (new_array == NULL)
2882	{
2883	path->alloc = old_alloc;
2884	return REG_ESPACE;
2885	}
2886	path->array = new_array;
2887	memset (new_array + old_alloc, '\0',
2888	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2889	}
2890
2891	str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2892
2893	/* Temporary modify MCTX. */
2894	backup_state_log = mctx->state_log;
2895	backup_cur_idx = mctx->input.cur_idx;
2896	mctx->state_log = path->array;
2897	mctx->input.cur_idx = str_idx;
2898
2899	/* Setup initial node set. */
2900	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2901	if (str_idx == top_str)
2902	{
2903	err = re_node_set_init_1 (&next_nodes, top_node);
2904	if (BE (err != REG_NOERROR, 0))
2905	return err;
2906	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2907	if (BE (err != REG_NOERROR, 0))
2908	{
2909	re_node_set_free (&next_nodes);
2910	return err;
2911	}
2912	}
2913	else
2914	{
2915	cur_state = mctx->state_log[str_idx];
2916	if (cur_state && cur_state->has_backref)
2917	{
2918	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2919	if (BE ( err != REG_NOERROR, 0))
2920	return err;
2921	}
2922	else
2923	re_node_set_init_empty (&next_nodes);
2924	}
2925	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2926	{
2927	if (next_nodes.nelem)
2928	{
2929	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2930	subexp_num, type);
2931	if (BE ( err != REG_NOERROR, 0))
2932	{
2933	re_node_set_free (&next_nodes);
2934	return err;
2935	}
2936	}
2937	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2938	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2939	{
2940	re_node_set_free (&next_nodes);
2941	return err;
2942	}
2943	mctx->state_log[str_idx] = cur_state;
2944	}
2945
2946	for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2947	{
2948	re_node_set_empty (&next_nodes);
2949	if (mctx->state_log[str_idx + 1])
2950	{
2951	err = re_node_set_merge (&next_nodes,
2952	&mctx->state_log[str_idx + 1]->nodes);
2953	if (BE (err != REG_NOERROR, 0))
2954	{
2955	re_node_set_free (&next_nodes);
2956	return err;
2957	}
2958	}
2959	if (cur_state)
2960	{
2961	err = check_arrival_add_next_nodes (mctx, str_idx,
2962	&cur_state->non_eps_nodes, &next_nodes);
2963	if (BE (err != REG_NOERROR, 0))
2964	{
2965	re_node_set_free (&next_nodes);
2966	return err;
2967	}
2968	}
2969	++str_idx;
2970	if (next_nodes.nelem)
2971	{
2972	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2973	if (BE (err != REG_NOERROR, 0))
2974	{
2975	re_node_set_free (&next_nodes);
2976	return err;
2977	}
2978	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2979	subexp_num, type);
2980	if (BE ( err != REG_NOERROR, 0))
2981	{
2982	re_node_set_free (&next_nodes);
2983	return err;
2984	}
2985	}
2986	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2987	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2988	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2989	{
2990	re_node_set_free (&next_nodes);
2991	return err;
2992	}
2993	mctx->state_log[str_idx] = cur_state;
2994	null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2995	}
2996	re_node_set_free (&next_nodes);
2997	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2998	: &mctx->state_log[last_str]->nodes);
2999	path->next_idx = str_idx;
3000
3001	/* Fix MCTX. */
3002	mctx->state_log = backup_state_log;
3003	mctx->input.cur_idx = backup_cur_idx;
3004
3005	/* Then check the current node set has the node LAST_NODE. */
3006	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3007	return REG_NOERROR;
3008
3009	return REG_NOMATCH;
3010	}
3011
3012	/* Helper functions for check_arrival. */
3013
3014	/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3015	to NEXT_NODES.
3016	TODO: This function is similar to the functions transit_state*(),
3017	however this function has many additional works.
3018	Can't we unify them? */
3019
3020	static reg_errcode_t
3021	check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
3022	re_match_context_t *mctx;
3023	int str_idx;
3024	re_node_set cur_nodes, next_nodes;
3025	{
3026	re_dfa_t *const dfa = mctx->dfa;
3027	int result;
3028	int cur_idx;
3029	re_node_set union_set;
3030	re_node_set_init_empty (&union_set);
3031	for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3032	{
3033	int naccepted = 0;
3034	int cur_node = cur_nodes->elems[cur_idx];
3035	#ifdef DEBUG
3036	re_token_type_t type = dfa->nodes[cur_node].type;
3037	assert (!IS_EPSILON_NODE (type));
3038	#endif
3039	#ifdef RE_ENABLE_I18N
3040	/* If the node may accept `multi byte'. */
3041	if (dfa->nodes[cur_node].accept_mb)
3042	{
3043	reg_errcode_t err;
3044
3045	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3046	str_idx);
3047	if (naccepted > 1)
3048	{
3049	re_dfastate_t *dest_state;
3050	int next_node = dfa->nexts[cur_node];
3051	int next_idx = str_idx + naccepted;
3052	dest_state = mctx->state_log[next_idx];
3053	re_node_set_empty (&union_set);
3054	if (dest_state)
3055	{
3056	err = re_node_set_merge (&union_set, &dest_state->nodes);
3057	if (BE (err != REG_NOERROR, 0))
3058	{
3059	re_node_set_free (&union_set);
3060	return err;
3061	}
3062	}
3063	result = re_node_set_insert (&union_set, next_node);
3064	if (BE (result < 0, 0))
3065	{
3066	re_node_set_free (&union_set);
3067	return REG_ESPACE;
3068	}
3069	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3070	&union_set);
3071	if (BE (mctx->state_log[next_idx] == NULL
3072	&& err != REG_NOERROR, 0))
3073	{
3074	re_node_set_free (&union_set);
3075	return err;
3076	}
3077	}
3078	}
3079	#endif /* RE_ENABLE_I18N */
3080	if (naccepted
3081	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3082	{
3083	result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3084	if (BE (result < 0, 0))
3085	{
3086	re_node_set_free (&union_set);
3087	return REG_ESPACE;
3088	}
3089	}
3090	}
3091	re_node_set_free (&union_set);
3092	return REG_NOERROR;
3093	}
3094
3095	/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3096	CUR_NODES, however exclude the nodes which are:
3097	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3098	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3099	*/
3100
3101	static reg_errcode_t
3102	check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
3103	re_dfa_t *dfa;
3104	re_node_set *cur_nodes;
3105	int ex_subexp, type;
3106	{
3107	reg_errcode_t err;
3108	int idx, outside_node;
3109	re_node_set new_nodes;
3110	#ifdef DEBUG
3111	assert (cur_nodes->nelem);
3112	#endif
3113	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3114	if (BE (err != REG_NOERROR, 0))
3115	return err;
3116	/* Create a new node set NEW_NODES with the nodes which are epsilon
3117	closures of the node in CUR_NODES. */
3118
3119	for (idx = 0; idx < cur_nodes->nelem; ++idx)
3120	{
3121	int cur_node = cur_nodes->elems[idx];
3122	re_node_set *eclosure = dfa->eclosures + cur_node;
3123	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3124	if (outside_node == -1)
3125	{
3126	/* There are no problematic nodes, just merge them. */
3127	err = re_node_set_merge (&new_nodes, eclosure);
3128	if (BE (err != REG_NOERROR, 0))
3129	{
3130	re_node_set_free (&new_nodes);
3131	return err;
3132	}
3133	}
3134	else
3135	{
3136	/* There are problematic nodes, re-calculate incrementally. */
3137	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3138	ex_subexp, type);
3139	if (BE (err != REG_NOERROR, 0))
3140	{
3141	re_node_set_free (&new_nodes);
3142	return err;
3143	}
3144	}
3145	}
3146	re_node_set_free (cur_nodes);
3147	*cur_nodes = new_nodes;
3148	return REG_NOERROR;
3149	}
3150
3151	/* Helper function for check_arrival_expand_ecl.
3152	Check incrementally the epsilon closure of TARGET, and if it isn't
3153	problematic append it to DST_NODES. */
3154
3155	static reg_errcode_t
3156	check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
3157	re_dfa_t *dfa;
3158	int target, ex_subexp, type;
3159	re_node_set *dst_nodes;
3160	{
3161	int cur_node;
3162	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3163	{
3164	int err;
3165
3166	if (dfa->nodes[cur_node].type == type
3167	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3168	{
3169	if (type == OP_CLOSE_SUBEXP)
3170	{
3171	err = re_node_set_insert (dst_nodes, cur_node);
3172	if (BE (err == -1, 0))
3173	return REG_ESPACE;
3174	}
3175	break;
3176	}
3177	err = re_node_set_insert (dst_nodes, cur_node);
3178	if (BE (err == -1, 0))
3179	return REG_ESPACE;
3180	if (dfa->edests[cur_node].nelem == 0)
3181	break;
3182	if (dfa->edests[cur_node].nelem == 2)
3183	{
3184	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3185	dfa->edests[cur_node].elems[1],
3186	ex_subexp, type);
3187	if (BE (err != REG_NOERROR, 0))
3188	return err;
3189	}
3190	cur_node = dfa->edests[cur_node].elems[0];
3191	}
3192	return REG_NOERROR;
3193	}
3194
3195
3196	/* For all the back references in the current state, calculate the
3197	destination of the back references by the appropriate entry
3198	in MCTX->BKREF_ENTS. */
3199
3200	static reg_errcode_t
3201	expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
3202	type)
3203	re_match_context_t *mctx;
3204	int cur_str, subexp_num, type;
3205	re_node_set *cur_nodes;
3206	{
3207	re_dfa_t *const dfa = mctx->dfa;
3208	reg_errcode_t err;
3209	int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3210	struct re_backref_cache_entry *ent;
3211
3212	if (cache_idx_start == -1)
3213	return REG_NOERROR;
3214
3215	restart:
3216	ent = mctx->bkref_ents + cache_idx_start;
3217	do
3218	{
3219	int to_idx, next_node;
3220
3221	/* Is this entry ENT is appropriate? */
3222	if (!re_node_set_contains (cur_nodes, ent->node))
3223	continue; /* No. */
3224
3225	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3226	/* Calculate the destination of the back reference, and append it
3227	to MCTX->STATE_LOG. */
3228	if (to_idx == cur_str)
3229	{
3230	/* The backreference did epsilon transit, we must re-check all the
3231	node in the current state. */
3232	re_node_set new_dests;
3233	reg_errcode_t err2, err3;
3234	next_node = dfa->edests[ent->node].elems[0];
3235	if (re_node_set_contains (cur_nodes, next_node))
3236	continue;
3237	err = re_node_set_init_1 (&new_dests, next_node);
3238	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3239	err3 = re_node_set_merge (cur_nodes, &new_dests);
3240	re_node_set_free (&new_dests);
3241	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
3242	\|\| err3 != REG_NOERROR, 0))
3243	{
3244	err = (err != REG_NOERROR ? err
3245	: (err2 != REG_NOERROR ? err2 : err3));
3246	return err;
3247	}
3248	/* TODO: It is still inefficient... */
3249	goto restart;
3250	}
3251	else
3252	{
3253	re_node_set union_set;
3254	next_node = dfa->nexts[ent->node];
3255	if (mctx->state_log[to_idx])
3256	{
3257	int ret;
3258	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3259	next_node))
3260	continue;
3261	err = re_node_set_init_copy (&union_set,
3262	&mctx->state_log[to_idx]->nodes);
3263	ret = re_node_set_insert (&union_set, next_node);
3264	if (BE (err != REG_NOERROR \|\| ret < 0, 0))
3265	{
3266	re_node_set_free (&union_set);
3267	err = err != REG_NOERROR ? err : REG_ESPACE;
3268	return err;
3269	}
3270	}
3271	else
3272	{
3273	err = re_node_set_init_1 (&union_set, next_node);
3274	if (BE (err != REG_NOERROR, 0))
3275	return err;
3276	}
3277	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3278	re_node_set_free (&union_set);
3279	if (BE (mctx->state_log[to_idx] == NULL
3280	&& err != REG_NOERROR, 0))
3281	return err;
3282	}
3283	}
3284	while (ent++->more);
3285	return REG_NOERROR;
3286	}
3287
3288	/* Build transition table for the state.
3289	Return 1 if succeeded, otherwise return NULL. */
3290
3291	static int
3292	build_trtable (dfa, state)
3293	re_dfa_t *dfa;
3294	re_dfastate_t *state;
3295	{
3296	reg_errcode_t err;
3297	int i, j, ch, need_word_trtable = 0;
3298	unsigned int elem, mask;
3299	int dests_node_malloced = 0, dest_states_malloced = 0;
3300	int ndests; /* Number of the destination states from `state'. */
3301	re_dfastate_t **trtable;
3302	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3303	re_node_set follows, *dests_node;
3304	bitset *dests_ch;
3305	bitset acceptable;
3306
3307	/* We build DFA states which corresponds to the destination nodes
3308	from `state'. `dests_node[i]' represents the nodes which i-th
3309	destination state contains, and `dests_ch[i]' represents the
3310	characters which i-th destination state accepts. */
3311	#ifdef _LIBC
3312	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3313	dests_node = (re_node_set *)
3314	alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3315	else
3316	#endif
3317	{
3318	dests_node = (re_node_set *)
3319	malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3320	if (BE (dests_node == NULL, 0))
3321	return 0;
3322	dests_node_malloced = 1;
3323	}
3324	dests_ch = (bitset *) (dests_node + SBC_MAX);
3325
3326	/* Initialize transiton table. */
3327	state->word_trtable = state->trtable = NULL;
3328
3329	/* At first, group all nodes belonging to `state' into several
3330	destinations. */
3331	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3332	if (BE (ndests <= 0, 0))
3333	{
3334	if (dests_node_malloced)
3335	free (dests_node);
3336	/* Return 0 in case of an error, 1 otherwise. */
3337	if (ndests == 0)
3338	{
3339	state->trtable = (re_dfastate_t **)
3340	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3341	return 1;
3342	}
3343	return 0;
3344	}
3345
3346	err = re_node_set_alloc (&follows, ndests + 1);
3347	if (BE (err != REG_NOERROR, 0))
3348	goto out_free;
3349
3350	#ifdef _LIBC
3351	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3352	+ ndests * 3 * sizeof (re_dfastate_t *)))
3353	dest_states = (re_dfastate_t **)
3354	alloca (ndests * 3 * sizeof (re_dfastate_t *));
3355	else
3356	#endif
3357	{
3358	dest_states = (re_dfastate_t **)
3359	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3360	if (BE (dest_states == NULL, 0))
3361	{
3362	out_free:
3363	if (dest_states_malloced)
3364	free (dest_states);
3365	re_node_set_free (&follows);
3366	for (i = 0; i < ndests; ++i)
3367	re_node_set_free (dests_node + i);
3368	if (dests_node_malloced)
3369	free (dests_node);
3370	return 0;
3371	}
3372	dest_states_malloced = 1;
3373	}
3374	dest_states_word = dest_states + ndests;
3375	dest_states_nl = dest_states_word + ndests;
3376	bitset_empty (acceptable);
3377
3378	/* Then build the states for all destinations. */
3379	for (i = 0; i < ndests; ++i)
3380	{
3381	int next_node;
3382	re_node_set_empty (&follows);
3383	/* Merge the follows of this destination states. */
3384	for (j = 0; j < dests_node[i].nelem; ++j)
3385	{
3386	next_node = dfa->nexts[dests_node[i].elems[j]];
3387	if (next_node != -1)
3388	{
3389	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3390	if (BE (err != REG_NOERROR, 0))
3391	goto out_free;
3392	}
3393	}
3394	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3395	if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3396	goto out_free;
3397	/* If the new state has context constraint,
3398	build appropriate states for these contexts. */
3399	if (dest_states[i]->has_constraint)
3400	{
3401	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3402	CONTEXT_WORD);
3403	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3404	goto out_free;
3405
3406	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3407	need_word_trtable = 1;
3408
3409	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3410	CONTEXT_NEWLINE);
3411	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3412	goto out_free;
3413	}
3414	else
3415	{
3416	dest_states_word[i] = dest_states[i];
3417	dest_states_nl[i] = dest_states[i];
3418	}
3419	bitset_merge (acceptable, dests_ch[i]);
3420	}
3421
3422	if (!BE (need_word_trtable, 0))
3423	{
3424	/* We don't care about whether the following character is a word
3425	character, or we are in a single-byte character set so we can
3426	discern by looking at the character code: allocate a
3427	256-entry transition table. */
3428	trtable = state->trtable =
3429	(re_dfastate_t *) calloc (sizeof (re_dfastate_t ), SBC_MAX);
3430	if (BE (trtable == NULL, 0))
3431	goto out_free;
3432
3433	/* For all characters ch...: */
3434	for (i = 0; i < BITSET_UINTS; ++i)
3435	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3436	elem;
3437	mask <<= 1, elem >>= 1, ++ch)
3438	if (BE (elem & 1, 0))
3439	{
3440	/* There must be exactly one destination which accepts
3441	character ch. See group_nodes_into_DFAstates. */
3442	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3443	;
3444
3445	/* j-th destination accepts the word character ch. */
3446	if (dfa->word_char[i] & mask)
3447	trtable[ch] = dest_states_word[j];
3448	else
3449	trtable[ch] = dest_states[j];
3450	}
3451	}
3452	else
3453	{
3454	/* We care about whether the following character is a word
3455	character, and we are in a multi-byte character set: discern
3456	by looking at the character code: build two 256-entry
3457	transition tables, one starting at trtable[0] and one
3458	starting at trtable[SBC_MAX]. */
3459	trtable = state->word_trtable =
3460	(re_dfastate_t *) calloc (sizeof (re_dfastate_t ), 2 * SBC_MAX);
3461	if (BE (trtable == NULL, 0))
3462	goto out_free;
3463
3464	/* For all characters ch...: */
3465	for (i = 0; i < BITSET_UINTS; ++i)
3466	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3467	elem;
3468	mask <<= 1, elem >>= 1, ++ch)
3469	if (BE (elem & 1, 0))
3470	{
3471	/* There must be exactly one destination which accepts
3472	character ch. See group_nodes_into_DFAstates. */
3473	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3474	;
3475
3476	/* j-th destination accepts the word character ch. */
3477	trtable[ch] = dest_states[j];
3478	trtable[ch + SBC_MAX] = dest_states_word[j];
3479	}
3480	}
3481
3482	/* new line */
3483	if (bitset_contain (acceptable, NEWLINE_CHAR))
3484	{
3485	/* The current state accepts newline character. */
3486	for (j = 0; j < ndests; ++j)
3487	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3488	{
3489	/* k-th destination accepts newline character. */
3490	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3491	if (need_word_trtable)
3492	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3493	/* There must be only one destination which accepts
3494	newline. See group_nodes_into_DFAstates. */
3495	break;
3496	}
3497	}
3498
3499	if (dest_states_malloced)
3500	free (dest_states);
3501
3502	re_node_set_free (&follows);
3503	for (i = 0; i < ndests; ++i)
3504	re_node_set_free (dests_node + i);
3505
3506	if (dests_node_malloced)
3507	free (dests_node);
3508
3509	return 1;
3510	}
3511
3512	/* Group all nodes belonging to STATE into several destinations.
3513	Then for all destinations, set the nodes belonging to the destination
3514	to DESTS_NODE[i] and set the characters accepted by the destination
3515	to DEST_CH[i]. This function return the number of destinations. */
3516
3517	static int
3518	group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
3519	re_dfa_t *dfa;
3520	const re_dfastate_t *state;
3521	re_node_set *dests_node;
3522	bitset *dests_ch;
3523	{
3524	reg_errcode_t err;
3525	int result;
3526	int i, j, k;
3527	int ndests; /* Number of the destinations from `state'. */
3528	bitset accepts; /* Characters a node can accept. */
3529	const re_node_set *cur_nodes = &state->nodes;
3530	bitset_empty (accepts);
3531	ndests = 0;
3532
3533	/* For all the nodes belonging to `state', */
3534	for (i = 0; i < cur_nodes->nelem; ++i)
3535	{
3536	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3537	re_token_type_t type = node->type;
3538	unsigned int constraint = node->constraint;
3539
3540	/* Enumerate all single byte character this node can accept. */
3541	if (type == CHARACTER)
3542	bitset_set (accepts, node->opr.c);
3543	else if (type == SIMPLE_BRACKET)
3544	{
3545	bitset_merge (accepts, node->opr.sbcset);
3546	}
3547	else if (type == OP_PERIOD)
3548	{
3549	#ifdef RE_ENABLE_I18N
3550	if (dfa->mb_cur_max > 1)
3551	bitset_merge (accepts, dfa->sb_char);
3552	else
3553	#endif
3554	bitset_set_all (accepts);
3555	if (!(dfa->syntax & RE_DOT_NEWLINE))
3556	bitset_clear (accepts, '\n');
3557	if (dfa->syntax & RE_DOT_NOT_NULL)
3558	bitset_clear (accepts, '\0');
3559	}
3560	#ifdef RE_ENABLE_I18N
3561	else if (type == OP_UTF8_PERIOD)
3562	{
3563	memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3564	if (!(dfa->syntax & RE_DOT_NEWLINE))
3565	bitset_clear (accepts, '\n');
3566	if (dfa->syntax & RE_DOT_NOT_NULL)
3567	bitset_clear (accepts, '\0');
3568	}
3569	#endif
3570	else
3571	continue;
3572
3573	/* Check the `accepts' and sift the characters which are not
3574	match it the context. */
3575	if (constraint)
3576	{
3577	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3578	{
3579	int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3580	bitset_empty (accepts);
3581	if (accepts_newline)
3582	bitset_set (accepts, NEWLINE_CHAR);
3583	else
3584	continue;
3585	}
3586	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3587	{
3588	bitset_empty (accepts);
3589	continue;
3590	}
3591
3592	if (constraint & NEXT_WORD_CONSTRAINT)
3593	{
3594	unsigned int any_set = 0;
3595	if (type == CHARACTER && !node->word_char)
3596	{
3597	bitset_empty (accepts);
3598	continue;
3599	}
3600	#ifdef RE_ENABLE_I18N
3601	if (dfa->mb_cur_max > 1)
3602	for (j = 0; j < BITSET_UINTS; ++j)
3603	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3604	else
3605	#endif
3606	for (j = 0; j < BITSET_UINTS; ++j)
3607	any_set \|= (accepts[j] &= dfa->word_char[j]);
3608	if (!any_set)
3609	continue;
3610	}
3611	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3612	{
3613	unsigned int any_set = 0;
3614	if (type == CHARACTER && node->word_char)
3615	{
3616	bitset_empty (accepts);
3617	continue;
3618	}
3619	#ifdef RE_ENABLE_I18N
3620	if (dfa->mb_cur_max > 1)
3621	for (j = 0; j < BITSET_UINTS; ++j)
3622	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3623	else
3624	#endif
3625	for (j = 0; j < BITSET_UINTS; ++j)
3626	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3627	if (!any_set)
3628	continue;
3629	}
3630	}
3631
3632	/* Then divide `accepts' into DFA states, or create a new
3633	state. Above, we make sure that accepts is not empty. */
3634	for (j = 0; j < ndests; ++j)
3635	{
3636	bitset intersec; /* Intersection sets, see below. */
3637	bitset remains;
3638	/* Flags, see below. */
3639	int has_intersec, not_subset, not_consumed;
3640
3641	/* Optimization, skip if this state doesn't accept the character. */
3642	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3643	continue;
3644
3645	/* Enumerate the intersection set of this state and `accepts'. */
3646	has_intersec = 0;
3647	for (k = 0; k < BITSET_UINTS; ++k)
3648	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3649	/* And skip if the intersection set is empty. */
3650	if (!has_intersec)
3651	continue;
3652
3653	/* Then check if this state is a subset of `accepts'. */
3654	not_subset = not_consumed = 0;
3655	for (k = 0; k < BITSET_UINTS; ++k)
3656	{
3657	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3658	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3659	}
3660
3661	/* If this state isn't a subset of `accepts', create a
3662	new group state, which has the `remains'. */
3663	if (not_subset)
3664	{
3665	bitset_copy (dests_ch[ndests], remains);
3666	bitset_copy (dests_ch[j], intersec);
3667	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3668	if (BE (err != REG_NOERROR, 0))
3669	goto error_return;
3670	++ndests;
3671	}
3672
3673	/* Put the position in the current group. */
3674	result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3675	if (BE (result < 0, 0))
3676	goto error_return;
3677
3678	/* If all characters are consumed, go to next node. */
3679	if (!not_consumed)
3680	break;
3681	}
3682	/* Some characters remain, create a new group. */
3683	if (j == ndests)
3684	{
3685	bitset_copy (dests_ch[ndests], accepts);
3686	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3687	if (BE (err != REG_NOERROR, 0))
3688	goto error_return;
3689	++ndests;
3690	bitset_empty (accepts);
3691	}
3692	}
3693	return ndests;
3694	error_return:
3695	for (j = 0; j < ndests; ++j)
3696	re_node_set_free (dests_node + j);
3697	return -1;
3698	}
3699
3700	#ifdef RE_ENABLE_I18N
3701	/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3702	Return the number of the bytes the node accepts.
3703	STR_IDX is the current index of the input string.
3704
3705	This function handles the nodes which can accept one character, or
3706	one collating element like '.', '[a-z]', opposite to the other nodes
3707	can only accept one byte. */
3708
3709	static int
3710	check_node_accept_bytes (dfa, node_idx, input, str_idx)
3711	re_dfa_t *dfa;
3712	int node_idx, str_idx;
3713	const re_string_t *input;
3714	{
3715	const re_token_t *node = dfa->nodes + node_idx;
3716	int char_len, elem_len;
3717	int i;
3718
3719	if (BE (node->type == OP_UTF8_PERIOD, 0))
3720	{
3721	unsigned char c = re_string_byte_at (input, str_idx), d;
3722	if (BE (c < 0xc2, 1))
3723	return 0;
3724
3725	if (str_idx + 2 > input->len)
3726	return 0;
3727
3728	d = re_string_byte_at (input, str_idx + 1);
3729	if (c < 0xe0)
3730	return (d < 0x80 \|\| d > 0xbf) ? 0 : 2;
3731	else if (c < 0xf0)
3732	{
3733	char_len = 3;
3734	if (c == 0xe0 && d < 0xa0)
3735	return 0;
3736	}
3737	else if (c < 0xf8)
3738	{
3739	char_len = 4;
3740	if (c == 0xf0 && d < 0x90)
3741	return 0;
3742	}
3743	else if (c < 0xfc)
3744	{
3745	char_len = 5;
3746	if (c == 0xf8 && d < 0x88)
3747	return 0;
3748	}
3749	else if (c < 0xfe)
3750	{
3751	char_len = 6;
3752	if (c == 0xfc && d < 0x84)
3753	return 0;
3754	}
3755	else
3756	return 0;
3757
3758	if (str_idx + char_len > input->len)
3759	return 0;
3760
3761	for (i = 1; i < char_len; ++i)
3762	{
3763	d = re_string_byte_at (input, str_idx + i);
3764	if (d < 0x80 \|\| d > 0xbf)
3765	return 0;
3766	}
3767	return char_len;
3768	}
3769
3770	char_len = re_string_char_size_at (input, str_idx);
3771	if (node->type == OP_PERIOD)
3772	{
3773	if (char_len <= 1)
3774	return 0;
3775	/* FIXME: I don't think this if is needed, as both '\n'
3776	and '\0' are char_len == 1. */
3777	/* '.' accepts any one character except the following two cases. */
3778	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3779	re_string_byte_at (input, str_idx) == '\n') \|\|
3780	((dfa->syntax & RE_DOT_NOT_NULL) &&
3781	re_string_byte_at (input, str_idx) == '\0'))
3782	return 0;
3783	return char_len;
3784	}
3785
3786	elem_len = re_string_elem_size_at (input, str_idx);
3787	if ((elem_len <= 1 && char_len <= 1) \|\| char_len == 0)
3788	return 0;
3789
3790	if (node->type == COMPLEX_BRACKET)
3791	{
3792	const re_charset_t *cset = node->opr.mbcset;
3793	# ifdef _LIBC
3794	const unsigned char pin = ((char ) re_string_get_buffer (input)
3795	+ str_idx);
3796	int j;
3797	uint32_t nrules;
3798	# endif /* _LIBC */
3799	int match_len = 0;
3800	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3801	? re_string_wchar_at (input, str_idx) : 0);
3802
3803	/* match with multibyte character? */
3804	for (i = 0; i < cset->nmbchars; ++i)
3805	if (wc == cset->mbchars[i])
3806	{
3807	match_len = char_len;
3808	goto check_node_accept_bytes_match;
3809	}
3810	/* match with character_class? */
3811	for (i = 0; i < cset->nchar_classes; ++i)
3812	{
3813	wctype_t wt = cset->char_classes[i];
3814	if (__iswctype (wc, wt))
3815	{
3816	match_len = char_len;
3817	goto check_node_accept_bytes_match;
3818	}
3819	}
3820
3821	# ifdef _LIBC
3822	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3823	if (nrules != 0)
3824	{
3825	unsigned int in_collseq = 0;
3826	const int32_t table, indirect;
3827	const unsigned char weights, extra;
3828	const char *collseqwc;
3829	int32_t idx;
3830	/* This #include defines a local function! */
3831	# include <locale/weight.h>
3832
3833	/* match with collating_symbol? */
3834	if (cset->ncoll_syms)
3835	extra = (const unsigned char *)
3836	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3837	for (i = 0; i < cset->ncoll_syms; ++i)
3838	{
3839	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3840	/* Compare the length of input collating element and
3841	the length of current collating element. */
3842	if (*coll_sym != elem_len)
3843	continue;
3844	/* Compare each bytes. */
3845	for (j = 0; j < *coll_sym; j++)
3846	if (pin[j] != coll_sym[1 + j])
3847	break;
3848	if (j == *coll_sym)
3849	{
3850	/* Match if every bytes is equal. */
3851	match_len = j;
3852	goto check_node_accept_bytes_match;
3853	}
3854	}
3855
3856	if (cset->nranges)
3857	{
3858	if (elem_len <= char_len)
3859	{
3860	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3861	in_collseq = __collseq_table_lookup (collseqwc, wc);
3862	}
3863	else
3864	in_collseq = find_collation_sequence_value (pin, elem_len);
3865	}
3866	/* match with range expression? */
3867	for (i = 0; i < cset->nranges; ++i)
3868	if (cset->range_starts[i] <= in_collseq
3869	&& in_collseq <= cset->range_ends[i])
3870	{
3871	match_len = elem_len;
3872	goto check_node_accept_bytes_match;
3873	}
3874
3875	/* match with equivalence_class? */
3876	if (cset->nequiv_classes)
3877	{
3878	const unsigned char *cp = pin;
3879	table = (const int32_t *)
3880	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3881	weights = (const unsigned char *)
3882	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3883	extra = (const unsigned char *)
3884	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3885	indirect = (const int32_t *)
3886	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3887	idx = findidx (&cp);
3888	if (idx > 0)
3889	for (i = 0; i < cset->nequiv_classes; ++i)
3890	{
3891	int32_t equiv_class_idx = cset->equiv_classes[i];
3892	size_t weight_len = weights[idx];
3893	if (weight_len == weights[equiv_class_idx])
3894	{
3895	int cnt = 0;
3896	while (cnt <= weight_len
3897	&& (weights[equiv_class_idx + 1 + cnt]
3898	== weights[idx + 1 + cnt]))
3899	++cnt;
3900	if (cnt > weight_len)
3901	{
3902	match_len = elem_len;
3903	goto check_node_accept_bytes_match;
3904	}
3905	}
3906	}
3907	}
3908	}
3909	else
3910	# endif /* _LIBC */
3911	{
3912	/* match with range expression? */
3913	#if __GNUC__ >= 2
3914	wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3915	#else
3916	wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3917	cmp_buf[2] = wc;
3918	#endif
3919	for (i = 0; i < cset->nranges; ++i)
3920	{
3921	cmp_buf[0] = cset->range_starts[i];
3922	cmp_buf[4] = cset->range_ends[i];
3923	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3924	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3925	{
3926	match_len = char_len;
3927	goto check_node_accept_bytes_match;
3928	}
3929	}
3930	}
3931	check_node_accept_bytes_match:
3932	if (!cset->non_match)
3933	return match_len;
3934	else
3935	{
3936	if (match_len > 0)
3937	return 0;
3938	else
3939	return (elem_len > char_len) ? elem_len : char_len;
3940	}
3941	}
3942	return 0;
3943	}
3944
3945	# ifdef _LIBC
3946	static unsigned int
3947	find_collation_sequence_value (mbs, mbs_len)
3948	const unsigned char *mbs;
3949	size_t mbs_len;
3950	{
3951	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3952	if (nrules == 0)
3953	{
3954	if (mbs_len == 1)
3955	{
3956	/* No valid character. Match it as a single byte character. */
3957	const unsigned char collseq = (const unsigned char )
3958	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3959	return collseq[mbs[0]];
3960	}
3961	return UINT_MAX;
3962	}
3963	else
3964	{
3965	int32_t idx;
3966	const unsigned char extra = (const unsigned char )
3967	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3968	int32_t extrasize = (const unsigned char *)
3969	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3970
3971	for (idx = 0; idx < extrasize;)
3972	{
3973	int mbs_cnt, found = 0;
3974	int32_t elem_mbs_len;
3975	/* Skip the name of collating element name. */
3976	idx = idx + extra[idx] + 1;
3977	elem_mbs_len = extra[idx++];
3978	if (mbs_len == elem_mbs_len)
3979	{
3980	for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3981	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3982	break;
3983	if (mbs_cnt == elem_mbs_len)
3984	/* Found the entry. */
3985	found = 1;
3986	}
3987	/* Skip the byte sequence of the collating element. */
3988	idx += elem_mbs_len;
3989	/* Adjust for the alignment. */
3990	idx = (idx + 3) & ~3;
3991	/* Skip the collation sequence value. */
3992	idx += sizeof (uint32_t);
3993	/* Skip the wide char sequence of the collating element. */
3994	idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3995	/* If we found the entry, return the sequence value. */
3996	if (found)
3997	return (uint32_t ) (extra + idx);
3998	/* Skip the collation sequence value. */
3999	idx += sizeof (uint32_t);
4000	}
4001	return UINT_MAX;
4002	}
4003	}
4004	# endif /* _LIBC */
4005	#endif /* RE_ENABLE_I18N */
4006
4007	/* Check whether the node accepts the byte which is IDX-th
4008	byte of the INPUT. */
4009
4010	static int
4011	check_node_accept (mctx, node, idx)
4012	const re_match_context_t *mctx;
4013	const re_token_t *node;
4014	int idx;
4015	{
4016	unsigned char ch;
4017	ch = re_string_byte_at (&mctx->input, idx);
4018	switch (node->type)
4019	{
4020	case CHARACTER:
4021	if (node->opr.c != ch)
4022	return 0;
4023	break;
4024
4025	case SIMPLE_BRACKET:
4026	if (!bitset_contain (node->opr.sbcset, ch))
4027	return 0;
4028	break;
4029
4030	#ifdef RE_ENABLE_I18N
4031	case OP_UTF8_PERIOD:
4032	if (ch >= 0x80)
4033	return 0;
4034	/* FALLTHROUGH */
4035	#endif
4036	case OP_PERIOD:
4037	if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4038	\|\| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4039	return 0;
4040	break;
4041
4042	default:
4043	return 0;
4044	}
4045
4046	if (node->constraint)
4047	{
4048	/* The node has constraints. Check whether the current context
4049	satisfies the constraints. */
4050	unsigned int context = re_string_context_at (&mctx->input, idx,
4051	mctx->eflags);
4052	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4053	return 0;
4054	}
4055
4056	return 1;
4057	}
4058
4059	/* Extend the buffers, if the buffers have run out. */
4060
4061	static reg_errcode_t
4062	extend_buffers (mctx)
4063	re_match_context_t *mctx;
4064	{
4065	reg_errcode_t ret;
4066	re_string_t *pstr = &mctx->input;
4067
4068	/* Double the lengthes of the buffers. */
4069	ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4070	if (BE (ret != REG_NOERROR, 0))
4071	return ret;
4072
4073	if (mctx->state_log != NULL)
4074	{
4075	/* And double the length of state_log. */
4076	/* XXX We have no indication of the size of this buffer. If this
4077	allocation fail we have no indication that the state_log array
4078	does not have the right size. */
4079	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4080	pstr->bufs_len + 1);
4081	if (BE (new_array == NULL, 0))
4082	return REG_ESPACE;
4083	mctx->state_log = new_array;
4084	}
4085
4086	/* Then reconstruct the buffers. */
4087	if (pstr->icase)
4088	{
4089	#ifdef RE_ENABLE_I18N
4090	if (pstr->mb_cur_max > 1)
4091	{
4092	ret = build_wcs_upper_buffer (pstr);
4093	if (BE (ret != REG_NOERROR, 0))
4094	return ret;
4095	}
4096	else
4097	#endif /* RE_ENABLE_I18N */
4098	build_upper_buffer (pstr);
4099	}
4100	else
4101	{
4102	#ifdef RE_ENABLE_I18N
4103	if (pstr->mb_cur_max > 1)
4104	build_wcs_buffer (pstr);
4105	else
4106	#endif /* RE_ENABLE_I18N */
4107	{
4108	if (pstr->trans != NULL)
4109	re_string_translate_buffer (pstr);
4110	}
4111	}
4112	return REG_NOERROR;
4113	}
4114
4115
4116
4117	/* Functions for matching context. */
4118
4119	/* Initialize MCTX. */
4120
4121	static reg_errcode_t
4122	match_ctx_init (mctx, eflags, n)
4123	re_match_context_t *mctx;
4124	int eflags, n;
4125	{
4126	mctx->eflags = eflags;
4127	mctx->match_last = -1;
4128	if (n > 0)
4129	{
4130	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4131	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4132	if (BE (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL, 0))
4133	return REG_ESPACE;
4134	}
4135	/* Already zero-ed by the caller.
4136	else
4137	mctx->bkref_ents = NULL;
4138	mctx->nbkref_ents = 0;
4139	mctx->nsub_tops = 0; */
4140	mctx->abkref_ents = n;
4141	mctx->max_mb_elem_len = 1;
4142	mctx->asub_tops = n;
4143	return REG_NOERROR;
4144	}
4145
4146	/* Clean the entries which depend on the current input in MCTX.
4147	This function must be invoked when the matcher changes the start index
4148	of the input, or changes the input string. */
4149
4150	static void
4151	match_ctx_clean (mctx)
4152	re_match_context_t *mctx;
4153	{
4154	int st_idx;
4155	for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4156	{
4157	int sl_idx;
4158	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4159	for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4160	{
4161	re_sub_match_last_t *last = top->lasts[sl_idx];
4162	re_free (last->path.array);
4163	re_free (last);
4164	}
4165	re_free (top->lasts);
4166	if (top->path)
4167	{
4168	re_free (top->path->array);
4169	re_free (top->path);
4170	}
4171	free (top);
4172	}
4173
4174	mctx->nsub_tops = 0;
4175	mctx->nbkref_ents = 0;
4176	}
4177
4178	/* Free all the memory associated with MCTX. */
4179
4180	static void
4181	match_ctx_free (mctx)
4182	re_match_context_t *mctx;
4183	{
4184	/* First, free all the memory associated with MCTX->SUB_TOPS. */
4185	match_ctx_clean (mctx);
4186	re_free (mctx->sub_tops);
4187	re_free (mctx->bkref_ents);
4188	}
4189
4190	/* Add a new backreference entry to MCTX.
4191	Note that we assume that caller never call this function with duplicate
4192	entry, and call with STR_IDX which isn't smaller than any existing entry.
4193	*/
4194
4195	static reg_errcode_t
4196	match_ctx_add_entry (mctx, node, str_idx, from, to)
4197	re_match_context_t *mctx;
4198	int node, str_idx, from, to;
4199	{
4200	if (mctx->nbkref_ents >= mctx->abkref_ents)
4201	{
4202	struct re_backref_cache_entry* new_entry;
4203	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4204	mctx->abkref_ents * 2);
4205	if (BE (new_entry == NULL, 0))
4206	{
4207	re_free (mctx->bkref_ents);
4208	return REG_ESPACE;
4209	}
4210	mctx->bkref_ents = new_entry;
4211	memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4212	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4213	mctx->abkref_ents *= 2;
4214	}
4215	if (mctx->nbkref_ents > 0
4216	&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4217	mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4218
4219	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4220	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4221	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4222	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4223
4224	/* This is a cache that saves negative results of check_dst_limits_calc_pos.
4225	If bit N is clear, means that this entry won't epsilon-transition to
4226	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4227	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4228	such node.
4229
4230	A backreference does not epsilon-transition unless it is empty, so set
4231	to all zeros if FROM != TO. */
4232	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4233	= (from == to ? ~0 : 0);
4234
4235	mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4236	if (mctx->max_mb_elem_len < to - from)
4237	mctx->max_mb_elem_len = to - from;
4238	return REG_NOERROR;
4239	}
4240
4241	/* Search for the first entry which has the same str_idx, or -1 if none is
4242	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4243
4244	static int
4245	search_cur_bkref_entry (mctx, str_idx)
4246	re_match_context_t *mctx;
4247	int str_idx;
4248	{
4249	int left, right, mid, last;
4250	last = right = mctx->nbkref_ents;
4251	for (left = 0; left < right;)
4252	{
4253	mid = (left + right) / 2;
4254	if (mctx->bkref_ents[mid].str_idx < str_idx)
4255	left = mid + 1;
4256	else
4257	right = mid;
4258	}
4259	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4260	return left;
4261	else
4262	return -1;
4263	}
4264
4265	/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4266	at STR_IDX. */
4267
4268	static reg_errcode_t
4269	match_ctx_add_subtop (mctx, node, str_idx)
4270	re_match_context_t *mctx;
4271	int node, str_idx;
4272	{
4273	#ifdef DEBUG
4274	assert (mctx->sub_tops != NULL);
4275	assert (mctx->asub_tops > 0);
4276	#endif
4277	if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4278	{
4279	int new_asub_tops = mctx->asub_tops * 2;
4280	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4281	re_sub_match_top_t *,
4282	new_asub_tops);
4283	if (BE (new_array == NULL, 0))
4284	return REG_ESPACE;
4285	mctx->sub_tops = new_array;
4286	mctx->asub_tops = new_asub_tops;
4287	}
4288	mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4289	if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4290	return REG_ESPACE;
4291	mctx->sub_tops[mctx->nsub_tops]->node = node;
4292	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4293	return REG_NOERROR;
4294	}
4295
4296	/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4297	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4298
4299	static re_sub_match_last_t *
4300	match_ctx_add_sublast (subtop, node, str_idx)
4301	re_sub_match_top_t *subtop;
4302	int node, str_idx;
4303	{
4304	re_sub_match_last_t *new_entry;
4305	if (BE (subtop->nlasts == subtop->alasts, 0))
4306	{
4307	int new_alasts = 2 * subtop->alasts + 1;
4308	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4309	re_sub_match_last_t *,
4310	new_alasts);
4311	if (BE (new_array == NULL, 0))
4312	return NULL;
4313	subtop->lasts = new_array;
4314	subtop->alasts = new_alasts;
4315	}
4316	new_entry = calloc (1, sizeof (re_sub_match_last_t));
4317	if (BE (new_entry != NULL, 1))
4318	{
4319	subtop->lasts[subtop->nlasts] = new_entry;
4320	new_entry->node = node;
4321	new_entry->str_idx = str_idx;
4322	++subtop->nlasts;
4323	}
4324	return new_entry;
4325	}
4326
4327	static void
4328	sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx)
4329	re_sift_context_t *sctx;
4330	re_dfastate_t sifted_sts, limited_sts;
4331	int last_node, last_str_idx;
4332	{
4333	sctx->sifted_states = sifted_sts;
4334	sctx->limited_states = limited_sts;
4335	sctx->last_node = last_node;
4336	sctx->last_str_idx = last_str_idx;
4337	re_node_set_init_empty (&sctx->limits);
4338	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/gawk/3.1.5/regexec.c@ 3930

Download in other formats: