Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: vendor/glibc/current/posix/regexec.c

Visit:

Last change on this file was 2237, checked in by (none), 20 years ago
This commit was manufactured by cvs2svn to create branch 'GNU'.
Property cvs2svn:cvs-rev set to `1.1` Property svn:eol-style set to `native` Property svn:executable set to ``*
File size: 125.8 KB

Line
1	/* Extended regular expression matching and search library.
2	Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, write to the Free
18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19	02111-1307 USA. */
20
21	static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22	int n) internal_function;
23	static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24	static void match_ctx_free (re_match_context_t *cache) internal_function;
25	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26	int str_idx, int from, int to)
27	internal_function;
28	static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
29	internal_function;
30	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31	int str_idx) internal_function;
32	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33	int node, int str_idx)
34	internal_function;
35	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
36	re_dfastate_t **limited_sts, int last_node,
37	int last_str_idx)
38	internal_function;
39	static reg_errcode_t re_search_internal (const regex_t *preg,
40	const char *string, int length,
41	int start, int range, int stop,
42	size_t nmatch, regmatch_t pmatch[],
43	int eflags) internal_function;
44	static int re_search_2_stub (struct re_pattern_buffer *bufp,
45	const char *string1, int length1,
46	const char *string2, int length2,
47	int start, int range, struct re_registers *regs,
48	int stop, int ret_len) internal_function;
49	static int re_search_stub (struct re_pattern_buffer *bufp,
50	const char *string, int length, int start,
51	int range, int stop, struct re_registers *regs,
52	int ret_len) internal_function;
53	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
54	int nregs, int regs_allocated) internal_function;
55	static inline re_dfastate_t *acquire_init_state_context
56	(reg_errcode_t err, const re_match_context_t mctx, int idx)
57	__attribute ((always_inline)) internal_function;
58	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
59	internal_function;
60	static int check_matching (re_match_context_t *mctx, int fl_longest_match,
61	int *p_match_first)
62	internal_function;
63	static int check_halt_node_context (const re_dfa_t *dfa, int node,
64	unsigned int context) internal_function;
65	static int check_halt_state_context (const re_match_context_t *mctx,
66	const re_dfastate_t *state, int idx)
67	internal_function;
68	static void update_regs (re_dfa_t dfa, regmatch_t pmatch,
69	regmatch_t *prev_idx_match, int cur_node,
70	int cur_idx, int nmatch) internal_function;
71	static int proceed_next_node (const re_match_context_t *mctx,
72	int nregs, regmatch_t *regs,
73	int pidx, int node, re_node_set eps_via_nodes,
74	struct re_fail_stack_t *fs) internal_function;
75	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
76	int str_idx, int dest_node, int nregs,
77	regmatch_t *regs,
78	re_node_set *eps_via_nodes) internal_function;
79	static int pop_fail_stack (struct re_fail_stack_t fs, int pidx, int nregs,
80	regmatch_t regs, re_node_set eps_via_nodes) internal_function;
81	static reg_errcode_t set_regs (const regex_t *preg,
82	const re_match_context_t *mctx,
83	size_t nmatch, regmatch_t *pmatch,
84	int fl_backtrack) internal_function;
85	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
86
87	#ifdef RE_ENABLE_I18N
88	static int sift_states_iter_mb (const re_match_context_t *mctx,
89	re_sift_context_t *sctx,
90	int node_idx, int str_idx, int max_str_idx) internal_function;
91	#endif /* RE_ENABLE_I18N */
92	static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
93	re_sift_context_t *sctx) internal_function;
94	static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
95	re_sift_context_t *sctx, int str_idx,
96	re_node_set *cur_dest) internal_function;
97	static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
98	re_sift_context_t *sctx,
99	int str_idx,
100	re_node_set *dest_nodes) internal_function;
101	static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
102	re_node_set *dest_nodes,
103	const re_node_set *candidates) internal_function;
104	static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
105	re_node_set *dest_nodes,
106	const re_node_set *and_nodes) internal_function;
107	static int check_dst_limits (re_match_context_t mctx, re_node_set limits,
108	int dst_node, int dst_idx, int src_node,
109	int src_idx) internal_function;
110	static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
111	int boundaries, int subexp_idx,
112	int from_node, int bkref_idx) internal_function;
113	static int check_dst_limits_calc_pos (re_match_context_t *mctx,
114	int limit, int subexp_idx,
115	int node, int str_idx,
116	int bkref_idx) internal_function;
117	static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
118	re_node_set *dest_nodes,
119	const re_node_set *candidates,
120	re_node_set *limits,
121	struct re_backref_cache_entry *bkref_ents,
122	int str_idx) internal_function;
123	static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
124	re_sift_context_t *sctx,
125	int str_idx, const re_node_set *candidates) internal_function;
126	static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
127	int next_state_log_idx) internal_function;
128	static reg_errcode_t merge_state_array (re_dfa_t dfa, re_dfastate_t *dst,
129	re_dfastate_t **src, int num) internal_function;
130	static re_dfastate_t find_recover_state (reg_errcode_t err,
131	re_match_context_t *mctx) internal_function;
132	static re_dfastate_t transit_state (reg_errcode_t err,
133	re_match_context_t *mctx,
134	re_dfastate_t *state) internal_function;
135	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
136	re_match_context_t *mctx,
137	re_dfastate_t *next_state) internal_function;
138	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
139	re_node_set *cur_nodes,
140	int str_idx) internal_function;
141	#if 0
142	static re_dfastate_t transit_state_sb (reg_errcode_t err,
143	re_match_context_t *mctx,
144	re_dfastate_t *pstate) internal_function;
145	#endif
146	#ifdef RE_ENABLE_I18N
147	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
148	re_dfastate_t *pstate) internal_function;
149	#endif /* RE_ENABLE_I18N */
150	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
151	const re_node_set *nodes) internal_function;
152	static reg_errcode_t get_subexp (re_match_context_t *mctx,
153	int bkref_node, int bkref_str_idx) internal_function;
154	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
155	const re_sub_match_top_t *sub_top,
156	re_sub_match_last_t *sub_last,
157	int bkref_node, int bkref_str) internal_function;
158	static int find_subexp_node (const re_dfa_t dfa, const re_node_set nodes,
159	int subexp_idx, int type) internal_function;
160	static reg_errcode_t check_arrival (re_match_context_t *mctx,
161	state_array_t *path, int top_node,
162	int top_str, int last_node, int last_str,
163	int type) internal_function;
164	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165	int str_idx,
166	re_node_set *cur_nodes,
167	re_node_set *next_nodes) internal_function;
168	static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
169	re_node_set *cur_nodes,
170	int ex_subexp, int type) internal_function;
171	static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
172	re_node_set *dst_nodes,
173	int target, int ex_subexp,
174	int type) internal_function;
175	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
176	re_node_set *cur_nodes, int cur_str,
177	int subexp_num, int type) internal_function;
178	static int build_trtable (re_dfa_t *dfa,
179	re_dfastate_t *state) internal_function;
180	#ifdef RE_ENABLE_I18N
181	static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
182	const re_string_t *input, int idx) internal_function;
183	# ifdef _LIBC
184	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
185	size_t name_len) internal_function;
186	# endif /* _LIBC */
187	#endif /* RE_ENABLE_I18N */
188	static int group_nodes_into_DFAstates (re_dfa_t *dfa,
189	const re_dfastate_t *state,
190	re_node_set *states_node,
191	bitset *states_ch) internal_function;
192	static int check_node_accept (const re_match_context_t *mctx,
193	const re_token_t *node, int idx) internal_function;
194	static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
195
196
197	/* Entry point for POSIX code. */
198
199	/* regexec searches for a given pattern, specified by PREG, in the
200	string STRING.
201
202	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
203	`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
204	least NMATCH elements, and we set them to the offsets of the
205	corresponding matched substrings.
206
207	EFLAGS specifies `execution flags' which affect matching: if
208	REG_NOTBOL is set, then ^ does not match at the beginning of the
209	string; if REG_NOTEOL is set, then $ does not match at the end.
210
211	We return 0 if we find a match and REG_NOMATCH if not. */
212
213	int
214	regexec (preg, string, nmatch, pmatch, eflags)
215	const regex_t *__restrict preg;
216	const char *__restrict string;
217	size_t nmatch;
218	regmatch_t pmatch[];
219	int eflags;
220	{
221	reg_errcode_t err;
222	int start, length;
223	re_dfa_t dfa = (re_dfa_t )preg->buffer;
224
225	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
226	return REG_BADPAT;
227
228	if (eflags & REG_STARTEND)
229	{
230	start = pmatch[0].rm_so;
231	length = pmatch[0].rm_eo;
232	}
233	else
234	{
235	start = 0;
236	length = strlen (string);
237	}
238
239	__libc_lock_lock (dfa->lock);
240	if (preg->no_sub)
241	err = re_search_internal (preg, string, length, start, length - start,
242	length, 0, NULL, eflags);
243	else
244	err = re_search_internal (preg, string, length, start, length - start,
245	length, nmatch, pmatch, eflags);
246	__libc_lock_unlock (dfa->lock);
247	return err != REG_NOERROR;
248	}
249
250	#ifdef _LIBC
251	# include <shlib-compat.h>
252	versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
253
254	# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
255	__typeof__ (__regexec) __compat_regexec;
256
257	int
258	attribute_compat_text_section
259	__compat_regexec (const regex_t *__restrict preg,
260	const char *__restrict string, size_t nmatch,
261	regmatch_t pmatch[], int eflags)
262	{
263	return regexec (preg, string, nmatch, pmatch,
264	eflags & (REG_NOTBOL \| REG_NOTEOL));
265	}
266	compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
267	# endif
268	#endif
269
270	/* Entry points for GNU code. */
271
272	/* re_match, re_search, re_match_2, re_search_2
273
274	The former two functions operate on STRING with length LENGTH,
275	while the later two operate on concatenation of STRING1 and STRING2
276	with lengths LENGTH1 and LENGTH2, respectively.
277
278	re_match() matches the compiled pattern in BUFP against the string,
279	starting at index START.
280
281	re_search() first tries matching at index START, then it tries to match
282	starting from index START + 1, and so on. The last start position tried
283	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
284	way as re_match().)
285
286	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
287	the first STOP characters of the concatenation of the strings should be
288	concerned.
289
290	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
291	and all groups is stroed in REGS. (For the "_2" variants, the offsets are
292	computed relative to the concatenation, not relative to the individual
293	strings.)
294
295	On success, re_match* functions return the length of the match, re_search*
296	return the position of the start of the match. Return value -1 means no
297	match was found and -2 indicates an internal error. */
298
299	int
300	re_match (bufp, string, length, start, regs)
301	struct re_pattern_buffer *bufp;
302	const char *string;
303	int length, start;
304	struct re_registers *regs;
305	{
306	return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
307	}
308	#ifdef _LIBC
309	weak_alias (__re_match, re_match)
310	#endif
311
312	int
313	re_search (bufp, string, length, start, range, regs)
314	struct re_pattern_buffer *bufp;
315	const char *string;
316	int length, start, range;
317	struct re_registers *regs;
318	{
319	return re_search_stub (bufp, string, length, start, range, length, regs, 0);
320	}
321	#ifdef _LIBC
322	weak_alias (__re_search, re_search)
323	#endif
324
325	int
326	re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
327	struct re_pattern_buffer *bufp;
328	const char string1, string2;
329	int length1, length2, start, stop;
330	struct re_registers *regs;
331	{
332	return re_search_2_stub (bufp, string1, length1, string2, length2,
333	start, 0, regs, stop, 1);
334	}
335	#ifdef _LIBC
336	weak_alias (__re_match_2, re_match_2)
337	#endif
338
339	int
340	re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
341	struct re_pattern_buffer *bufp;
342	const char string1, string2;
343	int length1, length2, start, range, stop;
344	struct re_registers *regs;
345	{
346	return re_search_2_stub (bufp, string1, length1, string2, length2,
347	start, range, regs, stop, 0);
348	}
349	#ifdef _LIBC
350	weak_alias (__re_search_2, re_search_2)
351	#endif
352
353	static int
354	re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
355	stop, ret_len)
356	struct re_pattern_buffer *bufp;
357	const char string1, string2;
358	int length1, length2, start, range, stop, ret_len;
359	struct re_registers *regs;
360	{
361	const char *str;
362	int rval;
363	int len = length1 + length2;
364	int free_str = 0;
365
366	if (BE (length1 < 0 \|\| length2 < 0 \|\| stop < 0, 0))
367	return -2;
368
369	/* Concatenate the strings. */
370	if (length2 > 0)
371	if (length1 > 0)
372	{
373	char *s = re_malloc (char, len);
374
375	if (BE (s == NULL, 0))
376	return -2;
377	memcpy (s, string1, length1);
378	memcpy (s + length1, string2, length2);
379	str = s;
380	free_str = 1;
381	}
382	else
383	str = string2;
384	else
385	str = string1;
386
387	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
388	ret_len);
389	if (free_str)
390	re_free ((char *) str);
391	return rval;
392	}
393
394	/* The parameters have the same meaning as those of re_search.
395	Additional parameters:
396	If RET_LEN is nonzero the length of the match is returned (re_match style);
397	otherwise the position of the match is returned. */
398
399	static int
400	re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
401	struct re_pattern_buffer *bufp;
402	const char *string;
403	int length, start, range, stop, ret_len;
404	struct re_registers *regs;
405	{
406	reg_errcode_t result;
407	regmatch_t *pmatch;
408	int nregs, rval;
409	int eflags = 0;
410	re_dfa_t dfa = (re_dfa_t )bufp->buffer;
411
412	/* Check for out-of-range. */
413	if (BE (start < 0 \|\| start > length, 0))
414	return -1;
415	if (BE (start + range > length, 0))
416	range = length - start;
417	else if (BE (start + range < 0, 0))
418	range = -start;
419
420	__libc_lock_lock (dfa->lock);
421
422	eflags \|= (bufp->not_bol) ? REG_NOTBOL : 0;
423	eflags \|= (bufp->not_eol) ? REG_NOTEOL : 0;
424
425	/* Compile fastmap if we haven't yet. */
426	if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
427	re_compile_fastmap (bufp);
428
429	if (BE (bufp->no_sub, 0))
430	regs = NULL;
431
432	/* We need at least 1 register. */
433	if (regs == NULL)
434	nregs = 1;
435	else if (BE (bufp->regs_allocated == REGS_FIXED &&
436	regs->num_regs < bufp->re_nsub + 1, 0))
437	{
438	nregs = regs->num_regs;
439	if (BE (nregs < 1, 0))
440	{
441	/* Nothing can be copied to regs. */
442	regs = NULL;
443	nregs = 1;
444	}
445	}
446	else
447	nregs = bufp->re_nsub + 1;
448	pmatch = re_malloc (regmatch_t, nregs);
449	if (BE (pmatch == NULL, 0))
450	{
451	rval = -2;
452	goto out;
453	}
454
455	result = re_search_internal (bufp, string, length, start, range, stop,
456	nregs, pmatch, eflags);
457
458	rval = 0;
459
460	/* I hope we needn't fill ther regs with -1's when no match was found. */
461	if (result != REG_NOERROR)
462	rval = -1;
463	else if (regs != NULL)
464	{
465	/* If caller wants register contents data back, copy them. */
466	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
467	bufp->regs_allocated);
468	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
469	rval = -2;
470	}
471
472	if (BE (rval == 0, 1))
473	{
474	if (ret_len)
475	{
476	assert (pmatch[0].rm_so == start);
477	rval = pmatch[0].rm_eo - start;
478	}
479	else
480	rval = pmatch[0].rm_so;
481	}
482	re_free (pmatch);
483	out:
484	__libc_lock_unlock (dfa->lock);
485	return rval;
486	}
487
488	static unsigned
489	re_copy_regs (regs, pmatch, nregs, regs_allocated)
490	struct re_registers *regs;
491	regmatch_t *pmatch;
492	int nregs, regs_allocated;
493	{
494	int rval = REGS_REALLOCATE;
495	int i;
496	int need_regs = nregs + 1;
497	/* We need one extra element beyond `num_regs' for the `-1' marker GNU code
498	uses. */
499
500	/* Have the register data arrays been allocated? */
501	if (regs_allocated == REGS_UNALLOCATED)
502	{ /* No. So allocate them with malloc. */
503	regs->start = re_malloc (regoff_t, need_regs);
504	regs->end = re_malloc (regoff_t, need_regs);
505	if (BE (regs->start == NULL, 0) \|\| BE (regs->end == NULL, 0))
506	return REGS_UNALLOCATED;
507	regs->num_regs = need_regs;
508	}
509	else if (regs_allocated == REGS_REALLOCATE)
510	{ /* Yes. If we need more elements than were already
511	allocated, reallocate them. If we need fewer, just
512	leave it alone. */
513	if (BE (need_regs > regs->num_regs, 0))
514	{
515	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
516	regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
517	if (BE (new_start == NULL, 0) \|\| BE (new_end == NULL, 0))
518	return REGS_UNALLOCATED;
519	regs->start = new_start;
520	regs->end = new_end;
521	regs->num_regs = need_regs;
522	}
523	}
524	else
525	{
526	assert (regs_allocated == REGS_FIXED);
527	/* This function may not be called with REGS_FIXED and nregs too big. */
528	assert (regs->num_regs >= nregs);
529	rval = REGS_FIXED;
530	}
531
532	/* Copy the regs. */
533	for (i = 0; i < nregs; ++i)
534	{
535	regs->start[i] = pmatch[i].rm_so;
536	regs->end[i] = pmatch[i].rm_eo;
537	}
538	for ( ; i < regs->num_regs; ++i)
539	regs->start[i] = regs->end[i] = -1;
540
541	return rval;
542	}
543
544	/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
545	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
546	this memory for recording register information. STARTS and ENDS
547	must be allocated using the malloc library routine, and must each
548	be at least NUM_REGS * sizeof (regoff_t) bytes long.
549
550	If NUM_REGS == 0, then subsequent matches should allocate their own
551	register data.
552
553	Unless this function is called, the first search or match using
554	PATTERN_BUFFER will allocate its own register data, without
555	freeing the old data. */
556
557	void
558	re_set_registers (bufp, regs, num_regs, starts, ends)
559	struct re_pattern_buffer *bufp;
560	struct re_registers *regs;
561	unsigned num_regs;
562	regoff_t starts, ends;
563	{
564	if (num_regs)
565	{
566	bufp->regs_allocated = REGS_REALLOCATE;
567	regs->num_regs = num_regs;
568	regs->start = starts;
569	regs->end = ends;
570	}
571	else
572	{
573	bufp->regs_allocated = REGS_UNALLOCATED;
574	regs->num_regs = 0;
575	regs->start = regs->end = (regoff_t *) 0;
576	}
577	}
578	#ifdef _LIBC
579	weak_alias (__re_set_registers, re_set_registers)
580	#endif
581
582
583	/* Entry points compatible with 4.2 BSD regex library. We don't define
584	them unless specifically requested. */
585
586	#if defined _REGEX_RE_COMP \|\| defined _LIBC
587	int
588	# ifdef _LIBC
589	weak_function
590	# endif
591	re_exec (s)
592	const char *s;
593	{
594	return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
595	}
596	#endif /* _REGEX_RE_COMP */
597
598
599	/* Internal entry point. */
600
601	/* Searches for a compiled pattern PREG in the string STRING, whose
602	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
603	mingings with regexec. START, and RANGE have the same meanings
604	with re_search.
605	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
606	otherwise return the error code.
607	Note: We assume front end functions already check ranges.
608	(START + RANGE >= 0 && START + RANGE <= LENGTH) */
609
610	static reg_errcode_t
611	re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
612	eflags)
613	const regex_t *preg;
614	const char *string;
615	int length, start, range, stop, eflags;
616	size_t nmatch;
617	regmatch_t pmatch[];
618	{
619	reg_errcode_t err;
620	re_dfa_t dfa = (re_dfa_t )preg->buffer;
621	int left_lim, right_lim, incr;
622	int fl_longest_match, match_first, match_kind, match_last = -1;
623	int extra_nmatch;
624	int sb, ch;
625	#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
626	re_match_context_t mctx = { .dfa = dfa };
627	#else
628	re_match_context_t mctx;
629	#endif
630	char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
631	&& range && !preg->can_be_null) ? preg->fastmap : NULL;
632	unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
633
634	#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
635	memset (&mctx, '\0', sizeof (re_match_context_t));
636	mctx.dfa = dfa;
637	#endif
638
639	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
640	nmatch -= extra_nmatch;
641
642	/* Check if the DFA haven't been compiled. */
643	if (BE (preg->used == 0 \|\| dfa->init_state == NULL
644	\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
645	\|\| dfa->init_state_begbuf == NULL, 0))
646	return REG_NOMATCH;
647
648	#ifdef DEBUG
649	/* We assume front-end functions already check them. */
650	assert (start + range >= 0 && start + range <= length);
651	#endif
652
653	/* If initial states with non-begbuf contexts have no elements,
654	the regex must be anchored. If preg->newline_anchor is set,
655	we'll never use init_state_nl, so do not check it. */
656	if (dfa->init_state->nodes.nelem == 0
657	&& dfa->init_state_word->nodes.nelem == 0
658	&& (dfa->init_state_nl->nodes.nelem == 0
659	\|\| !preg->newline_anchor))
660	{
661	if (start != 0 && start + range != 0)
662	return REG_NOMATCH;
663	start = range = 0;
664	}
665
666	/* We must check the longest matching, if nmatch > 0. */
667	fl_longest_match = (nmatch != 0 \|\| dfa->nbackref);
668
669	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
670	preg->translate, preg->syntax & RE_ICASE, dfa);
671	if (BE (err != REG_NOERROR, 0))
672	goto free_return;
673	mctx.input.stop = stop;
674	mctx.input.raw_stop = stop;
675	mctx.input.newline_anchor = preg->newline_anchor;
676
677	err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
678	if (BE (err != REG_NOERROR, 0))
679	goto free_return;
680
681	/* We will log all the DFA states through which the dfa pass,
682	if nmatch > 1, or this dfa has "multibyte node", which is a
683	back-reference or a node which can accept multibyte character or
684	multi character collating element. */
685	if (nmatch > 1 \|\| dfa->has_mb_node)
686	{
687	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
688	if (BE (mctx.state_log == NULL, 0))
689	{
690	err = REG_ESPACE;
691	goto free_return;
692	}
693	}
694	else
695	mctx.state_log = NULL;
696
697	match_first = start;
698	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
699	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
700
701	/* Check incrementally whether of not the input string match. */
702	incr = (range < 0) ? -1 : 1;
703	left_lim = (range < 0) ? start + range : start;
704	right_lim = (range < 0) ? start : start + range;
705	sb = dfa->mb_cur_max == 1;
706	match_kind =
707	(fastmap
708	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? 4 : 0)
709	\| (range >= 0 ? 2 : 0)
710	\| (t != NULL ? 1 : 0))
711	: 8);
712
713	for (;; match_first += incr)
714	{
715	err = REG_NOMATCH;
716	if (match_first < left_lim \|\| right_lim < match_first)
717	goto free_return;
718
719	/* Advance as rapidly as possible through the string, until we
720	find a plausible place to start matching. This may be done
721	with varying efficiency, so there are various possibilities:
722	only the most common of them are specialized, in order to
723	save on code size. We use a switch statement for speed. */
724	switch (match_kind)
725	{
726	case 8:
727	/* No fastmap. */
728	break;
729
730	case 7:
731	/* Fastmap with single-byte translation, match forward. */
732	while (BE (match_first < right_lim, 1)
733	&& !fastmap[t[(unsigned char) string[match_first]]])
734	++match_first;
735	goto forward_match_found_start_or_reached_end;
736
737	case 6:
738	/* Fastmap without translation, match forward. */
739	while (BE (match_first < right_lim, 1)
740	&& !fastmap[(unsigned char) string[match_first]])
741	++match_first;
742
743	forward_match_found_start_or_reached_end:
744	if (BE (match_first == right_lim, 0))
745	{
746	ch = match_first >= length
747	? 0 : (unsigned char) string[match_first];
748	if (!fastmap[t ? t[ch] : ch])
749	goto free_return;
750	}
751	break;
752
753	case 4:
754	case 5:
755	/* Fastmap without multi-byte translation, match backwards. */
756	while (match_first >= left_lim)
757	{
758	ch = match_first >= length
759	? 0 : (unsigned char) string[match_first];
760	if (fastmap[t ? t[ch] : ch])
761	break;
762	--match_first;
763	}
764	if (match_first < left_lim)
765	goto free_return;
766	break;
767
768	default:
769	/* In this case, we can't determine easily the current byte,
770	since it might be a component byte of a multibyte
771	character. Then we use the constructed buffer instead. */
772	for (;;)
773	{
774	/* If MATCH_FIRST is out of the valid range, reconstruct the
775	buffers. */
776	unsigned int offset = match_first - mctx.input.raw_mbs_idx;
777	if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
778	{
779	err = re_string_reconstruct (&mctx.input, match_first,
780	eflags);
781	if (BE (err != REG_NOERROR, 0))
782	goto free_return;
783
784	offset = match_first - mctx.input.raw_mbs_idx;
785	}
786	/* If MATCH_FIRST is out of the buffer, leave it as '\0'.
787	Note that MATCH_FIRST must not be smaller than 0. */
788	ch = (match_first >= length
789	? 0 : re_string_byte_at (&mctx.input, offset));
790	if (fastmap[ch])
791	break;
792	match_first += incr;
793	if (match_first < left_lim \|\| match_first > right_lim)
794	{
795	err = REG_NOMATCH;
796	goto free_return;
797	}
798	}
799	break;
800	}
801
802	/* Reconstruct the buffers so that the matcher can assume that
803	the matching starts from the beginning of the buffer. */
804	err = re_string_reconstruct (&mctx.input, match_first, eflags);
805	if (BE (err != REG_NOERROR, 0))
806	goto free_return;
807
808	#ifdef RE_ENABLE_I18N
809	/* Don't consider this char as a possible match start if it part,
810	yet isn't the head, of a multibyte character. */
811	if (!sb && !re_string_first_byte (&mctx.input, 0))
812	continue;
813	#endif
814
815	/* It seems to be appropriate one, then use the matcher. */
816	/* We assume that the matching starts from 0. */
817	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
818	match_last = check_matching (&mctx, fl_longest_match,
819	range >= 0 ? &match_first : NULL);
820	if (match_last != -1)
821	{
822	if (BE (match_last == -2, 0))
823	{
824	err = REG_ESPACE;
825	goto free_return;
826	}
827	else
828	{
829	mctx.match_last = match_last;
830	if ((!preg->no_sub && nmatch > 1) \|\| dfa->nbackref)
831	{
832	re_dfastate_t *pstate = mctx.state_log[match_last];
833	mctx.last_node = check_halt_state_context (&mctx, pstate,
834	match_last);
835	}
836	if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
837	\|\| dfa->nbackref)
838	{
839	err = prune_impossible_nodes (&mctx);
840	if (err == REG_NOERROR)
841	break;
842	if (BE (err != REG_NOMATCH, 0))
843	goto free_return;
844	match_last = -1;
845	}
846	else
847	break; /* We found a match. */
848	}
849	}
850
851	match_ctx_clean (&mctx);
852	}
853
854	#ifdef DEBUG
855	assert (match_last != -1);
856	assert (err == REG_NOERROR);
857	#endif
858
859	/* Set pmatch[] if we need. */
860	if (nmatch > 0)
861	{
862	int reg_idx;
863
864	/* Initialize registers. */
865	for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
866	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
867
868	/* Set the points where matching start/end. */
869	pmatch[0].rm_so = 0;
870	pmatch[0].rm_eo = mctx.match_last;
871
872	if (!preg->no_sub && nmatch > 1)
873	{
874	err = set_regs (preg, &mctx, nmatch, pmatch,
875	dfa->has_plural_match && dfa->nbackref > 0);
876	if (BE (err != REG_NOERROR, 0))
877	goto free_return;
878	}
879
880	/* At last, add the offset to the each registers, since we slided
881	the buffers so that we could assume that the matching starts
882	from 0. */
883	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
884	if (pmatch[reg_idx].rm_so != -1)
885	{
886	#ifdef RE_ENABLE_I18N
887	if (BE (mctx.input.offsets_needed != 0, 0))
888	{
889	if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
890	pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
891	else
892	pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
893	if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
894	pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
895	else
896	pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
897	}
898	#else
899	assert (mctx.input.offsets_needed == 0);
900	#endif
901	pmatch[reg_idx].rm_so += match_first;
902	pmatch[reg_idx].rm_eo += match_first;
903	}
904	for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
905	{
906	pmatch[nmatch + reg_idx].rm_so = -1;
907	pmatch[nmatch + reg_idx].rm_eo = -1;
908	}
909
910	if (dfa->subexp_map)
911	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
912	if (dfa->subexp_map[reg_idx] != reg_idx)
913	{
914	pmatch[reg_idx + 1].rm_so
915	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
916	pmatch[reg_idx + 1].rm_eo
917	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
918	}
919	}
920
921	free_return:
922	re_free (mctx.state_log);
923	if (dfa->nbackref)
924	match_ctx_free (&mctx);
925	re_string_destruct (&mctx.input);
926	return err;
927	}
928
929	static reg_errcode_t
930	prune_impossible_nodes (mctx)
931	re_match_context_t *mctx;
932	{
933	re_dfa_t *const dfa = mctx->dfa;
934	int halt_node, match_last;
935	reg_errcode_t ret;
936	re_dfastate_t **sifted_states;
937	re_dfastate_t **lim_states = NULL;
938	re_sift_context_t sctx;
939	#ifdef DEBUG
940	assert (mctx->state_log != NULL);
941	#endif
942	match_last = mctx->match_last;
943	halt_node = mctx->last_node;
944	sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
945	if (BE (sifted_states == NULL, 0))
946	{
947	ret = REG_ESPACE;
948	goto free_return;
949	}
950	if (dfa->nbackref)
951	{
952	lim_states = re_malloc (re_dfastate_t *, match_last + 1);
953	if (BE (lim_states == NULL, 0))
954	{
955	ret = REG_ESPACE;
956	goto free_return;
957	}
958	while (1)
959	{
960	memset (lim_states, '\0',
961	sizeof (re_dfastate_t ) (match_last + 1));
962	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
963	match_last);
964	ret = sift_states_backward (mctx, &sctx);
965	re_node_set_free (&sctx.limits);
966	if (BE (ret != REG_NOERROR, 0))
967	goto free_return;
968	if (sifted_states[0] != NULL \|\| lim_states[0] != NULL)
969	break;
970	do
971	{
972	--match_last;
973	if (match_last < 0)
974	{
975	ret = REG_NOMATCH;
976	goto free_return;
977	}
978	} while (mctx->state_log[match_last] == NULL
979	\|\| !mctx->state_log[match_last]->halt);
980	halt_node = check_halt_state_context (mctx,
981	mctx->state_log[match_last],
982	match_last);
983	}
984	ret = merge_state_array (dfa, sifted_states, lim_states,
985	match_last + 1);
986	re_free (lim_states);
987	lim_states = NULL;
988	if (BE (ret != REG_NOERROR, 0))
989	goto free_return;
990	}
991	else
992	{
993	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
994	ret = sift_states_backward (mctx, &sctx);
995	re_node_set_free (&sctx.limits);
996	if (BE (ret != REG_NOERROR, 0))
997	goto free_return;
998	}
999	re_free (mctx->state_log);
1000	mctx->state_log = sifted_states;
1001	sifted_states = NULL;
1002	mctx->last_node = halt_node;
1003	mctx->match_last = match_last;
1004	ret = REG_NOERROR;
1005	free_return:
1006	re_free (sifted_states);
1007	re_free (lim_states);
1008	return ret;
1009	}
1010
1011	/* Acquire an initial state and return it.
1012	We must select appropriate initial state depending on the context,
1013	since initial states may have constraints like "\<", "^", etc.. */
1014
1015	static inline re_dfastate_t *
1016	acquire_init_state_context (err, mctx, idx)
1017	reg_errcode_t *err;
1018	const re_match_context_t *mctx;
1019	int idx;
1020	{
1021	re_dfa_t *const dfa = mctx->dfa;
1022	if (dfa->init_state->has_constraint)
1023	{
1024	unsigned int context;
1025	context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1026	if (IS_WORD_CONTEXT (context))
1027	return dfa->init_state_word;
1028	else if (IS_ORDINARY_CONTEXT (context))
1029	return dfa->init_state;
1030	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1031	return dfa->init_state_begbuf;
1032	else if (IS_NEWLINE_CONTEXT (context))
1033	return dfa->init_state_nl;
1034	else if (IS_BEGBUF_CONTEXT (context))
1035	{
1036	/* It is relatively rare case, then calculate on demand. */
1037	return re_acquire_state_context (err, dfa,
1038	dfa->init_state->entrance_nodes,
1039	context);
1040	}
1041	else
1042	/* Must not happen? */
1043	return dfa->init_state;
1044	}
1045	else
1046	return dfa->init_state;
1047	}
1048
1049	/* Check whether the regular expression match input string INPUT or not,
1050	and return the index where the matching end, return -1 if not match,
1051	or return -2 in case of an error.
1052	FL_LONGEST_MATCH means we want the POSIX longest matching.
1053	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1054	next place where we may want to try matching.
1055	Note that the matcher assume that the maching starts from the current
1056	index of the buffer. */
1057
1058	static int
1059	check_matching (mctx, fl_longest_match, p_match_first)
1060	re_match_context_t *mctx;
1061	int fl_longest_match;
1062	int *p_match_first;
1063	{
1064	re_dfa_t *const dfa = mctx->dfa;
1065	reg_errcode_t err;
1066	int match = 0;
1067	int match_last = -1;
1068	int cur_str_idx = re_string_cur_idx (&mctx->input);
1069	re_dfastate_t *cur_state;
1070	int at_init_state = p_match_first != NULL;
1071	int next_start_idx = cur_str_idx;
1072
1073	err = REG_NOERROR;
1074	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1075	/* An initial state must not be NULL (invalid). */
1076	if (BE (cur_state == NULL, 0))
1077	{
1078	assert (err == REG_ESPACE);
1079	return -2;
1080	}
1081
1082	if (mctx->state_log != NULL)
1083	{
1084	mctx->state_log[cur_str_idx] = cur_state;
1085
1086	/* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1087	later. E.g. Processing back references. */
1088	if (BE (dfa->nbackref, 0))
1089	{
1090	at_init_state = 0;
1091	err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1092	if (BE (err != REG_NOERROR, 0))
1093	return err;
1094
1095	if (cur_state->has_backref)
1096	{
1097	err = transit_state_bkref (mctx, &cur_state->nodes);
1098	if (BE (err != REG_NOERROR, 0))
1099	return err;
1100	}
1101	}
1102	}
1103
1104	/* If the RE accepts NULL string. */
1105	if (BE (cur_state->halt, 0))
1106	{
1107	if (!cur_state->has_constraint
1108	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1109	{
1110	if (!fl_longest_match)
1111	return cur_str_idx;
1112	else
1113	{
1114	match_last = cur_str_idx;
1115	match = 1;
1116	}
1117	}
1118	}
1119
1120	while (!re_string_eoi (&mctx->input))
1121	{
1122	re_dfastate_t *old_state = cur_state;
1123	int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1124
1125	if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1126	\|\| (BE (next_char_idx >= mctx->input.valid_len, 0)
1127	&& mctx->input.valid_len < mctx->input.len))
1128	{
1129	err = extend_buffers (mctx);
1130	if (BE (err != REG_NOERROR, 0))
1131	{
1132	assert (err == REG_ESPACE);
1133	return -2;
1134	}
1135	}
1136
1137	cur_state = transit_state (&err, mctx, cur_state);
1138	if (mctx->state_log != NULL)
1139	cur_state = merge_state_with_log (&err, mctx, cur_state);
1140
1141	if (cur_state == NULL)
1142	{
1143	/* Reached the invalid state or an error. Try to recover a valid
1144	state using the state log, if available and if we have not
1145	already found a valid (even if not the longest) match. */
1146	if (BE (err != REG_NOERROR, 0))
1147	return -2;
1148
1149	if (mctx->state_log == NULL
1150	\|\| (match && !fl_longest_match)
1151	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1152	break;
1153	}
1154
1155	if (BE (at_init_state, 0))
1156	{
1157	if (old_state == cur_state)
1158	next_start_idx = next_char_idx;
1159	else
1160	at_init_state = 0;
1161	}
1162
1163	if (cur_state->halt)
1164	{
1165	/* Reached a halt state.
1166	Check the halt state can satisfy the current context. */
1167	if (!cur_state->has_constraint
1168	\|\| check_halt_state_context (mctx, cur_state,
1169	re_string_cur_idx (&mctx->input)))
1170	{
1171	/* We found an appropriate halt state. */
1172	match_last = re_string_cur_idx (&mctx->input);
1173	match = 1;
1174
1175	/* We found a match, do not modify match_first below. */
1176	p_match_first = NULL;
1177	if (!fl_longest_match)
1178	break;
1179	}
1180	}
1181	}
1182
1183	if (p_match_first)
1184	*p_match_first += next_start_idx;
1185
1186	return match_last;
1187	}
1188
1189	/* Check NODE match the current context. */
1190
1191	static int check_halt_node_context (dfa, node, context)
1192	const re_dfa_t *dfa;
1193	int node;
1194	unsigned int context;
1195	{
1196	re_token_type_t type = dfa->nodes[node].type;
1197	unsigned int constraint = dfa->nodes[node].constraint;
1198	if (type != END_OF_RE)
1199	return 0;
1200	if (!constraint)
1201	return 1;
1202	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1203	return 0;
1204	return 1;
1205	}
1206
1207	/* Check the halt state STATE match the current context.
1208	Return 0 if not match, if the node, STATE has, is a halt node and
1209	match the context, return the node. */
1210
1211	static int
1212	check_halt_state_context (mctx, state, idx)
1213	const re_match_context_t *mctx;
1214	const re_dfastate_t *state;
1215	int idx;
1216	{
1217	int i;
1218	unsigned int context;
1219	#ifdef DEBUG
1220	assert (state->halt);
1221	#endif
1222	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1223	for (i = 0; i < state->nodes.nelem; ++i)
1224	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1225	return state->nodes.elems[i];
1226	return 0;
1227	}
1228
1229	/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1230	corresponding to the DFA).
1231	Return the destination node, and update EPS_VIA_NODES, return -1 in case
1232	of errors. */
1233
1234	static int
1235	proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
1236	const re_match_context_t *mctx;
1237	regmatch_t *regs;
1238	int nregs, *pidx, node;
1239	re_node_set *eps_via_nodes;
1240	struct re_fail_stack_t *fs;
1241	{
1242	re_dfa_t *const dfa = mctx->dfa;
1243	int i, err, dest_node;
1244	dest_node = -1;
1245	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1246	{
1247	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1248	re_node_set *edests = &dfa->edests[node];
1249	int dest_node;
1250	err = re_node_set_insert (eps_via_nodes, node);
1251	if (BE (err < 0, 0))
1252	return -2;
1253	/* Pick up a valid destination, or return -1 if none is found. */
1254	for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1255	{
1256	int candidate = edests->elems[i];
1257	if (!re_node_set_contains (cur_nodes, candidate))
1258	continue;
1259	if (dest_node == -1)
1260	dest_node = candidate;
1261
1262	else
1263	{
1264	/* In order to avoid infinite loop like "(a)", return the second
1265	epsilon-transition if the first was already considered. */
1266	if (re_node_set_contains (eps_via_nodes, dest_node))
1267	return candidate;
1268
1269	/* Otherwise, push the second epsilon-transition on the fail stack. */
1270	else if (fs != NULL
1271	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1272	eps_via_nodes))
1273	return -2;
1274
1275	/* We know we are going to exit. */
1276	break;
1277	}
1278	}
1279	return dest_node;
1280	}
1281	else
1282	{
1283	int naccepted = 0;
1284	re_token_type_t type = dfa->nodes[node].type;
1285
1286	#ifdef RE_ENABLE_I18N
1287	if (dfa->nodes[node].accept_mb)
1288	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1289	else
1290	#endif /* RE_ENABLE_I18N */
1291	if (type == OP_BACK_REF)
1292	{
1293	int subexp_idx = dfa->nodes[node].opr.idx + 1;
1294	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1295	if (fs != NULL)
1296	{
1297	if (regs[subexp_idx].rm_so == -1 \|\| regs[subexp_idx].rm_eo == -1)
1298	return -1;
1299	else if (naccepted)
1300	{
1301	char buf = (char ) re_string_get_buffer (&mctx->input);
1302	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1303	naccepted) != 0)
1304	return -1;
1305	}
1306	}
1307
1308	if (naccepted == 0)
1309	{
1310	err = re_node_set_insert (eps_via_nodes, node);
1311	if (BE (err < 0, 0))
1312	return -2;
1313	dest_node = dfa->edests[node].elems[0];
1314	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1315	dest_node))
1316	return dest_node;
1317	}
1318	}
1319
1320	if (naccepted != 0
1321	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1322	{
1323	dest_node = dfa->nexts[node];
1324	pidx = (naccepted == 0) ? pidx + 1 : *pidx + naccepted;
1325	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1326	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1327	dest_node)))
1328	return -1;
1329	re_node_set_empty (eps_via_nodes);
1330	return dest_node;
1331	}
1332	}
1333	return -1;
1334	}
1335
1336	static reg_errcode_t
1337	push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
1338	struct re_fail_stack_t *fs;
1339	int str_idx, dest_node, nregs;
1340	regmatch_t *regs;
1341	re_node_set *eps_via_nodes;
1342	{
1343	reg_errcode_t err;
1344	int num = fs->num++;
1345	if (fs->num == fs->alloc)
1346	{
1347	struct re_fail_stack_ent_t *new_array;
1348	new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1349	* fs->alloc * 2));
1350	if (new_array == NULL)
1351	return REG_ESPACE;
1352	fs->alloc *= 2;
1353	fs->stack = new_array;
1354	}
1355	fs->stack[num].idx = str_idx;
1356	fs->stack[num].node = dest_node;
1357	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1358	if (fs->stack[num].regs == NULL)
1359	return REG_ESPACE;
1360	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1361	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1362	return err;
1363	}
1364
1365	static int
1366	pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1367	struct re_fail_stack_t *fs;
1368	int *pidx, nregs;
1369	regmatch_t *regs;
1370	re_node_set *eps_via_nodes;
1371	{
1372	int num = --fs->num;
1373	assert (num >= 0);
1374	*pidx = fs->stack[num].idx;
1375	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1376	re_node_set_free (eps_via_nodes);
1377	re_free (fs->stack[num].regs);
1378	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1379	return fs->stack[num].node;
1380	}
1381
1382	/* Set the positions where the subexpressions are starts/ends to registers
1383	PMATCH.
1384	Note: We assume that pmatch[0] is already set, and
1385	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1386
1387	static reg_errcode_t
1388	set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1389	const regex_t *preg;
1390	const re_match_context_t *mctx;
1391	size_t nmatch;
1392	regmatch_t *pmatch;
1393	int fl_backtrack;
1394	{
1395	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1396	int idx, cur_node;
1397	re_node_set eps_via_nodes;
1398	struct re_fail_stack_t *fs;
1399	struct re_fail_stack_t fs_body = { 0, 2, NULL };
1400	regmatch_t *prev_idx_match;
1401
1402	#ifdef DEBUG
1403	assert (nmatch > 1);
1404	assert (mctx->state_log != NULL);
1405	#endif
1406	if (fl_backtrack)
1407	{
1408	fs = &fs_body;
1409	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1410	if (fs->stack == NULL)
1411	return REG_ESPACE;
1412	}
1413	else
1414	fs = NULL;
1415
1416	cur_node = dfa->init_node;
1417	re_node_set_init_empty (&eps_via_nodes);
1418
1419	prev_idx_match = (regmatch_t ) alloca (sizeof (regmatch_t) nmatch);
1420	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1421
1422	for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1423	{
1424	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1425
1426	if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1427	{
1428	int reg_idx;
1429	if (fs)
1430	{
1431	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1432	if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1433	break;
1434	if (reg_idx == nmatch)
1435	{
1436	re_node_set_free (&eps_via_nodes);
1437	return free_fail_stack_return (fs);
1438	}
1439	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1440	&eps_via_nodes);
1441	}
1442	else
1443	{
1444	re_node_set_free (&eps_via_nodes);
1445	return REG_NOERROR;
1446	}
1447	}
1448
1449	/* Proceed to next node. */
1450	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1451	&eps_via_nodes, fs);
1452
1453	if (BE (cur_node < 0, 0))
1454	{
1455	if (BE (cur_node == -2, 0))
1456	{
1457	re_node_set_free (&eps_via_nodes);
1458	free_fail_stack_return (fs);
1459	return REG_ESPACE;
1460	}
1461	if (fs)
1462	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1463	&eps_via_nodes);
1464	else
1465	{
1466	re_node_set_free (&eps_via_nodes);
1467	return REG_NOMATCH;
1468	}
1469	}
1470	}
1471	re_node_set_free (&eps_via_nodes);
1472	return free_fail_stack_return (fs);
1473	}
1474
1475	static reg_errcode_t
1476	free_fail_stack_return (fs)
1477	struct re_fail_stack_t *fs;
1478	{
1479	if (fs)
1480	{
1481	int fs_idx;
1482	for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1483	{
1484	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1485	re_free (fs->stack[fs_idx].regs);
1486	}
1487	re_free (fs->stack);
1488	}
1489	return REG_NOERROR;
1490	}
1491
1492	static void
1493	update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
1494	re_dfa_t *dfa;
1495	regmatch_t pmatch, prev_idx_match;
1496	int cur_node, cur_idx, nmatch;
1497	{
1498	int type = dfa->nodes[cur_node].type;
1499	if (type == OP_OPEN_SUBEXP)
1500	{
1501	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1502
1503	/* We are at the first node of this sub expression. */
1504	if (reg_num < nmatch)
1505	{
1506	pmatch[reg_num].rm_so = cur_idx;
1507	pmatch[reg_num].rm_eo = -1;
1508	}
1509	}
1510	else if (type == OP_CLOSE_SUBEXP)
1511	{
1512	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1513	if (reg_num < nmatch)
1514	{
1515	/* We are at the last node of this sub expression. */
1516	if (pmatch[reg_num].rm_so < cur_idx)
1517	{
1518	pmatch[reg_num].rm_eo = cur_idx;
1519	/* This is a non-empty match or we are not inside an optional
1520	subexpression. Accept this right away. */
1521	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1522	}
1523	else
1524	{
1525	if (dfa->nodes[cur_node].opt_subexp
1526	&& prev_idx_match[reg_num].rm_so != -1)
1527	/* We transited through an empty match for an optional
1528	subexpression, like (a?)*, and this is not the subexp's
1529	first match. Copy back the old content of the registers
1530	so that matches of an inner subexpression are undone as
1531	well, like in ((a?)). /
1532	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1533	else
1534	/* We completed a subexpression, but it may be part of
1535	an optional one, so do not update PREV_IDX_MATCH. */
1536	pmatch[reg_num].rm_eo = cur_idx;
1537	}
1538	}
1539	}
1540	}
1541
1542	/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1543	and sift the nodes in each states according to the following rules.
1544	Updated state_log will be wrote to STATE_LOG.
1545
1546	Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1547	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1548	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1549	the LAST_NODE, we throw away the node `a'.
1550	2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1551	string `s' and transit to `b':
1552	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1553	away the node `a'.
1554	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1555	thrown away, we throw away the node `a'.
1556	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1557	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1558	node `a'.
1559	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1560	we throw away the node `a'. */
1561
1562	#define STATE_NODE_CONTAINS(state,node) \
1563	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1564
1565	static reg_errcode_t
1566	sift_states_backward (mctx, sctx)
1567	re_match_context_t *mctx;
1568	re_sift_context_t *sctx;
1569	{
1570	reg_errcode_t err;
1571	int null_cnt = 0;
1572	int str_idx = sctx->last_str_idx;
1573	re_node_set cur_dest;
1574
1575	#ifdef DEBUG
1576	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1577	#endif
1578
1579	/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1580	transit to the last_node and the last_node itself. */
1581	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1582	if (BE (err != REG_NOERROR, 0))
1583	return err;
1584	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1585	if (BE (err != REG_NOERROR, 0))
1586	goto free_return;
1587
1588	/* Then check each states in the state_log. */
1589	while (str_idx > 0)
1590	{
1591	/* Update counters. */
1592	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1593	if (null_cnt > mctx->max_mb_elem_len)
1594	{
1595	memset (sctx->sifted_states, '\0',
1596	sizeof (re_dfastate_t ) str_idx);
1597	re_node_set_free (&cur_dest);
1598	return REG_NOERROR;
1599	}
1600	re_node_set_empty (&cur_dest);
1601	--str_idx;
1602
1603	if (mctx->state_log[str_idx])
1604	{
1605	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1606	if (BE (err != REG_NOERROR, 0))
1607	goto free_return;
1608	}
1609
1610	/* Add all the nodes which satisfy the following conditions:
1611	- It can epsilon transit to a node in CUR_DEST.
1612	- It is in CUR_SRC.
1613	And update state_log. */
1614	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1615	if (BE (err != REG_NOERROR, 0))
1616	goto free_return;
1617	}
1618	err = REG_NOERROR;
1619	free_return:
1620	re_node_set_free (&cur_dest);
1621	return err;
1622	}
1623
1624	static reg_errcode_t
1625	build_sifted_states (mctx, sctx, str_idx, cur_dest)
1626	re_match_context_t *mctx;
1627	re_sift_context_t *sctx;
1628	int str_idx;
1629	re_node_set *cur_dest;
1630	{
1631	re_dfa_t *const dfa = mctx->dfa;
1632	re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1633	int i;
1634
1635	/* Then build the next sifted state.
1636	We build the next sifted state on `cur_dest', and update
1637	`sifted_states[str_idx]' with `cur_dest'.
1638	Note:
1639	`cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1640	`cur_src' points the node_set of the old `state_log[str_idx]'
1641	(with the epsilon nodes pre-filtered out). */
1642	for (i = 0; i < cur_src->nelem; i++)
1643	{
1644	int prev_node = cur_src->elems[i];
1645	int naccepted = 0;
1646	int ret;
1647
1648	#ifdef DEBUG
1649	re_token_type_t type = dfa->nodes[prev_node].type;
1650	assert (!IS_EPSILON_NODE (type));
1651	#endif
1652	#ifdef RE_ENABLE_I18N
1653	/* If the node may accept `multi byte'. */
1654	if (dfa->nodes[prev_node].accept_mb)
1655	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1656	str_idx, sctx->last_str_idx);
1657	#endif /* RE_ENABLE_I18N */
1658
1659	/* We don't check backreferences here.
1660	See update_cur_sifted_state(). */
1661	if (!naccepted
1662	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1663	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1664	dfa->nexts[prev_node]))
1665	naccepted = 1;
1666
1667	if (naccepted == 0)
1668	continue;
1669
1670	if (sctx->limits.nelem)
1671	{
1672	int to_idx = str_idx + naccepted;
1673	if (check_dst_limits (mctx, &sctx->limits,
1674	dfa->nexts[prev_node], to_idx,
1675	prev_node, str_idx))
1676	continue;
1677	}
1678	ret = re_node_set_insert (cur_dest, prev_node);
1679	if (BE (ret == -1, 0))
1680	return REG_ESPACE;
1681	}
1682
1683	return REG_NOERROR;
1684	}
1685
1686	/* Helper functions. */
1687
1688	static reg_errcode_t
1689	clean_state_log_if_needed (mctx, next_state_log_idx)
1690	re_match_context_t *mctx;
1691	int next_state_log_idx;
1692	{
1693	int top = mctx->state_log_top;
1694
1695	if (next_state_log_idx >= mctx->input.bufs_len
1696	\|\| (next_state_log_idx >= mctx->input.valid_len
1697	&& mctx->input.valid_len < mctx->input.len))
1698	{
1699	reg_errcode_t err;
1700	err = extend_buffers (mctx);
1701	if (BE (err != REG_NOERROR, 0))
1702	return err;
1703	}
1704
1705	if (top < next_state_log_idx)
1706	{
1707	memset (mctx->state_log + top + 1, '\0',
1708	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1709	mctx->state_log_top = next_state_log_idx;
1710	}
1711	return REG_NOERROR;
1712	}
1713
1714	static reg_errcode_t
1715	merge_state_array (dfa, dst, src, num)
1716	re_dfa_t *dfa;
1717	re_dfastate_t **dst;
1718	re_dfastate_t **src;
1719	int num;
1720	{
1721	int st_idx;
1722	reg_errcode_t err;
1723	for (st_idx = 0; st_idx < num; ++st_idx)
1724	{
1725	if (dst[st_idx] == NULL)
1726	dst[st_idx] = src[st_idx];
1727	else if (src[st_idx] != NULL)
1728	{
1729	re_node_set merged_set;
1730	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1731	&src[st_idx]->nodes);
1732	if (BE (err != REG_NOERROR, 0))
1733	return err;
1734	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1735	re_node_set_free (&merged_set);
1736	if (BE (err != REG_NOERROR, 0))
1737	return err;
1738	}
1739	}
1740	return REG_NOERROR;
1741	}
1742
1743	static reg_errcode_t
1744	update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
1745	re_match_context_t *mctx;
1746	re_sift_context_t *sctx;
1747	int str_idx;
1748	re_node_set *dest_nodes;
1749	{
1750	re_dfa_t *const dfa = mctx->dfa;
1751	reg_errcode_t err;
1752	const re_node_set *candidates;
1753	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1754	: &mctx->state_log[str_idx]->nodes);
1755
1756	if (dest_nodes->nelem == 0)
1757	sctx->sifted_states[str_idx] = NULL;
1758	else
1759	{
1760	if (candidates)
1761	{
1762	/* At first, add the nodes which can epsilon transit to a node in
1763	DEST_NODE. */
1764	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1765	if (BE (err != REG_NOERROR, 0))
1766	return err;
1767
1768	/* Then, check the limitations in the current sift_context. */
1769	if (sctx->limits.nelem)
1770	{
1771	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1772	mctx->bkref_ents, str_idx);
1773	if (BE (err != REG_NOERROR, 0))
1774	return err;
1775	}
1776	}
1777
1778	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1779	if (BE (err != REG_NOERROR, 0))
1780	return err;
1781	}
1782
1783	if (candidates && mctx->state_log[str_idx]->has_backref)
1784	{
1785	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1786	if (BE (err != REG_NOERROR, 0))
1787	return err;
1788	}
1789	return REG_NOERROR;
1790	}
1791
1792	static reg_errcode_t
1793	add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1794	re_dfa_t *dfa;
1795	re_node_set *dest_nodes;
1796	const re_node_set *candidates;
1797	{
1798	reg_errcode_t err = REG_NOERROR;
1799	int i;
1800
1801	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1802	if (BE (err != REG_NOERROR, 0))
1803	return err;
1804
1805	if (!state->inveclosure.alloc)
1806	{
1807	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1808	if (BE (err != REG_NOERROR, 0))
1809	return REG_ESPACE;
1810	for (i = 0; i < dest_nodes->nelem; i++)
1811	re_node_set_merge (&state->inveclosure,
1812	dfa->inveclosures + dest_nodes->elems[i]);
1813	}
1814	return re_node_set_add_intersect (dest_nodes, candidates,
1815	&state->inveclosure);
1816	}
1817
1818	static reg_errcode_t
1819	sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1820	re_dfa_t *dfa;
1821	int node;
1822	re_node_set *dest_nodes;
1823	const re_node_set *candidates;
1824	{
1825	int ecl_idx;
1826	reg_errcode_t err;
1827	re_node_set *inv_eclosure = dfa->inveclosures + node;
1828	re_node_set except_nodes;
1829	re_node_set_init_empty (&except_nodes);
1830	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1831	{
1832	int cur_node = inv_eclosure->elems[ecl_idx];
1833	if (cur_node == node)
1834	continue;
1835	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1836	{
1837	int edst1 = dfa->edests[cur_node].elems[0];
1838	int edst2 = ((dfa->edests[cur_node].nelem > 1)
1839	? dfa->edests[cur_node].elems[1] : -1);
1840	if ((!re_node_set_contains (inv_eclosure, edst1)
1841	&& re_node_set_contains (dest_nodes, edst1))
1842	\|\| (edst2 > 0
1843	&& !re_node_set_contains (inv_eclosure, edst2)
1844	&& re_node_set_contains (dest_nodes, edst2)))
1845	{
1846	err = re_node_set_add_intersect (&except_nodes, candidates,
1847	dfa->inveclosures + cur_node);
1848	if (BE (err != REG_NOERROR, 0))
1849	{
1850	re_node_set_free (&except_nodes);
1851	return err;
1852	}
1853	}
1854	}
1855	}
1856	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1857	{
1858	int cur_node = inv_eclosure->elems[ecl_idx];
1859	if (!re_node_set_contains (&except_nodes, cur_node))
1860	{
1861	int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1862	re_node_set_remove_at (dest_nodes, idx);
1863	}
1864	}
1865	re_node_set_free (&except_nodes);
1866	return REG_NOERROR;
1867	}
1868
1869	static int
1870	check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
1871	re_match_context_t *mctx;
1872	re_node_set *limits;
1873	int dst_node, dst_idx, src_node, src_idx;
1874	{
1875	re_dfa_t *const dfa = mctx->dfa;
1876	int lim_idx, src_pos, dst_pos;
1877
1878	int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1879	int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1880	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1881	{
1882	int subexp_idx;
1883	struct re_backref_cache_entry *ent;
1884	ent = mctx->bkref_ents + limits->elems[lim_idx];
1885	subexp_idx = dfa->nodes[ent->node].opr.idx;
1886
1887	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1888	subexp_idx, dst_node, dst_idx,
1889	dst_bkref_idx);
1890	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1891	subexp_idx, src_node, src_idx,
1892	src_bkref_idx);
1893
1894	/* In case of:
1895	<src> <dst> ( <subexp> )
1896	( <subexp> ) <src> <dst>
1897	( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1898	if (src_pos == dst_pos)
1899	continue; /* This is unrelated limitation. */
1900	else
1901	return 1;
1902	}
1903	return 0;
1904	}
1905
1906	static int
1907	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
1908	re_match_context_t *mctx;
1909	int boundaries, subexp_idx, from_node, bkref_idx;
1910	{
1911	re_dfa_t *const dfa = mctx->dfa;
1912	re_node_set *eclosures = dfa->eclosures + from_node;
1913	int node_idx;
1914
1915	/* Else, we are on the boundary: examine the nodes on the epsilon
1916	closure. */
1917	for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1918	{
1919	int node = eclosures->elems[node_idx];
1920	switch (dfa->nodes[node].type)
1921	{
1922	case OP_BACK_REF:
1923	if (bkref_idx != -1)
1924	{
1925	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1926	do
1927	{
1928	int dst, cpos;
1929
1930	if (ent->node != node)
1931	continue;
1932
1933	if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
1934	&& !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
1935	continue;
1936
1937	/* Recurse trying to reach the OP_OPEN_SUBEXP and
1938	OP_CLOSE_SUBEXP cases below. But, if the
1939	destination node is the same node as the source
1940	node, don't recurse because it would cause an
1941	infinite loop: a regex that exhibits this behavior
1942	is ()\1\1 */
1943	dst = dfa->edests[node].elems[0];
1944	if (dst == from_node)
1945	{
1946	if (boundaries & 1)
1947	return -1;
1948	else /* if (boundaries & 2) */
1949	return 0;
1950	}
1951
1952	cpos =
1953	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1954	dst, bkref_idx);
1955	if (cpos == -1 /* && (boundaries & 1) */)
1956	return -1;
1957	if (cpos == 0 && (boundaries & 2))
1958	return 0;
1959
1960	ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
1961	}
1962	while (ent++->more);
1963	}
1964	break;
1965
1966	case OP_OPEN_SUBEXP:
1967	if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1968	return -1;
1969	break;
1970
1971	case OP_CLOSE_SUBEXP:
1972	if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1973	return 0;
1974	break;
1975
1976	default:
1977	break;
1978	}
1979	}
1980
1981	return (boundaries & 2) ? 1 : 0;
1982	}
1983
1984	static int
1985	check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
1986	re_match_context_t *mctx;
1987	int limit, subexp_idx, from_node, str_idx, bkref_idx;
1988	{
1989	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1990	int boundaries;
1991
1992	/* If we are outside the range of the subexpression, return -1 or 1. */
1993	if (str_idx < lim->subexp_from)
1994	return -1;
1995
1996	if (lim->subexp_to < str_idx)
1997	return 1;
1998
1999	/* If we are within the subexpression, return 0. */
2000	boundaries = (str_idx == lim->subexp_from);
2001	boundaries \|= (str_idx == lim->subexp_to) << 1;
2002	if (boundaries == 0)
2003	return 0;
2004
2005	/* Else, examine epsilon closure. */
2006	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2007	from_node, bkref_idx);
2008	}
2009
2010	/* Check the limitations of sub expressions LIMITS, and remove the nodes
2011	which are against limitations from DEST_NODES. */
2012
2013	static reg_errcode_t
2014	check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
2015	re_dfa_t *dfa;
2016	re_node_set *dest_nodes;
2017	const re_node_set *candidates;
2018	re_node_set *limits;
2019	struct re_backref_cache_entry *bkref_ents;
2020	int str_idx;
2021	{
2022	reg_errcode_t err;
2023	int node_idx, lim_idx;
2024
2025	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2026	{
2027	int subexp_idx;
2028	struct re_backref_cache_entry *ent;
2029	ent = bkref_ents + limits->elems[lim_idx];
2030
2031	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2032	continue; /* This is unrelated limitation. */
2033
2034	subexp_idx = dfa->nodes[ent->node].opr.idx;
2035	if (ent->subexp_to == str_idx)
2036	{
2037	int ops_node = -1;
2038	int cls_node = -1;
2039	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2040	{
2041	int node = dest_nodes->elems[node_idx];
2042	re_token_type_t type = dfa->nodes[node].type;
2043	if (type == OP_OPEN_SUBEXP
2044	&& subexp_idx == dfa->nodes[node].opr.idx)
2045	ops_node = node;
2046	else if (type == OP_CLOSE_SUBEXP
2047	&& subexp_idx == dfa->nodes[node].opr.idx)
2048	cls_node = node;
2049	}
2050
2051	/* Check the limitation of the open subexpression. */
2052	/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2053	if (ops_node >= 0)
2054	{
2055	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2056	candidates);
2057	if (BE (err != REG_NOERROR, 0))
2058	return err;
2059	}
2060
2061	/* Check the limitation of the close subexpression. */
2062	if (cls_node >= 0)
2063	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2064	{
2065	int node = dest_nodes->elems[node_idx];
2066	if (!re_node_set_contains (dfa->inveclosures + node,
2067	cls_node)
2068	&& !re_node_set_contains (dfa->eclosures + node,
2069	cls_node))
2070	{
2071	/* It is against this limitation.
2072	Remove it form the current sifted state. */
2073	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2074	candidates);
2075	if (BE (err != REG_NOERROR, 0))
2076	return err;
2077	--node_idx;
2078	}
2079	}
2080	}
2081	else /* (ent->subexp_to != str_idx) */
2082	{
2083	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2084	{
2085	int node = dest_nodes->elems[node_idx];
2086	re_token_type_t type = dfa->nodes[node].type;
2087	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2088	{
2089	if (subexp_idx != dfa->nodes[node].opr.idx)
2090	continue;
2091	/* It is against this limitation.
2092	Remove it form the current sifted state. */
2093	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2094	candidates);
2095	if (BE (err != REG_NOERROR, 0))
2096	return err;
2097	}
2098	}
2099	}
2100	}
2101	return REG_NOERROR;
2102	}
2103
2104	static reg_errcode_t
2105	sift_states_bkref (mctx, sctx, str_idx, candidates)
2106	re_match_context_t *mctx;
2107	re_sift_context_t *sctx;
2108	int str_idx;
2109	const re_node_set *candidates;
2110	{
2111	re_dfa_t *const dfa = mctx->dfa;
2112	reg_errcode_t err;
2113	int node_idx, node;
2114	re_sift_context_t local_sctx;
2115	int first_idx = search_cur_bkref_entry (mctx, str_idx);
2116
2117	if (first_idx == -1)
2118	return REG_NOERROR;
2119
2120	local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2121
2122	for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2123	{
2124	int enabled_idx;
2125	re_token_type_t type;
2126	struct re_backref_cache_entry *entry;
2127	node = candidates->elems[node_idx];
2128	type = dfa->nodes[node].type;
2129	/* Avoid infinite loop for the REs like "()\1+". */
2130	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2131	continue;
2132	if (type != OP_BACK_REF)
2133	continue;
2134
2135	entry = mctx->bkref_ents + first_idx;
2136	enabled_idx = first_idx;
2137	do
2138	{
2139	int subexp_len, to_idx, dst_node;
2140	re_dfastate_t *cur_state;
2141
2142	if (entry->node != node)
2143	continue;
2144	subexp_len = entry->subexp_to - entry->subexp_from;
2145	to_idx = str_idx + subexp_len;
2146	dst_node = (subexp_len ? dfa->nexts[node]
2147	: dfa->edests[node].elems[0]);
2148
2149	if (to_idx > sctx->last_str_idx
2150	\|\| sctx->sifted_states[to_idx] == NULL
2151	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2152	\|\| check_dst_limits (mctx, &sctx->limits, node,
2153	str_idx, dst_node, to_idx))
2154	continue;
2155
2156	if (local_sctx.sifted_states == NULL)
2157	{
2158	local_sctx = *sctx;
2159	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2160	if (BE (err != REG_NOERROR, 0))
2161	goto free_return;
2162	}
2163	local_sctx.last_node = node;
2164	local_sctx.last_str_idx = str_idx;
2165	err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2166	if (BE (err < 0, 0))
2167	{
2168	err = REG_ESPACE;
2169	goto free_return;
2170	}
2171	cur_state = local_sctx.sifted_states[str_idx];
2172	err = sift_states_backward (mctx, &local_sctx);
2173	if (BE (err != REG_NOERROR, 0))
2174	goto free_return;
2175	if (sctx->limited_states != NULL)
2176	{
2177	err = merge_state_array (dfa, sctx->limited_states,
2178	local_sctx.sifted_states,
2179	str_idx + 1);
2180	if (BE (err != REG_NOERROR, 0))
2181	goto free_return;
2182	}
2183	local_sctx.sifted_states[str_idx] = cur_state;
2184	re_node_set_remove (&local_sctx.limits, enabled_idx);
2185
2186	/* mctx->bkref_ents may have changed, reload the pointer. */
2187	entry = mctx->bkref_ents + enabled_idx;
2188	}
2189	while (enabled_idx++, entry++->more);
2190	}
2191	err = REG_NOERROR;
2192	free_return:
2193	if (local_sctx.sifted_states != NULL)
2194	{
2195	re_node_set_free (&local_sctx.limits);
2196	}
2197
2198	return err;
2199	}
2200
2201
2202	#ifdef RE_ENABLE_I18N
2203	static int
2204	sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
2205	const re_match_context_t *mctx;
2206	re_sift_context_t *sctx;
2207	int node_idx, str_idx, max_str_idx;
2208	{
2209	re_dfa_t *const dfa = mctx->dfa;
2210	int naccepted;
2211	/* Check the node can accept `multi byte'. */
2212	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2213	if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2214	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2215	dfa->nexts[node_idx]))
2216	/* The node can't accept the `multi byte', or the
2217	destination was already thrown away, then the node
2218	could't accept the current input `multi byte'. */
2219	naccepted = 0;
2220	/* Otherwise, it is sure that the node could accept
2221	`naccepted' bytes input. */
2222	return naccepted;
2223	}
2224	#endif /* RE_ENABLE_I18N */
2225
2226
2227
2228	/* Functions for state transition. */
2229
2230	/* Return the next state to which the current state STATE will transit by
2231	accepting the current input byte, and update STATE_LOG if necessary.
2232	If STATE can accept a multibyte char/collating element/back reference
2233	update the destination of STATE_LOG. */
2234
2235	static re_dfastate_t *
2236	transit_state (err, mctx, state)
2237	reg_errcode_t *err;
2238	re_match_context_t *mctx;
2239	re_dfastate_t *state;
2240	{
2241	re_dfastate_t **trtable;
2242	unsigned char ch;
2243
2244	#ifdef RE_ENABLE_I18N
2245	/* If the current state can accept multibyte. */
2246	if (BE (state->accept_mb, 0))
2247	{
2248	*err = transit_state_mb (mctx, state);
2249	if (BE (*err != REG_NOERROR, 0))
2250	return NULL;
2251	}
2252	#endif /* RE_ENABLE_I18N */
2253
2254	/* Then decide the next state with the single byte. */
2255	#if 0
2256	if (0)
2257	/* don't use transition table */
2258	return transit_state_sb (err, mctx, state);
2259	#endif
2260
2261	/* Use transition table */
2262	ch = re_string_fetch_byte (&mctx->input);
2263	for (;;)
2264	{
2265	trtable = state->trtable;
2266	if (BE (trtable != NULL, 1))
2267	return trtable[ch];
2268
2269	trtable = state->word_trtable;
2270	if (BE (trtable != NULL, 1))
2271	{
2272	unsigned int context;
2273	context
2274	= re_string_context_at (&mctx->input,
2275	re_string_cur_idx (&mctx->input) - 1,
2276	mctx->eflags);
2277	if (IS_WORD_CONTEXT (context))
2278	return trtable[ch + SBC_MAX];
2279	else
2280	return trtable[ch];
2281	}
2282
2283	if (!build_trtable (mctx->dfa, state))
2284	{
2285	*err = REG_ESPACE;
2286	return NULL;
2287	}
2288
2289	/* Retry, we now have a transition table. */
2290	}
2291	}
2292
2293	/* Update the state_log if we need */
2294	re_dfastate_t *
2295	merge_state_with_log (err, mctx, next_state)
2296	reg_errcode_t *err;
2297	re_match_context_t *mctx;
2298	re_dfastate_t *next_state;
2299	{
2300	re_dfa_t *const dfa = mctx->dfa;
2301	int cur_idx = re_string_cur_idx (&mctx->input);
2302
2303	if (cur_idx > mctx->state_log_top)
2304	{
2305	mctx->state_log[cur_idx] = next_state;
2306	mctx->state_log_top = cur_idx;
2307	}
2308	else if (mctx->state_log[cur_idx] == 0)
2309	{
2310	mctx->state_log[cur_idx] = next_state;
2311	}
2312	else
2313	{
2314	re_dfastate_t *pstate;
2315	unsigned int context;
2316	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2317	/* If (state_log[cur_idx] != 0), it implies that cur_idx is
2318	the destination of a multibyte char/collating element/
2319	back reference. Then the next state is the union set of
2320	these destinations and the results of the transition table. */
2321	pstate = mctx->state_log[cur_idx];
2322	log_nodes = pstate->entrance_nodes;
2323	if (next_state != NULL)
2324	{
2325	table_nodes = next_state->entrance_nodes;
2326	*err = re_node_set_init_union (&next_nodes, table_nodes,
2327	log_nodes);
2328	if (BE (*err != REG_NOERROR, 0))
2329	return NULL;
2330	}
2331	else
2332	next_nodes = *log_nodes;
2333	/* Note: We already add the nodes of the initial state,
2334	then we don't need to add them here. */
2335
2336	context = re_string_context_at (&mctx->input,
2337	re_string_cur_idx (&mctx->input) - 1,
2338	mctx->eflags);
2339	next_state = mctx->state_log[cur_idx]
2340	= re_acquire_state_context (err, dfa, &next_nodes, context);
2341	/* We don't need to check errors here, since the return value of
2342	this function is next_state and ERR is already set. */
2343
2344	if (table_nodes != NULL)
2345	re_node_set_free (&next_nodes);
2346	}
2347
2348	if (BE (dfa->nbackref, 0) && next_state != NULL)
2349	{
2350	/* Check OP_OPEN_SUBEXP in the current state in case that we use them
2351	later. We must check them here, since the back references in the
2352	next state might use them. */
2353	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2354	cur_idx);
2355	if (BE (*err != REG_NOERROR, 0))
2356	return NULL;
2357
2358	/* If the next state has back references. */
2359	if (next_state->has_backref)
2360	{
2361	*err = transit_state_bkref (mctx, &next_state->nodes);
2362	if (BE (*err != REG_NOERROR, 0))
2363	return NULL;
2364	next_state = mctx->state_log[cur_idx];
2365	}
2366	}
2367
2368	return next_state;
2369	}
2370
2371	/* Skip bytes in the input that correspond to part of a
2372	multi-byte match, then look in the log for a state
2373	from which to restart matching. */
2374	re_dfastate_t *
2375	find_recover_state (err, mctx)
2376	reg_errcode_t *err;
2377	re_match_context_t *mctx;
2378	{
2379	re_dfastate_t *cur_state = NULL;
2380	do
2381	{
2382	int max = mctx->state_log_top;
2383	int cur_str_idx = re_string_cur_idx (&mctx->input);
2384
2385	do
2386	{
2387	if (++cur_str_idx > max)
2388	return NULL;
2389	re_string_skip_bytes (&mctx->input, 1);
2390	}
2391	while (mctx->state_log[cur_str_idx] == NULL);
2392
2393	cur_state = merge_state_with_log (err, mctx, NULL);
2394	}
2395	while (err == REG_NOERROR && cur_state == NULL);
2396	return cur_state;
2397	}
2398
2399	/* Helper functions for transit_state. */
2400
2401	/* From the node set CUR_NODES, pick up the nodes whose types are
2402	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2403	expression. And register them to use them later for evaluating the
2404	correspoding back references. */
2405
2406	static reg_errcode_t
2407	check_subexp_matching_top (mctx, cur_nodes, str_idx)
2408	re_match_context_t *mctx;
2409	re_node_set *cur_nodes;
2410	int str_idx;
2411	{
2412	re_dfa_t *const dfa = mctx->dfa;
2413	int node_idx;
2414	reg_errcode_t err;
2415
2416	/* TODO: This isn't efficient.
2417	Because there might be more than one nodes whose types are
2418	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2419	nodes.
2420	E.g. RE: (a){2} */
2421	for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2422	{
2423	int node = cur_nodes->elems[node_idx];
2424	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2425	&& dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2426	&& dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2427	{
2428	err = match_ctx_add_subtop (mctx, node, str_idx);
2429	if (BE (err != REG_NOERROR, 0))
2430	return err;
2431	}
2432	}
2433	return REG_NOERROR;
2434	}
2435
2436	#if 0
2437	/* Return the next state to which the current state STATE will transit by
2438	accepting the current input byte. */
2439
2440	static re_dfastate_t *
2441	transit_state_sb (err, mctx, state)
2442	reg_errcode_t *err;
2443	re_match_context_t *mctx;
2444	re_dfastate_t *state;
2445	{
2446	re_dfa_t *const dfa = mctx->dfa;
2447	re_node_set next_nodes;
2448	re_dfastate_t *next_state;
2449	int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2450	unsigned int context;
2451
2452	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2453	if (BE (*err != REG_NOERROR, 0))
2454	return NULL;
2455	for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2456	{
2457	int cur_node = state->nodes.elems[node_cnt];
2458	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2459	{
2460	*err = re_node_set_merge (&next_nodes,
2461	dfa->eclosures + dfa->nexts[cur_node]);
2462	if (BE (*err != REG_NOERROR, 0))
2463	{
2464	re_node_set_free (&next_nodes);
2465	return NULL;
2466	}
2467	}
2468	}
2469	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2470	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2471	/* We don't need to check errors here, since the return value of
2472	this function is next_state and ERR is already set. */
2473
2474	re_node_set_free (&next_nodes);
2475	re_string_skip_bytes (&mctx->input, 1);
2476	return next_state;
2477	}
2478	#endif
2479
2480	#ifdef RE_ENABLE_I18N
2481	static reg_errcode_t
2482	transit_state_mb (mctx, pstate)
2483	re_match_context_t *mctx;
2484	re_dfastate_t *pstate;
2485	{
2486	re_dfa_t *const dfa = mctx->dfa;
2487	reg_errcode_t err;
2488	int i;
2489
2490	for (i = 0; i < pstate->nodes.nelem; ++i)
2491	{
2492	re_node_set dest_nodes, *new_nodes;
2493	int cur_node_idx = pstate->nodes.elems[i];
2494	int naccepted, dest_idx;
2495	unsigned int context;
2496	re_dfastate_t *dest_state;
2497
2498	if (!dfa->nodes[cur_node_idx].accept_mb)
2499	continue;
2500
2501	if (dfa->nodes[cur_node_idx].constraint)
2502	{
2503	context = re_string_context_at (&mctx->input,
2504	re_string_cur_idx (&mctx->input),
2505	mctx->eflags);
2506	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2507	context))
2508	continue;
2509	}
2510
2511	/* How many bytes the node can accept? */
2512	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2513	re_string_cur_idx (&mctx->input));
2514	if (naccepted == 0)
2515	continue;
2516
2517	/* The node can accepts `naccepted' bytes. */
2518	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2519	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2520	: mctx->max_mb_elem_len);
2521	err = clean_state_log_if_needed (mctx, dest_idx);
2522	if (BE (err != REG_NOERROR, 0))
2523	return err;
2524	#ifdef DEBUG
2525	assert (dfa->nexts[cur_node_idx] != -1);
2526	#endif
2527	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2528
2529	dest_state = mctx->state_log[dest_idx];
2530	if (dest_state == NULL)
2531	dest_nodes = *new_nodes;
2532	else
2533	{
2534	err = re_node_set_init_union (&dest_nodes,
2535	dest_state->entrance_nodes, new_nodes);
2536	if (BE (err != REG_NOERROR, 0))
2537	return err;
2538	}
2539	context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2540	mctx->state_log[dest_idx]
2541	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2542	if (dest_state != NULL)
2543	re_node_set_free (&dest_nodes);
2544	if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2545	return err;
2546	}
2547	return REG_NOERROR;
2548	}
2549	#endif /* RE_ENABLE_I18N */
2550
2551	static reg_errcode_t
2552	transit_state_bkref (mctx, nodes)
2553	re_match_context_t *mctx;
2554	const re_node_set *nodes;
2555	{
2556	re_dfa_t *const dfa = mctx->dfa;
2557	reg_errcode_t err;
2558	int i;
2559	int cur_str_idx = re_string_cur_idx (&mctx->input);
2560
2561	for (i = 0; i < nodes->nelem; ++i)
2562	{
2563	int dest_str_idx, prev_nelem, bkc_idx;
2564	int node_idx = nodes->elems[i];
2565	unsigned int context;
2566	const re_token_t *node = dfa->nodes + node_idx;
2567	re_node_set *new_dest_nodes;
2568
2569	/* Check whether `node' is a backreference or not. */
2570	if (node->type != OP_BACK_REF)
2571	continue;
2572
2573	if (node->constraint)
2574	{
2575	context = re_string_context_at (&mctx->input, cur_str_idx,
2576	mctx->eflags);
2577	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2578	continue;
2579	}
2580
2581	/* `node' is a backreference.
2582	Check the substring which the substring matched. */
2583	bkc_idx = mctx->nbkref_ents;
2584	err = get_subexp (mctx, node_idx, cur_str_idx);
2585	if (BE (err != REG_NOERROR, 0))
2586	goto free_return;
2587
2588	/* And add the epsilon closures (which is `new_dest_nodes') of
2589	the backreference to appropriate state_log. */
2590	#ifdef DEBUG
2591	assert (dfa->nexts[node_idx] != -1);
2592	#endif
2593	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2594	{
2595	int subexp_len;
2596	re_dfastate_t *dest_state;
2597	struct re_backref_cache_entry *bkref_ent;
2598	bkref_ent = mctx->bkref_ents + bkc_idx;
2599	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2600	continue;
2601	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2602	new_dest_nodes = (subexp_len == 0
2603	? dfa->eclosures + dfa->edests[node_idx].elems[0]
2604	: dfa->eclosures + dfa->nexts[node_idx]);
2605	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2606	- bkref_ent->subexp_from);
2607	context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2608	mctx->eflags);
2609	dest_state = mctx->state_log[dest_str_idx];
2610	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2611	: mctx->state_log[cur_str_idx]->nodes.nelem);
2612	/* Add `new_dest_node' to state_log. */
2613	if (dest_state == NULL)
2614	{
2615	mctx->state_log[dest_str_idx]
2616	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2617	context);
2618	if (BE (mctx->state_log[dest_str_idx] == NULL
2619	&& err != REG_NOERROR, 0))
2620	goto free_return;
2621	}
2622	else
2623	{
2624	re_node_set dest_nodes;
2625	err = re_node_set_init_union (&dest_nodes,
2626	dest_state->entrance_nodes,
2627	new_dest_nodes);
2628	if (BE (err != REG_NOERROR, 0))
2629	{
2630	re_node_set_free (&dest_nodes);
2631	goto free_return;
2632	}
2633	mctx->state_log[dest_str_idx]
2634	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2635	re_node_set_free (&dest_nodes);
2636	if (BE (mctx->state_log[dest_str_idx] == NULL
2637	&& err != REG_NOERROR, 0))
2638	goto free_return;
2639	}
2640	/* We need to check recursively if the backreference can epsilon
2641	transit. */
2642	if (subexp_len == 0
2643	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2644	{
2645	err = check_subexp_matching_top (mctx, new_dest_nodes,
2646	cur_str_idx);
2647	if (BE (err != REG_NOERROR, 0))
2648	goto free_return;
2649	err = transit_state_bkref (mctx, new_dest_nodes);
2650	if (BE (err != REG_NOERROR, 0))
2651	goto free_return;
2652	}
2653	}
2654	}
2655	err = REG_NOERROR;
2656	free_return:
2657	return err;
2658	}
2659
2660	/* Enumerate all the candidates which the backreference BKREF_NODE can match
2661	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2662	Note that we might collect inappropriate candidates here.
2663	However, the cost of checking them strictly here is too high, then we
2664	delay these checking for prune_impossible_nodes(). */
2665
2666	static reg_errcode_t
2667	get_subexp (mctx, bkref_node, bkref_str_idx)
2668	re_match_context_t *mctx;
2669	int bkref_node, bkref_str_idx;
2670	{
2671	re_dfa_t *const dfa = mctx->dfa;
2672	int subexp_num, sub_top_idx;
2673	const char buf = (const char ) re_string_get_buffer (&mctx->input);
2674	/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2675	int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2676	if (cache_idx != -1)
2677	{
2678	const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2679	do
2680	if (entry->node == bkref_node)
2681	return REG_NOERROR; /* We already checked it. */
2682	while (entry++->more);
2683	}
2684
2685	subexp_num = dfa->nodes[bkref_node].opr.idx;
2686
2687	/* For each sub expression */
2688	for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2689	{
2690	reg_errcode_t err;
2691	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2692	re_sub_match_last_t *sub_last;
2693	int sub_last_idx, sl_str, bkref_str_off;
2694
2695	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2696	continue; /* It isn't related. */
2697
2698	sl_str = sub_top->str_idx;
2699	bkref_str_off = bkref_str_idx;
2700	/* At first, check the last node of sub expressions we already
2701	evaluated. */
2702	for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2703	{
2704	int sl_str_diff;
2705	sub_last = sub_top->lasts[sub_last_idx];
2706	sl_str_diff = sub_last->str_idx - sl_str;
2707	/* The matched string by the sub expression match with the substring
2708	at the back reference? */
2709	if (sl_str_diff > 0)
2710	{
2711	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2712	{
2713	/* Not enough chars for a successful match. */
2714	if (bkref_str_off + sl_str_diff > mctx->input.len)
2715	break;
2716
2717	err = clean_state_log_if_needed (mctx,
2718	bkref_str_off
2719	+ sl_str_diff);
2720	if (BE (err != REG_NOERROR, 0))
2721	return err;
2722	buf = (const char *) re_string_get_buffer (&mctx->input);
2723	}
2724	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2725	break; /* We don't need to search this sub expression any more. */
2726	}
2727	bkref_str_off += sl_str_diff;
2728	sl_str += sl_str_diff;
2729	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2730	bkref_str_idx);
2731
2732	/* Reload buf, since the preceding call might have reallocated
2733	the buffer. */
2734	buf = (const char *) re_string_get_buffer (&mctx->input);
2735
2736	if (err == REG_NOMATCH)
2737	continue;
2738	if (BE (err != REG_NOERROR, 0))
2739	return err;
2740	}
2741
2742	if (sub_last_idx < sub_top->nlasts)
2743	continue;
2744	if (sub_last_idx > 0)
2745	++sl_str;
2746	/* Then, search for the other last nodes of the sub expression. */
2747	for (; sl_str <= bkref_str_idx; ++sl_str)
2748	{
2749	int cls_node, sl_str_off;
2750	const re_node_set *nodes;
2751	sl_str_off = sl_str - sub_top->str_idx;
2752	/* The matched string by the sub expression match with the substring
2753	at the back reference? */
2754	if (sl_str_off > 0)
2755	{
2756	if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2757	{
2758	/* If we are at the end of the input, we cannot match. */
2759	if (bkref_str_off >= mctx->input.len)
2760	break;
2761
2762	err = extend_buffers (mctx);
2763	if (BE (err != REG_NOERROR, 0))
2764	return err;
2765
2766	buf = (const char *) re_string_get_buffer (&mctx->input);
2767	}
2768	if (buf [bkref_str_off++] != buf[sl_str - 1])
2769	break; /* We don't need to search this sub expression
2770	any more. */
2771	}
2772	if (mctx->state_log[sl_str] == NULL)
2773	continue;
2774	/* Does this state have a ')' of the sub expression? */
2775	nodes = &mctx->state_log[sl_str]->nodes;
2776	cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2777	if (cls_node == -1)
2778	continue; /* No. */
2779	if (sub_top->path == NULL)
2780	{
2781	sub_top->path = calloc (sizeof (state_array_t),
2782	sl_str - sub_top->str_idx + 1);
2783	if (sub_top->path == NULL)
2784	return REG_ESPACE;
2785	}
2786	/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2787	in the current context? */
2788	err = check_arrival (mctx, sub_top->path, sub_top->node,
2789	sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2790	if (err == REG_NOMATCH)
2791	continue;
2792	if (BE (err != REG_NOERROR, 0))
2793	return err;
2794	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2795	if (BE (sub_last == NULL, 0))
2796	return REG_ESPACE;
2797	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2798	bkref_str_idx);
2799	if (err == REG_NOMATCH)
2800	continue;
2801	}
2802	}
2803	return REG_NOERROR;
2804	}
2805
2806	/* Helper functions for get_subexp(). */
2807
2808	/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2809	If it can arrive, register the sub expression expressed with SUB_TOP
2810	and SUB_LAST. */
2811
2812	static reg_errcode_t
2813	get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
2814	re_match_context_t *mctx;
2815	const re_sub_match_top_t *sub_top;
2816	re_sub_match_last_t *sub_last;
2817	int bkref_node, bkref_str;
2818	{
2819	reg_errcode_t err;
2820	int to_idx;
2821	/* Can the subexpression arrive the back reference? */
2822	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2823	sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2824	if (err != REG_NOERROR)
2825	return err;
2826	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2827	sub_last->str_idx);
2828	if (BE (err != REG_NOERROR, 0))
2829	return err;
2830	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2831	return clean_state_log_if_needed (mctx, to_idx);
2832	}
2833
2834	/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2835	Search '(' if FL_OPEN, or search ')' otherwise.
2836	TODO: This function isn't efficient...
2837	Because there might be more than one nodes whose types are
2838	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2839	nodes.
2840	E.g. RE: (a){2} */
2841
2842	static int
2843	find_subexp_node (dfa, nodes, subexp_idx, type)
2844	const re_dfa_t *dfa;
2845	const re_node_set *nodes;
2846	int subexp_idx, type;
2847	{
2848	int cls_idx;
2849	for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2850	{
2851	int cls_node = nodes->elems[cls_idx];
2852	const re_token_t *node = dfa->nodes + cls_node;
2853	if (node->type == type
2854	&& node->opr.idx == subexp_idx)
2855	return cls_node;
2856	}
2857	return -1;
2858	}
2859
2860	/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2861	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2862	heavily reused.
2863	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2864
2865	static reg_errcode_t
2866	check_arrival (mctx, path, top_node, top_str, last_node, last_str,
2867	type)
2868	re_match_context_t *mctx;
2869	state_array_t *path;
2870	int top_node, top_str, last_node, last_str, type;
2871	{
2872	re_dfa_t *const dfa = mctx->dfa;
2873	reg_errcode_t err;
2874	int subexp_num, backup_cur_idx, str_idx, null_cnt;
2875	re_dfastate_t *cur_state = NULL;
2876	re_node_set *cur_nodes, next_nodes;
2877	re_dfastate_t **backup_state_log;
2878	unsigned int context;
2879
2880	subexp_num = dfa->nodes[top_node].opr.idx;
2881	/* Extend the buffer if we need. */
2882	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2883	{
2884	re_dfastate_t **new_array;
2885	int old_alloc = path->alloc;
2886	path->alloc += last_str + mctx->max_mb_elem_len + 1;
2887	new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2888	if (new_array == NULL)
2889	{
2890	path->alloc = old_alloc;
2891	return REG_ESPACE;
2892	}
2893	path->array = new_array;
2894	memset (new_array + old_alloc, '\0',
2895	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2896	}
2897
2898	str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2899
2900	/* Temporary modify MCTX. */
2901	backup_state_log = mctx->state_log;
2902	backup_cur_idx = mctx->input.cur_idx;
2903	mctx->state_log = path->array;
2904	mctx->input.cur_idx = str_idx;
2905
2906	/* Setup initial node set. */
2907	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2908	if (str_idx == top_str)
2909	{
2910	err = re_node_set_init_1 (&next_nodes, top_node);
2911	if (BE (err != REG_NOERROR, 0))
2912	return err;
2913	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2914	if (BE (err != REG_NOERROR, 0))
2915	{
2916	re_node_set_free (&next_nodes);
2917	return err;
2918	}
2919	}
2920	else
2921	{
2922	cur_state = mctx->state_log[str_idx];
2923	if (cur_state && cur_state->has_backref)
2924	{
2925	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2926	if (BE ( err != REG_NOERROR, 0))
2927	return err;
2928	}
2929	else
2930	re_node_set_init_empty (&next_nodes);
2931	}
2932	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2933	{
2934	if (next_nodes.nelem)
2935	{
2936	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2937	subexp_num, type);
2938	if (BE ( err != REG_NOERROR, 0))
2939	{
2940	re_node_set_free (&next_nodes);
2941	return err;
2942	}
2943	}
2944	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2945	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2946	{
2947	re_node_set_free (&next_nodes);
2948	return err;
2949	}
2950	mctx->state_log[str_idx] = cur_state;
2951	}
2952
2953	for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2954	{
2955	re_node_set_empty (&next_nodes);
2956	if (mctx->state_log[str_idx + 1])
2957	{
2958	err = re_node_set_merge (&next_nodes,
2959	&mctx->state_log[str_idx + 1]->nodes);
2960	if (BE (err != REG_NOERROR, 0))
2961	{
2962	re_node_set_free (&next_nodes);
2963	return err;
2964	}
2965	}
2966	if (cur_state)
2967	{
2968	err = check_arrival_add_next_nodes (mctx, str_idx,
2969	&cur_state->non_eps_nodes, &next_nodes);
2970	if (BE (err != REG_NOERROR, 0))
2971	{
2972	re_node_set_free (&next_nodes);
2973	return err;
2974	}
2975	}
2976	++str_idx;
2977	if (next_nodes.nelem)
2978	{
2979	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2980	if (BE (err != REG_NOERROR, 0))
2981	{
2982	re_node_set_free (&next_nodes);
2983	return err;
2984	}
2985	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2986	subexp_num, type);
2987	if (BE ( err != REG_NOERROR, 0))
2988	{
2989	re_node_set_free (&next_nodes);
2990	return err;
2991	}
2992	}
2993	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2994	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2995	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2996	{
2997	re_node_set_free (&next_nodes);
2998	return err;
2999	}
3000	mctx->state_log[str_idx] = cur_state;
3001	null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3002	}
3003	re_node_set_free (&next_nodes);
3004	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3005	: &mctx->state_log[last_str]->nodes);
3006	path->next_idx = str_idx;
3007
3008	/* Fix MCTX. */
3009	mctx->state_log = backup_state_log;
3010	mctx->input.cur_idx = backup_cur_idx;
3011
3012	/* Then check the current node set has the node LAST_NODE. */
3013	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3014	return REG_NOERROR;
3015
3016	return REG_NOMATCH;
3017	}
3018
3019	/* Helper functions for check_arrival. */
3020
3021	/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3022	to NEXT_NODES.
3023	TODO: This function is similar to the functions transit_state*(),
3024	however this function has many additional works.
3025	Can't we unify them? */
3026
3027	static reg_errcode_t
3028	check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
3029	re_match_context_t *mctx;
3030	int str_idx;
3031	re_node_set cur_nodes, next_nodes;
3032	{
3033	re_dfa_t *const dfa = mctx->dfa;
3034	int result;
3035	int cur_idx;
3036	reg_errcode_t err;
3037	re_node_set union_set;
3038	re_node_set_init_empty (&union_set);
3039	for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3040	{
3041	int naccepted = 0;
3042	int cur_node = cur_nodes->elems[cur_idx];
3043	#ifdef DEBUG
3044	re_token_type_t type = dfa->nodes[cur_node].type;
3045	assert (!IS_EPSILON_NODE (type));
3046	#endif
3047	#ifdef RE_ENABLE_I18N
3048	/* If the node may accept `multi byte'. */
3049	if (dfa->nodes[cur_node].accept_mb)
3050	{
3051	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3052	str_idx);
3053	if (naccepted > 1)
3054	{
3055	re_dfastate_t *dest_state;
3056	int next_node = dfa->nexts[cur_node];
3057	int next_idx = str_idx + naccepted;
3058	dest_state = mctx->state_log[next_idx];
3059	re_node_set_empty (&union_set);
3060	if (dest_state)
3061	{
3062	err = re_node_set_merge (&union_set, &dest_state->nodes);
3063	if (BE (err != REG_NOERROR, 0))
3064	{
3065	re_node_set_free (&union_set);
3066	return err;
3067	}
3068	}
3069	result = re_node_set_insert (&union_set, next_node);
3070	if (BE (result < 0, 0))
3071	{
3072	re_node_set_free (&union_set);
3073	return REG_ESPACE;
3074	}
3075	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3076	&union_set);
3077	if (BE (mctx->state_log[next_idx] == NULL
3078	&& err != REG_NOERROR, 0))
3079	{
3080	re_node_set_free (&union_set);
3081	return err;
3082	}
3083	}
3084	}
3085	#endif /* RE_ENABLE_I18N */
3086	if (naccepted
3087	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3088	{
3089	result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3090	if (BE (result < 0, 0))
3091	{
3092	re_node_set_free (&union_set);
3093	return REG_ESPACE;
3094	}
3095	}
3096	}
3097	re_node_set_free (&union_set);
3098	return REG_NOERROR;
3099	}
3100
3101	/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3102	CUR_NODES, however exclude the nodes which are:
3103	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3104	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3105	*/
3106
3107	static reg_errcode_t
3108	check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
3109	re_dfa_t *dfa;
3110	re_node_set *cur_nodes;
3111	int ex_subexp, type;
3112	{
3113	reg_errcode_t err;
3114	int idx, outside_node;
3115	re_node_set new_nodes;
3116	#ifdef DEBUG
3117	assert (cur_nodes->nelem);
3118	#endif
3119	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3120	if (BE (err != REG_NOERROR, 0))
3121	return err;
3122	/* Create a new node set NEW_NODES with the nodes which are epsilon
3123	closures of the node in CUR_NODES. */
3124
3125	for (idx = 0; idx < cur_nodes->nelem; ++idx)
3126	{
3127	int cur_node = cur_nodes->elems[idx];
3128	re_node_set *eclosure = dfa->eclosures + cur_node;
3129	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3130	if (outside_node == -1)
3131	{
3132	/* There are no problematic nodes, just merge them. */
3133	err = re_node_set_merge (&new_nodes, eclosure);
3134	if (BE (err != REG_NOERROR, 0))
3135	{
3136	re_node_set_free (&new_nodes);
3137	return err;
3138	}
3139	}
3140	else
3141	{
3142	/* There are problematic nodes, re-calculate incrementally. */
3143	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3144	ex_subexp, type);
3145	if (BE (err != REG_NOERROR, 0))
3146	{
3147	re_node_set_free (&new_nodes);
3148	return err;
3149	}
3150	}
3151	}
3152	re_node_set_free (cur_nodes);
3153	*cur_nodes = new_nodes;
3154	return REG_NOERROR;
3155	}
3156
3157	/* Helper function for check_arrival_expand_ecl.
3158	Check incrementally the epsilon closure of TARGET, and if it isn't
3159	problematic append it to DST_NODES. */
3160
3161	static reg_errcode_t
3162	check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
3163	re_dfa_t *dfa;
3164	int target, ex_subexp, type;
3165	re_node_set *dst_nodes;
3166	{
3167	int cur_node;
3168	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3169	{
3170	int err;
3171
3172	if (dfa->nodes[cur_node].type == type
3173	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3174	{
3175	if (type == OP_CLOSE_SUBEXP)
3176	{
3177	err = re_node_set_insert (dst_nodes, cur_node);
3178	if (BE (err == -1, 0))
3179	return REG_ESPACE;
3180	}
3181	break;
3182	}
3183	err = re_node_set_insert (dst_nodes, cur_node);
3184	if (BE (err == -1, 0))
3185	return REG_ESPACE;
3186	if (dfa->edests[cur_node].nelem == 0)
3187	break;
3188	if (dfa->edests[cur_node].nelem == 2)
3189	{
3190	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3191	dfa->edests[cur_node].elems[1],
3192	ex_subexp, type);
3193	if (BE (err != REG_NOERROR, 0))
3194	return err;
3195	}
3196	cur_node = dfa->edests[cur_node].elems[0];
3197	}
3198	return REG_NOERROR;
3199	}
3200
3201
3202	/* For all the back references in the current state, calculate the
3203	destination of the back references by the appropriate entry
3204	in MCTX->BKREF_ENTS. */
3205
3206	static reg_errcode_t
3207	expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
3208	type)
3209	re_match_context_t *mctx;
3210	int cur_str, subexp_num, type;
3211	re_node_set *cur_nodes;
3212	{
3213	re_dfa_t *const dfa = mctx->dfa;
3214	reg_errcode_t err;
3215	int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3216	struct re_backref_cache_entry *ent;
3217
3218	if (cache_idx_start == -1)
3219	return REG_NOERROR;
3220
3221	restart:
3222	ent = mctx->bkref_ents + cache_idx_start;
3223	do
3224	{
3225	int to_idx, next_node;
3226
3227	/* Is this entry ENT is appropriate? */
3228	if (!re_node_set_contains (cur_nodes, ent->node))
3229	continue; /* No. */
3230
3231	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3232	/* Calculate the destination of the back reference, and append it
3233	to MCTX->STATE_LOG. */
3234	if (to_idx == cur_str)
3235	{
3236	/* The backreference did epsilon transit, we must re-check all the
3237	node in the current state. */
3238	re_node_set new_dests;
3239	reg_errcode_t err2, err3;
3240	next_node = dfa->edests[ent->node].elems[0];
3241	if (re_node_set_contains (cur_nodes, next_node))
3242	continue;
3243	err = re_node_set_init_1 (&new_dests, next_node);
3244	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3245	err3 = re_node_set_merge (cur_nodes, &new_dests);
3246	re_node_set_free (&new_dests);
3247	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
3248	\|\| err3 != REG_NOERROR, 0))
3249	{
3250	err = (err != REG_NOERROR ? err
3251	: (err2 != REG_NOERROR ? err2 : err3));
3252	return err;
3253	}
3254	/* TODO: It is still inefficient... */
3255	goto restart;
3256	}
3257	else
3258	{
3259	re_node_set union_set;
3260	next_node = dfa->nexts[ent->node];
3261	if (mctx->state_log[to_idx])
3262	{
3263	int ret;
3264	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3265	next_node))
3266	continue;
3267	err = re_node_set_init_copy (&union_set,
3268	&mctx->state_log[to_idx]->nodes);
3269	ret = re_node_set_insert (&union_set, next_node);
3270	if (BE (err != REG_NOERROR \|\| ret < 0, 0))
3271	{
3272	re_node_set_free (&union_set);
3273	err = err != REG_NOERROR ? err : REG_ESPACE;
3274	return err;
3275	}
3276	}
3277	else
3278	{
3279	err = re_node_set_init_1 (&union_set, next_node);
3280	if (BE (err != REG_NOERROR, 0))
3281	return err;
3282	}
3283	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3284	re_node_set_free (&union_set);
3285	if (BE (mctx->state_log[to_idx] == NULL
3286	&& err != REG_NOERROR, 0))
3287	return err;
3288	}
3289	}
3290	while (ent++->more);
3291	return REG_NOERROR;
3292	}
3293
3294	/* Build transition table for the state.
3295	Return 1 if succeeded, otherwise return NULL. */
3296
3297	static int
3298	build_trtable (dfa, state)
3299	re_dfa_t *dfa;
3300	re_dfastate_t *state;
3301	{
3302	reg_errcode_t err;
3303	int i, j, ch, need_word_trtable = 0;
3304	unsigned int elem, mask;
3305	int dests_node_malloced = 0, dest_states_malloced = 0;
3306	int ndests; /* Number of the destination states from `state'. */
3307	re_dfastate_t **trtable;
3308	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3309	re_node_set follows, *dests_node;
3310	bitset *dests_ch;
3311	bitset acceptable;
3312
3313	/* We build DFA states which corresponds to the destination nodes
3314	from `state'. `dests_node[i]' represents the nodes which i-th
3315	destination state contains, and `dests_ch[i]' represents the
3316	characters which i-th destination state accepts. */
3317	#ifdef _LIBC
3318	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3319	dests_node = (re_node_set *)
3320	alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3321	else
3322	#endif
3323	{
3324	dests_node = (re_node_set *)
3325	malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3326	if (BE (dests_node == NULL, 0))
3327	return 0;
3328	dests_node_malloced = 1;
3329	}
3330	dests_ch = (bitset *) (dests_node + SBC_MAX);
3331
3332	/* Initialize transiton table. */
3333	state->word_trtable = state->trtable = NULL;
3334
3335	/* At first, group all nodes belonging to `state' into several
3336	destinations. */
3337	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3338	if (BE (ndests <= 0, 0))
3339	{
3340	if (dests_node_malloced)
3341	free (dests_node);
3342	/* Return 0 in case of an error, 1 otherwise. */
3343	if (ndests == 0)
3344	{
3345	state->trtable = (re_dfastate_t **)
3346	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3347	return 1;
3348	}
3349	return 0;
3350	}
3351
3352	err = re_node_set_alloc (&follows, ndests + 1);
3353	if (BE (err != REG_NOERROR, 0))
3354	goto out_free;
3355
3356	#ifdef _LIBC
3357	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3358	+ ndests * 3 * sizeof (re_dfastate_t *)))
3359	dest_states = (re_dfastate_t **)
3360	alloca (ndests * 3 * sizeof (re_dfastate_t *));
3361	else
3362	#endif
3363	{
3364	dest_states = (re_dfastate_t **)
3365	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3366	if (BE (dest_states == NULL, 0))
3367	{
3368	out_free:
3369	if (dest_states_malloced)
3370	free (dest_states);
3371	re_node_set_free (&follows);
3372	for (i = 0; i < ndests; ++i)
3373	re_node_set_free (dests_node + i);
3374	if (dests_node_malloced)
3375	free (dests_node);
3376	return 0;
3377	}
3378	dest_states_malloced = 1;
3379	}
3380	dest_states_word = dest_states + ndests;
3381	dest_states_nl = dest_states_word + ndests;
3382	bitset_empty (acceptable);
3383
3384	/* Then build the states for all destinations. */
3385	for (i = 0; i < ndests; ++i)
3386	{
3387	int next_node;
3388	re_node_set_empty (&follows);
3389	/* Merge the follows of this destination states. */
3390	for (j = 0; j < dests_node[i].nelem; ++j)
3391	{
3392	next_node = dfa->nexts[dests_node[i].elems[j]];
3393	if (next_node != -1)
3394	{
3395	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3396	if (BE (err != REG_NOERROR, 0))
3397	goto out_free;
3398	}
3399	}
3400	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3401	if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3402	goto out_free;
3403	/* If the new state has context constraint,
3404	build appropriate states for these contexts. */
3405	if (dest_states[i]->has_constraint)
3406	{
3407	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3408	CONTEXT_WORD);
3409	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3410	goto out_free;
3411
3412	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3413	need_word_trtable = 1;
3414
3415	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3416	CONTEXT_NEWLINE);
3417	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3418	goto out_free;
3419	}
3420	else
3421	{
3422	dest_states_word[i] = dest_states[i];
3423	dest_states_nl[i] = dest_states[i];
3424	}
3425	bitset_merge (acceptable, dests_ch[i]);
3426	}
3427
3428	if (!BE (need_word_trtable, 0))
3429	{
3430	/* We don't care about whether the following character is a word
3431	character, or we are in a single-byte character set so we can
3432	discern by looking at the character code: allocate a
3433	256-entry transition table. */
3434	trtable = state->trtable =
3435	(re_dfastate_t *) calloc (sizeof (re_dfastate_t ), SBC_MAX);
3436	if (BE (trtable == NULL, 0))
3437	goto out_free;
3438
3439	/* For all characters ch...: */
3440	for (i = 0; i < BITSET_UINTS; ++i)
3441	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3442	elem;
3443	mask <<= 1, elem >>= 1, ++ch)
3444	if (BE (elem & 1, 0))
3445	{
3446	/* There must be exactly one destination which accepts
3447	character ch. See group_nodes_into_DFAstates. */
3448	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3449	;
3450
3451	/* j-th destination accepts the word character ch. */
3452	if (dfa->word_char[i] & mask)
3453	trtable[ch] = dest_states_word[j];
3454	else
3455	trtable[ch] = dest_states[j];
3456	}
3457	}
3458	else
3459	{
3460	/* We care about whether the following character is a word
3461	character, and we are in a multi-byte character set: discern
3462	by looking at the character code: build two 256-entry
3463	transition tables, one starting at trtable[0] and one
3464	starting at trtable[SBC_MAX]. */
3465	trtable = state->word_trtable =
3466	(re_dfastate_t *) calloc (sizeof (re_dfastate_t ), 2 * SBC_MAX);
3467	if (BE (trtable == NULL, 0))
3468	goto out_free;
3469
3470	/* For all characters ch...: */
3471	for (i = 0; i < BITSET_UINTS; ++i)
3472	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3473	elem;
3474	mask <<= 1, elem >>= 1, ++ch)
3475	if (BE (elem & 1, 0))
3476	{
3477	/* There must be exactly one destination which accepts
3478	character ch. See group_nodes_into_DFAstates. */
3479	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3480	;
3481
3482	/* j-th destination accepts the word character ch. */
3483	trtable[ch] = dest_states[j];
3484	trtable[ch + SBC_MAX] = dest_states_word[j];
3485	}
3486	}
3487
3488	/* new line */
3489	if (bitset_contain (acceptable, NEWLINE_CHAR))
3490	{
3491	/* The current state accepts newline character. */
3492	for (j = 0; j < ndests; ++j)
3493	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3494	{
3495	/* k-th destination accepts newline character. */
3496	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3497	if (need_word_trtable)
3498	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3499	/* There must be only one destination which accepts
3500	newline. See group_nodes_into_DFAstates. */
3501	break;
3502	}
3503	}
3504
3505	if (dest_states_malloced)
3506	free (dest_states);
3507
3508	re_node_set_free (&follows);
3509	for (i = 0; i < ndests; ++i)
3510	re_node_set_free (dests_node + i);
3511
3512	if (dests_node_malloced)
3513	free (dests_node);
3514
3515	return 1;
3516	}
3517
3518	/* Group all nodes belonging to STATE into several destinations.
3519	Then for all destinations, set the nodes belonging to the destination
3520	to DESTS_NODE[i] and set the characters accepted by the destination
3521	to DEST_CH[i]. This function return the number of destinations. */
3522
3523	static int
3524	group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
3525	re_dfa_t *dfa;
3526	const re_dfastate_t *state;
3527	re_node_set *dests_node;
3528	bitset *dests_ch;
3529	{
3530	reg_errcode_t err;
3531	int result;
3532	int i, j, k;
3533	int ndests; /* Number of the destinations from `state'. */
3534	bitset accepts; /* Characters a node can accept. */
3535	const re_node_set *cur_nodes = &state->nodes;
3536	bitset_empty (accepts);
3537	ndests = 0;
3538
3539	/* For all the nodes belonging to `state', */
3540	for (i = 0; i < cur_nodes->nelem; ++i)
3541	{
3542	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3543	re_token_type_t type = node->type;
3544	unsigned int constraint = node->constraint;
3545
3546	/* Enumerate all single byte character this node can accept. */
3547	if (type == CHARACTER)
3548	bitset_set (accepts, node->opr.c);
3549	else if (type == SIMPLE_BRACKET)
3550	{
3551	bitset_merge (accepts, node->opr.sbcset);
3552	}
3553	else if (type == OP_PERIOD)
3554	{
3555	#ifdef RE_ENABLE_I18N
3556	if (dfa->mb_cur_max > 1)
3557	bitset_merge (accepts, dfa->sb_char);
3558	else
3559	#endif
3560	bitset_set_all (accepts);
3561	if (!(dfa->syntax & RE_DOT_NEWLINE))
3562	bitset_clear (accepts, '\n');
3563	if (dfa->syntax & RE_DOT_NOT_NULL)
3564	bitset_clear (accepts, '\0');
3565	}
3566	#ifdef RE_ENABLE_I18N
3567	else if (type == OP_UTF8_PERIOD)
3568	{
3569	memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3570	if (!(dfa->syntax & RE_DOT_NEWLINE))
3571	bitset_clear (accepts, '\n');
3572	if (dfa->syntax & RE_DOT_NOT_NULL)
3573	bitset_clear (accepts, '\0');
3574	}
3575	#endif
3576	else
3577	continue;
3578
3579	/* Check the `accepts' and sift the characters which are not
3580	match it the context. */
3581	if (constraint)
3582	{
3583	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3584	{
3585	int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3586	bitset_empty (accepts);
3587	if (accepts_newline)
3588	bitset_set (accepts, NEWLINE_CHAR);
3589	else
3590	continue;
3591	}
3592	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3593	{
3594	bitset_empty (accepts);
3595	continue;
3596	}
3597
3598	if (constraint & NEXT_WORD_CONSTRAINT)
3599	{
3600	unsigned int any_set = 0;
3601	if (type == CHARACTER && !node->word_char)
3602	{
3603	bitset_empty (accepts);
3604	continue;
3605	}
3606	#ifdef RE_ENABLE_I18N
3607	if (dfa->mb_cur_max > 1)
3608	for (j = 0; j < BITSET_UINTS; ++j)
3609	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3610	else
3611	#endif
3612	for (j = 0; j < BITSET_UINTS; ++j)
3613	any_set \|= (accepts[j] &= dfa->word_char[j]);
3614	if (!any_set)
3615	continue;
3616	}
3617	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3618	{
3619	unsigned int any_set = 0;
3620	if (type == CHARACTER && node->word_char)
3621	{
3622	bitset_empty (accepts);
3623	continue;
3624	}
3625	#ifdef RE_ENABLE_I18N
3626	if (dfa->mb_cur_max > 1)
3627	for (j = 0; j < BITSET_UINTS; ++j)
3628	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3629	else
3630	#endif
3631	for (j = 0; j < BITSET_UINTS; ++j)
3632	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3633	if (!any_set)
3634	continue;
3635	}
3636	}
3637
3638	/* Then divide `accepts' into DFA states, or create a new
3639	state. Above, we make sure that accepts is not empty. */
3640	for (j = 0; j < ndests; ++j)
3641	{
3642	bitset intersec; /* Intersection sets, see below. */
3643	bitset remains;
3644	/* Flags, see below. */
3645	int has_intersec, not_subset, not_consumed;
3646
3647	/* Optimization, skip if this state doesn't accept the character. */
3648	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3649	continue;
3650
3651	/* Enumerate the intersection set of this state and `accepts'. */
3652	has_intersec = 0;
3653	for (k = 0; k < BITSET_UINTS; ++k)
3654	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3655	/* And skip if the intersection set is empty. */
3656	if (!has_intersec)
3657	continue;
3658
3659	/* Then check if this state is a subset of `accepts'. */
3660	not_subset = not_consumed = 0;
3661	for (k = 0; k < BITSET_UINTS; ++k)
3662	{
3663	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3664	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3665	}
3666
3667	/* If this state isn't a subset of `accepts', create a
3668	new group state, which has the `remains'. */
3669	if (not_subset)
3670	{
3671	bitset_copy (dests_ch[ndests], remains);
3672	bitset_copy (dests_ch[j], intersec);
3673	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3674	if (BE (err != REG_NOERROR, 0))
3675	goto error_return;
3676	++ndests;
3677	}
3678
3679	/* Put the position in the current group. */
3680	result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3681	if (BE (result < 0, 0))
3682	goto error_return;
3683
3684	/* If all characters are consumed, go to next node. */
3685	if (!not_consumed)
3686	break;
3687	}
3688	/* Some characters remain, create a new group. */
3689	if (j == ndests)
3690	{
3691	bitset_copy (dests_ch[ndests], accepts);
3692	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3693	if (BE (err != REG_NOERROR, 0))
3694	goto error_return;
3695	++ndests;
3696	bitset_empty (accepts);
3697	}
3698	}
3699	return ndests;
3700	error_return:
3701	for (j = 0; j < ndests; ++j)
3702	re_node_set_free (dests_node + j);
3703	return -1;
3704	}
3705
3706	#ifdef RE_ENABLE_I18N
3707	/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3708	Return the number of the bytes the node accepts.
3709	STR_IDX is the current index of the input string.
3710
3711	This function handles the nodes which can accept one character, or
3712	one collating element like '.', '[a-z]', opposite to the other nodes
3713	can only accept one byte. */
3714
3715	static int
3716	check_node_accept_bytes (dfa, node_idx, input, str_idx)
3717	re_dfa_t *dfa;
3718	int node_idx, str_idx;
3719	const re_string_t *input;
3720	{
3721	const re_token_t *node = dfa->nodes + node_idx;
3722	int char_len, elem_len;
3723	int i;
3724
3725	if (BE (node->type == OP_UTF8_PERIOD, 0))
3726	{
3727	unsigned char c = re_string_byte_at (input, str_idx), d;
3728	if (BE (c < 0xc2, 1))
3729	return 0;
3730
3731	if (str_idx + 2 > input->len)
3732	return 0;
3733
3734	d = re_string_byte_at (input, str_idx + 1);
3735	if (c < 0xe0)
3736	return (d < 0x80 \|\| d > 0xbf) ? 0 : 2;
3737	else if (c < 0xf0)
3738	{
3739	char_len = 3;
3740	if (c == 0xe0 && d < 0xa0)
3741	return 0;
3742	}
3743	else if (c < 0xf8)
3744	{
3745	char_len = 4;
3746	if (c == 0xf0 && d < 0x90)
3747	return 0;
3748	}
3749	else if (c < 0xfc)
3750	{
3751	char_len = 5;
3752	if (c == 0xf8 && d < 0x88)
3753	return 0;
3754	}
3755	else if (c < 0xfe)
3756	{
3757	char_len = 6;
3758	if (c == 0xfc && d < 0x84)
3759	return 0;
3760	}
3761	else
3762	return 0;
3763
3764	if (str_idx + char_len > input->len)
3765	return 0;
3766
3767	for (i = 1; i < char_len; ++i)
3768	{
3769	d = re_string_byte_at (input, str_idx + i);
3770	if (d < 0x80 \|\| d > 0xbf)
3771	return 0;
3772	}
3773	return char_len;
3774	}
3775
3776	char_len = re_string_char_size_at (input, str_idx);
3777	if (node->type == OP_PERIOD)
3778	{
3779	if (char_len <= 1)
3780	return 0;
3781	/* FIXME: I don't think this if is needed, as both '\n'
3782	and '\0' are char_len == 1. */
3783	/* '.' accepts any one character except the following two cases. */
3784	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3785	re_string_byte_at (input, str_idx) == '\n') \|\|
3786	((dfa->syntax & RE_DOT_NOT_NULL) &&
3787	re_string_byte_at (input, str_idx) == '\0'))
3788	return 0;
3789	return char_len;
3790	}
3791
3792	elem_len = re_string_elem_size_at (input, str_idx);
3793	if ((elem_len <= 1 && char_len <= 1) \|\| char_len == 0)
3794	return 0;
3795
3796	if (node->type == COMPLEX_BRACKET)
3797	{
3798	const re_charset_t *cset = node->opr.mbcset;
3799	# ifdef _LIBC
3800	const unsigned char *pin
3801	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3802	int j;
3803	uint32_t nrules;
3804	# endif /* _LIBC */
3805	int match_len = 0;
3806	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3807	? re_string_wchar_at (input, str_idx) : 0);
3808
3809	/* match with multibyte character? */
3810	for (i = 0; i < cset->nmbchars; ++i)
3811	if (wc == cset->mbchars[i])
3812	{
3813	match_len = char_len;
3814	goto check_node_accept_bytes_match;
3815	}
3816	/* match with character_class? */
3817	for (i = 0; i < cset->nchar_classes; ++i)
3818	{
3819	wctype_t wt = cset->char_classes[i];
3820	if (__iswctype (wc, wt))
3821	{
3822	match_len = char_len;
3823	goto check_node_accept_bytes_match;
3824	}
3825	}
3826
3827	# ifdef _LIBC
3828	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3829	if (nrules != 0)
3830	{
3831	unsigned int in_collseq = 0;
3832	const int32_t table, indirect;
3833	const unsigned char weights, extra;
3834	const char *collseqwc;
3835	int32_t idx;
3836	/* This #include defines a local function! */
3837	# include <locale/weight.h>
3838
3839	/* match with collating_symbol? */
3840	if (cset->ncoll_syms)
3841	extra = (const unsigned char *)
3842	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3843	for (i = 0; i < cset->ncoll_syms; ++i)
3844	{
3845	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3846	/* Compare the length of input collating element and
3847	the length of current collating element. */
3848	if (*coll_sym != elem_len)
3849	continue;
3850	/* Compare each bytes. */
3851	for (j = 0; j < *coll_sym; j++)
3852	if (pin[j] != coll_sym[1 + j])
3853	break;
3854	if (j == *coll_sym)
3855	{
3856	/* Match if every bytes is equal. */
3857	match_len = j;
3858	goto check_node_accept_bytes_match;
3859	}
3860	}
3861
3862	if (cset->nranges)
3863	{
3864	if (elem_len <= char_len)
3865	{
3866	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3867	in_collseq = __collseq_table_lookup (collseqwc, wc);
3868	}
3869	else
3870	in_collseq = find_collation_sequence_value (pin, elem_len);
3871	}
3872	/* match with range expression? */
3873	for (i = 0; i < cset->nranges; ++i)
3874	if (cset->range_starts[i] <= in_collseq
3875	&& in_collseq <= cset->range_ends[i])
3876	{
3877	match_len = elem_len;
3878	goto check_node_accept_bytes_match;
3879	}
3880
3881	/* match with equivalence_class? */
3882	if (cset->nequiv_classes)
3883	{
3884	const unsigned char *cp = pin;
3885	table = (const int32_t *)
3886	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3887	weights = (const unsigned char *)
3888	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3889	extra = (const unsigned char *)
3890	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3891	indirect = (const int32_t *)
3892	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3893	idx = findidx (&cp);
3894	if (idx > 0)
3895	for (i = 0; i < cset->nequiv_classes; ++i)
3896	{
3897	int32_t equiv_class_idx = cset->equiv_classes[i];
3898	size_t weight_len = weights[idx];
3899	if (weight_len == weights[equiv_class_idx])
3900	{
3901	int cnt = 0;
3902	while (cnt <= weight_len
3903	&& (weights[equiv_class_idx + 1 + cnt]
3904	== weights[idx + 1 + cnt]))
3905	++cnt;
3906	if (cnt > weight_len)
3907	{
3908	match_len = elem_len;
3909	goto check_node_accept_bytes_match;
3910	}
3911	}
3912	}
3913	}
3914	}
3915	else
3916	# endif /* _LIBC */
3917	{
3918	/* match with range expression? */
3919	#if __GNUC__ >= 2
3920	wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3921	#else
3922	wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3923	cmp_buf[2] = wc;
3924	#endif
3925	for (i = 0; i < cset->nranges; ++i)
3926	{
3927	cmp_buf[0] = cset->range_starts[i];
3928	cmp_buf[4] = cset->range_ends[i];
3929	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3930	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3931	{
3932	match_len = char_len;
3933	goto check_node_accept_bytes_match;
3934	}
3935	}
3936	}
3937	check_node_accept_bytes_match:
3938	if (!cset->non_match)
3939	return match_len;
3940	else
3941	{
3942	if (match_len > 0)
3943	return 0;
3944	else
3945	return (elem_len > char_len) ? elem_len : char_len;
3946	}
3947	}
3948	return 0;
3949	}
3950
3951	# ifdef _LIBC
3952	static unsigned int
3953	find_collation_sequence_value (mbs, mbs_len)
3954	const unsigned char *mbs;
3955	size_t mbs_len;
3956	{
3957	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3958	if (nrules == 0)
3959	{
3960	if (mbs_len == 1)
3961	{
3962	/* No valid character. Match it as a single byte character. */
3963	const unsigned char collseq = (const unsigned char )
3964	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3965	return collseq[mbs[0]];
3966	}
3967	return UINT_MAX;
3968	}
3969	else
3970	{
3971	int32_t idx;
3972	const unsigned char extra = (const unsigned char )
3973	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3974	int32_t extrasize = (const unsigned char *)
3975	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3976
3977	for (idx = 0; idx < extrasize;)
3978	{
3979	int mbs_cnt, found = 0;
3980	int32_t elem_mbs_len;
3981	/* Skip the name of collating element name. */
3982	idx = idx + extra[idx] + 1;
3983	elem_mbs_len = extra[idx++];
3984	if (mbs_len == elem_mbs_len)
3985	{
3986	for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3987	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3988	break;
3989	if (mbs_cnt == elem_mbs_len)
3990	/* Found the entry. */
3991	found = 1;
3992	}
3993	/* Skip the byte sequence of the collating element. */
3994	idx += elem_mbs_len;
3995	/* Adjust for the alignment. */
3996	idx = (idx + 3) & ~3;
3997	/* Skip the collation sequence value. */
3998	idx += sizeof (uint32_t);
3999	/* Skip the wide char sequence of the collating element. */
4000	idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4001	/* If we found the entry, return the sequence value. */
4002	if (found)
4003	return (uint32_t ) (extra + idx);
4004	/* Skip the collation sequence value. */
4005	idx += sizeof (uint32_t);
4006	}
4007	return UINT_MAX;
4008	}
4009	}
4010	# endif /* _LIBC */
4011	#endif /* RE_ENABLE_I18N */
4012
4013	/* Check whether the node accepts the byte which is IDX-th
4014	byte of the INPUT. */
4015
4016	static int
4017	check_node_accept (mctx, node, idx)
4018	const re_match_context_t *mctx;
4019	const re_token_t *node;
4020	int idx;
4021	{
4022	unsigned char ch;
4023	ch = re_string_byte_at (&mctx->input, idx);
4024	switch (node->type)
4025	{
4026	case CHARACTER:
4027	if (node->opr.c != ch)
4028	return 0;
4029	break;
4030
4031	case SIMPLE_BRACKET:
4032	if (!bitset_contain (node->opr.sbcset, ch))
4033	return 0;
4034	break;
4035
4036	#ifdef RE_ENABLE_I18N
4037	case OP_UTF8_PERIOD:
4038	if (ch >= 0x80)
4039	return 0;
4040	/* FALLTHROUGH */
4041	#endif
4042	case OP_PERIOD:
4043	if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4044	\|\| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4045	return 0;
4046	break;
4047
4048	default:
4049	return 0;
4050	}
4051
4052	if (node->constraint)
4053	{
4054	/* The node has constraints. Check whether the current context
4055	satisfies the constraints. */
4056	unsigned int context = re_string_context_at (&mctx->input, idx,
4057	mctx->eflags);
4058	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4059	return 0;
4060	}
4061
4062	return 1;
4063	}
4064
4065	/* Extend the buffers, if the buffers have run out. */
4066
4067	static reg_errcode_t
4068	extend_buffers (mctx)
4069	re_match_context_t *mctx;
4070	{
4071	reg_errcode_t ret;
4072	re_string_t *pstr = &mctx->input;
4073
4074	/* Double the lengthes of the buffers. */
4075	ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4076	if (BE (ret != REG_NOERROR, 0))
4077	return ret;
4078
4079	if (mctx->state_log != NULL)
4080	{
4081	/* And double the length of state_log. */
4082	/* XXX We have no indication of the size of this buffer. If this
4083	allocation fail we have no indication that the state_log array
4084	does not have the right size. */
4085	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4086	pstr->bufs_len + 1);
4087	if (BE (new_array == NULL, 0))
4088	return REG_ESPACE;
4089	mctx->state_log = new_array;
4090	}
4091
4092	/* Then reconstruct the buffers. */
4093	if (pstr->icase)
4094	{
4095	#ifdef RE_ENABLE_I18N
4096	if (pstr->mb_cur_max > 1)
4097	{
4098	ret = build_wcs_upper_buffer (pstr);
4099	if (BE (ret != REG_NOERROR, 0))
4100	return ret;
4101	}
4102	else
4103	#endif /* RE_ENABLE_I18N */
4104	build_upper_buffer (pstr);
4105	}
4106	else
4107	{
4108	#ifdef RE_ENABLE_I18N
4109	if (pstr->mb_cur_max > 1)
4110	build_wcs_buffer (pstr);
4111	else
4112	#endif /* RE_ENABLE_I18N */
4113	{
4114	if (pstr->trans != NULL)
4115	re_string_translate_buffer (pstr);
4116	}
4117	}
4118	return REG_NOERROR;
4119	}
4120
4121
4122
4123	/* Functions for matching context. */
4124
4125	/* Initialize MCTX. */
4126
4127	static reg_errcode_t
4128	match_ctx_init (mctx, eflags, n)
4129	re_match_context_t *mctx;
4130	int eflags, n;
4131	{
4132	mctx->eflags = eflags;
4133	mctx->match_last = -1;
4134	if (n > 0)
4135	{
4136	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4137	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4138	if (BE (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL, 0))
4139	return REG_ESPACE;
4140	}
4141	/* Already zero-ed by the caller.
4142	else
4143	mctx->bkref_ents = NULL;
4144	mctx->nbkref_ents = 0;
4145	mctx->nsub_tops = 0; */
4146	mctx->abkref_ents = n;
4147	mctx->max_mb_elem_len = 1;
4148	mctx->asub_tops = n;
4149	return REG_NOERROR;
4150	}
4151
4152	/* Clean the entries which depend on the current input in MCTX.
4153	This function must be invoked when the matcher changes the start index
4154	of the input, or changes the input string. */
4155
4156	static void
4157	match_ctx_clean (mctx)
4158	re_match_context_t *mctx;
4159	{
4160	int st_idx;
4161	for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4162	{
4163	int sl_idx;
4164	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4165	for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4166	{
4167	re_sub_match_last_t *last = top->lasts[sl_idx];
4168	re_free (last->path.array);
4169	re_free (last);
4170	}
4171	re_free (top->lasts);
4172	if (top->path)
4173	{
4174	re_free (top->path->array);
4175	re_free (top->path);
4176	}
4177	free (top);
4178	}
4179
4180	mctx->nsub_tops = 0;
4181	mctx->nbkref_ents = 0;
4182	}
4183
4184	/* Free all the memory associated with MCTX. */
4185
4186	static void
4187	match_ctx_free (mctx)
4188	re_match_context_t *mctx;
4189	{
4190	/* First, free all the memory associated with MCTX->SUB_TOPS. */
4191	match_ctx_clean (mctx);
4192	re_free (mctx->sub_tops);
4193	re_free (mctx->bkref_ents);
4194	}
4195
4196	/* Add a new backreference entry to MCTX.
4197	Note that we assume that caller never call this function with duplicate
4198	entry, and call with STR_IDX which isn't smaller than any existing entry.
4199	*/
4200
4201	static reg_errcode_t
4202	match_ctx_add_entry (mctx, node, str_idx, from, to)
4203	re_match_context_t *mctx;
4204	int node, str_idx, from, to;
4205	{
4206	if (mctx->nbkref_ents >= mctx->abkref_ents)
4207	{
4208	struct re_backref_cache_entry* new_entry;
4209	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4210	mctx->abkref_ents * 2);
4211	if (BE (new_entry == NULL, 0))
4212	{
4213	re_free (mctx->bkref_ents);
4214	return REG_ESPACE;
4215	}
4216	mctx->bkref_ents = new_entry;
4217	memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4218	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4219	mctx->abkref_ents *= 2;
4220	}
4221	if (mctx->nbkref_ents > 0
4222	&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4223	mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4224
4225	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4226	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4227	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4228	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4229
4230	/* This is a cache that saves negative results of check_dst_limits_calc_pos.
4231	If bit N is clear, means that this entry won't epsilon-transition to
4232	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4233	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4234	such node.
4235
4236	A backreference does not epsilon-transition unless it is empty, so set
4237	to all zeros if FROM != TO. */
4238	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4239	= (from == to ? ~0 : 0);
4240
4241	mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4242	if (mctx->max_mb_elem_len < to - from)
4243	mctx->max_mb_elem_len = to - from;
4244	return REG_NOERROR;
4245	}
4246
4247	/* Search for the first entry which has the same str_idx, or -1 if none is
4248	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4249
4250	static int
4251	search_cur_bkref_entry (mctx, str_idx)
4252	re_match_context_t *mctx;
4253	int str_idx;
4254	{
4255	int left, right, mid, last;
4256	last = right = mctx->nbkref_ents;
4257	for (left = 0; left < right;)
4258	{
4259	mid = (left + right) / 2;
4260	if (mctx->bkref_ents[mid].str_idx < str_idx)
4261	left = mid + 1;
4262	else
4263	right = mid;
4264	}
4265	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4266	return left;
4267	else
4268	return -1;
4269	}
4270
4271	/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4272	at STR_IDX. */
4273
4274	static reg_errcode_t
4275	match_ctx_add_subtop (mctx, node, str_idx)
4276	re_match_context_t *mctx;
4277	int node, str_idx;
4278	{
4279	#ifdef DEBUG
4280	assert (mctx->sub_tops != NULL);
4281	assert (mctx->asub_tops > 0);
4282	#endif
4283	if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4284	{
4285	int new_asub_tops = mctx->asub_tops * 2;
4286	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4287	re_sub_match_top_t *,
4288	new_asub_tops);
4289	if (BE (new_array == NULL, 0))
4290	return REG_ESPACE;
4291	mctx->sub_tops = new_array;
4292	mctx->asub_tops = new_asub_tops;
4293	}
4294	mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4295	if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4296	return REG_ESPACE;
4297	mctx->sub_tops[mctx->nsub_tops]->node = node;
4298	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4299	return REG_NOERROR;
4300	}
4301
4302	/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4303	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4304
4305	static re_sub_match_last_t *
4306	match_ctx_add_sublast (subtop, node, str_idx)
4307	re_sub_match_top_t *subtop;
4308	int node, str_idx;
4309	{
4310	re_sub_match_last_t *new_entry;
4311	if (BE (subtop->nlasts == subtop->alasts, 0))
4312	{
4313	int new_alasts = 2 * subtop->alasts + 1;
4314	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4315	re_sub_match_last_t *,
4316	new_alasts);
4317	if (BE (new_array == NULL, 0))
4318	return NULL;
4319	subtop->lasts = new_array;
4320	subtop->alasts = new_alasts;
4321	}
4322	new_entry = calloc (1, sizeof (re_sub_match_last_t));
4323	if (BE (new_entry != NULL, 1))
4324	{
4325	subtop->lasts[subtop->nlasts] = new_entry;
4326	new_entry->node = node;
4327	new_entry->str_idx = str_idx;
4328	++subtop->nlasts;
4329	}
4330	return new_entry;
4331	}
4332
4333	static void
4334	sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx)
4335	re_sift_context_t *sctx;
4336	re_dfastate_t sifted_sts, limited_sts;
4337	int last_node, last_str_idx;
4338	{
4339	sctx->sifted_states = sifted_sts;
4340	sctx->limited_states = limited_sts;
4341	sctx->last_node = last_node;
4342	sctx->last_str_idx = last_str_idx;
4343	re_node_set_init_empty (&sctx->limits);
4344	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: