Context Navigation

source: trunk/src/sed/lib/regcomp.c@ 3425

Last change on this file since 3425 was 606, checked in by bird, 19 years ago
Made sed build using MSC.
File size: 107.6 KB

Line
1	/* Extended regular expression matching and search library.
2	Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, write to the Free
18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19	02111-1307 USA. */
20
21	static reg_errcode_t re_compile_internal (regex_t preg, const char pattern,
22	size_t length, reg_syntax_t syntax);
23	static void re_compile_fastmap_iter (regex_t *bufp,
24	const re_dfastate_t *init_state,
25	char *fastmap);
26	static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
27	#ifdef RE_ENABLE_I18N
28	static void free_charset (re_charset_t *cset);
29	#endif /* RE_ENABLE_I18N */
30	static void free_workarea_compile (regex_t *preg);
31	static reg_errcode_t create_initial_state (re_dfa_t *dfa);
32	#ifdef RE_ENABLE_I18N
33	static void optimize_utf8 (re_dfa_t *dfa);
34	#endif
35	static reg_errcode_t analyze (regex_t *preg);
36	static reg_errcode_t preorder (bin_tree_t *root,
37	reg_errcode_t (fn (void , bin_tree_t )),
38	void *extra);
39	static reg_errcode_t postorder (bin_tree_t *root,
40	reg_errcode_t (fn (void , bin_tree_t )),
41	void *extra);
42	static reg_errcode_t optimize_subexps (void extra, bin_tree_t node);
43	static reg_errcode_t lower_subexps (void extra, bin_tree_t node);
44	static bin_tree_t lower_subexp (reg_errcode_t err, regex_t *preg,
45	bin_tree_t *node);
46	static reg_errcode_t calc_first (void extra, bin_tree_t node);
47	static reg_errcode_t calc_next (void extra, bin_tree_t node);
48	static reg_errcode_t link_nfa_nodes (void extra, bin_tree_t node);
49	static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
50	static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
51	unsigned int constraint);
52	static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
53	static reg_errcode_t calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa,
54	int node, int root);
55	static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
56	static int fetch_number (re_string_t input, re_token_t token,
57	reg_syntax_t syntax);
58	static int peek_token (re_token_t token, re_string_t input,
59	reg_syntax_t syntax) internal_function;
60	static bin_tree_t parse (re_string_t regexp, regex_t *preg,
61	reg_syntax_t syntax, reg_errcode_t *err);
62	static bin_tree_t parse_reg_exp (re_string_t regexp, regex_t *preg,
63	re_token_t *token, reg_syntax_t syntax,
64	int nest, reg_errcode_t *err);
65	static bin_tree_t parse_branch (re_string_t regexp, regex_t *preg,
66	re_token_t *token, reg_syntax_t syntax,
67	int nest, reg_errcode_t *err);
68	static bin_tree_t parse_expression (re_string_t regexp, regex_t *preg,
69	re_token_t *token, reg_syntax_t syntax,
70	int nest, reg_errcode_t *err);
71	static bin_tree_t parse_sub_exp (re_string_t regexp, regex_t *preg,
72	re_token_t *token, reg_syntax_t syntax,
73	int nest, reg_errcode_t *err);
74	static bin_tree_t parse_dup_op (bin_tree_t dup_elem, re_string_t *regexp,
75	re_dfa_t dfa, re_token_t token,
76	reg_syntax_t syntax, reg_errcode_t *err);
77	static bin_tree_t parse_bracket_exp (re_string_t regexp, re_dfa_t *dfa,
78	re_token_t *token, reg_syntax_t syntax,
79	reg_errcode_t *err);
80	static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
81	re_string_t *regexp,
82	re_token_t *token, int token_len,
83	re_dfa_t *dfa,
84	reg_syntax_t syntax,
85	int accept_hyphen);
86	static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
87	re_string_t *regexp,
88	re_token_t *token);
89	#ifdef RE_ENABLE_I18N
90	static reg_errcode_t build_equiv_class (bitset_t sbcset,
91	re_charset_t *mbcset,
92	int *equiv_class_alloc,
93	const unsigned char *name);
94	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
95	bitset_t sbcset,
96	re_charset_t *mbcset,
97	int *char_class_alloc,
98	const unsigned char *class_name,
99	reg_syntax_t syntax);
100	#else /* not RE_ENABLE_I18N */
101	static reg_errcode_t build_equiv_class (bitset_t sbcset,
102	const unsigned char *name);
103	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
104	bitset_t sbcset,
105	const unsigned char *class_name,
106	reg_syntax_t syntax);
107	#endif /* not RE_ENABLE_I18N */
108	static bin_tree_t build_charclass_op (re_dfa_t dfa,
109	RE_TRANSLATE_TYPE trans,
110	const unsigned char *class_name,
111	const unsigned char *extra,
112	int non_match, reg_errcode_t *err);
113	static bin_tree_t create_tree (re_dfa_t dfa,
114	bin_tree_t left, bin_tree_t right,
115	re_token_type_t type);
116	static bin_tree_t create_token_tree (re_dfa_t dfa,
117	bin_tree_t left, bin_tree_t right,
118	const re_token_t *token);
119	static bin_tree_t duplicate_tree (const bin_tree_t src, re_dfa_t *dfa);
120	static void free_token (re_token_t *node);
121	static reg_errcode_t free_tree (void extra, bin_tree_t node);
122	static reg_errcode_t mark_opt_subexp (void extra, bin_tree_t node);
123
124
125	/* This table gives an error message for each of the error codes listed
126	in regex.h. Obviously the order here has to be same as there.
127	POSIX doesn't require that we do anything for REG_NOERROR,
128	but why not be nice? */
129
130	const char __re_error_msgid[] attribute_hidden =
131	{
132	#define REG_NOERROR_IDX 0
133	gettext_noop ("Success") /* REG_NOERROR */
134	"\0"
135	#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
136	gettext_noop ("No match") /* REG_NOMATCH */
137	"\0"
138	#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
139	gettext_noop ("Invalid regular expression") /* REG_BADPAT */
140	"\0"
141	#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
142	gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
143	"\0"
144	#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
145	gettext_noop ("Invalid character class name") /* REG_ECTYPE */
146	"\0"
147	#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
148	gettext_noop ("Trailing backslash") /* REG_EESCAPE */
149	"\0"
150	#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
151	gettext_noop ("Invalid back reference") /* REG_ESUBREG */
152	"\0"
153	#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
154	gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
155	"\0"
156	#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
157	gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
158	"\0"
159	#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
160	gettext_noop ("Unmatched \\{") /* REG_EBRACE */
161	"\0"
162	#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
163	gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
164	"\0"
165	#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
166	gettext_noop ("Invalid range end") /* REG_ERANGE */
167	"\0"
168	#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
169	gettext_noop ("Memory exhausted") /* REG_ESPACE */
170	"\0"
171	#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
172	gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
173	"\0"
174	#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
175	gettext_noop ("Premature end of regular expression") /* REG_EEND */
176	"\0"
177	#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
178	gettext_noop ("Regular expression too big") /* REG_ESIZE */
179	"\0"
180	#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
181	gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
182	};
183
184	const size_t __re_error_msgid_idx[] attribute_hidden =
185	{
186	REG_NOERROR_IDX,
187	REG_NOMATCH_IDX,
188	REG_BADPAT_IDX,
189	REG_ECOLLATE_IDX,
190	REG_ECTYPE_IDX,
191	REG_EESCAPE_IDX,
192	REG_ESUBREG_IDX,
193	REG_EBRACK_IDX,
194	REG_EPAREN_IDX,
195	REG_EBRACE_IDX,
196	REG_BADBR_IDX,
197	REG_ERANGE_IDX,
198	REG_ESPACE_IDX,
199	REG_BADRPT_IDX,
200	REG_EEND_IDX,
201	REG_ESIZE_IDX,
202	REG_ERPAREN_IDX
203	};
204
205
206	/* Entry points for GNU code. */
207
208	/* re_compile_pattern is the GNU regular expression compiler: it
209	compiles PATTERN (of length LENGTH) and puts the result in BUFP.
210	Returns 0 if the pattern was valid, otherwise an error string.
211
212	Assumes the `allocated' (and perhaps `buffer') and `translate' fields
213	are set in BUFP on entry. */
214
215	const char *
216	re_compile_pattern (pattern, length, bufp)
217	const char *pattern;
218	size_t length;
219	struct re_pattern_buffer *bufp;
220	{
221	reg_errcode_t ret;
222
223	/* And GNU code determines whether or not to get register information
224	by passing null for the REGS argument to re_match, etc., not by
225	setting no_sub, unless RE_NO_SUB is set. */
226	bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
227
228	/* Match anchors at newline. */
229	bufp->newline_anchor = 1;
230
231	ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
232
233	if (!ret)
234	return NULL;
235	return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
236	}
237	#ifdef _LIBC
238	weak_alias (__re_compile_pattern, re_compile_pattern)
239	#endif
240
241	/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
242	also be assigned to arbitrarily: each pattern buffer stores its own
243	syntax, so it can be changed between regex compilations. */
244	/* This has no initializer because initialized variables in Emacs
245	become read-only after dumping. */
246	reg_syntax_t re_syntax_options;
247
248
249	/* Specify the precise syntax of regexps for compilation. This provides
250	for compatibility for various utilities which historically have
251	different, incompatible syntaxes.
252
253	The argument SYNTAX is a bit mask comprised of the various bits
254	defined in regex.h. We return the old syntax. */
255
256	reg_syntax_t
257	re_set_syntax (syntax)
258	reg_syntax_t syntax;
259	{
260	reg_syntax_t ret = re_syntax_options;
261
262	re_syntax_options = syntax;
263	return ret;
264	}
265	#ifdef _LIBC
266	weak_alias (__re_set_syntax, re_set_syntax)
267	#endif
268
269	int
270	re_compile_fastmap (bufp)
271	struct re_pattern_buffer *bufp;
272	{
273	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
274	char *fastmap = bufp->fastmap;
275
276	memset (fastmap, '\0', sizeof (char) * SBC_MAX);
277	re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
278	if (dfa->init_state != dfa->init_state_word)
279	re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
280	if (dfa->init_state != dfa->init_state_nl)
281	re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
282	if (dfa->init_state != dfa->init_state_begbuf)
283	re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
284	bufp->fastmap_accurate = 1;
285	return 0;
286	}
287	#ifdef _LIBC
288	weak_alias (__re_compile_fastmap, re_compile_fastmap)
289	#endif
290
291	static inline void
292	__attribute ((always_inline))
293	re_set_fastmap (char *fastmap, int icase, int ch)
294	{
295	fastmap[ch] = 1;
296	if (icase)
297	fastmap[tolower (ch)] = 1;
298	}
299
300	/* Helper function for re_compile_fastmap.
301	Compile fastmap for the initial_state INIT_STATE. */
302
303	static void
304	re_compile_fastmap_iter (regex_t bufp, const re_dfastate_t init_state,
305	char *fastmap)
306	{
307	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
308	int node_cnt;
309	int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
310	for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
311	{
312	int node = init_state->nodes.elems[node_cnt];
313	re_token_type_t type = dfa->nodes[node].type;
314
315	if (type == CHARACTER)
316	{
317	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
318	#ifdef RE_ENABLE_I18N
319	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
320	{
321	unsigned char buf = alloca (dfa->mb_cur_max), p;
322	wchar_t wc;
323	mbstate_t state;
324
325	p = buf;
326	*p++ = dfa->nodes[node].opr.c;
327	while (++node < dfa->nodes_len
328	&& dfa->nodes[node].type == CHARACTER
329	&& dfa->nodes[node].mb_partial)
330	*p++ = dfa->nodes[node].opr.c;
331	memset (&state, '\0', sizeof (state));
332	if (mbrtowc (&wc, (const char *) buf, p - buf,
333	&state) == p - buf
334	&& (__wcrtomb ((char *) buf, towlower (wc), &state)
335	!= (size_t) -1))
336	re_set_fastmap (fastmap, 0, buf[0]);
337	}
338	#endif
339	}
340	else if (type == SIMPLE_BRACKET)
341	{
342	int i, ch;
343	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
344	{
345	int j;
346	bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
347	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
348	if (w & ((bitset_word_t) 1 << j))
349	re_set_fastmap (fastmap, icase, ch);
350	}
351	}
352	#ifdef RE_ENABLE_I18N
353	else if (type == COMPLEX_BRACKET)
354	{
355	int i;
356	re_charset_t *cset = dfa->nodes[node].opr.mbcset;
357	if (cset->non_match \|\| cset->ncoll_syms \|\| cset->nequiv_classes
358	\|\| cset->nranges \|\| cset->nchar_classes)
359	{
360	# ifdef _LIBC
361	if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
362	{
363	/* In this case we want to catch the bytes which are
364	the first byte of any collation elements.
365	e.g. In da_DK, we want to catch 'a' since "aa"
366	is a valid collation element, and don't catch
367	'b' since 'b' is the only collation element
368	which starts from 'b'. */
369	const int32_t table = (const int32_t )
370	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371	for (i = 0; i < SBC_MAX; ++i)
372	if (table[i] < 0)
373	re_set_fastmap (fastmap, icase, i);
374	}
375	# else
376	if (dfa->mb_cur_max > 1)
377	for (i = 0; i < SBC_MAX; ++i)
378	if (__btowc (i) == WEOF)
379	re_set_fastmap (fastmap, icase, i);
380	# endif /* not _LIBC */
381	}
382	for (i = 0; i < cset->nmbchars; ++i)
383	{
384	char buf[256];
385	mbstate_t state;
386	memset (&state, '\0', sizeof (state));
387	if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
388	re_set_fastmap (fastmap, icase, (unsigned char ) buf);
389	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
390	{
391	if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
392	!= (size_t) -1)
393	re_set_fastmap (fastmap, 0, (unsigned char ) buf);
394	}
395	}
396	}
397	#endif /* RE_ENABLE_I18N */
398	else if (type == OP_PERIOD
399	#ifdef RE_ENABLE_I18N
400	\|\| type == OP_UTF8_PERIOD
401	#endif /* RE_ENABLE_I18N */
402	\|\| type == END_OF_RE)
403	{
404	memset (fastmap, '\1', sizeof (char) * SBC_MAX);
405	if (type == END_OF_RE)
406	bufp->can_be_null = 1;
407	return;
408	}
409	}
410	}
411
412
413	/* Entry point for POSIX code. */
414	/* regcomp takes a regular expression as a string and compiles it.
415
416	PREG is a regex_t *. We do not expect any fields to be initialized,
417	since POSIX says we shouldn't. Thus, we set
418
419	`buffer' to the compiled pattern;
420	`used' to the length of the compiled pattern;
421	`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
422	REG_EXTENDED bit in CFLAGS is set; otherwise, to
423	RE_SYNTAX_POSIX_BASIC;
424	`newline_anchor' to REG_NEWLINE being set in CFLAGS;
425	`fastmap' to an allocated space for the fastmap;
426	`fastmap_accurate' to zero;
427	`re_nsub' to the number of subexpressions in PATTERN.
428
429	PATTERN is the address of the pattern string.
430
431	CFLAGS is a series of bits which affect compilation.
432
433	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
434	use POSIX basic syntax.
435
436	If REG_NEWLINE is set, then . and [^...] don't match newline.
437	Also, regexec will try a match beginning after every newline.
438
439	If REG_ICASE is set, then we considers upper- and lowercase
440	versions of letters to be equivalent when matching.
441
442	If REG_NOSUB is set, then when PREG is passed to regexec, that
443	routine will report only success or failure, and nothing about the
444	registers.
445
446	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
447	the return codes and their meanings.) */
448
449	int
450	regcomp (preg, pattern, cflags)
451	regex_t *__restrict preg;
452	const char *__restrict pattern;
453	int cflags;
454	{
455	reg_errcode_t ret;
456	reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
457	: RE_SYNTAX_POSIX_BASIC);
458
459	preg->buffer = NULL;
460	preg->allocated = 0;
461	preg->used = 0;
462
463	/* Try to allocate space for the fastmap. */
464	preg->fastmap = re_malloc (char, SBC_MAX);
465	if (BE (preg->fastmap == NULL, 0))
466	return REG_ESPACE;
467
468	syntax \|= (cflags & REG_ICASE) ? RE_ICASE : 0;
469
470	/* If REG_NEWLINE is set, newlines are treated differently. */
471	if (cflags & REG_NEWLINE)
472	{ /* REG_NEWLINE implies neither . nor [^...] match newline. */
473	syntax &= ~RE_DOT_NEWLINE;
474	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
475	/* It also changes the matching behavior. */
476	preg->newline_anchor = 1;
477	}
478	else
479	preg->newline_anchor = 0;
480	preg->no_sub = !!(cflags & REG_NOSUB);
481	preg->translate = NULL;
482
483	ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
484
485	/* POSIX doesn't distinguish between an unmatched open-group and an
486	unmatched close-group: both are REG_EPAREN. */
487	if (ret == REG_ERPAREN)
488	ret = REG_EPAREN;
489
490	/* We have already checked preg->fastmap != NULL. */
491	if (BE (ret == REG_NOERROR, 1))
492	/* Compute the fastmap now, since regexec cannot modify the pattern
493	buffer. This function never fails in this implementation. */
494	(void) re_compile_fastmap (preg);
495	else
496	{
497	/* Some error occurred while compiling the expression. */
498	re_free (preg->fastmap);
499	preg->fastmap = NULL;
500	}
501
502	return (int) ret;
503	}
504	#ifdef _LIBC
505	weak_alias (__regcomp, regcomp)
506	#endif
507
508	/* Returns a message corresponding to an error code, ERRCODE, returned
509	from either regcomp or regexec. We don't use PREG here. */
510
511	size_t
512	regerror (errcode, preg, errbuf, errbuf_size)
513	int errcode;
514	const regex_t *__restrict preg;
515	char *__restrict errbuf;
516	size_t errbuf_size;
517	{
518	const char *msg;
519	size_t msg_size;
520
521	if (BE (errcode < 0
522	\|\| errcode >= (int) (sizeof (__re_error_msgid_idx)
523	/ sizeof (__re_error_msgid_idx[0])), 0))
524	/* Only error codes returned by the rest of the code should be passed
525	to this routine. If we are given anything else, or if other regex
526	code generates an invalid error code, then the program has a bug.
527	Dump core so we can fix it. */
528	abort ();
529
530	msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
531
532	msg_size = strlen (msg) + 1; /* Includes the null. */
533
534	if (BE (errbuf_size != 0, 1))
535	{
536	if (BE (msg_size > errbuf_size, 0))
537	{
538	#if defined HAVE_MEMPCPY \|\| defined _LIBC
539	((char ) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
540	#else
541	memcpy (errbuf, msg, errbuf_size - 1);
542	errbuf[errbuf_size - 1] = 0;
543	#endif
544	}
545	else
546	memcpy (errbuf, msg, msg_size);
547	}
548
549	return msg_size;
550	}
551	#ifdef _LIBC
552	weak_alias (__regerror, regerror)
553	#endif
554
555
556	#ifdef RE_ENABLE_I18N
557	/* This static array is used for the map to single-byte characters when
558	UTF-8 is used. Otherwise we would allocate memory just to initialize
559	it the same all the time. UTF-8 is the preferred encoding so this is
560	a worthwhile optimization. */
561	static const bitset_t utf8_sb_map =
562	{
563	/* Set the first 128 bits. */
564	#ifdef _MSC_VER
565	BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, 0, 0, 0, 0
566	#else
567	[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
568	#endif
569	};
570	#endif
571
572
573	static void
574	free_dfa_content (re_dfa_t *dfa)
575	{
576	int i, j;
577
578	if (dfa->nodes)
579	for (i = 0; i < dfa->nodes_len; ++i)
580	free_token (dfa->nodes + i);
581	re_free (dfa->nexts);
582	for (i = 0; i < dfa->nodes_len; ++i)
583	{
584	if (dfa->eclosures != NULL)
585	re_node_set_free (dfa->eclosures + i);
586	if (dfa->inveclosures != NULL)
587	re_node_set_free (dfa->inveclosures + i);
588	if (dfa->edests != NULL)
589	re_node_set_free (dfa->edests + i);
590	}
591	re_free (dfa->edests);
592	re_free (dfa->eclosures);
593	re_free (dfa->inveclosures);
594	re_free (dfa->nodes);
595
596	if (dfa->state_table)
597	for (i = 0; i <= dfa->state_hash_mask; ++i)
598	{
599	struct re_state_table_entry *entry = dfa->state_table + i;
600	for (j = 0; j < entry->num; ++j)
601	{
602	re_dfastate_t *state = entry->array[j];
603	free_state (state);
604	}
605	re_free (entry->array);
606	}
607	re_free (dfa->state_table);
608	#ifdef RE_ENABLE_I18N
609	if (dfa->sb_char != utf8_sb_map)
610	re_free (dfa->sb_char);
611	#endif
612	re_free (dfa->subexp_map);
613	#ifdef DEBUG
614	re_free (dfa->re_str);
615	#endif
616
617	re_free (dfa);
618	}
619
620
621	/* Free dynamically allocated space used by PREG. */
622
623	void
624	regfree (preg)
625	regex_t *preg;
626	{
627	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
628	if (BE (dfa != NULL, 1))
629	free_dfa_content (dfa);
630	preg->buffer = NULL;
631	preg->allocated = 0;
632
633	re_free (preg->fastmap);
634	preg->fastmap = NULL;
635
636	re_free (preg->translate);
637	preg->translate = NULL;
638	}
639	#ifdef _LIBC
640	weak_alias (__regfree, regfree)
641	#endif
642
643
644	/* Entry points compatible with 4.2 BSD regex library. We don't define
645	them unless specifically requested. */
646
647	#if defined _REGEX_RE_COMP \|\| defined _LIBC
648
649	/* BSD has one and only one pattern buffer. */
650	static struct re_pattern_buffer re_comp_buf;
651
652	char *
653	# ifdef _LIBC
654	/* Make these definitions weak in libc, so POSIX programs can redefine
655	these names if they don't use our functions, and still use
656	regcomp/regexec above without link errors. */
657	weak_function
658	# endif
659	re_comp (s)
660	const char *s;
661	{
662	reg_errcode_t ret;
663	char *fastmap;
664
665	if (!s)
666	{
667	if (!re_comp_buf.buffer)
668	return gettext ("No previous regular expression");
669	return 0;
670	}
671
672	if (re_comp_buf.buffer)
673	{
674	fastmap = re_comp_buf.fastmap;
675	re_comp_buf.fastmap = NULL;
676	__regfree (&re_comp_buf);
677	memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
678	re_comp_buf.fastmap = fastmap;
679	}
680
681	if (re_comp_buf.fastmap == NULL)
682	{
683	re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
684	if (re_comp_buf.fastmap == NULL)
685	return (char *) gettext (__re_error_msgid
686	+ __re_error_msgid_idx[(int) REG_ESPACE]);
687	}
688
689	/* Since `re_exec' always passes NULL for the `regs' argument, we
690	don't need to initialize the pattern buffer fields which affect it. */
691
692	/* Match anchors at newlines. */
693	re_comp_buf.newline_anchor = 1;
694
695	ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
696
697	if (!ret)
698	return NULL;
699
700	/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
701	return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
702	}
703
704	#ifdef _LIBC
705	libc_freeres_fn (free_mem)
706	{
707	__regfree (&re_comp_buf);
708	}
709	#endif
710
711	#endif /* _REGEX_RE_COMP */
712
713
714	/* Internal entry point.
715	Compile the regular expression PATTERN, whose length is LENGTH.
716	SYNTAX indicate regular expression's syntax. */
717
718	static reg_errcode_t
719	re_compile_internal (regex_t preg, const char pattern, size_t length,
720	reg_syntax_t syntax)
721	{
722	reg_errcode_t err = REG_NOERROR;
723	re_dfa_t *dfa;
724	re_string_t regexp;
725
726	/* Initialize the pattern buffer. */
727	preg->fastmap_accurate = 0;
728	preg->syntax = syntax;
729	preg->not_bol = preg->not_eol = 0;
730	preg->used = 0;
731	preg->re_nsub = 0;
732	preg->can_be_null = 0;
733	preg->regs_allocated = REGS_UNALLOCATED;
734
735	/* Initialize the dfa. */
736	dfa = (re_dfa_t *) preg->buffer;
737	if (BE (preg->allocated < sizeof (re_dfa_t), 0))
738	{
739	/* If zero allocated, but buffer is non-null, try to realloc
740	enough space. This loses if buffer's address is bogus, but
741	that is the user's responsibility. If ->buffer is NULL this
742	is a simple allocation. */
743	dfa = re_realloc (preg->buffer, re_dfa_t, 1);
744	if (dfa == NULL)
745	return REG_ESPACE;
746	preg->allocated = sizeof (re_dfa_t);
747	preg->buffer = (unsigned char *) dfa;
748	}
749	preg->used = sizeof (re_dfa_t);
750
751	err = init_dfa (dfa, length);
752	if (BE (err != REG_NOERROR, 0))
753	{
754	free_dfa_content (dfa);
755	preg->buffer = NULL;
756	preg->allocated = 0;
757	return err;
758	}
759	#ifdef DEBUG
760	/* Note: length+1 will not overflow since it is checked in init_dfa. */
761	dfa->re_str = re_malloc (char, length + 1);
762	strncpy (dfa->re_str, pattern, length + 1);
763	#endif
764
765	__libc_lock_init (dfa->lock);
766
767	err = re_string_construct (&regexp, pattern, length, preg->translate,
768	syntax & RE_ICASE, dfa);
769	if (BE (err != REG_NOERROR, 0))
770	{
771	re_compile_internal_free_return:
772	free_workarea_compile (preg);
773	re_string_destruct (&regexp);
774	free_dfa_content (dfa);
775	preg->buffer = NULL;
776	preg->allocated = 0;
777	return err;
778	}
779
780	/* Parse the regular expression, and build a structure tree. */
781	preg->re_nsub = 0;
782	dfa->str_tree = parse (&regexp, preg, syntax, &err);
783	if (BE (dfa->str_tree == NULL, 0))
784	goto re_compile_internal_free_return;
785
786	/* Analyze the tree and create the nfa. */
787	err = analyze (preg);
788	if (BE (err != REG_NOERROR, 0))
789	goto re_compile_internal_free_return;
790
791	#ifdef RE_ENABLE_I18N
792	/* If possible, do searching in single byte encoding to speed things up. */
793	if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
794	optimize_utf8 (dfa);
795	#endif
796
797	/* Then create the initial state of the dfa. */
798	err = create_initial_state (dfa);
799
800	/* Release work areas. */
801	free_workarea_compile (preg);
802	re_string_destruct (&regexp);
803
804	if (BE (err != REG_NOERROR, 0))
805	{
806	free_dfa_content (dfa);
807	preg->buffer = NULL;
808	preg->allocated = 0;
809	}
810
811	return err;
812	}
813
814	/* Initialize DFA. We use the length of the regular expression PAT_LEN
815	as the initial length of some arrays. */
816
817	static reg_errcode_t
818	init_dfa (re_dfa_t *dfa, size_t pat_len)
819	{
820	unsigned int table_size;
821	#ifndef _LIBC
822	char *codeset_name;
823	#endif
824
825	memset (dfa, '\0', sizeof (re_dfa_t));
826
827	/* Force allocation of str_tree_storage the first time. */
828	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
829
830	/* Avoid overflows. */
831	if (pat_len == SIZE_MAX)
832	return REG_ESPACE;
833
834	dfa->nodes_alloc = pat_len + 1;
835	dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
836
837	/* table_size = 2 ^ ceil(log pat_len) */
838	for (table_size = 1; ; table_size <<= 1)
839	if (table_size > pat_len)
840	break;
841
842	dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
843	dfa->state_hash_mask = table_size - 1;
844
845	dfa->mb_cur_max = MB_CUR_MAX;
846	#ifdef _LIBC
847	if (dfa->mb_cur_max == 6
848	&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
849	dfa->is_utf8 = 1;
850	dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
851	!= 0);
852	#else
853	# ifdef HAVE_LANGINFO_CODESET
854	codeset_name = nl_langinfo (CODESET);
855	# else
856	codeset_name = getenv ("LC_ALL");
857	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
858	codeset_name = getenv ("LC_CTYPE");
859	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
860	codeset_name = getenv ("LANG");
861	if (codeset_name == NULL)
862	codeset_name = "";
863	else if (strchr (codeset_name, '.') != NULL)
864	codeset_name = strchr (codeset_name, '.') + 1;
865	# endif
866
867	if (strcasecmp (codeset_name, "UTF-8") == 0
868	\|\| strcasecmp (codeset_name, "UTF8") == 0)
869	dfa->is_utf8 = 1;
870
871	/* We check exhaustively in the loop below if this charset is a
872	superset of ASCII. */
873	dfa->map_notascii = 0;
874	#endif
875
876	#ifdef RE_ENABLE_I18N
877	if (dfa->mb_cur_max > 1)
878	{
879	if (dfa->is_utf8)
880	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
881	else
882	{
883	int i, j, ch;
884
885	dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
886	if (BE (dfa->sb_char == NULL, 0))
887	return REG_ESPACE;
888
889	/* Set the bits corresponding to single byte chars. */
890	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
891	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
892	{
893	wint_t wch = __btowc (ch);
894	if (wch != WEOF)
895	dfa->sb_char[i] \|= (bitset_word_t) 1 << j;
896	# ifndef _LIBC
897	if (isascii (ch) && wch != ch)
898	dfa->map_notascii = 1;
899	# endif
900	}
901	}
902	}
903	#endif
904
905	if (BE (dfa->nodes == NULL \|\| dfa->state_table == NULL, 0))
906	return REG_ESPACE;
907	return REG_NOERROR;
908	}
909
910	/* Initialize WORD_CHAR table, which indicate which character is
911	"word". In this case "word" means that it is the word construction
912	character used by some operators like "\<", "\>", etc. */
913
914	static void
915	internal_function
916	init_word_char (re_dfa_t *dfa)
917	{
918	int i, j, ch;
919	dfa->word_ops_used = 1;
920	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
921	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
922	if (isalnum (ch) \|\| ch == '_')
923	dfa->word_char[i] \|= (bitset_word_t) 1 << j;
924	}
925
926	/* Free the work area which are only used while compiling. */
927
928	static void
929	free_workarea_compile (regex_t *preg)
930	{
931	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
932	bin_tree_storage_t storage, next;
933	for (storage = dfa->str_tree_storage; storage; storage = next)
934	{
935	next = storage->next;
936	re_free (storage);
937	}
938	dfa->str_tree_storage = NULL;
939	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
940	dfa->str_tree = NULL;
941	re_free (dfa->org_indices);
942	dfa->org_indices = NULL;
943	}
944
945	/* Create initial states for all contexts. */
946
947	static reg_errcode_t
948	create_initial_state (re_dfa_t *dfa)
949	{
950	int first, i;
951	reg_errcode_t err;
952	re_node_set init_nodes;
953
954	/* Initial states have the epsilon closure of the node which is
955	the first node of the regular expression. */
956	first = dfa->str_tree->first->node_idx;
957	dfa->init_node = first;
958	err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
959	if (BE (err != REG_NOERROR, 0))
960	return err;
961
962	/* The back-references which are in initial states can epsilon transit,
963	since in this case all of the subexpressions can be null.
964	Then we add epsilon closures of the nodes which are the next nodes of
965	the back-references. */
966	if (dfa->nbackref > 0)
967	for (i = 0; i < init_nodes.nelem; ++i)
968	{
969	int node_idx = init_nodes.elems[i];
970	re_token_type_t type = dfa->nodes[node_idx].type;
971
972	int clexp_idx;
973	if (type != OP_BACK_REF)
974	continue;
975	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
976	{
977	re_token_t *clexp_node;
978	clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
979	if (clexp_node->type == OP_CLOSE_SUBEXP
980	&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
981	break;
982	}
983	if (clexp_idx == init_nodes.nelem)
984	continue;
985
986	if (type == OP_BACK_REF)
987	{
988	int dest_idx = dfa->edests[node_idx].elems[0];
989	if (!re_node_set_contains (&init_nodes, dest_idx))
990	{
991	re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
992	i = 0;
993	}
994	}
995	}
996
997	/* It must be the first time to invoke acquire_state. */
998	dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
999	/* We don't check ERR here, since the initial state must not be NULL. */
1000	if (BE (dfa->init_state == NULL, 0))
1001	return err;
1002	if (dfa->init_state->has_constraint)
1003	{
1004	dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1005	CONTEXT_WORD);
1006	dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1007	CONTEXT_NEWLINE);
1008	dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1009	&init_nodes,
1010	CONTEXT_NEWLINE
1011	\| CONTEXT_BEGBUF);
1012	if (BE (dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
1013	\|\| dfa->init_state_begbuf == NULL, 0))
1014	return err;
1015	}
1016	else
1017	dfa->init_state_word = dfa->init_state_nl
1018	= dfa->init_state_begbuf = dfa->init_state;
1019
1020	re_node_set_free (&init_nodes);
1021	return REG_NOERROR;
1022	}
1023
1024
1025	#ifdef RE_ENABLE_I18N
1026	/* If it is possible to do searching in single byte encoding instead of UTF-8
1027	to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1028	DFA nodes where needed. */
1029
1030	static void
1031	optimize_utf8 (re_dfa_t *dfa)
1032	{
1033	int node, i, mb_chars = 0, has_period = 0;
1034
1035	for (node = 0; node < dfa->nodes_len; ++node)
1036	switch (dfa->nodes[node].type)
1037	{
1038	case CHARACTER:
1039	if (dfa->nodes[node].opr.c >= 0x80)
1040	mb_chars = 1;
1041	break;
1042	case ANCHOR:
1043	switch (dfa->nodes[node].opr.idx)
1044	{
1045	case LINE_FIRST:
1046	case LINE_LAST:
1047	case BUF_FIRST:
1048	case BUF_LAST:
1049	break;
1050	default:
1051	/* Word anchors etc. cannot be handled. */
1052	return;
1053	}
1054	break;
1055	case OP_PERIOD:
1056	has_period = 1;
1057	break;
1058	case OP_BACK_REF:
1059	case OP_ALT:
1060	case END_OF_RE:
1061	case OP_DUP_ASTERISK:
1062	case OP_OPEN_SUBEXP:
1063	case OP_CLOSE_SUBEXP:
1064	break;
1065	case COMPLEX_BRACKET:
1066	return;
1067	case SIMPLE_BRACKET:
1068	/* Just double check. The non-ASCII range starts at 0x80. */
1069	assert (0x80 % BITSET_WORD_BITS == 0);
1070	for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1071	if (dfa->nodes[node].opr.sbcset[i])
1072	return;
1073	break;
1074	default:
1075	abort ();
1076	}
1077
1078	if (mb_chars \|\| has_period)
1079	for (node = 0; node < dfa->nodes_len; ++node)
1080	{
1081	if (dfa->nodes[node].type == CHARACTER
1082	&& dfa->nodes[node].opr.c >= 0x80)
1083	dfa->nodes[node].mb_partial = 0;
1084	else if (dfa->nodes[node].type == OP_PERIOD)
1085	dfa->nodes[node].type = OP_UTF8_PERIOD;
1086	}
1087
1088	/* The search can be in single byte locale. */
1089	dfa->mb_cur_max = 1;
1090	dfa->is_utf8 = 0;
1091	dfa->has_mb_node = dfa->nbackref > 0 \|\| has_period;
1092	}
1093	#endif
1094
1095
1096	/* Analyze the structure tree, and calculate "first", "next", "edest",
1097	"eclosure", and "inveclosure". */
1098
1099	static reg_errcode_t
1100	analyze (regex_t *preg)
1101	{
1102	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1103	reg_errcode_t ret;
1104
1105	/* Allocate arrays. */
1106	dfa->nexts = re_malloc (int, dfa->nodes_alloc);
1107	dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
1108	dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1109	dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1110	if (BE (dfa->nexts == NULL \|\| dfa->org_indices == NULL \|\| dfa->edests == NULL
1111	\|\| dfa->eclosures == NULL, 0))
1112	return REG_ESPACE;
1113
1114	dfa->subexp_map = re_malloc (int, preg->re_nsub);
1115	if (dfa->subexp_map != NULL)
1116	{
1117	int i;
1118	for (i = 0; i < preg->re_nsub; i++)
1119	dfa->subexp_map[i] = i;
1120	preorder (dfa->str_tree, optimize_subexps, dfa);
1121	for (i = 0; i < preg->re_nsub; i++)
1122	if (dfa->subexp_map[i] != i)
1123	break;
1124	if (i == preg->re_nsub)
1125	{
1126	free (dfa->subexp_map);
1127	dfa->subexp_map = NULL;
1128	}
1129	}
1130
1131	ret = postorder (dfa->str_tree, lower_subexps, preg);
1132	if (BE (ret != REG_NOERROR, 0))
1133	return ret;
1134	ret = postorder (dfa->str_tree, calc_first, dfa);
1135	if (BE (ret != REG_NOERROR, 0))
1136	return ret;
1137	preorder (dfa->str_tree, calc_next, dfa);
1138	ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1139	if (BE (ret != REG_NOERROR, 0))
1140	return ret;
1141	ret = calc_eclosure (dfa);
1142	if (BE (ret != REG_NOERROR, 0))
1143	return ret;
1144
1145	/* We only need this during the prune_impossible_nodes pass in regexec.c;
1146	skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1147	if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1148	\|\| dfa->nbackref)
1149	{
1150	dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1151	if (BE (dfa->inveclosures == NULL, 0))
1152	return REG_ESPACE;
1153	ret = calc_inveclosure (dfa);
1154	}
1155
1156	return ret;
1157	}
1158
1159	/* Our parse trees are very unbalanced, so we cannot use a stack to
1160	implement parse tree visits. Instead, we use parent pointers and
1161	some hairy code in these two functions. */
1162	static reg_errcode_t
1163	postorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
1164	void *extra)
1165	{
1166	bin_tree_t node, prev;
1167
1168	for (node = root; ; )
1169	{
1170	/* Descend down the tree, preferably to the left (or to the right
1171	if that's the only child). */
1172	while (node->left \|\| node->right)
1173	if (node->left)
1174	node = node->left;
1175	else
1176	node = node->right;
1177
1178	do
1179	{
1180	reg_errcode_t err = fn (extra, node);
1181	if (BE (err != REG_NOERROR, 0))
1182	return err;
1183	if (node->parent == NULL)
1184	return REG_NOERROR;
1185	prev = node;
1186	node = node->parent;
1187	}
1188	/* Go up while we have a node that is reached from the right. */
1189	while (node->right == prev \|\| node->right == NULL);
1190	node = node->right;
1191	}
1192	}
1193
1194	static reg_errcode_t
1195	preorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
1196	void *extra)
1197	{
1198	bin_tree_t *node;
1199
1200	for (node = root; ; )
1201	{
1202	reg_errcode_t err = fn (extra, node);
1203	if (BE (err != REG_NOERROR, 0))
1204	return err;
1205
1206	/* Go to the left node, or up and to the right. */
1207	if (node->left)
1208	node = node->left;
1209	else
1210	{
1211	bin_tree_t *prev = NULL;
1212	while (node->right == prev \|\| node->right == NULL)
1213	{
1214	prev = node;
1215	node = node->parent;
1216	if (!node)
1217	return REG_NOERROR;
1218	}
1219	node = node->right;
1220	}
1221	}
1222	}
1223
1224	/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1225	re_search_internal to map the inner one's opr.idx to this one's. Adjust
1226	backreferences as well. Requires a preorder visit. */
1227	static reg_errcode_t
1228	optimize_subexps (void extra, bin_tree_t node)
1229	{
1230	re_dfa_t dfa = (re_dfa_t ) extra;
1231
1232	if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1233	{
1234	int idx = node->token.opr.idx;
1235	node->token.opr.idx = dfa->subexp_map[idx];
1236	dfa->used_bkref_map \|= 1 << node->token.opr.idx;
1237	}
1238
1239	else if (node->token.type == SUBEXP
1240	&& node->left && node->left->token.type == SUBEXP)
1241	{
1242	int other_idx = node->left->token.opr.idx;
1243
1244	node->left = node->left->left;
1245	if (node->left)
1246	node->left->parent = node;
1247
1248	dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1249	if (other_idx < BITSET_WORD_BITS)
1250	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1251	}
1252
1253	return REG_NOERROR;
1254	}
1255
1256	/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1257	of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1258	static reg_errcode_t
1259	lower_subexps (void extra, bin_tree_t node)
1260	{
1261	regex_t preg = (regex_t ) extra;
1262	reg_errcode_t err = REG_NOERROR;
1263
1264	if (node->left && node->left->token.type == SUBEXP)
1265	{
1266	node->left = lower_subexp (&err, preg, node->left);
1267	if (node->left)
1268	node->left->parent = node;
1269	}
1270	if (node->right && node->right->token.type == SUBEXP)
1271	{
1272	node->right = lower_subexp (&err, preg, node->right);
1273	if (node->right)
1274	node->right->parent = node;
1275	}
1276
1277	return err;
1278	}
1279
1280	static bin_tree_t *
1281	lower_subexp (reg_errcode_t err, regex_t preg, bin_tree_t *node)
1282	{
1283	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1284	bin_tree_t *body = node->left;
1285	bin_tree_t op, cls, tree1, tree;
1286
1287	if (preg->no_sub
1288	/* We do not optimize empty subexpressions, because otherwise we may
1289	have bad CONCAT nodes with NULL children. This is obviously not
1290	very common, so we do not lose much. An example that triggers
1291	this case is the sed "script" /\(\)/x. */
1292	&& node->left != NULL
1293	&& (node->token.opr.idx >= BITSET_WORD_BITS
1294	\|\| !(dfa->used_bkref_map
1295	& ((bitset_word_t) 1 << node->token.opr.idx))))
1296	return node->left;
1297
1298	/* Convert the SUBEXP node to the concatenation of an
1299	OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1300	op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1301	cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1302	tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1303	tree = create_tree (dfa, op, tree1, CONCAT);
1304	if (BE (tree == NULL \|\| tree1 == NULL \|\| op == NULL \|\| cls == NULL, 0))
1305	{
1306	*err = REG_ESPACE;
1307	return NULL;
1308	}
1309
1310	op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1311	op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1312	return tree;
1313	}
1314
1315	/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1316	nodes. Requires a postorder visit. */
1317	static reg_errcode_t
1318	calc_first (void extra, bin_tree_t node)
1319	{
1320	re_dfa_t dfa = (re_dfa_t ) extra;
1321	if (node->token.type == CONCAT)
1322	{
1323	node->first = node->left->first;
1324	node->node_idx = node->left->node_idx;
1325	}
1326	else
1327	{
1328	node->first = node;
1329	node->node_idx = re_dfa_add_node (dfa, node->token);
1330	if (BE (node->node_idx == -1, 0))
1331	return REG_ESPACE;
1332	}
1333	return REG_NOERROR;
1334	}
1335
1336	/* Pass 2: compute NEXT on the tree. Preorder visit. */
1337	static reg_errcode_t
1338	calc_next (void extra, bin_tree_t node)
1339	{
1340	switch (node->token.type)
1341	{
1342	case OP_DUP_ASTERISK:
1343	node->left->next = node;
1344	break;
1345	case CONCAT:
1346	node->left->next = node->right->first;
1347	node->right->next = node->next;
1348	break;
1349	default:
1350	if (node->left)
1351	node->left->next = node->next;
1352	if (node->right)
1353	node->right->next = node->next;
1354	break;
1355	}
1356	return REG_NOERROR;
1357	}
1358
1359	/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1360	static reg_errcode_t
1361	link_nfa_nodes (void extra, bin_tree_t node)
1362	{
1363	re_dfa_t dfa = (re_dfa_t ) extra;
1364	int idx = node->node_idx;
1365	reg_errcode_t err = REG_NOERROR;
1366
1367	switch (node->token.type)
1368	{
1369	case CONCAT:
1370	break;
1371
1372	case END_OF_RE:
1373	assert (node->next == NULL);
1374	break;
1375
1376	case OP_DUP_ASTERISK:
1377	case OP_ALT:
1378	{
1379	int left, right;
1380	dfa->has_plural_match = 1;
1381	if (node->left != NULL)
1382	left = node->left->first->node_idx;
1383	else
1384	left = node->next->node_idx;
1385	if (node->right != NULL)
1386	right = node->right->first->node_idx;
1387	else
1388	right = node->next->node_idx;
1389	assert (left > -1);
1390	assert (right > -1);
1391	err = re_node_set_init_2 (dfa->edests + idx, left, right);
1392	}
1393	break;
1394
1395	case ANCHOR:
1396	case OP_OPEN_SUBEXP:
1397	case OP_CLOSE_SUBEXP:
1398	err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1399	break;
1400
1401	case OP_BACK_REF:
1402	dfa->nexts[idx] = node->next->node_idx;
1403	if (node->token.type == OP_BACK_REF)
1404	re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1405	break;
1406
1407	default:
1408	assert (!IS_EPSILON_NODE (node->token.type));
1409	dfa->nexts[idx] = node->next->node_idx;
1410	break;
1411	}
1412
1413	return err;
1414	}
1415
1416	/* Duplicate the epsilon closure of the node ROOT_NODE.
1417	Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1418	to their own constraint. */
1419
1420	static reg_errcode_t
1421	internal_function
1422	duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
1423	int root_node, unsigned int init_constraint)
1424	{
1425	int org_node, clone_node, ret;
1426	unsigned int constraint = init_constraint;
1427	for (org_node = top_org_node, clone_node = top_clone_node;;)
1428	{
1429	int org_dest, clone_dest;
1430	if (dfa->nodes[org_node].type == OP_BACK_REF)
1431	{
1432	/* If the back reference epsilon-transit, its destination must
1433	also have the constraint. Then duplicate the epsilon closure
1434	of the destination of the back reference, and store it in
1435	edests of the back reference. */
1436	org_dest = dfa->nexts[org_node];
1437	re_node_set_empty (dfa->edests + clone_node);
1438	clone_dest = duplicate_node (dfa, org_dest, constraint);
1439	if (BE (clone_dest == -1, 0))
1440	return REG_ESPACE;
1441	dfa->nexts[clone_node] = dfa->nexts[org_node];
1442	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1443	if (BE (ret < 0, 0))
1444	return REG_ESPACE;
1445	}
1446	else if (dfa->edests[org_node].nelem == 0)
1447	{
1448	/* In case of the node can't epsilon-transit, don't duplicate the
1449	destination and store the original destination as the
1450	destination of the node. */
1451	dfa->nexts[clone_node] = dfa->nexts[org_node];
1452	break;
1453	}
1454	else if (dfa->edests[org_node].nelem == 1)
1455	{
1456	/* In case of the node can epsilon-transit, and it has only one
1457	destination. */
1458	org_dest = dfa->edests[org_node].elems[0];
1459	re_node_set_empty (dfa->edests + clone_node);
1460	if (dfa->nodes[org_node].type == ANCHOR)
1461	{
1462	/* In case of the node has another constraint, append it. */
1463	if (org_node == root_node && clone_node != org_node)
1464	{
1465	/* ...but if the node is root_node itself, it means the
1466	epsilon closure have a loop, then tie it to the
1467	destination of the root_node. */
1468	ret = re_node_set_insert (dfa->edests + clone_node,
1469	org_dest);
1470	if (BE (ret < 0, 0))
1471	return REG_ESPACE;
1472	break;
1473	}
1474	constraint \|= dfa->nodes[org_node].opr.ctx_type;
1475	}
1476	clone_dest = duplicate_node (dfa, org_dest, constraint);
1477	if (BE (clone_dest == -1, 0))
1478	return REG_ESPACE;
1479	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1480	if (BE (ret < 0, 0))
1481	return REG_ESPACE;
1482	}
1483	else /* dfa->edests[org_node].nelem == 2 */
1484	{
1485	/* In case of the node can epsilon-transit, and it has two
1486	destinations. In the bin_tree_t and DFA, that's '\|' and ''. /
1487	org_dest = dfa->edests[org_node].elems[0];
1488	re_node_set_empty (dfa->edests + clone_node);
1489	/* Search for a duplicated node which satisfies the constraint. */
1490	clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1491	if (clone_dest == -1)
1492	{
1493	/* There are no such a duplicated node, create a new one. */
1494	reg_errcode_t err;
1495	clone_dest = duplicate_node (dfa, org_dest, constraint);
1496	if (BE (clone_dest == -1, 0))
1497	return REG_ESPACE;
1498	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1499	if (BE (ret < 0, 0))
1500	return REG_ESPACE;
1501	err = duplicate_node_closure (dfa, org_dest, clone_dest,
1502	root_node, constraint);
1503	if (BE (err != REG_NOERROR, 0))
1504	return err;
1505	}
1506	else
1507	{
1508	/* There are a duplicated node which satisfy the constraint,
1509	use it to avoid infinite loop. */
1510	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1511	if (BE (ret < 0, 0))
1512	return REG_ESPACE;
1513	}
1514
1515	org_dest = dfa->edests[org_node].elems[1];
1516	clone_dest = duplicate_node (dfa, org_dest, constraint);
1517	if (BE (clone_dest == -1, 0))
1518	return REG_ESPACE;
1519	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1520	if (BE (ret < 0, 0))
1521	return REG_ESPACE;
1522	}
1523	org_node = org_dest;
1524	clone_node = clone_dest;
1525	}
1526	return REG_NOERROR;
1527	}
1528
1529	/* Search for a node which is duplicated from the node ORG_NODE, and
1530	satisfies the constraint CONSTRAINT. */
1531
1532	static int
1533	search_duplicated_node (const re_dfa_t *dfa, int org_node,
1534	unsigned int constraint)
1535	{
1536	int idx;
1537	for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1538	{
1539	if (org_node == dfa->org_indices[idx]
1540	&& constraint == dfa->nodes[idx].constraint)
1541	return idx; /* Found. */
1542	}
1543	return -1; /* Not found. */
1544	}
1545
1546	/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1547	Return the index of the new node, or -1 if insufficient storage is
1548	available. */
1549
1550	static int
1551	duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
1552	{
1553	int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1554	if (BE (dup_idx != -1, 1))
1555	{
1556	dfa->nodes[dup_idx].constraint = constraint;
1557	if (dfa->nodes[org_idx].type == ANCHOR)
1558	dfa->nodes[dup_idx].constraint \|= dfa->nodes[org_idx].opr.ctx_type;
1559	dfa->nodes[dup_idx].duplicated = 1;
1560
1561	/* Store the index of the original node. */
1562	dfa->org_indices[dup_idx] = org_idx;
1563	}
1564	return dup_idx;
1565	}
1566
1567	static reg_errcode_t
1568	calc_inveclosure (re_dfa_t *dfa)
1569	{
1570	int src, idx, ret;
1571	for (idx = 0; idx < dfa->nodes_len; ++idx)
1572	re_node_set_init_empty (dfa->inveclosures + idx);
1573
1574	for (src = 0; src < dfa->nodes_len; ++src)
1575	{
1576	int *elems = dfa->eclosures[src].elems;
1577	for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1578	{
1579	ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1580	if (BE (ret == -1, 0))
1581	return REG_ESPACE;
1582	}
1583	}
1584
1585	return REG_NOERROR;
1586	}
1587
1588	/* Calculate "eclosure" for all the node in DFA. */
1589
1590	static reg_errcode_t
1591	calc_eclosure (re_dfa_t *dfa)
1592	{
1593	int node_idx, incomplete;
1594	#ifdef DEBUG
1595	assert (dfa->nodes_len > 0);
1596	#endif
1597	incomplete = 0;
1598	/* For each nodes, calculate epsilon closure. */
1599	for (node_idx = 0; ; ++node_idx)
1600	{
1601	reg_errcode_t err;
1602	re_node_set eclosure_elem;
1603	if (node_idx == dfa->nodes_len)
1604	{
1605	if (!incomplete)
1606	break;
1607	incomplete = 0;
1608	node_idx = 0;
1609	}
1610
1611	#ifdef DEBUG
1612	assert (dfa->eclosures[node_idx].nelem != -1);
1613	#endif
1614
1615	/* If we have already calculated, skip it. */
1616	if (dfa->eclosures[node_idx].nelem != 0)
1617	continue;
1618	/* Calculate epsilon closure of `node_idx'. */
1619	err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1620	if (BE (err != REG_NOERROR, 0))
1621	return err;
1622
1623	if (dfa->eclosures[node_idx].nelem == 0)
1624	{
1625	incomplete = 1;
1626	re_node_set_free (&eclosure_elem);
1627	}
1628	}
1629	return REG_NOERROR;
1630	}
1631
1632	/* Calculate epsilon closure of NODE. */
1633
1634	static reg_errcode_t
1635	calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa, int node, int root)
1636	{
1637	reg_errcode_t err;
1638	unsigned int constraint;
1639	int i, incomplete;
1640	re_node_set eclosure;
1641	incomplete = 0;
1642	err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1643	if (BE (err != REG_NOERROR, 0))
1644	return err;
1645
1646	/* This indicates that we are calculating this node now.
1647	We reference this value to avoid infinite loop. */
1648	dfa->eclosures[node].nelem = -1;
1649
1650	constraint = ((dfa->nodes[node].type == ANCHOR)
1651	? dfa->nodes[node].opr.ctx_type : 0);
1652	/* If the current node has constraints, duplicate all nodes.
1653	Since they must inherit the constraints. */
1654	if (constraint
1655	&& dfa->edests[node].nelem
1656	&& !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1657	{
1658	int org_node, cur_node;
1659	org_node = cur_node = node;
1660	err = duplicate_node_closure (dfa, node, node, node, constraint);
1661	if (BE (err != REG_NOERROR, 0))
1662	return err;
1663	}
1664
1665	/* Expand each epsilon destination nodes. */
1666	if (IS_EPSILON_NODE(dfa->nodes[node].type))
1667	for (i = 0; i < dfa->edests[node].nelem; ++i)
1668	{
1669	re_node_set eclosure_elem;
1670	int edest = dfa->edests[node].elems[i];
1671	/* If calculating the epsilon closure of `edest' is in progress,
1672	return intermediate result. */
1673	if (dfa->eclosures[edest].nelem == -1)
1674	{
1675	incomplete = 1;
1676	continue;
1677	}
1678	/* If we haven't calculated the epsilon closure of `edest' yet,
1679	calculate now. Otherwise use calculated epsilon closure. */
1680	if (dfa->eclosures[edest].nelem == 0)
1681	{
1682	err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1683	if (BE (err != REG_NOERROR, 0))
1684	return err;
1685	}
1686	else
1687	eclosure_elem = dfa->eclosures[edest];
1688	/* Merge the epsilon closure of `edest'. */
1689	re_node_set_merge (&eclosure, &eclosure_elem);
1690	/* If the epsilon closure of `edest' is incomplete,
1691	the epsilon closure of this node is also incomplete. */
1692	if (dfa->eclosures[edest].nelem == 0)
1693	{
1694	incomplete = 1;
1695	re_node_set_free (&eclosure_elem);
1696	}
1697	}
1698
1699	/* Epsilon closures include itself. */
1700	re_node_set_insert (&eclosure, node);
1701	if (incomplete && !root)
1702	dfa->eclosures[node].nelem = 0;
1703	else
1704	dfa->eclosures[node] = eclosure;
1705	*new_set = eclosure;
1706	return REG_NOERROR;
1707	}
1708
1709
1710	/* Functions for token which are used in the parser. */
1711
1712	/* Fetch a token from INPUT.
1713	We must not use this function inside bracket expressions. */
1714
1715	static void
1716	internal_function
1717	fetch_token (re_token_t result, re_string_t input, reg_syntax_t syntax)
1718	{
1719	re_string_skip_bytes (input, peek_token (result, input, syntax));
1720	}
1721
1722	/* Peek a token from INPUT, and return the length of the token.
1723	We must not use this function inside bracket expressions. */
1724
1725	static int
1726	internal_function
1727	peek_token (re_token_t token, re_string_t input, reg_syntax_t syntax)
1728	{
1729	unsigned char c;
1730
1731	if (re_string_eoi (input))
1732	{
1733	token->type = END_OF_RE;
1734	return 0;
1735	}
1736
1737	c = re_string_peek_byte (input, 0);
1738	token->opr.c = c;
1739
1740	token->word_char = 0;
1741	#ifdef RE_ENABLE_I18N
1742	token->mb_partial = 0;
1743	if (input->mb_cur_max > 1 &&
1744	!re_string_first_byte (input, re_string_cur_idx (input)))
1745	{
1746	token->type = CHARACTER;
1747	token->mb_partial = 1;
1748	return 1;
1749	}
1750	#endif
1751	if (c == '\\')
1752	{
1753	unsigned char c2;
1754	if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1755	{
1756	token->type = BACK_SLASH;
1757	return 1;
1758	}
1759
1760	c2 = re_string_peek_byte_case (input, 1);
1761	token->opr.c = c2;
1762	token->type = CHARACTER;
1763	#ifdef RE_ENABLE_I18N
1764	if (input->mb_cur_max > 1)
1765	{
1766	wint_t wc = re_string_wchar_at (input,
1767	re_string_cur_idx (input) + 1);
1768	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1769	}
1770	else
1771	#endif
1772	token->word_char = IS_WORD_CHAR (c2) != 0;
1773
1774	switch (c2)
1775	{
1776	case '\|':
1777	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1778	token->type = OP_ALT;
1779	break;
1780	case '1': case '2': case '3': case '4': case '5':
1781	case '6': case '7': case '8': case '9':
1782	if (!(syntax & RE_NO_BK_REFS))
1783	{
1784	token->type = OP_BACK_REF;
1785	token->opr.idx = c2 - '1';
1786	}
1787	break;
1788	case '<':
1789	if (!(syntax & RE_NO_GNU_OPS))
1790	{
1791	token->type = ANCHOR;
1792	token->opr.ctx_type = WORD_FIRST;
1793	}
1794	break;
1795	case '>':
1796	if (!(syntax & RE_NO_GNU_OPS))
1797	{
1798	token->type = ANCHOR;
1799	token->opr.ctx_type = WORD_LAST;
1800	}
1801	break;
1802	case 'b':
1803	if (!(syntax & RE_NO_GNU_OPS))
1804	{
1805	token->type = ANCHOR;
1806	token->opr.ctx_type = WORD_DELIM;
1807	}
1808	break;
1809	case 'B':
1810	if (!(syntax & RE_NO_GNU_OPS))
1811	{
1812	token->type = ANCHOR;
1813	token->opr.ctx_type = NOT_WORD_DELIM;
1814	}
1815	break;
1816	case 'w':
1817	if (!(syntax & RE_NO_GNU_OPS))
1818	token->type = OP_WORD;
1819	break;
1820	case 'W':
1821	if (!(syntax & RE_NO_GNU_OPS))
1822	token->type = OP_NOTWORD;
1823	break;
1824	case 's':
1825	if (!(syntax & RE_NO_GNU_OPS))
1826	token->type = OP_SPACE;
1827	break;
1828	case 'S':
1829	if (!(syntax & RE_NO_GNU_OPS))
1830	token->type = OP_NOTSPACE;
1831	break;
1832	case '`':
1833	if (!(syntax & RE_NO_GNU_OPS))
1834	{
1835	token->type = ANCHOR;
1836	token->opr.ctx_type = BUF_FIRST;
1837	}
1838	break;
1839	case '\'':
1840	if (!(syntax & RE_NO_GNU_OPS))
1841	{
1842	token->type = ANCHOR;
1843	token->opr.ctx_type = BUF_LAST;
1844	}
1845	break;
1846	case '(':
1847	if (!(syntax & RE_NO_BK_PARENS))
1848	token->type = OP_OPEN_SUBEXP;
1849	break;
1850	case ')':
1851	if (!(syntax & RE_NO_BK_PARENS))
1852	token->type = OP_CLOSE_SUBEXP;
1853	break;
1854	case '+':
1855	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1856	token->type = OP_DUP_PLUS;
1857	break;
1858	case '?':
1859	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1860	token->type = OP_DUP_QUESTION;
1861	break;
1862	case '{':
1863	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1864	token->type = OP_OPEN_DUP_NUM;
1865	break;
1866	case '}':
1867	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1868	token->type = OP_CLOSE_DUP_NUM;
1869	break;
1870	default:
1871	break;
1872	}
1873	return 2;
1874	}
1875
1876	token->type = CHARACTER;
1877	#ifdef RE_ENABLE_I18N
1878	if (input->mb_cur_max > 1)
1879	{
1880	wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1881	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1882	}
1883	else
1884	#endif
1885	token->word_char = IS_WORD_CHAR (token->opr.c);
1886
1887	switch (c)
1888	{
1889	case '\n':
1890	if (syntax & RE_NEWLINE_ALT)
1891	token->type = OP_ALT;
1892	break;
1893	case '\|':
1894	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1895	token->type = OP_ALT;
1896	break;
1897	case '*':
1898	token->type = OP_DUP_ASTERISK;
1899	break;
1900	case '+':
1901	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1902	token->type = OP_DUP_PLUS;
1903	break;
1904	case '?':
1905	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1906	token->type = OP_DUP_QUESTION;
1907	break;
1908	case '{':
1909	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1910	token->type = OP_OPEN_DUP_NUM;
1911	break;
1912	case '}':
1913	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1914	token->type = OP_CLOSE_DUP_NUM;
1915	break;
1916	case '(':
1917	if (syntax & RE_NO_BK_PARENS)
1918	token->type = OP_OPEN_SUBEXP;
1919	break;
1920	case ')':
1921	if (syntax & RE_NO_BK_PARENS)
1922	token->type = OP_CLOSE_SUBEXP;
1923	break;
1924	case '[':
1925	token->type = OP_OPEN_BRACKET;
1926	break;
1927	case '.':
1928	token->type = OP_PERIOD;
1929	break;
1930	case '^':
1931	if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS \| RE_CARET_ANCHORS_HERE)) &&
1932	re_string_cur_idx (input) != 0)
1933	{
1934	char prev = re_string_peek_byte (input, -1);
1935	if (!(syntax & RE_NEWLINE_ALT) \|\| prev != '\n')
1936	break;
1937	}
1938	token->type = ANCHOR;
1939	token->opr.ctx_type = LINE_FIRST;
1940	break;
1941	case '$':
1942	if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1943	re_string_cur_idx (input) + 1 != re_string_length (input))
1944	{
1945	re_token_t next;
1946	re_string_skip_bytes (input, 1);
1947	peek_token (&next, input, syntax);
1948	re_string_skip_bytes (input, -1);
1949	if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1950	break;
1951	}
1952	token->type = ANCHOR;
1953	token->opr.ctx_type = LINE_LAST;
1954	break;
1955	default:
1956	break;
1957	}
1958	return 1;
1959	}
1960
1961	/* Peek a token from INPUT, and return the length of the token.
1962	We must not use this function out of bracket expressions. */
1963
1964	static int
1965	internal_function
1966	peek_token_bracket (re_token_t token, re_string_t input, reg_syntax_t syntax)
1967	{
1968	unsigned char c;
1969	if (re_string_eoi (input))
1970	{
1971	token->type = END_OF_RE;
1972	return 0;
1973	}
1974	c = re_string_peek_byte (input, 0);
1975	token->opr.c = c;
1976
1977	#ifdef RE_ENABLE_I18N
1978	if (input->mb_cur_max > 1 &&
1979	!re_string_first_byte (input, re_string_cur_idx (input)))
1980	{
1981	token->type = CHARACTER;
1982	return 1;
1983	}
1984	#endif /* RE_ENABLE_I18N */
1985
1986	if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
1987	&& re_string_cur_idx (input) + 1 < re_string_length (input))
1988	{
1989	/* In this case, '\' escape a character. */
1990	unsigned char c2;
1991	re_string_skip_bytes (input, 1);
1992	c2 = re_string_peek_byte (input, 0);
1993	token->opr.c = c2;
1994	token->type = CHARACTER;
1995	return 1;
1996	}
1997	if (c == '[') /* '[' is a special char in a bracket exps. */
1998	{
1999	unsigned char c2;
2000	int token_len;
2001	if (re_string_cur_idx (input) + 1 < re_string_length (input))
2002	c2 = re_string_peek_byte (input, 1);
2003	else
2004	c2 = 0;
2005	token->opr.c = c2;
2006	token_len = 2;
2007	switch (c2)
2008	{
2009	case '.':
2010	token->type = OP_OPEN_COLL_ELEM;
2011	break;
2012	case '=':
2013	token->type = OP_OPEN_EQUIV_CLASS;
2014	break;
2015	case ':':
2016	if (syntax & RE_CHAR_CLASSES)
2017	{
2018	token->type = OP_OPEN_CHAR_CLASS;
2019	break;
2020	}
2021	/* else fall through. */
2022	default:
2023	token->type = CHARACTER;
2024	token->opr.c = c;
2025	token_len = 1;
2026	break;
2027	}
2028	return token_len;
2029	}
2030	switch (c)
2031	{
2032	case '-':
2033	token->type = OP_CHARSET_RANGE;
2034	break;
2035	case ']':
2036	token->type = OP_CLOSE_BRACKET;
2037	break;
2038	case '^':
2039	token->type = OP_NON_MATCH_LIST;
2040	break;
2041	default:
2042	token->type = CHARACTER;
2043	}
2044	return 1;
2045	}
2046
2047
2048	/* Functions for parser. */
2049
2050	/* Entry point of the parser.
2051	Parse the regular expression REGEXP and return the structure tree.
2052	If an error is occured, ERR is set by error code, and return NULL.
2053	This function build the following tree, from regular expression <reg_exp>:
2054	CAT
2055	/ \
2056	/ \
2057	<reg_exp> EOR
2058
2059	CAT means concatenation.
2060	EOR means end of regular expression. */
2061
2062	static bin_tree_t *
2063	parse (re_string_t regexp, regex_t preg, reg_syntax_t syntax,
2064	reg_errcode_t *err)
2065	{
2066	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2067	bin_tree_t tree, eor, *root;
2068	re_token_t current_token;
2069	dfa->syntax = syntax;
2070	fetch_token (&current_token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2071	tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2072	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2073	return NULL;
2074	eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2075	if (tree != NULL)
2076	root = create_tree (dfa, tree, eor, CONCAT);
2077	else
2078	root = eor;
2079	if (BE (eor == NULL \|\| root == NULL, 0))
2080	{
2081	*err = REG_ESPACE;
2082	return NULL;
2083	}
2084	return root;
2085	}
2086
2087	/* This function build the following tree, from regular expression
2088	<branch1>\|<branch2>:
2089	ALT
2090	/ \
2091	/ \
2092	<branch1> <branch2>
2093
2094	ALT means alternative, which represents the operator `\|'. */
2095
2096	static bin_tree_t *
2097	parse_reg_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2098	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2099	{
2100	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2101	bin_tree_t tree, branch = NULL;
2102	tree = parse_branch (regexp, preg, token, syntax, nest, err);
2103	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2104	return NULL;
2105
2106	while (token->type == OP_ALT)
2107	{
2108	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2109	if (token->type != OP_ALT && token->type != END_OF_RE
2110	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
2111	{
2112	branch = parse_branch (regexp, preg, token, syntax, nest, err);
2113	if (BE (*err != REG_NOERROR && branch == NULL, 0))
2114	return NULL;
2115	}
2116	else
2117	branch = NULL;
2118	tree = create_tree (dfa, tree, branch, OP_ALT);
2119	if (BE (tree == NULL, 0))
2120	{
2121	*err = REG_ESPACE;
2122	return NULL;
2123	}
2124	}
2125	return tree;
2126	}
2127
2128	/* This function build the following tree, from regular expression
2129	<exp1><exp2>:
2130	CAT
2131	/ \
2132	/ \
2133	<exp1> <exp2>
2134
2135	CAT means concatenation. */
2136
2137	static bin_tree_t *
2138	parse_branch (re_string_t regexp, regex_t preg, re_token_t *token,
2139	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2140	{
2141	bin_tree_t tree, exp;
2142	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2143	tree = parse_expression (regexp, preg, token, syntax, nest, err);
2144	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2145	return NULL;
2146
2147	while (token->type != OP_ALT && token->type != END_OF_RE
2148	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
2149	{
2150	exp = parse_expression (regexp, preg, token, syntax, nest, err);
2151	if (BE (*err != REG_NOERROR && exp == NULL, 0))
2152	{
2153	return NULL;
2154	}
2155	if (tree != NULL && exp != NULL)
2156	{
2157	tree = create_tree (dfa, tree, exp, CONCAT);
2158	if (tree == NULL)
2159	{
2160	*err = REG_ESPACE;
2161	return NULL;
2162	}
2163	}
2164	else if (tree == NULL)
2165	tree = exp;
2166	/* Otherwise exp == NULL, we don't need to create new tree. */
2167	}
2168	return tree;
2169	}
2170
2171	/* This function build the following tree, from regular expression a*:
2172	*
2173	\|
2174	a
2175	*/
2176
2177	static bin_tree_t *
2178	parse_expression (re_string_t regexp, regex_t preg, re_token_t *token,
2179	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2180	{
2181	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2182	bin_tree_t *tree;
2183	switch (token->type)
2184	{
2185	case CHARACTER:
2186	tree = create_token_tree (dfa, NULL, NULL, token);
2187	if (BE (tree == NULL, 0))
2188	{
2189	*err = REG_ESPACE;
2190	return NULL;
2191	}
2192	#ifdef RE_ENABLE_I18N
2193	if (dfa->mb_cur_max > 1)
2194	{
2195	while (!re_string_eoi (regexp)
2196	&& !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2197	{
2198	bin_tree_t *mbc_remain;
2199	fetch_token (token, regexp, syntax);
2200	mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2201	tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2202	if (BE (mbc_remain == NULL \|\| tree == NULL, 0))
2203	{
2204	*err = REG_ESPACE;
2205	return NULL;
2206	}
2207	}
2208	}
2209	#endif
2210	break;
2211	case OP_OPEN_SUBEXP:
2212	tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2213	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2214	return NULL;
2215	break;
2216	case OP_OPEN_BRACKET:
2217	tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2218	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2219	return NULL;
2220	break;
2221	case OP_BACK_REF:
2222	if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2223	{
2224	*err = REG_ESUBREG;
2225	return NULL;
2226	}
2227	dfa->used_bkref_map \|= 1 << token->opr.idx;
2228	tree = create_token_tree (dfa, NULL, NULL, token);
2229	if (BE (tree == NULL, 0))
2230	{
2231	*err = REG_ESPACE;
2232	return NULL;
2233	}
2234	++dfa->nbackref;
2235	dfa->has_mb_node = 1;
2236	break;
2237	case OP_OPEN_DUP_NUM:
2238	if (syntax & RE_CONTEXT_INVALID_DUP)
2239	{
2240	*err = REG_BADRPT;
2241	return NULL;
2242	}
2243	/* FALLTHROUGH */
2244	case OP_DUP_ASTERISK:
2245	case OP_DUP_PLUS:
2246	case OP_DUP_QUESTION:
2247	if (syntax & RE_CONTEXT_INVALID_OPS)
2248	{
2249	*err = REG_BADRPT;
2250	return NULL;
2251	}
2252	else if (syntax & RE_CONTEXT_INDEP_OPS)
2253	{
2254	fetch_token (token, regexp, syntax);
2255	return parse_expression (regexp, preg, token, syntax, nest, err);
2256	}
2257	/* else fall through */
2258	case OP_CLOSE_SUBEXP:
2259	if ((token->type == OP_CLOSE_SUBEXP) &&
2260	!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2261	{
2262	*err = REG_ERPAREN;
2263	return NULL;
2264	}
2265	/* else fall through */
2266	case OP_CLOSE_DUP_NUM:
2267	/* We treat it as a normal character. */
2268
2269	/* Then we can these characters as normal characters. */
2270	token->type = CHARACTER;
2271	/* mb_partial and word_char bits should be initialized already
2272	by peek_token. */
2273	tree = create_token_tree (dfa, NULL, NULL, token);
2274	if (BE (tree == NULL, 0))
2275	{
2276	*err = REG_ESPACE;
2277	return NULL;
2278	}
2279	break;
2280	case ANCHOR:
2281	if ((token->opr.ctx_type
2282	& (WORD_DELIM \| NOT_WORD_DELIM \| WORD_FIRST \| WORD_LAST))
2283	&& dfa->word_ops_used == 0)
2284	init_word_char (dfa);
2285	if (token->opr.ctx_type == WORD_DELIM
2286	\|\| token->opr.ctx_type == NOT_WORD_DELIM)
2287	{
2288	bin_tree_t tree_first, tree_last;
2289	if (token->opr.ctx_type == WORD_DELIM)
2290	{
2291	token->opr.ctx_type = WORD_FIRST;
2292	tree_first = create_token_tree (dfa, NULL, NULL, token);
2293	token->opr.ctx_type = WORD_LAST;
2294	}
2295	else
2296	{
2297	token->opr.ctx_type = INSIDE_WORD;
2298	tree_first = create_token_tree (dfa, NULL, NULL, token);
2299	token->opr.ctx_type = INSIDE_NOTWORD;
2300	}
2301	tree_last = create_token_tree (dfa, NULL, NULL, token);
2302	tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2303	if (BE (tree_first == NULL \|\| tree_last == NULL \|\| tree == NULL, 0))
2304	{
2305	*err = REG_ESPACE;
2306	return NULL;
2307	}
2308	}
2309	else
2310	{
2311	tree = create_token_tree (dfa, NULL, NULL, token);
2312	if (BE (tree == NULL, 0))
2313	{
2314	*err = REG_ESPACE;
2315	return NULL;
2316	}
2317	}
2318	/* We must return here, since ANCHORs can't be followed
2319	by repetition operators.
2320	eg. RE"^" is invalid or "<ANCHOR(^)><CHAR()>",
2321	it must not be "<ANCHOR(^)><REPEAT()>". /
2322	fetch_token (token, regexp, syntax);
2323	return tree;
2324	case OP_PERIOD:
2325	tree = create_token_tree (dfa, NULL, NULL, token);
2326	if (BE (tree == NULL, 0))
2327	{
2328	*err = REG_ESPACE;
2329	return NULL;
2330	}
2331	if (dfa->mb_cur_max > 1)
2332	dfa->has_mb_node = 1;
2333	break;
2334	case OP_WORD:
2335	case OP_NOTWORD:
2336	tree = build_charclass_op (dfa, regexp->trans,
2337	(const unsigned char *) "alnum",
2338	(const unsigned char *) "_",
2339	token->type == OP_NOTWORD, err);
2340	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2341	return NULL;
2342	break;
2343	case OP_SPACE:
2344	case OP_NOTSPACE:
2345	tree = build_charclass_op (dfa, regexp->trans,
2346	(const unsigned char *) "space",
2347	(const unsigned char *) "",
2348	token->type == OP_NOTSPACE, err);
2349	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2350	return NULL;
2351	break;
2352	case OP_ALT:
2353	case END_OF_RE:
2354	return NULL;
2355	case BACK_SLASH:
2356	*err = REG_EESCAPE;
2357	return NULL;
2358	default:
2359	/* Must not happen? */
2360	#ifdef DEBUG
2361	assert (0);
2362	#endif
2363	return NULL;
2364	}
2365	fetch_token (token, regexp, syntax);
2366
2367	while (token->type == OP_DUP_ASTERISK \|\| token->type == OP_DUP_PLUS
2368	\|\| token->type == OP_DUP_QUESTION \|\| token->type == OP_OPEN_DUP_NUM)
2369	{
2370	tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2371	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2372	return NULL;
2373	/* In BRE consecutive duplications are not allowed. */
2374	if ((syntax & RE_CONTEXT_INVALID_DUP)
2375	&& (token->type == OP_DUP_ASTERISK
2376	\|\| token->type == OP_OPEN_DUP_NUM))
2377	{
2378	*err = REG_BADRPT;
2379	return NULL;
2380	}
2381	}
2382
2383	return tree;
2384	}
2385
2386	/* This function build the following tree, from regular expression
2387	(<reg_exp>):
2388	SUBEXP
2389	\|
2390	<reg_exp>
2391	*/
2392
2393	static bin_tree_t *
2394	parse_sub_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2395	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2396	{
2397	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2398	bin_tree_t *tree;
2399	size_t cur_nsub;
2400	cur_nsub = preg->re_nsub++;
2401
2402	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2403
2404	/* The subexpression may be a null string. */
2405	if (token->type == OP_CLOSE_SUBEXP)
2406	tree = NULL;
2407	else
2408	{
2409	tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2410	if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2411	*err = REG_EPAREN;
2412	if (BE (*err != REG_NOERROR, 0))
2413	return NULL;
2414	}
2415
2416	if (cur_nsub <= '9' - '1')
2417	dfa->completed_bkref_map \|= 1 << cur_nsub;
2418
2419	tree = create_tree (dfa, tree, NULL, SUBEXP);
2420	if (BE (tree == NULL, 0))
2421	{
2422	*err = REG_ESPACE;
2423	return NULL;
2424	}
2425	tree->token.opr.idx = cur_nsub;
2426	return tree;
2427	}
2428
2429	/* This function parse repetition operators like "", "+", "{1,3}" etc. /
2430
2431	static bin_tree_t *
2432	parse_dup_op (bin_tree_t elem, re_string_t regexp, re_dfa_t *dfa,
2433	re_token_t token, reg_syntax_t syntax, reg_errcode_t err)
2434	{
2435	bin_tree_t tree = NULL, old_tree = NULL;
2436	int i, start, end, start_idx = re_string_cur_idx (regexp);
2437	re_token_t start_token = *token;
2438
2439	if (token->type == OP_OPEN_DUP_NUM)
2440	{
2441	end = 0;
2442	start = fetch_number (regexp, token, syntax);
2443	if (start == -1)
2444	{
2445	if (token->type == CHARACTER && token->opr.c == ',')
2446	start = 0; /* We treat "{,m}" as "{0,m}". */
2447	else
2448	{
2449	err = REG_BADBR; / <re>{} is invalid. */
2450	return NULL;
2451	}
2452	}
2453	if (BE (start != -2, 1))
2454	{
2455	/* We treat "{n}" as "{n,n}". */
2456	end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2457	: ((token->type == CHARACTER && token->opr.c == ',')
2458	? fetch_number (regexp, token, syntax) : -2));
2459	}
2460	if (BE (start == -2 \|\| end == -2, 0))
2461	{
2462	/* Invalid sequence. */
2463	if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2464	{
2465	if (token->type == END_OF_RE)
2466	*err = REG_EBRACE;
2467	else
2468	*err = REG_BADBR;
2469
2470	return NULL;
2471	}
2472
2473	/* If the syntax bit is set, rollback. */
2474	re_string_set_index (regexp, start_idx);
2475	*token = start_token;
2476	token->type = CHARACTER;
2477	/* mb_partial and word_char bits should be already initialized by
2478	peek_token. */
2479	return elem;
2480	}
2481
2482	if (BE (end != -1 && start > end, 0))
2483	{
2484	/* First number greater than second. */
2485	*err = REG_BADBR;
2486	return NULL;
2487	}
2488	}
2489	else
2490	{
2491	start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2492	end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2493	}
2494
2495	fetch_token (token, regexp, syntax);
2496
2497	if (BE (elem == NULL, 0))
2498	return NULL;
2499	if (BE (start == 0 && end == 0, 0))
2500	{
2501	postorder (elem, free_tree, NULL);
2502	return NULL;
2503	}
2504
2505	/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2506	if (BE (start > 0, 0))
2507	{
2508	tree = elem;
2509	for (i = 2; i <= start; ++i)
2510	{
2511	elem = duplicate_tree (elem, dfa);
2512	tree = create_tree (dfa, tree, elem, CONCAT);
2513	if (BE (elem == NULL \|\| tree == NULL, 0))
2514	goto parse_dup_op_espace;
2515	}
2516
2517	if (start == end)
2518	return tree;
2519
2520	/* Duplicate ELEM before it is marked optional. */
2521	elem = duplicate_tree (elem, dfa);
2522	old_tree = tree;
2523	}
2524	else
2525	old_tree = NULL;
2526
2527	if (elem->token.type == SUBEXP)
2528	postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
2529
2530	tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2531	if (BE (tree == NULL, 0))
2532	goto parse_dup_op_espace;
2533
2534	/* This loop is actually executed only when end != -1,
2535	to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2536	already created the start+1-th copy. */
2537	for (i = start + 2; i <= end; ++i)
2538	{
2539	elem = duplicate_tree (elem, dfa);
2540	tree = create_tree (dfa, tree, elem, CONCAT);
2541	if (BE (elem == NULL \|\| tree == NULL, 0))
2542	goto parse_dup_op_espace;
2543
2544	tree = create_tree (dfa, tree, NULL, OP_ALT);
2545	if (BE (tree == NULL, 0))
2546	goto parse_dup_op_espace;
2547	}
2548
2549	if (old_tree)
2550	tree = create_tree (dfa, old_tree, tree, CONCAT);
2551
2552	return tree;
2553
2554	parse_dup_op_espace:
2555	*err = REG_ESPACE;
2556	return NULL;
2557	}
2558
2559	/* Size of the names for collating symbol/equivalence_class/character_class.
2560	I'm not sure, but maybe enough. */
2561	#define BRACKET_NAME_BUF_SIZE 32
2562
2563	#ifndef _LIBC
2564	/* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2565	Build the range expression which starts from START_ELEM, and ends
2566	at END_ELEM. The result are written to MBCSET and SBCSET.
2567	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2568	mbcset->range_ends, is a pointer argument sinse we may
2569	update it. */
2570
2571	static reg_errcode_t
2572	internal_function
2573	# ifdef RE_ENABLE_I18N
2574	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int range_alloc,
2575	bracket_elem_t start_elem, bracket_elem_t end_elem)
2576	# else /* not RE_ENABLE_I18N */
2577	build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2578	bracket_elem_t *end_elem)
2579	# endif /* not RE_ENABLE_I18N */
2580	{
2581	unsigned int start_ch, end_ch;
2582	/* Equivalence Classes and Character Classes can't be a range start/end. */
2583	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2584	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2585	0))
2586	return REG_ERANGE;
2587
2588	/* We can handle no multi character collating elements without libc
2589	support. */
2590	if (BE ((start_elem->type == COLL_SYM
2591	&& strlen ((char *) start_elem->opr.name) > 1)
2592	\|\| (end_elem->type == COLL_SYM
2593	&& strlen ((char *) end_elem->opr.name) > 1), 0))
2594	return REG_ECOLLATE;
2595
2596	# ifdef RE_ENABLE_I18N
2597	{
2598	wchar_t wc;
2599	wint_t start_wc;
2600	wint_t end_wc;
2601	wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2602
2603	start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2604	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2605	: 0));
2606	end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2607	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2608	: 0));
2609	start_wc = ((start_elem->type == SB_CHAR \|\| start_elem->type == COLL_SYM)
2610	? __btowc (start_ch) : start_elem->opr.wch);
2611	end_wc = ((end_elem->type == SB_CHAR \|\| end_elem->type == COLL_SYM)
2612	? __btowc (end_ch) : end_elem->opr.wch);
2613	if (start_wc == WEOF \|\| end_wc == WEOF)
2614	return REG_ECOLLATE;
2615	cmp_buf[0] = start_wc;
2616	cmp_buf[4] = end_wc;
2617	if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2618	return REG_ERANGE;
2619
2620	/* Got valid collation sequence values, add them as a new entry.
2621	However, for !_LIBC we have no collation elements: if the
2622	character set is single byte, the single byte character set
2623	that we build below suffices. parse_bracket_exp passes
2624	no MBCSET if dfa->mb_cur_max == 1. */
2625	if (mbcset)
2626	{
2627	/* Check the space of the arrays. */
2628	if (BE (*range_alloc == mbcset->nranges, 0))
2629	{
2630	/* There is not enough space, need realloc. */
2631	wchar_t new_array_start, new_array_end;
2632	int new_nranges;
2633
2634	/* +1 in case of mbcset->nranges is 0. */
2635	new_nranges = 2 * mbcset->nranges + 1;
2636	/* Use realloc since mbcset->range_starts and mbcset->range_ends
2637	are NULL if range_alloc == 0. /
2638	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2639	new_nranges);
2640	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2641	new_nranges);
2642
2643	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
2644	return REG_ESPACE;
2645
2646	mbcset->range_starts = new_array_start;
2647	mbcset->range_ends = new_array_end;
2648	*range_alloc = new_nranges;
2649	}
2650
2651	mbcset->range_starts[mbcset->nranges] = start_wc;
2652	mbcset->range_ends[mbcset->nranges++] = end_wc;
2653	}
2654
2655	/* Build the table for single byte characters. */
2656	for (wc = 0; wc < SBC_MAX; ++wc)
2657	{
2658	cmp_buf[2] = wc;
2659	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2660	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2661	bitset_set (sbcset, wc);
2662	}
2663	}
2664	# else /* not RE_ENABLE_I18N */
2665	{
2666	unsigned int ch;
2667	start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2668	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2669	: 0));
2670	end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2671	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2672	: 0));
2673	if (start_ch > end_ch)
2674	return REG_ERANGE;
2675	/* Build the table for single byte characters. */
2676	for (ch = 0; ch < SBC_MAX; ++ch)
2677	if (start_ch <= ch && ch <= end_ch)
2678	bitset_set (sbcset, ch);
2679	}
2680	# endif /* not RE_ENABLE_I18N */
2681	return REG_NOERROR;
2682	}
2683	#endif /* not _LIBC */
2684
2685	#ifndef _LIBC
2686	/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2687	Build the collating element which is represented by NAME.
2688	The result are written to MBCSET and SBCSET.
2689	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2690	pointer argument since we may update it. */
2691
2692	static reg_errcode_t
2693	internal_function
2694	# ifdef RE_ENABLE_I18N
2695	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2696	int coll_sym_alloc, const unsigned char name)
2697	# else /* not RE_ENABLE_I18N */
2698	build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2699	# endif /* not RE_ENABLE_I18N */
2700	{
2701	size_t name_len = strlen ((const char *) name);
2702	if (BE (name_len != 1, 0))
2703	return REG_ECOLLATE;
2704	else
2705	{
2706	bitset_set (sbcset, name[0]);
2707	return REG_NOERROR;
2708	}
2709	}
2710	#endif /* not _LIBC */
2711
2712	/* This function parse bracket expression like "[abc]", "[a-c]",
2713	"[[.a-a.]]" etc. */
2714
2715	static bin_tree_t *
2716	parse_bracket_exp (re_string_t regexp, re_dfa_t dfa, re_token_t *token,
2717	reg_syntax_t syntax, reg_errcode_t *err)
2718	{
2719	#ifdef _LIBC
2720	const unsigned char *collseqmb;
2721	const char *collseqwc;
2722	uint32_t nrules;
2723	int32_t table_size;
2724	const int32_t *symb_table;
2725	const unsigned char *extra;
2726
2727	/* Local function for parse_bracket_exp used in _LIBC environement.
2728	Seek the collating symbol entry correspondings to NAME.
2729	Return the index of the symbol in the SYMB_TABLE. */
2730
2731	auto inline int32_t
2732	__attribute ((always_inline))
2733	seek_collating_symbol_entry (name, name_len)
2734	const unsigned char *name;
2735	size_t name_len;
2736	{
2737	int32_t hash = elem_hash ((const char *) name, name_len);
2738	int32_t elem = hash % table_size;
2739	if (symb_table[2 * elem] != 0)
2740	{
2741	int32_t second = hash % (table_size - 2) + 1;
2742
2743	do
2744	{
2745	/* First compare the hashing value. */
2746	if (symb_table[2 * elem] == hash
2747	/* Compare the length of the name. */
2748	&& name_len == extra[symb_table[2 * elem + 1]]
2749	/* Compare the name. */
2750	&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2751	name_len) == 0)
2752	{
2753	/* Yep, this is the entry. */
2754	break;
2755	}
2756
2757	/* Next entry. */
2758	elem += second;
2759	}
2760	while (symb_table[2 * elem] != 0);
2761	}
2762	return elem;
2763	}
2764
2765	/* Local function for parse_bracket_exp used in _LIBC environement.
2766	Look up the collation sequence value of BR_ELEM.
2767	Return the value if succeeded, UINT_MAX otherwise. */
2768
2769	auto inline unsigned int
2770	__attribute ((always_inline))
2771	lookup_collation_sequence_value (br_elem)
2772	bracket_elem_t *br_elem;
2773	{
2774	if (br_elem->type == SB_CHAR)
2775	{
2776	/*
2777	if (MB_CUR_MAX == 1)
2778	*/
2779	if (nrules == 0)
2780	return collseqmb[br_elem->opr.ch];
2781	else
2782	{
2783	wint_t wc = __btowc (br_elem->opr.ch);
2784	return __collseq_table_lookup (collseqwc, wc);
2785	}
2786	}
2787	else if (br_elem->type == MB_CHAR)
2788	{
2789	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2790	}
2791	else if (br_elem->type == COLL_SYM)
2792	{
2793	size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2794	if (nrules != 0)
2795	{
2796	int32_t elem, idx;
2797	elem = seek_collating_symbol_entry (br_elem->opr.name,
2798	sym_name_len);
2799	if (symb_table[2 * elem] != 0)
2800	{
2801	/* We found the entry. */
2802	idx = symb_table[2 * elem + 1];
2803	/* Skip the name of collating element name. */
2804	idx += 1 + extra[idx];
2805	/* Skip the byte sequence of the collating element. */
2806	idx += 1 + extra[idx];
2807	/* Adjust for the alignment. */
2808	idx = (idx + 3) & ~3;
2809	/* Skip the multibyte collation sequence value. */
2810	idx += sizeof (unsigned int);
2811	/* Skip the wide char sequence of the collating element. */
2812	idx += sizeof (unsigned int) *
2813	(1 + (unsigned int ) (extra + idx));
2814	/* Return the collation sequence value. */
2815	return (unsigned int ) (extra + idx);
2816	}
2817	else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2818	{
2819	/* No valid character. Match it as a single byte
2820	character. */
2821	return collseqmb[br_elem->opr.name[0]];
2822	}
2823	}
2824	else if (sym_name_len == 1)
2825	return collseqmb[br_elem->opr.name[0]];
2826	}
2827	return UINT_MAX;
2828	}
2829
2830	/* Local function for parse_bracket_exp used in _LIBC environement.
2831	Build the range expression which starts from START_ELEM, and ends
2832	at END_ELEM. The result are written to MBCSET and SBCSET.
2833	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2834	mbcset->range_ends, is a pointer argument sinse we may
2835	update it. */
2836
2837	auto inline reg_errcode_t
2838	__attribute ((always_inline))
2839	build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2840	re_charset_t *mbcset;
2841	int *range_alloc;
2842	bitset_t sbcset;
2843	bracket_elem_t start_elem, end_elem;
2844	{
2845	unsigned int ch;
2846	uint32_t start_collseq;
2847	uint32_t end_collseq;
2848
2849	/* Equivalence Classes and Character Classes can't be a range
2850	start/end. */
2851	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2852	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2853	0))
2854	return REG_ERANGE;
2855
2856	start_collseq = lookup_collation_sequence_value (start_elem);
2857	end_collseq = lookup_collation_sequence_value (end_elem);
2858	/* Check start/end collation sequence values. */
2859	if (BE (start_collseq == UINT_MAX \|\| end_collseq == UINT_MAX, 0))
2860	return REG_ECOLLATE;
2861	if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2862	return REG_ERANGE;
2863
2864	/* Got valid collation sequence values, add them as a new entry.
2865	However, if we have no collation elements, and the character set
2866	is single byte, the single byte character set that we
2867	build below suffices. */
2868	if (nrules > 0 \|\| dfa->mb_cur_max > 1)
2869	{
2870	/* Check the space of the arrays. */
2871	if (BE (*range_alloc == mbcset->nranges, 0))
2872	{
2873	/* There is not enough space, need realloc. */
2874	uint32_t *new_array_start;
2875	uint32_t *new_array_end;
2876	int new_nranges;
2877
2878	/* +1 in case of mbcset->nranges is 0. */
2879	new_nranges = 2 * mbcset->nranges + 1;
2880	new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2881	new_nranges);
2882	new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2883	new_nranges);
2884
2885	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
2886	return REG_ESPACE;
2887
2888	mbcset->range_starts = new_array_start;
2889	mbcset->range_ends = new_array_end;
2890	*range_alloc = new_nranges;
2891	}
2892
2893	mbcset->range_starts[mbcset->nranges] = start_collseq;
2894	mbcset->range_ends[mbcset->nranges++] = end_collseq;
2895	}
2896
2897	/* Build the table for single byte characters. */
2898	for (ch = 0; ch < SBC_MAX; ch++)
2899	{
2900	uint32_t ch_collseq;
2901	/*
2902	if (MB_CUR_MAX == 1)
2903	*/
2904	if (nrules == 0)
2905	ch_collseq = collseqmb[ch];
2906	else
2907	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2908	if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2909	bitset_set (sbcset, ch);
2910	}
2911	return REG_NOERROR;
2912	}
2913
2914	/* Local function for parse_bracket_exp used in _LIBC environement.
2915	Build the collating element which is represented by NAME.
2916	The result are written to MBCSET and SBCSET.
2917	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2918	pointer argument sinse we may update it. */
2919
2920	auto inline reg_errcode_t
2921	__attribute ((always_inline))
2922	build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2923	re_charset_t *mbcset;
2924	int *coll_sym_alloc;
2925	bitset_t sbcset;
2926	const unsigned char *name;
2927	{
2928	int32_t elem, idx;
2929	size_t name_len = strlen ((const char *) name);
2930	if (nrules != 0)
2931	{
2932	elem = seek_collating_symbol_entry (name, name_len);
2933	if (symb_table[2 * elem] != 0)
2934	{
2935	/* We found the entry. */
2936	idx = symb_table[2 * elem + 1];
2937	/* Skip the name of collating element name. */
2938	idx += 1 + extra[idx];
2939	}
2940	else if (symb_table[2 * elem] == 0 && name_len == 1)
2941	{
2942	/* No valid character, treat it as a normal
2943	character. */
2944	bitset_set (sbcset, name[0]);
2945	return REG_NOERROR;
2946	}
2947	else
2948	return REG_ECOLLATE;
2949
2950	/* Got valid collation sequence, add it as a new entry. */
2951	/* Check the space of the arrays. */
2952	if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
2953	{
2954	/* Not enough, realloc it. */
2955	/* +1 in case of mbcset->ncoll_syms is 0. */
2956	int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
2957	/* Use realloc since mbcset->coll_syms is NULL
2958	if alloc == 0. /
2959	int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
2960	new_coll_sym_alloc);
2961	if (BE (new_coll_syms == NULL, 0))
2962	return REG_ESPACE;
2963	mbcset->coll_syms = new_coll_syms;
2964	*coll_sym_alloc = new_coll_sym_alloc;
2965	}
2966	mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
2967	return REG_NOERROR;
2968	}
2969	else
2970	{
2971	if (BE (name_len != 1, 0))
2972	return REG_ECOLLATE;
2973	else
2974	{
2975	bitset_set (sbcset, name[0]);
2976	return REG_NOERROR;
2977	}
2978	}
2979	}
2980	#endif
2981
2982	re_token_t br_token;
2983	re_bitset_ptr_t sbcset;
2984	#ifdef RE_ENABLE_I18N
2985	re_charset_t *mbcset;
2986	int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
2987	int equiv_class_alloc = 0, char_class_alloc = 0;
2988	#endif /* not RE_ENABLE_I18N */
2989	int non_match = 0;
2990	bin_tree_t *work_tree;
2991	int token_len;
2992	int first_round = 1;
2993	#ifdef _LIBC
2994	collseqmb = (const unsigned char *)
2995	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
2996	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2997	if (nrules)
2998	{
2999	/*
3000	if (MB_CUR_MAX > 1)
3001	*/
3002	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3003	table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3004	symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3005	_NL_COLLATE_SYMB_TABLEMB);
3006	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3007	_NL_COLLATE_SYMB_EXTRAMB);
3008	}
3009	#endif
3010	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3011	#ifdef RE_ENABLE_I18N
3012	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3013	#endif /* RE_ENABLE_I18N */
3014	#ifdef RE_ENABLE_I18N
3015	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
3016	#else
3017	if (BE (sbcset == NULL, 0))
3018	#endif /* RE_ENABLE_I18N */
3019	{
3020	*err = REG_ESPACE;
3021	return NULL;
3022	}
3023
3024	token_len = peek_token_bracket (token, regexp, syntax);
3025	if (BE (token->type == END_OF_RE, 0))
3026	{
3027	*err = REG_BADPAT;
3028	goto parse_bracket_exp_free_return;
3029	}
3030	if (token->type == OP_NON_MATCH_LIST)
3031	{
3032	#ifdef RE_ENABLE_I18N
3033	mbcset->non_match = 1;
3034	#endif /* not RE_ENABLE_I18N */
3035	non_match = 1;
3036	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3037	bitset_set (sbcset, '\0');
3038	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3039	token_len = peek_token_bracket (token, regexp, syntax);
3040	if (BE (token->type == END_OF_RE, 0))
3041	{
3042	*err = REG_BADPAT;
3043	goto parse_bracket_exp_free_return;
3044	}
3045	}
3046
3047	/* We treat the first ']' as a normal character. */
3048	if (token->type == OP_CLOSE_BRACKET)
3049	token->type = CHARACTER;
3050
3051	while (1)
3052	{
3053	bracket_elem_t start_elem, end_elem;
3054	unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3055	unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3056	reg_errcode_t ret;
3057	int token_len2 = 0, is_range_exp = 0;
3058	re_token_t token2;
3059
3060	start_elem.opr.name = start_name_buf;
3061	ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3062	syntax, first_round);
3063	if (BE (ret != REG_NOERROR, 0))
3064	{
3065	*err = ret;
3066	goto parse_bracket_exp_free_return;
3067	}
3068	first_round = 0;
3069
3070	/* Get information about the next token. We need it in any case. */
3071	token_len = peek_token_bracket (token, regexp, syntax);
3072
3073	/* Do not check for ranges if we know they are not allowed. */
3074	if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3075	{
3076	if (BE (token->type == END_OF_RE, 0))
3077	{
3078	*err = REG_EBRACK;
3079	goto parse_bracket_exp_free_return;
3080	}
3081	if (token->type == OP_CHARSET_RANGE)
3082	{
3083	re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3084	token_len2 = peek_token_bracket (&token2, regexp, syntax);
3085	if (BE (token2.type == END_OF_RE, 0))
3086	{
3087	*err = REG_EBRACK;
3088	goto parse_bracket_exp_free_return;
3089	}
3090	if (token2.type == OP_CLOSE_BRACKET)
3091	{
3092	/* We treat the last '-' as a normal character. */
3093	re_string_skip_bytes (regexp, -token_len);
3094	token->type = CHARACTER;
3095	}
3096	else
3097	is_range_exp = 1;
3098	}
3099	}
3100
3101	if (is_range_exp == 1)
3102	{
3103	end_elem.opr.name = end_name_buf;
3104	ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3105	dfa, syntax, 1);
3106	if (BE (ret != REG_NOERROR, 0))
3107	{
3108	*err = ret;
3109	goto parse_bracket_exp_free_return;
3110	}
3111
3112	token_len = peek_token_bracket (token, regexp, syntax);
3113
3114	#ifdef _LIBC
3115	*err = build_range_exp (sbcset, mbcset, &range_alloc,
3116	&start_elem, &end_elem);
3117	#else
3118	# ifdef RE_ENABLE_I18N
3119	*err = build_range_exp (sbcset,
3120	dfa->mb_cur_max > 1 ? mbcset : NULL,
3121	&range_alloc, &start_elem, &end_elem);
3122	# else
3123	*err = build_range_exp (sbcset, &start_elem, &end_elem);
3124	# endif
3125	#endif /* RE_ENABLE_I18N */
3126	if (BE (*err != REG_NOERROR, 0))
3127	goto parse_bracket_exp_free_return;
3128	}
3129	else
3130	{
3131	switch (start_elem.type)
3132	{
3133	case SB_CHAR:
3134	bitset_set (sbcset, start_elem.opr.ch);
3135	break;
3136	#ifdef RE_ENABLE_I18N
3137	case MB_CHAR:
3138	/* Check whether the array has enough space. */
3139	if (BE (mbchar_alloc == mbcset->nmbchars, 0))
3140	{
3141	wchar_t *new_mbchars;
3142	/* Not enough, realloc it. */
3143	/* +1 in case of mbcset->nmbchars is 0. */
3144	mbchar_alloc = 2 * mbcset->nmbchars + 1;
3145	/* Use realloc since array is NULL if alloc == 0. /
3146	new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3147	mbchar_alloc);
3148	if (BE (new_mbchars == NULL, 0))
3149	goto parse_bracket_exp_espace;
3150	mbcset->mbchars = new_mbchars;
3151	}
3152	mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3153	break;
3154	#endif /* RE_ENABLE_I18N */
3155	case EQUIV_CLASS:
3156	*err = build_equiv_class (sbcset,
3157	#ifdef RE_ENABLE_I18N
3158	mbcset, &equiv_class_alloc,
3159	#endif /* RE_ENABLE_I18N */
3160	start_elem.opr.name);
3161	if (BE (*err != REG_NOERROR, 0))
3162	goto parse_bracket_exp_free_return;
3163	break;
3164	case COLL_SYM:
3165	*err = build_collating_symbol (sbcset,
3166	#ifdef RE_ENABLE_I18N
3167	mbcset, &coll_sym_alloc,
3168	#endif /* RE_ENABLE_I18N */
3169	start_elem.opr.name);
3170	if (BE (*err != REG_NOERROR, 0))
3171	goto parse_bracket_exp_free_return;
3172	break;
3173	case CHAR_CLASS:
3174	*err = build_charclass (regexp->trans, sbcset,
3175	#ifdef RE_ENABLE_I18N
3176	mbcset, &char_class_alloc,
3177	#endif /* RE_ENABLE_I18N */
3178	start_elem.opr.name, syntax);
3179	if (BE (*err != REG_NOERROR, 0))
3180	goto parse_bracket_exp_free_return;
3181	break;
3182	default:
3183	assert (0);
3184	break;
3185	}
3186	}
3187	if (BE (token->type == END_OF_RE, 0))
3188	{
3189	*err = REG_EBRACK;
3190	goto parse_bracket_exp_free_return;
3191	}
3192	if (token->type == OP_CLOSE_BRACKET)
3193	break;
3194	}
3195
3196	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3197
3198	/* If it is non-matching list. */
3199	if (non_match)
3200	bitset_not (sbcset);
3201
3202	#ifdef RE_ENABLE_I18N
3203	/* Ensure only single byte characters are set. */
3204	if (dfa->mb_cur_max > 1)
3205	bitset_mask (sbcset, dfa->sb_char);
3206
3207	if (mbcset->nmbchars \|\| mbcset->ncoll_syms \|\| mbcset->nequiv_classes
3208	\|\| mbcset->nranges \|\| (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3209	\|\| mbcset->non_match)))
3210	{
3211	bin_tree_t *mbc_tree;
3212	int sbc_idx;
3213	/* Build a tree for complex bracket. */
3214	dfa->has_mb_node = 1;
3215	br_token.type = COMPLEX_BRACKET;
3216	br_token.opr.mbcset = mbcset;
3217	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3218	if (BE (mbc_tree == NULL, 0))
3219	goto parse_bracket_exp_espace;
3220	for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3221	if (sbcset[sbc_idx])
3222	break;
3223	/* If there are no bits set in sbcset, there is no point
3224	of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3225	if (sbc_idx < BITSET_WORDS)
3226	{
3227	/* Build a tree for simple bracket. */
3228	br_token.type = SIMPLE_BRACKET;
3229	br_token.opr.sbcset = sbcset;
3230	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3231	if (BE (work_tree == NULL, 0))
3232	goto parse_bracket_exp_espace;
3233
3234	/* Then join them by ALT node. */
3235	work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3236	if (BE (work_tree == NULL, 0))
3237	goto parse_bracket_exp_espace;
3238	}
3239	else
3240	{
3241	re_free (sbcset);
3242	work_tree = mbc_tree;
3243	}
3244	}
3245	else
3246	#endif /* not RE_ENABLE_I18N */
3247	{
3248	#ifdef RE_ENABLE_I18N
3249	free_charset (mbcset);
3250	#endif
3251	/* Build a tree for simple bracket. */
3252	br_token.type = SIMPLE_BRACKET;
3253	br_token.opr.sbcset = sbcset;
3254	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3255	if (BE (work_tree == NULL, 0))
3256	goto parse_bracket_exp_espace;
3257	}
3258	return work_tree;
3259
3260	parse_bracket_exp_espace:
3261	*err = REG_ESPACE;
3262	parse_bracket_exp_free_return:
3263	re_free (sbcset);
3264	#ifdef RE_ENABLE_I18N
3265	free_charset (mbcset);
3266	#endif /* RE_ENABLE_I18N */
3267	return NULL;
3268	}
3269
3270	/* Parse an element in the bracket expression. */
3271
3272	static reg_errcode_t
3273	parse_bracket_element (bracket_elem_t elem, re_string_t regexp,
3274	re_token_t token, int token_len, re_dfa_t dfa,
3275	reg_syntax_t syntax, int accept_hyphen)
3276	{
3277	#ifdef RE_ENABLE_I18N
3278	int cur_char_size;
3279	cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3280	if (cur_char_size > 1)
3281	{
3282	elem->type = MB_CHAR;
3283	elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3284	re_string_skip_bytes (regexp, cur_char_size);
3285	return REG_NOERROR;
3286	}
3287	#endif /* RE_ENABLE_I18N */
3288	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3289	if (token->type == OP_OPEN_COLL_ELEM \|\| token->type == OP_OPEN_CHAR_CLASS
3290	\|\| token->type == OP_OPEN_EQUIV_CLASS)
3291	return parse_bracket_symbol (elem, regexp, token);
3292	if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3293	{
3294	/* A '-' must only appear as anything but a range indicator before
3295	the closing bracket. Everything else is an error. */
3296	re_token_t token2;
3297	(void) peek_token_bracket (&token2, regexp, syntax);
3298	if (token2.type != OP_CLOSE_BRACKET)
3299	/* The actual error value is not standardized since this whole
3300	case is undefined. But ERANGE makes good sense. */
3301	return REG_ERANGE;
3302	}
3303	elem->type = SB_CHAR;
3304	elem->opr.ch = token->opr.c;
3305	return REG_NOERROR;
3306	}
3307
3308	/* Parse a bracket symbol in the bracket expression. Bracket symbols are
3309	such as [:<character_class>:], [.<collating_element>.], and
3310	[=<equivalent_class>=]. */
3311
3312	static reg_errcode_t
3313	parse_bracket_symbol (bracket_elem_t elem, re_string_t regexp,
3314	re_token_t *token)
3315	{
3316	unsigned char ch, delim = token->opr.c;
3317	int i = 0;
3318	if (re_string_eoi(regexp))
3319	return REG_EBRACK;
3320	for (;; ++i)
3321	{
3322	if (i >= BRACKET_NAME_BUF_SIZE)
3323	return REG_EBRACK;
3324	if (token->type == OP_OPEN_CHAR_CLASS)
3325	ch = re_string_fetch_byte_case (regexp);
3326	else
3327	ch = re_string_fetch_byte (regexp);
3328	if (re_string_eoi(regexp))
3329	return REG_EBRACK;
3330	if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3331	break;
3332	elem->opr.name[i] = ch;
3333	}
3334	re_string_skip_bytes (regexp, 1);
3335	elem->opr.name[i] = '\0';
3336	switch (token->type)
3337	{
3338	case OP_OPEN_COLL_ELEM:
3339	elem->type = COLL_SYM;
3340	break;
3341	case OP_OPEN_EQUIV_CLASS:
3342	elem->type = EQUIV_CLASS;
3343	break;
3344	case OP_OPEN_CHAR_CLASS:
3345	elem->type = CHAR_CLASS;
3346	break;
3347	default:
3348	break;
3349	}
3350	return REG_NOERROR;
3351	}
3352
3353	/* Helper function for parse_bracket_exp.
3354	Build the equivalence class which is represented by NAME.
3355	The result are written to MBCSET and SBCSET.
3356	EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3357	is a pointer argument sinse we may update it. */
3358
3359	static reg_errcode_t
3360	#ifdef RE_ENABLE_I18N
3361	build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3362	int equiv_class_alloc, const unsigned char name)
3363	#else /* not RE_ENABLE_I18N */
3364	build_equiv_class (bitset_t sbcset, const unsigned char *name)
3365	#endif /* not RE_ENABLE_I18N */
3366	{
3367	#ifdef _LIBC
3368	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3369	if (nrules != 0)
3370	{
3371	const int32_t table, indirect;
3372	const unsigned char weights, extra, *cp;
3373	unsigned char char_buf[2];
3374	int32_t idx1, idx2;
3375	unsigned int ch;
3376	size_t len;
3377	/* This #include defines a local function! */
3378	# include <locale/weight.h>
3379	/* Calculate the index for equivalence class. */
3380	cp = name;
3381	table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3382	weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3383	_NL_COLLATE_WEIGHTMB);
3384	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3385	_NL_COLLATE_EXTRAMB);
3386	indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3387	_NL_COLLATE_INDIRECTMB);
3388	idx1 = findidx (&cp);
3389	if (BE (idx1 == 0 \|\| cp < name + strlen ((const char *) name), 0))
3390	/* This isn't a valid character. */
3391	return REG_ECOLLATE;
3392
3393	/* Build single byte matcing table for this equivalence class. */
3394	char_buf[1] = (unsigned char) '\0';
3395	len = weights[idx1];
3396	for (ch = 0; ch < SBC_MAX; ++ch)
3397	{
3398	char_buf[0] = ch;
3399	cp = char_buf;
3400	idx2 = findidx (&cp);
3401	/*
3402	idx2 = table[ch];
3403	*/
3404	if (idx2 == 0)
3405	/* This isn't a valid character. */
3406	continue;
3407	if (len == weights[idx2])
3408	{
3409	int cnt = 0;
3410	while (cnt <= len &&
3411	weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
3412	++cnt;
3413
3414	if (cnt > len)
3415	bitset_set (sbcset, ch);
3416	}
3417	}
3418	/* Check whether the array has enough space. */
3419	if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
3420	{
3421	/* Not enough, realloc it. */
3422	/* +1 in case of mbcset->nequiv_classes is 0. */
3423	int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3424	/* Use realloc since the array is NULL if alloc == 0. /
3425	int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3426	int32_t,
3427	new_equiv_class_alloc);
3428	if (BE (new_equiv_classes == NULL, 0))
3429	return REG_ESPACE;
3430	mbcset->equiv_classes = new_equiv_classes;
3431	*equiv_class_alloc = new_equiv_class_alloc;
3432	}
3433	mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3434	}
3435	else
3436	#endif /* _LIBC */
3437	{
3438	if (BE (strlen ((const char *) name) != 1, 0))
3439	return REG_ECOLLATE;
3440	bitset_set (sbcset, *name);
3441	}
3442	return REG_NOERROR;
3443	}
3444
3445	/* Helper function for parse_bracket_exp.
3446	Build the character class which is represented by NAME.
3447	The result are written to MBCSET and SBCSET.
3448	CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3449	is a pointer argument sinse we may update it. */
3450
3451	static reg_errcode_t
3452	#ifdef RE_ENABLE_I18N
3453	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3454	re_charset_t mbcset, int char_class_alloc,
3455	const unsigned char *class_name, reg_syntax_t syntax)
3456	#else /* not RE_ENABLE_I18N */
3457	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3458	const unsigned char *class_name, reg_syntax_t syntax)
3459	#endif /* not RE_ENABLE_I18N */
3460	{
3461	int i;
3462	const char name = (const char ) class_name;
3463
3464	/* In case of REG_ICASE "upper" and "lower" match the both of
3465	upper and lower cases. */
3466	if ((syntax & RE_ICASE)
3467	&& (strcmp (name, "upper") == 0 \|\| strcmp (name, "lower") == 0))
3468	name = "alpha";
3469
3470	#ifdef RE_ENABLE_I18N
3471	/* Check the space of the arrays. */
3472	if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
3473	{
3474	/* Not enough, realloc it. */
3475	/* +1 in case of mbcset->nchar_classes is 0. */
3476	int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
3477	/* Use realloc since array is NULL if alloc == 0. /
3478	wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3479	new_char_class_alloc);
3480	if (BE (new_char_classes == NULL, 0))
3481	return REG_ESPACE;
3482	mbcset->char_classes = new_char_classes;
3483	*char_class_alloc = new_char_class_alloc;
3484	}
3485	mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3486	#endif /* RE_ENABLE_I18N */
3487
3488	#define BUILD_CHARCLASS_LOOP(ctype_func) \
3489	do { \
3490	if (BE (trans != NULL, 0)) \
3491	{ \
3492	for (i = 0; i < SBC_MAX; ++i) \
3493	if (ctype_func (i)) \
3494	bitset_set (sbcset, trans[i]); \
3495	} \
3496	else \
3497	{ \
3498	for (i = 0; i < SBC_MAX; ++i) \
3499	if (ctype_func (i)) \
3500	bitset_set (sbcset, i); \
3501	} \
3502	} while (0)
3503
3504	if (strcmp (name, "alnum") == 0)
3505	BUILD_CHARCLASS_LOOP (isalnum);
3506	else if (strcmp (name, "cntrl") == 0)
3507	BUILD_CHARCLASS_LOOP (iscntrl);
3508	else if (strcmp (name, "lower") == 0)
3509	BUILD_CHARCLASS_LOOP (islower);
3510	else if (strcmp (name, "space") == 0)
3511	BUILD_CHARCLASS_LOOP (isspace);
3512	else if (strcmp (name, "alpha") == 0)
3513	BUILD_CHARCLASS_LOOP (isalpha);
3514	else if (strcmp (name, "digit") == 0)
3515	BUILD_CHARCLASS_LOOP (isdigit);
3516	else if (strcmp (name, "print") == 0)
3517	BUILD_CHARCLASS_LOOP (isprint);
3518	else if (strcmp (name, "upper") == 0)
3519	BUILD_CHARCLASS_LOOP (isupper);
3520	else if (strcmp (name, "blank") == 0)
3521	BUILD_CHARCLASS_LOOP (isblank);
3522	else if (strcmp (name, "graph") == 0)
3523	BUILD_CHARCLASS_LOOP (isgraph);
3524	else if (strcmp (name, "punct") == 0)
3525	BUILD_CHARCLASS_LOOP (ispunct);
3526	else if (strcmp (name, "xdigit") == 0)
3527	BUILD_CHARCLASS_LOOP (isxdigit);
3528	else
3529	return REG_ECTYPE;
3530
3531	return REG_NOERROR;
3532	}
3533
3534	static bin_tree_t *
3535	build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3536	const unsigned char *class_name,
3537	const unsigned char *extra, int non_match,
3538	reg_errcode_t *err)
3539	{
3540	re_bitset_ptr_t sbcset;
3541	#ifdef RE_ENABLE_I18N
3542	re_charset_t *mbcset;
3543	int alloc = 0;
3544	#endif /* not RE_ENABLE_I18N */
3545	reg_errcode_t ret;
3546	re_token_t br_token;
3547	bin_tree_t *tree;
3548
3549	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3550	#ifdef RE_ENABLE_I18N
3551	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3552	#endif /* RE_ENABLE_I18N */
3553
3554	#ifdef RE_ENABLE_I18N
3555	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
3556	#else /* not RE_ENABLE_I18N */
3557	if (BE (sbcset == NULL, 0))
3558	#endif /* not RE_ENABLE_I18N */
3559	{
3560	*err = REG_ESPACE;
3561	return NULL;
3562	}
3563
3564	if (non_match)
3565	{
3566	#ifdef RE_ENABLE_I18N
3567	/*
3568	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3569	bitset_set(cset->sbcset, '\0');
3570	*/
3571	mbcset->non_match = 1;
3572	#endif /* not RE_ENABLE_I18N */
3573	}
3574
3575	/* We don't care the syntax in this case. */
3576	ret = build_charclass (trans, sbcset,
3577	#ifdef RE_ENABLE_I18N
3578	mbcset, &alloc,
3579	#endif /* RE_ENABLE_I18N */
3580	class_name, 0);
3581
3582	if (BE (ret != REG_NOERROR, 0))
3583	{
3584	re_free (sbcset);
3585	#ifdef RE_ENABLE_I18N
3586	free_charset (mbcset);
3587	#endif /* RE_ENABLE_I18N */
3588	*err = ret;
3589	return NULL;
3590	}
3591	/* \w match '_' also. */
3592	for (; *extra; extra++)
3593	bitset_set (sbcset, *extra);
3594
3595	/* If it is non-matching list. */
3596	if (non_match)
3597	bitset_not (sbcset);
3598
3599	#ifdef RE_ENABLE_I18N
3600	/* Ensure only single byte characters are set. */
3601	if (dfa->mb_cur_max > 1)
3602	bitset_mask (sbcset, dfa->sb_char);
3603	#endif
3604
3605	/* Build a tree for simple bracket. */
3606	br_token.type = SIMPLE_BRACKET;
3607	br_token.opr.sbcset = sbcset;
3608	tree = create_token_tree (dfa, NULL, NULL, &br_token);
3609	if (BE (tree == NULL, 0))
3610	goto build_word_op_espace;
3611
3612	#ifdef RE_ENABLE_I18N
3613	if (dfa->mb_cur_max > 1)
3614	{
3615	bin_tree_t *mbc_tree;
3616	/* Build a tree for complex bracket. */
3617	br_token.type = COMPLEX_BRACKET;
3618	br_token.opr.mbcset = mbcset;
3619	dfa->has_mb_node = 1;
3620	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3621	if (BE (mbc_tree == NULL, 0))
3622	goto build_word_op_espace;
3623	/* Then join them by ALT node. */
3624	tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3625	if (BE (mbc_tree != NULL, 1))
3626	return tree;
3627	}
3628	else
3629	{
3630	free_charset (mbcset);
3631	return tree;
3632	}
3633	#else /* not RE_ENABLE_I18N */
3634	return tree;
3635	#endif /* not RE_ENABLE_I18N */
3636
3637	build_word_op_espace:
3638	re_free (sbcset);
3639	#ifdef RE_ENABLE_I18N
3640	free_charset (mbcset);
3641	#endif /* RE_ENABLE_I18N */
3642	*err = REG_ESPACE;
3643	return NULL;
3644	}
3645
3646	/* This is intended for the expressions like "a{1,3}".
3647	Fetch a number from `input', and return the number.
3648	Return -1, if the number field is empty like "{,1}".
3649	Return -2, If an error is occured. */
3650
3651	static int
3652	fetch_number (re_string_t input, re_token_t token, reg_syntax_t syntax)
3653	{
3654	int num = -1;
3655	unsigned char c;
3656	while (1)
3657	{
3658	fetch_token (token, input, syntax);
3659	c = token->opr.c;
3660	if (BE (token->type == END_OF_RE, 0))
3661	return -2;
3662	if (token->type == OP_CLOSE_DUP_NUM \|\| c == ',')
3663	break;
3664	num = ((token->type != CHARACTER \|\| c < '0' \|\| '9' < c \|\| num == -2)
3665	? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3666	num = (num > RE_DUP_MAX) ? -2 : num;
3667	}
3668	return num;
3669	}
3670
3671
3672	#ifdef RE_ENABLE_I18N
3673	static void
3674	free_charset (re_charset_t *cset)
3675	{
3676	re_free (cset->mbchars);
3677	# ifdef _LIBC
3678	re_free (cset->coll_syms);
3679	re_free (cset->equiv_classes);
3680	re_free (cset->range_starts);
3681	re_free (cset->range_ends);
3682	# endif
3683	re_free (cset->char_classes);
3684	re_free (cset);
3685	}
3686	#endif /* RE_ENABLE_I18N */
3687
3688
3689	/* Functions for binary tree operation. */
3690
3691	/* Create a tree node. */
3692
3693	static bin_tree_t *
3694	create_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3695	re_token_type_t type)
3696	{
3697	re_token_t t;
3698	t.type = type;
3699	return create_token_tree (dfa, left, right, &t);
3700	}
3701
3702	static bin_tree_t *
3703	create_token_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3704	const re_token_t *token)
3705	{
3706	bin_tree_t *tree;
3707	if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3708	{
3709	bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3710
3711	if (storage == NULL)
3712	return NULL;
3713	storage->next = dfa->str_tree_storage;
3714	dfa->str_tree_storage = storage;
3715	dfa->str_tree_storage_idx = 0;
3716	}
3717	tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3718
3719	tree->parent = NULL;
3720	tree->left = left;
3721	tree->right = right;
3722	tree->token = *token;
3723	tree->token.duplicated = 0;
3724	tree->token.opt_subexp = 0;
3725	tree->first = NULL;
3726	tree->next = NULL;
3727	tree->node_idx = -1;
3728
3729	if (left != NULL)
3730	left->parent = tree;
3731	if (right != NULL)
3732	right->parent = tree;
3733	return tree;
3734	}
3735
3736	/* Mark the tree SRC as an optional subexpression.
3737	To be called from preorder or postorder. */
3738
3739	static reg_errcode_t
3740	mark_opt_subexp (void extra, bin_tree_t node)
3741	{
3742	int idx = (int) (long) extra;
3743	if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3744	node->token.opt_subexp = 1;
3745
3746	return REG_NOERROR;
3747	}
3748
3749	/* Free the allocated memory inside NODE. */
3750
3751	static void
3752	free_token (re_token_t *node)
3753	{
3754	#ifdef RE_ENABLE_I18N
3755	if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3756	free_charset (node->opr.mbcset);
3757	else
3758	#endif /* RE_ENABLE_I18N */
3759	if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3760	re_free (node->opr.sbcset);
3761	}
3762
3763	/* Worker function for tree walking. Free the allocated memory inside NODE
3764	and its children. */
3765
3766	static reg_errcode_t
3767	free_tree (void extra, bin_tree_t node)
3768	{
3769	free_token (&node->token);
3770	return REG_NOERROR;
3771	}
3772
3773
3774	/* Duplicate the node SRC, and return new node. This is a preorder
3775	visit similar to the one implemented by the generic visitor, but
3776	we need more infrastructure to maintain two parallel trees --- so,
3777	it's easier to duplicate. */
3778
3779	static bin_tree_t *
3780	duplicate_tree (const bin_tree_t root, re_dfa_t dfa)
3781	{
3782	const bin_tree_t *node;
3783	bin_tree_t *dup_root;
3784	bin_tree_t *p_new = &dup_root, dup_node = root->parent;
3785
3786	for (node = root; ; )
3787	{
3788	/* Create a new tree and link it back to the current parent. */
3789	*p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3790	if (*p_new == NULL)
3791	return NULL;
3792	(*p_new)->parent = dup_node;
3793	(*p_new)->token.duplicated = 1;
3794	dup_node = *p_new;
3795
3796	/* Go to the left node, or up and to the right. */
3797	if (node->left)
3798	{
3799	node = node->left;
3800	p_new = &dup_node->left;
3801	}
3802	else
3803	{
3804	const bin_tree_t *prev = NULL;
3805	while (node->right == prev \|\| node->right == NULL)
3806	{
3807	prev = node;
3808	node = node->parent;
3809	dup_node = dup_node->parent;
3810	if (!node)
3811	return dup_root;
3812	}
3813	node = node->right;
3814	p_new = &dup_node->right;
3815	}
3816	}
3817	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: