Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: vendor/gcc/3.2.2/libiberty/regex.c

Visit:

Last change on this file was 2, checked in by bird, 22 years ago
Initial revision
Property cvs2svn:cvs-rev set to `1.1` Property svn:eol-style set to `native` Property svn:executable set to ``*
File size: 255.0 KB

Line
1	/* Extended regular expression matching and search library,
2	version 0.12.
3	(Implements POSIX draft P1003.2/D11.2, except for some of the
4	internationalization features.)
5	Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
6	This file is part of the GNU C Library.
7
8	The GNU C Library is free software; you can redistribute it and/or
9	modify it under the terms of the GNU Lesser General Public
10	License as published by the Free Software Foundation; either
11	version 2.1 of the License, or (at your option) any later version.
12
13	The GNU C Library is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	Lesser General Public License for more details.
17
18	You should have received a copy of the GNU Lesser General Public
19	License along with the GNU C Library; if not, write to the Free
20	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21	02111-1307 USA. */
22
23	/* This file has been modified for usage in libiberty. It includes "xregex.h"
24	instead of <regex.h>. The "xregex.h" header file renames all external
25	routines with an "x" prefix so they do not collide with the native regex
26	routines or with other components regex routines. */
27	/* AIX requires this to be the first thing in the file. */
28	#if defined _AIX && !defined REGEX_MALLOC
29	#pragma alloca
30	#endif
31
32	#undef _GNU_SOURCE
33	#define _GNU_SOURCE
34
35	#ifdef HAVE_CONFIG_H
36	# include <config.h>
37	#endif
38
/* PARAMS wraps the parameter list of a prototype.  The list is kept
   for GCC and for compilers that claim ISO C conformance, and is
   reduced to `()' for every other compiler.  A definition supplied
   earlier (e.g. by config.h) takes precedence.  */
#ifndef PARAMS
# if defined __GNUC__ || (defined __STDC__ && __STDC__)
#  define PARAMS(args) args
# else
#  define PARAMS(args) ()
# endif  /* GCC.  */
#endif  /* Not PARAMS.  */
46
47	#ifndef INSIDE_RECURSION
48
49	# if defined STDC_HEADERS && !defined emacs
50	# include <stddef.h>
51	# else
52	/* We need this for `regex.h', and perhaps for the Emacs include files. */
53	# include <sys/types.h>
54	# endif
55
/* Nonzero only when all three wide-character facilities were detected.
   The macro is meant to be evaluated inside `#if', where HAVE_* names
   that are not defined count as 0.  */
# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)

/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
# if defined _LIBC || WIDE_CHAR_SUPPORT
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
#  include <wchar.h>
#  include <wctype.h>
# endif
65
66	# ifdef _LIBC
67	/* We have to keep the namespace clean. */
68	# define regfree(preg) __regfree (preg)
69	# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
70	# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
71	# define regerror(errcode, preg, errbuf, errbuf_size) \
72	__regerror(errcode, preg, errbuf, errbuf_size)
73	# define re_set_registers(bu, re, nu, st, en) \
74	__re_set_registers (bu, re, nu, st, en)
75	# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
76	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
77	# define re_match(bufp, string, size, pos, regs) \
78	__re_match (bufp, string, size, pos, regs)
79	# define re_search(bufp, string, size, startpos, range, regs) \
80	__re_search (bufp, string, size, startpos, range, regs)
81	# define re_compile_pattern(pattern, length, bufp) \
82	__re_compile_pattern (pattern, length, bufp)
83	# define re_set_syntax(syntax) __re_set_syntax (syntax)
84	# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
85	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
86	# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
87
88	# define btowc __btowc
89
90	/* We are also using some library internals. */
91	# include <locale/localeinfo.h>
92	# include <locale/elem-hash.h>
93	# include <langinfo.h>
94	# include <locale/coll-lookup.h>
95	# endif
96
/* This is for other GNU distributions with internationalized messages.
   When neither NLS support nor _LIBC is available, gettext degrades to
   a macro that yields its argument unchanged.  */
# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
#  include <libintl.h>
#  ifdef _LIBC
/* Inside the C library, look messages up in the "libc" domain.  */
#   undef gettext
#   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
#  endif
# else
#  define gettext(msgid) (msgid)
# endif
107
108	# ifndef gettext_noop
109	/* This define is so xgettext can find the internationalizable
110	strings. */
111	# define gettext_noop(String) String
112	# endif
113
114	/* The `emacs' switch turns on certain matching commands
115	that make sense only in Emacs. */
# ifdef emacs

#  include "lisp.h"
#  include "buffer.h"
#  include "syntax.h"

# else  /* not emacs */

/* If we are not linking with Emacs proper,
   we can't use the relocating allocator
   even if config.h says that we can.  */
#  undef REL_ALLOC

/* Without <stdlib.h> the allocator functions are declared by hand,
   in the old no-prototype form.  */
#  if defined STDC_HEADERS || defined _LIBC
#   include <stdlib.h>
#  else
char *malloc ();
char *realloc ();
#  endif

/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
   If nothing else has been done, use the method below.  */
#  ifdef INHIBIT_STRING_HEADER
#   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
#    if !defined bzero && !defined bcopy
#     undef INHIBIT_STRING_HEADER
#    endif
#   endif
#  endif

/* This is the normal way of making sure we have a bcopy and a bzero.
   This is used in most programs--a few other programs avoid this
   by defining INHIBIT_STRING_HEADER.  */
#  ifndef INHIBIT_STRING_HEADER
#   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
#    include <string.h>
#    ifndef bzero
#     ifndef _LIBC
#      define bzero(s, n) (memset (s, '\0', n), (s))
#     else
#      define bzero(s, n) __bzero (s, n)
#     endif
#    endif
#   else
/* Only <strings.h> is available: express the mem* calls used by this
   file in terms of the b* functions.  */
#    include <strings.h>
#    ifndef memcmp
#     define memcmp(s1, s2, n) bcmp (s1, s2, n)
#    endif
#    ifndef memcpy
#     define memcpy(d, s, n) (bcopy (s, d, n), (d))
#    endif
#   endif
#  endif

/* Define the syntax stuff for \<, \>, etc.  */

/* This must be nonzero for the wordchar and notwordchar pattern
   commands in re_match_2.  */
#  ifndef Sword
#   define Sword 1
#  endif

/* SWITCH_ENUM_CAST converts its argument to int when SWITCH_ENUM_BUG
   is defined, and is the identity otherwise.  */
#  ifdef SWITCH_ENUM_BUG
#   define SWITCH_ENUM_CAST(x) ((int)(x))
#  else
#   define SWITCH_ENUM_CAST(x) (x)
#  endif

# endif /* not emacs */
185
# if defined _LIBC || HAVE_LIMITS_H
#  include <limits.h>
# endif

/* If nothing has defined MB_LEN_MAX by now, assume one byte per
   character.  */
# ifndef MB_LEN_MAX
#  define MB_LEN_MAX 1
# endif
193
194
195	/* Get the interface, including the syntax bits. */
196	# include "xregex.h" /* change for libiberty */
197
198	/* isalpha etc. are used for the character classes. */
199	# include <ctype.h>
200
201	/* Jim Meyering writes:
202
203	"... Some ctype macros are valid only for character codes that
204	isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
205	using /bin/cc or gcc but without giving an ansi option). So, all
206	ctype uses should be through macros like ISPRINT... If
207	STDC_HEADERS is defined, then autoconf has verified that the ctype
208	macros don't need to be guarded with references to isascii. ...
209	Defining isascii to 1 should let any compiler worth its salt
210	eliminate the && through constant folding."
211	Solaris defines some of these symbols so we must undefine them first. */
212
/* ISASCII guards every ctype call below.  It is the constant 1 when
   STDC_HEADERS is defined, or when no isascii is known to exist;
   otherwise it defers to isascii.  */
# undef ISASCII
# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
#  define ISASCII(c) 1
# else
#  define ISASCII(c) isascii(c)
# endif

/* isblank and isgraph are used only if the host provides them as
   macros; otherwise equivalent tests are spelled out by hand.  */
# ifdef isblank
#  define ISBLANK(c) (ISASCII (c) && isblank (c))
# else
#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
# ifdef isgraph
#  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
# else
#  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
# endif

/* Guarded wrappers around the remaining <ctype.h> classifiers.  */
# undef ISPRINT
# define ISPRINT(c) (ISASCII (c) && isprint (c))
# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
# define ISALNUM(c) (ISASCII (c) && isalnum (c))
# define ISALPHA(c) (ISASCII (c) && isalpha (c))
# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
# define ISLOWER(c) (ISASCII (c) && islower (c))
# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
# define ISSPACE(c) (ISASCII (c) && isspace (c))
# define ISUPPER(c) (ISASCII (c) && isupper (c))
# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
242
243	# ifdef _tolower
244	# define TOLOWER(c) _tolower(c)
245	# else
246	# define TOLOWER(c) tolower(c)
247	# endif
248
249	# ifndef NULL
250	# define NULL (void *)0
251	# endif
252
253	/* We remove any previous definition of `SIGN_EXTEND_CHAR',
254	since ours (we hope) works properly with all combinations of
255	machines, compilers, `char' and `unsigned char' argument types.
256	(Per Bothner suggested the basic approach.) */
257	# undef SIGN_EXTEND_CHAR
258	# if __STDC__
259	# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
260	# else /* not __STDC__ */
261	/* As in Harbison and Steele. */
262	# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
263	# endif
264
265
266	# ifndef emacs
267	/* How many characters in the character set. */
268	# define CHAR_SET_SIZE 256
269
270	# ifdef SYNTAX_TABLE
271
272	extern char *re_syntax_table;
273
274	# else /* not SYNTAX_TABLE */
275
276	static char re_syntax_table[CHAR_SET_SIZE];
277
278	static void init_syntax_once PARAMS ((void));
279
280	static void
281	init_syntax_once ()
282	{
283	register int c;
284	static int done = 0;
285
286	if (done)
287	return;
288	bzero (re_syntax_table, sizeof re_syntax_table);
289
290	for (c = 0; c < CHAR_SET_SIZE; ++c)
291	if (ISALNUM (c))
292	re_syntax_table[c] = Sword;
293
294	re_syntax_table['_'] = Sword;
295
296	done = 1;
297	}
298
299	# endif /* not SYNTAX_TABLE */
300
301	# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
302
303	# endif /* emacs */
304
305
306	/* Integer type for pointers. */
307	# if !defined _LIBC && !defined HAVE_UINTPTR_T
308	typedef unsigned long int uintptr_t;
309	# endif
310
311	/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
312	use `alloca' instead of `malloc'. This is because using malloc in
313	re_search* or re_match* could cause memory leaks when C-g is used in
314	Emacs; also, malloc is slower and causes storage fragmentation. On
315	the other hand, malloc is more portable, and easier to debug.
316
317	Because we sometimes use alloca, some routines have to be macros,
318	not functions -- `alloca'-allocated space disappears at the end of the
319	function it is called in. */
320
# ifdef REGEX_MALLOC

/* Heap variant: plain malloc/realloc/free.  The old size passed to
   REGEX_REALLOCATE is ignored.  */
#  define REGEX_ALLOCATE malloc
#  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#  define REGEX_FREE free

# else /* not REGEX_MALLOC  */

/* Emacs already defines alloca, sometimes.  */
#  ifndef alloca

/* Make alloca work the best possible way.  */
#   ifdef __GNUC__
#    define alloca __builtin_alloca
#   else /* not __GNUC__ */
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    endif /* HAVE_ALLOCA_H */
#   endif /* not __GNUC__ */

#  endif /* not alloca */

#  define REGEX_ALLOCATE alloca

/* Assumes a `char *destination' variable.  A fresh block of NSIZE
   bytes is obtained with alloca and the first OSIZE bytes of SOURCE
   are copied into it; the result is left in `destination'.  */
#  define REGEX_REALLOCATE(source, osize, nsize)                        \
  (destination = (char *) alloca (nsize),                               \
   memcpy (destination, source, osize))

/* No need to do anything to free, after alloca.  */
#  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */

# endif /* not REGEX_MALLOC */
354
355	/* Define how to allocate the failure stack. */
356
# if defined REL_ALLOC && defined REGEX_MALLOC

/* Relocating allocator: every operation works on `failure_stack_ptr';
   the SOURCE/OSIZE/PTR arguments are not used.  */
#  define REGEX_ALLOCATE_STACK(size)				\
  r_alloc (&failure_stack_ptr, (size))
#  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
  r_re_alloc (&failure_stack_ptr, (nsize))
#  define REGEX_FREE_STACK(ptr)					\
  r_alloc_free (&failure_stack_ptr)

# else /* not using relocating allocator */

#  ifdef REGEX_MALLOC

#   define REGEX_ALLOCATE_STACK malloc
#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#   define REGEX_FREE_STACK free

#  else /* not REGEX_MALLOC */

#   define REGEX_ALLOCATE_STACK alloca

#   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
   REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything.  Expand to a void expression
   rather than to nothing, so that this variant can be used wherever
   the other two variants (which are function calls) can, and matches
   REGEX_FREE.  */
#   define REGEX_FREE_STACK(arg) ((void)0)

#  endif /* not REGEX_MALLOC */
# endif /* not using relocating allocator */
385
386
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
390	# define FIRST_STRING_P(ptr) \
391	(size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
392
/* (Re)Allocate N items of type T using malloc, or fail.
   TALLOC yields a `T *' to fresh storage; RETALLOC resizes ADDR in
   place (ADDR must be an lvalue); RETALLOC_IF picks RETALLOC when ADDR
   is non-null and TALLOC otherwise.  RETALLOC_IF expands to a bare
   if/else statement, so it must be used as a statement of its own.
   REGEX_TALLOC is TALLOC on top of REGEX_ALLOCATE.  */
# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
# define RETALLOC_IF(addr, n, t) \
  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
399
400	# define BYTEWIDTH 8 /* In bits. */
401
402	# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
403
404	# undef MAX
405	# undef MIN
406	# define MAX(a, b) ((a) > (b) ? (a) : (b))
407	# define MIN(a, b) ((a) < (b) ? (a) : (b))
408
409	typedef char boolean;
410	# define false 0
411	# define true 1
412
413	static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
414	reg_syntax_t syntax,
415	struct re_pattern_buffer *bufp));
416
417	static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
418	const char *string1, int size1,
419	const char *string2, int size2,
420	int pos,
421	struct re_registers *regs,
422	int stop));
423	static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
424	const char *string1, int size1,
425	const char *string2, int size2,
426	int startpos, int range,
427	struct re_registers *regs, int stop));
428	static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
429
430	#ifdef MBS_SUPPORT
431	static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
432	reg_syntax_t syntax,
433	struct re_pattern_buffer *bufp));
434
435
436	static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
437	const char *cstring1, int csize1,
438	const char *cstring2, int csize2,
439	int pos,
440	struct re_registers *regs,
441	int stop,
442	wchar_t *string1, int size1,
443	wchar_t *string2, int size2,
444	int mbs_offset1, int mbs_offset2));
445	static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
446	const char *string1, int size1,
447	const char *string2, int size2,
448	int startpos, int range,
449	struct re_registers *regs, int stop));
450	static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
451	#endif
452
453
454	/* These are the command codes that appear in compiled regular
455	expressions. Some opcodes are followed by argument bytes. A
456	command code can specify any interpretation whatsoever for its
457	arguments. Zero bytes may appear in the compiled regular expression. */
458
typedef enum
{
  no_op = 0,

  /* Stop at once with success; no further backtracking.  */
  succeed,

  /* One byte N follows, and after it N literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* Like exactn, except that the payload is binary data.  */
  exactn_bin,
# endif

  /* Match any character (more or less).  */
  anychar,

  /* Match one character out of a set.  The next byte is the number
     of bitmap bytes and the bitmap follows it.  Within each bitmap
     byte the lowest bit comes first.  A character is a member when
     its bit is 1; a character too large to have a bit in the map is
     never a member.  */
  /* With MBS_SUPPORT the opcode is followed by the lengths of the
     character classes, collating symbols, equivalence classes,
     character ranges and characters, and then by the elements
     themselves in that same order.
     See regex_compile function.  */
  charset,

  /* Takes the same parameters as charset but matches any character
     that is NOT one of those specified.  */
  charset_not,

  /* Begin remembering matched text so it can be stored in a
     register.  One byte with the register number follows (0 up to
     one less than the pattern buffer's re_nsub field), then one byte
     with the number of groups nested inside this one.  (That count
     lives in start_memory only because the on_failure_jump of
     re_match_2 needs it.)  */
  start_memory,

  /* Finish remembering matched text and store it in a memory
     register.  One byte with the register number follows (0 up to
     one less than `re_nsub' in the pattern buffer), then one byte
     with the number of inner groups, exactly as for `start_memory'.
     (The count is repeated here because there is no easy way to find
     the matching start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a copy of text remembered earlier.  One byte with the
     register number follows.  */
  duplicate,

  /* Fail unless positioned at the beginning of a line.  */
  begline,

  /* Fail unless positioned at the end of a line.  */
  endline,

  /* Succeed at the beginning of the buffer (when built for emacs)
     or at the beginning of the string being matched (otherwise).  */
  begbuf,

  /* The same, for the end of the buffer/string.  */
  endbuf,

  /* A two-byte relative address follows; jump to it.  */
  jump,

  /* Like jump, but also marks the end of an alternative.  */
  jump_past_alt,

  /* A two-byte relative address follows, giving the place to resume
     at if matching fails.  */
  /* With MBS_SUPPORT the address occupies 1 element.  */
  on_failure_jump,

  /* Like on_failure_jump, except that a placeholder is pushed in
     place of the current string position when executed.  */
  on_failure_keep_string_jump,

  /* Discard the most recent failure point, then jump to the two-byte
     relative address that follows.  */
  /* With MBS_SUPPORT the address occupies 1 element.  */
  pop_failure_jump,

  /* Becomes pop_failure_jump when it is known that no backtracking
     will be needed to match, and becomes jump otherwise.  It is used
     to jump back to the beginning of a repeat: if what follows this
     jump clearly cannot match what the repeat matches, so that
     backtracking out of repetitions already matched is pointless,
     it is changed to a pop_failure_jump.
     A two-byte address follows.  */
  /* With MBS_SUPPORT the address occupies 1 element.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy
     failure point, which is thrown away if an attempt is made to
     use it for a failure.  A `+' construct emits this before the
     first repeat.  It also serves as an intermediary kind of jump
     when compiling an alternative.  */
  /* With MBS_SUPPORT the address occupies 1 element.  */
  dummy_failure_jump,

  /* Push a dummy failure point and carry on.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* A two-byte relative address and a two-byte number N follow.
     After matching N times, jump to the address upon failure.  */
  /* With MBS_SUPPORT the address occupies 1 element.  */
  succeed_n,

  /* A two-byte relative address and a two-byte number N follow.
     Jump to the address N times, then fail.  */
  /* With MBS_SUPPORT the address occupies 1 element.  */
  jump_n,

  /* Store the subsequent two-byte number at the two-byte relative
     address that follows.  The address includes the two bytes of
     number.  */
  /* With MBS_SUPPORT the address occupies 1 element.  */
  set_number_at,

  /* Match any word-constituent character.  */
  wordchar,
  /* Match any character that is not a word-constituent.  */
  notwordchar,

  /* Succeed at the beginning of a word.  */
  wordbeg,
  /* Succeed at the end of a word.  */
  wordend,

  /* Succeed at a word boundary.  */
  wordbound,
  /* Succeed anywhere that is not a word boundary.  */
  notwordbound

# ifdef emacs
  /* Succeed if before point.  */
  ,before_dot,
  /* Succeed if at point.  */
  at_dot,
  /* Succeed if after point.  */
  after_dot,

  /* Match any character having the given syntax.  One byte with a
     syntax code (e.g., Sword) follows.  */
  syntaxspec,

  /* Match any character whose syntax is not the one given.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
611	#endif /* not INSIDE_RECURSION */
612
613
614
615	#ifdef BYTE
616	# define CHAR_T char
617	# define UCHAR_T unsigned char
618	# define COMPILED_BUFFER_VAR bufp->buffer
619	# define OFFSET_ADDRESS_SIZE 2
620	# if defined (__STDC__) \|\| defined (ALMOST_STDC) \|\| defined (HAVE_STRINGIZE)
621	# define PREFIX(name) byte_##name
622	# else
623	# define PREFIX(name) byte_/**/name
624	# endif
625	# define ARG_PREFIX(name) name
626	# define PUT_CHAR(c) putchar (c)
627	#else
628	# ifdef WCHAR
629	# define CHAR_T wchar_t
630	# define UCHAR_T wchar_t
631	# define COMPILED_BUFFER_VAR wc_buffer
632	# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
633	# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
634	# if defined (__STDC__) \|\| defined (ALMOST_STDC) \|\| defined (HAVE_STRINGIZE)
635	# define PREFIX(name) wcs_##name
636	# define ARG_PREFIX(name) c##name
637	# else
638	# define PREFIX(name) wcs_/**/name
639	# define ARG_PREFIX(name) c/**/name
640	# endif
641	/* Should we use wide stream?? */
642	# define PUT_CHAR(c) printf ("%C", c);
643	# define TRUE 1
644	# define FALSE 0
645	# else
646	# ifdef MBS_SUPPORT
647	# define WCHAR
648	# define INSIDE_RECURSION
649	# include "regex.c"
650	# undef INSIDE_RECURSION
651	# endif
652	# define BYTE
653	# define INSIDE_RECURSION
654	# include "regex.c"
655	# undef INSIDE_RECURSION
656	# endif
657	#endif
658
659	#ifdef INSIDE_RECURSION
660	/* Common operations on the compiled pattern. */
661
662	/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
663	/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
664
665	# ifdef WCHAR
666	# define STORE_NUMBER(destination, number) \
667	do { \
668	*(destination) = (UCHAR_T)(number); \
669	} while (0)
670	# else /* BYTE */
671	# define STORE_NUMBER(destination, number) \
672	do { \
673	(destination)[0] = (number) & 0377; \
674	(destination)[1] = (number) >> 8; \
675	} while (0)
676	# endif /* WCHAR */
677
678	/* Same as STORE_NUMBER, except increment DESTINATION to
679	the byte after where the number is stored. Therefore, DESTINATION
680	must be an lvalue. */
681	/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
682
683	# define STORE_NUMBER_AND_INCR(destination, number) \
684	do { \
685	STORE_NUMBER (destination, number); \
686	(destination) += OFFSET_ADDRESS_SIZE; \
687	} while (0)
688
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  The first byte is the low-order byte; the second byte is
   sign-extended and supplies the high-order part, so negative numbers
   are recovered.  */
/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else /* BYTE */
/* The high byte is scaled by multiplication, not by `<< 8': it may be
   negative after sign extension, and left-shifting a negative value
   is undefined in ISO C.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)
# endif
705
706	# ifdef DEBUG
707	static void PREFIX(extract_number) _RE_ARGS ((int dest, UCHAR_T source));
708	static void
709	PREFIX(extract_number) (dest, source)
710	int *dest;
711	UCHAR_T *source;
712	{
713	# ifdef WCHAR
714	dest = source;
715	# else /* BYTE */
716	int temp = SIGN_EXTEND_CHAR (*(source + 1));
717	dest = source & 0377;
718	*dest += temp << 8;
719	# endif
720	}
721
722	# ifndef EXTRACT_MACROS /* To debug the macros. */
723	# undef EXTRACT_NUMBER
724	# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
725	# endif /* not EXTRACT_MACROS */
726
727	# endif /* DEBUG */
728
729	/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
730	SOURCE must be an lvalue. */
731
732	# define EXTRACT_NUMBER_AND_INCR(destination, source) \
733	do { \
734	EXTRACT_NUMBER (destination, source); \
735	(source) += OFFSET_ADDRESS_SIZE; \
736	} while (0)
737
738	# ifdef DEBUG
739	static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination,
740	UCHAR_T **source));
741	static void
742	PREFIX(extract_number_and_incr) (destination, source)
743	int *destination;
744	UCHAR_T **source;
745	{
746	PREFIX(extract_number) (destination, *source);
747	*source += OFFSET_ADDRESS_SIZE;
748	}
749
750	# ifndef EXTRACT_MACROS
751	# undef EXTRACT_NUMBER_AND_INCR
752	# define EXTRACT_NUMBER_AND_INCR(dest, src) \
753	PREFIX(extract_number_and_incr) (&dest, &src)
754	# endif /* not EXTRACT_MACROS */
755
756	# endif /* DEBUG */
757
758
759
760
761	/* If DEBUG is defined, Regex prints many voluminous messages about what
762	it is doing (if the variable `debug' is nonzero). If linked with the
763	main program in `iregex.c', you can enter patterns and strings
764	interactively. And if linked with the main program in `main.c' and
765	the other test files, you can run the already-written tests. */
766
767	# ifdef DEBUG
768
769	# ifndef DEFINED_ONCE
770
771	/* We use standard I/O for debugging. */
772	# include <stdio.h>
773
774	/* It is useful to test things that ``must'' be true when debugging. */
775	# include <assert.h>
776
777	static int debug;
778
779	# define DEBUG_STATEMENT(e) e
780	# define DEBUG_PRINT1(x) if (debug) printf (x)
781	# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
782	# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
783	# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
784	# endif /* not DEFINED_ONCE */
785
786	# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
787	if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
788	# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
789	if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
790
791
792	/* Print the fastmap in human-readable form. */
793
794	# ifndef DEFINED_ONCE
795	void
796	print_fastmap (fastmap)
797	char *fastmap;
798	{
799	unsigned was_a_range = 0;
800	unsigned i = 0;
801
802	while (i < (1 << BYTEWIDTH))
803	{
804	if (fastmap[i++])
805	{
806	was_a_range = 0;
807	putchar (i - 1);
808	while (i < (1 << BYTEWIDTH) && fastmap[i])
809	{
810	was_a_range = 1;
811	i++;
812	}
813	if (was_a_range)
814	{
815	printf ("-");
816	putchar (i - 1);
817	}
818	}
819	}
820	putchar ('\n');
821	}
822	# endif /* not DEFINED_ONCE */
823
824
825	/* Print a compiled pattern string in human-readable form, starting at
826	the START pointer into it and ending just before the pointer END. */
827
828	void
829	PREFIX(print_partial_compiled_pattern) (start, end)
830	UCHAR_T *start;
831	UCHAR_T *end;
832	{
833	int mcnt, mcnt2;
834	UCHAR_T *p1;
835	UCHAR_T *p = start;
836	UCHAR_T *pend = end;
837
838	if (start == NULL)
839	{
840	printf ("(null)\n");
841	return;
842	}
843
844	/* Loop over pattern commands. */
845	while (p < pend)
846	{
847	# ifdef _LIBC
848	printf ("%td:\t", p - start);
849	# else
850	printf ("%ld:\t", (long int) (p - start));
851	# endif
852
853	switch ((re_opcode_t) *p++)
854	{
855	case no_op:
856	printf ("/no_op");
857	break;
858
859	case exactn:
860	mcnt = *p++;
861	printf ("/exactn/%d", mcnt);
862	do
863	{
864	putchar ('/');
865	PUT_CHAR (*p++);
866	}
867	while (--mcnt);
868	break;
869
870	# ifdef MBS_SUPPORT
871	case exactn_bin:
872	mcnt = *p++;
873	printf ("/exactn_bin/%d", mcnt);
874	do
875	{
876	printf("/%lx", (long int) *p++);
877	}
878	while (--mcnt);
879	break;
880	# endif /* MBS_SUPPORT */
881
882	case start_memory:
883	mcnt = *p++;
884	printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
885	break;
886
887	case stop_memory:
888	mcnt = *p++;
889	printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
890	break;
891
892	case duplicate:
893	printf ("/duplicate/%ld", (long int) *p++);
894	break;
895
896	case anychar:
897	printf ("/anychar");
898	break;
899
900	case charset:
901	case charset_not:
902	{
903	# ifdef WCHAR
904	int i, length;
905	wchar_t *workp = p;
906	printf ("/charset [%s",
907	(re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
908	p += 5;
909	length = workp++; / the length of char_classes */
910	for (i=0 ; i<length ; i++)
911	printf("[:%lx:]", (long int) *p++);
912	length = workp++; / the length of collating_symbol */
913	for (i=0 ; i<length ;)
914	{
915	printf("[.");
916	while(*p != 0)
917	PUT_CHAR((i++,*p++));
918	i++,p++;
919	printf(".]");
920	}
921	length = workp++; / the length of equivalence_class */
922	for (i=0 ; i<length ;)
923	{
924	printf("[=");
925	while(*p != 0)
926	PUT_CHAR((i++,*p++));
927	i++,p++;
928	printf("=]");
929	}
930	length = workp++; / the length of char_range */
931	for (i=0 ; i<length ; i++)
932	{
933	wchar_t range_start = *p++;
934	wchar_t range_end = *p++;
935	printf("%C-%C", range_start, range_end);
936	}
937	length = workp++; / the length of char */
938	for (i=0 ; i<length ; i++)
939	printf("%C", *p++);
940	putchar (']');
941	# else
942	register int c, last = -100;
943	register int in_range = 0;
944
945	printf ("/charset [%s",
946	(re_opcode_t) *(p - 1) == charset_not ? "^" : "");
947
948	assert (p + *p < pend);
949
950	for (c = 0; c < 256; c++)
951	if (c / 8 < *p
952	&& (p[1 + (c/8)] & (1 << (c % 8))))
953	{
954	/* Are we starting a range? */
955	if (last + 1 == c && ! in_range)
956	{
957	putchar ('-');
958	in_range = 1;
959	}
960	/* Have we broken a range? */
961	else if (last + 1 != c && in_range)
962	{
963	putchar (last);
964	in_range = 0;
965	}
966
967	if (! in_range)
968	putchar (c);
969
970	last = c;
971	}
972
973	if (in_range)
974	putchar (last);
975
976	putchar (']');
977
978	p += 1 + *p;
979	# endif /* WCHAR */
980	}
981	break;
982
983	case begline:
984	printf ("/begline");
985	break;
986
987	case endline:
988	printf ("/endline");
989	break;
990
991	case on_failure_jump:
992	PREFIX(extract_number_and_incr) (&mcnt, &p);
993	# ifdef _LIBC
994	printf ("/on_failure_jump to %td", p + mcnt - start);
995	# else
996	printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
997	# endif
998	break;
999
1000	case on_failure_keep_string_jump:
1001	PREFIX(extract_number_and_incr) (&mcnt, &p);
1002	# ifdef _LIBC
1003	printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
1004	# else
1005	printf ("/on_failure_keep_string_jump to %ld",
1006	(long int) (p + mcnt - start));
1007	# endif
1008	break;
1009
1010	case dummy_failure_jump:
1011	PREFIX(extract_number_and_incr) (&mcnt, &p);
1012	# ifdef _LIBC
1013	printf ("/dummy_failure_jump to %td", p + mcnt - start);
1014	# else
1015	printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
1016	# endif
1017	break;
1018
1019	case push_dummy_failure:
1020	printf ("/push_dummy_failure");
1021	break;
1022
1023	case maybe_pop_jump:
1024	PREFIX(extract_number_and_incr) (&mcnt, &p);
1025	# ifdef _LIBC
1026	printf ("/maybe_pop_jump to %td", p + mcnt - start);
1027	# else
1028	printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
1029	# endif
1030	break;
1031
1032	case pop_failure_jump:
1033	PREFIX(extract_number_and_incr) (&mcnt, &p);
1034	# ifdef _LIBC
1035	printf ("/pop_failure_jump to %td", p + mcnt - start);
1036	# else
1037	printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
1038	# endif
1039	break;
1040
1041	case jump_past_alt:
1042	PREFIX(extract_number_and_incr) (&mcnt, &p);
1043	# ifdef _LIBC
1044	printf ("/jump_past_alt to %td", p + mcnt - start);
1045	# else
1046	printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
1047	# endif
1048	break;
1049
1050	case jump:
1051	PREFIX(extract_number_and_incr) (&mcnt, &p);
1052	# ifdef _LIBC
1053	printf ("/jump to %td", p + mcnt - start);
1054	# else
1055	printf ("/jump to %ld", (long int) (p + mcnt - start));
1056	# endif
1057	break;
1058
1059	case succeed_n:
1060	PREFIX(extract_number_and_incr) (&mcnt, &p);
1061	p1 = p + mcnt;
1062	PREFIX(extract_number_and_incr) (&mcnt2, &p);
1063	# ifdef _LIBC
1064	printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
1065	# else
1066	printf ("/succeed_n to %ld, %d times",
1067	(long int) (p1 - start), mcnt2);
1068	# endif
1069	break;
1070
1071	case jump_n:
1072	PREFIX(extract_number_and_incr) (&mcnt, &p);
1073	p1 = p + mcnt;
1074	PREFIX(extract_number_and_incr) (&mcnt2, &p);
1075	printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
1076	break;
1077
1078	case set_number_at:
1079	PREFIX(extract_number_and_incr) (&mcnt, &p);
1080	p1 = p + mcnt;
1081	PREFIX(extract_number_and_incr) (&mcnt2, &p);
1082	# ifdef _LIBC
1083	printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1084	# else
1085	printf ("/set_number_at location %ld to %d",
1086	(long int) (p1 - start), mcnt2);
1087	# endif
1088	break;
1089
1090	case wordbound:
1091	printf ("/wordbound");
1092	break;
1093
1094	case notwordbound:
1095	printf ("/notwordbound");
1096	break;
1097
1098	case wordbeg:
1099	printf ("/wordbeg");
1100	break;
1101
1102	case wordend:
1103	printf ("/wordend");
1104	break;
1105
1106	# ifdef emacs
1107	case before_dot:
1108	printf ("/before_dot");
1109	break;
1110
1111	case at_dot:
1112	printf ("/at_dot");
1113	break;
1114
1115	case after_dot:
1116	printf ("/after_dot");
1117	break;
1118
1119	case syntaxspec:
1120	printf ("/syntaxspec");
1121	mcnt = *p++;
1122	printf ("/%d", mcnt);
1123	break;
1124
1125	case notsyntaxspec:
1126	printf ("/notsyntaxspec");
1127	mcnt = *p++;
1128	printf ("/%d", mcnt);
1129	break;
1130	# endif /* emacs */
1131
1132	case wordchar:
1133	printf ("/wordchar");
1134	break;
1135
1136	case notwordchar:
1137	printf ("/notwordchar");
1138	break;
1139
1140	case begbuf:
1141	printf ("/begbuf");
1142	break;
1143
1144	case endbuf:
1145	printf ("/endbuf");
1146	break;
1147
1148	default:
1149	printf ("?%ld", (long int) *(p-1));
1150	}
1151
1152	putchar ('\n');
1153	}
1154
1155	# ifdef _LIBC
1156	printf ("%td:\tend of pattern.\n", p - start);
1157	# else
1158	printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1159	# endif
1160	}
1161
1162
1163	void
1164	PREFIX(print_compiled_pattern) (bufp)
1165	struct re_pattern_buffer *bufp;
1166	{
1167	UCHAR_T buffer = (UCHAR_T) bufp->buffer;
1168
1169	PREFIX(print_partial_compiled_pattern) (buffer, buffer
1170	+ bufp->used / sizeof(UCHAR_T));
1171	printf ("%ld bytes used/%ld bytes allocated.\n",
1172	bufp->used, bufp->allocated);
1173
1174	if (bufp->fastmap_accurate && bufp->fastmap)
1175	{
1176	printf ("fastmap: ");
1177	print_fastmap (bufp->fastmap);
1178	}
1179
1180	# ifdef _LIBC
1181	printf ("re_nsub: %Zd\t", bufp->re_nsub);
1182	# else
1183	printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1184	# endif
1185	printf ("regs_alloc: %d\t", bufp->regs_allocated);
1186	printf ("can_be_null: %d\t", bufp->can_be_null);
1187	printf ("newline_anchor: %d\n", bufp->newline_anchor);
1188	printf ("no_sub: %d\t", bufp->no_sub);
1189	printf ("not_bol: %d\t", bufp->not_bol);
1190	printf ("not_eol: %d\t", bufp->not_eol);
1191	printf ("syntax: %lx\n", bufp->syntax);
1192	/* Perhaps we should print the translate table? */
1193	}
1194
1195
1196	void
1197	PREFIX(print_double_string) (where, string1, size1, string2, size2)
1198	const CHAR_T *where;
1199	const CHAR_T *string1;
1200	const CHAR_T *string2;
1201	int size1;
1202	int size2;
1203	{
1204	int this_char;
1205
1206	if (where == NULL)
1207	printf ("(null)");
1208	else
1209	{
1210	int cnt;
1211
1212	if (FIRST_STRING_P (where))
1213	{
1214	for (this_char = where - string1; this_char < size1; this_char++)
1215	PUT_CHAR (string1[this_char]);
1216
1217	where = string2;
1218	}
1219
1220	cnt = 0;
1221	for (this_char = where - string2; this_char < size2; this_char++)
1222	{
1223	PUT_CHAR (string2[this_char]);
1224	if (++cnt > 100)
1225	{
1226	fputs ("...", stdout);
1227	break;
1228	}
1229	}
1230	}
1231	}
1232
1233	# ifndef DEFINED_ONCE
/* Write the single character C to the standard error stream.  */
void
printchar (c)
    int c;
{
  fputc (c, stderr);
}
1240	# endif
1241
1242	# else /* not DEBUG */
1243	
/* Without DEBUG every debugging hook below is defined with an empty
   expansion, so uses of assert and of the DEBUG_* macros elsewhere
   produce no code.  assert, DEBUG_STATEMENT and DEBUG_PRINT1..4 are
   only (re)defined while DEFINED_ONCE is not set; the two printer
   hooks at the end sit outside that guard and are defined whenever
   this branch is processed.  */
1244	# ifndef DEFINED_ONCE
1245	# undef assert
1246	# define assert(e)
1247	
1248	# define DEBUG_STATEMENT(e)
1249	# define DEBUG_PRINT1(x)
1250	# define DEBUG_PRINT2(x1, x2)
1251	# define DEBUG_PRINT3(x1, x2, x3)
1252	# define DEBUG_PRINT4(x1, x2, x3, x4)
1253	# endif /* not DEFINED_ONCE */
1254	# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1255	# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1256	
1257	# endif /* not DEBUG */
1258
1259
1260
1261
1262	# ifdef WCHAR
1263	/* This convert a multibyte string to a wide character string.
1264	And write their correspondances to offset_buffer(see below)
1265	and write whether each wchar_t is binary data to is_binary.
1266	This assume invalid multibyte sequences as binary data.
1267	We assume offset_buffer and is_binary is already allocated
1268	enough space. */
1269
1270	static size_t convert_mbs_to_wcs (CHAR_T dest, const unsigned char src,
1271	size_t len, int *offset_buffer,
1272	char *is_binary);
1273	static size_t
1274	convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
1275	CHAR_T *dest;
1276	const unsigned char* src;
1277	size_t len; /* the length of multibyte string. */
1278
1279	/* It hold correspondances between src(char string) and
1280	dest(wchar_t string) for optimization.
1281	e.g. src = "xxxyzz"
1282	dest = {'X', 'Y', 'Z'}
1283	(each "xxx", "y" and "zz" represent one multibyte character
1284	corresponding to 'X', 'Y' and 'Z'.)
1285	offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1286	= {0, 3, 4, 6}
1287	*/
1288	int *offset_buffer;
1289	char *is_binary;
1290	{
1291	wchar_t *pdest = dest;
1292	const unsigned char *psrc = src;
1293	size_t wc_count = 0;
1294
1295	mbstate_t mbs;
1296	int i, consumed;
1297	size_t mb_remain = len;
1298	size_t mb_count = 0;
1299
1300	/* Initialize the conversion state. */
1301	memset (&mbs, 0, sizeof (mbstate_t));
1302
1303	offset_buffer[0] = 0;
1304	for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1305	psrc += consumed)
1306	{
1307	#ifdef _LIBC
1308	consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
1309	#else
1310	consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1311	#endif
1312
1313	if (consumed <= 0)
1314	/* failed to convert. maybe src contains binary data.
1315	So we consume 1 byte manualy. */
1316	{
1317	pdest = psrc;
1318	consumed = 1;
1319	is_binary[wc_count] = TRUE;
1320	}
1321	else
1322	is_binary[wc_count] = FALSE;
1323	/* In sjis encoding, we use yen sign as escape character in
1324	place of reverse solidus. So we convert 0x5c(yen sign in
1325	sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1326	solidus in UCS2). */
1327	if (consumed == 1 && (int) psrc == 0x5c && (int) pdest == 0xa5)
1328	pdest = (wchar_t) psrc;
1329
1330	offset_buffer[wc_count + 1] = mb_count += consumed;
1331	}
1332
1333	/* Fill remain of the buffer with sentinel. */
1334	for (i = wc_count + 1 ; i <= len ; i++)
1335	offset_buffer[i] = mb_count + 1;
1336
1337	return wc_count;
1338	}
1339
1340	# endif /* WCHAR */
1341
1342	#else /* not INSIDE_RECURSION */
1343
1344	/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1345	also be assigned to arbitrarily: each pattern buffer stores its own
1346	syntax, so it can be changed between regex compilations. */
1347	/* This has no initializer because initialized variables in Emacs
1348	become read-only after dumping. */
1349	reg_syntax_t re_syntax_options;
1350
1351
1352	/* Specify the precise syntax of regexps for compilation. This provides
1353	for compatibility for various utilities which historically have
1354	different, incompatible syntaxes.
1355
1356	The argument SYNTAX is a bit mask comprised of the various bits
1357	defined in regex.h. We return the old syntax. */
1358
1359	reg_syntax_t
1360	re_set_syntax (syntax)
1361	reg_syntax_t syntax;
1362	{
1363	reg_syntax_t ret = re_syntax_options;
1364
1365	re_syntax_options = syntax;
1366	# ifdef DEBUG
1367	if (syntax & RE_DEBUG)
1368	debug = 1;
1369	else if (debug) /* was on but now is not */
1370	debug = 0;
1371	# endif /* DEBUG */
1372	return ret;
1373	}
1374	# ifdef _LIBC
1375	weak_alias (__re_set_syntax, re_set_syntax)
1376	# endif
1377
1378
1379	/* This table gives an error message for each of the error codes listed
1380	in regex.h. Obviously the order here has to be same as there.
1381	POSIX doesn't require that we do anything for REG_NOERROR,
1382	but why not be nice? */
1383	
/* Layout: all messages are adjacent string literals, so they are
   concatenated into one char array.  An explicit "\0" after each
   message terminates it inside that array; the last message has none
   and relies on the terminator of the string literal itself.

   Each *_IDX macro is the offset of its message within the array:
   the previous message's offset plus `sizeof' of the previous message
   text, which counts that message's terminating NUL.  The text given
   to `sizeof' must therefore stay identical to the message itself.  */
1384	static const char re_error_msgid[] =
1385	{
1386	# define REG_NOERROR_IDX 0
1387	gettext_noop ("Success") /* REG_NOERROR */
1388	"\0"
1389	# define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1390	gettext_noop ("No match") /* REG_NOMATCH */
1391	"\0"
1392	# define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
1393	gettext_noop ("Invalid regular expression") /* REG_BADPAT */
1394	"\0"
1395	# define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
1396	gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
1397	"\0"
1398	# define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1399	gettext_noop ("Invalid character class name") /* REG_ECTYPE */
1400	"\0"
1401	# define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
1402	gettext_noop ("Trailing backslash") /* REG_EESCAPE */
1403	"\0"
1404	# define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
1405	gettext_noop ("Invalid back reference") /* REG_ESUBREG */
1406	"\0"
1407	# define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
1408	gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
1409	"\0"
1410	# define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1411	gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
1412	"\0"
1413	# define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1414	gettext_noop ("Unmatched \\{") /* REG_EBRACE */
1415	"\0"
1416	# define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
1417	gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
1418	"\0"
1419	# define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1420	gettext_noop ("Invalid range end") /* REG_ERANGE */
1421	"\0"
1422	# define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
1423	gettext_noop ("Memory exhausted") /* REG_ESPACE */
1424	"\0"
1425	# define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
1426	gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
1427	"\0"
1428	# define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1429	gettext_noop ("Premature end of regular expression") /* REG_EEND */
1430	"\0"
1431	# define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
1432	gettext_noop ("Regular expression too big") /* REG_ESIZE */
1433	"\0"
1434	# define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
1435	gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
1436	};
1437
/* Offsets into re_error_msgid, one per message and listed in the same
   order as the messages appear there, using the *_IDX macros defined
   alongside them.  Presumably indexed by the error code value -- the
   table comment for re_error_msgid says the order matches regex.h;
   confirm against the users of this array.  */
1438	static const size_t re_error_msgid_idx[] =
1439	{
1440	REG_NOERROR_IDX,
1441	REG_NOMATCH_IDX,
1442	REG_BADPAT_IDX,
1443	REG_ECOLLATE_IDX,
1444	REG_ECTYPE_IDX,
1445	REG_EESCAPE_IDX,
1446	REG_ESUBREG_IDX,
1447	REG_EBRACK_IDX,
1448	REG_EPAREN_IDX,
1449	REG_EBRACE_IDX,
1450	REG_BADBR_IDX,
1451	REG_ERANGE_IDX,
1452	REG_ESPACE_IDX,
1453	REG_BADRPT_IDX,
1454	REG_EEND_IDX,
1455	REG_ESIZE_IDX,
1456	REG_ERPAREN_IDX
1457	};
1458
1459
1460	#endif /* INSIDE_RECURSION */
1461
#ifndef DEFINED_ONCE
/* Avoiding alloca during matching, to placate r_alloc.  */

/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   searching and matching functions should not call alloca.  On some
   systems, alloca is implemented in terms of malloc, and if we're
   using the relocating allocator routines, then malloc could cause a
   relocation, which might (if the strings being searched are in the
   ralloc heap) shift the data out from underneath the regexp
   routines.

   Here's another reason to avoid allocation: Emacs
   processes input from X in a signal handler; processing X input may
   call malloc; if input arrives while a matching routine is calling
   malloc, then we're scrod.  But Emacs can't just block input while
   calling matching routines; then we don't notice interrupts when
   they come in.  So, Emacs blocks input around all regexp calls
   except the matching calls, which it leaves unprotected, in the
   faith that they will not malloc.  */

/* Normally, this is fine.  */
# define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
# ifdef __GNUC__
#  undef C_ALLOCA
# endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.  */
/* Net effect: MATCH_MAY_ALLOCATE stays defined unless this is an emacs
   build that has C_ALLOCA or REGEX_MALLOC defined.  */
# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
#  undef MATCH_MAY_ALLOCATE
# endif
#endif /* not DEFINED_ONCE */
1500
1501
1502	#ifdef INSIDE_RECURSION
1503	/* Failure stack declarations and macros; both re_compile_fastmap and
1504	re_match_2 use a failure stack. These have to be macros because of
1505	REGEX_ALLOCATE_STACK. */
1506
1507
1508	/* Number of failure points for which to initially allocate space
1509	when matching. If this number is exceeded, we allocate more
1510	space, so it is not a hard limit. */
/* The #ifndef guard lets a build supply its own value beforehand.
   NOTE(review): INIT_FAIL_STACK multiplies this value by the size of a
   single failure-stack element, so it sizes the initial stack in
   elements; one failure point occupies NUM_FAILURE_ITEMS of them.  */
1511	# ifndef INIT_FAILURE_ALLOC
1512	# define INIT_FAILURE_ALLOC 5
1513	# endif
1514
1515	/* Roughly the maximum number of failure points on the stack. Would be
1516	exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1517	This is a variable only so users of regex can assign to it; we never
1518	change it ourselves. */
1519	
/* The two branches below are identical except for the integer widths:
   with INT_IS_16BIT the limit, the integer member of a stack element
   and the size/avail counters use `long' types; otherwise plain
   `int' / `unsigned'.  In both branches re_max_failures is defined
   only while DEFINED_ONCE is unset, and its initial value is 4000 when
   MATCH_MAY_ALLOCATE is defined and 2000 otherwise.  */
1520	# ifdef INT_IS_16BIT
1521	
1522	# ifndef DEFINED_ONCE
1523	# if defined MATCH_MAY_ALLOCATE
1524	/* 4400 was enough to cause a crash on Alpha OSF/1,
1525	whose default stack limit is 2mb. */
1526	long int re_max_failures = 4000;
1527	# else
1528	long int re_max_failures = 2000;
1529	# endif
1530	# endif
1531	
/* One slot of the failure stack: either a pointer or an integer.  */
1532	union PREFIX(fail_stack_elt)
1533	{
1534	UCHAR_T *pointer;
1535	long int integer;
1536	};
1537	
1538	typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1539	
/* The failure stack itself: an array of slots, its capacity in slots
   (`size') and the index of the next free slot (`avail').  */
1540	typedef struct
1541	{
1542	PREFIX(fail_stack_elt_t) *stack;
1543	unsigned long int size;
1544	unsigned long int avail; /* Offset of next open position. */
1545	} PREFIX(fail_stack_type);
1546	
1547	# else /* not INT_IS_16BIT */
1548	
1549	# ifndef DEFINED_ONCE
1550	# if defined MATCH_MAY_ALLOCATE
1551	/* 4400 was enough to cause a crash on Alpha OSF/1,
1552	whose default stack limit is 2mb. */
1553	int re_max_failures = 4000;
1554	# else
1555	int re_max_failures = 2000;
1556	# endif
1557	# endif
1558	
/* One slot of the failure stack: either a pointer or an integer.  */
1559	union PREFIX(fail_stack_elt)
1560	{
1561	UCHAR_T *pointer;
1562	int integer;
1563	};
1564	
1565	typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1566	
/* The failure stack itself: an array of slots, its capacity in slots
   (`size') and the index of the next free slot (`avail').  */
1567	typedef struct
1568	{
1569	PREFIX(fail_stack_elt_t) *stack;
1570	unsigned size;
1571	unsigned avail; /* Offset of next open position. */
1572	} PREFIX(fail_stack_type);
1573	
1574	# endif /* INT_IS_16BIT */
1575
/* State predicates.  They name the variable directly: FAIL_STACK_EMPTY
   and FAIL_STACK_FULL expect an object called `fail_stack' at the point
   of use, FAIL_STACK_PTR_EMPTY a pointer called `fail_stack_ptr'.  */
1576	# ifndef DEFINED_ONCE
1577	# define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
1578	# define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1579	# define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1580	# endif
1581	
1582	
1583	/* Define macros to initialize and free the failure stack.
1584	Do `return -2' if the alloc fails. */
/* With MATCH_MAY_ALLOCATE, INIT_FAIL_STACK allocates room for
   INIT_FAILURE_ALLOC stack elements through REGEX_ALLOCATE_STACK and
   contains a `return -2' statement, so it can only be used inside a
   function for which returning -2 is meaningful; RESET_FAIL_STACK
   hands the storage to REGEX_FREE_STACK.  Without MATCH_MAY_ALLOCATE,
   INIT_FAIL_STACK only resets `avail' and RESET_FAIL_STACK expands to
   nothing.  All of them operate on the variable `fail_stack'.  */
1585	
1586	# ifdef MATCH_MAY_ALLOCATE
1587	# define INIT_FAIL_STACK() \
1588	do { \
1589	fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
1590	REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
1591	\
1592	if (fail_stack.stack == NULL) \
1593	return -2; \
1594	\
1595	fail_stack.size = INIT_FAILURE_ALLOC; \
1596	fail_stack.avail = 0; \
1597	} while (0)
1598	
1599	# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1600	# else
1601	# define INIT_FAIL_STACK() \
1602	do { \
1603	fail_stack.avail = 0; \
1604	} while (0)
1605	
1606	# define RESET_FAIL_STACK()
1607	# endif
1608
1609
1610	/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1611	
1612	Return 1 if succeeds, and 0 if either ran out of memory
1613	allocating space for it or it was already too large.
1614	
1615	REGEX_REALLOCATE_STACK requires `destination' be declared. */
/* The limit is compared against the size *before* doubling, which is
   why the bound is only approximate: a stack whose size is at or just
   below re_max_failures * MAX_FAILURE_ITEMS is still doubled once.

   NOTE(review): the result of REGEX_REALLOCATE_STACK is stored into
   (fail_stack).stack before it is tested, so after a failed
   reallocation the previous stack pointer is no longer held in
   FAIL_STACK.  Whether that loses memory depends on how
   REGEX_REALLOCATE_STACK is implemented -- confirm there.  */
1616	
1617	# define DOUBLE_FAIL_STACK(fail_stack) \
1618	((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
1619	? 0 \
1620	: ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \
1621	REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1622	(fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \
1623	((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
1624	\
1625	(fail_stack).stack == NULL \
1626	? 0 \
1627	: ((fail_stack).size <<= 1, \
1628	1)))
1629
1630
1631	/* Push pointer POINTER on FAIL_STACK.
1632	Return 1 if was able to do so and 0 if ran out of memory allocating
1633	space to do so. */
/* NOTE(review): the fullness test goes through FAIL_STACK_FULL (),
   which inspects the variable literally named `fail_stack' rather than
   the FAIL_STACK argument; the two agree only when the argument is
   that variable.  POINTER is used unparenthesized on the right-hand
   side of the assignment.  */
1634	# define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
1635	((FAIL_STACK_FULL () \
1636	&& !DOUBLE_FAIL_STACK (FAIL_STACK)) \
1637	? 0 \
1638	: ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1639	1))
1640	
/* None of the PUSH_FAILURE_* macros below checks for free space; the
   caller must have made sure enough slots are available.  */
1641	/* Push a pointer value onto the failure stack.
1642	Assumes the variable `fail_stack'. Probably should only
1643	be called from within `PUSH_FAILURE_POINT'. */
1644	# define PUSH_FAILURE_POINTER(item) \
1645	fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
1646	
1647	/* This pushes an integer-valued item onto the failure stack.
1648	Assumes the variable `fail_stack'. Probably should only
1649	be called from within `PUSH_FAILURE_POINT'. */
1650	# define PUSH_FAILURE_INT(item) \
1651	fail_stack.stack[fail_stack.avail++].integer = (item)
1652	
1653	/* Push a fail_stack_elt_t value onto the failure stack.
1654	Assumes the variable `fail_stack'. Probably should only
1655	be called from within `PUSH_FAILURE_POINT'. */
1656	# define PUSH_FAILURE_ELT(item) \
1657	fail_stack.stack[fail_stack.avail++] = (item)
1658	
1659	/* These three POP... operations complement the three PUSH... operations.
1660	All assume that `fail_stack' is nonempty. */
1661	# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1662	# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1663	# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1664	
1665	/* Used to omit pushing failure point id's when we're not debugging. */
/* With DEBUG, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores the
   popped integer through ITEM_ADDR; otherwise both expand to nothing,
   so the failure id occupies a stack slot only in DEBUG builds.  */
1666	# ifdef DEBUG
1667	# define DEBUG_PUSH PUSH_FAILURE_INT
1668	# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1669	# else
1670	# define DEBUG_PUSH(item)
1671	# define DEBUG_POP(item_addr)
1672	# endif
1673
1674
1675	/* Push the information about the state we will need
1676	if we ever fail back to it.
1677	
1678	Requires variables fail_stack, regstart, regend, reg_info, and
1679	num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
1680	be declared.
1681	
1682	Does `return FAILURE_CODE' if runs out of memory. */
/* Steps performed by the expansion:
     1. Double the stack until REMAINING_AVAIL_SLOTS is at least
        NUM_FAILURE_ITEMS; if doubling fails, `return failure_code'
        from the enclosing function.
     2. For every register from lowest_active_reg to highest_active_reg
        push, in this order: regstart[r], regend[r], reg_info[r].word.
     3. Push lowest_active_reg, highest_active_reg, PATTERN_PLACE and
        STRING_PLACE, then (DEBUG builds only) the failure id.
   POP_FAILURE_POINT has to read these back in the reverse order.
   Besides the variables listed above, the expansion also reads
   lowest_active_reg and highest_active_reg; the debugging hooks
   additionally refer to failure_id, nfailure_points_pushed, bufp,
   pend, string1, size1, string2 and size2.  The `if (1)' in front of
   the register loop is always true.  */
1683	
1684	# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
1685	do { \
1686	char *destination; \
1687	/* Must be int, so when we don't save any registers, the arithmetic \
1688	of 0 + -1 isn't done as unsigned. */ \
1689	/* Can't be int, since there is not a shred of a guarantee that int \
1690	is wide enough to hold a value of something to which pointer can \
1691	be assigned */ \
1692	active_reg_t this_reg; \
1693	\
1694	DEBUG_STATEMENT (failure_id++); \
1695	DEBUG_STATEMENT (nfailure_points_pushed++); \
1696	DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1697	DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1698	DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1699	\
1700	DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
1701	DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1702	\
1703	/* Ensure we have enough space allocated for what we will push. */ \
1704	while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1705	{ \
1706	if (!DOUBLE_FAIL_STACK (fail_stack)) \
1707	return failure_code; \
1708	\
1709	DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1710	(fail_stack).size); \
1711	DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1712	} \
1713	\
1714	/* Push the info, starting with the registers. */ \
1715	DEBUG_PRINT1 ("\n"); \
1716	\
1717	if (1) \
1718	for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1719	this_reg++) \
1720	{ \
1721	DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
1722	DEBUG_STATEMENT (num_regs_pushed++); \
1723	\
1724	DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
1725	PUSH_FAILURE_POINTER (regstart[this_reg]); \
1726	\
1727	DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
1728	PUSH_FAILURE_POINTER (regend[this_reg]); \
1729	\
1730	DEBUG_PRINT2 (" info: %p\n ", \
1731	reg_info[this_reg].word.pointer); \
1732	DEBUG_PRINT2 (" match_null=%d", \
1733	REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1734	DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1735	DEBUG_PRINT2 (" matched_something=%d", \
1736	MATCHED_SOMETHING (reg_info[this_reg])); \
1737	DEBUG_PRINT2 (" ever_matched=%d", \
1738	EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1739	DEBUG_PRINT1 ("\n"); \
1740	PUSH_FAILURE_ELT (reg_info[this_reg].word); \
1741	} \
1742	\
1743	DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
1744	PUSH_FAILURE_INT (lowest_active_reg); \
1745	\
1746	DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
1747	PUSH_FAILURE_INT (highest_active_reg); \
1748	\
1749	DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
1750	DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1751	PUSH_FAILURE_POINTER (pattern_place); \
1752	\
1753	DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
1754	DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1755	size2); \
1756	DEBUG_PRINT1 ("'\n"); \
1757	PUSH_FAILURE_POINTER (string_place); \
1758	\
1759	DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1760	DEBUG_PUSH (failure_id); \
1761	} while (0)
1762
1763	# ifndef DEFINED_ONCE
1764	/* This is the number of items that are pushed and popped on the stack
1765	for each register. */
/* The three items are regstart, regend and the reg_info word, as
   pushed by PUSH_FAILURE_POINT.  */
1766	# define NUM_REG_ITEMS 3
1767	
1768	/* Individual items aside from the registers. */
/* Namely the lowest and highest active register numbers, the pattern
   position and the string position, plus the failure id under DEBUG.  */
1769	# ifdef DEBUG
1770	# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
1771	# else
1772	# define NUM_NONREG_ITEMS 4
1773	# endif
1774	
1775	/* We push at most this many items on the stack. */
1776	/* We used to use (num_regs - 1), which is the number of registers
1777	this regexp will save; but that was changed to 5
1778	to avoid stack overflow for a regexp with lots of parens. */
1779	# define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1780	
1781	/* We actually push this many items. */
/* The `0 ? 0 : ...' conditional always selects its second arm, so the
   value is (highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS
   + NUM_NONREG_ITEMS.  Both register variables must be visible where
   the macro is used.  */
1782	# define NUM_FAILURE_ITEMS \
1783	(((0 \
1784	? 0 : highest_active_reg - lowest_active_reg + 1) \
1785	* NUM_REG_ITEMS) \
1786	+ NUM_NONREG_ITEMS)
1787	
1788	/* How many items can still be added to the stack without overflowing it. */
1789	# define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1790	# endif /* not DEFINED_ONCE */
1791
1792
1793	/* Pops what PUSH_FAILURE_POINT pushes.
1794	
1795	We restore into the parameters, all of which should be lvalues:
1796	STR -- the saved data position.
1797	PAT -- the saved pattern position.
1798	LOW_REG, HIGH_REG -- the lowest and highest active registers.
1799	REGSTART, REGEND -- arrays of string positions.
1800	REG_INFO -- array of information about each subexpression.
1801	
1802	Also assumes the variables `fail_stack' and (if debugging), `bufp',
1803	`pend', `string1', `size1', `string2', and `size2'. */
/* Items come off in the reverse of the push order: the failure id
   (DEBUG only), the string position, the pattern position, the highest
   then the lowest active register, and finally, for each register from
   HIGH_REG down to LOW_REG, its reg_info word, regend and regstart.
   A saved string position of NULL leaves STR untouched.
   The `if (1)' is always true, so the `else' branch that clears the
   registers above HIGH_REG is never executed.  The expansion is a bare
   brace block (not do-while), and it also resets
   `set_regs_matched_done' to 0.  */
1804	# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1805	{ \
1806	DEBUG_STATEMENT (unsigned failure_id;) \
1807	active_reg_t this_reg; \
1808	const UCHAR_T *string_temp; \
1809	\
1810	assert (!FAIL_STACK_EMPTY ()); \
1811	\
1812	/* Remove failure points and point to how many regs pushed. */ \
1813	DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1814	DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1815	DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1816	\
1817	assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1818	\
1819	DEBUG_POP (&failure_id); \
1820	DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1821	\
1822	/* If the saved string location is NULL, it came from an \
1823	on_failure_keep_string_jump opcode, and we want to throw away the \
1824	saved NULL, thus retaining our current position in the string. */ \
1825	string_temp = POP_FAILURE_POINTER (); \
1826	if (string_temp != NULL) \
1827	str = (const CHAR_T *) string_temp; \
1828	\
1829	DEBUG_PRINT2 (" Popping string %p: `", str); \
1830	DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1831	DEBUG_PRINT1 ("'\n"); \
1832	\
1833	pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
1834	DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
1835	DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1836	\
1837	/* Restore register info. */ \
1838	high_reg = (active_reg_t) POP_FAILURE_INT (); \
1839	DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
1840	\
1841	low_reg = (active_reg_t) POP_FAILURE_INT (); \
1842	DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
1843	\
1844	if (1) \
1845	for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1846	{ \
1847	DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
1848	\
1849	reg_info[this_reg].word = POP_FAILURE_ELT (); \
1850	DEBUG_PRINT2 (" info: %p\n", \
1851	reg_info[this_reg].word.pointer); \
1852	\
1853	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
1854	DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
1855	\
1856	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
1857	DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
1858	} \
1859	else \
1860	{ \
1861	for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1862	{ \
1863	reg_info[this_reg].word.integer = 0; \
1864	regend[this_reg] = 0; \
1865	regstart[this_reg] = 0; \
1866	} \
1867	highest_active_reg = high_reg; \
1868	} \
1869	\
1870	set_regs_matched_done = 0; \
1871	DEBUG_STATEMENT (nfailure_points_popped++); \
1872	} /* POP_FAILURE_POINT */
1873
1874
1875	/* Structure for per-register (a.k.a. per-group) information.
1876	Other register information, such as the
1877	starting and ending positions (which are addresses), and the list of
1878	inner groups (which is a bits list) are maintained in separate
1879	variables.
1880	
1881	We are making a (strictly speaking) nonportable assumption here: that
1882	the compiler will pack our bit fields into something that fits into
1883	the type of `word', i.e., is something that fits into one item on the
1884	failure stack. */
1885	
1886	
1887	/* Declarations and macros for re_match_2. */
1888	
/* `word' and `bits' overlay each other: the flags are read and written
   through `bits', while the failure-stack macros save and restore the
   whole record through `word' as a single stack element.  */
1889	typedef union
1890	{
1891	PREFIX(fail_stack_elt_t) word;
1892	struct
1893	{
1894	/* This field is one if this group can match the empty string,
1895	zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1896	# define MATCH_NULL_UNSET_VALUE 3
1897	unsigned match_null_string_p : 2;
1898	unsigned is_active : 1;
1899	unsigned matched_something : 1;
1900	unsigned ever_matched_something : 1;
1901	} bits;
1902	} PREFIX(register_info_type);
1903
/* Accessors for the individual flag bits of a register_info_type R.
   Each expands to the bit-field itself, so it can be read or assigned.  */
1904	# ifndef DEFINED_ONCE
1905	# define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1906	# define IS_ACTIVE(R) ((R).bits.is_active)
1907	# define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1908	# define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1909	
1910	
1911	/* Call this when have matched a real character; it sets `matched' flags
1912	for the subexpressions which we are currently inside. Also records
1913	that those subexprs have matched. */
/* The work is done only while `set_regs_matched_done' is zero; the
   macro then sets it to 1, so repeated calls are cheap until something
   (e.g. POP_FAILURE_POINT) clears it again.  Expects reg_info,
   lowest_active_reg, highest_active_reg and set_regs_matched_done to
   be visible at the point of use.  */
1914	# define SET_REGS_MATCHED() \
1915	do \
1916	{ \
1917	if (!set_regs_matched_done) \
1918	{ \
1919	active_reg_t r; \
1920	set_regs_matched_done = 1; \
1921	for (r = lowest_active_reg; r <= highest_active_reg; r++) \
1922	{ \
1923	MATCHED_SOMETHING (reg_info[r]) \
1924	= EVER_MATCHED_SOMETHING (reg_info[r]) \
1925	= 1; \
1926	} \
1927	} \
1928	} \
1929	while (0)
1930	# endif /* not DEFINED_ONCE */
1931
1932	/* Registers are set to a sentinel when they haven't yet matched. */
/* The sentinel is the address of a dedicated static object, and
   REG_UNSET (e) tests for it by pointer comparison.  */
1933	static CHAR_T PREFIX(reg_unset_dummy);
1934	# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
1935	# define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1936
1937	/* Subroutine declarations and macros for regex_compile. */
1938	static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg));
1939	static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1940	int arg1, int arg2));
1941	static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1942	int arg, UCHAR_T *end));
1943	static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1944	int arg1, int arg2, UCHAR_T *end));
1945	static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern,
1946	const CHAR_T *p,
1947	reg_syntax_t syntax));
1948	static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p,
1949	const CHAR_T *pend,
1950	reg_syntax_t syntax));
1951	# ifdef WCHAR
1952	static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start,
1953	const CHAR_T **p_ptr,
1954	const CHAR_T *pend,
1955	char *translate,
1956	reg_syntax_t syntax,
1957	UCHAR_T *b,
1958	CHAR_T *char_set));
1959	static void insert_space _RE_ARGS ((int num, CHAR_T loc, CHAR_T end));
1960	# else /* BYTE */
1961	static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start,
1962	const char **p_ptr,
1963	const char *pend,
1964	char *translate,
1965	reg_syntax_t syntax,
1966	unsigned char *b));
1967	# endif /* WCHAR */
1968
1969	/* Fetch the next character in the uncompiled pattern---translating it
1970	if necessary. Also cast from a signed character in the constant
1971	string passed to us by the user to an unsigned char that we can use
1972	as an array index (in, e.g., `translate'). */
1973	/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
1974	because it is impossible to allocate 4GB array for some encodings
1975	which have 4 byte character_set like UCS4. */
/* PATFETCH and PATFETCH_RAW use the variables `p' and `pend' (and
   PATFETCH also `translate') from the point of use, and contain a
   `return REG_EEND' that leaves the enclosing function when the
   pattern is exhausted.  The #ifndef guard keeps an earlier PATFETCH
   definition if one exists.  */
1976	# ifndef PATFETCH
1977	# ifdef WCHAR
1978	# define PATFETCH(c) \
1979	do {if (p == pend) return REG_EEND; \
1980	c = (UCHAR_T) *p++; \
1981	if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \
1982	} while (0)
1983	# else /* BYTE */
1984	# define PATFETCH(c) \
1985	do {if (p == pend) return REG_EEND; \
1986	c = (unsigned char) *p++; \
1987	if (translate) c = (unsigned char) translate[c]; \
1988	} while (0)
1989	# endif /* WCHAR */
1990	# endif
1991	
1992	/* Fetch the next character in the uncompiled pattern, with no
1993	translation. */
1994	# define PATFETCH_RAW(c) \
1995	do {if (p == pend) return REG_EEND; \
1996	c = (UCHAR_T) *p++; \
1997	} while (0)
1998	
1999	/* Go backwards one character in the pattern. */
2000	# define PATUNFETCH p--
2001
2002
2003	/* If `translate' is non-null, return translate[D], else just D. We
2004	cast the subscript to translate because some data is declared as
2005	`char *', to avoid warnings when a string constant is passed. But
2006	when we use a character as a subscript we must make it unsigned. */
2007	/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
2008	because it is impossible to allocate 4GB array for some encodings
2009	which have 4 byte character_set like UCS4. */
/* Uses the variable `translate' from the point of use.  The translated
   value is cast to `char', whereas an untranslated D is passed through
   with its own type.  The #ifndef guard keeps an earlier TRANSLATE
   definition if one exists.  */
2010	
2011	# ifndef TRANSLATE
2012	# ifdef WCHAR
2013	# define TRANSLATE(d) \
2014	((translate && ((UCHAR_T) (d)) <= 0xff) \
2015	? (char) translate[(unsigned char) (d)] : (d))
2016	# else /* BYTE */
2017	# define TRANSLATE(d) \
2018	(translate ? (char) translate[(unsigned char) (d)] : (d))
2019	# endif /* WCHAR */
2020	# endif
2021
2022
2023	/* Macros for outputting the compiled pattern into `buffer'. */
2024
2025	/* If the buffer isn't allocated when it comes in, use this. */
2026	# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))
2027
2028	/* Make sure the buffer has room for at least N more elements (bytes in the BYTE build, CHAR_T units in the WCHAR build). */
2029	# ifdef WCHAR
2030	# define GET_BUFFER_SPACE(n) \
2031	while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
2032	+ (n)*sizeof(CHAR_T)) > bufp->allocated) \
2033	EXTEND_BUFFER ()
2034	# else /* BYTE */
2035	# define GET_BUFFER_SPACE(n) \
2036	while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
2037	EXTEND_BUFFER ()
2038	# endif /* WCHAR */
2039
2040	/* Make sure we have one more byte of buffer space and then add C to it. */
2041	# define BUF_PUSH(c) \
2042	do { \
2043	GET_BUFFER_SPACE (1); \
2044	*b++ = (UCHAR_T) (c); \
2045	} while (0)
2046
2047
2048	/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
2049	# define BUF_PUSH_2(c1, c2) \
2050	do { \
2051	GET_BUFFER_SPACE (2); \
2052	*b++ = (UCHAR_T) (c1); \
2053	*b++ = (UCHAR_T) (c2); \
2054	} while (0)
2055
2056
2057	/* As with BUF_PUSH_2, except for three bytes. */
2058	# define BUF_PUSH_3(c1, c2, c3) \
2059	do { \
2060	GET_BUFFER_SPACE (3); \
2061	*b++ = (UCHAR_T) (c1); \
2062	*b++ = (UCHAR_T) (c2); \
2063	*b++ = (UCHAR_T) (c3); \
2064	} while (0)
2065
2066	/* Store a jump with opcode OP at LOC to location TO. We store a relative
2067	address offset by the 1 + OFFSET_ADDRESS_SIZE units the jump itself occupies. */
2068	# define STORE_JUMP(op, loc, to) \
2069	PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
2070
2071	/* Likewise, for a two-argument jump. */
2072	# define STORE_JUMP2(op, loc, to, arg) \
2073	PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
2074
2075	/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
2076	# define INSERT_JUMP(op, loc, to) \
2077	PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
2078
2079	/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
2080	# define INSERT_JUMP2(op, loc, to, arg) \
2081	PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
2082	arg, b)
2083
2084	/* This is not an arbitrary limit: the arguments which represent offsets
2085	into the pattern are two bytes long. So if 2^16 bytes turns out to
2086	be too small, many things would have to change. */
2087	/* Any other compiler which, like MSC, has an allocation limit below 2^16
2088	bytes will have to use an approach similar to what was done below for
2089	MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
2090	reallocating to 0 bytes, which is not going to work too well.
2091	You have been warned!! */
2092	# ifndef DEFINED_ONCE
2093	# if defined _MSC_VER && !defined WIN32
2094	/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
2095	The REALLOC define eliminates a flurry of conversion warnings,
2096	but is not required. */
2097	# define MAX_BUF_SIZE 65500L
2098	# define REALLOC(p,s) realloc ((p), (size_t) (s))
2099	# else
2100	# define MAX_BUF_SIZE (1L << 16)
2101	# define REALLOC(p,s) realloc ((p), (s))
2102	# endif
2103
2104	/* Grow the buffer to twice its current size (clamped to MAX_BUF_SIZE)
2105	by reallocating it, and reset the pointers that pointed into the old
2106	block to point to the correct places in the new one. If the buffer has
2107	already reached the MAX_BUF_SIZE limit, return REG_ESIZE; if reallocation fails, return REG_ESPACE. */
2108	# if __BOUNDED_POINTERS__
2109	# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
2110	# define MOVE_BUFFER_POINTER(P) \
2111	(__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
2112	# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
2113	else \
2114	{ \
2115	SET_HIGH_BOUND (b); \
2116	SET_HIGH_BOUND (begalt); \
2117	if (fixup_alt_jump) \
2118	SET_HIGH_BOUND (fixup_alt_jump); \
2119	if (laststart) \
2120	SET_HIGH_BOUND (laststart); \
2121	if (pending_exact) \
2122	SET_HIGH_BOUND (pending_exact); \
2123	}
2124	# else
2125	# define MOVE_BUFFER_POINTER(P) (P) += incr
2126	# define ELSE_EXTEND_BUFFER_HIGH_BOUND
2127	# endif
2128	# endif /* not DEFINED_ONCE */
2129
2130	# ifdef WCHAR
2131	# define EXTEND_BUFFER() \
2132	do { \
2133	UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
2134	int wchar_count; \
2135	if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \
2136	return REG_ESIZE; \
2137	bufp->allocated <<= 1; \
2138	if (bufp->allocated > MAX_BUF_SIZE) \
2139	bufp->allocated = MAX_BUF_SIZE; \
2140	/* How many characters the new buffer can have? */ \
2141	wchar_count = bufp->allocated / sizeof(UCHAR_T); \
2142	if (wchar_count == 0) wchar_count = 1; \
2143	/* Truncate the buffer to CHAR_T align. */ \
2144	bufp->allocated = wchar_count * sizeof(UCHAR_T); \
2145	RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \
2146	bufp->buffer = (char*)COMPILED_BUFFER_VAR; \
2147	if (COMPILED_BUFFER_VAR == NULL) \
2148	return REG_ESPACE; \
2149	/* If the buffer moved, move all the pointers into it. */ \
2150	if (old_buffer != COMPILED_BUFFER_VAR) \
2151	{ \
2152	int incr = COMPILED_BUFFER_VAR - old_buffer; \
2153	MOVE_BUFFER_POINTER (b); \
2154	MOVE_BUFFER_POINTER (begalt); \
2155	if (fixup_alt_jump) \
2156	MOVE_BUFFER_POINTER (fixup_alt_jump); \
2157	if (laststart) \
2158	MOVE_BUFFER_POINTER (laststart); \
2159	if (pending_exact) \
2160	MOVE_BUFFER_POINTER (pending_exact); \
2161	} \
2162	ELSE_EXTEND_BUFFER_HIGH_BOUND \
2163	} while (0)
2164	# else /* BYTE */
2165	# define EXTEND_BUFFER() \
2166	do { \
2167	UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
2168	if (bufp->allocated == MAX_BUF_SIZE) \
2169	return REG_ESIZE; \
2170	bufp->allocated <<= 1; \
2171	if (bufp->allocated > MAX_BUF_SIZE) \
2172	bufp->allocated = MAX_BUF_SIZE; \
2173	bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \
2174	bufp->allocated); \
2175	if (COMPILED_BUFFER_VAR == NULL) \
2176	return REG_ESPACE; \
2177	/* If the buffer moved, move all the pointers into it. */ \
2178	if (old_buffer != COMPILED_BUFFER_VAR) \
2179	{ \
2180	int incr = COMPILED_BUFFER_VAR - old_buffer; \
2181	MOVE_BUFFER_POINTER (b); \
2182	MOVE_BUFFER_POINTER (begalt); \
2183	if (fixup_alt_jump) \
2184	MOVE_BUFFER_POINTER (fixup_alt_jump); \
2185	if (laststart) \
2186	MOVE_BUFFER_POINTER (laststart); \
2187	if (pending_exact) \
2188	MOVE_BUFFER_POINTER (pending_exact); \
2189	} \
2190	ELSE_EXTEND_BUFFER_HIGH_BOUND \
2191	} while (0)
2192	# endif /* WCHAR */
2193
2194	# ifndef DEFINED_ONCE
2195	/* Since we have one byte reserved for the register number argument to
2196	{start,stop}_memory, the maximum number of groups we can report
2197	things about is what fits in that byte. */
2198	# define MAX_REGNUM 255
2199
2200	/* But patterns can have more than `MAX_REGNUM' registers. We just
2201	ignore the excess. */
2202	typedef unsigned regnum_t;
2203
2204
2205	/* Macros for the compile stack. */
2206
2207	/* Since offsets can go either forwards or backwards, this type needs to
2208	be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
2209	/* int may not be enough when sizeof(int) == 2. */
2210	typedef long pattern_offset_t;
2211
2212	typedef struct /* One compile-stack entry; the compile stack tracks unclosed groups. */
2213	{
2214	pattern_offset_t begalt_offset; /* NOTE(review): the offset fields presumably save buffer positions; they are set outside this excerpt -- confirm. */
2215	pattern_offset_t fixup_alt_jump;
2216	pattern_offset_t inner_group_offset;
2217	pattern_offset_t laststart_offset;
2218	regnum_t regnum; /* Group number, remembered for the matching close-group. */
2219	} compile_stack_elt_t;
2220
2221
2222	typedef struct /* Stack of compile_stack_elt_t entries. */
2223	{
2224	compile_stack_elt_t *stack; /* Element array. */
2225	unsigned size; /* Number of elements allocated in `stack'. */
2226	unsigned avail; /* Offset of next open position. */
2227	} compile_stack_type;
2228
2229
2230	# define INIT_COMPILE_STACK_SIZE 32
2231
2232	# define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
2233	# define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
2234
2235	/* The next available element. */
2236	# define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
2237
2238	# endif /* not DEFINED_ONCE */
2239
2240	/* Set the bit for character C in the list at `b'. C is evaluated twice, so it must have no side effects. */
2241	# ifndef DEFINED_ONCE
2242	# define SET_LIST_BIT(c) \
2243	(b[((unsigned char) (c)) / BYTEWIDTH] \
2244	|= 1 << (((unsigned char) (c)) % BYTEWIDTH))
2245	# endif /* DEFINED_ONCE */
2246
2247	/* Get the next unsigned number in the uncompiled pattern. */
2248	# define GET_UNSIGNED_NUMBER(num) \
2249	{ \
2250	while (p != pend) \
2251	{ \
2252	PATFETCH (c); \
2253	if (c < '0' \|\| c > '9') \
2254	break; \
2255	if (num <= RE_DUP_MAX) \
2256	{ \
2257	if (num < 0) \
2258	num = 0; \
2259	num = num * 10 + c - '0'; \
2260	} \
2261	} \
2262	}
2263
2264	# ifndef DEFINED_ONCE
2265	# if defined _LIBC \|\| WIDE_CHAR_SUPPORT
2266	/* The GNU C library provides support for user-defined character classes
2267	and the functions from ISO C Amendment 1. */
2268	# ifdef CHARCLASS_NAME_MAX
2269	# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
2270	# else
2271	/* This shouldn't happen but some implementation might still have this
2272	problem. Use a reasonable default value. */
2273	# define CHAR_CLASS_MAX_LENGTH 256
2274	# endif
2275
2276	# ifdef _LIBC
2277	# define IS_CHAR_CLASS(string) __wctype (string)
2278	# else
2279	# define IS_CHAR_CLASS(string) wctype (string)
2280	# endif
2281	# else
2282	# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
2283
2284	# define IS_CHAR_CLASS(string) \
2285	(STREQ (string, "alpha") \|\| STREQ (string, "upper") \
2286	\|\| STREQ (string, "lower") \|\| STREQ (string, "digit") \
2287	\|\| STREQ (string, "alnum") \|\| STREQ (string, "xdigit") \
2288	\|\| STREQ (string, "space") \|\| STREQ (string, "print") \
2289	\|\| STREQ (string, "punct") \|\| STREQ (string, "graph") \
2290	\|\| STREQ (string, "cntrl") \|\| STREQ (string, "blank"))
2291	# endif
2292	# endif /* DEFINED_ONCE */
2293
2294
2295	# ifndef MATCH_MAY_ALLOCATE
2296
2297	/* If we cannot allocate large objects within re_match_2_internal,
2298	we make the fail stack and register vectors global.
2299	The fail stack, we grow to the maximum size when a regexp
2300	is compiled.
2301	The register vectors, we adjust in size each time we
2302	compile a regexp, according to the number of registers it needs. */
2303
2304	static PREFIX(fail_stack_type) fail_stack;
2305
2306	/* Size with which the following vectors are currently allocated.
2307	That is so we can make them bigger as needed,
2308	but never make them smaller. */
2309	# ifdef DEFINED_ONCE
2310	static int regs_allocated_size;
2311
2312	static const char **regstart, **regend; /* Vectors of `const char *', sized by regs_allocated_size. */
2313	static const char **old_regstart, **old_regend;
2314	static const char **best_regstart, **best_regend;
2315	static const char **reg_dummy;
2316	# endif /* DEFINED_ONCE */
2317
2318	static PREFIX(register_info_type) *PREFIX(reg_info);
2319	static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2320
2321	/* Make the global register vectors and both reg_info arrays big enough
2322	for NUM_REGS registers, but don't make them smaller. */
2323
2324	static void /* NUM_REGS is the register count now required; nothing is returned. */
2325	PREFIX(regex_grow_registers) (num_regs)
2326	int num_regs;
2327	{
2328	if (num_regs > regs_allocated_size) /* Only ever grow; a smaller request is a no-op. */
2329	{
2330	RETALLOC_IF (regstart, num_regs, const char *);
2331	RETALLOC_IF (regend, num_regs, const char *);
2332	RETALLOC_IF (old_regstart, num_regs, const char *);
2333	RETALLOC_IF (old_regend, num_regs, const char *);
2334	RETALLOC_IF (best_regstart, num_regs, const char *);
2335	RETALLOC_IF (best_regend, num_regs, const char *);
2336	RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2337	RETALLOC_IF (reg_dummy, num_regs, const char *);
2338	RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2339
2340	regs_allocated_size = num_regs; /* Record the new size of every vector. */
2341	}
2342	}
2343
2344	# endif /* not MATCH_MAY_ALLOCATE */
2345
2346
2347	# ifndef DEFINED_ONCE
2348	static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2349	compile_stack,
2350	regnum_t regnum));
2351	# endif /* not DEFINED_ONCE */
2352
2353	/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2354	Returns one of error codes defined in `regex.h', or zero for success.
2355
2356	Assumes the `allocated' (and perhaps `buffer') and `translate'
2357	fields are set in BUFP on entry.
2358
2359	If it succeeds, results are put in BUFP (if it returns an error, the
2360	contents of BUFP are undefined):
2361	`buffer' is the compiled pattern;
2362	`syntax' is set to SYNTAX;
2363	`used' is set to the length of the compiled pattern;
2364	`fastmap_accurate' is zero;
2365	`re_nsub' is the number of subexpressions in PATTERN;
2366	`not_bol' and `not_eol' are zero;
2367
2368	The `fastmap' and `newline_anchor' fields are neither
2369	examined nor set. */
2370
2371	/* Return, freeing storage we allocated. */
2372	# ifdef WCHAR
2373	# define FREE_STACK_RETURN(value) \
2374	return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
2375	# else
2376	# define FREE_STACK_RETURN(value) \
2377	return (free (compile_stack.stack), value)
2378	# endif /* WCHAR */
2379
2380	static reg_errcode_t
2381	PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
2382	const char *ARG_PREFIX(pattern);
2383	size_t ARG_PREFIX(size);
2384	reg_syntax_t syntax;
2385	struct re_pattern_buffer *bufp;
2386	{
2387	/* We fetch characters from PATTERN here. Even though PATTERN is
2388	`char *' (i.e., signed), we declare these variables as unsigned, so
2389	they can be reliably used as array indices. */
2390	register UCHAR_T c, c1;
2391
2392	#ifdef WCHAR
2393	/* A temporary space to keep wchar_t pattern and compiled pattern. */
2394	CHAR_T pattern, COMPILED_BUFFER_VAR;
2395	size_t size;
2396	/* offset buffer for optimization. See convert_mbs_to_wc. */
2397	int *mbs_offset = NULL;
2398	/* Holds whether each wchar_t is binary data or not. */
2399	char *is_binary = NULL;
2400	/* A flag whether exactn is handling binary data or not. */
2401	char is_exactn_bin = FALSE;
2402	#endif /* WCHAR */
2403
2404	/* A random temporary spot in PATTERN. */
2405	const CHAR_T *p1;
2406
2407	/* Points to the end of the buffer, where we should append. */
2408	register UCHAR_T *b;
2409
2410	/* Keeps track of unclosed groups. */
2411	compile_stack_type compile_stack;
2412
2413	/* Points to the current (ending) position in the pattern. */
2414	#ifdef WCHAR
2415	const CHAR_T *p;
2416	const CHAR_T *pend;
2417	#else /* BYTE */
2418	const CHAR_T *p = pattern;
2419	const CHAR_T *pend = pattern + size;
2420	#endif /* WCHAR */
2421
2422	/* How to translate the characters in the pattern. */
2423	RE_TRANSLATE_TYPE translate = bufp->translate;
2424
2425	/* Address of the count-byte of the most recently inserted `exactn'
2426	command. This makes it possible to tell if a new exact-match
2427	character can be added to that command or if the character requires
2428	a new `exactn' command. */
2429	UCHAR_T *pending_exact = 0;
2430
2431	/* Address of start of the most recently finished expression.
2432	This tells, e.g., postfix * where to find the start of its
2433	operand. Reset at the beginning of groups and alternatives. */
2434	UCHAR_T *laststart = 0;
2435
2436	/* Address of beginning of regexp, or inside of last group. */
2437	UCHAR_T *begalt;
2438
2439	/* Address of the place where a forward jump should go to the end of
2440	the containing expression. Each alternative of an `or' -- except the
2441	last -- ends with a forward jump of this sort. */
2442	UCHAR_T *fixup_alt_jump = 0;
2443
2444	/* Counts open-groups as they are encountered. Remembered for the
2445	matching close-group on the compile stack, so the same register
2446	number is put in the stop_memory as the start_memory. */
2447	regnum_t regnum = 0;
2448
2449	#ifdef WCHAR
2450	/* Initialize the wchar_t PATTERN and offset_buffer. */
2451	p = pend = pattern = TALLOC(csize + 1, CHAR_T);
2452	mbs_offset = TALLOC(csize + 1, int);
2453	is_binary = TALLOC(csize + 1, char);
2454	if (pattern == NULL \|\| mbs_offset == NULL \|\| is_binary == NULL)
2455	{
2456	free(pattern);
2457	free(mbs_offset);
2458	free(is_binary);
2459	return REG_ESPACE;
2460	}
2461	pattern[csize] = L'\0'; /* sentinel */
2462	size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2463	pend = p + size;
2464	if (size < 0)
2465	{
2466	free(pattern);
2467	free(mbs_offset);
2468	free(is_binary);
2469	return REG_BADPAT;
2470	}
2471	#endif
2472
2473	#ifdef DEBUG
2474	DEBUG_PRINT1 ("\nCompiling pattern: ");
2475	if (debug)
2476	{
2477	unsigned debug_count;
2478
2479	for (debug_count = 0; debug_count < size; debug_count++)
2480	PUT_CHAR (pattern[debug_count]);
2481	putchar ('\n');
2482	}
2483	#endif /* DEBUG */
2484
2485	/* Initialize the compile stack. */
2486	compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2487	if (compile_stack.stack == NULL)
2488	{
2489	#ifdef WCHAR
2490	free(pattern);
2491	free(mbs_offset);
2492	free(is_binary);
2493	#endif
2494	return REG_ESPACE;
2495	}
2496
2497	compile_stack.size = INIT_COMPILE_STACK_SIZE;
2498	compile_stack.avail = 0;
2499
2500	/* Initialize the pattern buffer. */
2501	bufp->syntax = syntax;
2502	bufp->fastmap_accurate = 0;
2503	bufp->not_bol = bufp->not_eol = 0;
2504
2505	/* Set `used' to zero, so that if we return an error, the pattern
2506	printer (for debugging) will think there's no pattern. We reset it
2507	at the end. */
2508	bufp->used = 0;
2509
2510	/* Always count groups, whether or not bufp->no_sub is set. */
2511	bufp->re_nsub = 0;
2512
2513	#if !defined emacs && !defined SYNTAX_TABLE
2514	/* Initialize the syntax table. */
2515	init_syntax_once ();
2516	#endif
2517
2518	if (bufp->allocated == 0)
2519	{
2520	if (bufp->buffer)
2521	{ /* If zero allocated, but buffer is non-null, try to realloc
2522	enough space. This loses if buffer's address is bogus, but
2523	that is the user's responsibility. */
2524	#ifdef WCHAR
2525	/* Free bufp->buffer and allocate an array for wchar_t pattern
2526	buffer. */
2527	free(bufp->buffer);
2528	COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2529	UCHAR_T);
2530	#else
2531	RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2532	#endif /* WCHAR */
2533	}
2534	else
2535	{ /* Caller did not allocate a buffer. Do it for them. */
2536	COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2537	UCHAR_T);
2538	}
2539
2540	if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2541	#ifdef WCHAR
2542	bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2543	#endif /* WCHAR */
2544	bufp->allocated = INIT_BUF_SIZE;
2545	}
2546	#ifdef WCHAR
2547	else
2548	COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2549	#endif
2550
2551	begalt = b = COMPILED_BUFFER_VAR;
2552
2553	/* Loop through the uncompiled pattern until we're at the end. */
2554	while (p != pend)
2555	{
2556	PATFETCH (c);
2557
2558	switch (c)
2559	{
2560	case '^':
2561	{
2562	if ( /* If at start of pattern, it's an operator. */
2563	p == pattern + 1
2564	/* If context independent, it's an operator. */
2565	\|\| syntax & RE_CONTEXT_INDEP_ANCHORS
2566	/* Otherwise, depends on what's come before. */
2567	\|\| PREFIX(at_begline_loc_p) (pattern, p, syntax))
2568	BUF_PUSH (begline);
2569	else
2570	goto normal_char;
2571	}
2572	break;
2573
2574
2575	case '$':
2576	{
2577	if ( /* If at end of pattern, it's an operator. */
2578	p == pend
2579	/* If context independent, it's an operator. */
2580	\|\| syntax & RE_CONTEXT_INDEP_ANCHORS
2581	/* Otherwise, depends on what's next. */
2582	\|\| PREFIX(at_endline_loc_p) (p, pend, syntax))
2583	BUF_PUSH (endline);
2584	else
2585	goto normal_char;
2586	}
2587	break;
2588
2589
2590	case '+':
2591	case '?':
2592	if ((syntax & RE_BK_PLUS_QM)
2593	\|\| (syntax & RE_LIMITED_OPS))
2594	goto normal_char;
2595	handle_plus:
2596	case '*':
2597	/* If there is no previous pattern... */
2598	if (!laststart)
2599	{
2600	if (syntax & RE_CONTEXT_INVALID_OPS)
2601	FREE_STACK_RETURN (REG_BADRPT);
2602	else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2603	goto normal_char;
2604	}
2605
2606	{
2607	/* Are we optimizing this jump? */
2608	boolean keep_string_p = false;
2609
2610	/* 1 means zero (many) matches is allowed. */
2611	char zero_times_ok = 0, many_times_ok = 0;
2612
2613	/* If there is a sequence of repetition chars, collapse it
2614	down to just one (the right one). We can't combine
2615	interval operators with these because of, e.g., `a{2}*',
2616	which should only match an even number of `a's. */
2617
2618	for (;;)
2619	{
2620	zero_times_ok \|= c != '+';
2621	many_times_ok \|= c != '?';
2622
2623	if (p == pend)
2624	break;
2625
2626	PATFETCH (c);
2627
2628	if (c == '*'
2629	\|\| (!(syntax & RE_BK_PLUS_QM) && (c == '+' \|\| c == '?')))
2630	;
2631
2632	else if (syntax & RE_BK_PLUS_QM && c == '\\')
2633	{
2634	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2635
2636	PATFETCH (c1);
2637	if (!(c1 == '+' \|\| c1 == '?'))
2638	{
2639	PATUNFETCH;
2640	PATUNFETCH;
2641	break;
2642	}
2643
2644	c = c1;
2645	}
2646	else
2647	{
2648	PATUNFETCH;
2649	break;
2650	}
2651
2652	/* If we get here, we found another repeat character. */
2653	}
2654
2655	/* Star, etc. applied to an empty pattern is equivalent
2656	to an empty pattern. */
2657	if (!laststart)
2658	break;
2659
2660	/* Now we know whether or not zero matches is allowed
2661	and also whether or not two or more matches is allowed. */
2662	if (many_times_ok)
2663	{ /* More than one repetition is allowed, so put in at the
2664	end a backward relative jump from `b' to before the next
2665	jump we're going to put in below (which jumps from
2666	laststart to after this jump).
2667
2668	But if we are at the `*' in the exact sequence `.*\n',
2669	insert an unconditional jump backwards to the .,
2670	instead of the beginning of the loop. This way we only
2671	push a failure point once, instead of every time
2672	through the loop. */
2673	assert (p - 1 > pattern);
2674
2675	/* Allocate the space for the jump. */
2676	GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2677
2678	/* We know we are not at the first character of the pattern,
2679	because laststart was nonzero. And we've already
2680	incremented `p', by the way, to be the character after
2681	the `*'. Do we have to do something analogous here
2682	for null bytes, because of RE_DOT_NOT_NULL? */
2683	if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2684	&& zero_times_ok
2685	&& p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2686	&& !(syntax & RE_DOT_NEWLINE))
2687	{ /* We have .*\n. */
2688	STORE_JUMP (jump, b, laststart);
2689	keep_string_p = true;
2690	}
2691	else
2692	/* Anything else. */
2693	STORE_JUMP (maybe_pop_jump, b, laststart -
2694	(1 + OFFSET_ADDRESS_SIZE));
2695
2696	/* We've added more stuff to the buffer. */
2697	b += 1 + OFFSET_ADDRESS_SIZE;
2698	}
2699
2700	/* On failure, jump from laststart to b + 3, which will be the
2701	end of the buffer after this jump is inserted. */
2702	/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2703	'b + 3'. */
2704	GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2705	INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2706	: on_failure_jump,
2707	laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2708	pending_exact = 0;
2709	b += 1 + OFFSET_ADDRESS_SIZE;
2710
2711	if (!zero_times_ok)
2712	{
2713	/* At least one repetition is required, so insert a
2714	`dummy_failure_jump' before the initial
2715	`on_failure_jump' instruction of the loop. This
2716	effects a skip over that instruction the first time
2717	we hit that loop. */
2718	GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2719	INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2720	2 + 2 * OFFSET_ADDRESS_SIZE);
2721	b += 1 + OFFSET_ADDRESS_SIZE;
2722	}
2723	}
2724	break;
2725
2726
2727	case '.':
2728	laststart = b;
2729	BUF_PUSH (anychar);
2730	break;
2731
2732
2733	case '[':
2734	{
2735	boolean had_char_class = false;
2736	#ifdef WCHAR
2737	CHAR_T range_start = 0xffffffff;
2738	#else
2739	unsigned int range_start = 0xffffffff;
2740	#endif
2741	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2742
2743	#ifdef WCHAR
2744	/* We assume a charset(_not) structure as a wchar_t array.
2745	charset[0] = (re_opcode_t) charset(_not)
2746	charset[1] = l (= length of char_classes)
2747	charset[2] = m (= length of collating_symbols)
2748	charset[3] = n (= length of equivalence_classes)
2749	charset[4] = o (= length of char_ranges)
2750	charset[5] = p (= length of chars)
2751
2752	charset[6] = char_class (wctype_t)
2753	charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2754	...
2755	charset[l+5] = char_class (wctype_t)
2756
2757	charset[l+6] = collating_symbol (wchar_t)
2758	...
2759	charset[l+m+5] = collating_symbol (wchar_t)
2760	ifdef _LIBC we use the index if
2761	_NL_COLLATE_SYMB_EXTRAMB instead of
2762	wchar_t string.
2763
2764	charset[l+m+6] = equivalence_classes (wchar_t)
2765	...
2766	charset[l+m+n+5] = equivalence_classes (wchar_t)
2767	ifdef _LIBC we use the index in
2768	_NL_COLLATE_WEIGHT instead of
2769	wchar_t string.
2770
2771	charset[l+m+n+6] = range_start
2772	charset[l+m+n+7] = range_end
2773	...
2774	charset[l+m+n+2o+4] = range_start
2775	charset[l+m+n+2o+5] = range_end
2776	ifdef _LIBC we use the value looked up
2777	in _NL_COLLATE_COLLSEQ instead of
2778	wchar_t character.
2779
2780	charset[l+m+n+2o+6] = char
2781	...
2782	charset[l+m+n+2o+p+5] = char
2783
2784	*/
2785
2786	/* We need at least 6 spaces: the opcode, the length of
2787	char_classes, the length of collating_symbols, the length of
2788	equivalence_classes, the length of char_ranges, the length of
2789	chars. */
2790	GET_BUFFER_SPACE (6);
2791
2792	/* Save b as laststart. And We use laststart as the pointer
2793	to the first element of the charset here.
2794	In other words, laststart[i] indicates charset[i]. */
2795	laststart = b;
2796
2797	/* We test `*p == '^' twice, instead of using an if
2798	statement, so we only need one BUF_PUSH. */
2799	BUF_PUSH (*p == '^' ? charset_not : charset);
2800	if (*p == '^')
2801	p++;
2802
2803	/* Push the length of char_classes, the length of
2804	collating_symbols, the length of equivalence_classes, the
2805	length of char_ranges and the length of chars. */
2806	BUF_PUSH_3 (0, 0, 0);
2807	BUF_PUSH_2 (0, 0);
2808
2809	/* Remember the first position in the bracket expression. */
2810	p1 = p;
2811
2812	/* charset_not matches newline according to a syntax bit. */
2813	if ((re_opcode_t) b[-6] == charset_not
2814	&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2815	{
2816	BUF_PUSH('\n');
2817	laststart[5]++; /* Update the length of characters */
2818	}
2819
2820	/* Read in characters and ranges, setting map bits. */
2821	for (;;)
2822	{
2823	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2824
2825	PATFETCH (c);
2826
2827	/* \ might escape characters inside [...] and [^...]. */
2828	if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2829	{
2830	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2831
2832	PATFETCH (c1);
2833	BUF_PUSH(c1);
2834	laststart[5]++; /* Update the length of chars */
2835	range_start = c1;
2836	continue;
2837	}
2838
2839	/* Could be the end of the bracket expression. If it's
2840	not (i.e., when the bracket expression is `[]' so
2841	far), the ']' character bit gets set way below. */
2842	if (c == ']' && p != p1 + 1)
2843	break;
2844
2845	/* Look ahead to see if it's a range when the last thing
2846	was a character class. */
2847	if (had_char_class && c == '-' && *p != ']')
2848	FREE_STACK_RETURN (REG_ERANGE);
2849
2850	/* Look ahead to see if it's a range when the last thing
2851	was a character: if this is a hyphen not at the
2852	beginning or the end of a list, then it's the range
2853	operator. */
2854	if (c == '-'
2855	&& !(p - 2 >= pattern && p[-2] == '[')
2856	&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2857	&& *p != ']')
2858	{
2859	reg_errcode_t ret;
2860	/* Allocate the space for range_start and range_end. */
2861	GET_BUFFER_SPACE (2);
2862	/* Update the pointer to indicate end of buffer. */
2863	b += 2;
2864	ret = wcs_compile_range (range_start, &p, pend, translate,
2865	syntax, b, laststart);
2866	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2867	range_start = 0xffffffff;
2868	}
2869	else if (p[0] == '-' && p[1] != ']')
2870	{ /* This handles ranges made up of characters only. */
2871	reg_errcode_t ret;
2872
2873	/* Move past the `-'. */
2874	PATFETCH (c1);
2875	/* Allocate the space for range_start and range_end. */
2876	GET_BUFFER_SPACE (2);
2877	/* Update the pointer to indicate end of buffer. */
2878	b += 2;
2879	ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2880	laststart);
2881	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2882	range_start = 0xffffffff;
2883	}
2884
2885	/* See if we're at the beginning of a possible character
2886	class. */
2887	else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2888	{ /* Leave room for the null. */
2889	char str[CHAR_CLASS_MAX_LENGTH + 1];
2890
2891	PATFETCH (c);
2892	c1 = 0;
2893
2894	/* If pattern is `[[:'. */
2895	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2896
2897	for (;;)
2898	{
2899	PATFETCH (c);
2900	if ((c == ':' && *p == ']') \|\| p == pend)
2901	break;
2902	if (c1 < CHAR_CLASS_MAX_LENGTH)
2903	str[c1++] = c;
2904	else
2905	/* This is in any case an invalid class name. */
2906	str[0] = '\0';
2907	}
2908	str[c1] = '\0';
2909
2910	/* If isn't a word bracketed by `[:' and `:]':
2911	undo the ending character, the letters, and leave
2912	the leading `:' and `[' (but store them as character). */
2913	if (c == ':' && *p == ']')
2914	{
2915	wctype_t wt;
2916	uintptr_t alignedp;
2917
2918	/* Query the character class as wctype_t. */
2919	wt = IS_CHAR_CLASS (str);
2920	if (wt == 0)
2921	FREE_STACK_RETURN (REG_ECTYPE);
2922
2923	/* Throw away the ] at the end of the character
2924	class. */
2925	PATFETCH (c);
2926
2927	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2928
2929	/* Allocate the space for character class. */
2930	GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2931	/* Update the pointer to indicate end of buffer. */
2932	b += CHAR_CLASS_SIZE;
2933	/* Move data which follow character classes
2934	not to violate the data. */
2935	insert_space(CHAR_CLASS_SIZE,
2936	laststart + 6 + laststart[1],
2937	b - 1);
2938	alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2939	+ __alignof__(wctype_t) - 1)
2940	& ~(uintptr_t)(__alignof__(wctype_t) - 1);
2941	/* Store the character class. */
2942	((wctype_t)alignedp) = wt;
2943	/* Update length of char_classes */
2944	laststart[1] += CHAR_CLASS_SIZE;
2945
2946	had_char_class = true;
2947	}
2948	else
2949	{
2950	c1++;
2951	while (c1--)
2952	PATUNFETCH;
2953	BUF_PUSH ('[');
2954	BUF_PUSH (':');
2955	laststart[5] += 2; /* Update the length of characters */
2956	range_start = ':';
2957	had_char_class = false;
2958	}
2959	}
2960	else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2961	\|\| *p == '.'))
2962	{
2963	CHAR_T str[128]; /* Should be large enough. */
2964	CHAR_T delim = p; / '=' or '.' */
2965	# ifdef _LIBC
2966	uint32_t nrules =
2967	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2968	# endif
2969	PATFETCH (c);
2970	c1 = 0;
2971
2972	/* If pattern is `[[=' or '[[.'. */
2973	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2974
2975	for (;;)
2976	{
2977	PATFETCH (c);
2978	if ((c == delim && *p == ']') \|\| p == pend)
2979	break;
2980	if (c1 < sizeof (str) - 1)
2981	str[c1++] = c;
2982	else
2983	/* This is in any case an invalid class name. */
2984	str[0] = '\0';
2985	}
2986	str[c1] = '\0';
2987
2988	if (c == delim && *p == ']' && str[0] != '\0')
2989	{
2990	unsigned int i, offset;
2991	/* If we have no collation data we use the default
2992	collation in which each character is in a class
2993	by itself. It also means that ASCII is the
2994	character set and therefore we cannot have character
2995	with more than one byte in the multibyte
2996	representation. */
2997
2998	/* If not defined _LIBC, we push the name and
2999	`\0' for the sake of matching performance. */
3000	int datasize = c1 + 1;
3001
3002	# ifdef _LIBC
3003	int32_t idx = 0;
3004	if (nrules == 0)
3005	# endif
3006	{
3007	if (c1 != 1)
3008	FREE_STACK_RETURN (REG_ECOLLATE);
3009	}
3010	# ifdef _LIBC
3011	else
3012	{
3013	const int32_t *table;
3014	const int32_t *weights;
3015	const int32_t *extra;
3016	const int32_t *indirect;
3017	wint_t *cp;
3018
3019	/* This #include defines a local function! */
3020	# include <locale/weightwc.h>
3021
3022	if(delim == '=')
3023	{
3024	/* We push the index for equivalence class. */
3025	cp = (wint_t*)str;
3026
3027	table = (const int32_t *)
3028	_NL_CURRENT (LC_COLLATE,
3029	_NL_COLLATE_TABLEWC);
3030	weights = (const int32_t *)
3031	_NL_CURRENT (LC_COLLATE,
3032	_NL_COLLATE_WEIGHTWC);
3033	extra = (const int32_t *)
3034	_NL_CURRENT (LC_COLLATE,
3035	_NL_COLLATE_EXTRAWC);
3036	indirect = (const int32_t *)
3037	_NL_CURRENT (LC_COLLATE,
3038	_NL_COLLATE_INDIRECTWC);
3039
3040	idx = findidx ((const wint_t**)&cp);
3041	if (idx == 0 \|\| cp < (wint_t*) str + c1)
3042	/* This is no valid character. */
3043	FREE_STACK_RETURN (REG_ECOLLATE);
3044
3045	str[0] = (wchar_t)idx;
3046	}
3047	else /* delim == '.' */
3048	{
3049	/* We push collation sequence value
3050	for collating symbol. */
3051	int32_t table_size;
3052	const int32_t *symb_table;
3053	const unsigned char *extra;
3054	int32_t idx;
3055	int32_t elem;
3056	int32_t second;
3057	int32_t hash;
3058	char char_str[c1];
3059
3060	/* We have to convert the name to a single-byte
3061	string. This is possible since the names
3062	consist of ASCII characters and the internal
3063	representation is UCS4. */
3064	for (i = 0; i < c1; ++i)
3065	char_str[i] = str[i];
3066
3067	table_size =
3068	_NL_CURRENT_WORD (LC_COLLATE,
3069	_NL_COLLATE_SYMB_HASH_SIZEMB);
3070	symb_table = (const int32_t *)
3071	_NL_CURRENT (LC_COLLATE,
3072	_NL_COLLATE_SYMB_TABLEMB);
3073	extra = (const unsigned char *)
3074	_NL_CURRENT (LC_COLLATE,
3075	_NL_COLLATE_SYMB_EXTRAMB);
3076
3077	/* Locate the character in the hashing table. */
3078	hash = elem_hash (char_str, c1);
3079
3080	idx = 0;
3081	elem = hash % table_size;
3082	second = hash % (table_size - 2);
3083	while (symb_table[2 * elem] != 0)
3084	{
3085	/* First compare the hashing value. */
3086	if (symb_table[2 * elem] == hash
3087	&& c1 == extra[symb_table[2 * elem + 1]]
3088	&& memcmp (char_str,
3089	&extra[symb_table[2 * elem + 1]
3090	+ 1], c1) == 0)
3091	{
3092	/* Yep, this is the entry. */
3093	idx = symb_table[2 * elem + 1];
3094	idx += 1 + extra[idx];
3095	break;
3096	}
3097
3098	/* Next entry. */
3099	elem += second;
3100	}
3101
3102	if (symb_table[2 * elem] != 0)
3103	{
3104	/* Compute the index of the byte sequence
3105	in the table. */
3106	idx += 1 + extra[idx];
3107	/* Adjust for the alignment. */
3108	idx = (idx + 3) & ~3;
3109
3110	str[0] = (wchar_t) idx + 4;
3111	}
3112	else if (symb_table[2 * elem] == 0 && c1 == 1)
3113	{
3114	/* No valid character. Match it as a
3115	single byte character. */
3116	had_char_class = false;
3117	BUF_PUSH(str[0]);
3118	/* Update the length of characters */
3119	laststart[5]++;
3120	range_start = str[0];
3121
3122	/* Throw away the ] at the end of the
3123	collating symbol. */
3124	PATFETCH (c);
3125	/* exit from the switch block. */
3126	continue;
3127	}
3128	else
3129	FREE_STACK_RETURN (REG_ECOLLATE);
3130	}
3131	datasize = 1;
3132	}
3133	# endif
3134	/* Throw away the ] at the end of the equivalence
3135	class (or collating symbol). */
3136	PATFETCH (c);
3137
3138	/* Allocate the space for the equivalence class
3139	(or collating symbol) (and '\0' if needed). */
3140	GET_BUFFER_SPACE(datasize);
3141	/* Update the pointer to indicate end of buffer. */
3142	b += datasize;
3143
3144	if (delim == '=')
3145	{ /* equivalence class */
3146	/* Calculate the offset of char_ranges,
3147	which is next to equivalence_classes. */
3148	offset = laststart[1] + laststart[2]
3149	+ laststart[3] +6;
3150	/* Insert space. */
3151	insert_space(datasize, laststart + offset, b - 1);
3152
3153	/* Write the equivalence_class and \0. */
3154	for (i = 0 ; i < datasize ; i++)
3155	laststart[offset + i] = str[i];
3156
3157	/* Update the length of equivalence_classes. */
3158	laststart[3] += datasize;
3159	had_char_class = true;
3160	}
3161	else /* delim == '.' */
3162	{ /* collating symbol */
3163	/* Calculate the offset of the equivalence_classes,
3164	which is next to collating_symbols. */
3165	offset = laststart[1] + laststart[2] + 6;
3166	/* Insert space and write the collating_symbol
3167	and \0. */
3168	insert_space(datasize, laststart + offset, b-1);
3169	for (i = 0 ; i < datasize ; i++)
3170	laststart[offset + i] = str[i];
3171
3172	/* In re_match_2_internal if range_start < -1, we
3173	assume -range_start is the offset of the
3174	collating symbol which is specified as
3175	the character of the range start. So we assign
3176	-(laststart[1] + laststart[2] + 6) to
3177	range_start. */
3178	range_start = -(laststart[1] + laststart[2] + 6);
3179	/* Update the length of collating_symbol. */
3180	laststart[2] += datasize;
3181	had_char_class = false;
3182	}
3183	}
3184	else
3185	{
3186	c1++;
3187	while (c1--)
3188	PATUNFETCH;
3189	BUF_PUSH ('[');
3190	BUF_PUSH (delim);
3191	laststart[5] += 2; /* Update the length of characters */
3192	range_start = delim;
3193	had_char_class = false;
3194	}
3195	}
3196	else
3197	{
3198	had_char_class = false;
3199	BUF_PUSH(c);
3200	laststart[5]++; /* Update the length of characters */
3201	range_start = c;
3202	}
3203	}
3204
3205	#else /* BYTE */
3206	/* Ensure that we have enough space to push a charset: the
3207	opcode, the length count, and the bitset; 34 bytes in all. */
3208	GET_BUFFER_SPACE (34);
3209
3210	laststart = b;
3211
3212	/* We test `*p == '^' twice, instead of using an if
3213	statement, so we only need one BUF_PUSH. */
3214	BUF_PUSH (*p == '^' ? charset_not : charset);
3215	if (*p == '^')
3216	p++;
3217
3218	/* Remember the first position in the bracket expression. */
3219	p1 = p;
3220
3221	/* Push the number of bytes in the bitmap. */
3222	BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3223
3224	/* Clear the whole map. */
3225	bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3226
3227	/* charset_not matches newline according to a syntax bit. */
3228	if ((re_opcode_t) b[-2] == charset_not
3229	&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3230	SET_LIST_BIT ('\n');
3231
3232	/* Read in characters and ranges, setting map bits. */
3233	for (;;)
3234	{
3235	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3236
3237	PATFETCH (c);
3238
3239	/* \ might escape characters inside [...] and [^...]. */
3240	if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3241	{
3242	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3243
3244	PATFETCH (c1);
3245	SET_LIST_BIT (c1);
3246	range_start = c1;
3247	continue;
3248	}
3249
3250	/* Could be the end of the bracket expression. If it's
3251	not (i.e., when the bracket expression is `[]' so
3252	far), the ']' character bit gets set way below. */
3253	if (c == ']' && p != p1 + 1)
3254	break;
3255
3256	/* Look ahead to see if it's a range when the last thing
3257	was a character class. */
3258	if (had_char_class && c == '-' && *p != ']')
3259	FREE_STACK_RETURN (REG_ERANGE);
3260
3261	/* Look ahead to see if it's a range when the last thing
3262	was a character: if this is a hyphen not at the
3263	beginning or the end of a list, then it's the range
3264	operator. */
3265	if (c == '-'
3266	&& !(p - 2 >= pattern && p[-2] == '[')
3267	&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3268	&& *p != ']')
3269	{
3270	reg_errcode_t ret
3271	= byte_compile_range (range_start, &p, pend, translate,
3272	syntax, b);
3273	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3274	range_start = 0xffffffff;
3275	}
3276
3277	else if (p[0] == '-' && p[1] != ']')
3278	{ /* This handles ranges made up of characters only. */
3279	reg_errcode_t ret;
3280
3281	/* Move past the `-'. */
3282	PATFETCH (c1);
3283
3284	ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3285	if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3286	range_start = 0xffffffff;
3287	}
3288
3289	/* See if we're at the beginning of a possible character
3290	class. */
3291
3292	else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3293	{ /* Leave room for the null. */
3294	char str[CHAR_CLASS_MAX_LENGTH + 1];
3295
3296	PATFETCH (c);
3297	c1 = 0;
3298
3299	/* If pattern is `[[:'. */
3300	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3301
3302	for (;;)
3303	{
3304	PATFETCH (c);
3305	if ((c == ':' && *p == ']') \|\| p == pend)
3306	break;
3307	if (c1 < CHAR_CLASS_MAX_LENGTH)
3308	str[c1++] = c;
3309	else
3310	/* This is in any case an invalid class name. */
3311	str[0] = '\0';
3312	}
3313	str[c1] = '\0';
3314
3315	/* If isn't a word bracketed by `[:' and `:]':
3316	undo the ending character, the letters, and leave
3317	the leading `:' and `[' (but set bits for them). */
3318	if (c == ':' && *p == ']')
3319	{
3320	# if defined _LIBC \|\| WIDE_CHAR_SUPPORT
3321	boolean is_lower = STREQ (str, "lower");
3322	boolean is_upper = STREQ (str, "upper");
3323	wctype_t wt;
3324	int ch;
3325
3326	wt = IS_CHAR_CLASS (str);
3327	if (wt == 0)
3328	FREE_STACK_RETURN (REG_ECTYPE);
3329
3330	/* Throw away the ] at the end of the character
3331	class. */
3332	PATFETCH (c);
3333
3334	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3335
3336	for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3337	{
3338	# ifdef _LIBC
3339	if (__iswctype (__btowc (ch), wt))
3340	SET_LIST_BIT (ch);
3341	# else
3342	if (iswctype (btowc (ch), wt))
3343	SET_LIST_BIT (ch);
3344	# endif
3345
3346	if (translate && (is_upper \|\| is_lower)
3347	&& (ISUPPER (ch) \|\| ISLOWER (ch)))
3348	SET_LIST_BIT (ch);
3349	}
3350
3351	had_char_class = true;
3352	# else
3353	int ch;
3354	boolean is_alnum = STREQ (str, "alnum");
3355	boolean is_alpha = STREQ (str, "alpha");
3356	boolean is_blank = STREQ (str, "blank");
3357	boolean is_cntrl = STREQ (str, "cntrl");
3358	boolean is_digit = STREQ (str, "digit");
3359	boolean is_graph = STREQ (str, "graph");
3360	boolean is_lower = STREQ (str, "lower");
3361	boolean is_print = STREQ (str, "print");
3362	boolean is_punct = STREQ (str, "punct");
3363	boolean is_space = STREQ (str, "space");
3364	boolean is_upper = STREQ (str, "upper");
3365	boolean is_xdigit = STREQ (str, "xdigit");
3366
3367	if (!IS_CHAR_CLASS (str))
3368	FREE_STACK_RETURN (REG_ECTYPE);
3369
3370	/* Throw away the ] at the end of the character
3371	class. */
3372	PATFETCH (c);
3373
3374	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3375
3376	for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3377	{
3378	/* This was split into 3 if's to
3379	avoid an arbitrary limit in some compiler. */
3380	if ( (is_alnum && ISALNUM (ch))
3381	\|\| (is_alpha && ISALPHA (ch))
3382	\|\| (is_blank && ISBLANK (ch))
3383	\|\| (is_cntrl && ISCNTRL (ch)))
3384	SET_LIST_BIT (ch);
3385	if ( (is_digit && ISDIGIT (ch))
3386	\|\| (is_graph && ISGRAPH (ch))
3387	\|\| (is_lower && ISLOWER (ch))
3388	\|\| (is_print && ISPRINT (ch)))
3389	SET_LIST_BIT (ch);
3390	if ( (is_punct && ISPUNCT (ch))
3391	\|\| (is_space && ISSPACE (ch))
3392	\|\| (is_upper && ISUPPER (ch))
3393	\|\| (is_xdigit && ISXDIGIT (ch)))
3394	SET_LIST_BIT (ch);
3395	if ( translate && (is_upper \|\| is_lower)
3396	&& (ISUPPER (ch) \|\| ISLOWER (ch)))
3397	SET_LIST_BIT (ch);
3398	}
3399	had_char_class = true;
3400	# endif /* libc \|\| wctype.h */
3401	}
3402	else
3403	{
3404	c1++;
3405	while (c1--)
3406	PATUNFETCH;
3407	SET_LIST_BIT ('[');
3408	SET_LIST_BIT (':');
3409	range_start = ':';
3410	had_char_class = false;
3411	}
3412	}
3413	else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3414	{
3415	unsigned char str[MB_LEN_MAX + 1];
3416	# ifdef _LIBC
3417	uint32_t nrules =
3418	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3419	# endif
3420
3421	PATFETCH (c);
3422	c1 = 0;
3423
3424	/* If pattern is `[[='. */
3425	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3426
3427	for (;;)
3428	{
3429	PATFETCH (c);
3430	if ((c == '=' && *p == ']') \|\| p == pend)
3431	break;
3432	if (c1 < MB_LEN_MAX)
3433	str[c1++] = c;
3434	else
3435	/* This is in any case an invalid class name. */
3436	str[0] = '\0';
3437	}
3438	str[c1] = '\0';
3439
3440	if (c == '=' && *p == ']' && str[0] != '\0')
3441	{
3442	/* If we have no collation data we use the default
3443	collation in which each character is in a class
3444	by itself. It also means that ASCII is the
3445	character set and therefore we cannot have character
3446	with more than one byte in the multibyte
3447	representation. */
3448	# ifdef _LIBC
3449	if (nrules == 0)
3450	# endif
3451	{
3452	if (c1 != 1)
3453	FREE_STACK_RETURN (REG_ECOLLATE);
3454
3455	/* Throw away the ] at the end of the equivalence
3456	class. */
3457	PATFETCH (c);
3458
3459	/* Set the bit for the character. */
3460	SET_LIST_BIT (str[0]);
3461	}
3462	# ifdef _LIBC
3463	else
3464	{
3465	/* Try to match the byte sequence in `str' against
3466	those known to the collate implementation.
3467	First find out whether the bytes in `str' are
3468	actually from exactly one character. */
3469	const int32_t *table;
3470	const unsigned char *weights;
3471	const unsigned char *extra;
3472	const int32_t *indirect;
3473	int32_t idx;
3474	const unsigned char *cp = str;
3475	int ch;
3476
3477	/* This #include defines a local function! */
3478	# include <locale/weight.h>
3479
3480	table = (const int32_t *)
3481	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3482	weights = (const unsigned char *)
3483	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3484	extra = (const unsigned char *)
3485	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3486	indirect = (const int32_t *)
3487	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3488
3489	idx = findidx (&cp);
3490	if (idx == 0 \|\| cp < str + c1)
3491	/* This is no valid character. */
3492	FREE_STACK_RETURN (REG_ECOLLATE);
3493
3494	/* Throw away the ] at the end of the equivalence
3495	class. */
3496	PATFETCH (c);
3497
3498	/* Now we have to go through the whole table
3499	and find all characters which have the same
3500	first level weight.
3501
3502	XXX Note that this is not entirely correct.
3503	we would have to match multibyte sequences
3504	but this is not possible with the current
3505	implementation. */
3506	for (ch = 1; ch < 256; ++ch)
3507	/* XXX This test would have to be changed if we
3508	would allow matching multibyte sequences. */
3509	if (table[ch] > 0)
3510	{
3511	int32_t idx2 = table[ch];
3512	size_t len = weights[idx2];
3513
3514	/* Test whether the lengths match. */
3515	if (weights[idx] == len)
3516	{
3517	/* They do. Now compare the bytes of
3518	the weight. */
3519	size_t cnt = 0;
3520
3521	while (cnt < len
3522	&& (weights[idx + 1 + cnt]
3523	== weights[idx2 + 1 + cnt]))
3524	++cnt;
3525
3526	if (cnt == len)
3527	/* They match. Mark the character as
3528	acceptable. */
3529	SET_LIST_BIT (ch);
3530	}
3531	}
3532	}
3533	# endif
3534	had_char_class = true;
3535	}
3536	else
3537	{
3538	c1++;
3539	while (c1--)
3540	PATUNFETCH;
3541	SET_LIST_BIT ('[');
3542	SET_LIST_BIT ('=');
3543	range_start = '=';
3544	had_char_class = false;
3545	}
3546	}
3547	else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3548	{
3549	unsigned char str[128]; /* Should be large enough. */
3550	# ifdef _LIBC
3551	uint32_t nrules =
3552	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3553	# endif
3554
3555	PATFETCH (c);
3556	c1 = 0;
3557
3558	/* If pattern is `[[.'. */
3559	if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3560
3561	for (;;)
3562	{
3563	PATFETCH (c);
3564	if ((c == '.' && *p == ']') \|\| p == pend)
3565	break;
3566	if (c1 < sizeof (str))
3567	str[c1++] = c;
3568	else
3569	/* This is in any case an invalid class name. */
3570	str[0] = '\0';
3571	}
3572	str[c1] = '\0';
3573
3574	if (c == '.' && *p == ']' && str[0] != '\0')
3575	{
3576	/* If we have no collation data we use the default
3577	collation in which each character is the name
3578	for its own class which contains only the one
3579	character. It also means that ASCII is the
3580	character set and therefore we cannot have character
3581	with more than one byte in the multibyte
3582	representation. */
3583	# ifdef _LIBC
3584	if (nrules == 0)
3585	# endif
3586	{
3587	if (c1 != 1)
3588	FREE_STACK_RETURN (REG_ECOLLATE);
3589
3590	/* Throw away the ] at the end of the collating
3591	symbol. */
3592	PATFETCH (c);
3593
3594	/* Set the bit for the character. */
3595	SET_LIST_BIT (str[0]);
3596	range_start = ((const unsigned char *) str)[0];
3597	}
3598	# ifdef _LIBC
3599	else
3600	{
3601	/* Try to match the byte sequence in `str' against
3602	those known to the collate implementation.
3603	First find out whether the bytes in `str' are
3604	actually from exactly one character. */
3605	int32_t table_size;
3606	const int32_t *symb_table;
3607	const unsigned char *extra;
3608	int32_t idx;
3609	int32_t elem;
3610	int32_t second;
3611	int32_t hash;
3612
3613	table_size =
3614	_NL_CURRENT_WORD (LC_COLLATE,
3615	_NL_COLLATE_SYMB_HASH_SIZEMB);
3616	symb_table = (const int32_t *)
3617	_NL_CURRENT (LC_COLLATE,
3618	_NL_COLLATE_SYMB_TABLEMB);
3619	extra = (const unsigned char *)
3620	_NL_CURRENT (LC_COLLATE,
3621	_NL_COLLATE_SYMB_EXTRAMB);
3622
3623	/* Locate the character in the hashing table. */
3624	hash = elem_hash (str, c1);
3625
3626	idx = 0;
3627	elem = hash % table_size;
3628	second = hash % (table_size - 2);
3629	while (symb_table[2 * elem] != 0)
3630	{
3631	/* First compare the hashing value. */
3632	if (symb_table[2 * elem] == hash
3633	&& c1 == extra[symb_table[2 * elem + 1]]
3634	&& memcmp (str,
3635	&extra[symb_table[2 * elem + 1]
3636	+ 1],
3637	c1) == 0)
3638	{
3639	/* Yep, this is the entry. */
3640	idx = symb_table[2 * elem + 1];
3641	idx += 1 + extra[idx];
3642	break;
3643	}
3644
3645	/* Next entry. */
3646	elem += second;
3647	}
3648
3649	if (symb_table[2 * elem] == 0)
3650	/* This is no valid character. */
3651	FREE_STACK_RETURN (REG_ECOLLATE);
3652
3653	/* Throw away the ] at the end of the collating
3654	symbol. */
3655	PATFETCH (c);
3656
3657	/* Now add the multibyte character(s) we found
3658	to the accept list.
3659
3660	XXX Note that this is not entirely correct.
3661	we would have to match multibyte sequences
3662	but this is not possible with the current
3663	implementation. Also, we have to match
3664	collating symbols, which expand to more than
3665	one byte, as a whole and not allow the
3666	individual bytes. */
3667	c1 = extra[idx++];
3668	if (c1 == 1)
3669	range_start = extra[idx];
3670	while (c1-- > 0)
3671	{
3672	SET_LIST_BIT (extra[idx]);
3673	++idx;
3674	}
3675	}
3676	# endif
3677	had_char_class = false;
3678	}
3679	else
3680	{
3681	c1++;
3682	while (c1--)
3683	PATUNFETCH;
3684	SET_LIST_BIT ('[');
3685	SET_LIST_BIT ('.');
3686	range_start = '.';
3687	had_char_class = false;
3688	}
3689	}
3690	else
3691	{
3692	had_char_class = false;
3693	SET_LIST_BIT (c);
3694	range_start = c;
3695	}
3696	}
3697
3698	/* Discard any (non)matching list bytes that are all 0 at the
3699	end of the map. Decrease the map-length byte too. */
3700	while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3701	b[-1]--;
3702	b += b[-1];
3703	#endif /* WCHAR */
3704	}
3705	break;
3706
3707
3708	case '(':
3709	if (syntax & RE_NO_BK_PARENS)
3710	goto handle_open;
3711	else
3712	goto normal_char;
3713
3714
3715	case ')':
3716	if (syntax & RE_NO_BK_PARENS)
3717	goto handle_close;
3718	else
3719	goto normal_char;
3720
3721
3722	case '\n':
3723	if (syntax & RE_NEWLINE_ALT)
3724	goto handle_alt;
3725	else
3726	goto normal_char;
3727
3728
3729	case '\|':
3730	if (syntax & RE_NO_BK_VBAR)
3731	goto handle_alt;
3732	else
3733	goto normal_char;
3734
3735
3736	case '{':
3737	if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3738	goto handle_interval;
3739	else
3740	goto normal_char;
3741
3742
3743	case '\\':
3744	if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3745
3746	/* Do not translate the character after the \, so that we can
3747	distinguish, e.g., \B from \b, even if we normally would
3748	translate, e.g., B to b. */
3749	PATFETCH_RAW (c);
3750
3751	switch (c)
3752	{
3753	case '(':
3754	if (syntax & RE_NO_BK_PARENS)
3755	goto normal_backslash;
3756
3757	handle_open:
3758	bufp->re_nsub++;
3759	regnum++;
3760
3761	if (COMPILE_STACK_FULL)
3762	{
3763	RETALLOC (compile_stack.stack, compile_stack.size << 1,
3764	compile_stack_elt_t);
3765	if (compile_stack.stack == NULL) return REG_ESPACE;
3766
3767	compile_stack.size <<= 1;
3768	}
3769
3770	/* These are the values to restore when we hit end of this
3771	group. They are all relative offsets, so that if the
3772	whole pattern moves because of realloc, they will still
3773	be valid. */
3774	COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3775	COMPILE_STACK_TOP.fixup_alt_jump
3776	= fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3777	COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3778	COMPILE_STACK_TOP.regnum = regnum;
3779
3780	/* We will eventually replace the 0 with the number of
3781	groups inner to this one. But do not push a
3782	start_memory for groups beyond the last one we can
3783	represent in the compiled pattern. */
3784	if (regnum <= MAX_REGNUM)
3785	{
3786	COMPILE_STACK_TOP.inner_group_offset = b
3787	- COMPILED_BUFFER_VAR + 2;
3788	BUF_PUSH_3 (start_memory, regnum, 0);
3789	}
3790
3791	compile_stack.avail++;
3792
3793	fixup_alt_jump = 0;
3794	laststart = 0;
3795	begalt = b;
3796	/* If we've reached MAX_REGNUM groups, then this open
3797	won't actually generate any code, so we'll have to
3798	clear pending_exact explicitly. */
3799	pending_exact = 0;
3800	break;
3801
3802
3803	case ')':
3804	if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3805
3806	if (COMPILE_STACK_EMPTY)
3807	{
3808	if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3809	goto normal_backslash;
3810	else
3811	FREE_STACK_RETURN (REG_ERPAREN);
3812	}
3813
3814	handle_close:
3815	if (fixup_alt_jump)
3816	{ /* Push a dummy failure point at the end of the
3817	alternative for a possible future
3818	`pop_failure_jump' to pop. See comments at
3819	`push_dummy_failure' in `re_match_2'. */
3820	BUF_PUSH (push_dummy_failure);
3821
3822	/* We allocated space for this jump when we assigned
3823	to `fixup_alt_jump', in the `handle_alt' case below. */
3824	STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3825	}
3826
3827	/* See similar code for backslashed left paren above. */
3828	if (COMPILE_STACK_EMPTY)
3829	{
3830	if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3831	goto normal_char;
3832	else
3833	FREE_STACK_RETURN (REG_ERPAREN);
3834	}
3835
3836	/* Since we just checked for an empty stack above, this
3837	``can't happen''. */
3838	assert (compile_stack.avail != 0);
3839	{
3840	/* We don't just want to restore into `regnum', because
3841	later groups should continue to be numbered higher,
3842	as in `(ab)c(de)' -- the second group is #2. */
3843	regnum_t this_group_regnum;
3844
3845	compile_stack.avail--;
3846	begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3847	fixup_alt_jump
3848	= COMPILE_STACK_TOP.fixup_alt_jump
3849	? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3850	: 0;
3851	laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3852	this_group_regnum = COMPILE_STACK_TOP.regnum;
3853	/* If we've reached MAX_REGNUM groups, then this open
3854	won't actually generate any code, so we'll have to
3855	clear pending_exact explicitly. */
3856	pending_exact = 0;
3857
3858	/* We're at the end of the group, so now we know how many
3859	groups were inside this one. */
3860	if (this_group_regnum <= MAX_REGNUM)
3861	{
3862	UCHAR_T *inner_group_loc
3863	= COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3864
3865	*inner_group_loc = regnum - this_group_regnum;
3866	BUF_PUSH_3 (stop_memory, this_group_regnum,
3867	regnum - this_group_regnum);
3868	}
3869	}
3870	break;
3871
3872
3873	case '\|': /* `\\|'. */
3874	if (syntax & RE_LIMITED_OPS \|\| syntax & RE_NO_BK_VBAR)
3875	goto normal_backslash;
3876	handle_alt:
3877	if (syntax & RE_LIMITED_OPS)
3878	goto normal_char;
3879
3880	/* Insert before the previous alternative a jump which
3881	jumps to this alternative if the former fails. */
3882	GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3883	INSERT_JUMP (on_failure_jump, begalt,
3884	b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3885	pending_exact = 0;
3886	b += 1 + OFFSET_ADDRESS_SIZE;
3887
3888	/* The alternative before this one has a jump after it
3889	which gets executed if it gets matched. Adjust that
3890	jump so it will jump to this alternative's analogous
3891	jump (put in below, which in turn will jump to the next
3892	(if any) alternative's such jump, etc.). The last such
3893	jump jumps to the correct final destination. A picture:
3894	_____ _____
3895	\| \| \| \|
3896	\| v \| v
3897	a \| b \| c
3898
3899	If we are at `b', then fixup_alt_jump right now points to a
3900	three-byte space after `a'. We'll put in the jump, set
3901	fixup_alt_jump to right after `b', and leave behind three
3902	bytes which we'll fill in when we get to after `c'. */
3903
3904	if (fixup_alt_jump)
3905	STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3906
3907	/* Mark and leave space for a jump after this alternative,
3908	to be filled in later either by next alternative or
3909	when we know we're at the end of a series of alternatives. */
3910	fixup_alt_jump = b;
3911	GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3912	b += 1 + OFFSET_ADDRESS_SIZE;
3913
3914	laststart = 0;
3915	begalt = b;
3916	break;
3917
3918
3919	case '{':
3920	/* If \{ is a literal. */
3921	if (!(syntax & RE_INTERVALS)
3922	/* If we're at `\{' and it's not the open-interval
3923	operator. */
3924	\|\| (syntax & RE_NO_BK_BRACES))
3925	goto normal_backslash;
3926
3927	handle_interval:
3928	{
3929	/* If got here, then the syntax allows intervals. */
3930
3931	/* At least (most) this many matches must be made. */
3932	int lower_bound = -1, upper_bound = -1;
3933
3934	/* Place in the uncompiled pattern (i.e., just after
3935	the '{') to go back to if the interval is invalid. */
3936	const CHAR_T *beg_interval = p;
3937
3938	if (p == pend)
3939	goto invalid_interval;
3940
3941	GET_UNSIGNED_NUMBER (lower_bound);
3942
3943	if (c == ',')
3944	{
3945	GET_UNSIGNED_NUMBER (upper_bound);
3946	if (upper_bound < 0)
3947	upper_bound = RE_DUP_MAX;
3948	}
3949	else
3950	/* Interval such as `{1}' => match exactly once. */
3951	upper_bound = lower_bound;
3952
3953	if (! (0 <= lower_bound && lower_bound <= upper_bound))
3954	goto invalid_interval;
3955
3956	if (!(syntax & RE_NO_BK_BRACES))
3957	{
3958	if (c != '\\' \|\| p == pend)
3959	goto invalid_interval;
3960	PATFETCH (c);
3961	}
3962
3963	if (c != '}')
3964	goto invalid_interval;
3965
3966	/* If it's invalid to have no preceding re. */
3967	if (!laststart)
3968	{
3969	if (syntax & RE_CONTEXT_INVALID_OPS
3970	&& !(syntax & RE_INVALID_INTERVAL_ORD))
3971	FREE_STACK_RETURN (REG_BADRPT);
3972	else if (syntax & RE_CONTEXT_INDEP_OPS)
3973	laststart = b;
3974	else
3975	goto unfetch_interval;
3976	}
3977
3978	/* We just parsed a valid interval. */
3979
3980	if (RE_DUP_MAX < upper_bound)
3981	FREE_STACK_RETURN (REG_BADBR);
3982
3983	/* If the upper bound is zero, don't want to succeed at
3984	all; jump from `laststart' to `b + 3', which will be
3985	the end of the buffer after we insert the jump. */
3986	/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
3987	instead of 'b + 3'. */
3988	if (upper_bound == 0)
3989	{
3990	GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3991	INSERT_JUMP (jump, laststart, b + 1
3992	+ OFFSET_ADDRESS_SIZE);
3993	b += 1 + OFFSET_ADDRESS_SIZE;
3994	}
3995
3996	/* Otherwise, we have a nontrivial interval. When
3997	we're all done, the pattern will look like:
3998	set_number_at <jump count> <upper bound>
3999	set_number_at <succeed_n count> <lower bound>
4000	succeed_n <after jump addr> <succeed_n count>
4001	<body of loop>
4002	jump_n <succeed_n addr> <jump count>
4003	(The upper bound and `jump_n' are omitted if
4004	`upper_bound' is 1, though.) */
4005	else
4006	{ /* If the upper bound is > 1, we need to insert
4007	more at the end of the loop. */
4008	unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
4009	(upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
4010
4011	GET_BUFFER_SPACE (nbytes);
4012
4013	/* Initialize lower bound of the `succeed_n', even
4014	though it will be set during matching by its
4015	attendant `set_number_at' (inserted next),
4016	because `re_compile_fastmap' needs to know.
4017	Jump to the `jump_n' we might insert below. */
4018	INSERT_JUMP2 (succeed_n, laststart,
4019	b + 1 + 2 * OFFSET_ADDRESS_SIZE
4020	+ (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
4021	, lower_bound);
4022	b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4023
4024	/* Code to initialize the lower bound. Insert
4025	before the `succeed_n'. The `5' is the last two
4026	bytes of this `set_number_at', plus 3 bytes of
4027	the following `succeed_n'. */
4028	/* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
4029	is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
4030	of the following `succeed_n'. */
4031	PREFIX(insert_op2) (set_number_at, laststart, 1
4032	+ 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
4033	b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4034
4035	if (upper_bound > 1)
4036	{ /* More than one repetition is allowed, so
4037	append a backward jump to the `succeed_n'
4038	that starts this interval.
4039
4040	When we've reached this during matching,
4041	we'll have matched the interval once, so
4042	jump back only `upper_bound - 1' times. */
4043	STORE_JUMP2 (jump_n, b, laststart
4044	+ 2 * OFFSET_ADDRESS_SIZE + 1,
4045	upper_bound - 1);
4046	b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4047
4048	/* The location we want to set is the second
4049	parameter of the `jump_n'; that is `b-2' as
4050	an absolute address. `laststart' will be
4051	the `set_number_at' we're about to insert;
4052	`laststart+3' the number to set, the source
4053	for the relative address. But we are
4054	inserting into the middle of the pattern --
4055	so everything is getting moved up by 5.
4056	Conclusion: (b - 2) - (laststart + 3) + 5,
4057	i.e., b - laststart.
4058
4059	We insert this at the beginning of the loop
4060	so that if we fail during matching, we'll
4061	reinitialize the bounds. */
4062	PREFIX(insert_op2) (set_number_at, laststart,
4063	b - laststart,
4064	upper_bound - 1, b);
4065	b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4066	}
4067	}
4068	pending_exact = 0;
4069	break;
4070
4071	invalid_interval:
4072	if (!(syntax & RE_INVALID_INTERVAL_ORD))
4073	FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
4074	unfetch_interval:
4075	/* Match the characters as literals. */
4076	p = beg_interval;
4077	c = '{';
4078	if (syntax & RE_NO_BK_BRACES)
4079	goto normal_char;
4080	else
4081	goto normal_backslash;
4082	}
4083
4084	#ifdef emacs
4085	/* There is no way to specify the before_dot and after_dot
4086	operators. rms says this is ok. --karl */
4087	case '=':
4088	BUF_PUSH (at_dot);
4089	break;
4090
4091	case 's':
4092	laststart = b;
4093	PATFETCH (c);
4094	BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
4095	break;
4096
4097	case 'S':
4098	laststart = b;
4099	PATFETCH (c);
4100	BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4101	break;
4102	#endif /* emacs */
4103
4104
4105	case 'w':
4106	if (syntax & RE_NO_GNU_OPS)
4107	goto normal_char;
4108	laststart = b;
4109	BUF_PUSH (wordchar);
4110	break;
4111
4112
4113	case 'W':
4114	if (syntax & RE_NO_GNU_OPS)
4115	goto normal_char;
4116	laststart = b;
4117	BUF_PUSH (notwordchar);
4118	break;
4119
4120
4121	case '<':
4122	if (syntax & RE_NO_GNU_OPS)
4123	goto normal_char;
4124	BUF_PUSH (wordbeg);
4125	break;
4126
4127	case '>':
4128	if (syntax & RE_NO_GNU_OPS)
4129	goto normal_char;
4130	BUF_PUSH (wordend);
4131	break;
4132
4133	case 'b':
4134	if (syntax & RE_NO_GNU_OPS)
4135	goto normal_char;
4136	BUF_PUSH (wordbound);
4137	break;
4138
4139	case 'B':
4140	if (syntax & RE_NO_GNU_OPS)
4141	goto normal_char;
4142	BUF_PUSH (notwordbound);
4143	break;
4144
4145	case '`':
4146	if (syntax & RE_NO_GNU_OPS)
4147	goto normal_char;
4148	BUF_PUSH (begbuf);
4149	break;
4150
4151	case '\'':
4152	if (syntax & RE_NO_GNU_OPS)
4153	goto normal_char;
4154	BUF_PUSH (endbuf);
4155	break;
4156
4157	case '1': case '2': case '3': case '4': case '5':
4158	case '6': case '7': case '8': case '9':
4159	if (syntax & RE_NO_BK_REFS)
4160	goto normal_char;
4161
4162	c1 = c - '0';
4163
4164	if (c1 > regnum)
4165	FREE_STACK_RETURN (REG_ESUBREG);
4166
4167	/* Can't back reference to a subexpression if inside of it. */
4168	if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4169	goto normal_char;
4170
4171	laststart = b;
4172	BUF_PUSH_2 (duplicate, c1);
4173	break;
4174
4175
4176	case '+':
4177	case '?':
4178	if (syntax & RE_BK_PLUS_QM)
4179	goto handle_plus;
4180	else
4181	goto normal_backslash;
4182
4183	default:
4184	normal_backslash:
4185	/* You might think it would be useful for \ to mean
4186	not to translate; but if we don't translate it
4187	it will never match anything. */
4188	c = TRANSLATE (c);
4189	goto normal_char;
4190	}
4191	break;
4192
4193
4194	default:
4195	/* Expects the character in `c'. */
4196	normal_char:
4197	/* If no exactn currently being built. */
4198	if (!pending_exact
4199	#ifdef WCHAR
4200	/* If last exactn handle binary(or character) and
4201	new exactn handle character(or binary). */
4202	\|\| is_exactn_bin != is_binary[p - 1 - pattern]
4203	#endif /* WCHAR */
4204
4205	/* If last exactn not at current position. */
4206	\|\| pending_exact + *pending_exact + 1 != b
4207
4208	/* We have only one byte following the exactn for the count. */
4209	\|\| *pending_exact == (1 << BYTEWIDTH) - 1
4210
4211	/* If followed by a repetition operator. */
4212	\|\| p == '' \|\| *p == '^'
4213	\|\| ((syntax & RE_BK_PLUS_QM)
4214	? *p == '\\' && (p[1] == '+' \|\| p[1] == '?')
4215	: (p == '+' \|\| p == '?'))
4216	\|\| ((syntax & RE_INTERVALS)
4217	&& ((syntax & RE_NO_BK_BRACES)
4218	? *p == '{'
4219	: (p[0] == '\\' && p[1] == '{'))))
4220	{
4221	/* Start building a new exactn. */
4222
4223	laststart = b;
4224
4225	#ifdef WCHAR
4226	/* Is this exactn binary data or character? */
4227	is_exactn_bin = is_binary[p - 1 - pattern];
4228	if (is_exactn_bin)
4229	BUF_PUSH_2 (exactn_bin, 0);
4230	else
4231	BUF_PUSH_2 (exactn, 0);
4232	#else
4233	BUF_PUSH_2 (exactn, 0);
4234	#endif /* WCHAR */
4235	pending_exact = b - 1;
4236	}
4237
4238	BUF_PUSH (c);
4239	(*pending_exact)++;
4240	break;
4241	} /* switch (c) */
4242	} /* while p != pend */
4243
4244
4245	/* Through the pattern now. */
4246
4247	if (fixup_alt_jump)
4248	STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4249
4250	if (!COMPILE_STACK_EMPTY)
4251	FREE_STACK_RETURN (REG_EPAREN);
4252
4253	/* If we don't want backtracking, force success
4254	the first time we reach the end of the compiled pattern. */
4255	if (syntax & RE_NO_POSIX_BACKTRACKING)
4256	BUF_PUSH (succeed);
4257
4258	#ifdef WCHAR
4259	free (pattern);
4260	free (mbs_offset);
4261	free (is_binary);
4262	#endif
4263	free (compile_stack.stack);
4264
4265	/* We have succeeded; set the length of the buffer. */
4266	#ifdef WCHAR
4267	bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4268	#else
4269	bufp->used = b - bufp->buffer;
4270	#endif
4271
4272	#ifdef DEBUG
4273	if (debug)
4274	{
4275	DEBUG_PRINT1 ("\nCompiled pattern: \n");
4276	PREFIX(print_compiled_pattern) (bufp);
4277	}
4278	#endif /* DEBUG */
4279
4280	#ifndef MATCH_MAY_ALLOCATE
4281	/* Initialize the failure stack to the largest possible stack. This
4282	isn't necessary unless we're trying to avoid calling alloca in
4283	the search and match routines. */
4284	{
4285	int num_regs = bufp->re_nsub + 1;
4286
4287	/* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4288	is strictly greater than re_max_failures, the largest possible stack
4289	is 2 * re_max_failures failure points. */
4290	if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4291	{
4292	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4293
4294	# ifdef emacs
4295	if (! fail_stack.stack)
4296	fail_stack.stack
4297	= (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4298	* sizeof (PREFIX(fail_stack_elt_t)));
4299	else
4300	fail_stack.stack
4301	= (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4302	(fail_stack.size
4303	* sizeof (PREFIX(fail_stack_elt_t))));
4304	# else /* not emacs */
4305	if (! fail_stack.stack)
4306	fail_stack.stack
4307	= (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
4308	* sizeof (PREFIX(fail_stack_elt_t)));
4309	else
4310	fail_stack.stack
4311	= (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
4312	(fail_stack.size
4313	* sizeof (PREFIX(fail_stack_elt_t))));
4314	# endif /* not emacs */
4315	}
4316
4317	PREFIX(regex_grow_registers) (num_regs);
4318	}
4319	#endif /* not MATCH_MAY_ALLOCATE */
4320
4321	return REG_NOERROR;
4322	} /* regex_compile */
4323
4324	/* Subroutines for `regex_compile'. */
4325
4326	/* Store OP at LOC followed by two-byte integer parameter ARG. */
4327	/* ifdef WCHAR, integer parameter is 1 wchar_t. */
4328
4329	static void
4330	PREFIX(store_op1) (op, loc, arg)
4331	re_opcode_t op;
4332	UCHAR_T *loc;
4333	int arg;
4334	{
4335	*loc = (UCHAR_T) op;
4336	STORE_NUMBER (loc + 1, arg);
4337	}
4338
4339
4340	/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
4341	/* ifdef WCHAR, integer parameter is 1 wchar_t. */
4342
4343	static void
4344	PREFIX(store_op2) (op, loc, arg1, arg2)
4345	re_opcode_t op;
4346	UCHAR_T *loc;
4347	int arg1, arg2;
4348	{
4349	*loc = (UCHAR_T) op;
4350	STORE_NUMBER (loc + 1, arg1);
4351	STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4352	}
4353
4354
4355	/* Copy the bytes from LOC to END to open up three bytes of space at LOC
4356	for OP followed by two-byte integer parameter ARG. */
4357	/* ifdef WCHAR, integer parameter is 1 wchar_t. */
4358
4359	static void
4360	PREFIX(insert_op1) (op, loc, arg, end)
4361	re_opcode_t op;
4362	UCHAR_T *loc;
4363	int arg;
4364	UCHAR_T *end;
4365	{
4366	register UCHAR_T *pfrom = end;
4367	register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4368
4369	while (pfrom != loc)
4370	--pto = --pfrom;
4371
4372	PREFIX(store_op1) (op, loc, arg);
4373	}
4374
4375
4376	/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
4377	/* ifdef WCHAR, integer parameter is 1 wchar_t. */
4378
4379	static void
4380	PREFIX(insert_op2) (op, loc, arg1, arg2, end)
4381	re_opcode_t op;
4382	UCHAR_T *loc;
4383	int arg1, arg2;
4384	UCHAR_T *end;
4385	{
4386	register UCHAR_T *pfrom = end;
4387	register UCHAR_T pto = end + 1 + 2 OFFSET_ADDRESS_SIZE;
4388
4389	while (pfrom != loc)
4390	--pto = --pfrom;
4391
4392	PREFIX(store_op2) (op, loc, arg1, arg2);
4393	}
4394
4395
4396	/* P points to just after a ^ in PATTERN. Return true if that ^ comes
4397	after an alternative or a begin-subexpression. We assume there is at
4398	least one character before the ^. */
4399
4400	static boolean
4401	PREFIX(at_begline_loc_p) (pattern, p, syntax)
4402	const CHAR_T pattern, p;
4403	reg_syntax_t syntax;
4404	{
4405	const CHAR_T *prev = p - 2;
4406	boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4407
4408	return
4409	/* After a subexpression? */
4410	(*prev == '(' && (syntax & RE_NO_BK_PARENS \|\| prev_prev_backslash))
4411	/* After an alternative? */
4412	\|\| (*prev == '\|' && (syntax & RE_NO_BK_VBAR \|\| prev_prev_backslash));
4413	}
4414
4415
4416	/* The dual of at_begline_loc_p. This one is for $. We assume there is
4417	at least one character after the $, i.e., `P < PEND'. */
4418
4419	static boolean
4420	PREFIX(at_endline_loc_p) (p, pend, syntax)
4421	const CHAR_T p, pend;
4422	reg_syntax_t syntax;
4423	{
4424	const CHAR_T *next = p;
4425	boolean next_backslash = *next == '\\';
4426	const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
4427
4428	return
4429	/* Before a subexpression? */
4430	(syntax & RE_NO_BK_PARENS ? *next == ')'
4431	: next_backslash && next_next && *next_next == ')')
4432	/* Before an alternative? */
4433	\|\| (syntax & RE_NO_BK_VBAR ? *next == '\|'
4434	: next_backslash && next_next && *next_next == '\|');
4435	}
4436
4437	#else /* not INSIDE_RECURSION */
4438
4439	/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4440	false if it's not. */
4441
4442	static boolean
4443	group_in_compile_stack (compile_stack, regnum)
4444	compile_stack_type compile_stack;
4445	regnum_t regnum;
4446	{
4447	int this_element;
4448
4449	for (this_element = compile_stack.avail - 1;
4450	this_element >= 0;
4451	this_element--)
4452	if (compile_stack.stack[this_element].regnum == regnum)
4453	return true;
4454
4455	return false;
4456	}
4457	#endif /* not INSIDE_RECURSION */
4458
4459	#ifdef INSIDE_RECURSION
4460
4461	#ifdef WCHAR
4462	/* This insert space, which size is "num", into the pattern at "loc".
4463	"end" must point the end of the allocated buffer. */
4464	static void
4465	insert_space (num, loc, end)
4466	int num;
4467	CHAR_T *loc;
4468	CHAR_T *end;
4469	{
4470	register CHAR_T *pto = end;
4471	register CHAR_T *pfrom = end - num;
4472
4473	while (pfrom >= loc)
4474	pto-- = pfrom--;
4475	}
4476	#endif /* WCHAR */
4477
4478	#ifdef WCHAR
4479	static reg_errcode_t
4480	wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
4481	char_set)
4482	CHAR_T range_start_char;
4483	const CHAR_T *p_ptr, pend;
4484	CHAR_T char_set, b;
4485	RE_TRANSLATE_TYPE translate;
4486	reg_syntax_t syntax;
4487	{
4488	const CHAR_T p = p_ptr;
4489	CHAR_T range_start, range_end;
4490	reg_errcode_t ret;
4491	# ifdef _LIBC
4492	uint32_t nrules;
4493	uint32_t start_val, end_val;
4494	# endif
4495	if (p == pend)
4496	return REG_ERANGE;
4497
4498	# ifdef _LIBC
4499	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4500	if (nrules != 0)
4501	{
4502	const char collseq = (const char ) _NL_CURRENT(LC_COLLATE,
4503	_NL_COLLATE_COLLSEQWC);
4504	const unsigned char extra = (const unsigned char )
4505	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4506
4507	if (range_start_char < -1)
4508	{
4509	/* range_start is a collating symbol. */
4510	int32_t *wextra;
4511	/* Retreive the index and get collation sequence value. */
4512	wextra = (int32_t*)(extra + char_set[-range_start_char]);
4513	start_val = wextra[1 + *wextra];
4514	}
4515	else
4516	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4517
4518	end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4519
4520	/* Report an error if the range is empty and the syntax prohibits
4521	this. */
4522	ret = ((syntax & RE_NO_EMPTY_RANGES)
4523	&& (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4524
4525	/* Insert space to the end of the char_ranges. */
4526	insert_space(2, b - char_set[5] - 2, b - 1);
4527	*(b - char_set[5] - 2) = (wchar_t)start_val;
4528	*(b - char_set[5] - 1) = (wchar_t)end_val;
4529	char_set[4]++; /* ranges_index */
4530	}
4531	else
4532	# endif
4533	{
4534	range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4535	range_start_char;
4536	range_end = TRANSLATE (p[0]);
4537	/* Report an error if the range is empty and the syntax prohibits
4538	this. */
4539	ret = ((syntax & RE_NO_EMPTY_RANGES)
4540	&& (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4541
4542	/* Insert space to the end of the char_ranges. */
4543	insert_space(2, b - char_set[5] - 2, b - 1);
4544	*(b - char_set[5] - 2) = range_start;
4545	*(b - char_set[5] - 1) = range_end;
4546	char_set[4]++; /* ranges_index */
4547	}
4548	/* Have to increment the pointer into the pattern string, so the
4549	caller isn't still at the ending character. */
4550	(*p_ptr)++;
4551
4552	return ret;
4553	}
4554	#else /* BYTE */
4555	/* Read the ending character of a range (in a bracket expression) from the
4556	uncompiled pattern *P_PTR (which ends at PEND). We assume the
4557	starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
4558	Then we set the translation of all bits between the starting and
4559	ending characters (inclusive) in the compiled pattern B.
4560
4561	Return an error code.
4562
4563	We use these short variable names so we can use the same macros as
4564	`regex_compile' itself. */
4565
4566	static reg_errcode_t
4567	byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
4568	unsigned int range_start_char;
4569	const char *p_ptr, pend;
4570	RE_TRANSLATE_TYPE translate;
4571	reg_syntax_t syntax;
4572	unsigned char *b;
4573	{
4574	unsigned this_char;
4575	const char p = p_ptr;
4576	reg_errcode_t ret;
4577	# if _LIBC
4578	const unsigned char *collseq;
4579	unsigned int start_colseq;
4580	unsigned int end_colseq;
4581	# else
4582	unsigned end_char;
4583	# endif
4584
4585	if (p == pend)
4586	return REG_ERANGE;
4587
4588	/* Have to increment the pointer into the pattern string, so the
4589	caller isn't still at the ending character. */
4590	(*p_ptr)++;
4591
4592	/* Report an error if the range is empty and the syntax prohibits this. */
4593	ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4594
4595	# if _LIBC
4596	collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4597	_NL_COLLATE_COLLSEQMB);
4598
4599	start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4600	end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4601	for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4602	{
4603	unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4604
4605	if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4606	{
4607	SET_LIST_BIT (TRANSLATE (this_char));
4608	ret = REG_NOERROR;
4609	}
4610	}
4611	# else
4612	/* Here we see why `this_char' has to be larger than an `unsigned
4613	char' -- we would otherwise go into an infinite loop, since all
4614	characters <= 0xff. */
4615	range_start_char = TRANSLATE (range_start_char);
4616	/* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
4617	and some compilers cast it to int implicitly, so following for_loop
4618	may fall to (almost) infinite loop.
4619	e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4620	To avoid this, we cast p[0] to unsigned int and truncate it. */
4621	end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4622
4623	for (this_char = range_start_char; this_char <= end_char; ++this_char)
4624	{
4625	SET_LIST_BIT (TRANSLATE (this_char));
4626	ret = REG_NOERROR;
4627	}
4628	# endif
4629
4630	return ret;
4631	}
4632	#endif /* WCHAR */
4633
4634
4635	/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4636	BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
4637	characters can start a string that matches the pattern. This fastmap
4638	is used by re_search to skip quickly over impossible starting points.
4639
4640	The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4641	area as BUFP->fastmap.
4642
4643	We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4644	the pattern buffer.
4645
4646	Returns 0 if we succeed, -2 if an internal error. */
4647
4648	#ifdef WCHAR
4649	/* local function for re_compile_fastmap.
4650	truncate wchar_t character to char. */
4651	static unsigned char truncate_wchar (CHAR_T c);
4652
4653	static unsigned char
4654	truncate_wchar (c)
4655	CHAR_T c;
4656	{
4657	unsigned char buf[MB_CUR_MAX];
4658	mbstate_t state;
4659	int retval;
4660	memset (&state, '\0', sizeof (state));
4661	# ifdef _LIBC
4662	retval = __wcrtomb (buf, c, &state);
4663	# else
4664	retval = wcrtomb (buf, c, &state);
4665	# endif
4666	return retval > 0 ? buf[0] : (unsigned char) c;
4667	}
4668	#endif /* WCHAR */
4669
4670	static int
4671	PREFIX(re_compile_fastmap) (bufp)
4672	struct re_pattern_buffer *bufp;
4673	{
4674	int j, k;
4675	#ifdef MATCH_MAY_ALLOCATE
4676	PREFIX(fail_stack_type) fail_stack;
4677	#endif
4678	#ifndef REGEX_MALLOC
4679	char *destination;
4680	#endif
4681
4682	register char *fastmap = bufp->fastmap;
4683
4684	#ifdef WCHAR
4685	/* We need to cast pattern to (wchar_t*), because we casted this compiled
4686	pattern to (char) in regex_compile. /
4687	UCHAR_T pattern = (UCHAR_T)bufp->buffer;
4688	register UCHAR_T pend = (UCHAR_T) (bufp->buffer + bufp->used);
4689	#else /* BYTE */
4690	UCHAR_T *pattern = bufp->buffer;
4691	register UCHAR_T *pend = pattern + bufp->used;
4692	#endif /* WCHAR */
4693	UCHAR_T *p = pattern;
4694
4695	#ifdef REL_ALLOC
4696	/* This holds the pointer to the failure stack, when
4697	it is allocated relocatably. */
4698	fail_stack_elt_t *failure_stack_ptr;
4699	#endif
4700
4701	/* Assume that each path through the pattern can be null until
4702	proven otherwise. We set this false at the bottom of switch
4703	statement, to which we get only if a particular path doesn't
4704	match the empty string. */
4705	boolean path_can_be_null = true;
4706
4707	/* We aren't doing a `succeed_n' to begin with. */
4708	boolean succeed_n_p = false;
4709
4710	assert (fastmap != NULL && p != NULL);
4711
4712	INIT_FAIL_STACK ();
4713	bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
4714	bufp->fastmap_accurate = 1; /* It will be when we're done. */
4715	bufp->can_be_null = 0;
4716
4717	while (1)
4718	{
4719	if (p == pend \|\| *p == succeed)
4720	{
4721	/* We have reached the (effective) end of pattern. */
4722	if (!FAIL_STACK_EMPTY ())
4723	{
4724	bufp->can_be_null \|= path_can_be_null;
4725
4726	/* Reset for next path. */
4727	path_can_be_null = true;
4728
4729	p = fail_stack.stack[--fail_stack.avail].pointer;
4730
4731	continue;
4732	}
4733	else
4734	break;
4735	}
4736
4737	/* We should never be about to go beyond the end of the pattern. */
4738	assert (p < pend);
4739
4740	switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4741	{
4742
4743	/* I guess the idea here is to simply not bother with a fastmap
4744	if a backreference is used, since it's too hard to figure out
4745	the fastmap for the corresponding group. Setting
4746	`can_be_null' stops `re_search_2' from using the fastmap, so
4747	that is all we do. */
4748	case duplicate:
4749	bufp->can_be_null = 1;
4750	goto done;
4751
4752
4753	/* Following are the cases which match a character. These end
4754	with `break'. */
4755
4756	#ifdef WCHAR
4757	case exactn:
4758	fastmap[truncate_wchar(p[1])] = 1;
4759	break;
4760	#else /* BYTE */
4761	case exactn:
4762	fastmap[p[1]] = 1;
4763	break;
4764	#endif /* WCHAR */
4765	#ifdef MBS_SUPPORT
4766	case exactn_bin:
4767	fastmap[p[1]] = 1;
4768	break;
4769	#endif
4770
4771	#ifdef WCHAR
4772	/* It is hard to distinguish fastmap from (multi byte) characters
4773	which depends on current locale. */
4774	case charset:
4775	case charset_not:
4776	case wordchar:
4777	case notwordchar:
4778	bufp->can_be_null = 1;
4779	goto done;
4780	#else /* BYTE */
4781	case charset:
4782	for (j = p++ BYTEWIDTH - 1; j >= 0; j--)
4783	if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4784	fastmap[j] = 1;
4785	break;
4786
4787
4788	case charset_not:
4789	/* Chars beyond end of map must be allowed. */
4790	for (j = p BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4791	fastmap[j] = 1;
4792
4793	for (j = p++ BYTEWIDTH - 1; j >= 0; j--)
4794	if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4795	fastmap[j] = 1;
4796	break;
4797
4798
4799	case wordchar:
4800	for (j = 0; j < (1 << BYTEWIDTH); j++)
4801	if (SYNTAX (j) == Sword)
4802	fastmap[j] = 1;
4803	break;
4804
4805
4806	case notwordchar:
4807	for (j = 0; j < (1 << BYTEWIDTH); j++)
4808	if (SYNTAX (j) != Sword)
4809	fastmap[j] = 1;
4810	break;
4811	#endif /* WCHAR */
4812
4813	case anychar:
4814	{
4815	int fastmap_newline = fastmap['\n'];
4816
4817	/* `.' matches anything ... */
4818	for (j = 0; j < (1 << BYTEWIDTH); j++)
4819	fastmap[j] = 1;
4820
4821	/* ... except perhaps newline. */
4822	if (!(bufp->syntax & RE_DOT_NEWLINE))
4823	fastmap['\n'] = fastmap_newline;
4824
4825	/* Return if we have already set `can_be_null'; if we have,
4826	then the fastmap is irrelevant. Something's wrong here. */
4827	else if (bufp->can_be_null)
4828	goto done;
4829
4830	/* Otherwise, have to check alternative paths. */
4831	break;
4832	}
4833
4834	#ifdef emacs
4835	case syntaxspec:
4836	k = *p++;
4837	for (j = 0; j < (1 << BYTEWIDTH); j++)
4838	if (SYNTAX (j) == (enum syntaxcode) k)
4839	fastmap[j] = 1;
4840	break;
4841
4842
4843	case notsyntaxspec:
4844	k = *p++;
4845	for (j = 0; j < (1 << BYTEWIDTH); j++)
4846	if (SYNTAX (j) != (enum syntaxcode) k)
4847	fastmap[j] = 1;
4848	break;
4849
4850
4851	/* All cases after this match the empty string. These end with
4852	`continue'. */
4853
4854
4855	case before_dot:
4856	case at_dot:
4857	case after_dot:
4858	continue;
4859	#endif /* emacs */
4860
4861
4862	case no_op:
4863	case begline:
4864	case endline:
4865	case begbuf:
4866	case endbuf:
4867	case wordbound:
4868	case notwordbound:
4869	case wordbeg:
4870	case wordend:
4871	case push_dummy_failure:
4872	continue;
4873
4874
4875	case jump_n:
4876	case pop_failure_jump:
4877	case maybe_pop_jump:
4878	case jump:
4879	case jump_past_alt:
4880	case dummy_failure_jump:
4881	EXTRACT_NUMBER_AND_INCR (j, p);
4882	p += j;
4883	if (j > 0)
4884	continue;
4885
4886	/* Jump backward implies we just went through the body of a
4887	loop and matched nothing. Opcode jumped to should be
4888	`on_failure_jump' or `succeed_n'. Just treat it like an
4889	ordinary jump. For a * loop, it has pushed its failure
4890	point already; if so, discard that as redundant. */
4891	if ((re_opcode_t) *p != on_failure_jump
4892	&& (re_opcode_t) *p != succeed_n)
4893	continue;
4894
4895	p++;
4896	EXTRACT_NUMBER_AND_INCR (j, p);
4897	p += j;
4898
4899	/* If what's on the stack is where we are now, pop it. */
4900	if (!FAIL_STACK_EMPTY ()
4901	&& fail_stack.stack[fail_stack.avail - 1].pointer == p)
4902	fail_stack.avail--;
4903
4904	continue;
4905
4906
4907	case on_failure_jump:
4908	case on_failure_keep_string_jump:
4909	handle_on_failure_jump:
4910	EXTRACT_NUMBER_AND_INCR (j, p);
4911
4912	/* For some patterns, e.g., `(a?)?', `p+j' here points to the
4913	end of the pattern. We don't want to push such a point,
4914	since when we restore it above, entering the switch will
4915	increment `p' past the end of the pattern. We don't need
4916	to push such a point since we obviously won't find any more
4917	fastmap entries beyond `pend'. Such a pattern can match
4918	the null string, though. */
4919	if (p + j < pend)
4920	{
4921	if (!PUSH_PATTERN_OP (p + j, fail_stack))
4922	{
4923	RESET_FAIL_STACK ();
4924	return -2;
4925	}
4926	}
4927	else
4928	bufp->can_be_null = 1;
4929
4930	if (succeed_n_p)
4931	{
4932	EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
4933	succeed_n_p = false;
4934	}
4935
4936	continue;
4937
4938
4939	case succeed_n:
4940	/* Get to the number of times to succeed. */
4941	p += OFFSET_ADDRESS_SIZE;
4942
4943	/* Increment p past the n for when k != 0. */
4944	EXTRACT_NUMBER_AND_INCR (k, p);
4945	if (k == 0)
4946	{
4947	p -= 2 * OFFSET_ADDRESS_SIZE;
4948	succeed_n_p = true; /* Spaghetti code alert. */
4949	goto handle_on_failure_jump;
4950	}
4951	continue;
4952
4953
4954	case set_number_at:
4955	p += 2 * OFFSET_ADDRESS_SIZE;
4956	continue;
4957
4958
4959	case start_memory:
4960	case stop_memory:
4961	p += 2;
4962	continue;
4963
4964
4965	default:
4966	abort (); /* We have listed all the cases. */
4967	} /* switch p++ /
4968
4969	/* Getting here means we have found the possible starting
4970	characters for one path of the pattern -- and that the empty
4971	string does not match. We need not follow this path further.
4972	Instead, look at the next alternative (remembered on the
4973	stack), or quit if no more. The test at the top of the loop
4974	does these things. */
4975	path_can_be_null = false;
4976	p = pend;
4977	} /* while p */
4978
4979	/* Set `can_be_null' for the last path (also the first path, if the
4980	pattern is empty). */
4981	bufp->can_be_null \|= path_can_be_null;
4982
4983	done:
4984	RESET_FAIL_STACK ();
4985	return 0;
4986	}
4987
4988	#else /* not INSIDE_RECURSION */
4989
/* Public entry point: compute the fastmap for BUFP with the wide-character
   worker when MBS_SUPPORT is compiled in and the current locale is
   multibyte (MB_CUR_MAX != 1), and with the byte worker otherwise.
   The worker's return value is passed through unchanged.  */
int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif
  return byte_re_compile_fastmap (bufp);
} /* re_compile_fastmap */
5001	#ifdef _LIBC
5002	weak_alias (__re_compile_fastmap, re_compile_fastmap)
5003	#endif
5004
5005
5006
5007	/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
5008	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
5009	this memory for recording register information. STARTS and ENDS
5010	must be allocated using the malloc library routine, and must each
5011	be at least NUM_REGS * sizeof (regoff_t) bytes long.
5012
5013	If NUM_REGS == 0, then subsequent matches should allocate their own
5014	register data.
5015
5016	Unless this function is called, the first search or match using
5017	PATTERN_BUFFER will allocate its own register data, without
5018	freeing the old data. */
5019
5020	void
5021	re_set_registers (bufp, regs, num_regs, starts, ends)
5022	struct re_pattern_buffer *bufp;
5023	struct re_registers *regs;
5024	unsigned num_regs;
5025	regoff_t starts, ends;
5026	{
5027	if (num_regs)
5028	{
5029	bufp->regs_allocated = REGS_REALLOCATE;
5030	regs->num_regs = num_regs;
5031	regs->start = starts;
5032	regs->end = ends;
5033	}
5034	else
5035	{
5036	bufp->regs_allocated = REGS_UNALLOCATED;
5037	regs->num_regs = 0;
5038	regs->start = regs->end = (regoff_t *) 0;
5039	}
5040	}
5041	#ifdef _LIBC
5042	weak_alias (__re_set_registers, re_set_registers)
5043	#endif
5044
5045
5046	/* Searching routines. */
5047
/* Single-string convenience form of re_search_2: search STRING (of
   length SIZE) for the pattern in BUFP starting at STARTPOS over RANGE.
   There is no first string, and SIZE is also passed as the stop
   position, so the caller cannot say where to stop matching.  The result
   of re_search_2 is returned unchanged.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  int result;

  result = re_search_2 (bufp, NULL, 0,	/* Empty first string.  */
			string, size,
			startpos, range, regs,
			size);		/* Stop at the end of STRING.  */
  return result;
}
5061	#ifdef _LIBC
5062	weak_alias (__re_search, re_search)
5063	#endif
5064
5065
/* Using the compiled pattern in BUFP->buffer, first tries to match the
   virtual concatenation of STRING1 and STRING2, starting first at index
   STARTPOS, then at STARTPOS + 1, and so on.

   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.

   RANGE is how far to scan while trying to match.  RANGE = 0 means try
   only at STARTPOS; in general, the last start tried is STARTPOS +
   RANGE.

   In REGS, return the indices of the virtual concatenation of STRING1
   and STRING2 that matched the entire BUFP->buffer and its contained
   subexpressions.

   Do not consider matching one past the index STOP in the virtual
   concatenation of STRING1 and STRING2.

   We return either the position in the strings at which the match was
   found, -1 if no match, or -2 if error (such as failure
   stack overflow).

   This public wrapper only dispatches: the wide-character worker is used
   when MBS_SUPPORT is compiled in and MB_CUR_MAX != 1, the byte worker
   otherwise.  */

int
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int startpos;
     int range;
     struct re_registers *regs;
     int stop;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			    range, regs, stop);
  else
# endif
    return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			     range, regs, stop);
} /* re_search_2 */
5106	#ifdef _LIBC
5107	weak_alias (__re_search_2, re_search_2)
5108	#endif
5109
5110	#endif /* not INSIDE_RECURSION */
5111
5112	#ifdef INSIDE_RECURSION
5113
/* FREE_VAR (VAR): release VAR if it is non-null, then reset it to NULL.
   The release goes through REGEX_FREE when MATCH_MAY_ALLOCATE is defined
   and through free otherwise.
   NOTE: this expands to TWO statements (an `if' followed by an
   assignment), so after an unbraced `if'/`else' only the release is
   governed by the condition; the assignment to NULL always runs.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
#else
# define FREE_VAR(var) if (var) free (var); var = NULL
#endif

#ifdef WCHAR
/* Size threshold: buffers for strings longer than this are released
   with free, shorter ones with FREE_VAR.  */
# define MAX_ALLOCA_SIZE	2000

/* Release the wide-string and offset buffers of both input strings.
   Relies on the locals size1, size2, wcs_string1, wcs_string2,
   mbs_offset1 and mbs_offset2 being in scope at the point of use.  */
# define FREE_WCS_BUFFERS()						\
  do {									\
    if (size1 <= MAX_ALLOCA_SIZE)					\
      {									\
        FREE_VAR (wcs_string1);						\
        FREE_VAR (mbs_offset1);						\
      }									\
    else								\
      {									\
        free (wcs_string1);						\
        free (mbs_offset1);						\
      }									\
    if (size2 <= MAX_ALLOCA_SIZE)					\
      {									\
        FREE_VAR (wcs_string2);						\
        FREE_VAR (mbs_offset2);						\
      }									\
    else								\
      {									\
        free (wcs_string2);						\
        free (mbs_offset2);						\
      }									\
  } while (0)

#endif
5148
5149
5150	static int
5151	PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
5152	regs, stop)
5153	struct re_pattern_buffer *bufp;
5154	const char string1, string2;
5155	int size1, size2;
5156	int startpos;
5157	int range;
5158	struct re_registers *regs;
5159	int stop;
5160	{
5161	int val;
5162	register char *fastmap = bufp->fastmap;
5163	register RE_TRANSLATE_TYPE translate = bufp->translate;
5164	int total_size = size1 + size2;
5165	int endpos = startpos + range;
5166	#ifdef WCHAR
5167	/* We need wchar_t* buffers correspond to cstring1, cstring2. */
5168	wchar_t wcs_string1 = NULL, wcs_string2 = NULL;
5169	/* We need the size of wchar_t buffers correspond to csize1, csize2. */
5170	int wcs_size1 = 0, wcs_size2 = 0;
5171	/* offset buffer for optimizatoin. See convert_mbs_to_wc. */
5172	int mbs_offset1 = NULL, mbs_offset2 = NULL;
5173	/* They hold whether each wchar_t is binary data or not. */
5174	char *is_binary = NULL;
5175	#endif /* WCHAR */
5176
5177	/* Check for out-of-range STARTPOS. */
5178	if (startpos < 0 \|\| startpos > total_size)
5179	return -1;
5180
5181	/* Fix up RANGE if it might eventually take us outside
5182	the virtual concatenation of STRING1 and STRING2.
5183	Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
5184	if (endpos < 0)
5185	range = 0 - startpos;
5186	else if (endpos > total_size)
5187	range = total_size - startpos;
5188
5189	/* If the search isn't to be a backwards one, don't waste time in a
5190	search for a pattern that must be anchored. */
5191	if (bufp->used > 0 && range > 0
5192	&& ((re_opcode_t) bufp->buffer[0] == begbuf
5193	/* `begline' is like `begbuf' if it cannot match at newlines. */
5194	\|\| ((re_opcode_t) bufp->buffer[0] == begline
5195	&& !bufp->newline_anchor)))
5196	{
5197	if (startpos > 0)
5198	return -1;
5199	else
5200	range = 1;
5201	}
5202
5203	#ifdef emacs
5204	/* In a forward search for something that starts with \=.
5205	don't keep searching past point. */
5206	if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
5207	{
5208	range = PT - startpos;
5209	if (range <= 0)
5210	return -1;
5211	}
5212	#endif /* emacs */
5213
5214	/* Update the fastmap now if not correct already. */
5215	if (fastmap && !bufp->fastmap_accurate)
5216	if (re_compile_fastmap (bufp) == -2)
5217	return -2;
5218
5219	#ifdef WCHAR
5220	/* Allocate wchar_t array for wcs_string1 and wcs_string2 and
5221	fill them with converted string. */
5222	if (size1 != 0)
5223	{
5224	if (size1 > MAX_ALLOCA_SIZE)
5225	{
5226	wcs_string1 = TALLOC (size1 + 1, CHAR_T);
5227	mbs_offset1 = TALLOC (size1 + 1, int);
5228	is_binary = TALLOC (size1 + 1, char);
5229	}
5230	else
5231	{
5232	wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
5233	mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
5234	is_binary = REGEX_TALLOC (size1 + 1, char);
5235	}
5236	if (!wcs_string1 \|\| !mbs_offset1 \|\| !is_binary)
5237	{
5238	if (size1 > MAX_ALLOCA_SIZE)
5239	{
5240	free (wcs_string1);
5241	free (mbs_offset1);
5242	free (is_binary);
5243	}
5244	else
5245	{
5246	FREE_VAR (wcs_string1);
5247	FREE_VAR (mbs_offset1);
5248	FREE_VAR (is_binary);
5249	}
5250	return -2;
5251	}
5252	wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5253	mbs_offset1, is_binary);
5254	wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */
5255	if (size1 > MAX_ALLOCA_SIZE)
5256	free (is_binary);
5257	else
5258	FREE_VAR (is_binary);
5259	}
5260	if (size2 != 0)
5261	{
5262	if (size2 > MAX_ALLOCA_SIZE)
5263	{
5264	wcs_string2 = TALLOC (size2 + 1, CHAR_T);
5265	mbs_offset2 = TALLOC (size2 + 1, int);
5266	is_binary = TALLOC (size2 + 1, char);
5267	}
5268	else
5269	{
5270	wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
5271	mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
5272	is_binary = REGEX_TALLOC (size2 + 1, char);
5273	}
5274	if (!wcs_string2 \|\| !mbs_offset2 \|\| !is_binary)
5275	{
5276	FREE_WCS_BUFFERS ();
5277	if (size2 > MAX_ALLOCA_SIZE)
5278	free (is_binary);
5279	else
5280	FREE_VAR (is_binary);
5281	return -2;
5282	}
5283	wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5284	mbs_offset2, is_binary);
5285	wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */
5286	if (size2 > MAX_ALLOCA_SIZE)
5287	free (is_binary);
5288	else
5289	FREE_VAR (is_binary);
5290	}
5291	#endif /* WCHAR */
5292
5293
5294	/* Loop through the string, looking for a place to start matching. */
5295	for (;;)
5296	{
5297	/* If a fastmap is supplied, skip quickly over characters that
5298	cannot be the start of a match. If the pattern can match the
5299	null string, however, we don't need to skip characters; we want
5300	the first null string. */
5301	if (fastmap && startpos < total_size && !bufp->can_be_null)
5302	{
5303	if (range > 0) /* Searching forwards. */
5304	{
5305	register const char *d;
5306	register int lim = 0;
5307	int irange = range;
5308
5309	if (startpos < size1 && startpos + range >= size1)
5310	lim = range - (size1 - startpos);
5311
5312	d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5313
5314	/* Written out as an if-else to avoid testing `translate'
5315	inside the loop. */
5316	if (translate)
5317	while (range > lim
5318	&& !fastmap[(unsigned char)
5319	translate[(unsigned char) *d++]])
5320	range--;
5321	else
5322	while (range > lim && !fastmap[(unsigned char) *d++])
5323	range--;
5324
5325	startpos += irange - range;
5326	}
5327	else /* Searching backwards. */
5328	{
5329	register CHAR_T c = (size1 == 0 \|\| startpos >= size1
5330	? string2[startpos - size1]
5331	: string1[startpos]);
5332
5333	if (!fastmap[(unsigned char) TRANSLATE (c)])
5334	goto advance;
5335	}
5336	}
5337
5338	/* If can't match the null string, and that's all we have left, fail. */
5339	if (range >= 0 && startpos == total_size && fastmap
5340	&& !bufp->can_be_null)
5341	{
5342	#ifdef WCHAR
5343	FREE_WCS_BUFFERS ();
5344	#endif
5345	return -1;
5346	}
5347
5348	#ifdef WCHAR
5349	val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5350	size2, startpos, regs, stop,
5351	wcs_string1, wcs_size1,
5352	wcs_string2, wcs_size2,
5353	mbs_offset1, mbs_offset2);
5354	#else /* BYTE */
5355	val = byte_re_match_2_internal (bufp, string1, size1, string2,
5356	size2, startpos, regs, stop);
5357	#endif /* BYTE */
5358
5359	#ifndef REGEX_MALLOC
5360	# ifdef C_ALLOCA
5361	alloca (0);
5362	# endif
5363	#endif
5364
5365	if (val >= 0)
5366	{
5367	#ifdef WCHAR
5368	FREE_WCS_BUFFERS ();
5369	#endif
5370	return startpos;
5371	}
5372
5373	if (val == -2)
5374	{
5375	#ifdef WCHAR
5376	FREE_WCS_BUFFERS ();
5377	#endif
5378	return -2;
5379	}
5380
5381	advance:
5382	if (!range)
5383	break;
5384	else if (range > 0)
5385	{
5386	range--;
5387	startpos++;
5388	}
5389	else
5390	{
5391	range++;
5392	startpos--;
5393	}
5394	}
5395	#ifdef WCHAR
5396	FREE_WCS_BUFFERS ();
5397	#endif
5398	return -1;
5399	}
5400
#ifdef WCHAR
/* Map PTR, a pointer into one of the wchar_t search strings `string1'
   or `string2', to a multibyte string offset.  The offset is looked up
   in the matching mbs_offset table (0 is used when that table is
   NULL); for a pointer into the second string `csize1' is added on
   top.  See convert_mbs_to_wcs.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t) (FIRST_STRING_P (ptr)					\
	       ? (mbs_offset1 ? mbs_offset1[(ptr) - string1] : 0)	\
	       : csize1 + (mbs_offset2 ? mbs_offset2[(ptr) - string2] : 0)))
#else /* BYTE */
/* Map PTR, a pointer into one of the search strings `string1' or
   `string2', to an offset from the start of their concatenation:
   a pointer into the second string is biased by `size1'.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t) (FIRST_STRING_P (ptr)					\
	       ? (ptr) - string1					\
	       : (ptr) - string2 + size1))
#endif /* WCHAR */
5419
5420	/* Macros for dealing with the split strings in re_match_2. */
5421
/* Nonzero while the current scan limit `dend' is the stopping point
   chosen for the first string (`end_match_1').  */
#define MATCHING_IN_FIRST_STRING  (end_match_1 == dend)
5423
/* Call before fetching a character with *d.  While `d' has reached
   `dend': if that limit is already the end of string2's matchable
   region the match attempt fails (jump to `fail'); otherwise scanning
   switches over to the start of string2.  */
#define PREFETCH()							\
  while (d == dend)							\
    {									\
      if (dend != end_match_2)						\
        {								\
          /* End of string1 => advance to string2.  */			\
          d = string2;							\
          dend = end_match_2;						\
        }								\
      else								\
        /* End of string2 => fail.  */					\
        goto fail;							\
    }
5436
/* Test whether D sits at the very beginning or at the very end of the
   virtual concatenation of `string1' and `string2'.  If there is only
   one string, it is `string2'.  Note that AT_STRINGS_BEG is true for
   every D when `size2' is zero.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
5441
5442
/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  In every case the
   *character* is what gets classified, so each arm of the selection
   must dereference its pointer.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX: a character is
   word-constituent when iswalnum accepts it or it is L'_'.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t) ((d) == end1 ? *string2				\
		       : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0	\
   || ((d) == end1 ? *string2						\
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
/* A character is word-constituent when its SYNTAX class is Sword.  */
# define WORDCHAR_P(d)							\
  (SYNTAX ((d) == end1 ? *string2					\
	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   == Sword)
#endif /* WCHAR */
5460
/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  The two ends of the virtual string always
   count as boundaries.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P ((d) - 1) != WORDCHAR_P (d))
#endif
5469
/* Free everything we malloc.  What there is to release depends on two
   build switches, giving four variants of the macro:
     - with MATCH_MAY_ALLOCATE the failure stack and the register
       vectors are released;
     - with WCHAR the wide-character strings and their offset tables
       are released too, but only when `cant_free_wcs_buf' is zero.  */
#ifdef WCHAR
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# endif /* not MATCH_MAY_ALLOCATE */
#else /* BYTE */
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
# endif /* not MATCH_MAY_ALLOCATE */
#endif /* BYTE */
5524
/* Sentinel register numbers meaning "no register is active".  They
   have to satisfy several constraints:
     - they must not be valid register values; registers are limited to
       255 (only one byte of the pattern holds the register number), so
       anything larger than 255 will do;
     - they must differ by 1, because of NUM_FAILURE_ITEMS above;
     - the value for the lowest register must be larger than the value
       for the highest register, so that we do not try to actually save
       any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (1 + NO_HIGHEST_ACTIVE_REG)
5534
5535
5536	#else /* not INSIDE_RECURSION */
5537	/* Matching routines. */
5538
5539	#ifndef emacs /* Emacs never uses this. */
/* re_match is like re_match_2 except it takes only a single string:
   the internal two-string matcher is called with an empty first
   string, STRING (of length SIZE) as the second string, and SIZE as
   the stop position.  Its result is returned unchanged.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int match_len;

# ifdef MBS_SUPPORT
  /* The byte matcher is used only when MB_CUR_MAX is 1; otherwise the
     wide-character matcher sets up its own buffers (all the wchar_t
     arguments are passed as NULL/0).  */
  if (MB_CUR_MAX == 1)
# endif
    match_len = byte_re_match_2_internal (bufp, NULL, 0, string, size,
					   pos, regs, size);
# ifdef MBS_SUPPORT
  else
    match_len = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
					  pos, regs, size,
					  NULL, 0, NULL, 0, NULL, NULL);
# endif

# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* The matcher is a separate function so that an alloca cleanup can
     be forced here, after it has returned.  */
  alloca (0);
# endif
  return match_len;
}
5566	# ifdef _LIBC
5567	weak_alias (__re_match, re_match)
5568	# endif
5569	#endif /* not emacs */
5570
5571	#endif /* not INSIDE_RECURSION */
5572
5573	#ifdef INSIDE_RECURSION
5574	static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
5575	UCHAR_T *end,
5576	PREFIX(register_info_type) *reg_info));
5577	static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p,
5578	UCHAR_T *end,
5579	PREFIX(register_info_type) *reg_info));
5580	static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
5581	UCHAR_T *end,
5582	PREFIX(register_info_type) *reg_info));
5583	static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T s1, const CHAR_T s2,
5584	int len, char *translate));
5585	#else /* not INSIDE_RECURSION */
5586
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */

int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int result;
# ifdef MBS_SUPPORT
  /* In a multibyte locale use the wide-character matcher; passing
     NULL/0 for its wchar_t arguments makes it set those buffers up
     itself.  */
  if (MB_CUR_MAX != 1)
    result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
				      pos, regs, stop,
				      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
				       pos, regs, stop);

# ifndef REGEX_MALLOC
#  ifdef C_ALLOCA
  /* The matcher is a separate function so that an alloca cleanup can
     be forced here, after it has returned.  */
  alloca (0);
#  endif
# endif
  return result;
}
5627	#ifdef _LIBC
5628	weak_alias (__re_match_2, re_match_2)
5629	#endif
5630
5631	#endif /* not INSIDE_RECURSION */
5632
5633	#ifdef INSIDE_RECURSION
5634
5635	#ifdef WCHAR
5636	static int count_mbs_length PARAMS ((int *, int));
5637
/* Convert LENGTH, an offset into a multibyte string, into an index
   into OFFSET_BUFFER, i.e. find I such that OFFSET_BUFFER[I] == LENGTH.
   The search is a bisection, so OFFSET_BUFFER is expected to be in
   ascending order with OFFSET_BUFFER[I] >= I for all I.  See
   convert_mbs_to_wcs.

   Returns -1 if LENGTH is negative or no entry equals LENGTH, 0 if
   OFFSET_BUFFER is NULL, and the matching index otherwise.

   OFFSET_BUFFER[LENGTH] is read, so a non-NULL buffer must hold at
   least LENGTH + 1 entries.  */

static int
count_mbs_length(offset_buffer, length)
     int *offset_buffer;
     int length;
{
  int upper, lower;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* The answer cannot exceed LENGTH (because for all i,
     offset_buffer[i] >= i), so search the open interval (0, length).  */
  upper = length;
  lower = 0;

  /* Stop once no index lies strictly between the two bounds.  */
  while (upper - lower > 1)
    {
      /* Written this way so that the midpoint cannot overflow `int'
	 the way (lower + upper) / 2 can for large lengths.  */
      int middle = lower + (upper - lower) / 2;

      if (offset_buffer[middle] > length)
	upper = middle;
      else if (offset_buffer[middle] < length)
	lower = middle;
      else
	return middle;
    }

  /* LENGTH does not fall on a character boundary.  */
  return -1;
}
5681	#endif /* WCHAR */
5682
5683	/* This is a separate function so that we can force an alloca cleanup
5684	afterwards. */
5685	#ifdef WCHAR
5686	static int
5687	wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
5688	regs, stop, string1, size1, string2, size2,
5689	mbs_offset1, mbs_offset2)
5690	struct re_pattern_buffer *bufp;
5691	const char cstring1, cstring2;
5692	int csize1, csize2;
5693	int pos;
5694	struct re_registers *regs;
5695	int stop;
5696	/* string1 == string2 == NULL means string1/2, size1/2 and
5697	mbs_offset1/2 need seting up in this function. */
5698	/* We need wchar_t* buffers correspond to cstring1, cstring2. */
5699	wchar_t string1, string2;
5700	/* We need the size of wchar_t buffers correspond to csize1, csize2. */
5701	int size1, size2;
5702	/* offset buffer for optimizatoin. See convert_mbs_to_wc. */
5703	int mbs_offset1, mbs_offset2;
5704	#else /* BYTE */
5705	static int
5706	byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
5707	regs, stop)
5708	struct re_pattern_buffer *bufp;
5709	const char string1, string2;
5710	int size1, size2;
5711	int pos;
5712	struct re_registers *regs;
5713	int stop;
5714	#endif /* BYTE */
5715	{
5716	/* General temporaries. */
5717	int mcnt;
5718	UCHAR_T *p1;
5719	#ifdef WCHAR
5720	/* They hold whether each wchar_t is binary data or not. */
5721	char *is_binary = NULL;
5722	/* If true, we can't free string1/2, mbs_offset1/2. */
5723	int cant_free_wcs_buf = 1;
5724	#endif /* WCHAR */
5725
5726	/* Just past the end of the corresponding string. */
5727	const CHAR_T end1, end2;
5728
5729	/* Pointers into string1 and string2, just past the last characters in
5730	each to consider matching. */
5731	const CHAR_T end_match_1, end_match_2;
5732
5733	/* Where we are in the data, and the end of the current string. */
5734	const CHAR_T d, dend;
5735
5736	/* Where we are in the pattern, and the end of the pattern. */
5737	#ifdef WCHAR
5738	UCHAR_T pattern, p;
5739	register UCHAR_T *pend;
5740	#else /* BYTE */
5741	UCHAR_T *p = bufp->buffer;
5742	register UCHAR_T *pend = p + bufp->used;
5743	#endif /* WCHAR */
5744
5745	/* Mark the opcode just after a start_memory, so we can test for an
5746	empty subpattern when we get to the stop_memory. */
5747	UCHAR_T *just_past_start_mem = 0;
5748
5749	/* We use this to map every character in the string. */
5750	RE_TRANSLATE_TYPE translate = bufp->translate;
5751
5752	/* Failure point stack. Each place that can handle a failure further
5753	down the line pushes a failure point on this stack. It consists of
5754	restart, regend, and reg_info for all registers corresponding to
5755	the subexpressions we're currently inside, plus the number of such
5756	registers, and, finally, two char *'s. The first char * is where
5757	to resume scanning the pattern; the second one is where to resume
5758	scanning the strings. If the latter is zero, the failure point is
5759	a ``dummy''; if a failure happens and the failure point is a dummy,
5760	it gets discarded and the next next one is tried. */
5761	#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
5762	PREFIX(fail_stack_type) fail_stack;
5763	#endif
5764	#ifdef DEBUG
5765	static unsigned failure_id;
5766	unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5767	#endif
5768
5769	#ifdef REL_ALLOC
5770	/* This holds the pointer to the failure stack, when
5771	it is allocated relocatably. */
5772	fail_stack_elt_t *failure_stack_ptr;
5773	#endif
5774
5775	/* We fill all the registers internally, independent of what we
5776	return, for use in backreferences. The number here includes
5777	an element for register zero. */
5778	size_t num_regs = bufp->re_nsub + 1;
5779
5780	/* The currently active registers. */
5781	active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5782	active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5783
5784	/* Information on the contents of registers. These are pointers into
5785	the input strings; they record just what was matched (on this
5786	attempt) by a subexpression part of the pattern, that is, the
5787	regnum-th regstart pointer points to where in the pattern we began
5788	matching and the regnum-th regend points to right after where we
5789	stopped matching the regnum-th subexpression. (The zeroth register
5790	keeps track of what the whole pattern matches.) */
5791	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5792	const CHAR_T regstart, regend;
5793	#endif
5794
5795	/* If a group that's operated upon by a repetition operator fails to
5796	match anything, then the register for its start will need to be
5797	restored because it will have been set to wherever in the string we
5798	are when we last see its open-group operator. Similarly for a
5799	register's end. */
5800	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5801	const CHAR_T old_regstart, old_regend;
5802	#endif
5803
5804	/* The is_active field of reg_info helps us keep track of which (possibly
5805	nested) subexpressions we are currently in. The matched_something
5806	field of reg_info[reg_num] helps us tell whether or not we have
5807	matched any of the pattern so far this time through the reg_num-th
5808	subexpression. These two fields get reset each time through any
5809	loop their register is in. */
5810	#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
5811	PREFIX(register_info_type) *reg_info;
5812	#endif
5813
5814	/* The following record the register info as found in the above
5815	variables when we find a match better than any we've seen before.
5816	This happens as we backtrack through the failure points, which in
5817	turn happens only if we have not yet matched the entire string. */
5818	unsigned best_regs_set = false;
5819	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5820	const CHAR_T best_regstart, best_regend;
5821	#endif
5822
5823	/* Logically, this is `best_regend[0]'. But we don't want to have to
5824	allocate space for that if we're not allocating space for anything
5825	else (see below). Also, we never need info about register 0 for
5826	any of the other register vectors, and it seems rather a kludge to
5827	treat `best_regend' differently than the rest. So we keep track of
5828	the end of the best match so far in a separate variable. We
5829	initialize this to NULL so that when we backtrack the first time
5830	and need to test it, it's not garbage. */
5831	const CHAR_T *match_end = NULL;
5832
5833	/* This helps SET_REGS_MATCHED avoid doing redundant work. */
5834	int set_regs_matched_done = 0;
5835
5836	/* Used when we pop values we don't care about. */
5837	#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5838	const CHAR_T **reg_dummy;
5839	PREFIX(register_info_type) *reg_info_dummy;
5840	#endif
5841
5842	#ifdef DEBUG
5843	/* Counts the total number of registers pushed. */
5844	unsigned num_regs_pushed = 0;
5845	#endif
5846
5847	DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5848
5849	INIT_FAIL_STACK ();
5850
5851	#ifdef MATCH_MAY_ALLOCATE
5852	/* Do not bother to initialize all the register variables if there are
5853	no groups in the pattern, as it takes a fair amount of time. If
5854	there are groups, we include space for register 0 (the whole
5855	pattern), even though we never use it, since it simplifies the
5856	array indexing. We should fix this. */
5857	if (bufp->re_nsub)
5858	{
5859	regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5860	regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5861	old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5862	old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5863	best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5864	best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5865	reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5866	reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5867	reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5868
5869	if (!(regstart && regend && old_regstart && old_regend && reg_info
5870	&& best_regstart && best_regend && reg_dummy && reg_info_dummy))
5871	{
5872	FREE_VARIABLES ();
5873	return -2;
5874	}
5875	}
5876	else
5877	{
5878	/* We must initialize all our variables to NULL, so that
5879	`FREE_VARIABLES' doesn't try to free them. */
5880	regstart = regend = old_regstart = old_regend = best_regstart
5881	= best_regend = reg_dummy = NULL;
5882	reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5883	}
5884	#endif /* MATCH_MAY_ALLOCATE */
5885
5886	/* The starting position is bogus. */
5887	#ifdef WCHAR
5888	if (pos < 0 \|\| pos > csize1 + csize2)
5889	#else /* BYTE */
5890	if (pos < 0 \|\| pos > size1 + size2)
5891	#endif
5892	{
5893	FREE_VARIABLES ();
5894	return -1;
5895	}
5896
5897	#ifdef WCHAR
5898	/* Allocate wchar_t array for string1 and string2 and
5899	fill them with converted string. */
5900	if (string1 == NULL && string2 == NULL)
5901	{
5902	/* We need seting up buffers here. */
5903
5904	/* We must free wcs buffers in this function. */
5905	cant_free_wcs_buf = 0;
5906
5907	if (csize1 != 0)
5908	{
5909	string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
5910	mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5911	is_binary = REGEX_TALLOC (csize1 + 1, char);
5912	if (!string1 \|\| !mbs_offset1 \|\| !is_binary)
5913	{
5914	FREE_VAR (string1);
5915	FREE_VAR (mbs_offset1);
5916	FREE_VAR (is_binary);
5917	return -2;
5918	}
5919	}
5920	if (csize2 != 0)
5921	{
5922	string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
5923	mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5924	is_binary = REGEX_TALLOC (csize2 + 1, char);
5925	if (!string2 \|\| !mbs_offset2 \|\| !is_binary)
5926	{
5927	FREE_VAR (string1);
5928	FREE_VAR (mbs_offset1);
5929	FREE_VAR (string2);
5930	FREE_VAR (mbs_offset2);
5931	FREE_VAR (is_binary);
5932	return -2;
5933	}
5934	size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5935	mbs_offset2, is_binary);
5936	string2[size2] = L'\0'; /* for a sentinel */
5937	FREE_VAR (is_binary);
5938	}
5939	}
5940
5941	/* We need to cast pattern to (wchar_t*), because we casted this compiled
5942	pattern to (char*) in regex_compile. */
5943	p = pattern = (CHAR_T*)bufp->buffer;
5944	pend = (CHAR_T*)(bufp->buffer + bufp->used);
5945
5946	#endif /* WCHAR */
5947
5948	/* Initialize subexpression text positions to -1 to mark ones that no
5949	start_memory/stop_memory has been seen for. Also initialize the
5950	register information struct. */
5951	for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5952	{
5953	regstart[mcnt] = regend[mcnt]
5954	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
5955
5956	REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
5957	IS_ACTIVE (reg_info[mcnt]) = 0;
5958	MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5959	EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5960	}
5961
5962	/* We move `string1' into `string2' if the latter's empty -- but not if
5963	`string1' is null. */
5964	if (size2 == 0 && string1 != NULL)
5965	{
5966	string2 = string1;
5967	size2 = size1;
5968	string1 = 0;
5969	size1 = 0;
5970	#ifdef WCHAR
5971	mbs_offset2 = mbs_offset1;
5972	csize2 = csize1;
5973	mbs_offset1 = NULL;
5974	csize1 = 0;
5975	#endif
5976	}
5977	end1 = string1 + size1;
5978	end2 = string2 + size2;
5979
5980	/* Compute where to stop matching, within the two strings. */
5981	#ifdef WCHAR
5982	if (stop <= csize1)
5983	{
5984	mcnt = count_mbs_length(mbs_offset1, stop);
5985	end_match_1 = string1 + mcnt;
5986	end_match_2 = string2;
5987	}
5988	else
5989	{
5990	if (stop > csize1 + csize2)
5991	stop = csize1 + csize2;
5992	end_match_1 = end1;
5993	mcnt = count_mbs_length(mbs_offset2, stop-csize1);
5994	end_match_2 = string2 + mcnt;
5995	}
5996	if (mcnt < 0)
5997	{ /* count_mbs_length return error. */
5998	FREE_VARIABLES ();
5999	return -1;
6000	}
6001	#else
6002	if (stop <= size1)
6003	{
6004	end_match_1 = string1 + stop;
6005	end_match_2 = string2;
6006	}
6007	else
6008	{
6009	end_match_1 = end1;
6010	end_match_2 = string2 + stop - size1;
6011	}
6012	#endif /* WCHAR */
6013
6014	/* `p' scans through the pattern as `d' scans through the data.
6015	`dend' is the end of the input string that `d' points within. `d'
6016	is advanced into the following input string whenever necessary, but
6017	this happens before fetching; therefore, at the beginning of the
6018	loop, `d' can be pointing at the end of a string, but it cannot
6019	equal `string2'. */
6020	#ifdef WCHAR
6021	if (size1 > 0 && pos <= csize1)
6022	{
6023	mcnt = count_mbs_length(mbs_offset1, pos);
6024	d = string1 + mcnt;
6025	dend = end_match_1;
6026	}
6027	else
6028	{
6029	mcnt = count_mbs_length(mbs_offset2, pos-csize1);
6030	d = string2 + mcnt;
6031	dend = end_match_2;
6032	}
6033
6034	if (mcnt < 0)
6035	{ /* count_mbs_length return error. */
6036	FREE_VARIABLES ();
6037	return -1;
6038	}
6039	#else
6040	if (size1 > 0 && pos <= size1)
6041	{
6042	d = string1 + pos;
6043	dend = end_match_1;
6044	}
6045	else
6046	{
6047	d = string2 + pos - size1;
6048	dend = end_match_2;
6049	}
6050	#endif /* WCHAR */
6051
6052	DEBUG_PRINT1 ("The compiled pattern is:\n");
6053	DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
6054	DEBUG_PRINT1 ("The string to match is: `");
6055	DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
6056	DEBUG_PRINT1 ("'\n");
6057
6058	/* This loops over pattern commands. It exits by returning from the
6059	function if the match is complete, or it drops through if the match
6060	fails at this starting point in the input data. */
6061	for (;;)
6062	{
6063	#ifdef _LIBC
6064	DEBUG_PRINT2 ("\n%p: ", p);
6065	#else
6066	DEBUG_PRINT2 ("\n0x%x: ", p);
6067	#endif
6068
6069	if (p == pend)
6070	{ /* End of pattern means we might have succeeded. */
6071	DEBUG_PRINT1 ("end of pattern ... ");
6072
6073	/* If we haven't matched the entire string, and we want the
6074	longest match, try backtracking. */
6075	if (d != end_match_2)
6076	{
6077	/* 1 if this match ends in the same string (string1 or string2)
6078	as the best previous match. */
6079	boolean same_str_p = (FIRST_STRING_P (match_end)
6080	== MATCHING_IN_FIRST_STRING);
6081	/* 1 if this match is the best seen so far. */
6082	boolean best_match_p;
6083
6084	/* AIX compiler got confused when this was combined
6085	with the previous declaration. */
6086	if (same_str_p)
6087	best_match_p = d > match_end;
6088	else
6089	best_match_p = !MATCHING_IN_FIRST_STRING;
6090
6091	DEBUG_PRINT1 ("backtracking.\n");
6092
6093	if (!FAIL_STACK_EMPTY ())
6094	{ /* More failure points to try. */
6095
6096	/* If exceeds best match so far, save it. */
6097	if (!best_regs_set \|\| best_match_p)
6098	{
6099	best_regs_set = true;
6100	match_end = d;
6101
6102	DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
6103
6104	for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6105	{
6106	best_regstart[mcnt] = regstart[mcnt];
6107	best_regend[mcnt] = regend[mcnt];
6108	}
6109	}
6110	goto fail;
6111	}
6112
6113	/* If no failure points, don't restore garbage. And if
6114	last match is real best match, don't restore second
6115	best one. */
6116	else if (best_regs_set && !best_match_p)
6117	{
6118	restore_best_regs:
6119	/* Restore best match. It may happen that `dend ==
6120	end_match_1' while the restored d is in string2.
6121	For example, the pattern `x.y.z' against the
6122	strings `x-' and `y-z-', if the two strings are
6123	not consecutive in memory. */
6124	DEBUG_PRINT1 ("Restoring best registers.\n");
6125
6126	d = match_end;
6127	dend = ((d >= string1 && d <= end1)
6128	? end_match_1 : end_match_2);
6129
6130	for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6131	{
6132	regstart[mcnt] = best_regstart[mcnt];
6133	regend[mcnt] = best_regend[mcnt];
6134	}
6135	}
6136	} /* d != end_match_2 */
6137
6138	succeed_label:
6139	DEBUG_PRINT1 ("Accepting match.\n");
6140	/* If caller wants register contents data back, do it. */
6141	if (regs && !bufp->no_sub)
6142	{
6143	/* Have the register data arrays been allocated? */
6144	if (bufp->regs_allocated == REGS_UNALLOCATED)
6145	{ /* No. So allocate them with malloc. We need one
6146	extra element beyond `num_regs' for the `-1' marker
6147	GNU code uses. */
6148	regs->num_regs = MAX (RE_NREGS, num_regs + 1);
6149	regs->start = TALLOC (regs->num_regs, regoff_t);
6150	regs->end = TALLOC (regs->num_regs, regoff_t);
6151	if (regs->start == NULL \|\| regs->end == NULL)
6152	{
6153	FREE_VARIABLES ();
6154	return -2;
6155	}
6156	bufp->regs_allocated = REGS_REALLOCATE;
6157	}
6158	else if (bufp->regs_allocated == REGS_REALLOCATE)
6159	{ /* Yes. If we need more elements than were already
6160	allocated, reallocate them. If we need fewer, just
6161	leave it alone. */
6162	if (regs->num_regs < num_regs + 1)
6163	{
6164	regs->num_regs = num_regs + 1;
6165	RETALLOC (regs->start, regs->num_regs, regoff_t);
6166	RETALLOC (regs->end, regs->num_regs, regoff_t);
6167	if (regs->start == NULL \|\| regs->end == NULL)
6168	{
6169	FREE_VARIABLES ();
6170	return -2;
6171	}
6172	}
6173	}
6174	else
6175	{
6176	/* These braces fend off a "empty body in an else-statement"
6177	warning under GCC when assert expands to nothing. */
6178	assert (bufp->regs_allocated == REGS_FIXED);
6179	}
6180
6181	/* Convert the pointer data in `regstart' and `regend' to
6182	indices. Register zero has to be set differently,
6183	since we haven't kept track of any info for it. */
6184	if (regs->num_regs > 0)
6185	{
6186	regs->start[0] = pos;
6187	#ifdef WCHAR
6188	if (MATCHING_IN_FIRST_STRING)
6189	regs->end[0] = mbs_offset1 != NULL ?
6190	mbs_offset1[d-string1] : 0;
6191	else
6192	regs->end[0] = csize1 + (mbs_offset2 != NULL ?
6193	mbs_offset2[d-string2] : 0);
6194	#else
6195	regs->end[0] = (MATCHING_IN_FIRST_STRING
6196	? ((regoff_t) (d - string1))
6197	: ((regoff_t) (d - string2 + size1)));
6198	#endif /* WCHAR */
6199	}
6200
6201	/* Go through the first `min (num_regs, regs->num_regs)'
6202	registers, since that is all we initialized. */
6203	for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6204	mcnt++)
6205	{
6206	if (REG_UNSET (regstart[mcnt]) \|\| REG_UNSET (regend[mcnt]))
6207	regs->start[mcnt] = regs->end[mcnt] = -1;
6208	else
6209	{
6210	regs->start[mcnt]
6211	= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6212	regs->end[mcnt]
6213	= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6214	}
6215	}
6216
6217	/* If the regs structure we return has more elements than
6218	were in the pattern, set the extra elements to -1. If
6219	we (re)allocated the registers, this is the case,
6220	because we always allocate enough to have at least one
6221	-1 at the end. */
6222	for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6223	regs->start[mcnt] = regs->end[mcnt] = -1;
6224	} /* regs && !bufp->no_sub */
6225
6226	DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6227	nfailure_points_pushed, nfailure_points_popped,
6228	nfailure_points_pushed - nfailure_points_popped);
6229	DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6230
6231	#ifdef WCHAR
6232	if (MATCHING_IN_FIRST_STRING)
6233	mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
6234	else
6235	mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
6236	csize1;
6237	mcnt -= pos;
6238	#else
6239	mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6240	? string1
6241	: string2 - size1);
6242	#endif /* WCHAR */
6243
6244	DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6245
6246	FREE_VARIABLES ();
6247	return mcnt;
6248	}
6249
6250	/* Otherwise match next pattern command. */
6251	switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6252	{
6253	/* Ignore these. Used to ignore the n of succeed_n's which
6254	currently have n == 0. */
6255	case no_op:
6256	DEBUG_PRINT1 ("EXECUTING no_op.\n");
6257	break;
6258
6259	case succeed:
6260	DEBUG_PRINT1 ("EXECUTING succeed.\n");
6261	goto succeed_label;
6262
6263	/* Match the next n pattern characters exactly. The following
6264	byte in the pattern defines n, and the n bytes after that
6265	are the characters to match. */
6266	case exactn:
6267	#ifdef MBS_SUPPORT
6268	case exactn_bin:
6269	#endif
6270	mcnt = *p++;
6271	DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6272
6273	/* This is written out as an if-else so we don't waste time
6274	testing `translate' inside the loop. */
6275	if (translate)
6276	{
6277	do
6278	{
6279	PREFETCH ();
6280	#ifdef WCHAR
6281	if (*d <= 0xff)
6282	{
6283	if ((UCHAR_T) translate[(unsigned char) *d++]
6284	!= (UCHAR_T) *p++)
6285	goto fail;
6286	}
6287	else
6288	{
6289	if (d++ != (CHAR_T) p++)
6290	goto fail;
6291	}
6292	#else
6293	if ((UCHAR_T) translate[(unsigned char) *d++]
6294	!= (UCHAR_T) *p++)
6295	goto fail;
6296	#endif /* WCHAR */
6297	}
6298	while (--mcnt);
6299	}
6300	else
6301	{
6302	do
6303	{
6304	PREFETCH ();
6305	if (d++ != (CHAR_T) p++) goto fail;
6306	}
6307	while (--mcnt);
6308	}
6309	SET_REGS_MATCHED ();
6310	break;
6311
6312
6313	/* Match any character except possibly a newline or a null. */
6314	case anychar:
6315	DEBUG_PRINT1 ("EXECUTING anychar.\n");
6316
6317	PREFETCH ();
6318
6319	if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
6320	\|\| (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
6321	goto fail;
6322
6323	SET_REGS_MATCHED ();
6324	DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
6325	d++;
6326	break;
6327
6328
6329	case charset:
6330	case charset_not:
6331	{
6332	register UCHAR_T c;
6333	#ifdef WCHAR
6334	unsigned int i, char_class_length, coll_symbol_length,
6335	equiv_class_length, ranges_length, chars_length, length;
6336	CHAR_T workp, workp2, *charset_top;
6337	#define WORK_BUFFER_SIZE 128
6338	CHAR_T str_buf[WORK_BUFFER_SIZE];
6339	# ifdef _LIBC
6340	uint32_t nrules;
6341	# endif /* _LIBC */
6342	#endif /* WCHAR */
6343	boolean not = (re_opcode_t) *(p - 1) == charset_not;
6344
6345	DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
6346	PREFETCH ();
6347	c = TRANSLATE (d); / The character to match. */
6348	#ifdef WCHAR
6349	# ifdef _LIBC
6350	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6351	# endif /* _LIBC */
6352	charset_top = p - 1;
6353	char_class_length = *p++;
6354	coll_symbol_length = *p++;
6355	equiv_class_length = *p++;
6356	ranges_length = *p++;
6357	chars_length = *p++;
6358	/* p points charset[6], so the address of the next instruction
6359	(charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
6360	where l=length of char_classes, m=length of collating_symbol,
6361	n=equivalence_class, o=length of char_range,
6362	p'=length of character. */
6363	workp = p;
6364	/* Update p to indicate the next instruction. */
6365	p += char_class_length + coll_symbol_length+ equiv_class_length +
6366	2*ranges_length + chars_length;
6367
6368	/* match with char_class? */
6369	for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
6370	{
6371	wctype_t wctype;
6372	uintptr_t alignedp = ((uintptr_t)workp
6373	+ __alignof__(wctype_t) - 1)
6374	& ~(uintptr_t)(__alignof__(wctype_t) - 1);
6375	wctype = ((wctype_t)alignedp);
6376	workp += CHAR_CLASS_SIZE;
6377	# ifdef _LIBC
6378	if (__iswctype((wint_t)c, wctype))
6379	goto char_set_matched;
6380	# else
6381	if (iswctype((wint_t)c, wctype))
6382	goto char_set_matched;
6383	# endif
6384	}
6385
6386	/* match with collating_symbol? */
6387	# ifdef _LIBC
6388	if (nrules != 0)
6389	{
6390	const unsigned char extra = (const unsigned char )
6391	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6392
6393	for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6394	workp++)
6395	{
6396	int32_t *wextra;
6397	wextra = (int32_t)(extra + workp++);
6398	for (i = 0; i < *wextra; ++i)
6399	if (TRANSLATE(d[i]) != wextra[1 + i])
6400	break;
6401
6402	if (i == *wextra)
6403	{
6404	/* Update d, however d will be incremented at
6405	char_set_matched:, we decrement d here. */
6406	d += i - 1;
6407	goto char_set_matched;
6408	}
6409	}
6410	}
6411	else /* (nrules == 0) */
6412	# endif
6413	/* If we can't look up collation data, we use wcscoll
6414	instead. */
6415	{
6416	for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6417	{
6418	const CHAR_T backup_d = d, backup_dend = dend;
6419	# ifdef _LIBC
6420	length = __wcslen (workp);
6421	# else
6422	length = wcslen (workp);
6423	# endif
6424
6425	/* If wcscoll(the collating symbol, whole string) > 0,
6426	no substring of the string can match the
6427	collating symbol. */
6428	# ifdef _LIBC
6429	if (__wcscoll (workp, d) > 0)
6430	# else
6431	if (wcscoll (workp, d) > 0)
6432	# endif
6433	{
6434	workp += length + 1;
6435	continue;
6436	}
6437
6438	/* First, we compare the collating symbol with
6439	the first character of the string.
6440	If it doesn't match, we add the next character to
6441	the compare buffer in turn. */
6442	for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6443	{
6444	int match;
6445	if (d == dend)
6446	{
6447	if (dend == end_match_2)
6448	break;
6449	d = string2;
6450	dend = end_match_2;
6451	}
6452
6453	/* add next character to the compare buffer. */
6454	str_buf[i] = TRANSLATE(*d);
6455	str_buf[i+1] = '\0';
6456
6457	# ifdef _LIBC
6458	match = __wcscoll (workp, str_buf);
6459	# else
6460	match = wcscoll (workp, str_buf);
6461	# endif
6462	if (match == 0)
6463	goto char_set_matched;
6464
6465	if (match < 0)
6466	/* (str_buf > workp) indicates (str_buf + X > workp),
6467	because for all X (str_buf + X > str_buf).
6468	So we don't need to continue this loop. */
6469	break;
6470
6471	/* Otherwise(str_buf < workp),
6472	(str_buf+next_character) may equal (workp).
6473	So we continue this loop. */
6474	}
6475	/* not matched */
6476	d = backup_d;
6477	dend = backup_dend;
6478	workp += length + 1;
6479	}
6480	}
6481	/* match with equivalence_class? */
6482	# ifdef _LIBC
6483	if (nrules != 0)
6484	{
6485	const CHAR_T backup_d = d, backup_dend = dend;
6486	/* Try to match the equivalence class against
6487	those known to the collate implementation. */
6488	const int32_t *table;
6489	const int32_t *weights;
6490	const int32_t *extra;
6491	const int32_t *indirect;
6492	int32_t idx, idx2;
6493	wint_t *cp;
6494	size_t len;
6495
6496	/* This #include defines a local function! */
6497	# include <locale/weightwc.h>
6498
6499	table = (const int32_t *)
6500	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6501	weights = (const wint_t *)
6502	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6503	extra = (const wint_t *)
6504	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6505	indirect = (const int32_t *)
6506	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6507
6508	/* Write 1 collating element to str_buf, and
6509	get its index. */
6510	idx2 = 0;
6511
6512	for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6513	{
6514	cp = (wint_t*)str_buf;
6515	if (d == dend)
6516	{
6517	if (dend == end_match_2)
6518	break;
6519	d = string2;
6520	dend = end_match_2;
6521	}
6522	str_buf[i] = TRANSLATE(*(d+i));
6523	str_buf[i+1] = '\0'; /* sentinel */
6524	idx2 = findidx ((const wint_t**)&cp);
6525	}
6526
6527	/* Update d, however d will be incremented at
6528	char_set_matched:, we decrement d here. */
6529	d = backup_d + ((wchar_t)cp - (wchar_t)str_buf - 1);
6530	if (d >= dend)
6531	{
6532	if (dend == end_match_2)
6533	d = dend;
6534	else
6535	{
6536	d = string2;
6537	dend = end_match_2;
6538	}
6539	}
6540
6541	len = weights[idx2];
6542
6543	for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6544	workp++)
6545	{
6546	idx = (int32_t)*workp;
6547	/* We already checked idx != 0 in regex_compile. */
6548
6549	if (idx2 != 0 && len == weights[idx])
6550	{
6551	int cnt = 0;
6552	while (cnt < len && (weights[idx + 1 + cnt]
6553	== weights[idx2 + 1 + cnt]))
6554	++cnt;
6555
6556	if (cnt == len)
6557	goto char_set_matched;
6558	}
6559	}
6560	/* not matched */
6561	d = backup_d;
6562	dend = backup_dend;
6563	}
6564	else /* (nrules == 0) */
6565	# endif
6566	/* If we can't look up collation data, we use wcscoll
6567	instead. */
6568	{
6569	for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6570	{
6571	const CHAR_T backup_d = d, backup_dend = dend;
6572	# ifdef _LIBC
6573	length = __wcslen (workp);
6574	# else
6575	length = wcslen (workp);
6576	# endif
6577
6578	/* If wcscoll(the equivalence class, whole string) > 0,
6579	no substring of the string can match the
6580	equivalence class. */
6581	# ifdef _LIBC
6582	if (__wcscoll (workp, d) > 0)
6583	# else
6584	if (wcscoll (workp, d) > 0)
6585	# endif
6586	{
6587	workp += length + 1;
6588	break;
6589	}
6590
6591	/* First, we compare the equivalence class with
6592	the first character of the string.
6593	If it doesn't match, we add the next character to
6594	the compare buffer in turn. */
6595	for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6596	{
6597	int match;
6598	if (d == dend)
6599	{
6600	if (dend == end_match_2)
6601	break;
6602	d = string2;
6603	dend = end_match_2;
6604	}
6605
6606	/* add next character to the compare buffer. */
6607	str_buf[i] = TRANSLATE(*d);
6608	str_buf[i+1] = '\0';
6609
6610	# ifdef _LIBC
6611	match = __wcscoll (workp, str_buf);
6612	# else
6613	match = wcscoll (workp, str_buf);
6614	# endif
6615
6616	if (match == 0)
6617	goto char_set_matched;
6618
6619	if (match < 0)
6620	/* (str_buf > workp) indicates (str_buf + X > workp),
6621	because for all X (str_buf + X > str_buf).
6622	So we don't need to continue this loop. */
6623	break;
6624
6625	/* Otherwise(str_buf < workp),
6626	(str_buf+next_character) may equal (workp).
6627	So we continue this loop. */
6628	}
6629	/* not matched */
6630	d = backup_d;
6631	dend = backup_dend;
6632	workp += length + 1;
6633	}
6634	}
6635
6636	/* match with char_range? */
6637	# ifdef _LIBC
6638	if (nrules != 0)
6639	{
6640	uint32_t collseqval;
6641	const char collseq = (const char )
6642	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6643
6644	collseqval = collseq_table_lookup (collseq, c);
6645
6646	for (; workp < p - chars_length ;)
6647	{
6648	uint32_t start_val, end_val;
6649
6650	/* We already compute the collation sequence value
6651	of the characters (or collating symbols). */
6652	start_val = (uint32_t) workp++; / range_start */
6653	end_val = (uint32_t) workp++; / range_end */
6654
6655	if (start_val <= collseqval && collseqval <= end_val)
6656	goto char_set_matched;
6657	}
6658	}
6659	else
6660	# endif
6661	{
6662	/* We set range_start_char at str_buf[0], range_end_char
6663	at str_buf[4], and compared char at str_buf[2]. */
6664	str_buf[1] = 0;
6665	str_buf[2] = c;
6666	str_buf[3] = 0;
6667	str_buf[5] = 0;
6668	for (; workp < p - chars_length ;)
6669	{
6670	wchar_t range_start_char, range_end_char;
6671
6672	/* match if (range_start_char <= c <= range_end_char). */
6673
6674	/* If range_start(or end) < 0, we assume -range_start(end)
6675	is the offset of the collating symbol which is specified
6676	as the character of the range start(end). */
6677
6678	/* range_start */
6679	if (*workp < 0)
6680	range_start_char = charset_top - (*workp++);
6681	else
6682	{
6683	str_buf[0] = *workp++;
6684	range_start_char = str_buf;
6685	}
6686
6687	/* range_end */
6688	if (*workp < 0)
6689	range_end_char = charset_top - (*workp++);
6690	else
6691	{
6692	str_buf[4] = *workp++;
6693	range_end_char = str_buf + 4;
6694	}
6695
6696	# ifdef _LIBC
6697	if (__wcscoll (range_start_char, str_buf+2) <= 0
6698	&& __wcscoll (str_buf+2, range_end_char) <= 0)
6699	# else
6700	if (wcscoll (range_start_char, str_buf+2) <= 0
6701	&& wcscoll (str_buf+2, range_end_char) <= 0)
6702	# endif
6703	goto char_set_matched;
6704	}
6705	}
6706
6707	/* match with char? */
6708	for (; workp < p ; workp++)
6709	if (c == *workp)
6710	goto char_set_matched;
6711
6712	not = !not;
6713
6714	char_set_matched:
6715	if (not) goto fail;
6716	#else
6717	/* Cast to `unsigned' instead of `unsigned char' in case the
6718	bit list is a full 32 bytes long. */
6719	if (c < (unsigned) (p BYTEWIDTH)
6720	&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6721	not = !not;
6722
6723	p += 1 + *p;
6724
6725	if (!not) goto fail;
6726	#undef WORK_BUFFER_SIZE
6727	#endif /* WCHAR */
6728	SET_REGS_MATCHED ();
6729	d++;
6730	break;
6731	}
6732
6733
6734	/* The beginning of a group is represented by start_memory.
6735	The arguments are the register number in the next byte, and the
6736	number of groups inner to this one in the next. The text
6737	matched within the group is recorded (in the internal
6738	registers data structure) under the register number. */
6739	case start_memory:
6740	DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6741	(long int) *p, (long int) p[1]);
6742
6743	/* Find out if this group can match the empty string. */
6744	p1 = p; /* To send to group_match_null_string_p. */
6745
6746	if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6747	REG_MATCH_NULL_STRING_P (reg_info[*p])
6748	= PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6749
6750	/* Save the position in the string where we were the last time
6751	we were at this open-group operator in case the group is
6752	operated upon by a repetition operator, e.g., with `(a*)*b'
6753	against `ab'; then we want to ignore where we are now in
6754	the string in case this attempt to match fails. */
6755	old_regstart[p] = REG_MATCH_NULL_STRING_P (reg_info[p])
6756	? REG_UNSET (regstart[p]) ? d : regstart[p]
6757	: regstart[*p];
6758	DEBUG_PRINT2 (" old_regstart: %d\n",
6759	POINTER_TO_OFFSET (old_regstart[*p]));
6760
6761	regstart[*p] = d;
6762	DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6763
6764	IS_ACTIVE (reg_info[*p]) = 1;
6765	MATCHED_SOMETHING (reg_info[*p]) = 0;
6766
6767	/* Clear this whenever we change the register activity status. */
6768	set_regs_matched_done = 0;
6769
6770	/* This is the new highest active register. */
6771	highest_active_reg = *p;
6772
6773	/* If nothing was active before, this is the new lowest active
6774	register. */
6775	if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6776	lowest_active_reg = *p;
6777
6778	/* Move past the register number and inner group count. */
6779	p += 2;
6780	just_past_start_mem = p;
6781
6782	break;
6783
6784
6785	/* The stop_memory opcode represents the end of a group. Its
6786	arguments are the same as start_memory's: the register
6787	number, and the number of inner groups. */
6788	case stop_memory:
6789	DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6790	(long int) *p, (long int) p[1]);
6791
6792	/* We need to save the string position the last time we were at
6793	this close-group operator in case the group is operated
6794	upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6795	against `aba'; then we want to ignore where we are now in
6796	the string in case this attempt to match fails. */
6797	old_regend[p] = REG_MATCH_NULL_STRING_P (reg_info[p])
6798	? REG_UNSET (regend[p]) ? d : regend[p]
6799	: regend[*p];
6800	DEBUG_PRINT2 (" old_regend: %d\n",
6801	POINTER_TO_OFFSET (old_regend[*p]));
6802
6803	regend[*p] = d;
6804	DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6805
6806	/* This register isn't active anymore. */
6807	IS_ACTIVE (reg_info[*p]) = 0;
6808
6809	/* Clear this whenever we change the register activity status. */
6810	set_regs_matched_done = 0;
6811
6812	/* If this was the only register active, nothing is active
6813	anymore. */
6814	if (lowest_active_reg == highest_active_reg)
6815	{
6816	lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6817	highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6818	}
6819	else
6820	{ /* We must scan for the new highest active register, since
6821	it isn't necessarily one less than now: consider
6822	(a(b)c(d(e)f)g). When group 3 ends, after the f), the
6823	new highest active register is 1. */
6824	UCHAR_T r = *p - 1;
6825	while (r > 0 && !IS_ACTIVE (reg_info[r]))
6826	r--;
6827
6828	/* If we end up at register zero, that means that we saved
6829	the registers as the result of an `on_failure_jump', not
6830	a `start_memory', and we jumped to past the innermost
6831	`stop_memory'. For example, in ((.)*) we save
6832	registers 1 and 2 as a result of the *, but when we pop
6833	back to the second ), we are at the stop_memory 1.
6834	Thus, nothing is active. */
6835	if (r == 0)
6836	{
6837	lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6838	highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6839	}
6840	else
6841	highest_active_reg = r;
6842	}
6843
6844	/* If just failed to match something this time around with a
6845	group that's operated on by a repetition operator, try to
6846	force exit from the ``loop'', and restore the register
6847	information for this group that we had before trying this
6848	last match. */
6849	if ((!MATCHED_SOMETHING (reg_info[*p])
6850	\|\| just_past_start_mem == p - 1)
6851	&& (p + 2) < pend)
6852	{
6853	boolean is_a_jump_n = false;
6854
6855	p1 = p + 2;
6856	mcnt = 0;
6857	switch ((re_opcode_t) *p1++)
6858	{
6859	case jump_n:
6860	is_a_jump_n = true;
6861	case pop_failure_jump:
6862	case maybe_pop_jump:
6863	case jump:
6864	case dummy_failure_jump:
6865	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6866	if (is_a_jump_n)
6867	p1 += OFFSET_ADDRESS_SIZE;
6868	break;
6869
6870	default:
6871	/* do nothing */ ;
6872	}
6873	p1 += mcnt;
6874
6875	/* If the next operation is a jump backwards in the pattern
6876	to an on_failure_jump right before the start_memory
6877	corresponding to this stop_memory, exit from the loop
6878	by forcing a failure after pushing on the stack the
6879	on_failure_jump's jump in the pattern, and d. */
6880	if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6881	&& (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6882	&& p1[2+OFFSET_ADDRESS_SIZE] == *p)
6883	{
6884	/* If this group ever matched anything, then restore
6885	what its registers were before trying this last
6886	failed match, e.g., with `(a*)*b' against `ab' for
6887	regstart[1], and, e.g., with `((a*)*(b*)*)*'
6888	against `aba' for regend[3].
6889
6890	Also restore the registers for inner groups for,
6891	e.g., `((a*)(b*))*' against `aba' (register 3 would
6892	otherwise get trashed). */
6893
6894	if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6895	{
6896	unsigned r;
6897
6898	EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6899
6900	/* Restore this and inner groups' (if any) registers. */
6901	for (r = p; r < (unsigned) p + (unsigned) *(p + 1);
6902	r++)
6903	{
6904	regstart[r] = old_regstart[r];
6905
6906	/* xx why this test? */
6907	if (old_regend[r] >= regstart[r])
6908	regend[r] = old_regend[r];
6909	}
6910	}
6911	p1++;
6912	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6913	PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6914
6915	goto fail;
6916	}
6917	}
6918
6919	/* Move past the register number and the inner group count. */
6920	p += 2;
6921	break;
6922
6923
6924	/* \<digit> has been turned into a `duplicate' command which is
6925	followed by the numeric value of <digit> as the register number. */
6926	case duplicate:
6927	{
6928	register const CHAR_T d2, dend2;
6929	int regno = p++; / Get which register to match against. */
6930	DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6931
6932	/* Can't back reference a group which we've never matched. */
6933	if (REG_UNSET (regstart[regno]) \|\| REG_UNSET (regend[regno]))
6934	goto fail;
6935
6936	/* Where in input to try to start matching. */
6937	d2 = regstart[regno];
6938
6939	/* Where to stop matching; if both the place to start and
6940	the place to stop matching are in the same string, then
6941	set to the place to stop, otherwise, for now have to use
6942	the end of the first string. */
6943
6944	dend2 = ((FIRST_STRING_P (regstart[regno])
6945	== FIRST_STRING_P (regend[regno]))
6946	? regend[regno] : end_match_1);
6947	for (;;)
6948	{
6949	/* If necessary, advance to next segment in register
6950	contents. */
6951	while (d2 == dend2)
6952	{
6953	if (dend2 == end_match_2) break;
6954	if (dend2 == regend[regno]) break;
6955
6956	/* End of string1 => advance to string2. */
6957	d2 = string2;
6958	dend2 = regend[regno];
6959	}
6960	/* At end of register contents => success */
6961	if (d2 == dend2) break;
6962
6963	/* If necessary, advance to next segment in data. */
6964	PREFETCH ();
6965
6966	/* How many characters left in this segment to match. */
6967	mcnt = dend - d;
6968
6969	/* Want how many consecutive characters we can match in
6970	one shot, so, if necessary, adjust the count. */
6971	if (mcnt > dend2 - d2)
6972	mcnt = dend2 - d2;
6973
6974	/* Compare that many; failure if mismatch, else move
6975	past them. */
6976	if (translate
6977	? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6978	: memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
6979	goto fail;
6980	d += mcnt, d2 += mcnt;
6981
6982	/* Do this because we've matched some characters. */
6983	SET_REGS_MATCHED ();
6984	}
6985	}
6986	break;
6987
6988
6989	/* begline matches the empty string at the beginning of the string
6990	(unless `not_bol' is set in `bufp'), and, if
6991	`newline_anchor' is set, after newlines. */
6992	case begline:
6993	DEBUG_PRINT1 ("EXECUTING begline.\n");
6994
6995	if (AT_STRINGS_BEG (d))
6996	{
6997	if (!bufp->not_bol) break;
6998	}
6999	else if (d[-1] == '\n' && bufp->newline_anchor)
7000	{
7001	break;
7002	}
7003	/* In all other cases, we fail. */
7004	goto fail;
7005
7006
7007	/* endline is the dual of begline. */
7008	case endline:
7009	DEBUG_PRINT1 ("EXECUTING endline.\n");
7010
7011	if (AT_STRINGS_END (d))
7012	{
7013	if (!bufp->not_eol) break;
7014	}
7015
7016	/* We have to ``prefetch'' the next character. */
7017	else if ((d == end1 ? string2 : d) == '\n'
7018	&& bufp->newline_anchor)
7019	{
7020	break;
7021	}
7022	goto fail;
7023
7024
7025	/* Match at the very beginning of the data. */
7026	case begbuf:
7027	DEBUG_PRINT1 ("EXECUTING begbuf.\n");
7028	if (AT_STRINGS_BEG (d))
7029	break;
7030	goto fail;
7031
7032
7033	/* Match at the very end of the data. */
7034	case endbuf:
7035	DEBUG_PRINT1 ("EXECUTING endbuf.\n");
7036	if (AT_STRINGS_END (d))
7037	break;
7038	goto fail;
7039
7040
7041	/* on_failure_keep_string_jump is used to optimize `.*\n'. It
7042	pushes NULL as the value for the string on the stack. Then
7043	`pop_failure_point' will keep the current value for the
7044	string, instead of restoring it. To see why, consider
7045	matching `foo\nbar' against `.*\n'. The .* matches the foo;
7046	then the . fails against the \n. But the next thing we want
7047	to do is match the \n against the \n; if we restored the
7048	string value, we would be back at the foo.
7049
7050	Because this is used only in specific cases, we don't need to
7051	check all the things that `on_failure_jump' does, to make
7052	sure the right things get saved on the stack. Hence we don't
7053	share its code. The only reason to push anything on the
7054	stack at all is that otherwise we would have to change
7055	`anychar's code to do something besides goto fail in this
7056	case; that seems worse than this. */
7057	case on_failure_keep_string_jump:
7058	DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
7059
7060	EXTRACT_NUMBER_AND_INCR (mcnt, p);
7061	#ifdef _LIBC
7062	DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
7063	#else
7064	DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
7065	#endif
7066
7067	PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
7068	break;
7069
7070
7071	/* Uses of on_failure_jump:
7072
7073	Each alternative starts with an on_failure_jump that points
7074	to the beginning of the next alternative. Each alternative
7075	except the last ends with a jump that in effect jumps past
7076	the rest of the alternatives. (They really jump to the
7077	ending jump of the following alternative, because tensioning
7078	these jumps is a hassle.)
7079
7080	Repeats start with an on_failure_jump that points past both
7081	the repetition text and either the following jump or
7082	pop_failure_jump back to this on_failure_jump. */
7083	case on_failure_jump:
7084	on_failure:
7085	DEBUG_PRINT1 ("EXECUTING on_failure_jump");
7086
7087	EXTRACT_NUMBER_AND_INCR (mcnt, p);
7088	#ifdef _LIBC
7089	DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
7090	#else
7091	DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
7092	#endif
7093
7094	/* If this on_failure_jump comes right before a group (i.e.,
7095	the original * applied to a group), save the information
7096	for that group and all inner ones, so that if we fail back
7097	to this point, the group's information will be correct.
7098	For example, in \(a*\)*\1, we need the preceding group,
7099	and in \(zz\(a*\)b*\)\2, we need the inner group. */
7100
7101	/* We can't use `p' to check ahead because we push
7102	a failure point to `p + mcnt' after we do this. */
7103	p1 = p;
7104
7105	/* We need to skip no_op's before we look for the
7106	start_memory in case this on_failure_jump is happening as
7107	the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
7108	against aba. */
7109	while (p1 < pend && (re_opcode_t) *p1 == no_op)
7110	p1++;
7111
7112	if (p1 < pend && (re_opcode_t) *p1 == start_memory)
7113	{
7114	/* We have a new highest active register now. This will
7115	get reset at the start_memory we are about to get to,
7116	but we will have saved all the registers relevant to
7117	this repetition op, as described above. */
7118	highest_active_reg = (p1 + 1) + (p1 + 2);
7119	if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
7120	lowest_active_reg = *(p1 + 1);
7121	}
7122
7123	DEBUG_PRINT1 (":\n");
7124	PUSH_FAILURE_POINT (p + mcnt, d, -2);
7125	break;
7126
7127
7128	/* A smart repeat ends with `maybe_pop_jump'.
7129	We change it to either `pop_failure_jump' or `jump'. */
7130	case maybe_pop_jump:
7131	EXTRACT_NUMBER_AND_INCR (mcnt, p);
7132	DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
7133	{
7134	register UCHAR_T *p2 = p;
7135
7136	/* Compare the beginning of the repeat with what in the
7137	pattern follows its end. If we can establish that there
7138	is nothing that they would both match, i.e., that we
7139	would have to backtrack because of (as in, e.g., `a*a')
7140	then we can change to pop_failure_jump, because we'll
7141	never have to backtrack.
7142
7143	This is not true in the case of alternatives: in
7144	`(a|ab)*' we do need to backtrack to the `ab' alternative
7145	(e.g., if the string was `ab'). But instead of trying to
7146	detect that here, the alternative has put on a dummy
7147	failure point which is what we will end up popping. */
7148
7149	/* Skip over open/close-group commands.
7150	If what follows this loop is a ...+ construct,
7151	look at what begins its body, since we will have to
7152	match at least one of that. */
7153	while (1)
7154	{
7155	if (p2 + 2 < pend
7156	&& ((re_opcode_t) *p2 == stop_memory
7157	\|\| (re_opcode_t) *p2 == start_memory))
7158	p2 += 3;
7159	else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
7160	&& (re_opcode_t) *p2 == dummy_failure_jump)
7161	p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
7162	else
7163	break;
7164	}
7165
7166	p1 = p + mcnt;
7167	/* p1[0] ... p1[2] are the `on_failure_jump' corresponding
7168	to the `maybe_finalize_jump' of this case. Examine what
7169	follows. */
7170
7171	/* If we're at the end of the pattern, we can change. */
7172	if (p2 == pend)
7173	{
7174	/* Consider what happens when matching ":\(.*\)"
7175	against ":/". I don't really understand this code
7176	yet. */
7177	p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7178	pop_failure_jump;
7179	DEBUG_PRINT1
7180	(" End of pattern: change to `pop_failure_jump'.\n");
7181	}
7182
7183	else if ((re_opcode_t) *p2 == exactn
7184	#ifdef MBS_SUPPORT
7185	\|\| (re_opcode_t) *p2 == exactn_bin
7186	#endif
7187	\|\| (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7188	{
7189	register UCHAR_T c
7190	= *p2 == (UCHAR_T) endline ? '\n' : p2[2];
7191
7192	if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
7193	#ifdef MBS_SUPPORT
7194	\|\| (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
7195	#endif
7196	) && p1[3+OFFSET_ADDRESS_SIZE] != c)
7197	{
7198	p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7199	pop_failure_jump;
7200	#ifdef WCHAR
7201	DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
7202	(wint_t) c,
7203	(wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
7204	#else
7205	DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
7206	(char) c,
7207	(char) p1[3+OFFSET_ADDRESS_SIZE]);
7208	#endif
7209	}
7210
7211	#ifndef WCHAR
7212	else if ((re_opcode_t) p1[3] == charset
7213	\|\| (re_opcode_t) p1[3] == charset_not)
7214	{
7215	int not = (re_opcode_t) p1[3] == charset_not;
7216
7217	if (c < (unsigned) (p1[4] * BYTEWIDTH)
7218	&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7219	not = !not;
7220
7221	/* `not' is equal to 1 if c would match, which means
7222	that we can't change to pop_failure_jump. */
7223	if (!not)
7224	{
7225	p[-3] = (unsigned char) pop_failure_jump;
7226	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7227	}
7228	}
7229	#endif /* not WCHAR */
7230	}
7231	#ifndef WCHAR
7232	else if ((re_opcode_t) *p2 == charset)
7233	{
7234	/* We win if the first character of the loop is not part
7235	of the charset. */
7236	if ((re_opcode_t) p1[3] == exactn
7237	&& ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
7238	&& (p2[2 + p1[5] / BYTEWIDTH]
7239	& (1 << (p1[5] % BYTEWIDTH)))))
7240	{
7241	p[-3] = (unsigned char) pop_failure_jump;
7242	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7243	}
7244
7245	else if ((re_opcode_t) p1[3] == charset_not)
7246	{
7247	int idx;
7248	/* We win if the charset_not inside the loop
7249	lists every character listed in the charset after. */
7250	for (idx = 0; idx < (int) p2[1]; idx++)
7251	if (! (p2[2 + idx] == 0
7252	\|\| (idx < (int) p1[4]
7253	&& ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
7254	break;
7255
7256	if (idx == p2[1])
7257	{
7258	p[-3] = (unsigned char) pop_failure_jump;
7259	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7260	}
7261	}
7262	else if ((re_opcode_t) p1[3] == charset)
7263	{
7264	int idx;
7265	/* We win if the charset inside the loop
7266	has no overlap with the one after the loop. */
7267	for (idx = 0;
7268	idx < (int) p2[1] && idx < (int) p1[4];
7269	idx++)
7270	if ((p2[2 + idx] & p1[5 + idx]) != 0)
7271	break;
7272
7273	if (idx == p2[1] \|\| idx == p1[4])
7274	{
7275	p[-3] = (unsigned char) pop_failure_jump;
7276	DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7277	}
7278	}
7279	}
7280	#endif /* not WCHAR */
7281	}
7282	p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. */
7283	if ((re_opcode_t) p[-1] != pop_failure_jump)
7284	{
7285	p[-1] = (UCHAR_T) jump;
7286	DEBUG_PRINT1 (" Match => jump.\n");
7287	goto unconditional_jump;
7288	}
7289	/* Note fall through. */
7290
7291
7292	/* The end of a simple repeat has a pop_failure_jump back to
7293	its matching on_failure_jump, where the latter will push a
7294	failure point. The pop_failure_jump takes off failure
7295	points put on by this pop_failure_jump's matching
7296	on_failure_jump; we got through the pattern to here from the
7297	matching on_failure_jump, so didn't fail. */
7298	case pop_failure_jump:
7299	{
7300	/* We need to pass separate storage for the lowest and
7301	highest registers, even though we don't care about the
7302	actual values. Otherwise, we will restore only one
7303	register from the stack, since lowest will == highest in
7304	`pop_failure_point'. */
7305	active_reg_t dummy_low_reg, dummy_high_reg;
7306	UCHAR_T *pdummy = NULL;
7307	const CHAR_T *sdummy = NULL;
7308
7309	DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7310	POP_FAILURE_POINT (sdummy, pdummy,
7311	dummy_low_reg, dummy_high_reg,
7312	reg_dummy, reg_dummy, reg_info_dummy);
7313	}
7314	/* Note fall through. */
7315
7316	unconditional_jump:
7317	#ifdef _LIBC
7318	DEBUG_PRINT2 ("\n%p: ", p);
7319	#else
7320	DEBUG_PRINT2 ("\n0x%x: ", p);
7321	#endif
7322	/* Note fall through. */
7323
7324	/* Unconditionally jump (without popping any failure points). */
7325	case jump:
7326	EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
7327	DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7328	p += mcnt; /* Do the jump. */
7329	#ifdef _LIBC
7330	DEBUG_PRINT2 ("(to %p).\n", p);
7331	#else
7332	DEBUG_PRINT2 ("(to 0x%x).\n", p);
7333	#endif
7334	break;
7335
7336
7337	/* We need this opcode so we can detect where alternatives end
7338	in `group_match_null_string_p' et al. */
7339	case jump_past_alt:
7340	DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7341	goto unconditional_jump;
7342
7343
7344	/* Normally, the on_failure_jump pushes a failure point, which
7345	then gets popped at pop_failure_jump. We will end up at
7346	pop_failure_jump, also, and with a pattern of, say, `a+', we
7347	are skipping over the on_failure_jump, so we have to push
7348	something meaningless for pop_failure_jump to pop. */
7349	case dummy_failure_jump:
7350	DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7351	/* It doesn't matter what we push for the string here. What
7352	the code at `fail' tests is the value for the pattern. */
7353	PUSH_FAILURE_POINT (NULL, NULL, -2);
7354	goto unconditional_jump;
7355
7356
7357	/* At the end of an alternative, we need to push a dummy failure
7358	point in case we are followed by a `pop_failure_jump', because
7359	we don't want the failure point for the alternative to be
7360	popped. For example, matching `(a|ab)*' against `aab'
7361	requires that we match the `ab' alternative. */
7362	case push_dummy_failure:
7363	DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7364	/* See comments just above at `dummy_failure_jump' about the
7365	two zeroes. */
7366	PUSH_FAILURE_POINT (NULL, NULL, -2);
7367	break;
7368
7369	/* Have to succeed matching what follows at least n times.
7370	After that, handle like `on_failure_jump'. */
7371	case succeed_n:
7372	EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7373	DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7374
7375	assert (mcnt >= 0);
7376	/* Originally, this is how many times we HAVE to succeed. */
7377	if (mcnt > 0)
7378	{
7379	mcnt--;
7380	p += OFFSET_ADDRESS_SIZE;
7381	STORE_NUMBER_AND_INCR (p, mcnt);
7382	#ifdef _LIBC
7383	DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7384	, mcnt);
7385	#else
7386	DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7387	, mcnt);
7388	#endif
7389	}
7390	else if (mcnt == 0)
7391	{
7392	#ifdef _LIBC
7393	DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
7394	p + OFFSET_ADDRESS_SIZE);
7395	#else
7396	DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
7397	p + OFFSET_ADDRESS_SIZE);
7398	#endif /* _LIBC */
7399
7400	#ifdef WCHAR
7401	p[1] = (UCHAR_T) no_op;
7402	#else
7403	p[2] = (UCHAR_T) no_op;
7404	p[3] = (UCHAR_T) no_op;
7405	#endif /* WCHAR */
7406	goto on_failure;
7407	}
7408	break;
7409
7410	case jump_n:
7411	EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7412	DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7413
7414	/* Originally, this is how many times we CAN jump. */
7415	if (mcnt)
7416	{
7417	mcnt--;
7418	STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7419
7420	#ifdef _LIBC
7421	DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7422	mcnt);
7423	#else
7424	DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7425	mcnt);
7426	#endif /* _LIBC */
7427	goto unconditional_jump;
7428	}
7429	/* If don't have to jump any more, skip over the rest of command. */
7430	else
7431	p += 2 * OFFSET_ADDRESS_SIZE;
7432	break;
7433
7434	case set_number_at:
7435	{
7436	DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7437
7438	EXTRACT_NUMBER_AND_INCR (mcnt, p);
7439	p1 = p + mcnt;
7440	EXTRACT_NUMBER_AND_INCR (mcnt, p);
7441	#ifdef _LIBC
7442	DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
7443	#else
7444	DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
7445	#endif
7446	STORE_NUMBER (p1, mcnt);
7447	break;
7448	}
7449
7450	#if 0
7451	/* The DEC Alpha C compiler 3.x generates incorrect code for the
7452	test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
7453	AT_WORD_BOUNDARY, so this code is disabled. Expanding the
7454	macro and introducing temporary variables works around the bug. */
7455
7456	case wordbound:
7457	DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7458	if (AT_WORD_BOUNDARY (d))
7459	break;
7460	goto fail;
7461
7462	case notwordbound:
7463	DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7464	if (AT_WORD_BOUNDARY (d))
7465	goto fail;
7466	break;
7467	#else
7468	case wordbound:
7469	{
7470	boolean prevchar, thischar;
7471
7472	DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7473	if (AT_STRINGS_BEG (d) \|\| AT_STRINGS_END (d))
7474	break;
7475
7476	prevchar = WORDCHAR_P (d - 1);
7477	thischar = WORDCHAR_P (d);
7478	if (prevchar != thischar)
7479	break;
7480	goto fail;
7481	}
7482
7483	case notwordbound:
7484	{
7485	boolean prevchar, thischar;
7486
7487	DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7488	if (AT_STRINGS_BEG (d) \|\| AT_STRINGS_END (d))
7489	goto fail;
7490
7491	prevchar = WORDCHAR_P (d - 1);
7492	thischar = WORDCHAR_P (d);
7493	if (prevchar != thischar)
7494	goto fail;
7495	break;
7496	}
7497	#endif
7498
7499	case wordbeg:
7500	DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7501	if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7502	&& (AT_STRINGS_BEG (d) \|\| !WORDCHAR_P (d - 1)))
7503	break;
7504	goto fail;
7505
7506	case wordend:
7507	DEBUG_PRINT1 ("EXECUTING wordend.\n");
7508	if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7509	&& (AT_STRINGS_END (d) \|\| !WORDCHAR_P (d)))
7510	break;
7511	goto fail;
7512
7513	#ifdef emacs
7514	case before_dot:
7515	DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7516	if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7517	goto fail;
7518	break;
7519
7520	case at_dot:
7521	DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7522	if (PTR_CHAR_POS ((unsigned char *) d) != point)
7523	goto fail;
7524	break;
7525
7526	case after_dot:
7527	DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7528	if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7529	goto fail;
7530	break;
7531
7532	case syntaxspec:
7533	DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7534	mcnt = *p++;
7535	goto matchsyntax;
7536
7537	case wordchar:
7538	DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7539	mcnt = (int) Sword;
7540	matchsyntax:
7541	PREFETCH ();
7542	/* Can't use d++ here; SYNTAX may be an unsafe macro. */
7543	d++;
7544	if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7545	goto fail;
7546	SET_REGS_MATCHED ();
7547	break;
7548
7549	case notsyntaxspec:
7550	DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7551	mcnt = *p++;
7552	goto matchnotsyntax;
7553
7554	case notwordchar:
7555	DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7556	mcnt = (int) Sword;
7557	matchnotsyntax:
7558	PREFETCH ();
7559	/* Can't use d++ here; SYNTAX may be an unsafe macro. */
7560	d++;
7561	if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7562	goto fail;
7563	SET_REGS_MATCHED ();
7564	break;
7565
7566	#else /* not emacs */
7567	case wordchar:
7568	DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7569	PREFETCH ();
7570	if (!WORDCHAR_P (d))
7571	goto fail;
7572	SET_REGS_MATCHED ();
7573	d++;
7574	break;
7575
7576	case notwordchar:
7577	DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7578	PREFETCH ();
7579	if (WORDCHAR_P (d))
7580	goto fail;
7581	SET_REGS_MATCHED ();
7582	d++;
7583	break;
7584	#endif /* not emacs */
7585
7586	default:
7587	abort ();
7588	}
7589	continue; /* Successfully executed one pattern command; keep going. */
7590
7591
7592	/* We goto here if a matching operation fails. */
7593	fail:
7594	if (!FAIL_STACK_EMPTY ())
7595	{ /* A restart point is known. Restore to that state. */
7596	DEBUG_PRINT1 ("\nFAIL:\n");
7597	POP_FAILURE_POINT (d, p,
7598	lowest_active_reg, highest_active_reg,
7599	regstart, regend, reg_info);
7600
7601	/* If this failure point is a dummy, try the next one. */
7602	if (!p)
7603	goto fail;
7604
7605	/* If we failed to the end of the pattern, don't examine p. */
7606	assert (p <= pend);
7607	if (p < pend)
7608	{
7609	boolean is_a_jump_n = false;
7610
7611	/* If failed to a backwards jump that's part of a repetition
7612	loop, need to pop this failure point and use the next one. */
7613	switch ((re_opcode_t) *p)
7614	{
7615	case jump_n:
7616	is_a_jump_n = true;
7617	case maybe_pop_jump:
7618	case pop_failure_jump:
7619	case jump:
7620	p1 = p + 1;
7621	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7622	p1 += mcnt;
7623
7624	if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7625	\|\| (!is_a_jump_n
7626	&& (re_opcode_t) *p1 == on_failure_jump))
7627	goto fail;
7628	break;
7629	default:
7630	/* do nothing */ ;
7631	}
7632	}
7633
7634	if (d >= string1 && d <= end1)
7635	dend = end_match_1;
7636	}
7637	else
7638	break; /* Matching at this starting point really fails. */
7639	} /* for (;;) */
7640
7641	if (best_regs_set)
7642	goto restore_best_regs;
7643
7644	FREE_VARIABLES ();
7645
7646	return -1; /* Failure to match. */
7647	} /* re_match_2 */
7648
7649
7650	/* Subroutine definitions for re_match_2. */
7651
7652
7653	/* We are passed P pointing to a register number after a start_memory.
7654
7655	Return true if the pattern up to the corresponding stop_memory can
7656	match the empty string, and false otherwise.
7657
7658	If we find the matching stop_memory, sets P to point to one past its number.
7659	Otherwise, sets P to an undefined byte less than or equal to END.
7660
7661	We don't handle duplicates properly (yet). */
7662
7663	static boolean
7664	PREFIX(group_match_null_string_p) (p, end, reg_info)
7665	UCHAR_T *p, end;
7666	PREFIX(register_info_type) *reg_info;
7667	{
7668	int mcnt;
7669	/* Point to after the args to the start_memory. */
7670	UCHAR_T p1 = p + 2;
7671
7672	while (p1 < end)
7673	{
7674	/* Skip over opcodes that can match nothing, and return true or
7675	false, as appropriate, when we get to one that can't, or to the
7676	matching stop_memory. */
7677
7678	switch ((re_opcode_t) *p1)
7679	{
7680	/* Could be either a loop or a series of alternatives. */
7681	case on_failure_jump:
7682	p1++;
7683	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7684
7685	/* If the next operation is not a jump backwards in the
7686	pattern. */
7687
7688	if (mcnt >= 0)
7689	{
7690	/* Go through the on_failure_jumps of the alternatives,
7691	seeing if any of the alternatives cannot match nothing.
7692	The last alternative starts with only a jump,
7693	whereas the rest start with on_failure_jump and end
7694	with a jump, e.g., here is the pattern for `a\|b\|c':
7695
7696	/on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7697	/on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7698	/exactn/1/c
7699
7700	So, we have to first go through the first (n-1)
7701	alternatives and then deal with the last one separately. */
7702
7703
7704	/* Deal with the first (n-1) alternatives, which start
7705	with an on_failure_jump (see above) that jumps to right
7706	past a jump_past_alt. */
7707
7708	while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7709	jump_past_alt)
7710	{
7711	/* `mcnt' holds how many bytes long the alternative
7712	is, including the ending `jump_past_alt' and
7713	its number. */
7714
7715	if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7716	(1 + OFFSET_ADDRESS_SIZE),
7717	reg_info))
7718	return false;
7719
7720	/* Move to right after this alternative, including the
7721	jump_past_alt. */
7722	p1 += mcnt;
7723
7724	/* Break if it's the beginning of an n-th alternative
7725	that doesn't begin with an on_failure_jump. */
7726	if ((re_opcode_t) *p1 != on_failure_jump)
7727	break;
7728
7729	/* Still have to check that it's not an n-th
7730	alternative that starts with an on_failure_jump. */
7731	p1++;
7732	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7733	if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7734	jump_past_alt)
7735	{
7736	/* Get to the beginning of the n-th alternative. */
7737	p1 -= 1 + OFFSET_ADDRESS_SIZE;
7738	break;
7739	}
7740	}
7741
7742	/* Deal with the last alternative: go back and get number
7743	of the `jump_past_alt' just before it. `mcnt' contains
7744	the length of the alternative. */
7745	EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7746
7747	if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7748	return false;
7749
7750	p1 += mcnt; /* Get past the n-th alternative. */
7751	} /* if mcnt > 0 */
7752	break;
7753
7754
7755	case stop_memory:
7756	assert (p1[1] == **p);
7757	*p = p1 + 2;
7758	return true;
7759
7760
7761	default:
7762	if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7763	return false;
7764	}
7765	} /* while p1 < end */
7766
7767	return false;
7768	} /* group_match_null_string_p */
7769
7770
7771	/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7772	It expects P to be the first byte of a single alternative and END one
7773	byte past the last. The alternative can contain groups. */
7774
7775	static boolean
7776	PREFIX(alt_match_null_string_p) (p, end, reg_info)
7777	UCHAR_T p, end;
7778	PREFIX(register_info_type) *reg_info;
7779	{
7780	int mcnt;
7781	UCHAR_T *p1 = p;
7782
7783	while (p1 < end)
7784	{
7785	/* Skip over opcodes that can match nothing, and break when we get
7786	to one that can't. */
7787
7788	switch ((re_opcode_t) *p1)
7789	{
7790	/* It's a loop. */
7791	case on_failure_jump:
7792	p1++;
7793	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7794	p1 += mcnt;
7795	break;
7796
7797	default:
7798	if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7799	return false;
7800	}
7801	} /* while p1 < end */
7802
7803	return true;
7804	} /* alt_match_null_string_p */
7805
7806
7807	/* Deals with the ops common to group_match_null_string_p and
7808	alt_match_null_string_p.
7809
7810	Sets P to one after the op and its arguments, if any. */
7811
7812	static boolean
7813	PREFIX(common_op_match_null_string_p) (p, end, reg_info)
7814	UCHAR_T *p, end;
7815	PREFIX(register_info_type) *reg_info;
7816	{
7817	int mcnt;
7818	boolean ret;
7819	int reg_no;
7820	UCHAR_T p1 = p;
7821
7822	switch ((re_opcode_t) *p1++)
7823	{
7824	case no_op:
7825	case begline:
7826	case endline:
7827	case begbuf:
7828	case endbuf:
7829	case wordbeg:
7830	case wordend:
7831	case wordbound:
7832	case notwordbound:
7833	#ifdef emacs
7834	case before_dot:
7835	case at_dot:
7836	case after_dot:
7837	#endif
7838	break;
7839
7840	case start_memory:
7841	reg_no = *p1;
7842	assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7843	ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7844
7845	/* Have to set this here in case we're checking a group which
7846	contains a group and a back reference to it. */
7847
7848	if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7849	REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7850
7851	if (!ret)
7852	return false;
7853	break;
7854
7855	/* If this is an optimized succeed_n for zero times, make the jump. */
7856	case jump:
7857	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7858	if (mcnt >= 0)
7859	p1 += mcnt;
7860	else
7861	return false;
7862	break;
7863
7864	case succeed_n:
7865	/* Get to the number of times to succeed. */
7866	p1 += OFFSET_ADDRESS_SIZE;
7867	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7868
7869	if (mcnt == 0)
7870	{
7871	p1 -= 2 * OFFSET_ADDRESS_SIZE;
7872	EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7873	p1 += mcnt;
7874	}
7875	else
7876	return false;
7877	break;
7878
7879	case duplicate:
7880	if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7881	return false;
7882	break;
7883
7884	case set_number_at:
7885	p1 += 2 * OFFSET_ADDRESS_SIZE;
7886
7887	default:
7888	/* All other opcodes mean we cannot match the empty string. */
7889	return false;
7890	}
7891
7892	*p = p1;
7893	return true;
7894	} /* common_op_match_null_string_p */
7895
7896
7897	/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7898	bytes; nonzero otherwise. */
7899
7900	static int
7901	PREFIX(bcmp_translate) (s1, s2, len, translate)
7902	const CHAR_T s1, s2;
7903	register int len;
7904	RE_TRANSLATE_TYPE translate;
7905	{
7906	register const UCHAR_T p1 = (const UCHAR_T ) s1;
7907	register const UCHAR_T p2 = (const UCHAR_T ) s2;
7908	while (len)
7909	{
7910	#ifdef WCHAR
7911	if (((p1<=0xff)?translate[p1++]:*p1++)
7912	!= ((p2<=0xff)?translate[p2++]:*p2++))
7913	return 1;
7914	#else /* BYTE */
7915	if (translate[p1++] != translate[p2++]) return 1;
7916	#endif /* WCHAR */
7917	len--;
7918	}
7919	return 0;
7920	}
7921
7922
7923
7924	#else /* not INSIDE_RECURSION */
7925
7926	/* Entry points for GNU code. */
7927
7928	/* re_compile_pattern is the GNU regular expression compiler: it
7929	compiles PATTERN (of length SIZE) and puts the result in BUFP.
7930	Returns 0 if the pattern was valid, otherwise an error string.
7931
7932	Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7933	are set in BUFP on entry.
7934
7935	We call regex_compile to do the actual compilation. */
7936
7937	const char *
7938	re_compile_pattern (pattern, length, bufp)
7939	const char *pattern;
7940	size_t length;
7941	struct re_pattern_buffer *bufp;
7942	{
7943	reg_errcode_t ret;
7944
7945	/* GNU code is written to assume at least RE_NREGS registers will be set
7946	(and at least one extra will be -1). */
7947	bufp->regs_allocated = REGS_UNALLOCATED;
7948
7949	/* And GNU code determines whether or not to get register information
7950	by passing null for the REGS argument to re_match, etc., not by
7951	setting no_sub. */
7952	bufp->no_sub = 0;
7953
7954	/* Match anchors at newline. */
7955	bufp->newline_anchor = 1;
7956
7957	# ifdef MBS_SUPPORT
7958	if (MB_CUR_MAX != 1)
7959	ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
7960	else
7961	# endif
7962	ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
7963
7964	if (!ret)
7965	return NULL;
7966	return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
7967	}
7968	#ifdef _LIBC
7969	weak_alias (__re_compile_pattern, re_compile_pattern)
7970	#endif
7971
7972
7973	/* Entry points compatible with 4.2 BSD regex library. We don't define
7974	them unless specifically requested. */
7975
7976	#if defined _REGEX_RE_COMP \|\| defined _LIBC
7977
7978	/* BSD has one and only one pattern buffer. */
7979	static struct re_pattern_buffer re_comp_buf;
7980
7981	char *
7982	#ifdef _LIBC
7983	/* Make these definitions weak in libc, so POSIX programs can redefine
7984	these names if they don't use our functions, and still use
7985	regcomp/regexec below without link errors. */
7986	weak_function
7987	#endif
7988	re_comp (s)
7989	const char *s;
7990	{
7991	reg_errcode_t ret;
7992
7993	if (!s)
7994	{
7995	if (!re_comp_buf.buffer)
7996	return gettext ("No previous regular expression");
7997	return 0;
7998	}
7999
8000	if (!re_comp_buf.buffer)
8001	{
8002	re_comp_buf.buffer = (unsigned char *) malloc (200);
8003	if (re_comp_buf.buffer == NULL)
8004	return (char *) gettext (re_error_msgid
8005	+ re_error_msgid_idx[(int) REG_ESPACE]);
8006	re_comp_buf.allocated = 200;
8007
8008	re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
8009	if (re_comp_buf.fastmap == NULL)
8010	return (char *) gettext (re_error_msgid
8011	+ re_error_msgid_idx[(int) REG_ESPACE]);
8012	}
8013
8014	/* Since `re_exec' always passes NULL for the `regs' argument, we
8015	don't need to initialize the pattern buffer fields which affect it. */
8016
8017	/* Match anchors at newlines. */
8018	re_comp_buf.newline_anchor = 1;
8019
8020	# ifdef MBS_SUPPORT
8021	if (MB_CUR_MAX != 1)
8022	ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8023	else
8024	# endif
8025	ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8026
8027	if (!ret)
8028	return NULL;
8029
8030	/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
8031	return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
8032	}
8033
8034
8035	int
8036	#ifdef _LIBC
8037	weak_function
8038	#endif
8039	re_exec (s)
8040	const char *s;
8041	{
8042	const int len = strlen (s);
8043	return
8044	0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
8045	}
8046
8047	#endif /* _REGEX_RE_COMP */
8048
8049
8050	/* POSIX.2 functions. Don't define these for Emacs. */
8051
8052	#ifndef emacs
8053
8054	/* regcomp takes a regular expression as a string and compiles it.
8055
8056	PREG is a regex_t *. We do not expect any fields to be initialized,
8057	since POSIX says we shouldn't. Thus, we set
8058
8059	`buffer' to the compiled pattern;
8060	`used' to the length of the compiled pattern;
8061	`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
8062	REG_EXTENDED bit in CFLAGS is set; otherwise, to
8063	RE_SYNTAX_POSIX_BASIC;
8064	`newline_anchor' to REG_NEWLINE being set in CFLAGS;
8065	`fastmap' to an allocated space for the fastmap;
8066	`fastmap_accurate' to zero;
8067	`re_nsub' to the number of subexpressions in PATTERN.
8068
8069	PATTERN is the address of the pattern string.
8070
8071	CFLAGS is a series of bits which affect compilation.
8072
8073	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
8074	use POSIX basic syntax.
8075
8076	If REG_NEWLINE is set, then . and [^...] don't match newline.
8077	Also, regexec will try a match beginning after every newline.
8078
8079	If REG_ICASE is set, then we considers upper- and lowercase
8080	versions of letters to be equivalent when matching.
8081
8082	If REG_NOSUB is set, then when PREG is passed to regexec, that
8083	routine will report only success or failure, and nothing about the
8084	registers.
8085
8086	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
8087	the return codes and their meanings.) */
8088
8089	int
8090	regcomp (preg, pattern, cflags)
8091	regex_t *preg;
8092	const char *pattern;
8093	int cflags;
8094	{
8095	reg_errcode_t ret;
8096	reg_syntax_t syntax
8097	= (cflags & REG_EXTENDED) ?
8098	RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
8099
8100	/* regex_compile will allocate the space for the compiled pattern. */
8101	preg->buffer = 0;
8102	preg->allocated = 0;
8103	preg->used = 0;
8104
8105	/* Try to allocate space for the fastmap. */
8106	preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
8107
8108	if (cflags & REG_ICASE)
8109	{
8110	unsigned i;
8111
8112	preg->translate
8113	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
8114	* sizeof (*(RE_TRANSLATE_TYPE)0));
8115	if (preg->translate == NULL)
8116	return (int) REG_ESPACE;
8117
8118	/* Map uppercase characters to corresponding lowercase ones. */
8119	for (i = 0; i < CHAR_SET_SIZE; i++)
8120	preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
8121	}
8122	else
8123	preg->translate = NULL;
8124
8125	/* If REG_NEWLINE is set, newlines are treated differently. */
8126	if (cflags & REG_NEWLINE)
8127	{ /* REG_NEWLINE implies neither . nor [^...] match newline. */
8128	syntax &= ~RE_DOT_NEWLINE;
8129	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
8130	/* It also changes the matching behavior. */
8131	preg->newline_anchor = 1;
8132	}
8133	else
8134	preg->newline_anchor = 0;
8135
8136	preg->no_sub = !!(cflags & REG_NOSUB);
8137
8138	/* POSIX says a null character in the pattern terminates it, so we
8139	can use strlen here in compiling the pattern. */
8140	# ifdef MBS_SUPPORT
8141	if (MB_CUR_MAX != 1)
8142	ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
8143	else
8144	# endif
8145	ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
8146
8147	/* POSIX doesn't distinguish between an unmatched open-group and an
8148	unmatched close-group: both are REG_EPAREN. */
8149	if (ret == REG_ERPAREN) ret = REG_EPAREN;
8150
8151	if (ret == REG_NOERROR && preg->fastmap)
8152	{
8153	/* Compute the fastmap now, since regexec cannot modify the pattern
8154	buffer. */
8155	if (re_compile_fastmap (preg) == -2)
8156	{
8157	/* Some error occurred while computing the fastmap, just forget
8158	about it. */
8159	free (preg->fastmap);
8160	preg->fastmap = NULL;
8161	}
8162	}
8163
8164	return (int) ret;
8165	}
8166	#ifdef _LIBC
8167	weak_alias (__regcomp, regcomp)
8168	#endif
8169
8170
8171	/* regexec searches for a given pattern, specified by PREG, in the
8172	string STRING.
8173
8174	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
8175	`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
8176	least NMATCH elements, and we set them to the offsets of the
8177	corresponding matched substrings.
8178
8179	EFLAGS specifies `execution flags' which affect matching: if
8180	REG_NOTBOL is set, then ^ does not match at the beginning of the
8181	string; if REG_NOTEOL is set, then $ does not match at the end.
8182
8183	We return 0 if we find a match and REG_NOMATCH if not. */
8184
8185	int
8186	regexec (preg, string, nmatch, pmatch, eflags)
8187	const regex_t *preg;
8188	const char *string;
8189	size_t nmatch;
8190	regmatch_t pmatch[];
8191	int eflags;
8192	{
8193	int ret;
8194	struct re_registers regs;
8195	regex_t private_preg;
8196	int len = strlen (string);
8197	boolean want_reg_info = !preg->no_sub && nmatch > 0;
8198
8199	private_preg = *preg;
8200
8201	private_preg.not_bol = !!(eflags & REG_NOTBOL);
8202	private_preg.not_eol = !!(eflags & REG_NOTEOL);
8203
8204	/* The user has told us exactly how many registers to return
8205	information about, via `nmatch'. We have to pass that on to the
8206	matching routines. */
8207	private_preg.regs_allocated = REGS_FIXED;
8208
8209	if (want_reg_info)
8210	{
8211	regs.num_regs = nmatch;
8212	regs.start = TALLOC (nmatch * 2, regoff_t);
8213	if (regs.start == NULL)
8214	return (int) REG_NOMATCH;
8215	regs.end = regs.start + nmatch;
8216	}
8217
8218	/* Perform the searching operation. */
8219	ret = re_search (&private_preg, string, len,
8220	/* start: / 0, / range: */ len,
8221	want_reg_info ? &regs : (struct re_registers *) 0);
8222
8223	/* Copy the register information to the POSIX structure. */
8224	if (want_reg_info)
8225	{
8226	if (ret >= 0)
8227	{
8228	unsigned r;
8229
8230	for (r = 0; r < nmatch; r++)
8231	{
8232	pmatch[r].rm_so = regs.start[r];
8233	pmatch[r].rm_eo = regs.end[r];
8234	}
8235	}
8236
8237	/* If we needed the temporary register info, free the space now. */
8238	free (regs.start);
8239	}
8240
8241	/* We want zero return to mean success, unlike `re_search'. */
8242	return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
8243	}
8244	#ifdef _LIBC
8245	weak_alias (__regexec, regexec)
8246	#endif
8247
8248
8249	/* Returns a message corresponding to an error code, ERRCODE, returned
8250	from either regcomp or regexec. We don't use PREG here. */
8251
8252	size_t
8253	regerror (errcode, preg, errbuf, errbuf_size)
8254	int errcode;
8255	const regex_t *preg;
8256	char *errbuf;
8257	size_t errbuf_size;
8258	{
8259	const char *msg;
8260	size_t msg_size;
8261
8262	if (errcode < 0
8263	\|\| errcode >= (int) (sizeof (re_error_msgid_idx)
8264	/ sizeof (re_error_msgid_idx[0])))
8265	/* Only error codes returned by the rest of the code should be passed
8266	to this routine. If we are given anything else, or if other regex
8267	code generates an invalid error code, then the program has a bug.
8268	Dump core so we can fix it. */
8269	abort ();
8270
8271	msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
8272
8273	msg_size = strlen (msg) + 1; /* Includes the null. */
8274
8275	if (errbuf_size != 0)
8276	{
8277	if (msg_size > errbuf_size)
8278	{
8279	#if defined HAVE_MEMPCPY \|\| defined _LIBC
8280	((char ) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
8281	#else
8282	memcpy (errbuf, msg, errbuf_size - 1);
8283	errbuf[errbuf_size - 1] = 0;
8284	#endif
8285	}
8286	else
8287	memcpy (errbuf, msg, msg_size);
8288	}
8289
8290	return msg_size;
8291	}
8292	#ifdef _LIBC
8293	weak_alias (__regerror, regerror)
8294	#endif
8295
8296
8297	/* Free dynamically allocated space used by PREG. */
8298
8299	void
8300	regfree (preg)
8301	regex_t *preg;
8302	{
8303	if (preg->buffer != NULL)
8304	free (preg->buffer);
8305	preg->buffer = NULL;
8306
8307	preg->allocated = 0;
8308	preg->used = 0;
8309
8310	if (preg->fastmap != NULL)
8311	free (preg->fastmap);
8312	preg->fastmap = NULL;
8313	preg->fastmap_accurate = 0;
8314
8315	if (preg->translate != NULL)
8316	free (preg->translate);
8317	preg->translate = NULL;
8318	}
8319	#ifdef _LIBC
8320	weak_alias (__regfree, regfree)
8321	#endif
8322
8323	#endif /* not emacs */
8324
8325	#endif /* not INSIDE_RECURSION */
8326
8327
8328
/* Remove the helper macros defined above.
   NOTE(review): presumably this lets the file be included again with a
   different character type (see the BYTE/WCHAR undefs below) -- confirm
   against the top of the file.  */

#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

# undef POINTER_TO_OFFSET
/* NOTE(review): `MATCHING_IN_FRST_STRING' looks like a misspelling of
   MATCHING_IN_FIRST_STRING.  The original line is kept (undefining an
   unknown name is harmless) and the presumably intended name is
   undefined as well -- confirm against the macro's definition.  */
# undef MATCHING_IN_FRST_STRING
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

# define DEFINED_ONCE

Note: See TracBrowser for help on using the repository browser.

Download in other formats: