Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

regexp.c

Visit:

Last change on this file was 238, checked in by umoeller, 23 years ago
Misc fixes.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 78.0 KB

Rev	Line
[155]	1
	2	/*
	3	*@@sourcefile regexp.c:
	4	* extended regular expressions, taken from "Andy's Extended
	5	* Regular Expressions", as written by Andy Key
	6	* (nyangau@interalpha.co.uk) and released into the public
	7	* domain, plus adjustments for the XWP helpers.
	8	*
	9	* Usage: All C programs; not OS/2-specific.
	10	*
	11	* Function prefixes:
	12	* -- rxp* regular expression functions.
	13	*
	14	* Regular expression matching is done in the following stages:
	15	*
	16	* 1) Call rxpCompile to parse the expression into a recursive
	17	* tree of matches.
	18	*
	19	* This tree is converted into a finite state machine (FSM).
	20	* A second FSM is built from the first but with epsilon
	21	* moves removed to eliminate lockups and increase speed.
	22	*
	23	* The input string can be used to drive the FSM through all
	24	* the possible routes. The largest (or smallest) amount of
	25	* input string required to reach the finish state is recorded
	26	* since an extended regular expression is deemed to match as
	27	* much input string as possible.
	28	*
	29	* 2) Call one of rxpMatch, rxpMatch_fwd, or rxpMatch_bwd to
	30	* perform a match.
	31	*
	32	* 3) Call rxpFree to free the compiled ERE.
	33	*
	34	* Beware: The matching routine is highly recursive and can
	35	* require around 20 to 30 bytes per character in the source string
	36	* to match it. Thus you should try to limit the length of the source
	37	* string and/or allow a stack size of at least 20 (to 30) * max string
	38	* length. In addition, add 2KB to allow for the use of several nested
	39	* sub-expressions in the matching.
	40	*
	41	*@@header "helpers\regexp.h"
	42	*@@added V0.9.19 (2002-04-17) [umoeller]
	43	*/
	44
	45	/*
	46	* Copyright (C) 2002 Ulrich Möller.
	47	* This file is part of the "XWorkplace helpers" source package.
	48	* This is free software; you can redistribute it and/or modify
	49	* it under the terms of the GNU General Public License as published
	50	* by the Free Software Foundation, in version 2 as it comes in the
	51	* "COPYING" file of the XWorkplace main distribution.
	52	* This program is distributed in the hope that it will be useful,
	53	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	54	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	55	* GNU General Public License for more details.
	56	*/
	57
	58	#include <stdio.h>
	59	#include <ctype.h>
	60	#include <stdlib.h>
	61	#include <string.h>
	62	#include <malloc.h>
	63	#include <memory.h>
[222]	64
	65	#include "setup.h" // code generation and debugging options
	66
[155]	67	#define ERE_C
	68	#include "helpers\regexp.h"
	69
	70	#define isword(c) (isalnum(c)\|\|(c)=='_')
	71
[222]	72	STATIC int val_of_hex(char c)
[155]	73	{
	74	if (c >= '0' && c <= '9')
	75	return c - '0';
	76	if (c >= 'a' && c <= 'f')
	77	return c - 'a' + 10;
	78	if (c >= 'A' && c <= 'F')
	79	return c - 'A' + 10;
	80	return 0; // Shouldn't get here
	81	}
	82
[222]	83	STATIC int escaped(const char *s)
[155]	84	{
	85	if (s[0] == 'x' && isxdigit(s[1]))
	86	// \x followed by 1 or 2 hex digits
	87	{
	88	int n = val_of_hex(s[1]);
	89
	90	if (isxdigit(s[2]))
	91	n = n * 16 + val_of_hex(s[2]);
	92	return n;
	93	}
	94	else
	95	switch (s[0])
	96	{
	97	case 'n':
	98	return '\n';
	99	case 't':
	100	return '\t';
	101	case 'r':
	102	return '\r';
	103	case 'b':
	104	return '\b';
	105	case 'f':
	106	return '\f';
	107	case 'e':
	108	return 0x1b;
	109	default:
	110	return (unsigned char)s[0];
	111	}
	112	}
	113
[222]	114	STATIC const char past_escaped(const char s)
[155]	115	{
	116	if (s[0] == 'x' && isxdigit(s[1]))
	117	return isxdigit(s[2]) ? s + 3 : s + 2;
	118	else
	119	return s + 1;
	120	}
	121
	122	#define zero_cclass(cclass) memset(cclass, 0, 0x100 >> 3)
	123
[222]	124	STATIC unsigned char bits[] =
[155]	125	{0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
	126
	127	#define add_to_cclass(n, cclass) (cclass[(unsigned char)(n)>>3] \|= bits[(unsigned char)(n) & 7])
	128
	129	#define remove_from_cclass(n, cclass) (cclass[(unsigned char)(n)>>3] &= ~bits[(unsigned char)(n) & 7])
	130
[222]	131	STATIC void invert_cclass(unsigned char *cclass)
[155]	132	{
	133	int i;
	134
	135	for (i = 0; i < (0x100 >> 3); i++)
	136	cclass[i] ^= 0xff;
	137	}
	138
	139	#define match_cclass(n, cclass) ((cclass[(unsigned char)(n)>>3] & bits[(unsigned char)(n) & 7]) != 0)
	140
	141	#define CHL_EOS 0
	142	#define CHL_END_CCLASS (-1)
	143	#define CHL_COMP (-2)
	144	#define CHL_RANGE (-3)
	145	#define CHL_POSIX_COLLATING (-4)
	146	#define CHL_POSIX_EQUIVALENCE (-5)
	147	#define CHL_POSIX_CCLASS_BAD (-6)
	148	#define CHL_POSIX_CCLASS_BASE (-30)
	149	#define CHL_POSIX_CCLASS_END (-30+20)
	150
	151	/* I use my own wrapper functions so I can take their addresses.
	152	* Remember, isalnum etc. can be implemented as macros... */
[222]	153	STATIC BOOLEAN my_isalnum(int ch)
[155]	154	{
	155	return isalnum(ch);
	156	}
[222]	157	STATIC BOOLEAN my_isalpha(int ch)
[155]	158	{
	159	return isalpha(ch);
	160	}
[222]	161	STATIC BOOLEAN my_isblank(int ch)
[155]	162	{
	163	return ch == ' ' \|\| ch == '\t';
	164	}
[222]	165	STATIC BOOLEAN my_iscntrl(int ch)
[155]	166	{
	167	return iscntrl(ch);
	168	}
[222]	169	STATIC BOOLEAN my_isdigit(int ch)
[155]	170	{
	171	return isdigit(ch);
	172	}
[222]	173	STATIC BOOLEAN my_islower(int ch)
[155]	174	{
	175	return islower(ch);
	176	}
[222]	177	STATIC BOOLEAN my_isprint(int ch)
[155]	178	{
	179	return isprint(ch);
	180	}
[222]	181	STATIC BOOLEAN my_ispunct(int ch)
[155]	182	{
	183	return ispunct(ch);
	184	}
[222]	185	STATIC BOOLEAN my_isspace(int ch)
[155]	186	{
	187	return isspace(ch);
	188	}
[222]	189	STATIC BOOLEAN my_isupper(int ch)
[155]	190	{
	191	return isupper(ch);
	192	}
[222]	193	STATIC BOOLEAN my_isxdigit(int ch)
[155]	194	{
	195	return isxdigit(ch);
	196	}
	197
	198	typedef struct
	199	{
	200	int len_name;
	201	const char *name;
	202	BOOLEAN(*iscclass) (int ch);
	203	}
	204	POSIX_CCLASS;
	205
[222]	206	STATIC POSIX_CCLASS posix_cclass[] =
[155]	207	{
	208	5, "alnum", my_isalnum,
	209	5, "alpha", my_isalpha,
	210	5, "blank", my_isblank,
	211	5, "cntrl", my_iscntrl,
	212	5, "digit", my_isdigit,
	213	5, "lower", my_islower,
	214	5, "print", my_isprint,
	215	5, "punct", my_ispunct,
	216	5, "space", my_isspace,
	217	5, "upper", my_isupper,
	218	6, "xdigit", my_isxdigit,
	219	};
	220
[222]	221	STATIC int find_posix(const char *str)
[155]	222	{
	223	int p;
	224
	225	if (str[0] != '[')
	226	return str[0];
	227	if (str[1] == '.')
	228	return CHL_POSIX_COLLATING;
	229	if (str[1] == '=')
	230	return CHL_POSIX_EQUIVALENCE;
	231	if (str[1] != ':')
	232	return str[0];
	233	for (p = 0; p < sizeof(posix_cclass) / sizeof(posix_cclass[0]); p++)
	234	if (!memcmp(str + 2, posix_cclass[p].name, posix_cclass[p].len_name) &&
	235	str[2 + posix_cclass[p].len_name] == ':' &&
	236	str[2 + posix_cclass[p].len_name + 1] == ']')
	237	return CHL_POSIX_CCLASS_BASE + p;
	238	return CHL_POSIX_CCLASS_BAD;
	239	}
	240
[222]	241	STATIC int cclass_thisch(const char *str)
[155]	242	{
	243	switch (str[0])
	244	{
	245	case '\\':
	246	return escaped(str + 1);
	247	case ']':
	248	return CHL_END_CCLASS;
	249	case '^':
	250	return CHL_COMP;
	251	case '-':
	252	return CHL_RANGE;
	253	case '\0':
	254	return CHL_EOS;
	255	default:
	256	return find_posix(str);
	257	}
	258	}
	259
[222]	260	STATIC const char cclass_nextch(const char str)
[155]	261	{
	262	int p;
	263
	264	if (str[0] == '\\')
	265	return past_escaped(str + 1);
	266	p = find_posix(str);
	267	if (CHL_POSIX_CCLASS_BASE <= p && p < CHL_POSIX_CCLASS_END)
	268	return str + 2 + posix_cclass[p - CHL_POSIX_CCLASS_BASE].len_name + 2;
	269	else
	270	return str + 1;
	271	}
	272
[222]	273	STATIC unsigned char compile_cclass(const char str,
[155]	274	const char **str_after,
	275	int erecf,
	276	int *rc)
	277	{
	278	unsigned char *cclass;
	279	BOOLEAN complement;
	280	int i, c, last_c = -1;
	281
	282	if ((cclass = (unsigned char *)malloc(0x100 >> 3)) == NULL)
	283	{
	284	*rc = ERROR_NOT_ENOUGH_MEMORY;
	285	return NULL;
	286	}
	287	zero_cclass(cclass);
	288
	289	complement = (cclass_thisch(str) == CHL_COMP);
	290	if (complement)
	291	str = cclass_nextch(str);
	292
	293	while ((c = cclass_thisch(str)) != CHL_EOS && c != CHL_END_CCLASS)
	294	{
	295	if (CHL_POSIX_CCLASS_BASE <= c && c < CHL_POSIX_CCLASS_END)
	296	{
	297	for (i = 0; i < 0x100; i++)
	298	if (posix_cclass[c - CHL_POSIX_CCLASS_BASE].iscclass(i))
	299	add_to_cclass(i, cclass);
	300	last_c = -1;
	301	}
	302	else
	303	switch (c)
	304	{
	305	/...sCHL_POSIX_\42\ \45\ error cases:32: /
	306	case CHL_POSIX_COLLATING:
	307	free(cclass);
	308	*rc = EREE_POSIX_COLLATING;
	309	return NULL;
	310	case CHL_POSIX_EQUIVALENCE:
	311	free(cclass);
	312	*rc = EREE_POSIX_EQUIVALENCE;
	313	return NULL;
	314	case CHL_POSIX_CCLASS_BAD:
	315	free(cclass);
	316	*rc = EREE_POSIX_CCLASS_BAD;
	317	return NULL;
	318	/...sCHL_RANGE \45\ range:32: /
	319	case CHL_RANGE:
	320	if (last_c == -1)
	321	// Unexpected at this point
	322	{
	323	free(cclass);
	324	*rc = EREE_UNEX_RANGE;
	325	return NULL;
	326	}
	327	str = cclass_nextch(str);
	328	if ((c = cclass_thisch(str)) == CHL_EOS \|\| c == CHL_END_CCLASS)
	329	// Not followed by anything
	330	{
	331	free(cclass);
	332	*rc = EREE_UNF_RANGE;
	333	return NULL;
	334	}
	335	for (i = last_c + 1; i <= c; i++)
	336	add_to_cclass(i, cclass);
	337	last_c = c;
	338	break;
	339
	340	#pragma info(nogen) // do not warn here
	341
	342	/...sCHL_COMP \45\ complement:32: /
	343	case CHL_COMP:
	344	c = '^';
	345	// Fall through
	346	/...sdefault \45\ individual entry:32: /
	347	default:
	348	if (erecf & ERECF_TOLOWER)
	349	c = tolower(c);
	350	add_to_cclass(c, cclass);
	351	last_c = c;
	352	break;
	353
	354	#pragma info(restore)
	355
	356	}
	357	str = cclass_nextch(str);
	358	}
	359
	360	if (c == CHL_EOS)
	361	{
	362	free(cclass);
	363	*rc = EREE_UNF_CCLASS;
	364	return NULL;
	365	}
	366
	367	if (complement)
	368	invert_cclass(cclass);
	369
	370	remove_from_cclass(0, cclass);
	371
	372	*str_after = cclass_nextch(str);
	373	return cclass;
	374	}
	375
	376	#define delete_cclass(cclass) free(cclass)
	377
	378	/* A 'match' embodies all of regular expression functionality.
	379	* Matches can be defined in terms of each other. */
	380
	381	typedef unsigned char MTYPE;
	382
	383	#define MTYPE_NULL ((MTYPE) 0)
	384	#define MTYPE_CHAR ((MTYPE) 1)
	385	#define MTYPE_NCHAR ((MTYPE) 2)
	386	#define MTYPE_STRING ((MTYPE) 3)
	387	#define MTYPE_CCLASS ((MTYPE) 4)
	388	#define MTYPE_WORD ((MTYPE) 5)
	389	#define MTYPE_NWORD ((MTYPE) 6)
	390	#define MTYPE_DOT ((MTYPE) 7)
	391	#define MTYPE_QUERY ((MTYPE) 8)
	392	#define MTYPE_PLUS ((MTYPE) 9)
	393	#define MTYPE_STAR ((MTYPE) 10)
	394	#define MTYPE_CREP ((MTYPE) 11)
	395	#define MTYPE_OR ((MTYPE) 12)
	396	#define MTYPE_CAT ((MTYPE) 13)
	397	#define MTYPE_SUB ((MTYPE) 14)
	398	#define MTYPE_SOL ((MTYPE) 15)
	399	#define MTYPE_EOL ((MTYPE) 16)
	400	#define MTYPE_SOW ((MTYPE) 17)
	401	#define MTYPE_EOW ((MTYPE) 18)
	402	#define MTYPE_IW ((MTYPE) 19)
	403	#define MTYPE_EW ((MTYPE) 20)
	404	#define MTYPE_BACK ((MTYPE) 21)
	405
	406	typedef struct match_struct MATCH;
	407
	408	struct match_struct
	409	{
	410	MTYPE mtype;
	411	union
	412	{
	413	char character;
	414	unsigned char *cclass;
	415	MATCH *match;
	416	MATCH *matchs[2];
	417	int n_span;
	418	char *string;
	419	struct
	420	{
	421	unsigned m, n;
	422	MATCH *match;
	423	}
	424	crep;
	425	}
	426	u;
	427	};
	428
[222]	429	STATIC MATCH null_match =
[155]	430	{MTYPE_NULL};
	431
	432	#define NULL_MATCH (&null_match)
	433
[222]	434	STATIC void delete_match(MATCH * match)
[155]	435	{
	436	if (match == NULL_MATCH)
	437	return;
	438	switch (match->mtype)
	439	{
	440	case MTYPE_STRING:
	441	free(match->u.string);
	442	break;
	443	case MTYPE_CCLASS:
	444	delete_cclass(match->u.cclass);
	445	break;
	446	case MTYPE_QUERY:
	447	case MTYPE_PLUS:
	448	case MTYPE_STAR:
	449	delete_match(match->u.match);
	450	break;
	451	case MTYPE_CREP:
	452	delete_match(match->u.crep.match);
	453	break;
	454	case MTYPE_OR:
	455	case MTYPE_CAT:
	456	delete_match(match->u.matchs[0]);
	457	delete_match(match->u.matchs[1]);
	458	break;
	459	case MTYPE_SUB:
	460	delete_match(match->u.match);
	461	break;
	462	}
	463	free(match);
	464	}
	465
	466	/*
	467	*@@ shortest_match:
	468	* determines the shortest possible match length for a given match
	469	* tree. In the following example it is 3, thus allowing us to only
	470	* consider 4 positions for regular expression matching in a 6
	471	* character line.
	472	*
	473	+ aa*bc
	474	+
	475	+ 123456
	476	+ ---
	477	+ ---
	478	+ ---
	479	+ ---
	480	*/
	481
[222]	482	STATIC unsigned shortest_match(const MATCH * match)
[155]	483	{
	484	unsigned a, b;
	485
	486	switch (match->mtype)
	487	{
	488	case MTYPE_NULL:
	489	return 0;
	490	case MTYPE_CHAR:
	491	return 1;
	492	case MTYPE_NCHAR:
	493	return 1;
	494	case MTYPE_STRING:
	495	return (unsigned char)match->u.string[0];
	496	case MTYPE_CCLASS:
	497	return 1;
	498	case MTYPE_WORD:
	499	return 1;
	500	case MTYPE_NWORD:
	501	return 1;
	502	case MTYPE_DOT:
	503	return 1;
	504	case MTYPE_QUERY:
	505	return 0;
	506	case MTYPE_PLUS:
	507	return shortest_match(match->u.match);
	508	case MTYPE_STAR:
	509	return 0;
	510	case MTYPE_CREP:
	511	return match->u.crep.m *
	512	shortest_match(match->u.crep.match);
	513	case MTYPE_OR:
	514	a = shortest_match(match->u.matchs[0]);
	515	b = shortest_match(match->u.matchs[1]);
	516	return a <= b ? a : b;
	517	case MTYPE_CAT:
	518	return shortest_match(match->u.matchs[0]) +
	519	shortest_match(match->u.matchs[1]);
	520	case MTYPE_SUB:
	521	return shortest_match(match->u.match);
	522	case MTYPE_SOL:
	523	return 0;
	524	case MTYPE_EOL:
	525	return 0;
	526	case MTYPE_SOW:
	527	return 0;
	528	case MTYPE_EOW:
	529	return 0;
	530	case MTYPE_IW:
	531	return 0;
	532	case MTYPE_EW:
	533	return 0;
	534	case MTYPE_BACK:
	535	return 0;
	536	}
	537	return 0; // Should never happen
	538	}
	539
[222]	540	STATIC BOOLEAN got_backrefs(MATCH * match)
[155]	541	{
	542	switch (match->mtype)
	543	{
	544	case MTYPE_QUERY:
	545	case MTYPE_PLUS:
	546	case MTYPE_STAR:
	547	return got_backrefs(match->u.match);
	548	case MTYPE_CREP:
	549	return got_backrefs(match->u.crep.match);
	550	case MTYPE_OR:
	551	case MTYPE_CAT:
	552	return got_backrefs(match->u.matchs[0]) \|\|
	553	got_backrefs(match->u.matchs[1]);
	554	case MTYPE_BACK:
	555	return TRUE;
	556	}
	557	return FALSE;
	558	}
	559
[222]	560	STATIC MATCH remove_subs(MATCH match)
[155]	561	{
	562	switch (match->mtype)
	563	{
	564	case MTYPE_QUERY:
	565	case MTYPE_PLUS:
	566	case MTYPE_STAR:
	567	match->u.match = remove_subs(match->u.match);
	568	break;
	569	case MTYPE_CREP:
	570	match->u.crep.match = remove_subs(match->u.crep.match);
	571	break;
	572	case MTYPE_OR:
	573	case MTYPE_CAT:
	574	match->u.matchs[0] = remove_subs(match->u.matchs[0]);
	575	match->u.matchs[1] = remove_subs(match->u.matchs[1]);
	576	break;
	577	case MTYPE_SUB:
	578	{
	579	MATCH *m = match;
	580
	581	match = match->u.match;
	582	free(m);
	583	}
	584	break;
	585	}
	586	return match;
	587	}
	588
[222]	589	STATIC int count_sub(const MATCH * match)
[155]	590	{
	591	switch (match->mtype)
	592	{
	593	case MTYPE_SUB:
	594	return 1 + count_sub(match->u.match);
	595	case MTYPE_QUERY:
	596	case MTYPE_PLUS:
	597	case MTYPE_STAR:
	598	return count_sub(match->u.match);
	599	case MTYPE_CREP:
	600	return count_sub(match->u.crep.match);
	601	case MTYPE_OR:
	602	case MTYPE_CAT:
	603	return count_sub(match->u.matchs[0]) +
	604	count_sub(match->u.matchs[1]);
	605	}
	606	return 0;
	607	}
	608
	609	/*...scompiling matches:0:*/
	610	/*
	611	*
	612	* <term> ::= <character> | <class> | ( <match> )
	613	* <factor> ::= <term>? | <term>+ | <term>* | <term>
	614	* <factors> ::= <factor> { <factor> }
	615	* <match> ::= <factors> { |<factors> }
	616	*
	617	*/
	618
	619	/* We have an optimisation here in that if we can see a span of 'boring'
	620	* characters, we return them as a single entity. We mustn't do this for the
	621	* last in the span, as this may have modifiers applied to it, as in the
	622	* example string "abcd", where the d has the modifier, but abc do not.
	623	* We only do 255 chars at once, as the length is stored in a byte.
	624	* Reducing 255 levels of per-character recursive searching to 1 level with
	625	* a string compare has got to be a massive saving. */
	626
	627	#define STRING_MAX 255
	628
	629	#define CH_NOT_BASE 1000
	630	#define CH_NOT_END (1000+0x100)
	631	#define CH_EOS 0
	632	#define CH_DOT (-1)
	633	#define CH_WORD (-2)
	634	#define CH_NWORD (-3)
	635	#define CH_LSQR (-4)
	636	#define CH_RSQR (-5)
	637	#define CH_LPAR (-6)
	638	#define CH_RPAR (-7)
	639	#define CH_LCUR (-8)
	640	#define CH_RCUR (-9)
	641	#define CH_QUERY (-10)
	642	#define CH_PLUS (-11)
	643	#define CH_STAR (-12)
	644	#define CH_OR (-13)
	645	#define CH_SOL (-14)
	646	#define CH_EOL (-15)
	647	#define CH_SOW (-16)
	648	#define CH_EOW (-17)
	649	#define CH_IW (-18)
	650	#define CH_EW (-19)
	651	#define CH_BAD_TILDE (-20)
	652	#define CH_STRING_BASE (-1000)
	653	#define CH_STRING_END (-1000+STRING_MAX)
	654	#define CH_BACK_BASE (-2000)
	655	#define CH_BACK_END (-2000+9)
	656
[222]	657	STATIC int boring_string(const char *s)
[155]	658	{
	659	int n = 0;
	660
	661	while (*s &&
	662	strchr("\\.[](){}?+\|^$<>~", s) == NULL
	663	&& n < STRING_MAX)
	664	{
	665	s++;
	666	n++;
	667	}
	668	return n;
	669	}
	670
[222]	671	STATIC int thisch(const char *str)
[155]	672	{
	673	int n;
	674
	675	switch (str[0])
	676	{
	677	case '\\':
	678	if (str[1] >= '1' && str[1] <= '9')
	679	return CH_BACK_BASE + (str[1] - '1');
	680	switch (str[1])
	681	{
	682	case '`':
	683	return CH_SOL;
	684	case '\'':
	685	return CH_EOL;
	686	case '<':
	687	return CH_SOW;
	688	case '>':
	689	return CH_EOW;
	690	case 'w':
	691	return CH_WORD;
	692	case 'W':
	693	return CH_NWORD;
	694	case 'B':
	695	return CH_IW;
	696	case 'y':
	697	return CH_EW;
	698	default:
	699	return escaped(str + 1);
	700	}
	701
	702	case '~':
	703	if (str[1] == '\\')
	704	return CH_NOT_BASE + escaped(str + 2);
	705	else if (str[1] != '\0')
	706	return CH_NOT_BASE + (unsigned char)str[1];
	707	else
	708	return '~';
	709	case '.':
	710	return CH_DOT;
	711	case '[':
	712	return CH_LSQR;
	713	case ']':
	714	return CH_RSQR;
	715	case '(':
	716	return CH_LPAR;
	717	case ')':
	718	return CH_RPAR;
	719	case '{':
	720	return CH_LCUR;
	721	case '}':
	722	return CH_RCUR;
	723	case '?':
	724	return CH_QUERY;
	725	case '+':
	726	return CH_PLUS;
	727	case '*':
	728	return CH_STAR;
	729	case '\|':
	730	return CH_OR;
	731	case '^':
	732	return CH_SOL;
	733	case '$':
	734	return CH_EOL;
	735	}
	736
	737	n = boring_string(str);
	738	if (n < 3)
	739	return (unsigned char)str[0];
	740	else
	741	return CH_STRING_BASE + n - 1;
	742	}
	743
[222]	744	STATIC const char nextch(const char str)
[155]	745	{
	746	int n;
	747
	748	switch (str[0])
	749	{
	750	case '\\':
	751	if (str[1] >= '1' && str[1] <= '9')
	752	return str + 2;
	753	switch (str[1])
	754	{
	755	case '`':
	756	case '\'':
	757	case '<':
	758	case '>':
	759	case 'w':
	760	case 'W':
	761	case 'B':
	762	case 'y':
	763	return str + 2;
	764	default:
	765	return past_escaped(str + 1);
	766	}
	767
	768	case '~':
	769	if (str[1] == '\\')
	770	return past_escaped(str + 2);
	771	else if (str[1] != '\0')
	772	return str + 2;
	773	else
	774	return str + 1;
	775	case '.':
	776	case '[':
	777	case '(':
	778	case ')':
	779	case '{':
	780	case '}':
	781	case '?':
	782	case '+':
	783	case '*':
	784	case '\|':
	785	case '^':
	786	case '$':
	787	return str + 1;
	788	}
	789
	790	n = boring_string(str);
	791	if (n < 3)
	792	return str + 1;
	793	else
	794	return str + n - 1;
	795	}
	796
[222]	797	STATIC MATCH compile_match(const char str, const char *str_after, int erecf, int rc);
[155]	798
[222]	799	STATIC const char scan_number(const char str, unsigned *num)
[155]	800	{
	801	if (!isdigit(*str))
	802	return NULL;
	803	*num = 0;
	804	do
	805	(num) = (num) * 10 + (*str++ - '0');
	806	while (isdigit(*str));
	807	return str;
	808	}
	809
[222]	810	STATIC MATCH create_match(int rc)
[155]	811	{
	812	MATCH *match;
	813
	814	if ((match = (MATCH *) malloc(sizeof(MATCH))) == NULL)
	815	{
	816	*rc = ERROR_NOT_ENOUGH_MEMORY;
	817	return NULL;
	818	}
	819	return match;
	820	}
	821
[222]	822	STATIC MATCH compile_term(const char str, const char *str_after, int erecf, int rc)
[155]	823	{
	824	MATCH *match;
	825	int c;
	826
	827	c = thisch(str);
	828	switch (c)
	829	{
	830	case CH_RSQR:
	831	*rc = EREE_UNEX_RSQR;
	832	return NULL;
	833	case CH_QUERY:
	834	*rc = EREE_UNEX_QUERY;
	835	return NULL;
	836	case CH_PLUS:
	837	*rc = EREE_UNEX_PLUS;
	838	return NULL;
	839	case CH_STAR:
	840	*rc = EREE_UNEX_STAR;
	841	return NULL;
	842	case CH_LCUR:
	843	*rc = EREE_UNEX_LCUR;
	844	return NULL;
	845	case CH_RCUR:
	846	*rc = EREE_UNEX_RCUR;
	847	return NULL;
	848	}
	849
	850	if (c == CH_EOS \|\| c == CH_OR \|\| c == CH_RPAR)
	851	{
	852	*str_after = str;
	853	return NULL_MATCH;
	854	}
	855
	856	if ((match = create_match(rc)) == NULL)
	857	return NULL;
	858
	859	if (CH_NOT_BASE <= c && c < CH_NOT_END)
	860	/...snot a specific character:16: /
	861	{
	862	char ch = (char)(c - CH_NOT_BASE);
	863
	864	if (erecf & ERECF_TOLOWER)
	865	ch = (char)tolower(ch);
	866	match->mtype = MTYPE_NCHAR;
	867	match->u.character = ch;
	868	str = nextch(str);
	869	}
	870	else if (CH_STRING_BASE <= c && c < CH_STRING_END)
	871	/...sa string of non\45\special characters:16: /
	872	{
	873	unsigned len = c - CH_STRING_BASE;
	874
	875	match->mtype = MTYPE_STRING;
	876	match->u.string = (char *)malloc(1 + len);
	877	if (match->u.string == NULL)
	878	{
	879	free(match);
	880	*rc = ERROR_NOT_ENOUGH_MEMORY;
	881	return NULL;
	882	}
	883	match->u.string[0] = (char)len;
	884	memcpy(match->u.string + 1, str, len);
	885	if (erecf & ERECF_TOLOWER)
	886	{
	887	unsigned i;
	888
	889	for (i = 1; i <= len; i++)
	890	match->u.string[i] = (char)tolower(match->u.string[i]);
	891	}
	892	str = nextch(str);
	893	}
	894	else if (CH_BACK_BASE <= c && c < CH_BACK_END)
	895	/...sa backreference:16: /
	896	{
	897	match->mtype = MTYPE_BACK;
	898	match->u.n_span = c - CH_BACK_BASE;
	899	str = nextch(str);
	900	}
	901	else
	902	switch (c)
	903	{
	904	/...sCH_LSQR \45\ character class:24: /
	905	case CH_LSQR:
	906	match->mtype = MTYPE_CCLASS;
	907	str = nextch(str);
	908	if ((match->u.cclass = compile_cclass(str, &str, erecf, rc)) == NULL)
	909	{
	910	free(match);
	911	return NULL;
	912	}
	913	break;
	914	/...sCH_DOT \45\ any character:24: /
	915	case CH_DOT:
	916	match->mtype = MTYPE_DOT;
	917	str = nextch(str);
	918	break;
	919	/...sCH_WORD \45\ \92\w:24: /
	920	case CH_WORD:
	921	match->mtype = MTYPE_WORD;
	922	str = nextch(str);
	923	break;
	924	/...sCH_NWORD \45\ \92\W:24: /
	925	case CH_NWORD:
	926	match->mtype = MTYPE_NWORD;
	927	str = nextch(str);
	928	break;
	929	/...sCH_LPAR \45\ nested regular expression:24: /
	930	case CH_LPAR:
	931	{
	932	MATCH *sub_match;
	933
	934	str = nextch(str);
	935	if ((sub_match = compile_match(str, &str, erecf, rc)) == NULL)
	936	{
	937	free(match);
	938	return NULL;
	939	}
	940	if (!got_backrefs(sub_match))
	941	sub_match = remove_subs(sub_match);
	942	if (thisch(str) != CH_RPAR)
	943	{
	944	*rc = EREE_UNF_SUB;
	945	return NULL;
	946	}
	947	str = nextch(str);
	948	match->mtype = MTYPE_SUB;
	949	match->u.match = sub_match;
	950	}
	951	break;
	952	/...sCH_SOL \45\ \94\:24: /
	953	case CH_SOL:
	954	match->mtype = MTYPE_SOL;
	955	str = nextch(str);
	956	break;
	957	/...sCH_EOL \45\ \36\:24: /
	958	case CH_EOL:
	959	match->mtype = MTYPE_EOL;
	960	str = nextch(str);
	961	break;
	962	/...sCH_SOW \45\ \92\\60\:24: /
	963	case CH_SOW:
	964	match->mtype = MTYPE_SOW;
	965	str = nextch(str);
	966	break;
	967	/...sCH_EOW \45\ \92\\62\:24: /
	968	case CH_EOW:
	969	match->mtype = MTYPE_EOW;
	970	str = nextch(str);
	971	break;
	972	/...sCH_IW \45\ \92\B:24: /
	973	case CH_IW:
	974	match->mtype = MTYPE_IW;
	975	str = nextch(str);
	976	break;
	977	/...sCH_EW \45\ \92\y:24: /
	978	case CH_EW:
	979	match->mtype = MTYPE_EW;
	980	str = nextch(str);
	981	break;
	982	/...sdefault \45\ any old character:24: /
	983	default:
	984	{
	985	char ch = (char)c;
	986
	987	if (erecf & ERECF_TOLOWER)
	988	ch = (char)tolower(ch);
	989	match->mtype = MTYPE_CHAR;
	990	match->u.character = ch;
	991	str = nextch(str);
	992	}
	993	break;
	994	}
	995
	996	*str_after = str;
	997	return match;
	998	}
	999
[222]	1000	STATIC MTYPE repeat_type_of(int c)
[155]	1001	{
	1002	switch (c)
	1003	{
	1004	case CH_QUERY:
	1005	return MTYPE_QUERY;
	1006	case CH_PLUS:
	1007	return MTYPE_PLUS;
	1008	case CH_STAR:
	1009	return MTYPE_STAR;
	1010	case CH_LCUR:
	1011	return MTYPE_CREP;
	1012	default:
[238]	1013	return (MTYPE)-1;
[155]	1014	}
	1015	}
	1016
[222]	1017	STATIC MATCH compile_factor(const char str, const char *str_after, int erecf, int rc)
[155]	1018	{
	1019	MATCH match, parent;
	1020	MTYPE repeat_mtype;
	1021
	1022	if ((match = compile_term(str, &str, erecf, rc)) == NULL)
	1023	return NULL;
	1024
	1025	while ((repeat_mtype = repeat_type_of(thisch(str))) != (MTYPE) - 1)
	1026	/...smatch is to be repeated:16: /
	1027	{
	1028	if ((parent = create_match(rc)) == NULL)
	1029	{
	1030	delete_match(match);
	1031	return NULL;
	1032	}
	1033
	1034	parent->mtype = repeat_mtype;
	1035	str = nextch(str);
	1036	if (repeat_mtype == MTYPE_CREP)
	1037	{
	1038	parent->u.crep.match = match;
	1039	if ((str = scan_number(str, &(parent->u.crep.m))) == NULL)
	1040	{
	1041	delete_match(match);
	1042	free(parent);
	1043	*rc = EREE_BAD_CREP_M;
	1044	return NULL;
	1045	}
	1046	parent->u.crep.n = parent->u.crep.m;
	1047	if (*str == ',')
	1048	{
	1049	++str;
	1050	if (*str != '}')
	1051	{
	1052	if ((str = scan_number(str, &(parent->u.crep.n))) == NULL)
	1053	{
	1054	delete_match(match);
	1055	free(parent);
	1056	*rc = EREE_BAD_CREP_N;
	1057	return NULL;
	1058	}
	1059	}
	1060	else
	1061	parent->u.crep.n = (unsigned)~0;
	1062	}
	1063	if (*str != '}')
	1064	{
	1065	delete_match(match);
	1066	free(parent);
	1067	*rc = EREE_UNF_CREP;
	1068	return NULL;
	1069	}
	1070	++str;
	1071	if (parent->u.crep.m > parent->u.crep.n)
	1072	{
	1073	delete_match(match);
	1074	free(parent);
	1075	*rc = EREE_BAD_CREP;
	1076	return NULL;
	1077	}
	1078	}
	1079	else
	1080	parent->u.match = match;
	1081	match = parent;
	1082	}
	1083
	1084	*str_after = str;
	1085
	1086	return match;
	1087	}
	1088
[222]	1089	STATIC MATCH compile_factors(const char str, const char *str_after, int erecf, int rc)
[155]	1090	{
	1091	MATCH *match;
	1092	int c;
	1093
	1094	if ((match = compile_factor(str, &str, erecf, rc)) == NULL)
	1095	return NULL;
	1096
	1097	while ((c = thisch(str)) != CH_EOS && c != CH_RPAR && c != CH_OR)
	1098	/...sconsider catenation of more factors:16: /
	1099	{
	1100	MATCH sibling, parent;
	1101
	1102	if ((sibling = compile_factor(str, &str, erecf, rc)) == NULL)
	1103	{
	1104	delete_match(match);
	1105	return NULL;
	1106	}
	1107
	1108	if ((parent = create_match(rc)) == NULL)
	1109	{
	1110	delete_match(sibling);
	1111	delete_match(match);
	1112	return NULL;
	1113	}
	1114	parent->mtype = MTYPE_CAT;
	1115	parent->u.matchs[0] = match;
	1116	parent->u.matchs[1] = sibling;
	1117	match = parent;
	1118	}
	1119
	1120	*str_after = str;
	1121	return match;
	1122	}
	1123
	1124	/...scompile_match \45\ factors\124\factors:0: /
[222]	1125	STATIC MATCH compile_match(const char str, const char *str_after, int erecf, int rc)
[155]	1126	{
	1127	MATCH *match;
	1128
	1129	if ((match = compile_factors(str, &str, erecf, rc)) == NULL)
	1130	return NULL;
	1131
	1132	while (thisch(str) == CH_OR)
	1133	/...sfind sibling and or it in:16: /
	1134	{
	1135	MATCH sibling, parent;
	1136
	1137	str = nextch(str);
	1138	if ((sibling = compile_factors(str, &str, erecf, rc)) == NULL)
	1139	{
	1140	delete_match(match);
	1141	return NULL;
	1142	}
	1143	if ((parent = create_match(rc)) == NULL)
	1144	{
	1145	delete_match(sibling);
	1146	delete_match(match);
	1147	return NULL;
	1148	}
	1149	parent->mtype = MTYPE_OR;
	1150	parent->u.matchs[0] = match;
	1151	parent->u.matchs[1] = sibling;
	1152	match = parent;
	1153	}
	1154
	1155	*str_after = str;
	1156
	1157	return match;
	1158	}
	1159
	1160	#ifdef DEBUG
	1161	/...sprint_tree:0: /
	1162	/...sdo_indent:0: /
[222]	1163	STATIC void do_indent(int indent)
[155]	1164	{
	1165	while (indent--)
	1166	putchar('\t');
	1167	}
	1168
[222]	1169	STATIC void print_tree(const MATCH * match, int indent)
[155]	1170	{
	1171	do_indent(indent);
	1172	switch (match->mtype)
	1173	{
	1174	case MTYPE_NULL:
	1175	printf("null\n");
	1176	break;
	1177	case MTYPE_CHAR:
	1178	printf("%c\n", match->u.character);
	1179	break;
	1180	case MTYPE_NCHAR:
	1181	printf("~%c\n", match->u.character);
	1182	break;
	1183	case MTYPE_STRING:
	1184	printf("%.s\n",
	1185	(unsigned char)match->u.string[0],
	1186	(unsigned char)match->u.string[0],
	1187	match->u.string + 1);
	1188	break;
	1189	case MTYPE_CCLASS:
	1190	{
	1191	int i;
	1192
	1193	printf("[");
	1194	for (i = 0; i < 0x100; i++)
	1195	if (match_cclass(i, match->u.cclass))
	1196	printf("%c", i);
	1197	printf("]\n");
	1198	}
	1199	break;
	1200	case MTYPE_DOT:
	1201	printf(".\n");
	1202	break;
	1203	case MTYPE_WORD:
	1204	printf("\\w\n");
	1205	break;
	1206	case MTYPE_NWORD:
	1207	printf("\\W\n");
	1208	break;
	1209	case MTYPE_QUERY:
	1210	printf("?\n");
	1211	print_tree(match->u.match, indent + 1);
	1212	break;
	1213	case MTYPE_PLUS:
	1214	printf("+\n");
	1215	print_tree(match->u.match, indent + 1);
	1216	break;
	1217	case MTYPE_STAR:
	1218	printf("*\n");
	1219	print_tree(match->u.match, indent + 1);
	1220	break;
	1221	case MTYPE_CREP:
	1222	printf("{%u,%u}\n",
	1223	match->u.crep.m,
	1224	match->u.crep.n);
	1225	print_tree(match->u.crep.match, indent + 1);
	1226	break;
	1227	case MTYPE_OR:
	1228	printf("\|\n");
	1229	print_tree(match->u.matchs[0], indent + 1);
	1230	print_tree(match->u.matchs[1], indent + 1);
	1231	break;
	1232	case MTYPE_CAT:
	1233	printf("CAT\n");
	1234	print_tree(match->u.matchs[0], indent + 1);
	1235	print_tree(match->u.matchs[1], indent + 1);
	1236	break;
	1237	case MTYPE_SUB:
	1238	printf("SUB\n");
	1239	print_tree(match->u.match, indent + 1);
	1240	break;
	1241	case MTYPE_SOL:
	1242	printf("^\n");
	1243	break;
	1244	case MTYPE_EOL:
	1245	printf("$\n");
	1246	break;
	1247	case MTYPE_SOW:
	1248	printf("\\<\n");
	1249	break;
	1250	case MTYPE_EOW:
	1251	printf("\\>\n");
	1252	break;
	1253	case MTYPE_IW:
	1254	printf("\\B\n");
	1255	break;
	1256	case MTYPE_EW:
	1257	printf("\\y\n");
	1258	break;
	1259	case MTYPE_BACK:
	1260	printf("\\%d\n", match->u.n_span + 1);
	1261	break;
	1262	}
	1263	}
	1264	#endif
	1265
	1266	/...sfinite state machine:0: /
	1267	typedef unsigned char ETYPE; // Edge type
	1268
	1269	#define ETYPE_CHAR ((ETYPE) 0) // Can advance if given character
	1270	#define ETYPE_NCHAR ((ETYPE) 1) // Can advance if not given char.
	1271	#define ETYPE_STRING ((ETYPE) 2) // Can advance if strings match
	1272	#define ETYPE_DOT ((ETYPE) 3) // Can advance if any character
	1273	#define ETYPE_CCLASS ((ETYPE) 4) // Can advance if any in class
	1274	#define ETYPE_WORD ((ETYPE) 5) // Can advance if word constituent
	1275	#define ETYPE_NWORD ((ETYPE) 6) // Can advance if not word constit.
	1276	#define ETYPE_EPSILON ((ETYPE) 7) // Can advance without reading input
	1277	#define ETYPE_SOL ((ETYPE) 8) // Matches if at start of line
	1278	#define ETYPE_EOL ((ETYPE) 9) // Matches if at end of line
	1279	#define ETYPE_SOW ((ETYPE) 10) // Matches if at start of word
	1280	#define ETYPE_EOW ((ETYPE) 11) // Matches if at end of word
	1281	#define ETYPE_IW ((ETYPE) 12) // Matches if within word
	1282	#define ETYPE_EW ((ETYPE) 13) // Matches if at word start or end
	1283	#define ETYPE_SSUB ((ETYPE) 14) // Records passage over (
	1284	#define ETYPE_ESUB ((ETYPE) 15) // Records passage over )
	1285	#define ETYPE_BACK ((ETYPE) 16) // Back reference
	1286
	1287	typedef struct
	1288	{
	1289	ETYPE etype; // Edge type, (an ETYPE_ no)
	1290	union
	1291	{
	1292	char character; // Character to use if ETYPE_CHAR
	1293	unsigned char *cclass; // Class to use if ETYPE_CCLASS
	1294	char *string; // len, then chars if ETYPE_STRING
	1295	int n_span; // Used if ETYPE_BACK
	1296	}
	1297	u;
	1298	const char *gate; // Used if type of epsilon move
	1299	int to_state; // State to go to if test succeeds
	1300	int next_edge; // Next test to try after this
	1301	}
	1302	EDGE;
	1303
	1304	/* Under DOS, restrict FSM size to 13KB approx.. On other environments memory
	1305	* isn't such an issue, so allow something closer to 32KB. Of course, a better
	1306	* implementation would dynamically grow the FSM size as needed. Something for
	1307	* the future perhaps... */
	1308
	1309	#ifdef xxDOS
	1310	#define MAX_STATES 200
	1311	#define MAX_EDGES 600
	1312	#else
	1313	#define MAX_STATES 500
	1314	#define MAX_EDGES 1500
	1315	#endif
	1316
	1317	#define FLAG_FINISH 0x01
	1318	#define FLAG_VISITED 0x02
	1319	#define FLAG_REACHABLE 0x04
	1320
	1321	typedef struct
	1322	{
	1323	int n_states;
	1324	int state_first_edges[MAX_STATES];
	1325	unsigned char state_flags[MAX_STATES];
	1326	int n_edges;
	1327	EDGE edges[MAX_EDGES];
	1328	}
	1329	FSM;
	1330
	1331	/...screate_fsm:0: /
[222]	1332	STATIC FSM create_fsm(int rc)
[155]	1333	{
	1334	FSM *fsm;
	1335	int i;
	1336
	1337	if ((fsm = (FSM *) malloc(sizeof(FSM))) == NULL)
	1338	{
	1339	*rc = ERROR_NOT_ENOUGH_MEMORY;
	1340	return NULL;
	1341	}
	1342
	1343	fsm->n_states = 0;
	1344	fsm->n_edges = 0;
	1345
	1346	for (i = 0; i < MAX_STATES; i++)
	1347	{
	1348	fsm->state_first_edges[i] = -1;
	1349	fsm->state_flags[i] = 0;
	1350	}
	1351
	1352	return fsm;
	1353	}
	1354	/...sdelete_fsm:0: /
	1355	#define delete_fsm(fsm) free(fsm)
	1356
	1357	/...smalloc_state:0: /
[222]	1358	STATIC int malloc_state(FSM * fsm)
[155]	1359	{
	1360	if (fsm->n_states == MAX_STATES)
	1361	return -1;
	1362	else
	1363	return fsm->n_states++;
	1364	}
	1365	/...smalloc_edge:0: /
	1366	/* Allocate space for new supplied edge and fill it in.
	1367	* If already exists then don't bother (duplicates waste search time).
	1368	* Return TRUE if all went ok. */
	1369
[222]	1370	STATIC BOOLEAN malloc_edge(int s, EDGE * edge, FSM * fsm)
[155]	1371	{
	1372	int edge_no, n_edges = fsm->n_edges++;
	1373
	1374	// See if edge already exists
	1375
	1376	for (edge_no = fsm->state_first_edges[s];
	1377	edge_no != -1;
	1378	edge_no = fsm->edges[edge_no].next_edge)
	1379	// Do we already have this edge
	1380	if (fsm->edges[edge_no].to_state == edge->to_state &&
	1381	fsm->edges[edge_no].etype == edge->etype)
	1382	// An edge of this type already exists
	1383	switch (edge->etype)
	1384	{
	1385	/...sETYPE_CHAR\47\NCHAR:32: /
	1386	case ETYPE_CHAR:
	1387	case ETYPE_NCHAR:
	1388	if (edge->u.character == fsm->edges[edge_no].u.character)
	1389	return TRUE;
	1390	break;
	1391	/...sETYPE_STRING:32: /
	1392	case ETYPE_STRING:
	1393	if ((unsigned char)fsm->edges[edge_no].u.string[0] ==
	1394	(unsigned char)edge->u.string[0] &&
	1395	!memcmp(fsm->edges[edge_no].u.string + 1,
	1396	edge->u.string + 1,
	1397	(unsigned char)edge->u.string[0]))
	1398	return TRUE;
	1399	break;
	1400	/...sETYPE_CCLASS:32: /
	1401	case ETYPE_CCLASS:
	1402	if (edge->u.cclass == fsm->edges[edge_no].u.cclass)
	1403	return TRUE;
	1404	break;
	1405	/...sETYPE_DOT\47\EPSILON\47\SOL\47\EOL etc\46\\46\:32: /
	1406	case ETYPE_DOT:
	1407	case ETYPE_EPSILON:
	1408	case ETYPE_SOL:
	1409	case ETYPE_EOL:
	1410	case ETYPE_SOW:
	1411	case ETYPE_EOW:
	1412	case ETYPE_IW:
	1413	case ETYPE_EW:
	1414	case ETYPE_SSUB:
	1415	case ETYPE_ESUB:
	1416	return TRUE;
	1417	/...sETYPE_BACK:32: /
	1418	case ETYPE_BACK:
	1419	if (edge->u.n_span == fsm->edges[edge_no].u.n_span)
	1420	return TRUE;
	1421	break;
	1422	}
	1423
	1424	// Going to have to add the edge
	1425
	1426	if (n_edges >= MAX_EDGES)
	1427	return FALSE;
	1428
	1429	memcpy(&(fsm->edges[n_edges]), edge, sizeof(EDGE));
	1430	fsm->edges[n_edges].next_edge = fsm->state_first_edges[s];
	1431	fsm->state_first_edges[s] = n_edges;
	1432	return TRUE;
	1433	}
	1434	/...smake_fsm_from_match:0: /
	1435	/...sadd_edge_to_fsm_character:0: /
[222]	1436	STATIC BOOLEAN add_edge_to_fsm_character(int s, int f, FSM * fsm, char character)
[155]	1437	{
	1438	EDGE edge;
	1439
	1440	edge.etype = ETYPE_CHAR;
	1441	edge.u.character = character;
	1442	edge.to_state = f;
	1443	return malloc_edge(s, &edge, fsm);
	1444	}
	1445	/...sadd_edge_to_fsm_ncharacter:0: /
[222]	1446	STATIC BOOLEAN add_edge_to_fsm_ncharacter(int s, int f, FSM * fsm, char character)
[155]	1447	{
	1448	EDGE edge;
	1449
	1450	edge.etype = ETYPE_NCHAR;
	1451	edge.u.character = character;
	1452	edge.to_state = f;
	1453	return malloc_edge(s, &edge, fsm);
	1454	}
	1455	/...sadd_edge_to_fsm_string:0: /
[222]	1456	STATIC BOOLEAN add_edge_to_fsm_string(int s, int f, FSM * fsm, char *string)
[155]	1457	{
	1458	EDGE edge;
	1459
	1460	edge.etype = ETYPE_STRING;
	1461	edge.u.string = string;
	1462	edge.to_state = f;
	1463	return malloc_edge(s, &edge, fsm);
	1464	}
	1465	/...sadd_edge_to_fsm_cclass:0: /
[222]	1466	STATIC BOOLEAN add_edge_to_fsm_cclass(int s, int f, FSM * fsm, unsigned char *cclass)
[155]	1467	{
	1468	EDGE edge;
	1469
	1470	edge.etype = ETYPE_CCLASS;
	1471	edge.u.cclass = cclass;
	1472	edge.to_state = f;
	1473	return malloc_edge(s, &edge, fsm);
	1474	}
	1475	/...sadd_edge_to_fsm_dot:0: /
[222]	1476	STATIC BOOLEAN add_edge_to_fsm_dot(int s, int f, FSM * fsm)
[155]	1477	{
	1478	EDGE edge;
	1479
	1480	edge.etype = ETYPE_DOT;
	1481	edge.to_state = f;
	1482	return malloc_edge(s, &edge, fsm);
	1483	}
	1484	/...sadd_edge_to_fsm_word:0: /
[222]	1485	STATIC BOOLEAN add_edge_to_fsm_word(int s, int f, FSM * fsm)
[155]	1486	{
	1487	EDGE edge;
	1488
	1489	edge.etype = ETYPE_WORD;
	1490	edge.to_state = f;
	1491	return malloc_edge(s, &edge, fsm);
	1492	}
	1493	/...sadd_edge_to_fsm_nword:0: /
[222]	1494	STATIC BOOLEAN add_edge_to_fsm_nword(int s, int f, FSM * fsm)
[155]	1495	{
	1496	EDGE edge;
	1497
	1498	edge.etype = ETYPE_NWORD;
	1499	edge.to_state = f;
	1500	return malloc_edge(s, &edge, fsm);
	1501	}
	1502	/...sadd_edge_to_fsm_epsilon:0: /
[222]	1503	STATIC BOOLEAN add_edge_to_fsm_epsilon(int s, int f, FSM * fsm)
[155]	1504	{
	1505	EDGE edge;
	1506
	1507	edge.etype = ETYPE_EPSILON;
	1508	edge.to_state = f;
	1509	return malloc_edge(s, &edge, fsm);
	1510	}
	1511	/...sadd_edge_to_fsm_special:0: /
[222]	1512	STATIC BOOLEAN add_edge_to_fsm_special(int s, int f, FSM * fsm, ETYPE etype)
[155]	1513	{
	1514	EDGE edge;
	1515
	1516	edge.etype = etype;
	1517	edge.to_state = f;
	1518	edge.gate = NULL;
	1519	return malloc_edge(s, &edge, fsm);
	1520	}
	1521	/...sadd_edge_to_fsm_back:0: /
[222]	1522	STATIC BOOLEAN add_edge_to_fsm_back(int s, int f, FSM * fsm, int n_span)
[155]	1523	{
	1524	EDGE edge;
	1525
	1526	edge.etype = ETYPE_BACK;
	1527	edge.to_state = f;
	1528	edge.gate = NULL;
	1529	edge.u.n_span = n_span;
	1530	return malloc_edge(s, &edge, fsm);
	1531	}
	1532
[222]	1533	STATIC BOOLEAN make_fsm_from_match(MATCH * match, FSM * fsm, int s, int f)
[155]	1534	{
	1535	int n1, n2, n3, n4, i;
	1536
	1537	switch (match->mtype)
	1538	{
	1539	/...sMTYPE_NULL:16: /
	1540	/*
	1541	* e
	1542	* S ----> F
	1543	*
	1544	*/
	1545
	1546	case MTYPE_NULL:
	1547	return (*s = malloc_state(fsm)) != -1 &&
	1548	(*f = malloc_state(fsm)) != -1 &&
	1549	add_edge_to_fsm_epsilon(s, f, fsm);
	1550	/...sMTYPE_CHAR:16: /
	1551	/*
	1552	* c
	1553	* S ----> F
	1554	*
	1555	*/
	1556
	1557	case MTYPE_CHAR:
	1558	return (*s = malloc_state(fsm)) != -1 &&
	1559	(*f = malloc_state(fsm)) != -1 &&
	1560	add_edge_to_fsm_character(s, f, fsm, match->u.character);
	1561	/...sMTYPE_NCHAR:16: /
	1562	/*
	1563	* ~c
	1564	* S ----> F
	1565	*
	1566	*/
	1567
	1568	case MTYPE_NCHAR:
	1569	return (*s = malloc_state(fsm)) != -1 &&
	1570	(*f = malloc_state(fsm)) != -1 &&
	1571	add_edge_to_fsm_ncharacter(s, f, fsm, match->u.character);
	1572	/...sMTYPE_STRING:16: /
	1573	/*
	1574	* string
	1575	* S ----> F
	1576	*
	1577	*/
	1578
	1579	case MTYPE_STRING:
	1580	return (*s = malloc_state(fsm)) != -1 &&
	1581	(*f = malloc_state(fsm)) != -1 &&
	1582	add_edge_to_fsm_string(s, f, fsm, match->u.string);
	1583
	1584	/...sMTYPE_CCLASS:16: /
	1585	/*
	1586	* cclass
	1587	* S ----> F
	1588	*
	1589	*/
	1590
	1591	case MTYPE_CCLASS:
	1592	return (*s = malloc_state(fsm)) != -1 &&
	1593	(*f = malloc_state(fsm)) != -1 &&
	1594	add_edge_to_fsm_cclass(s, f, fsm, match->u.cclass);
	1595
	1596	/...sMTYPE_DOT:16: /
	1597	/*
	1598	* any
	1599	* S ----> F
	1600	*
	1601	*/
	1602
	1603	case MTYPE_DOT:
	1604	return (*s = malloc_state(fsm)) != -1 &&
	1605	(*f = malloc_state(fsm)) != -1 &&
	1606	add_edge_to_fsm_dot(s, f, fsm);
	1607
	1608	/...sMTYPE_WORD:16: /
	1609	/*
	1610	* word
	1611	* S ----> F
	1612	*
	1613	*/
	1614
	1615	case MTYPE_WORD:
	1616	return (*s = malloc_state(fsm)) != -1 &&
	1617	(*f = malloc_state(fsm)) != -1 &&
	1618	add_edge_to_fsm_word(s, f, fsm);
	1619
	1620	/...sMTYPE_NWORD:16: /
	1621	/*
	1622	* !word
	1623	* S ----> F
	1624	*
	1625	*/
	1626
	1627	case MTYPE_NWORD:
	1628	return (*s = malloc_state(fsm)) != -1 &&
	1629	(*f = malloc_state(fsm)) != -1 &&
	1630	add_edge_to_fsm_nword(s, f, fsm);
	1631
	1632	/...sMTYPE_QUERY:16: /
	1633	/*
	1634	* e
	1635	* S ----> F
	1636	* \| ^
	1637	* \| e \| e
	1638	* v \|
	1639	* [n1 n2]
	1640	*
	1641	*/
	1642
	1643	case MTYPE_QUERY:
	1644	if (!make_fsm_from_match(match->u.match, fsm, &n1, &n2))
	1645	return FALSE;
	1646	return (*s = malloc_state(fsm)) != -1 &&
	1647	(*f = malloc_state(fsm)) != -1 &&
	1648	add_edge_to_fsm_epsilon(s, f, fsm) &&
	1649	add_edge_to_fsm_epsilon(*s, n1, fsm) &&
	1650	add_edge_to_fsm_epsilon(n2, *f, fsm);
	1651
	1652	/...sMTYPE_PLUS:16: /
	1653	/*
	1654	*
	1655	* [S F]
	1656	* ^ e \|
	1657	* +---+
	1658	*
	1659	*/
	1660
	1661	case MTYPE_PLUS:
	1662	if (!make_fsm_from_match(match->u.match, fsm, s, f))
	1663	return FALSE;
	1664	return add_edge_to_fsm_epsilon(f, s, fsm);
	1665
	1666	/...sMTYPE_STAR:16: /
	1667	/*
	1668	*
	1669	* +---- S/F
	1670	* \| ^
	1671	* \| e \| e
	1672	* v \|
	1673	* [n1 n2]
	1674	*
	1675	*/
	1676
	1677	case MTYPE_STAR:
	1678	if (!make_fsm_from_match(match->u.match, fsm, &n1, &n2))
	1679	return FALSE;
	1680	return (s = f = malloc_state(fsm)) != -1 &&
	1681	add_edge_to_fsm_epsilon(*s, n1, fsm) &&
	1682	add_edge_to_fsm_epsilon(n2, *f, fsm);
	1683
	1684	/...sMTYPE_CREP:16: /
	1685	/*
	1686	* e e e e
	1687	* S-->[n1 n2]-->[n1 n2]-->[n1 n2]-->[n1 n2] <re>{2,4}
	1688	* \| \| \|
	1689	* \| e \| e \| e
	1690	* v \| \|
	1691	* F<---------+----------+
	1692	*
	1693	* e
	1694	* +----+
	1695	* \| \|
	1696	* e e e v \|
	1697	* S-->[n1 n2]-->[n1 n2]-->[n1 n2] <re>{2,}
	1698	* \| \|
	1699	* \| e \| e
	1700	* v \|
	1701	* F<---------+
	1702	*/
	1703
	1704	case MTYPE_CREP:
	1705	if ((*s = malloc_state(fsm)) == -1 \|\|
	1706	(*f = malloc_state(fsm)) == -1)
	1707	return FALSE;
	1708	n3 = *s;
	1709	for (i = 0; i < (int)match->u.crep.m; i++)
	1710	{
	1711	if (!make_fsm_from_match(match->u.crep.match, fsm, &n1, &n2))
	1712	return FALSE;
	1713	if (!add_edge_to_fsm_epsilon(n3, n1, fsm))
	1714	return FALSE;
	1715	n3 = n2;
	1716	}
	1717	if (!add_edge_to_fsm_epsilon(n3, *f, fsm))
	1718	return FALSE;
	1719	if (match->u.crep.n != ~0)
	1720	for (; i < (int)match->u.crep.n; i++)
	1721	{
	1722	if (!make_fsm_from_match(match->u.crep.match, fsm, &n1, &n2))
	1723	return FALSE;
	1724	if (!add_edge_to_fsm_epsilon(n3, n1, fsm))
	1725	return FALSE;
	1726	if (!add_edge_to_fsm_epsilon(n2, *f, fsm))
	1727	return FALSE;
	1728	n3 = n2;
	1729	}
	1730	else
	1731	{
	1732	if (!make_fsm_from_match(match->u.crep.match, fsm, &n1, &n2))
	1733	return FALSE;
	1734	if (!add_edge_to_fsm_epsilon(n3, n1, fsm))
	1735	return FALSE;
	1736	if (!add_edge_to_fsm_epsilon(n2, *f, fsm))
	1737	return FALSE;
	1738	if (!add_edge_to_fsm_epsilon(n2, n1, fsm))
	1739	return FALSE;
	1740	}
	1741	return TRUE;
	1742
	1743	/...sMTYPE_OR:16: /
	1744	/*
	1745	* e e
	1746	* +--->[n1 n2]----+
	1747	* \| v
	1748	* S F
	1749	* \| e e ^
	1750	* +--->[n3 n4]----+
	1751	*
	1752	*/
	1753
	1754	case MTYPE_OR:
	1755	if (!make_fsm_from_match(match->u.matchs[0], fsm, &n1, &n2) \|\|
	1756	!make_fsm_from_match(match->u.matchs[1], fsm, &n3, &n4))
	1757	return FALSE;
	1758	return (*s = malloc_state(fsm)) != -1 &&
	1759	(*f = malloc_state(fsm)) != -1 &&
	1760	add_edge_to_fsm_epsilon(*s, n1, fsm) &&
	1761	add_edge_to_fsm_epsilon(*s, n3, fsm) &&
	1762	add_edge_to_fsm_epsilon(n2, *f, fsm) &&
	1763	add_edge_to_fsm_epsilon(n4, *f, fsm);
	1764
	1765	/...sMTYPE_CAT:16: /
	1766	/*
	1767	* e
	1768	* [S n1]---->[n2 F]
	1769	*
	1770	*/
	1771
	1772	case MTYPE_CAT:
	1773	if (!make_fsm_from_match(match->u.matchs[0], fsm, s, &n1) \|\|
	1774	!make_fsm_from_match(match->u.matchs[1], fsm, &n2, f))
	1775	return FALSE;
	1776	return add_edge_to_fsm_epsilon(n1, n2, fsm);
	1777
	1778	/...sMTYPE_SUB:16: /
	1779	/*
	1780	* ssub esub
	1781	* S---->[n1 n2]---->F
	1782	*
	1783	*/
	1784
	1785	case MTYPE_SUB:
	1786	if (!make_fsm_from_match(match->u.match, fsm, &n1, &n2))
	1787	return FALSE;
	1788	return (*s = malloc_state(fsm)) != -1 &&
	1789	(*f = malloc_state(fsm)) != -1 &&
	1790	add_edge_to_fsm_special(*s, n1, fsm, ETYPE_SSUB) &&
	1791	add_edge_to_fsm_special(n2, *f, fsm, ETYPE_ESUB);
	1792
	1793	/...sMTYPE_SOL\47\EOL\47\SOW\47\EOW\47\IW\47\EW:16: /
	1794	/*
	1795	* special
	1796	* S ----> F
	1797	*
	1798	*/
	1799
	1800	case MTYPE_SOL:
	1801	return (*s = malloc_state(fsm)) != -1 &&
	1802	(*f = malloc_state(fsm)) != -1 &&
	1803	add_edge_to_fsm_special(s, f, fsm, ETYPE_SOL);
	1804	case MTYPE_EOL:
	1805	return (*s = malloc_state(fsm)) != -1 &&
	1806	(*f = malloc_state(fsm)) != -1 &&
	1807	add_edge_to_fsm_special(s, f, fsm, ETYPE_EOL);
	1808	case MTYPE_SOW:
	1809	return (*s = malloc_state(fsm)) != -1 &&
	1810	(*f = malloc_state(fsm)) != -1 &&
	1811	add_edge_to_fsm_special(s, f, fsm, ETYPE_SOW);
	1812	case MTYPE_EOW:
	1813	return (*s = malloc_state(fsm)) != -1 &&
	1814	(*f = malloc_state(fsm)) != -1 &&
	1815	add_edge_to_fsm_special(s, f, fsm, ETYPE_EOW);
	1816	case MTYPE_IW:
	1817	return (*s = malloc_state(fsm)) != -1 &&
	1818	(*f = malloc_state(fsm)) != -1 &&
	1819	add_edge_to_fsm_special(s, f, fsm, ETYPE_IW);
	1820	case MTYPE_EW:
	1821	return (*s = malloc_state(fsm)) != -1 &&
	1822	(*f = malloc_state(fsm)) != -1 &&
	1823	add_edge_to_fsm_special(s, f, fsm, ETYPE_EW);
	1824
	1825	/...sMTYPE_BACK:16: /
	1826	case MTYPE_BACK:
	1827	return (*s = malloc_state(fsm)) != -1 &&
	1828	(*f = malloc_state(fsm)) != -1 &&
	1829	add_edge_to_fsm_back(s, f, fsm, match->u.n_span);
	1830
	1831	}
	1832	return FALSE; // Should never happen
	1833	}
	1834
	1835	/...sremove_epsilons:0: /
/* The problems with epsilon moves are :-
 *   1) They can gang up on you in groups to form loops of states that require
 *      no input to go all the way around them. Any function attempting a
 *      recursive search of the FSM will recurse forever.
 *   2) They can slow the recognition process by as much as a factor of 2.
 *      eg:
 *             a       e       b       e       c
 *         O ----> O ----> O ----> O ----> O ----> O     Is slow
 *
 *             a       b       c
 *         O ----> O ----> O ----> O                     Is faster
 *
 */
	1849
	1850	/...sfinish_states:0: /
	1851	/...sis_finish_reachable:0: /
	1852	/* When we recurse we mark the current state as visited to stop infinite
	1853	* recursion on loops of epsilon moves. */
	1854
[222]	1855	STATIC BOOLEAN is_finish_reachable(FSM * fsm, int state_no)
[155]	1856	{
	1857	int edge_no;
	1858	BOOLEAN ok;
	1859
	1860	if (fsm->state_flags[state_no] & FLAG_VISITED)
	1861	// Been here already
	1862	return FALSE;
	1863
	1864	if (fsm->state_flags[state_no] & FLAG_FINISH)
	1865	// At finish
	1866	return TRUE;
	1867
	1868	for (edge_no = fsm->state_first_edges[state_no];
	1869	edge_no != -1;
	1870	edge_no = fsm->edges[edge_no].next_edge)
	1871	if (fsm->edges[edge_no].etype == ETYPE_EPSILON)
	1872	{
	1873	fsm->state_flags[state_no] \|= FLAG_VISITED;
	1874	ok = is_finish_reachable(fsm, fsm->edges[edge_no].to_state);
	1875	fsm->state_flags[state_no] &= ~FLAG_VISITED;
	1876	if (ok)
	1877	return TRUE;
	1878	}
	1879
	1880
	1881	return FALSE;
	1882	}
	1883
	1884
[222]	1885	STATIC void finish_states(int f, FSM * fsm, FSM * fsm_without)
[155]	1886	{
	1887	int state_no;
	1888
	1889	fsm->state_flags[f] = FLAG_FINISH;
	1890	fsm_without->state_flags[f] = FLAG_FINISH;
	1891	for (state_no = 0; state_no < fsm->n_states; state_no++)
	1892	if (is_finish_reachable(fsm, state_no))
	1893	fsm_without->state_flags[state_no] = FLAG_FINISH;
	1894	}
	1895
	1896	/...sdetermine_reachable:0: /
[222]	1897	STATIC void determine_reachable(int s, FSM * fsm, FSM * fsm_without)
[155]	1898	{
	1899	int edge_no, to_state;
	1900
	1901	fsm_without->state_flags[s] \|= FLAG_REACHABLE;
	1902	for (edge_no = 0; edge_no < fsm->n_edges; edge_no++)
	1903	if (fsm->edges[edge_no].etype != ETYPE_EPSILON)
	1904	{
	1905	to_state = fsm->edges[edge_no].to_state;
	1906	fsm_without->state_flags[to_state] \|= FLAG_REACHABLE;
	1907	}
	1908	}
	1909
	1910	/...scopy_non_epsilons:0: /
[222]	1911	STATIC void copy_non_epsilons(FSM * fsm, FSM * fsm_without)
[155]	1912	{
	1913	int state_no, edge_no;
	1914
	1915	for (state_no = 0; state_no < fsm->n_states; state_no++)
	1916	if (fsm_without->state_flags[state_no] & FLAG_REACHABLE)
	1917	for (edge_no = fsm->state_first_edges[state_no];
	1918	edge_no != -1;
	1919	edge_no = fsm->edges[edge_no].next_edge)
	1920	if (fsm->edges[edge_no].etype != ETYPE_EPSILON)
	1921	malloc_edge(state_no, &(fsm->edges[edge_no]), fsm_without);
	1922	}
	1923
	1924	/...sfollow_epsilons:0: /
	1925	/...scopy_edges_reachable:0: /
	1926	/* What this says is :-
	1927	* If state A can reach state B, by an epsilon move, and
	1928	* state B can reach state C then
	1929	* state A can reach state C *
	1930	*
	1931	* If the state B to state C involves an epsilon move then
	1932	* A can reach whatever C can reach too (by recursion)
	1933	*/
	1934
[222]	1935	STATIC BOOLEAN copy_edges_reachable(
[155]	1936	FSM * fsm,
	1937	FSM * fsm_without,
	1938	int state_no_to, /* AK: Bad identifier, might better be
	1939	* described as 'original reachable state' */
	1940	int state_no_from
	1941	)
	1942	{
	1943	int edge_no;
	1944	BOOLEAN ok;
	1945
	1946	if (fsm->state_flags[state_no_from] & FLAG_VISITED)
	1947	// Been here already, therefore all copied from here ok
	1948	return TRUE;
	1949
	1950	for (edge_no = fsm->state_first_edges[state_no_from];
	1951	edge_no != -1;
	1952	edge_no = fsm->edges[edge_no].next_edge)
	1953	if (fsm->edges[edge_no].etype != ETYPE_EPSILON)
	1954	// Had better add this edge
	1955	/...sadd this edge to the \39\to\39\ state:24: /
	1956	{
	1957	if (!malloc_edge(state_no_to, &(fsm->edges[edge_no]), fsm_without))
	1958	return (FALSE);
	1959	}
	1960
	1961	else
	1962	{
	1963	fsm->state_flags[state_no_from] \|= FLAG_VISITED;
	1964	ok = copy_edges_reachable(fsm, fsm_without, state_no_to, fsm->edges[edge_no].to_state);
	1965	fsm->state_flags[state_no_from] &= ~FLAG_VISITED;
	1966	if (!ok)
	1967	return FALSE;
	1968	}
	1969
	1970	return TRUE;
	1971	}
	1972
	1973
[222]	1974	STATIC BOOLEAN follow_epsilons(FSM * fsm, FSM * fsm_without)
[155]	1975	{
	1976	int state_no;
	1977
	1978	for (state_no = 0; state_no < fsm->n_states; state_no++)
	1979	if (fsm_without->state_flags[state_no] & FLAG_REACHABLE)
	1980	if (!copy_edges_reachable(fsm, fsm_without, state_no, state_no))
	1981	return FALSE;
	1982	return TRUE;
	1983	}
	1984
	1985
[222]	1986	STATIC BOOLEAN remove_epsilons(int s, int f, FSM * fsm, FSM * fsm_without)
[155]	1987	{
	1988	// FSM with no epsilon moves will have the same number of states
	1989
	1990	fsm_without->n_states = fsm->n_states;
	1991
	1992	// Mark state f as a finish state in the new FSM
	1993	// Any state with epsilon move(s) to state f is also a finish state
	1994
	1995	finish_states(f, fsm, fsm_without);
	1996
	1997	// Determine which states can be reached by non-epsilon moves. Add
	1998	// to this set, the start state. The resulting states should have
	1999	// their edges considered, but the other will not be reachable. This
	2000	// is because they will be bypassed by follow_epsilons().
	2001
	2002	determine_reachable(s, fsm, fsm_without);
	2003
	2004	// Copy across all reachable, non epsilon moves to new FSM
	2005
	2006	copy_non_epsilons(fsm, fsm_without);
	2007
	2008	// For all states, determine all other states that can be reached by
	2009	// epsilon moves and add the edges leading from them to us
	2010
	2011	return follow_epsilons(fsm, fsm_without);
	2012	}
	2013
	2014	/...smatch_fsm:0: /
/* Stack requirements per call (one call per character in the string!).
 *
 * eg: 16 bit OS/2, large model :-
 *      Return address + Stack frame + SI and DI + Arguments + Locals
 *            2        +      2      +    2+2    +  4+2+4+4  +   2     = 24
 *
 * eg: 16 bit DOS, large model :-
 *      Return address + Stack frame + SI and DI + Arguments + Locals
 *            2        +      2      +    2+2    +    4+2    +   2     = 16
 *
 * eg: RS/6000 AIX :-
 *      Massive stack frame per call, massive program stack size;
 *      net effect - room for approx. 200 levels before core dump!
 *
 * In addition, we can consume additional stack for every ETYPE_SSUB
 * (and ETYPE_ESUB). Guessing this requirement to be approx. 100 bytes,
 * we place an arbitrary limit of 20 (ie: 2KB) on subexpressions in an ERE.
 * This limit was enforced earlier during match tree parsing.
 *
 */
	2035
	2036	typedef struct substruct SUBS;
	2037	struct substruct
	2038	{
	2039	int n_spans;
	2040	ERE_SPAN spans[MAX_SPANS];
	2041	SUBS *next;
	2042	};
	2043
	2044	#define MAX_SUBS 20
	2045
	2046	typedef struct
	2047	{
	2048	FSM *fsm;
	2049	int eremf;
	2050	const char *str_init;
	2051	const char *str_limit;
	2052	const char *str_best;
	2053	SUBS *subs;
	2054	SUBS *subs_base;
	2055	ERE_MATCHINFO *mi;
	2056	}
	2057	CONTEXT;
	2058
	2059	#define NR
	2060
[222]	2061	STATIC void NR walk_fsm(const char str, int state_no, CONTEXT cx);
[155]	2062
[222]	2063	STATIC void NR walk_fsm_gated(const char str, CONTEXT cx, EDGE * e)
[155]	2064	{
	2065	if (e->gate != str)
	2066	// Avoid looping via this edge
	2067	{
	2068	const char *gate = e->gate;
	2069
	2070	e->gate = str;
	2071	walk_fsm(str, e->to_state, cx);
	2072	e->gate = gate;
	2073	}
	2074	}
	2075
[222]	2076	STATIC void NR walk_fsm_ssub(const char str, CONTEXT cx, EDGE * e)
[155]	2077	{
	2078	SUBS subs;
	2079
	2080	if (cx->subs->n_spans < MAX_SPANS)
	2081	cx->subs->spans[cx->subs->n_spans].pos = str - cx->str_init;
	2082
	2083	subs.n_spans = 0;
	2084	subs.next = cx->subs;
	2085	cx->subs = &subs;
	2086	walk_fsm_gated(str, cx, e);
	2087	cx->subs = subs.next;
	2088	}
	2089
[222]	2090	STATIC void NR walk_fsm_esub(const char str, CONTEXT cx, EDGE * e)
[155]	2091	{
	2092	SUBS *subs = cx->subs;
	2093
	2094	cx->subs = cx->subs->next;
	2095
	2096	if (cx->subs->n_spans < MAX_SPANS)
	2097	cx->subs->spans[cx->subs->n_spans].len =
	2098	(str - cx->str_init) - cx->subs->spans[cx->subs->n_spans].pos;
	2099	++(cx->subs->n_spans);
	2100
	2101	walk_fsm_gated(str, cx, e);
	2102
	2103	--(cx->subs->n_spans);
	2104	cx->subs = subs;
	2105	}
	2106
[222]	2107	STATIC void NR walk_fsm(const char str, int state_no, CONTEXT cx)
[155]	2108	{
	2109	int edge_no;
	2110
	2111	if (cx->fsm->state_flags[state_no] & FLAG_FINISH)
	2112	// Got to finishing state, may have got a better match than before
	2113	{
	2114	if ((cx->str_best == NULL \|\|
	2115	(cx->str_best < str) == (cx->eremf & EREMF_SHORTEST) == 0) &&
	2116	str <= cx->str_limit)
	2117	{
	2118	cx->str_best = str;
	2119	if (cx->mi != NULL)
	2120	{
	2121	int i;
	2122
	2123	cx->mi->n_spans = cx->subs_base->n_spans;
	2124	for (i = 0; i < cx->mi->n_spans; i++)
	2125	cx->mi->spans[i] = cx->subs_base->spans[i];
	2126	}
	2127	}
	2128	if (cx->eremf & EREMF_ANY)
	2129	return;
	2130	// Continue, as may be able to get a better match
	2131	}
	2132
	2133	for (edge_no = cx->fsm->state_first_edges[state_no];
	2134	edge_no != -1;
	2135	edge_no = cx->fsm->edges[edge_no].next_edge)
	2136	// Consider taking a step along an edge to a new state
	2137	{
	2138	EDGE *e = &(cx->fsm->edges[edge_no]);
	2139
	2140	switch (e->etype)
	2141	{
	2142	/...sETYPE_CHAR \45\ if matches character\44\ we can advance:24: /
	2143	case ETYPE_CHAR:
	2144	if (*str == e->u.character)
	2145	walk_fsm(str + 1, e->to_state, cx);
	2146	break;
	2147
	2148	/...sETYPE_NCHAR \45\ if not matches character\44\ we can advance:24: /
	2149	case ETYPE_NCHAR:
	2150	if (str != '\0' && str != e->u.character)
	2151	walk_fsm(str + 1, e->to_state, cx);
	2152	break;
	2153
	2154	/...sETYPE_STRING \45\ if matches string\44\ we can advance:24: /
	2155	case ETYPE_STRING:
	2156	{
	2157	unsigned len = (unsigned char)e->u.string[0];
	2158
	2159	if (!memcmp(str, e->u.string + 1, len))
	2160	walk_fsm(str + len, e->to_state, cx);
	2161	}
	2162	break;
	2163
	2164	/...sETYPE_DOT \45\ if got any character\44\ we can advance:24: /
	2165	case ETYPE_DOT:
	2166	if (*str != '\0')
	2167	walk_fsm(str + 1, e->to_state, cx);
	2168	break;
	2169
	2170	/...sETYPE_WORD \45\ if got word constituent\44\ we can advance:24: /
	2171	case ETYPE_WORD:
	2172	if (str != '\0' && isword(str))
	2173	walk_fsm(str + 1, e->to_state, cx);
	2174	break;
	2175
	2176	/...sETYPE_NWORD \45\ if got non word constituent\44\ we can advance:24: /
	2177	case ETYPE_NWORD:
	2178	if (str != '\0' && !isword(str))
	2179	walk_fsm(str + 1, e->to_state, cx);
	2180	break;
	2181
	2182	/...sETYPE_CCLASS \45\ if in the class\44\ we can advance:24: /
	2183	case ETYPE_CCLASS:
	2184	if (match_cclass(*str, e->u.cclass))
	2185	walk_fsm(str + 1, e->to_state, cx);
	2186	break;
	2187
	2188	/...sETYPE_SOL\47\EOL\47\SOW\47\EOW\47\IW\47\EW \45\ special epsilon moves:24: /
	2189	case ETYPE_SOL:
	2190	if (str == cx->str_init)
	2191	walk_fsm_gated(str, cx, e);
	2192	break;
	2193	case ETYPE_EOL:
	2194	if (*str == '\0')
	2195	walk_fsm_gated(str, cx, e);
	2196	break;
	2197	case ETYPE_SOW:
	2198	if (isword(str[0]) &&
	2199	((cx->str_init < str && !isword(str[-1])) \|\|
	2200	(cx->str_init == str)))
	2201	walk_fsm_gated(str, cx, e);
	2202	break;
	2203	case ETYPE_EOW:
	2204	if ((cx->str_init < str && isword(str[-1])) &&
	2205	(str[0] == '\0' \|\| !isword(str[0])))
	2206	walk_fsm_gated(str, cx, e);
	2207	break;
	2208	case ETYPE_IW:
	2209	if (cx->str_init < str && isword(str[-1]) &&
	2210	str[0] != '\0' && isword(str[0]))
	2211	walk_fsm_gated(str, cx, e);
	2212	break;
	2213	case ETYPE_EW:
	2214	if (isword(str[0]) &&
	2215	((cx->str_init < str && !isword(str[-1])) \|\|
	2216	(cx->str_init == str)))
	2217	walk_fsm_gated(str, cx, e);
	2218	else if ((cx->str_init < str && isword(str[-1])) &&
	2219	(str[0] == '\0' \|\| !isword(str[0])))
	2220	walk_fsm_gated(str, cx, e);
	2221	break;
	2222
	2223	/...sETYPE_SSUB\47\ESUB \45\ handle nested subexpression:24: /
	2224	case ETYPE_SSUB:
	2225	walk_fsm_ssub(str, cx, e);
	2226	break;
	2227	case ETYPE_ESUB:
	2228	walk_fsm_esub(str, cx, e);
	2229	break;
	2230
	2231	/...sETYPE_BACK \45\ check backreference:24: /
	2232	case ETYPE_BACK:
	2233	if (e->u.n_span < cx->subs->n_spans)
	2234	{
	2235	int len = cx->subs->spans[e->u.n_span].len;
	2236
	2237	if (!memcmp(str, cx->str_init + cx->subs->spans[e->u.n_span].pos, len))
	2238	walk_fsm_gated(str + len, cx, e);
	2239	}
	2240	break;
	2241
	2242	}
	2243	}
	2244	}
	2245
[222]	2246	STATIC const char match_fsm(FSM fsm,
[155]	2247	int eremf,
	2248	const char *str,
	2249	int posn,
	2250	int limit,
	2251	int state_no,
	2252	ERE_MATCHINFO * mi)
	2253	{
	2254	CONTEXT cx;
	2255	SUBS subs;
	2256
	2257	cx.fsm = fsm;
	2258	cx.eremf = eremf;
	2259	cx.str_init = str;
	2260	cx.str_limit = str + limit;
	2261	cx.str_best = NULL;
	2262	cx.subs = &subs;
	2263	cx.subs_base = &subs;
	2264	cx.mi = mi;
	2265	subs.n_spans = 0;
	2266	walk_fsm(str + posn, state_no, &cx);
	2267	return cx.str_best;
	2268	}
	2269
	2270
	2271	#ifdef DEBUG
	2272	/...sprint_fsm:0: /
[222]	2273	STATIC void print_fsm(FSM * fsm, int s, BOOLEAN not_just_reachable)
[155]	2274	{
	2275	int state_no, edge_no;
	2276
	2277	printf("Starting state %02d\n", s);
	2278	for (state_no = 0; state_no < fsm->n_states; state_no++)
	2279	if ((fsm->state_flags[state_no] & FLAG_REACHABLE) != 0 \|\|
	2280	not_just_reachable)
	2281	{
	2282	printf("%02d:%c\t", state_no, ((fsm->state_flags[state_no] & FLAG_FINISH) != 0) ? 'F' : ' ');
	2283	for (edge_no = fsm->state_first_edges[state_no];
	2284	edge_no != -1;
	2285	edge_no = fsm->edges[edge_no].next_edge)
	2286	/...sshow edge:32: /
	2287	{
	2288	EDGE *e = &(fsm->edges[edge_no]);
	2289
	2290	switch (e->etype)
	2291	{
	2292	case ETYPE_CHAR:
	2293	printf("%c", e->u.character);
	2294	break;
	2295	case ETYPE_NCHAR:
	2296	printf("~%c", e->u.character);
	2297	break;
	2298	case ETYPE_STRING:
	2299	printf("%.s",
	2300	(unsigned char)e->u.string[0],
	2301	(unsigned char)e->u.string[0],
	2302	e->u.string + 1);
	2303	break;
	2304	case ETYPE_DOT:
	2305	printf(".");
	2306	break;
	2307	case ETYPE_CCLASS:
	2308	printf("[");
	2309	break;
	2310	case ETYPE_WORD:
	2311	printf("\\w");
	2312	break;
	2313	case ETYPE_NWORD:
	2314	printf("\\W");
	2315	break;
	2316	case ETYPE_EPSILON:
	2317	printf("e");
	2318	break;
	2319	case ETYPE_SOL:
	2320	printf("^");
	2321	break;
	2322	case ETYPE_EOL:
	2323	printf("$");
	2324	break;
	2325	case ETYPE_SOW:
	2326	printf("\\<");
	2327	break;
	2328	case ETYPE_EOW:
	2329	printf("\\>");
	2330	break;
	2331	case ETYPE_IW:
	2332	printf("\\B");
	2333	break;
	2334	case ETYPE_EW:
	2335	printf("\\y");
	2336	break;
	2337	case ETYPE_SSUB:
	2338	printf("(");
	2339	break;
	2340	case ETYPE_ESUB:
	2341	printf(")");
	2342	break;
	2343	case ETYPE_BACK:
	2344	printf("\\%d", e->u.n_span + 1);
	2345	break;
	2346	}
	2347	printf("->%02d\t", e->to_state);
	2348	}
	2349
	2350	printf("\n");
	2351	}
	2352	}
	2353
	2354	#endif
	2355
	2356	/...sextended regular expressions:0: /
	2357	/* An ERE knows its original match tree and also the FSM it is compiled into.
	2358	* Using a (largely epsilon move free) FSM makes for faster searching. */
	2359
	2360	typedef struct
	2361	{
	2362	MATCH *match; // Parse tree for expression
	2363	int shortest_match; // Shortest match possible
	2364	FSM *fsm; // Compiled FSM
	2365	int s; // Start state for FSM
	2366	}
	2367	ERE;
	2368
	2369	/*
	2370	*@@ rxpCompile:
	2371	* compiles the regular expression str for later matching.
	2372	*
	2373	* If ERECF_TOLOWER is passed with erecf, every character
	2374	* (or range of characters) to be matched are stored in the
	2375	* compiled ERE in lower case. Therefore, if strings to be
	2376	* matched are passed in lower case also, the result is a
	2377	* case-insensitive match.
	2378	*/
	2379
	2380	ERE* rxpCompile(const char *str,
	2381	int erecf,
	2382	int *rc) // out: error code
	2383	{
	2384	ERE *ere;
	2385	const char *str_after;
	2386	FSM *fsm;
	2387	int s, f;
	2388
	2389	*rc = NO_ERROR;
	2390
	2391	if ((ere = (ERE *) malloc(sizeof(ERE))) == NULL)
	2392	{
	2393	*rc = ERROR_NOT_ENOUGH_MEMORY;
	2394	return NULL;
	2395	}
	2396
	2397	if ((ere->match = compile_match(str, &str_after, erecf, rc)) == NULL)
	2398	{
	2399	free(ere);
	2400	return NULL;
	2401	}
	2402
	2403	if (thisch(str_after) == CH_RPAR)
	2404	{
	2405	delete_match(ere->match);
	2406	free(ere);
	2407	*rc = EREE_UNEX_RPAR;
	2408	return NULL;
	2409	}
	2410
	2411	if (count_sub(ere->match) > MAX_SUBS)
	2412	{
	2413	delete_match(ere->match);
	2414	free(ere);
	2415	*rc = EREE_TOO_MANY_SUB;
	2416	return NULL;
	2417	}
	2418
	2419	#ifdef DEBUG
	2420	print_tree(ere->match, 0);
	2421	#endif
	2422
	2423	ere->shortest_match = (int)shortest_match(ere->match);
	2424
	2425	if ((fsm = create_fsm(rc)) == NULL)
	2426	{
	2427	delete_match(ere->match);
	2428	free(ere);
	2429	return NULL;
	2430	}
	2431
	2432	if (!make_fsm_from_match(ere->match, fsm, &s, &f))
	2433	{
	2434	delete_fsm(fsm);
	2435	delete_match(ere->match);
	2436	free(ere);
	2437	*rc = EREE_COMPILE_FSM;
	2438	return NULL;
	2439	}
	2440
	2441	#ifdef DEBUG
	2442	print_fsm(fsm, s, TRUE);
	2443	#endif
	2444
	2445	if ((ere->fsm = create_fsm(rc)) == NULL)
	2446	{
	2447	delete_fsm(fsm);
	2448	delete_match(ere->match);
	2449	free(ere);
	2450	*rc = EREE_COMPILE_FSM;
	2451	return NULL;
	2452	}
	2453
	2454	if (!remove_epsilons(s, f, fsm, ere->fsm))
	2455	{
	2456	delete_fsm(ere->fsm);
	2457	delete_fsm(fsm);
	2458	delete_match(ere->match);
	2459	free(ere);
	2460	*rc = EREE_COMPILE_FSM;
	2461	return NULL;
	2462	}
	2463
	2464	delete_fsm(fsm);
	2465
	2466	ere->s = s;
	2467
	2468	#ifdef DEBUG
	2469	print_fsm(ere->fsm, s, FALSE);
	2470	#endif
	2471
	2472	return ere;
	2473	}
	2474
	2475	/*
	2476	*@@ rxpMinLen:
	2477	*
	2478	*/
	2479
	2480	int rxpMinLen(const ERE * ere)
	2481	{
	2482	return ere->shortest_match;
	2483	}
	2484
	2485	/*
	2486	*@@ rxpMatch:
	2487	* returns the number of characters in the match, starting from
	2488	* pos characters into the string to be searched. Details of
	2489	* sub-matches can also be returend. Returns -1 if no match.
	2490	*
	2491	* If EREMF_SHORTEST is passed with eremf, the code looks for
	2492	* the shortest match, instead of the longest match.
	2493	*
	2494	* If EREMF_ANY is passed with eremf, the code doesn't try to
	2495	* find the longest (or shortest) match, it will return with the
	2496	* first match it finds (which could be of any length).
	2497	* This can speed up matching.
	2498	*/
	2499
	2500	int rxpMatch(const ERE * ere,
	2501	int eremf,
	2502	const char *str,
	2503	int pos,
	2504	ERE_MATCHINFO * mi)
	2505	{
	2506	int len = pos + strlen(str + pos);
	2507	const char *str_best;
	2508
	2509	if ((str_best = match_fsm(ere->fsm, eremf, str, pos, len, ere->s, mi)) == NULL)
	2510	return -1;
	2511	return (str_best - str) - pos;
	2512	}
	2513
	2514	/*
	2515	*@@ rxpMatch_fwd:
	2516	* match forwards within a string from a specified start position.
	2517	*
	2518	* If a match, return TRUE, and also return pos and len of the match.
	2519	*
	2520	* If EREMF_SHORTEST is passed with eremf, the code looks for
	2521	* the shortest match, instead of the longest match.
	2522	*
	2523	* If EREMF_ANY is passed with eremf, the code doesn't try to
	2524	* find the longest (or shortest) match, it will return with the
	2525	* first match it finds (which could be of any length).
	2526	* This can speed up matching.
	2527	*/
	2528
[178]	2529	BOOLEAN rxpMatch_fwd(const ERE *ere, // in: compiled ERE (from rxpCompile)
	2530	int eremf, // in: EREMF_* flags
[155]	2531	const char *str, // in: string to test
	2532	int pos, // in: start position
	2533	int *pos_match, // out: position of match
	2534	int *len_match, // out: length of match
[178]	2535	ERE_MATCHINFO *mi) // out: match info (for rxpSubsWith)
[155]	2536	{
	2537	int len = pos + strlen(str + pos);
	2538	int i;
	2539
	2540	for (i = pos; i <= len - ere->shortest_match; i++)
	2541	{
	2542	const char *str_best;
	2543
	2544	if ((str_best = match_fsm(ere->fsm, eremf, str, i, len, ere->s, mi)) != NULL)
	2545	{
	2546	*pos_match = i;
	2547	*len_match = (str_best - str) - i;
	2548	return TRUE;
	2549	}
	2550	}
	2551	return FALSE;
	2552	}
	2553
	2554	/*
	2555	*@@ rxpMatch_bwd:
	2556	* match backwards within a string not passing a
	2557	* specified end position.
	2558	*
	2559	* If a match, return TRUE, and also return pos and
	2560	* len of the match. We need to consider matches from
	2561	* the beginning of the line. We want the one which
	2562	* ends up in the rightmost position. Of those which
	2563	* end up equally far right, we want the one which
	2564	* extends the furthest (or shortest if EREMF_SHORTEST)
	2565	* left. See how we get this as a side effect of the
	2566	* loop ordering and the '>= + delta' test.
	2567	*
	2568	* This may not look as efficient as scanning the string
	2569	* backwards, but note that this would require a reversed
	2570	* ERE too, and we can't reverse EREs as they may
	2571	* contain backreferences.
	2572	*
	2573	* If EREMF_SHORTEST is passed with eremf, the code looks for
	2574	* the shortest match, instead of the longest match.
	2575	*
	2576	* If EREMF_ANY is passed with eremf, the code doesn't try to
	2577	* find the longest (or shortest) match, it will return with the
	2578	* first match it finds (which could be of any length).
	2579	* This can speed up matching.
	2580	*/
	2581
[178]	2582	BOOLEAN rxpMatch_bwd(const ERE *ere, // in: compiled ERE (from rxpCompile)
	2583	int eremf, // in: EREMF_* flags
	2584	const char *str, // in: string to test
	2585	int pos, // in: start position
	2586	int *pos_match, // out: position of match
	2587	int *len_match, // out: length of match
	2588	ERE_MATCHINFO * mi) // out: match info (for rxpSubsWith)
[155]	2589	{
	2590	int i;
	2591	int delta = (eremf & EREMF_SHORTEST) ? 0 : 1;
	2592	const char *rightmost = NULL;
	2593	ERE_MATCHINFO mi2;
	2594
	2595	for (i = 0; i <= pos - ere->shortest_match; i++)
	2596	{
	2597	const char *str_best;
	2598
	2599	if ((str_best = match_fsm(ere->fsm, eremf, str, i, pos, ere->s, &mi2)) != NULL)
	2600	{
	2601	if (rightmost == NULL \|\|
	2602	str_best >= rightmost + delta)
	2603	{
	2604	*pos_match = i;
	2605	*len_match = (str_best - str) - i;
	2606	rightmost = str_best;
	2607	if (mi != NULL)
	2608	{
	2609	mi->n_spans = mi2.n_spans;
	2610	for (i = 0; i < mi->n_spans; i++)
	2611	mi->spans[i] = mi2.spans[i];
	2612	}
	2613	}
	2614	}
	2615	}
	2616	return rightmost != NULL;
	2617	}
	2618
	2619	/*
	2620	*@@ rxpFree:
	2621	* frees all resources allocated by rxpCompile.
	2622	*/
	2623
	2624	void rxpFree(ERE * ere)
	2625	{
	2626	if (ere)
	2627	{
	2628	delete_match(ere->match);
	2629	delete_fsm(ere->fsm);
	2630	free(ere);
	2631	}
	2632	}
	2633
	2634	/*
	2635	*@@ rxpSubsWith:
	2636	* perform a substitution based upon an earlier found match.
[178]	2637	* This allows for implementing a "find and replace" function.
[155]	2638	*/
	2639
[178]	2640	BOOLEAN rxpSubsWith(const char *str, // in: original string searched (same as str given to rxpMatch_fwd)
	2641	int pos, // in: span of the entire match (pos_match from rxpMatch_fwd)
	2642	int len, // in: span of the entire match (len_match from rxpMatch_fwd)
	2643	ERE_MATCHINFO *mi, // in: details of match sub-spans (as from rxpMatch_fwd)
	2644	const char *with, // in: replacement string with \1 etc.
	2645	char *out, // out: buffer for string substitutions
	2646	int len_out, // in: sizeof *out
	2647	int *rc) // out: error, if FALSE returned
[155]	2648	{
	2649	int i = 0;
	2650	int j;
	2651
	2652	memcpy(out, str, pos);
	2653	i += pos;
	2654	while (*with != '\0')
	2655	{
	2656	const char *rep;
	2657	int len_rep;
	2658
	2659	if (*with != '\\')
	2660	{
	2661	rep = with++;
	2662	len_rep = 1;
	2663	}
	2664	else
	2665	{
	2666	++with;
	2667	if (with >= '1' && with <= '9')
	2668	{
	2669	int span = *with - '1';
	2670
	2671	++with;
	2672	if (span >= mi->n_spans)
	2673	{
	2674	*rc = EREE_BAD_BACKREF;
	2675	return FALSE;
	2676	}
	2677	rep = str + mi->spans[span].pos;
	2678	len_rep = mi->spans[span].len;
	2679	}
	2680	else if (*with != '\0')
	2681	{
	2682	rep = with++;
	2683	len_rep = 1;
	2684	}
	2685	else
	2686	{
	2687	*rc = EREE_BAD_BACKSLASH;
	2688	return FALSE;
	2689	}
	2690	}
	2691	if (i + len_rep > len_out)
	2692	{
	2693	*rc = EREE_SUBS_LEN;
	2694	return FALSE;
	2695	}
	2696	memcpy(out + i, rep, len_rep);
	2697	i += len_rep;
	2698	}
	2699	j = pos + len + strlen(str + pos + len);
	2700	if (i + j > len_out)
	2701	{
	2702	*rc = EREE_SUBS_LEN;
	2703	return FALSE;
	2704	}
	2705	memcpy(out + i, str + pos + len, j);
	2706	i += j;
	2707	out[i] = '\0';
	2708	return TRUE;
	2709	}
	2710
	2711	#ifdef __TESTCASE__
	2712
	2713	int main(int argc, char *argv[])
	2714	{
	2715	ERE *ere;
	2716	ERE_MATCHINFO mi;
	2717	int rc;
	2718
	2719	const char pcsz, pcszEre;
	2720
	2721	if (argc != 3)
	2722	{
	2723	printf("Usage: regexp <teststring> <ere>\n");
	2724	exit(1);
	2725	}
	2726
	2727	pcsz = argv[1];
	2728	pcszEre = argv[2];
	2729
	2730	printf("matching \"%s\" against \"%s\"\n",
	2731	pcsz,
	2732	pcszEre);
	2733	fflush(stdout);
	2734
	2735	if (!(ere = rxpCompile(pcszEre,
	2736	0,
	2737	&rc)))
	2738	{
	2739	printf("Error %d in rxpCompile: %s\n", rc, rxpError(rc));
	2740	exit(rc);
	2741	}
	2742
	2743	{
	2744	int pos, length;
	2745	rc = rxpMatch_fwd(ere,
	2746	0,
	2747	pcsz,
	2748	0,
	2749	&pos,
	2750	&length,
	2751	&mi);
	2752
	2753	if (rc == 0)
	2754	printf("no match\n");
	2755	else
	2756	printf("found at pos %d, length %d\n", pos, length);
	2757	}
	2758
	2759	return 0;
	2760	}
	2761
	2762	#endif

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/helpers/regexp.c

Download in other formats: