Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

smatch.c

Visit:

Last change on this file was 3231, checked in by bird, 18 years ago
eol style.
Property svn:eol-style set to `native`
File size: 8.8 KB

Line
1	/* smatch.c -- ksh-like extended pattern matching for the shell and filename
2	globbing. */
3
4	/* Copyright (C) 1991-2005 Free Software Foundation, Inc.
5
6	This file is part of GNU Bash, the Bourne Again SHell.
7
8	Bash is free software; you can redistribute it and/or modify it under
9	the terms of the GNU General Public License as published by the Free
10	Software Foundation; either version 2, or (at your option) any later
11	version.
12
13	Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14	WARRANTY; without even the implied warranty of MERCHANTABILITY or
15	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16	for more details.
17
18	You should have received a copy of the GNU General Public License along
19	with Bash; see the file COPYING. If not, write to the Free Software
20	Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
21
22	#include <config.h>
23
24	#include <stdio.h> /* for debugging */
25
26	#include "strmatch.h"
27	#include <chartypes.h>
28
29	#include "bashansi.h"
30	#include "shmbutil.h"
31	#include "xmalloc.h"
32
/* First, compile `sm_loop.c' for single-byte characters. */

/* Character types and helpers used for the single-byte instantiation. */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int
#define L(CS)	CS
/* Parenthesized so the negative constant is safe inside any expression. */
#define INVALID	(-1)

#undef STREQ
#undef STREQN
/* String equality tests.  The first characters are compared inline before
   strcmp/strncmp is called.  Arguments are parenthesized so arbitrary
   expressions can be passed. */
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp ((a), (b)) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp ((a), (b), (n)) == 0)
45
/* Range comparisons in bracket expressions go through strcoll(3) when it
   is available, even though that can have unwanted side effects in
   locales other than POSIX or US.  For instance, in the de locale, [A-Z]
   matches all characters. */

#if defined (HAVE_STRCOLL)
/* Helper for ranges and collating symbol equivalence: order the
   characters C1 and C2.  Only the low eight bits of each argument are
   looked at.  Returns 0 for identical characters; otherwise the
   strcoll(3) result for the two one-character strings, or, if strcoll
   reports a tie, the difference of the two character codes. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  char lhs[2], rhs[2];
  int order;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return (0);

  lhs[0] = c1;
  lhs[1] = '\0';
  rhs[0] = c2;
  rhs[1] = '\0';

  order = strcoll (lhs, rhs);
  if (order == 0)
    order = c1 - c2;	/* tie in the locale: fall back to code order */
  return order;
}
#else /* !HAVE_STRCOLL */
/* Without strcoll(3), order characters by their numeric codes. */
# define rangecmp(c1, c2)	((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */
77
#if defined (HAVE_STRCOLL)
/* Return 1 if C1 and C2 are equivalent for collation purposes, meaning
   rangecmp() finds no ordering difference between them, and 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  int order;

  order = rangecmp (c1, c2);
  return (order == 0);
}
#else
/* Without strcoll(3), characters are equivalent only when identical. */
# define collequiv(c1, c2)	((c1) == (c2))
#endif
88
89	#define _COLLSYM _collsym
90	#define __COLLSYM __collsym
91	#define POSIXCOLL posix_collsyms
92	#include "collsyms.h"
93
94	static int
95	collsym (s, len)
96	CHAR *s;
97	int len;
98	{
99	register struct _collsym *csp;
100	char *x;
101
102	x = (char *)s;
103	for (csp = posix_collsyms; csp->name; csp++)
104	{
105	if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
106	return (csp->code);
107	}
108	if (len == 1)
109	return s[0];
110	return INVALID;
111	}
112
/* unibyte character classification */
#if !defined (isascii) && !defined (HAVE_ISASCII)
/* Fallback test: true for values in the range 0 through octal 177. */
# define isascii(c)	((unsigned int)(c) <= 0177)
#endif

/* Character classes known to is_cclass().  The enumerators are listed in
   the same order as the strings in cclass_name[], because is_cclass()
   turns an index into that table directly into an enumerator. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, indexed by enum char_class.  Slot 0 (CC_NO_CLASS) is an
   empty placeholder. */
static char const *const cclass_name[] =
  {
    "",
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name[], placeholder included. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
133
134	static int
135	is_cclass (c, name)
136	int c;
137	const char *name;
138	{
139	enum char_class char_class = CC_NO_CLASS;
140	int i, result;
141
142	for (i = 1; i < N_CHAR_CLASS; i++)
143	{
144	if (STREQ (name, cclass_name[i]))
145	{
146	char_class = (enum char_class)i;
147	break;
148	}
149	}
150
151	if (char_class == 0)
152	return -1;
153
154	switch (char_class)
155	{
156	case CC_ASCII:
157	result = isascii (c);
158	break;
159	case CC_ALNUM:
160	result = ISALNUM (c);
161	break;
162	case CC_ALPHA:
163	result = ISALPHA (c);
164	break;
165	case CC_BLANK:
166	result = ISBLANK (c);
167	break;
168	case CC_CNTRL:
169	result = ISCNTRL (c);
170	break;
171	case CC_DIGIT:
172	result = ISDIGIT (c);
173	break;
174	case CC_GRAPH:
175	result = ISGRAPH (c);
176	break;
177	case CC_LOWER:
178	result = ISLOWER (c);
179	break;
180	case CC_PRINT:
181	result = ISPRINT (c);
182	break;
183	case CC_PUNCT:
184	result = ISPUNCT (c);
185	break;
186	case CC_SPACE:
187	result = ISSPACE (c);
188	break;
189	case CC_UPPER:
190	result = ISUPPER (c);
191	break;
192	case CC_WORD:
193	result = (ISALNUM (c) \|\| c == '_');
194	break;
195	case CC_XDIGIT:
196	result = ISXDIGIT (c);
197	break;
198	default:
199	result = -1;
200	break;
201	}
202
203	return result;
204	}
205
206	/* Now include `sm_loop.c' for single-byte characters. */
207	/* The result of FOLD is an `unsigned char' */
208	# define FOLD(c) ((flags & FNM_CASEFOLD) \
209	? TOLOWER ((unsigned char)c) \
210	: ((unsigned char)c))
211
212	#define FCT internal_strmatch
213	#define GMATCH gmatch
214	#define COLLSYM collsym
215	#define PARSE_COLLSYM parse_collsym
216	#define BRACKMATCH brackmatch
217	#define PATSCAN patscan
218	#define STRCOMPARE strcompare
219	#define EXTMATCH extmatch
220	#define STRCHR(S, C) strchr((S), (C))
221	#define STRCOLL(S1, S2) strcoll((S1), (S2))
222	#define STRLEN(S) strlen(S)
223	#define STRCMP(S1, S2) strcmp((S1), (S2))
224	#define RANGECMP(C1, C2) rangecmp((C1), (C2))
225	#define COLLEQUIV(C1, C2) collequiv((C1), (C2))
226	#define CTYPE_T enum char_class
227	#define IS_CCLASS(C, S) is_cclass((C), (S))
228	#include "sm_loop.c"
229
230	#if HANDLE_MULTIBYTE
231
/* Character types and helpers used for the wide-character instantiation. */
# define CHAR		wchar_t
# define U_CHAR		wint_t
# define XCHAR		wchar_t
# define INT		wint_t
# define L(CS)		L##CS
# define INVALID	WEOF

/* Wide-string equality tests replacing the single-byte definitions. */
# undef STREQ
# undef STREQN
# define STREQ(s1, s2)		(wcscmp (s1, s2) == 0)
# define STREQN(a, b, n)	((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
243
/* Order the wide characters C1 and C2 for range comparisons.  Returns 0
   when they are the same character; otherwise the wcscoll(3) result for
   the two one-character wide strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  static wchar_t s1[2] = { L' ', L'\0' };
  static wchar_t s2[2] = { L' ', L'\0' };

  if (c1 == c2)
    return 0;

  s1[0] = c1;
  s2[0] = c2;

  return (wcscoll (s1, s2));
}
260
/* Report whether the wide characters C and EQUIV are the same character:
   1 if they are identical, 0 otherwise. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (c == equiv);
}
267
268	/* Helper function for collating symbol. */
269	# define _COLLSYM _collwcsym
270	# define __COLLSYM __collwcsym
271	# define POSIXCOLL posix_collwcsyms
272	# include "collsyms.h"
273
274	static wint_t
275	collwcsym (s, len)
276	wchar_t *s;
277	int len;
278	{
279	register struct _collwcsym *csp;
280
281	for (csp = posix_collwcsyms; csp->name; csp++)
282	{
283	if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
284	return (csp->code);
285	}
286	if (len == 1)
287	return s[0];
288	return INVALID;
289	}
290
/* Test whether the wide character WC belongs to the character class NAME,
   given as a wide string such as L"alpha".

   Two names get special treatment.  "ascii", when wctype(3) does not
   recognize it, is answered by converting WC to a single byte with
   wctob(3) and checking that the byte is no greater than 0x7F.  "word"
   is handled as "alnum" plus the underscore character.

   Returns non-zero if WC is in the class and zero if it is not.  Returns
   -1 when NAME cannot be converted to a multibyte string, when wctype(3)
   does not know the class, or when the conversion buffer cannot be
   allocated. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  mbstate_t state;
  size_t mbslength, mbssize;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      if ((c = wctob (wc)) == EOF)
	return 0;
      else
	return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* wctype(3) wants a multibyte name, so convert NAME first.  Each wide
     character needs at most MB_CUR_MAX bytes, plus one for the NUL. */
  memset (&state, '\0', sizeof (mbstate_t));
  mbssize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbssize);
  if (mbs == 0)
    return -1;

  /* wcsrtombs updates the pointer it is handed; NAME is not used again
     after this call. */
  mbslength = wcsrtombs (mbs, (const wchar_t **)&name, mbssize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
336
337	/* Now include `sm_loop.c' for multibyte characters. */
338	#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
339	#define FCT internal_wstrmatch
340	#define GMATCH gmatch_wc
341	#define COLLSYM collwcsym
342	#define PARSE_COLLSYM parse_collwcsym
343	#define BRACKMATCH brackmatch_wc
344	#define PATSCAN patscan_wc
345	#define STRCOMPARE wscompare
346	#define EXTMATCH extmatch_wc
347	#define STRCHR(S, C) wcschr((S), (C))
348	#define STRCOLL(S1, S2) wcscoll((S1), (S2))
349	#define STRLEN(S) wcslen(S)
350	#define STRCMP(S1, S2) wcscmp((S1), (S2))
351	#define RANGECMP(C1, C2) rangecmp_wc((C1), (C2))
352	#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
353	#define CTYPE_T enum char_class
354	#define IS_CCLASS(C, S) is_wcclass((C), (S))
355	#include "sm_loop.c"
356
357	#endif /* HANDLE_MULTIBYTE */
358
/* Match STRING against PATTERN under the control of FLAGS and return the
   result of the internal matcher.

   When multibyte support is compiled in and the current locale uses more
   than one byte per character (MB_CUR_MAX > 1), both arguments are
   converted to wide-character strings with xdupmbstowcs and matched by
   internal_wstrmatch.  If the locale is single-byte, or either conversion
   fails, the byte-oriented internal_strmatch is used on the original
   strings instead. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      /* The pattern was already converted; release it before falling
	 back to the single-byte matcher. */
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/bash/3.1-p17/lib/glob/smatch.c

Download in other formats: