source: trunk/essentials/sys-apps/findutils/lib/regexprops.c

Last change on this file was 3170, checked in by bird, 18 years ago

findutils 4.3.2

File size: 11.9 KB
Line 
1/* regexprops.c -- document the properties of the regular expressions
2 understood by gnulib.
3
4 Copyright 2005 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20/* Written by James Youngman, <jay@gnu.org>. */
21
22#if HAVE_CONFIG_H
23# include <config.h>
24#endif
25
26#include <stdio.h>
27#include <unistd.h>
28#include <errno.h>
29
30#include "regex.h"
31#include "regextype.h"
32
33
/* Name this program was run with.  main() stores argv[0] here.  Nothing
   else in this file reads it; presumably it is consumed by library code
   linked into the program -- TODO confirm.  */
char *program_name;
36
/* Write the string S to standard output.

   ESCAPE is supplied by callers to say whether S is body text (nonzero)
   or ready-made markup (zero), but it is deliberately ignored here: S is
   always written exactly as given.  */
static void
output (const char *s, int escape)
{
  (void) escape;		/* Accepted for the callers' benefit only.  */

  fprintf (stdout, "%s", s);
}
43
44
/* Terminate the current output line.  */
static void
newline (void)
{
  static const char end_of_line[] = "\n";

  output (end_of_line, 0);
}
49
/* Emit S as body text: it is handed to output() with the escape flag
   set (output() currently ignores that flag).  */
static void
content (const char *s)
{
  const int escape = 1;

  output (s, escape);
}
54
/* Emit S verbatim: it is handed to output() with the escape flag
   clear.  Used in this file for text that already contains markup.  */
static void
literal (const char *s)
{
  const int escape = 0;

  output (s, escape);
}
59
/* Emit the markup directive S (for example "@item ") verbatim: it is
   handed to output() with the escape flag clear.  */
static void
directive (const char *s)
{
  const int escape = 0;

  output (s, escape);
}
64
/* Emit one list entry on a line of its own: a line break, the "@item "
   directive followed directly by S (written verbatim), and a closing
   line break.  Used for both @table and @enumerate entries.  */
static void
enum_item (const char *s)
{
  newline ();			/* Make sure the directive starts a line.  */

  directive ("@item ");
  literal (s);

  newline ();
}
72
/* Start the subsection that documents the regular expression type NAME.

   Two lines are written after a leading line break:
     "@node NAME regular expression syntax"
     "@subsection @samp{NAME} regular expression syntax"

   NEXT, PREV and UP are accepted but not used; no node pointers are
   written.  */
static void
begin_subsection (const char *name,
		  const char *next,
		  const char *prev,
		  const char *up)
{
  /* Shared tail of the node name and the subsection title.  */
  static const char title_suffix[] = " regular expression syntax";

  (void) up;
  (void) prev;
  (void) next;

  newline ();

  /* Node line.  */
  directive ("@node ");
  content (name);
  content (title_suffix);
  newline ();

  /* Title line; only here is the type name wrapped in @samp{}.  */
  directive ("@subsection ");
  output ("@samp{", 0);
  content (name);
  output ("}", 0);
  content (title_suffix);
  newline ();
}
96
/* Open a table whose entries are formatted with MARKUP (for example
   "@samp"): writes "@table MARKUP" on a line of its own.  */
static void
begintable_markup (const char *markup)
{
  newline ();

  directive ("@table ");
  literal (markup);

  newline ();
}
104
/* Close a table opened by begintable_markup(): writes "@end table" on a
   line of its own.  Declared with a (void) parameter list so that the
   definition is a real prototype, like newline().  */
static void
endtable (void)
{
  newline ();
  directive ("@end table");
  newline ();
}
111
/* Open an enumerated list: writes "@enumerate" on a line of its own.
   Declared with a (void) parameter list so that the definition is a real
   prototype, like newline().  */
static void
beginenum (void)
{
  newline ();
  directive ("@enumerate");
  newline ();
}
118
/* Close an enumerated list opened by beginenum(): writes
   "@end enumerate" on a line of its own.  Declared with a (void)
   parameter list so that the definition is a real prototype, like
   newline().  */
static void
endenum (void)
{
  newline ();
  directive ("@end enumerate");
  newline ();
}
125
/* Separate paragraphs by writing two consecutive line breaks (i.e. an
   empty line).  Declared with a (void) parameter list so that the
   definition is a real prototype, like newline().  */
static void
newpara (void)
{
  content ("\n\n");
}
130
131
132static void
133describe_regex_syntax(int options)
134{
135 newpara();
136 content("The character @samp{.} matches any single character");
137 if ( (options & RE_DOT_NEWLINE) == 0 )
138 {
139 content(" except newline");
140 }
141 if (options & RE_DOT_NOT_NULL)
142 {
143 if ( (options & RE_DOT_NEWLINE) == 0 )
144 content(" and");
145 else
146 content(" except");
147
148 content(" the null character");
149 }
150 content(". ");
151 newpara();
152
153 if (!(options & RE_LIMITED_OPS))
154 {
155 begintable_markup("@samp");
156 if (options & RE_BK_PLUS_QM)
157 {
158 enum_item("\\+");
159 content("indicates that the regular expression should match one"
160 " or more occurrences of the previous atom or regexp. ");
161 enum_item("\\?");
162 content("indicates that the regular expression should match zero"
163 " or one occurrence of the previous atom or regexp. ");
164 enum_item("+ and ? ");
165 content("match themselves. ");
166 }
167 else
168 {
169 enum_item("+");
170 content("indicates that the regular expression should match one"
171 " or more occurrences of the previous atom or regexp. ");
172 enum_item("?");
173 content("indicates that the regular expression should match zero"
174 " or one occurrence of the previous atom or regexp. ");
175 enum_item("\\+");
176 literal("matches a @samp{+}");
177 enum_item("\\?");
178 literal("matches a @samp{?}. ");
179 }
180 endtable();
181 }
182
183 newpara();
184
185 content("Bracket expressions are used to match ranges of characters. ");
186 literal("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
187 if (options & RE_NO_EMPTY_RANGES)
188 content("invalid");
189 else
190 content("ignored");
191 content(". ");
192
193 if (options & RE_BACKSLASH_ESCAPE_IN_LISTS)
194 literal("Within square brackets, @samp{\\} can be used to quote "
195 "the following character. ");
196 else
197 literal("Within square brackets, @samp{\\} is taken literally. ");
198
199 if (options & RE_CHAR_CLASSES)
200 content("Character classes are supported; for example "
201 "@samp{[[:digit:]]} will match a single decimal digit. ");
202 else
203 literal("Character classes are not supported, so for example "
204 "you would need to use @samp{[0-9]} "
205 "instead of @samp{[[:digit:]]}. ");
206
207 if (options & RE_HAT_LISTS_NOT_NEWLINE)
208 {
209 literal("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
210 }
211 newpara();
212 if (options & RE_NO_GNU_OPS)
213 {
214 content("GNU extensions are not supported and so "
215 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
216 "match "
217 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
218 }
219 else
220 {
221 content("GNU extensions are supported:");
222 beginenum();
223 enum_item("@samp{\\w} matches a character within a word");
224 enum_item("@samp{\\W} matches a character which is not within a word");
225 enum_item("@samp{\\<} matches the beginning of a word");
226 enum_item("@samp{\\>} matches the end of a word");
227 enum_item("@samp{\\b} matches a word boundary");
228 enum_item("@samp{\\B} matches characters which are not a word boundary");
229 enum_item("@samp{\\`} matches the beginning of the whole input");
230 enum_item("@samp{\\'} matches the end of the whole input");
231 endenum();
232 }
233
234 newpara();
235
236
237 if (options & RE_NO_BK_PARENS)
238 {
239 literal("Grouping is performed with parentheses @samp{()}. ");
240
241 if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
242 literal("An unmatched @samp{)} matches just itself. ");
243 }
244 else
245 {
246 literal("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
247 }
248
249 if (options & RE_NO_BK_REFS)
250 {
251 content("A backslash followed by a digit matches that digit. ");
252 }
253 else
254 {
255 literal("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
256 if (options & RE_NO_BK_PARENS)
257 literal("@samp{(}");
258 else
259 literal("@samp{\\(}");
260 content(". ");
261 }
262
263
264 newpara();
265 if (!(options & RE_LIMITED_OPS))
266 {
267 if (options & RE_NO_BK_VBAR)
268 literal("The alternation operator is @samp{|}. ");
269 else
270 literal("The alternation operator is @samp{\\|}. ");
271 }
272 newpara();
273
274 if (options & RE_CONTEXT_INDEP_ANCHORS)
275 {
276 literal("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
277 }
278 else
279 {
280 literal("The character @samp{^} only represents the beginning of a string when it appears:");
281 beginenum();
282 enum_item("\nAt the beginning of a regular expression");
283 enum_item("After an open-group, signified by ");
284 if (options & RE_NO_BK_PARENS)
285 {
286 literal("@samp{(}");
287 }
288 else
289 {
290 literal("@samp{\\(}");
291 }
292 newline();
293 if (!(options & RE_LIMITED_OPS))
294 {
295 if (options & RE_NEWLINE_ALT)
296 enum_item("After a newline");
297
298 if (options & RE_NO_BK_VBAR )
299 enum_item("After the alternation operator @samp{|}");
300 else
301 enum_item("After the alternation operator @samp{\\|}");
302 }
303 endenum();
304
305 newpara();
306 literal("The character @samp{$} only represents the end of a string when it appears:");
307 beginenum();
308 enum_item("At the end of a regular expression");
309 enum_item("Before an close-group, signified by ");
310 if (options & RE_NO_BK_PARENS)
311 {
312 literal("@samp{)}");
313 }
314 else
315 {
316 literal("@samp{\\)}");
317 }
318 if (!(options & RE_LIMITED_OPS))
319 {
320 if (options & RE_NEWLINE_ALT)
321 enum_item("Before a newline");
322
323 if (options & RE_NO_BK_VBAR)
324 enum_item("Before the alternation operator @samp{|}");
325 else
326 enum_item("Before the alternation operator @samp{\\|}");
327 }
328 endenum();
329 }
330 newpara();
331 if (!(options & RE_LIMITED_OPS) )
332 {
333 if ((options & RE_CONTEXT_INDEP_OPS)
334 && !(options & RE_CONTEXT_INVALID_OPS))
335 {
336 literal("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
337 }
338 else
339 {
340 if (options & RE_BK_PLUS_QM)
341 literal("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
342 else
343 literal("@samp{*}, @samp{+} and @samp{?} ");
344
345 if (options & RE_CONTEXT_INVALID_OPS)
346 {
347 content("are special at any point in a regular expression except the following places, where they are not allowed:");
348 }
349 else
350 {
351 content("are special at any point in a regular expression except:");
352 }
353
354 beginenum();
355 enum_item("At the beginning of a regular expression");
356 enum_item("After an open-group, signified by ");
357 if (options & RE_NO_BK_PARENS)
358 {
359 literal("@samp{(}");
360 }
361 else
362 {
363 literal("@samp{\\(}");
364 }
365 if (!(options & RE_LIMITED_OPS))
366 {
367 if (options & RE_NEWLINE_ALT)
368 enum_item("After a newline");
369
370 if (options & RE_NO_BK_VBAR)
371 enum_item("After the alternation operator @samp{|}");
372 else
373 enum_item("After the alternation operator @samp{\\|}");
374 }
375 endenum();
376 }
377 }
378
379
380 newpara();
381 if (options & RE_INTERVALS)
382 {
383 if (options & RE_NO_BK_BRACES)
384 {
385 literal("Intervals are specified by @samp{@{} and @samp{@}}. ");
386 if (options & RE_INVALID_INTERVAL_ORD)
387 {
388 literal("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
389 }
390 else
391 {
392 literal("Invalid intervals such as @samp{a@{1z} are not accepted. ");
393 }
394 }
395 else
396 {
397 literal("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
398 if (options & RE_INVALID_INTERVAL_ORD)
399 {
400 literal("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
401 }
402 else
403 {
404 literal("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
405 }
406 }
407
408 }
409
410 newpara();
411 if (options & RE_NO_POSIX_BACKTRACKING)
412 {
413 content("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
414 }
415 else
416 {
417 content("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
418 }
419 newpara();
420}
421
422
423
/* Write the menu that lists one entry per regular expression type:

     @menu
     * NAME regular expression syntax::
     ...
     @end menu

   Types are visited by index starting at 0 until get_regex_type_name()
   returns a null pointer.  The entry text matches the node name written
   by begin_subsection().  */
static void
menu (void)
{
  int i;
  const char *name;

  output ("@menu\n", 0);

  /* Only the name is needed here, so the flags of each type are not
     looked up.  */
  for (i = 0; (name = get_regex_type_name (i)) != NULL; ++i)
    {
      output ("* ", 0);
      output (name, 0);
      content (" regular expression syntax");
      output ("::", 0);
      newline ();
    }

  output ("@end menu\n", 0);
}
444
445
/* Write the complete document: the menu, then one subsection per
   regular expression type.

   Types are visited by index starting at 0 until get_regex_type_name()
   returns a null pointer.  A type for which get_regex_type_synonym()
   returns a non-negative index is documented only as a synonym of that
   other type; every other type gets the full description of its syntax
   flags.

   UP is forwarded to begin_subsection(), together with the names of the
   neighbouring types ("" where there is none).  */
static void
describe_all (const char *up)
{
  const char *previous = "";
  int i;

  menu ();

  for (i = 0; ; ++i)
    {
      const int options = get_regex_type_flags (i);
      const char *name = get_regex_type_name (i);
      const char *next;
      int parent;

      if (!name)
	break;			/* Past the last known type.  */

      next = get_regex_type_name (i + 1);
      if (NULL == next)
	next = "";

      begin_subsection (name, next, previous, up);

      parent = get_regex_type_synonym (i);
      if (parent < 0)
	{
	  describe_regex_syntax (options);
	}
      else
	{
	  content ("This is a synonym for ");
	  content (get_regex_type_name (parent));
	  content (".");
	}

      previous = name;
    }
}
480
481
482
483int main (int argc, char *argv[])
484{
485 const char *up = "";
486 program_name = argv[0];
487
488 if (argc > 1)
489 up = argv[1];
490
491 describe_all(up);
492 return 0;
493}
Note: See TracBrowser for help on using the repository browser.