source: vendor/m4/1.4.8/src/input.c

Last change on this file was 3090, checked in by bird, 18 years ago

m4 1.4.8

File size: 30.9 KB
Line 
1/* GNU m4 -- A simple macro processor
2
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006
4 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA
20*/
21
22/* Handling of different input sources, and lexical analysis. */
23
24#include "m4.h"
25
26/* Unread input can be either files, that should be read (eg. included
27 files), strings, which should be rescanned (eg. macro expansion text),
28 or quoted macro definitions (as returned by the builtin "defn").
29 Unread input are organised in a stack, implemented with an obstack.
30 Each input source is described by a "struct input_block". The obstack
31 is "current_input". The top of the input stack is "isp".
32
33 The macro "m4wrap" places the text to be saved on another input
34 stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
35 is seen on normal input (eg, when "current_input" is empty), input is
36 switched over to "wrapup_stack", and the original "current_input" is
37 freed. A new stack is allocated for "wrapup_stack", which will
38 accept any text produced by calls to "m4wrap" from within the
39 wrapped text. This process of shuffling "wrapup_stack" to
40 "current_input" can continue indefinitely, even generating infinite
41 loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
42
43 Pushing new input on the input stack is done by push_file (),
44 push_string (), push_wrapup () (for wrapup text), and push_macro ()
45 (for macro definitions). Because macro expansion needs direct access
46 to the current input obstack (for optimisation), push_string () is
47 split into two functions, push_string_init (), which returns a pointer
48 to the current input stack, and push_string_finish (), which returns a
49 pointer to the final text. The input_block *next is used to manage
50 the coordination between the different push routines.
51
52 The current file and line number are stored in two global
53 variables, for use by the error handling functions in m4.c. Macro
54 expansion wants to report the line where a macro name was detected,
55 rather than where it finished collecting arguments. This also
56 applies to text resulting from macro expansions. So each input
57 block maintains its own notion of the current file and line, and
58 swapping between input blocks updates the global variables
59 accordingly. */
60
61#ifdef ENABLE_CHANGEWORD
62#include "regex.h"
63#endif
64
/* The kinds of source that can sit on an input stack.  */
typedef enum input_type
{
  INPUT_STRING,		/* Text resulting from macro expansion.  */
  INPUT_FILE,		/* File named on the command line or included.  */
  INPUT_MACRO		/* Builtin function resulting from defn.  */
} input_type;
73
74struct input_block
75{
76 struct input_block *prev; /* previous input_block on the input stack */
77 input_type type; /* see enum values */
78 const char *file; /* file where this input is from */
79 int line; /* line where this input is from */
80 union
81 {
82 struct
83 {
84 char *string; /* remaining string value */
85 }
86 u_s; /* INPUT_STRING */
87 struct
88 {
89 FILE *fp; /* input file handle */
90 bool end : 1; /* true if peek has seen EOF */
91 bool close : 1; /* true if we should close file on pop */
92 bool advance_line : 1; /* track previous start_of_input_line */
93 }
94 u_f; /* INPUT_FILE */
95 builtin_func *func; /* pointer to macro's function */
96 }
97 u;
98};
99
100typedef struct input_block input_block;
101
102
103
104/* Current input file name. */
105const char *current_file;
106
107/* Current input line number. */
108int current_line;
109
110/* Obstack for storing individual tokens. */
111static struct obstack token_stack;
112
113/* Obstack for storing file names. */
114static struct obstack file_names;
115
116/* Wrapup input stack. */
117static struct obstack *wrapup_stack;
118
119/* Current stack, from input or wrapup. */
120static struct obstack *current_input;
121
122/* Bottom of token_stack, for obstack_free. */
123static void *token_bottom;
124
125/* Pointer to top of current_input. */
126static input_block *isp;
127
128/* Pointer to top of wrapup_stack. */
129static input_block *wsp;
130
131/* Aux. for handling split push_string (). */
132static input_block *next;
133
134/* Flag for next_char () to increment current_line. */
135static bool start_of_input_line;
136
137/* Flag for next_char () to recognize change in input block. */
138static bool input_change;
139
140#define CHAR_EOF 256 /* character return on EOF */
141#define CHAR_MACRO 257 /* character return for MACRO token */
142
143/* Quote chars. */
144STRING rquote;
145STRING lquote;
146
147/* Comment chars. */
148STRING bcomm;
149STRING ecomm;
150
151#ifdef ENABLE_CHANGEWORD
152
153# define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
154
155static char *word_start;
156static struct re_pattern_buffer word_regexp;
157static int default_word_regexp;
158static struct re_registers regs;
159
160#else /* ! ENABLE_CHANGEWORD */
161# define default_word_regexp 1
162#endif /* ! ENABLE_CHANGEWORD */
163
164#ifdef DEBUG_INPUT
165static const char *token_type_string (token_type);
166#endif
167
168
169
170/*-------------------------------------------------------------------.
171| push_file () pushes an input file on the input stack, saving the |
172| current file name and line number. If next is non-NULL, this push |
173| invalidates a call to push_string_init (), whose storage is |
174| consequently released. If CLOSE, then close FP after EOF is |
175| detected. |
176`-------------------------------------------------------------------*/
177
178void
179push_file (FILE *fp, const char *title, bool close)
180{
181 input_block *i;
182
183 if (next != NULL)
184 {
185 obstack_free (current_input, next);
186 next = NULL;
187 }
188
189 if (debug_level & DEBUG_TRACE_INPUT)
190 DEBUG_MESSAGE1 ("input read from %s", title);
191
192 i = (input_block *) obstack_alloc (current_input,
193 sizeof (struct input_block));
194 i->type = INPUT_FILE;
195 i->file = (char *) obstack_copy0 (&file_names, title, strlen (title));
196 i->line = 1;
197 input_change = true;
198
199 i->u.u_f.fp = fp;
200 i->u.u_f.end = false;
201 i->u.u_f.close = close;
202 i->u.u_f.advance_line = start_of_input_line;
203 output_current_line = -1;
204
205 i->prev = isp;
206 isp = i;
207}
208
209/*---------------------------------------------------------------.
210| push_macro () pushes a builtin macro's definition on the input |
211| stack. If next is non-NULL, this push invalidates a call to |
212| push_string_init (), whose storage is consequently released. |
213`---------------------------------------------------------------*/
214
215void
216push_macro (builtin_func *func)
217{
218 input_block *i;
219
220 if (next != NULL)
221 {
222 obstack_free (current_input, next);
223 next = NULL;
224 }
225
226 i = (input_block *) obstack_alloc (current_input,
227 sizeof (struct input_block));
228 i->type = INPUT_MACRO;
229 i->file = current_file;
230 i->line = current_line;
231 input_change = true;
232
233 i->u.func = func;
234 i->prev = isp;
235 isp = i;
236}
237
238/*------------------------------------------------------------------.
239| First half of push_string (). The pointer next points to the new |
240| input_block. |
241`------------------------------------------------------------------*/
242
243struct obstack *
244push_string_init (void)
245{
246 if (next != NULL)
247 {
248 M4ERROR ((warning_status, 0,
249 "INTERNAL ERROR: recursive push_string!"));
250 abort ();
251 }
252
253 next = (input_block *) obstack_alloc (current_input,
254 sizeof (struct input_block));
255 next->type = INPUT_STRING;
256 next->file = current_file;
257 next->line = current_line;
258
259 return current_input;
260}
261
262/*------------------------------------------------------------------------.
263| Last half of push_string (). If next is now NULL, a call to push_file |
264| () has invalidated the previous call to push_string_init (), so we just |
265| give up. If the new object is void, we do not push it. The function |
266| push_string_finish () returns a pointer to the finished object. This |
267| pointer is only for temporary use, since reading the next token might |
268| release the memory used for the object. |
269`------------------------------------------------------------------------*/
270
271const char *
272push_string_finish (void)
273{
274 const char *ret = NULL;
275
276 if (next == NULL)
277 return NULL;
278
279 if (obstack_object_size (current_input) > 0)
280 {
281 obstack_1grow (current_input, '\0');
282 next->u.u_s.string = (char *) obstack_finish (current_input);
283 next->prev = isp;
284 isp = next;
285 ret = isp->u.u_s.string; /* for immediate use only */
286 input_change = true;
287 }
288 else
289 obstack_free (current_input, next); /* people might leave garbage on it. */
290 next = NULL;
291 return ret;
292}
293
294/*------------------------------------------------------------------.
295| The function push_wrapup () pushes a string on the wrapup stack. |
296| When the normal input stack gets empty, the wrapup stack will |
297| become the input stack, and push_string () and push_file () will |
298| operate on wrapup_stack. Push_wrapup should be done as |
299| push_string (), but this will suffice, as long as arguments to |
300| m4_m4wrap () are moderate in size. |
301`------------------------------------------------------------------*/
302
303void
304push_wrapup (const char *s)
305{
306 input_block *i;
307 i = (input_block *) obstack_alloc (wrapup_stack,
308 sizeof (struct input_block));
309 i->prev = wsp;
310 i->type = INPUT_STRING;
311 i->file = current_file;
312 i->line = current_line;
313 i->u.u_s.string = (char *) obstack_copy0 (wrapup_stack, s, strlen (s));
314 wsp = i;
315}
316
317
318
319/*-------------------------------------------------------------------------.
320| The function pop_input () pops one level of input sources. If the |
321| popped input_block is a file, current_file and current_line are reset to |
322| the saved values before the memory for the input_block are released. |
323`-------------------------------------------------------------------------*/
324
325static void
326pop_input (void)
327{
328 input_block *tmp = isp->prev;
329
330 switch (isp->type)
331 {
332 case INPUT_STRING:
333 case INPUT_MACRO:
334 break;
335
336 case INPUT_FILE:
337 if (debug_level & DEBUG_TRACE_INPUT)
338 {
339 if (tmp)
340 DEBUG_MESSAGE2 ("input reverted to %s, line %d",
341 tmp->file, tmp->line);
342 else
343 DEBUG_MESSAGE ("input exhausted");
344 }
345
346 if (ferror (isp->u.u_f.fp))
347 {
348 M4ERROR ((warning_status, 0, "read error"));
349 if (isp->u.u_f.close)
350 fclose (isp->u.u_f.fp);
351 retcode = EXIT_FAILURE;
352 }
353 else if (isp->u.u_f.close && fclose (isp->u.u_f.fp) == EOF)
354 {
355 M4ERROR ((warning_status, errno, "error reading file"));
356 retcode = EXIT_FAILURE;
357 }
358 start_of_input_line = isp->u.u_f.advance_line;
359 output_current_line = -1;
360 break;
361
362 default:
363 M4ERROR ((warning_status, 0,
364 "INTERNAL ERROR: input stack botch in pop_input ()"));
365 abort ();
366 }
367 obstack_free (current_input, isp);
368 next = NULL; /* might be set in push_string_init () */
369
370 isp = tmp;
371 input_change = true;
372}
373
374/*------------------------------------------------------------------------.
375| To switch input over to the wrapup stack, main () calls pop_wrapup (). |
376| Since wrapup text can install new wrapup text, pop_wrapup () returns |
377| false when there is no wrapup text on the stack, and true otherwise. |
378`------------------------------------------------------------------------*/
379
380bool
381pop_wrapup (void)
382{
383 next = NULL;
384 obstack_free (current_input, NULL);
385 free (current_input);
386
387 if (wsp == NULL)
388 {
389 /* End of the program. Free all memory even though we are about
390 to exit, since it makes leak detection easier. */
391 obstack_free (&token_stack, NULL);
392 obstack_free (&file_names, NULL);
393 obstack_free (wrapup_stack, NULL);
394 free (wrapup_stack);
395 return false;
396 }
397
398 current_input = wrapup_stack;
399 wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
400 obstack_init (wrapup_stack);
401
402 isp = wsp;
403 wsp = NULL;
404 input_change = true;
405
406 return true;
407}
408
409/*-------------------------------------------------------------------.
410| When a MACRO token is seen, next_token () uses init_macro_token () |
411| to retrieve the value of the function pointer. |
412`-------------------------------------------------------------------*/
413
414static void
415init_macro_token (token_data *td)
416{
417 if (isp->type != INPUT_MACRO)
418 {
419 M4ERROR ((warning_status, 0,
420 "INTERNAL ERROR: bad call to init_macro_token ()"));
421 abort ();
422 }
423
424 TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
425 TOKEN_DATA_FUNC (td) = isp->u.func;
426}
427
428
429
430/*------------------------------------------------------------------------.
431| Low level input is done a character at a time. The function peek_input |
432| () is used to look at the next character in the input stream. At any |
433| given time, it reads from the input_block on the top of the current |
434| input stack. |
435`------------------------------------------------------------------------*/
436
437static int
438peek_input (void)
439{
440 int ch;
441 input_block *block = isp;
442
443 while (1)
444 {
445 if (block == NULL)
446 return CHAR_EOF;
447
448 switch (block->type)
449 {
450 case INPUT_STRING:
451 ch = to_uchar (block->u.u_s.string[0]);
452 if (ch != '\0')
453 return ch;
454 break;
455
456 case INPUT_FILE:
457 ch = getc (block->u.u_f.fp);
458 if (ch != EOF)
459 {
460 ungetc (ch, block->u.u_f.fp);
461 return ch;
462 }
463 block->u.u_f.end = true;
464 break;
465
466 case INPUT_MACRO:
467 return CHAR_MACRO;
468
469 default:
470 M4ERROR ((warning_status, 0,
471 "INTERNAL ERROR: input stack botch in peek_input ()"));
472 abort ();
473 }
474 block = block->prev;
475 }
476}
477
478/*-------------------------------------------------------------------------.
479| The function next_char () is used to read and advance the input to the |
480| next character. It also manages line numbers for error messages, so |
481| they do not get wrong, due to lookahead. The token consisting of a |
482| newline alone is taken as belonging to the line it ends, and the current |
483| line number is not incremented until the next character is read. |
484| 99.9% of all calls will read from a string, so factor that out into a |
485| macro for speed. |
486`-------------------------------------------------------------------------*/
487
488#define next_char() \
489 (isp && isp->type == INPUT_STRING && isp->u.u_s.string[0] \
490 && !input_change \
491 ? to_uchar (*isp->u.u_s.string++) \
492 : next_char_1 ())
493
494static int
495next_char_1 (void)
496{
497 int ch;
498
499 while (1)
500 {
501 if (isp == NULL)
502 {
503 current_file = "";
504 current_line = 0;
505 return CHAR_EOF;
506 }
507
508 if (input_change)
509 {
510 current_file = isp->file;
511 current_line = isp->line;
512 input_change = false;
513 }
514
515 switch (isp->type)
516 {
517 case INPUT_STRING:
518 ch = to_uchar (*isp->u.u_s.string++);
519 if (ch != '\0')
520 return ch;
521 break;
522
523 case INPUT_FILE:
524 if (start_of_input_line)
525 {
526 start_of_input_line = false;
527 current_line = ++isp->line;
528 }
529
530 /* If stdin is a terminal, calling getc after peek_input
531 already called it would make the user have to hit ^D
532 twice to quit. */
533 ch = isp->u.u_f.end ? EOF : getc (isp->u.u_f.fp);
534 if (ch != EOF)
535 {
536 if (ch == '\n')
537 start_of_input_line = true;
538 return ch;
539 }
540 break;
541
542 case INPUT_MACRO:
543 pop_input (); /* INPUT_MACRO input sources has only one
544 token */
545 return CHAR_MACRO;
546
547 default:
548 M4ERROR ((warning_status, 0,
549 "INTERNAL ERROR: input stack botch in next_char ()"));
550 abort ();
551 }
552
553 /* End of input source --- pop one level. */
554 pop_input ();
555 }
556}
557
558/*------------------------------------------------------------------------.
559| skip_line () simply discards all immediately following characters, upto |
560| the first newline. It is only used from m4_dnl (). |
561`------------------------------------------------------------------------*/
562
563void
564skip_line (void)
565{
566 int ch;
567 const char *file = current_file;
568 int line = current_line;
569
570 while ((ch = next_char ()) != CHAR_EOF && ch != '\n')
571 ;
572 if (ch == CHAR_EOF)
573 /* current_file changed to "" if we see CHAR_EOF, use the
574 previous value we stored earlier. */
575 M4ERROR_AT_LINE ((warning_status, 0, file, line,
576 "Warning: end of file treated as newline"));
577 /* On the rare occasion that dnl crosses include file boundaries
578 (either the input file did not end in a newline, or changeword
579 was used), calling next_char can update current_file and
580 current_line, and that update will be undone as we return to
581 expand_macro. This informs next_char to fix things again. */
582 if (file != current_file || line != current_line)
583 input_change = true;
584}
585
586
587
/* Match the string S against a prefix of the input stream.  Returns
   true on a match.  If S matches and CONSUME is true, the matched
   input is discarded; otherwise any characters read are pushed back
   again.  The function is used only when multicharacter quotes or
   comment delimiters are used.  */

static bool
match_input (const char *s, bool consume)
{
  int matched;			/* number of characters of S read so far */
  bool result = false;

  if (peek_input () != to_uchar (s[0]))
    return false;		/* fail */

  if (s[1] == '\0')
    {
      if (consume)
	(void) next_char ();
      return true;		/* short match */
    }

  (void) next_char ();
  matched = 1;
  /* s[matched] is never NUL here: s[1] was checked above, and the loop
     is left as soon as the end of S is reached.  */
  while (peek_input () == to_uchar (s[matched]))
    {
      (void) next_char ();
      matched++;
      if (s[matched] == '\0')	/* long match */
	{
	  if (consume)
	    return true;
	  result = true;
	  break;
	}
    }

  /* Failed or shouldn't consume, push back input.  */
  {
    struct obstack *h = push_string_init ();

    /* `obstack_grow' may be macro evaluating its arg 1 several times. */
    obstack_grow (h, s, matched);
  }
  push_string_finish ();
  return result;
}
639
/* MATCH (CH, S, CONSUME) matches the string S against the input, where
   CH is the character already obtained by the caller.  The first
   character is compared inline, for speed, so single character quotes
   and comment delimiters never reach match_input ().  If CONSUME, CH
   is the result of next_char and has already been read, so only the
   rest of S is handed to match_input (), and a successful match
   discards the matched string.  Otherwise CH is the result of a peek,
   all of S is handed over, and the input stream is effectively
   unchanged.  A NUL CH never matches.  */

#define MATCH(ch, s, consume)						\
  ((ch) != '\0'								\
   && to_uchar ((s)[0]) == (ch)						\
   && ((s)[1] == '\0' || (match_input ((s) + (consume), consume))))
654
655
656
657/*----------------------------------------------------------.
658| Inititialise input stacks, and quote/comment characters. |
659`----------------------------------------------------------*/
660
661void
662input_init (void)
663{
664 current_file = "";
665 current_line = 0;
666
667 current_input = (struct obstack *) xmalloc (sizeof (struct obstack));
668 obstack_init (current_input);
669 wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
670 obstack_init (wrapup_stack);
671
672 obstack_init (&file_names);
673
674 /* Allocate an object in the current chunk, so that obstack_free
675 will always work even if the first token parsed spills to a new
676 chunk. */
677 obstack_init (&token_stack);
678 obstack_alloc (&token_stack, 1);
679 token_bottom = obstack_base (&token_stack);
680
681 isp = NULL;
682 wsp = NULL;
683 next = NULL;
684
685 start_of_input_line = false;
686
687 lquote.string = xstrdup (DEF_LQUOTE);
688 lquote.length = strlen (lquote.string);
689 rquote.string = xstrdup (DEF_RQUOTE);
690 rquote.length = strlen (rquote.string);
691 bcomm.string = xstrdup (DEF_BCOMM);
692 bcomm.length = strlen (bcomm.string);
693 ecomm.string = xstrdup (DEF_ECOMM);
694 ecomm.length = strlen (ecomm.string);
695
696#ifdef ENABLE_CHANGEWORD
697 set_word_regexp (user_word_regexp);
698#endif
699}
700
701
702
703/*------------------------------------------------------------------.
704| Functions for setting quotes and comment delimiters. Used by |
705| m4_changecom () and m4_changequote (). Pass NULL if the argument |
706| was not present, to distinguish from an explicit empty string. |
707`------------------------------------------------------------------*/
708
709void
710set_quotes (const char *lq, const char *rq)
711{
712 free (lquote.string);
713 free (rquote.string);
714
715 /* POSIX states that with 0 arguments, the default quotes are used.
716 POSIX XCU ERN 112 states that behavior is implementation-defined
717 if there was only one argument, or if there is an empty string in
718 either position when there are two arguments. We allow an empty
719 left quote to disable quoting, but a non-empty left quote will
720 always create a non-empty right quote. See the texinfo for what
721 some other implementations do. */
722 if (!lq)
723 {
724 lq = DEF_LQUOTE;
725 rq = DEF_RQUOTE;
726 }
727 else if (!rq || (*lq && !*rq))
728 rq = DEF_RQUOTE;
729
730 lquote.string = xstrdup (lq);
731 lquote.length = strlen (lquote.string);
732 rquote.string = xstrdup (rq);
733 rquote.length = strlen (rquote.string);
734}
735
736void
737set_comment (const char *bc, const char *ec)
738{
739 free (bcomm.string);
740 free (ecomm.string);
741
742 /* POSIX requires no arguments to disable comments. It requires
743 empty arguments to be used as-is, but this is counter to
744 traditional behavior, because a non-null begin and null end makes
745 it impossible to end a comment. An aardvark has been filed:
746 http://www.opengroup.org/austin/mailarchives/ag-review/msg02168.html
747 This implementation assumes the aardvark will be approved. See
748 the texinfo for what some other implementations do. */
749 if (!bc)
750 bc = ec = "";
751 else if (!ec || (*bc && !*ec))
752 ec = DEF_ECOMM;
753
754 bcomm.string = xstrdup (bc);
755 bcomm.length = strlen (bcomm.string);
756 ecomm.string = xstrdup (ec);
757 ecomm.length = strlen (ecomm.string);
758}
759
760#ifdef ENABLE_CHANGEWORD
761
762static void
763init_pattern_buffer (struct re_pattern_buffer *buf)
764{
765 buf->translate = NULL;
766 buf->fastmap = NULL;
767 buf->buffer = NULL;
768 buf->allocated = 0;
769}
770
771void
772set_word_regexp (const char *regexp)
773{
774 int i;
775 char test[2];
776 const char *msg;
777 struct re_pattern_buffer new_word_regexp;
778
779 if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP))
780 {
781 default_word_regexp = true;
782 return;
783 }
784
785 /* Dry run to see whether the new expression is compilable. */
786 init_pattern_buffer (&new_word_regexp);
787 msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp);
788 regfree (&new_word_regexp);
789
790 if (msg != NULL)
791 {
792 M4ERROR ((warning_status, 0,
793 "bad regular expression `%s': %s", regexp, msg));
794 return;
795 }
796
797 /* If compilation worked, retry using the word_regexp struct.
798 Can't rely on struct assigns working, so redo the compilation. */
799 regfree (&word_regexp);
800 msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
801 re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
802
803 if (msg != NULL)
804 {
805 M4ERROR ((EXIT_FAILURE, 0,
806 "INTERNAL ERROR: expression recompilation `%s': %s",
807 regexp, msg));
808 }
809
810 default_word_regexp = false;
811
812 if (word_start == NULL)
813 word_start = (char *) xmalloc (256);
814
815 word_start[0] = '\0';
816 test[1] = '\0';
817 for (i = 1; i < 256; i++)
818 {
819 test[0] = i;
820 word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
821 }
822}
823
824#endif /* ENABLE_CHANGEWORD */
825
826
827
828/*-------------------------------------------------------------------------.
829| Parse and return a single token from the input stream. A token can |
830| either be TOKEN_EOF, if the input_stack is empty; it can be TOKEN_STRING |
831| for a quoted string; TOKEN_WORD for something that is a potential macro |
832| name; and TOKEN_SIMPLE for any single character that is not a part of |
833| any of the previous types. |
834| |
835| Next_token () return the token type, and passes back a pointer to the |
836| token data through TD. The token text is collected on the obstack |
837| token_stack, which never contains more than one token text at a time. |
838| The storage pointed to by the fields in TD is therefore subject to |
839| change the next time next_token () is called. |
840`-------------------------------------------------------------------------*/
841
842token_type
843next_token (token_data *td)
844{
845 int ch;
846 int quote_level;
847 token_type type;
848#ifdef ENABLE_CHANGEWORD
849 int startpos;
850 char *orig_text = NULL;
851#endif
852 const char *file;
853 int line;
854
855 obstack_free (&token_stack, token_bottom);
856
857 /* Can't consume character until after CHAR_MACRO is handled. */
858 ch = peek_input ();
859 if (ch == CHAR_EOF)
860 {
861#ifdef DEBUG_INPUT
862 fprintf (stderr, "next_token -> EOF\n");
863#endif
864 next_char ();
865 return TOKEN_EOF;
866 }
867 if (ch == CHAR_MACRO)
868 {
869 init_macro_token (td);
870 next_char ();
871#ifdef DEBUG_INPUT
872 fprintf (stderr, "next_token -> MACDEF (%s)\n",
873 find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
874#endif
875 return TOKEN_MACDEF;
876 }
877
878 next_char (); /* Consume character we already peeked at. */
879 file = current_file;
880 line = current_line;
881 if (MATCH (ch, bcomm.string, true))
882 {
883 obstack_grow (&token_stack, bcomm.string, bcomm.length);
884 while ((ch = next_char ()) != CHAR_EOF
885 && !MATCH (ch, ecomm.string, true))
886 obstack_1grow (&token_stack, ch);
887 if (ch != CHAR_EOF)
888 obstack_grow (&token_stack, ecomm.string, ecomm.length);
889 else
890 /* current_file changed to "" if we see CHAR_EOF, use the
891 previous value we stored earlier. */
892 M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, line,
893 "ERROR: end of file in comment"));
894
895 type = TOKEN_STRING;
896 }
897 else if (default_word_regexp && (isalpha (ch) || ch == '_'))
898 {
899 obstack_1grow (&token_stack, ch);
900 while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
901 {
902 obstack_1grow (&token_stack, ch);
903 (void) next_char ();
904 }
905 type = TOKEN_WORD;
906 }
907
908#ifdef ENABLE_CHANGEWORD
909
910 else if (!default_word_regexp && word_start[ch])
911 {
912 obstack_1grow (&token_stack, ch);
913 while (1)
914 {
915 ch = peek_input ();
916 if (ch == CHAR_EOF)
917 break;
918 obstack_1grow (&token_stack, ch);
919 startpos = re_search (&word_regexp,
920 (char *) obstack_base (&token_stack),
921 obstack_object_size (&token_stack), 0, 0,
922 &regs);
923 if (startpos != 0 ||
924 regs.end [0] != obstack_object_size (&token_stack))
925 {
926 *(((char *) obstack_base (&token_stack)
927 + obstack_object_size (&token_stack)) - 1) = '\0';
928 break;
929 }
930 next_char ();
931 }
932
933 obstack_1grow (&token_stack, '\0');
934 orig_text = (char *) obstack_finish (&token_stack);
935
936 if (regs.start[1] != -1)
937 obstack_grow (&token_stack,orig_text + regs.start[1],
938 regs.end[1] - regs.start[1]);
939 else
940 obstack_grow (&token_stack, orig_text,regs.end[0]);
941
942 type = TOKEN_WORD;
943 }
944
945#endif /* ENABLE_CHANGEWORD */
946
947 else if (!MATCH (ch, lquote.string, true))
948 {
949 switch (ch)
950 {
951 case '(':
952 type = TOKEN_OPEN;
953 break;
954 case ',':
955 type = TOKEN_COMMA;
956 break;
957 case ')':
958 type = TOKEN_CLOSE;
959 break;
960 default:
961 type = TOKEN_SIMPLE;
962 break;
963 }
964 obstack_1grow (&token_stack, ch);
965 }
966 else
967 {
968 quote_level = 1;
969 while (1)
970 {
971 ch = next_char ();
972 if (ch == CHAR_EOF)
973 /* current_file changed to "" if we see CHAR_EOF, use
974 the previous value we stored earlier. */
975 M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, line,
976 "ERROR: end of file in string"));
977
978 if (MATCH (ch, rquote.string, true))
979 {
980 if (--quote_level == 0)
981 break;
982 obstack_grow (&token_stack, rquote.string, rquote.length);
983 }
984 else if (MATCH (ch, lquote.string, true))
985 {
986 quote_level++;
987 obstack_grow (&token_stack, lquote.string, lquote.length);
988 }
989 else
990 obstack_1grow (&token_stack, ch);
991 }
992 type = TOKEN_STRING;
993 }
994
995 obstack_1grow (&token_stack, '\0');
996
997 TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
998 TOKEN_DATA_TEXT (td) = (char *) obstack_finish (&token_stack);
999#ifdef ENABLE_CHANGEWORD
1000 if (orig_text == NULL)
1001 orig_text = TOKEN_DATA_TEXT (td);
1002 TOKEN_DATA_ORIG_TEXT (td) = orig_text;
1003#endif
1004#ifdef DEBUG_INPUT
1005 fprintf (stderr, "next_token -> %s (%s)\n",
1006 token_type_string (type), TOKEN_DATA_TEXT (td));
1007#endif
1008 return type;
1009}
1010
1011/*-----------------------------------------------.
1012| Peek at the next token from the input stream. |
1013`-----------------------------------------------*/
1014
1015token_type
1016peek_token (void)
1017{
1018 token_type result;
1019 int ch = peek_input ();
1020
1021 if (ch == CHAR_EOF)
1022 {
1023 result = TOKEN_EOF;
1024 }
1025 else if (ch == CHAR_MACRO)
1026 {
1027 result = TOKEN_MACDEF;
1028 }
1029 else if (MATCH (ch, bcomm.string, false))
1030 {
1031 result = TOKEN_STRING;
1032 }
1033 else if ((default_word_regexp && (isalpha (ch) || ch == '_'))
1034#ifdef ENABLE_CHANGEWORD
1035 || (! default_word_regexp && word_start[ch])
1036#endif /* ENABLE_CHANGEWORD */
1037 )
1038 {
1039 result = TOKEN_WORD;
1040 }
1041 else if (MATCH (ch, lquote.string, false))
1042 {
1043 result = TOKEN_STRING;
1044 }
1045 else
1046 switch (ch)
1047 {
1048 case '(':
1049 result = TOKEN_OPEN;
1050 break;
1051 case ',':
1052 result = TOKEN_COMMA;
1053 break;
1054 case ')':
1055 result = TOKEN_CLOSE;
1056 break;
1057 default:
1058 result = TOKEN_SIMPLE;
1059 }
1060
1061#ifdef DEBUG_INPUT
1062 fprintf (stderr, "peek_token -> %s\n", token_type_string (result));
1063#endif /* DEBUG_INPUT */
1064 return result;
1065}
1066
1067
1068
1069#ifdef DEBUG_INPUT
1070
1071static const char *
1072token_type_string (token_type t)
1073{
1074 switch (t)
1075 { /* TOKSW */
1076 case TOKEN_EOF:
1077 return "EOF";
1078 case TOKEN_STRING:
1079 return "STRING";
1080 case TOKEN_WORD:
1081 return "WORD";
1082 case TOKEN_OPEN:
1083 return "OPEN";
1084 case TOKEN_COMMA:
1085 return "COMMA";
1086 case TOKEN_CLOSE:
1087 return "CLOSE";
1088 case TOKEN_SIMPLE:
1089 return "SIMPLE";
1090 case TOKEN_MACDEF:
1091 return "MACDEF";
1092 default:
1093 abort ();
1094 }
1095 }
1096
1097static void
1098print_token (const char *s, token_type t, token_data *td)
1099{
1100 fprintf (stderr, "%s: ", s);
1101 switch (t)
1102 { /* TOKSW */
1103 case TOKEN_OPEN:
1104 case TOKEN_COMMA:
1105 case TOKEN_CLOSE:
1106 case TOKEN_SIMPLE:
1107 fprintf (stderr, "char:");
1108 break;
1109
1110 case TOKEN_WORD:
1111 fprintf (stderr, "word:");
1112 break;
1113
1114 case TOKEN_STRING:
1115 fprintf (stderr, "string:");
1116 break;
1117
1118 case TOKEN_MACDEF:
1119 fprintf (stderr, "macro: %p\n", TOKEN_DATA_FUNC (td));
1120 break;
1121
1122 case TOKEN_EOF:
1123 fprintf (stderr, "eof\n");
1124 break;
1125 }
1126 fprintf (stderr, "\t\"%s\"\n", TOKEN_DATA_TEXT (td));
1127}
1128
1129static void M4_GNUC_UNUSED
1130lex_debug (void)
1131{
1132 token_type t;
1133 token_data td;
1134
1135 while ((t = next_token (&td)) != TOKEN_EOF)
1136 print_token ("lex", t, &td);
1137}
1138#endif
Note: See TracBrowser for help on using the repository browser.