Context Navigation

source: trunk/binutils/gas/app.c@ 3770

Visit:

Last change on this file since 3770 was 610, checked in by bird, 22 years ago
This commit was generated by cvs2svn to compensate for changes in r609, which included commits to RCS files with non-trunk default branches.
Property cvs2svn:cvs-rev set to `1.1.1.2` Property svn:eol-style set to `native` Property svn:executable set to ``*
File size: 32.3 KB

Line
1	/* This is the Assembler Pre-Processor
2	Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3	1999, 2000, 2002, 2003
4	Free Software Foundation, Inc.
5
6	This file is part of GAS, the GNU Assembler.
7
8	GAS is free software; you can redistribute it and/or modify
9	it under the terms of the GNU General Public License as published by
10	the Free Software Foundation; either version 2, or (at your option)
11	any later version.
12
13	GAS is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	GNU General Public License for more details.
17
18	You should have received a copy of the GNU General Public License
19	along with GAS; see the file COPYING. If not, write to the Free
20	Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21	02111-1307, USA. */
22
23	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
24	/* App, the assembler pre-processor. This pre-processor strips out excess
25	spaces, turns single-quoted characters into a decimal constant, and turns
26	# <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
27	pair. This needs better error-handling. */
28
29	#include <stdio.h>
30	#include "as.h" /* For BAD_CASE() only. */
31
/* Pre-ANSI compilers have no `const' keyword; make it vanish for them
   unless somebody has already redefined it.  */
#if (__STDC__ != 1) && !defined (const)
#define const /* empty */
#endif

#if defined (TC_M68K)

/* Nonzero while the scrubber itself is operating in m68k MRI mode.
   This is kept separate from flag_m68k_mri because the .mri pseudo-op
   affects the two flags at different times.  */
static int scrub_m68k_mri;

/* Text of the pseudo-op that switches MRI mode on and off; it is
   matched character by character inside do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";

#else /* ! TC_M68K */

/* Without m68k support the scrubber is never in MRI mode.  */
#define scrub_m68k_mri 0

#endif /* ! TC_M68K */

#if defined (TC_ARM) && defined (OBJ_ELF)

/* Pseudo-op on whose lines `@' must not be treated as a comment
   character; see the matching code in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* Progress of the match against symver_pseudo; NULL when no match is
   in progress.  */
static const char *symver_state;

#endif /* TC_ARM && OBJ_ELF */
57
/* Lexical class of every possible input byte, indexed by the byte
   value.  Zero means "no special class"; the other values are the
   LEX_IS_* codes below.  The table is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always classified as symbol components.  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Classification codes stored in lex[].  (The value 7 is not used.)  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST           12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST            13
#endif
#define LEX_IS_PARALLEL_SEPARATOR       14

/* Predicates on a character's class.  The argument is used directly
   as an index into lex[], so it must lie in the range 0..255.  */
#define IS_SYMBOL_COMPONENT(c)          (lex[(c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)                (lex[(c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)            (lex[(c)] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)        (lex[(c)] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)                   (lex[(c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)              (lex[(c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)                   (lex[(c)] == LEX_IS_NEWLINE)
89
90	static int process_escape PARAMS ((int));
91
92	/* FIXME-soon: The entire lexer/parser thingy should be
93	built statically at compile time rather than dynamically
94	each and every time the assembler is run. xoxorich. */
95
96	void
97	do_scrub_begin (m68k_mri)
98	int m68k_mri ATTRIBUTE_UNUSED;
99	{
100	const char *p;
101	int c;
102
103	lex[' '] = LEX_IS_WHITESPACE;
104	lex['\t'] = LEX_IS_WHITESPACE;
105	lex['\r'] = LEX_IS_WHITESPACE;
106	lex['\n'] = LEX_IS_NEWLINE;
107	lex[':'] = LEX_IS_COLON;
108
109	#ifdef TC_M68K
110	scrub_m68k_mri = m68k_mri;
111
112	if (! m68k_mri)
113	#endif
114	{
115	lex['"'] = LEX_IS_STRINGQUOTE;
116
117	#if ! defined (TC_HPPA) && ! defined (TC_I370)
118	/* I370 uses single-quotes to delimit integer, float constants. */
119	lex['\''] = LEX_IS_ONECHAR_QUOTE;
120	#endif
121
122	#ifdef SINGLE_QUOTE_STRINGS
123	lex['\''] = LEX_IS_STRINGQUOTE;
124	#endif
125	}
126
127	/* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
128	in state 5 of do_scrub_chars must be changed. */
129
130	/* Note that these override the previous defaults, e.g. if ';' is a
131	comment char, then it isn't a line separator. */
132	for (p = symbol_chars; *p; ++p)
133	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
134
135	for (c = 128; c < 256; ++c)
136	lex[c] = LEX_IS_SYMBOL_COMPONENT;
137
138	#ifdef tc_symbol_chars
139	/* This macro permits the processor to specify all characters which
140	may appears in an operand. This will prevent the scrubber from
141	discarding meaningful whitespace in certain cases. The i386
142	backend uses this to support prefixes, which can confuse the
143	scrubber as to whether it is parsing operands or opcodes. */
144	for (p = tc_symbol_chars; *p; ++p)
145	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
146	#endif
147
148	/* The m68k backend wants to be able to change comment_chars. */
149	#ifndef tc_comment_chars
150	#define tc_comment_chars comment_chars
151	#endif
152	for (p = tc_comment_chars; *p; p++)
153	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
154
155	for (p = line_comment_chars; *p; p++)
156	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
157
158	for (p = line_separator_chars; *p; p++)
159	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
160
161	#ifdef tc_parallel_separator_chars
162	/* This macro permits the processor to specify all characters which
163	separate parallel insns on the same line. */
164	for (p = tc_parallel_separator_chars; *p; p++)
165	lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
166	#endif
167
168	/* Only allow slash-star comments if slash is not in use.
169	FIXME: This isn't right. We should always permit them. */
170	if (lex['/'] == 0)
171	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
172
173	#ifdef TC_M68K
174	if (m68k_mri)
175	{
176	lex['\''] = LEX_IS_STRINGQUOTE;
177	lex[';'] = LEX_IS_COMMENT_START;
178	lex['*'] = LEX_IS_LINE_COMMENT_START;
179	/* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
180	then it can't be used in an expression. */
181	lex['!'] = LEX_IS_LINE_COMMENT_START;
182	}
183	#endif
184
185	#ifdef TC_V850
186	lex['-'] = LEX_IS_DOUBLEDASH_1ST;
187	#endif
188	#ifdef DOUBLEBAR_PARALLEL
189	lex['\|'] = LEX_IS_DOUBLEBAR_1ST;
190	#endif
191	#ifdef TC_D30V
192	/* Must do this is we want VLIW instruction with "->" or "<-". */
193	lex['-'] = LEX_IS_SYMBOL_COMPONENT;
194	#endif
195	}
196
/* Scrubber state that has to survive between calls of do_scrub_chars.  */

/* Current and remembered state of the do_scrub_chars state machine.  */
static int state;
static int old_state;

/* Literal text still being copied to the output while in state -1.  */
static char *out_string;

/* Scratch space for the decimal text of a one-character constant.  */
static char out_buf[20];

/* Count of newlines swallowed so far that must still be put out.  */
static int add_newlines;

/* Input left unprocessed when the output buffer filled up, and its
   length.  saved_input is NULL when nothing is pending.  */
static char *saved_input;
static int saved_input_len;

/* Buffer that do_scrub_chars passes to its GET callback.  */
static char input_buffer[32 * 1024];

/* Progress of the `.mri' pseudo-op recogniser (NULL when idle) and the
   character it matched most recently.  */
static const char *mri_state;
static char mri_last_ch;

/* Snapshot of all of the above, used to save the state of app across
   #include's.  app is called asynchronously to the parsing of the
   .include's, so the state at the time a .include is interpreted is
   completely unrelated; that is why everything has to be saved.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *saved_input;
  int saved_input_len;
#ifdef TC_M68K
  int scrub_m68k_mri;
#endif
  const char *mri_state;
  char mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char *symver_state;
#endif
};
232
233	char *
234	app_push ()
235	{
236	register struct app_save *saved;
237
238	saved = (struct app_save ) xmalloc (sizeof (saved));
239	saved->state = state;
240	saved->old_state = old_state;
241	saved->out_string = out_string;
242	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
243	saved->add_newlines = add_newlines;
244	if (saved_input == NULL)
245	saved->saved_input = NULL;
246	else
247	{
248	saved->saved_input = xmalloc (saved_input_len);
249	memcpy (saved->saved_input, saved_input, saved_input_len);
250	saved->saved_input_len = saved_input_len;
251	}
252	#ifdef TC_M68K
253	saved->scrub_m68k_mri = scrub_m68k_mri;
254	#endif
255	saved->mri_state = mri_state;
256	saved->mri_last_ch = mri_last_ch;
257	#if defined TC_ARM && defined OBJ_ELF
258	saved->symver_state = symver_state;
259	#endif
260
261	/* do_scrub_begin() is not useful, just wastes time. */
262
263	state = 0;
264	saved_input = NULL;
265
266	return (char *) saved;
267	}
268
269	void
270	app_pop (arg)
271	char *arg;
272	{
273	register struct app_save saved = (struct app_save ) arg;
274
275	/* There is no do_scrub_end (). */
276	state = saved->state;
277	old_state = saved->old_state;
278	out_string = saved->out_string;
279	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
280	add_newlines = saved->add_newlines;
281	if (saved->saved_input == NULL)
282	saved_input = NULL;
283	else
284	{
285	assert (saved->saved_input_len <= (int) (sizeof input_buffer));
286	memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
287	saved_input = input_buffer;
288	saved_input_len = saved->saved_input_len;
289	free (saved->saved_input);
290	}
291	#ifdef TC_M68K
292	scrub_m68k_mri = saved->scrub_m68k_mri;
293	#endif
294	mri_state = saved->mri_state;
295	mri_last_ch = saved->mri_last_ch;
296	#if defined TC_ARM && defined OBJ_ELF
297	symver_state = saved->symver_state;
298	#endif
299
300	free (arg);
301	}
302
/* Translate CH, the character that followed a backslash inside a
   one-character constant, into the character the escape stands for.
   Any character without an entry in the table below is returned
   unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  /* Escape letters and, at the same index, the values they denote.  */
  static const char letters[] = { 'b', 'f', 'n', 'r', 't', '\'', '"' };
  static const char values[] = { '\b', '\f', '\n', '\r', '\t', '\'', '\"' };
  unsigned int i;

  for (i = 0; i < sizeof letters; i++)
    if (ch == letters[i])
      return values[i];

  return ch;
}
330
331	/* This function is called to process input characters. The GET
332	parameter is used to retrieve more input characters. GET should
333	set its parameter to point to a buffer, and return the length of
334	the buffer; it should return 0 at end of file. The scrubbed output
335	characters are put into the buffer starting at TOSTART; the TOSTART
336	buffer is TOLEN bytes in length. The function returns the number
337	of scrubbed characters put into TOSTART. This will be TOLEN unless
338	end of file was seen. This function is arranged as a state
339	machine, and saves its state so that it may return at any point.
340	This is the way the old code used to work. */
341
342	int
343	do_scrub_chars (get, tostart, tolen)
344	int (get) PARAMS ((char , int));
345	char *tostart;
346	int tolen;
347	{
348	char *to = tostart;
349	char *toend = tostart + tolen;
350	char *from;
351	char *fromend;
352	int fromlen;
353	register int ch, ch2 = 0;
354
355	/*State 0: beginning of normal line
356	1: After first whitespace on line (flush more white)
357	2: After first non-white (opcode) on line (keep 1white)
358	3: after second white on line (into operands) (flush white)
359	4: after putting out a .line, put out digits
360	5: parsing a string, then go to old-state
361	6: putting out \ escape in a "d string.
362	7: After putting out a .appfile, put out string.
363	8: After putting out a .appfile string, flush until newline.
364	9: After seeing symbol char in state 3 (keep 1white after symchar)
365	10: After seeing whitespace in state 9 (keep white before symchar)
366	11: After seeing a symbol character in state 0 (eg a label definition)
367	-1: output string in out_string and go to the state in old_state
368	-2: flush text until a '*' '/' is seen, then go to state old_state
369	#ifdef TC_V850
370	12: After seeing a dash, looking for a second dash as a start
371	of comment.
372	#endif
373	#ifdef DOUBLEBAR_PARALLEL
374	13: After seeing a vertical bar, looking for a second
375	vertical bar as a parallel expression separator.
376	#endif
377	*/
378
379	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
380	constructs like ``.loc 1 20''. This was turning into ``.loc
381	120''. States 9 and 10 ensure that a space is never dropped in
382	between characters which could appear in an identifier. Ian
383	Taylor, ian@cygnus.com.
384
385	I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
386	correctly on the PA (and any other target where colons are optional).
387	Jeff Law, law@cs.utah.edu.
388
389	I added state 13 so that something like "cmp r1, r2 \|\| trap #1" does not
390	get squashed into "cmp r1,r2\|\|trap#1", with the all important space
391	between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
392
393	/* This macro gets the next input character. */
394
395	#define GET() \
396	(from < fromend \
397	? * (unsigned char *) (from++) \
398	: (saved_input = NULL, \
399	fromlen = (*get) (input_buffer, sizeof input_buffer), \
400	from = input_buffer, \
401	fromend = from + fromlen, \
402	(fromlen == 0 \
403	? EOF \
404	: * (unsigned char *) (from++))))
405
406	/* This macro pushes a character back on the input stream. */
407
408	#define UNGET(uch) (*--from = (uch))
409
410	/* This macro puts a character into the output buffer. If this
411	character fills the output buffer, this macro jumps to the label
412	TOFULL. We use this rather ugly approach because we need to
413	handle two different termination conditions: EOF on the input
414	stream, and a full output buffer. It would be simpler if we
415	always read in the entire input stream before processing it, but
416	I don't want to make such a significant change to the assembler's
417	memory usage. */
418
419	#define PUT(pch) \
420	do \
421	{ \
422	*to++ = (pch); \
423	if (to >= toend) \
424	goto tofull; \
425	} \
426	while (0)
427
428	if (saved_input != NULL)
429	{
430	from = saved_input;
431	fromend = from + saved_input_len;
432	}
433	else
434	{
435	fromlen = (*get) (input_buffer, sizeof input_buffer);
436	if (fromlen == 0)
437	return 0;
438	from = input_buffer;
439	fromend = from + fromlen;
440	}
441
442	while (1)
443	{
444	/* The cases in this switch end with continue, in order to
445	branch back to the top of this while loop and generate the
446	next output character in the appropriate state. */
447	switch (state)
448	{
449	case -1:
450	ch = *out_string++;
451	if (*out_string == '\0')
452	{
453	state = old_state;
454	old_state = 3;
455	}
456	PUT (ch);
457	continue;
458
459	case -2:
460	for (;;)
461	{
462	do
463	{
464	ch = GET ();
465
466	if (ch == EOF)
467	{
468	as_warn (_("end of file in comment"));
469	goto fromeof;
470	}
471
472	if (ch == '\n')
473	PUT ('\n');
474	}
475	while (ch != '*');
476
477	while ((ch = GET ()) == '*')
478	;
479
480	if (ch == EOF)
481	{
482	as_warn (_("end of file in comment"));
483	goto fromeof;
484	}
485
486	if (ch == '/')
487	break;
488
489	UNGET (ch);
490	}
491
492	state = old_state;
493	UNGET (' ');
494	continue;
495
496	case 4:
497	ch = GET ();
498	if (ch == EOF)
499	goto fromeof;
500	else if (ch >= '0' && ch <= '9')
501	PUT (ch);
502	else
503	{
504	while (ch != EOF && IS_WHITESPACE (ch))
505	ch = GET ();
506	if (ch == '"')
507	{
508	UNGET (ch);
509	if (scrub_m68k_mri)
510	out_string = "\n\tappfile ";
511	else
512	out_string = "\n\t.appfile ";
513	old_state = 7;
514	state = -1;
515	PUT (*out_string++);
516	}
517	else
518	{
519	while (ch != EOF && ch != '\n')
520	ch = GET ();
521	state = 0;
522	PUT (ch);
523	}
524	}
525	continue;
526
527	case 5:
528	/* We are going to copy everything up to a quote character,
529	with special handling for a backslash. We try to
530	optimize the copying in the simple case without using the
531	GET and PUT macros. */
532	{
533	char *s;
534	int len;
535
536	for (s = from; s < fromend; s++)
537	{
538	ch = *s;
539	/* This condition must be changed if the type of any
540	other character can be LEX_IS_STRINGQUOTE. */
541	if (ch == '\\'
542	\|\| ch == '"'
543	\|\| ch == '\''
544	\|\| ch == '\n')
545	break;
546	}
547	len = s - from;
548	if (len > toend - to)
549	len = toend - to;
550	if (len > 0)
551	{
552	memcpy (to, from, len);
553	to += len;
554	from += len;
555	}
556	}
557
558	ch = GET ();
559	if (ch == EOF)
560	{
561	as_warn (_("end of file in string; inserted '\"'"));
562	state = old_state;
563	UNGET ('\n');
564	PUT ('"');
565	}
566	else if (lex[ch] == LEX_IS_STRINGQUOTE)
567	{
568	state = old_state;
569	PUT (ch);
570	}
571	#ifndef NO_STRING_ESCAPES
572	else if (ch == '\\')
573	{
574	state = 6;
575	PUT (ch);
576	}
577	#endif
578	else if (scrub_m68k_mri && ch == '\n')
579	{
580	/* Just quietly terminate the string. This permits lines like
581	bne label loop if we haven't reach end yet. */
582	state = old_state;
583	UNGET (ch);
584	PUT ('\'');
585	}
586	else
587	{
588	PUT (ch);
589	}
590	continue;
591
592	case 6:
593	state = 5;
594	ch = GET ();
595	switch (ch)
596	{
597	/* Handle strings broken across lines, by turning '\n' into
598	'\\' and 'n'. */
599	case '\n':
600	UNGET ('n');
601	add_newlines++;
602	PUT ('\\');
603	continue;
604
605	case EOF:
606	as_warn (_("end of file in string; '\"' inserted"));
607	PUT ('"');
608	continue;
609
610	case '"':
611	case '\\':
612	case 'b':
613	case 'f':
614	case 'n':
615	case 'r':
616	case 't':
617	case 'v':
618	case 'x':
619	case 'X':
620	case '0':
621	case '1':
622	case '2':
623	case '3':
624	case '4':
625	case '5':
626	case '6':
627	case '7':
628	break;
629
630	default:
631	#ifdef ONLY_STANDARD_ESCAPES
632	as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
633	#endif
634	break;
635	}
636	PUT (ch);
637	continue;
638
639	case 7:
640	ch = GET ();
641	state = 5;
642	old_state = 8;
643	if (ch == EOF)
644	goto fromeof;
645	PUT (ch);
646	continue;
647
648	case 8:
649	do
650	ch = GET ();
651	while (ch != '\n' && ch != EOF);
652	if (ch == EOF)
653	goto fromeof;
654	state = 0;
655	PUT (ch);
656	continue;
657
658	#ifdef DOUBLEBAR_PARALLEL
659	case 13:
660	ch = GET ();
661	if (ch != '\|')
662	abort ();
663
664	/* Reset back to state 1 and pretend that we are parsing a
665	line from just after the first white space. */
666	state = 1;
667	PUT ('\|');
668	continue;
669	#endif
670	}
671
672	/* OK, we are somewhere in states 0 through 4 or 9 through 11. */
673
674	/* flushchar: */
675	ch = GET ();
676
677	recycle:
678
679	#if defined TC_ARM && defined OBJ_ELF
680	/* We need to watch out for .symver directives. See the comment later
681	in this function. */
682	if (symver_state == NULL)
683	{
684	if ((state == 0 \|\| state == 1) && ch == symver_pseudo[0])
685	symver_state = symver_pseudo + 1;
686	}
687	else
688	{
689	/* We advance to the next state if we find the right
690	character. */
691	if (ch != '\0' && (*symver_state == ch))
692	++symver_state;
693	else if (*symver_state != '\0')
694	/* We did not get the expected character, or we didn't
695	get a valid terminating character after seeing the
696	entire pseudo-op, so we must go back to the beginning. */
697	symver_state = NULL;
698	else
699	{
700	/* We've read the entire pseudo-op. If this is the end
701	of the line, go back to the beginning. */
702	if (IS_NEWLINE (ch))
703	symver_state = NULL;
704	}
705	}
706	#endif /* TC_ARM && OBJ_ELF */
707
708	#ifdef TC_M68K
709	/* We want to have pseudo-ops which control whether we are in
710	MRI mode or not. Unfortunately, since m68k MRI mode affects
711	the scrubber, that means that we need a special purpose
712	recognizer here. */
713	if (mri_state == NULL)
714	{
715	if ((state == 0 \|\| state == 1)
716	&& ch == mri_pseudo[0])
717	mri_state = mri_pseudo + 1;
718	}
719	else
720	{
721	/* We advance to the next state if we find the right
722	character, or if we need a space character and we get any
723	whitespace character, or if we need a '0' and we get a
724	'1' (this is so that we only need one state to handle
725	``.mri 0'' and ``.mri 1''). */
726	if (ch != '\0'
727	&& (*mri_state == ch
728	\|\| (*mri_state == ' '
729	&& lex[ch] == LEX_IS_WHITESPACE)
730	\|\| (*mri_state == '0'
731	&& ch == '1')))
732	{
733	mri_last_ch = ch;
734	++mri_state;
735	}
736	else if (*mri_state != '\0'
737	\|\| (lex[ch] != LEX_IS_WHITESPACE
738	&& lex[ch] != LEX_IS_NEWLINE))
739	{
740	/* We did not get the expected character, or we didn't
741	get a valid terminating character after seeing the
742	entire pseudo-op, so we must go back to the
743	beginning. */
744	mri_state = NULL;
745	}
746	else
747	{
748	/* We've read the entire pseudo-op. mips_last_ch is
749	either '0' or '1' indicating whether to enter or
750	leave MRI mode. */
751	do_scrub_begin (mri_last_ch == '1');
752	mri_state = NULL;
753
754	/* We continue handling the character as usual. The
755	main gas reader must also handle the .mri pseudo-op
756	to control expression parsing and the like. */
757	}
758	}
759	#endif
760
761	if (ch == EOF)
762	{
763	if (state != 0)
764	{
765	as_warn (_("end of file not at end of a line; newline inserted"));
766	state = 0;
767	PUT ('\n');
768	}
769	goto fromeof;
770	}
771
772	switch (lex[ch])
773	{
774	case LEX_IS_WHITESPACE:
775	do
776	{
777	ch = GET ();
778	}
779	while (ch != EOF && IS_WHITESPACE (ch));
780	if (ch == EOF)
781	goto fromeof;
782
783	if (state == 0)
784	{
785	/* Preserve a single whitespace character at the
786	beginning of a line. */
787	state = 1;
788	UNGET (ch);
789	PUT (' ');
790	break;
791	}
792
793	#ifdef KEEP_WHITE_AROUND_COLON
794	if (lex[ch] == LEX_IS_COLON)
795	{
796	/* Only keep this white if there's no white after the
797	colon. */
798	ch2 = GET ();
799	UNGET (ch2);
800	if (!IS_WHITESPACE (ch2))
801	{
802	state = 9;
803	UNGET (ch);
804	PUT (' ');
805	break;
806	}
807	}
808	#endif
809	if (IS_COMMENT (ch)
810	\|\| ch == '/'
811	\|\| IS_LINE_SEPARATOR (ch)
812	\|\| IS_PARALLEL_SEPARATOR (ch))
813	{
814	if (scrub_m68k_mri)
815	{
816	/* In MRI mode, we keep these spaces. */
817	UNGET (ch);
818	PUT (' ');
819	break;
820	}
821	goto recycle;
822	}
823
824	/* If we're in state 2 or 11, we've seen a non-white
825	character followed by whitespace. If the next character
826	is ':', this is whitespace after a label name which we
827	normally must ignore. In MRI mode, though, spaces are
828	not permitted between the label and the colon. */
829	if ((state == 2 \|\| state == 11)
830	&& lex[ch] == LEX_IS_COLON
831	&& ! scrub_m68k_mri)
832	{
833	state = 1;
834	PUT (ch);
835	break;
836	}
837
838	switch (state)
839	{
840	case 0:
841	state++;
842	goto recycle; /* Punted leading sp */
843	case 1:
844	/* We can arrive here if we leave a leading whitespace
845	character at the beginning of a line. */
846	goto recycle;
847	case 2:
848	state = 3;
849	if (to + 1 < toend)
850	{
851	/* Optimize common case by skipping UNGET/GET. */
852	PUT (' '); /* Sp after opco */
853	goto recycle;
854	}
855	UNGET (ch);
856	PUT (' ');
857	break;
858	case 3:
859	if (scrub_m68k_mri)
860	{
861	/* In MRI mode, we keep these spaces. */
862	UNGET (ch);
863	PUT (' ');
864	break;
865	}
866	goto recycle; /* Sp in operands */
867	case 9:
868	case 10:
869	if (scrub_m68k_mri)
870	{
871	/* In MRI mode, we keep these spaces. */
872	state = 3;
873	UNGET (ch);
874	PUT (' ');
875	break;
876	}
877	state = 10; /* Sp after symbol char */
878	goto recycle;
879	case 11:
880	if (LABELS_WITHOUT_COLONS \|\| flag_m68k_mri)
881	state = 1;
882	else
883	{
884	/* We know that ch is not ':', since we tested that
885	case above. Therefore this is not a label, so it
886	must be the opcode, and we've just seen the
887	whitespace after it. */
888	state = 3;
889	}
890	UNGET (ch);
891	PUT (' '); /* Sp after label definition. */
892	break;
893	default:
894	BAD_CASE (state);
895	}
896	break;
897
898	case LEX_IS_TWOCHAR_COMMENT_1ST:
899	ch2 = GET ();
900	if (ch2 == '*')
901	{
902	for (;;)
903	{
904	do
905	{
906	ch2 = GET ();
907	if (ch2 != EOF && IS_NEWLINE (ch2))
908	add_newlines++;
909	}
910	while (ch2 != EOF && ch2 != '*');
911
912	while (ch2 == '*')
913	ch2 = GET ();
914
915	if (ch2 == EOF \|\| ch2 == '/')
916	break;
917
918	/* This UNGET will ensure that we count newlines
919	correctly. */
920	UNGET (ch2);
921	}
922
923	if (ch2 == EOF)
924	as_warn (_("end of file in multiline comment"));
925
926	ch = ' ';
927	goto recycle;
928	}
929	#ifdef DOUBLESLASH_LINE_COMMENTS
930	else if (ch2 == '/')
931	{
932	do
933	{
934	ch = GET ();
935	}
936	while (ch != EOF && !IS_NEWLINE (ch));
937	if (ch == EOF)
938	as_warn ("end of file in comment; newline inserted");
939	state = 0;
940	PUT ('\n');
941	break;
942	}
943	#endif
944	else
945	{
946	if (ch2 != EOF)
947	UNGET (ch2);
948	if (state == 9 \|\| state == 10)
949	state = 3;
950	PUT (ch);
951	}
952	break;
953
954	case LEX_IS_STRINGQUOTE:
955	if (state == 10)
956	{
957	/* Preserve the whitespace in foo "bar". */
958	UNGET (ch);
959	state = 3;
960	PUT (' ');
961
962	/* PUT didn't jump out. We could just break, but we
963	know what will happen, so optimize a bit. */
964	ch = GET ();
965	old_state = 3;
966	}
967	else if (state == 9)
968	old_state = 3;
969	else
970	old_state = state;
971	state = 5;
972	PUT (ch);
973	break;
974
975	#ifndef IEEE_STYLE
976	case LEX_IS_ONECHAR_QUOTE:
977	if (state == 10)
978	{
979	/* Preserve the whitespace in foo 'b'. */
980	UNGET (ch);
981	state = 3;
982	PUT (' ');
983	break;
984	}
985	ch = GET ();
986	if (ch == EOF)
987	{
988	as_warn (_("end of file after a one-character quote; \\0 inserted"));
989	ch = 0;
990	}
991	if (ch == '\\')
992	{
993	ch = GET ();
994	if (ch == EOF)
995	{
996	as_warn (_("end of file in escape character"));
997	ch = '\\';
998	}
999	else
1000	ch = process_escape (ch);
1001	}
1002	sprintf (out_buf, "%d", (int) (unsigned char) ch);
1003
1004	/* None of these 'x constants for us. We want 'x'. */
1005	if ((ch = GET ()) != '\'')
1006	{
1007	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1008	as_warn (_("missing close quote; (assumed)"));
1009	#else
1010	if (ch != EOF)
1011	UNGET (ch);
1012	#endif
1013	}
1014	if (strlen (out_buf) == 1)
1015	{
1016	PUT (out_buf[0]);
1017	break;
1018	}
1019	if (state == 9)
1020	old_state = 3;
1021	else
1022	old_state = state;
1023	state = -1;
1024	out_string = out_buf;
1025	PUT (*out_string++);
1026	break;
1027	#endif
1028
1029	case LEX_IS_COLON:
1030	#ifdef KEEP_WHITE_AROUND_COLON
1031	state = 9;
1032	#else
1033	if (state == 9 \|\| state == 10)
1034	state = 3;
1035	else if (state != 3)
1036	state = 1;
1037	#endif
1038	PUT (ch);
1039	break;
1040
1041	case LEX_IS_NEWLINE:
1042	/* Roll out a bunch of newlines from inside comments, etc. */
1043	if (add_newlines)
1044	{
1045	--add_newlines;
1046	UNGET (ch);
1047	}
1048	/* Fall through. */
1049
1050	case LEX_IS_LINE_SEPARATOR:
1051	state = 0;
1052	PUT (ch);
1053	break;
1054
1055	case LEX_IS_PARALLEL_SEPARATOR:
1056	state = 1;
1057	PUT (ch);
1058	break;
1059
1060	#ifdef TC_V850
1061	case LEX_IS_DOUBLEDASH_1ST:
1062	ch2 = GET ();
1063	if (ch2 != '-')
1064	{
1065	UNGET (ch2);
1066	goto de_fault;
1067	}
1068	/* Read and skip to end of line. */
1069	do
1070	{
1071	ch = GET ();
1072	}
1073	while (ch != EOF && ch != '\n');
1074
1075	if (ch == EOF)
1076	as_warn (_("end of file in comment; newline inserted"));
1077
1078	state = 0;
1079	PUT ('\n');
1080	break;
1081	#endif
1082	#ifdef DOUBLEBAR_PARALLEL
1083	case LEX_IS_DOUBLEBAR_1ST:
1084	ch2 = GET ();
1085	UNGET (ch2);
1086	if (ch2 != '\|')
1087	goto de_fault;
1088
1089	/* Handle '\|\|' in two states as invoking PUT twice might
1090	result in the first one jumping out of this loop. We'd
1091	then lose track of the state and one '\|' char. */
1092	state = 13;
1093	PUT ('\|');
1094	break;
1095	#endif
1096	case LEX_IS_LINE_COMMENT_START:
1097	/* FIXME-someday: The two character comment stuff was badly
1098	thought out. On i386, we want '/' as line comment start
1099	AND we want C style comments. hence this hack. The
1100	whole lexical process should be reworked. xoxorich. */
1101	if (ch == '/')
1102	{
1103	ch2 = GET ();
1104	if (ch2 == '*')
1105	{
1106	old_state = 3;
1107	state = -2;
1108	break;
1109	}
1110	else
1111	{
1112	UNGET (ch2);
1113	}
1114	}
1115
1116	if (state == 0 \|\| state == 1) /* Only comment at start of line. */
1117	{
1118	int startch;
1119
1120	startch = ch;
1121
1122	do
1123	{
1124	ch = GET ();
1125	}
1126	while (ch != EOF && IS_WHITESPACE (ch));
1127
1128	if (ch == EOF)
1129	{
1130	as_warn (_("end of file in comment; newline inserted"));
1131	PUT ('\n');
1132	break;
1133	}
1134
1135	if (ch < '0' \|\| ch > '9' \|\| state != 0 \|\| startch != '#')
1136	{
1137	/* Not a cpp line. */
1138	while (ch != EOF && !IS_NEWLINE (ch))
1139	ch = GET ();
1140	if (ch == EOF)
1141	as_warn (_("end of file in comment; newline inserted"));
1142	state = 0;
1143	PUT ('\n');
1144	break;
1145	}
1146	/* Looks like `# 123 "filename"' from cpp. */
1147	UNGET (ch);
1148	old_state = 4;
1149	state = -1;
1150	if (scrub_m68k_mri)
1151	out_string = "\tappline ";
1152	else
1153	out_string = "\t.appline ";
1154	PUT (*out_string++);
1155	break;
1156	}
1157
1158	#ifdef TC_D10V
1159	/* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1160	Trap is the only short insn that has a first operand that is
1161	neither register nor label.
1162	We must prevent exef0f \|\|trap #1 to degenerate to exef0f \|\|trap#1 .
1163	We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1164	already LEX_IS_LINE_COMMENT_START. However, it is the
1165	only character in line_comment_chars for d10v, hence we
1166	can recognize it as such. */
1167	/* An alternative approach would be to reset the state to 1 when
1168	we see '\|\|', '<'- or '->', but that seems to be overkill. */
1169	if (state == 10)
1170	PUT (' ');
1171	#endif
1172	/* We have a line comment character which is not at the
1173	start of a line. If this is also a normal comment
1174	character, fall through. Otherwise treat it as a default
1175	character. */
1176	if (strchr (tc_comment_chars, ch) == NULL
1177	&& (! scrub_m68k_mri
1178	\|\| (ch != '!' && ch != '*')))
1179	goto de_fault;
1180	if (scrub_m68k_mri
1181	&& (ch == '!' \|\| ch == '*' \|\| ch == '#')
1182	&& state != 1
1183	&& state != 10)
1184	goto de_fault;
1185	/* Fall through. */
1186	case LEX_IS_COMMENT_START:
1187	#if defined TC_ARM && defined OBJ_ELF
1188	/* On the ARM, `@' is the comment character.
1189	Unfortunately this is also a special character in ELF .symver
1190	directives (and .type, though we deal with those another way).
1191	So we check if this line is such a directive, and treat
1192	the character as default if so. This is a hack. */
1193	if ((symver_state != NULL) && (*symver_state == 0))
1194	goto de_fault;
1195	#endif
1196	#ifdef WARN_COMMENTS
1197	if (!found_comment)
1198	as_where (&found_comment_file, &found_comment);
1199	#endif
1200	do
1201	{
1202	ch = GET ();
1203	}
1204	while (ch != EOF && !IS_NEWLINE (ch));
1205	if (ch == EOF)
1206	as_warn (_("end of file in comment; newline inserted"));
1207	state = 0;
1208	PUT ('\n');
1209	break;
1210
1211	case LEX_IS_SYMBOL_COMPONENT:
1212	if (state == 10)
1213	{
1214	/* This is a symbol character following another symbol
1215	character, with whitespace in between. We skipped
1216	the whitespace earlier, so output it now. */
1217	UNGET (ch);
1218	state = 3;
1219	PUT (' ');
1220	break;
1221	}
1222
1223	if (state == 3)
1224	state = 9;
1225
1226	/* This is a common case. Quickly copy CH and all the
1227	following symbol component or normal characters. */
1228	if (to + 1 < toend
1229	&& mri_state == NULL
1230	#if defined TC_ARM && defined OBJ_ELF
1231	&& symver_state == NULL
1232	#endif
1233	)
1234	{
1235	char *s;
1236	int len;
1237
1238	for (s = from; s < fromend; s++)
1239	{
1240	int type;
1241
1242	ch2 = (unsigned char ) s;
1243	type = lex[ch2];
1244	if (type != 0
1245	&& type != LEX_IS_SYMBOL_COMPONENT)
1246	break;
1247	}
1248
1249	if (s > from)
1250	/* Handle the last character normally, for
1251	simplicity. */
1252	--s;
1253
1254	len = s - from;
1255
1256	if (len > (toend - to) - 1)
1257	len = (toend - to) - 1;
1258
1259	if (len > 0)
1260	{
1261	PUT (ch);
1262	if (len > 8)
1263	{
1264	memcpy (to, from, len);
1265	to += len;
1266	from += len;
1267	}
1268	else
1269	{
1270	switch (len)
1271	{
1272	case 8: to++ = from++;
1273	case 7: to++ = from++;
1274	case 6: to++ = from++;
1275	case 5: to++ = from++;
1276	case 4: to++ = from++;
1277	case 3: to++ = from++;
1278	case 2: to++ = from++;
1279	case 1: to++ = from++;
1280	}
1281	}
1282	ch = GET ();
1283	}
1284	}
1285
1286	/* Fall through. */
1287	default:
1288	de_fault:
1289	/* Some relatively `normal' character. */
1290	if (state == 0)
1291	{
1292	state = 11; /* Now seeing label definition. */
1293	}
1294	else if (state == 1)
1295	{
1296	state = 2; /* Ditto. */
1297	}
1298	else if (state == 9)
1299	{
1300	if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1301	state = 3;
1302	}
1303	else if (state == 10)
1304	{
1305	if (ch == '\\')
1306	{
1307	/* Special handling for backslash: a backslash may
1308	be the beginning of a formal parameter (of a
1309	macro) following another symbol character, with
1310	whitespace in between. If that is the case, we
1311	output a space before the parameter. Strictly
1312	speaking, correct handling depends upon what the
1313	macro parameter expands into; if the parameter
1314	expands into something which does not start with
1315	an operand character, then we don't want to keep
1316	the space. We don't have enough information to
1317	make the right choice, so here we are making the
1318	choice which is more likely to be correct. */
1319	PUT (' ');
1320	}
1321
1322	state = 3;
1323	}
1324	PUT (ch);
1325	break;
1326	}
1327	}
1328
1329	/NOTREACHED/
1330
1331	fromeof:
1332	/* We have reached the end of the input. */
1333	return to - tostart;
1334
1335	tofull:
1336	/* The output buffer is full. Save any input we have not yet
1337	processed. */
1338	if (fromend > from)
1339	{
1340	saved_input = from;
1341	saved_input_len = fromend - from;
1342	}
1343	else
1344	saved_input = NULL;
1345
1346	return to - tostart;
1347	}
1348

Note: See TracBrowser for help on using the repository browser.

Download in other formats: