Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/src/binutils/gas/app.c@ 10

Visit:

Last change on this file since 10 was 10, checked in by bird, 22 years ago
Initial revision
Property cvs2svn:cvs-rev set to `1.1` Property svn:eol-style set to `native` Property svn:executable set to ``*
File size: 32.2 KB

Line
1	/* This is the Assembler Pre-Processor
2	Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3	1999, 2000
4	Free Software Foundation, Inc.
5
6	This file is part of GAS, the GNU Assembler.
7
8	GAS is free software; you can redistribute it and/or modify
9	it under the terms of the GNU General Public License as published by
10	the Free Software Foundation; either version 2, or (at your option)
11	any later version.
12
13	GAS is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	GNU General Public License for more details.
17
18	You should have received a copy of the GNU General Public License
19	along with GAS; see the file COPYING. If not, write to the Free
20	Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21	02111-1307, USA. */
22
23	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
24	/* App, the assembler pre-processor. This pre-processor strips out excess
25	spaces, turns single-quoted characters into a decimal constant, and turns
26	# <number> <filename> <garbage> into a .line <number>\n.file <filename>
27	pair. This needs better error-handling. */
28
29	#include <stdio.h>
30	#include "as.h" /* For BAD_CASE() only */
31
32	#if (__STDC__ != 1)
33	#ifndef const
34	#define const /* empty */
35	#endif
36	#endif
37
#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;
#else
/* Without TC_M68K, MRI scrubbing is compiled out: the name expands to
   the constant 0 so tests on it fold away.  */
#define scrub_m68k_mri 0
#endif

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
static const char * symver_state;
#endif

/* Character classification table, indexed by unsigned character value.
   An entry of 0 means "no special class"; the other values are the
   LEX_IS_* codes below.  The table is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always classified as LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Classes stored in lex[].  Note that the value 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif

/* Class predicates.  C is used directly as an index into lex[], so it
   must be a value in the range 0..255 (in particular, not EOF).  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
87
88	static int process_escape PARAMS ((int));
89
90	/* FIXME-soon: The entire lexer/parser thingy should be
91	built statically at compile time rather than dynamically
92	each and every time the assembler is run. xoxorich. */
93
94	void
95	do_scrub_begin (m68k_mri)
96	int m68k_mri ATTRIBUTE_UNUSED;
97	{
98	const char *p;
99	int c;
100
101	lex[' '] = LEX_IS_WHITESPACE;
102	lex['\t'] = LEX_IS_WHITESPACE;
103	lex['\r'] = LEX_IS_WHITESPACE;
104	lex['\n'] = LEX_IS_NEWLINE;
105	lex[':'] = LEX_IS_COLON;
106
107	#ifdef TC_M68K
108	scrub_m68k_mri = m68k_mri;
109
110	if (! m68k_mri)
111	#endif
112	{
113	lex['"'] = LEX_IS_STRINGQUOTE;
114
115	#if ! defined (TC_HPPA) && ! defined (TC_I370)
116	/* I370 uses single-quotes to delimit integer, float constants */
117	lex['\''] = LEX_IS_ONECHAR_QUOTE;
118	#endif
119
120	#ifdef SINGLE_QUOTE_STRINGS
121	lex['\''] = LEX_IS_STRINGQUOTE;
122	#endif
123	}
124
125	/* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
126	in state 5 of do_scrub_chars must be changed. */
127
128	/* Note that these override the previous defaults, e.g. if ';' is a
129	comment char, then it isn't a line separator. */
130	for (p = symbol_chars; *p; ++p)
131	{
132	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
133	} /* declare symbol characters */
134
135	for (c = 128; c < 256; ++c)
136	lex[c] = LEX_IS_SYMBOL_COMPONENT;
137
138	#ifdef tc_symbol_chars
139	/* This macro permits the processor to specify all characters which
140	may appears in an operand. This will prevent the scrubber from
141	discarding meaningful whitespace in certain cases. The i386
142	backend uses this to support prefixes, which can confuse the
143	scrubber as to whether it is parsing operands or opcodes. */
144	for (p = tc_symbol_chars; *p; ++p)
145	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
146	#endif
147
148	/* The m68k backend wants to be able to change comment_chars. */
149	#ifndef tc_comment_chars
150	#define tc_comment_chars comment_chars
151	#endif
152	for (p = tc_comment_chars; *p; p++)
153	{
154	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
155	} /* declare comment chars */
156
157	for (p = line_comment_chars; *p; p++)
158	{
159	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
160	} /* declare line comment chars */
161
162	for (p = line_separator_chars; *p; p++)
163	{
164	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
165	} /* declare line separators */
166
167	/* Only allow slash-star comments if slash is not in use.
168	FIXME: This isn't right. We should always permit them. */
169	if (lex['/'] == 0)
170	{
171	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
172	}
173
174	#ifdef TC_M68K
175	if (m68k_mri)
176	{
177	lex['\''] = LEX_IS_STRINGQUOTE;
178	lex[';'] = LEX_IS_COMMENT_START;
179	lex['*'] = LEX_IS_LINE_COMMENT_START;
180	/* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
181	then it can't be used in an expression. */
182	lex['!'] = LEX_IS_LINE_COMMENT_START;
183	}
184	#endif
185
186	#ifdef TC_V850
187	lex['-'] = LEX_IS_DOUBLEDASH_1ST;
188	#endif
189	#ifdef DOUBLEBAR_PARALLEL
190	lex['\|'] = LEX_IS_DOUBLEBAR_1ST;
191	#endif
192	#ifdef TC_D30V
193	/* must do this is we want VLIW instruction with "->" or "<-" */
194	lex['-'] = LEX_IS_SYMBOL_COMPONENT;
195	#endif
196	} /* do_scrub_begin() */
197
/* Saved state of the scrubber.  do_scrub_chars is a state machine that
   may return at any point, so everything it needs to resume lives in
   these file-scope variables.  */
static int state;		/* Current state of the state machine.  */
static int old_state;		/* State to return to after a sub-state.  */
static char *out_string;	/* Pending text being emitted in state -1.  */
static char out_buf[20];	/* Holds the decimal text of a 'x constant.  */
static int add_newlines;	/* Newlines swallowed and still owed to the output.  */
static char *saved_input;	/* Unprocessed input left over, or NULL.  */
static int saved_input_len;	/* Number of bytes at saved_input.  */
static char input_buffer[32 * 1024];
static const char *mri_state;	/* Progress through mri_pseudo, or NULL.  */
static char mri_last_ch;	/* Last character matched against mri_pseudo.  */

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.

   Each member mirrors the file-scope variable of the same name;
   saved_input is a private heap copy of the pending input.  */

struct app_save {
  int          state;
  int          old_state;
  char *       out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  char *       saved_input;
  int          saved_input_len;
#ifdef TC_M68K
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
};
232
233	char *
234	app_push ()
235	{
236	register struct app_save *saved;
237
238	saved = (struct app_save ) xmalloc (sizeof (saved));
239	saved->state = state;
240	saved->old_state = old_state;
241	saved->out_string = out_string;
242	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
243	saved->add_newlines = add_newlines;
244	if (saved_input == NULL)
245	saved->saved_input = NULL;
246	else
247	{
248	saved->saved_input = xmalloc (saved_input_len);
249	memcpy (saved->saved_input, saved_input, saved_input_len);
250	saved->saved_input_len = saved_input_len;
251	}
252	#ifdef TC_M68K
253	saved->scrub_m68k_mri = scrub_m68k_mri;
254	#endif
255	saved->mri_state = mri_state;
256	saved->mri_last_ch = mri_last_ch;
257	#if defined TC_ARM && defined OBJ_ELF
258	saved->symver_state = symver_state;
259	#endif
260
261	/* do_scrub_begin() is not useful, just wastes time. */
262
263	state = 0;
264	saved_input = NULL;
265
266	return (char *) saved;
267	}
268
269	void
270	app_pop (arg)
271	char *arg;
272	{
273	register struct app_save saved = (struct app_save ) arg;
274
275	/* There is no do_scrub_end (). */
276	state = saved->state;
277	old_state = saved->old_state;
278	out_string = saved->out_string;
279	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
280	add_newlines = saved->add_newlines;
281	if (saved->saved_input == NULL)
282	saved_input = NULL;
283	else
284	{
285	assert (saved->saved_input_len <= (int) (sizeof input_buffer));
286	memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
287	saved_input = input_buffer;
288	saved_input_len = saved->saved_input_len;
289	free (saved->saved_input);
290	}
291	#ifdef TC_M68K
292	scrub_m68k_mri = saved->scrub_m68k_mri;
293	#endif
294	mri_state = saved->mri_state;
295	mri_last_ch = saved->mri_last_ch;
296	#if defined TC_ARM && defined OBJ_ELF
297	symver_state = saved->symver_state;
298	#endif
299
300	free (arg);
301	} /* app_pop() */
302
/* Translate the character following a backslash in a one-character
   quote into the character it denotes.

   CH is the character after the backslash.  The letters b, f, n, r
   and t map to the corresponding control characters; every other
   character, including the two quote characters, is returned
   unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
329
330	/* This function is called to process input characters. The GET
331	parameter is used to retrieve more input characters. GET should
332	set its parameter to point to a buffer, and return the length of
333	the buffer; it should return 0 at end of file. The scrubbed output
334	characters are put into the buffer starting at TOSTART; the TOSTART
335	buffer is TOLEN bytes in length. The function returns the number
336	of scrubbed characters put into TOSTART. This will be TOLEN unless
337	end of file was seen. This function is arranged as a state
338	machine, and saves its state so that it may return at any point.
339	This is the way the old code used to work. */
340
341	int
342	do_scrub_chars (get, tostart, tolen)
343	int (get) PARAMS ((char , int));
344	char *tostart;
345	int tolen;
346	{
347	char *to = tostart;
348	char *toend = tostart + tolen;
349	char *from;
350	char *fromend;
351	int fromlen;
352	register int ch, ch2 = 0;
353
354	/*State 0: beginning of normal line
355	1: After first whitespace on line (flush more white)
356	2: After first non-white (opcode) on line (keep 1white)
357	3: after second white on line (into operands) (flush white)
358	4: after putting out a .line, put out digits
359	5: parsing a string, then go to old-state
360	6: putting out \ escape in a "d string.
361	7: After putting out a .appfile, put out string.
362	8: After putting out a .appfile string, flush until newline.
363	9: After seeing symbol char in state 3 (keep 1white after symchar)
364	10: After seeing whitespace in state 9 (keep white before symchar)
365	11: After seeing a symbol character in state 0 (eg a label definition)
366	-1: output string in out_string and go to the state in old_state
367	-2: flush text until a '*' '/' is seen, then go to state old_state
368	#ifdef TC_V850
369	12: After seeing a dash, looking for a second dash as a start of comment.
370	#endif
371	#ifdef DOUBLEBAR_PARALLEL
372	13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator.
373	#endif
374	*/
375
376	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
377	constructs like ``.loc 1 20''. This was turning into ``.loc
378	120''. States 9 and 10 ensure that a space is never dropped in
379	between characters which could appear in a identifier. Ian
380	Taylor, ian@cygnus.com.
381
382	I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
383	correctly on the PA (and any other target where colons are optional).
384	Jeff Law, law@cs.utah.edu.
385
386	I added state 13 so that something like "cmp r1, r2 \|\| trap #1" does not
387	get squashed into "cmp r1,r2\|\|trap#1", with the all important space
388	between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
389
390	/* This macro gets the next input character. */
391
392	#define GET() \
393	(from < fromend \
394	? * (unsigned char *) (from++) \
395	: (saved_input = NULL, \
396	fromlen = (*get) (input_buffer, sizeof input_buffer), \
397	from = input_buffer, \
398	fromend = from + fromlen, \
399	(fromlen == 0 \
400	? EOF \
401	: * (unsigned char *) (from++))))
402
403	/* This macro pushes a character back on the input stream. */
404
405	#define UNGET(uch) (*--from = (uch))
406
407	/* This macro puts a character into the output buffer. If this
408	character fills the output buffer, this macro jumps to the label
409	TOFULL. We use this rather ugly approach because we need to
410	handle two different termination conditions: EOF on the input
411	stream, and a full output buffer. It would be simpler if we
412	always read in the entire input stream before processing it, but
413	I don't want to make such a significant change to the assembler's
414	memory usage. */
415
416	#define PUT(pch) \
417	do \
418	{ \
419	*to++ = (pch); \
420	if (to >= toend) \
421	goto tofull; \
422	} \
423	while (0)
424
425	if (saved_input != NULL)
426	{
427	from = saved_input;
428	fromend = from + saved_input_len;
429	}
430	else
431	{
432	fromlen = (*get) (input_buffer, sizeof input_buffer);
433	if (fromlen == 0)
434	return 0;
435	from = input_buffer;
436	fromend = from + fromlen;
437	}
438
439	while (1)
440	{
441	/* The cases in this switch end with continue, in order to
442	branch back to the top of this while loop and generate the
443	next output character in the appropriate state. */
444	switch (state)
445	{
446	case -1:
447	ch = *out_string++;
448	if (*out_string == '\0')
449	{
450	state = old_state;
451	old_state = 3;
452	}
453	PUT (ch);
454	continue;
455
456	case -2:
457	for (;;)
458	{
459	do
460	{
461	ch = GET ();
462
463	if (ch == EOF)
464	{
465	as_warn (_("end of file in comment"));
466	goto fromeof;
467	}
468
469	if (ch == '\n')
470	PUT ('\n');
471	}
472	while (ch != '*');
473
474	while ((ch = GET ()) == '*')
475	;
476
477	if (ch == EOF)
478	{
479	as_warn (_("end of file in comment"));
480	goto fromeof;
481	}
482
483	if (ch == '/')
484	break;
485
486	UNGET (ch);
487	}
488
489	state = old_state;
490	UNGET (' ');
491	continue;
492
493	case 4:
494	ch = GET ();
495	if (ch == EOF)
496	goto fromeof;
497	else if (ch >= '0' && ch <= '9')
498	PUT (ch);
499	else
500	{
501	while (ch != EOF && IS_WHITESPACE (ch))
502	ch = GET ();
503	if (ch == '"')
504	{
505	UNGET (ch);
506	if (scrub_m68k_mri)
507	out_string = "\n\tappfile ";
508	else
509	out_string = "\n\t.appfile ";
510	old_state = 7;
511	state = -1;
512	PUT (*out_string++);
513	}
514	else
515	{
516	while (ch != EOF && ch != '\n')
517	ch = GET ();
518	state = 0;
519	PUT (ch);
520	}
521	}
522	continue;
523
524	case 5:
525	/* We are going to copy everything up to a quote character,
526	with special handling for a backslash. We try to
527	optimize the copying in the simple case without using the
528	GET and PUT macros. */
529	{
530	char *s;
531	int len;
532
533	for (s = from; s < fromend; s++)
534	{
535	ch = *s;
536	/* This condition must be changed if the type of any
537	other character can be LEX_IS_STRINGQUOTE. */
538	if (ch == '\\'
539	\|\| ch == '"'
540	\|\| ch == '\''
541	\|\| ch == '\n')
542	break;
543	}
544	len = s - from;
545	if (len > toend - to)
546	len = toend - to;
547	if (len > 0)
548	{
549	memcpy (to, from, len);
550	to += len;
551	from += len;
552	}
553	}
554
555	ch = GET ();
556	if (ch == EOF)
557	{
558	as_warn (_("end of file in string: inserted '\"'"));
559	state = old_state;
560	UNGET ('\n');
561	PUT ('"');
562	}
563	else if (lex[ch] == LEX_IS_STRINGQUOTE)
564	{
565	state = old_state;
566	PUT (ch);
567	}
568	#ifndef NO_STRING_ESCAPES
569	else if (ch == '\\')
570	{
571	state = 6;
572	PUT (ch);
573	}
574	#endif
575	else if (scrub_m68k_mri && ch == '\n')
576	{
577	/* Just quietly terminate the string. This permits lines like
578	bne label loop if we haven't reach end yet
579	*/
580	state = old_state;
581	UNGET (ch);
582	PUT ('\'');
583	}
584	else
585	{
586	PUT (ch);
587	}
588	continue;
589
590	case 6:
591	state = 5;
592	ch = GET ();
593	switch (ch)
594	{
595	/* Handle strings broken across lines, by turning '\n' into
596	'\\' and 'n'. */
597	case '\n':
598	UNGET ('n');
599	add_newlines++;
600	PUT ('\\');
601	continue;
602
603	case '"':
604	case '\\':
605	case 'b':
606	case 'f':
607	case 'n':
608	case 'r':
609	case 't':
610	case 'v':
611	case 'x':
612	case 'X':
613	case '0':
614	case '1':
615	case '2':
616	case '3':
617	case '4':
618	case '5':
619	case '6':
620	case '7':
621	break;
622	#if defined(IGNORE_NONSTANDARD_ESCAPES) \| defined(ONLY_STANDARD_ESCAPES)
623	default:
624	as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
625	break;
626	#else /* ONLY_STANDARD_ESCAPES */
627	default:
628	/* Accept \x as x for any x */
629	break;
630	#endif /* ONLY_STANDARD_ESCAPES */
631
632	case EOF:
633	as_warn (_("End of file in string: '\"' inserted"));
634	PUT ('"');
635	continue;
636	}
637	PUT (ch);
638	continue;
639
640	case 7:
641	ch = GET ();
642	state = 5;
643	old_state = 8;
644	if (ch == EOF)
645	goto fromeof;
646	PUT (ch);
647	continue;
648
649	case 8:
650	do
651	ch = GET ();
652	while (ch != '\n' && ch != EOF);
653	if (ch == EOF)
654	goto fromeof;
655	state = 0;
656	PUT (ch);
657	continue;
658	}
659
660	/* OK, we are somewhere in states 0 through 4 or 9 through 11 */
661
662	/* flushchar: */
663	ch = GET ();
664
665	recycle:
666
667	#if defined TC_ARM && defined OBJ_ELF
668	/* We need to watch out for .symver directives. See the comment later
669	in this function. */
670	if (symver_state == NULL)
671	{
672	if ((state == 0 \|\| state == 1) && ch == symver_pseudo[0])
673	symver_state = symver_pseudo + 1;
674	}
675	else
676	{
677	/* We advance to the next state if we find the right
678	character. */
679	if (ch != '\0' && (*symver_state == ch))
680	++symver_state;
681	else if (*symver_state != '\0')
682	/* We did not get the expected character, or we didn't
683	get a valid terminating character after seeing the
684	entire pseudo-op, so we must go back to the beginning. */
685	symver_state = NULL;
686	else
687	{
688	/* We've read the entire pseudo-op. If this is the end
689	of the line, go back to the beginning. */
690	if (IS_NEWLINE (ch))
691	symver_state = NULL;
692	}
693	}
694	#endif /* TC_ARM && OBJ_ELF */
695
696	#ifdef TC_M68K
697	/* We want to have pseudo-ops which control whether we are in
698	MRI mode or not. Unfortunately, since m68k MRI mode affects
699	the scrubber, that means that we need a special purpose
700	recognizer here. */
701	if (mri_state == NULL)
702	{
703	if ((state == 0 \|\| state == 1)
704	&& ch == mri_pseudo[0])
705	mri_state = mri_pseudo + 1;
706	}
707	else
708	{
709	/* We advance to the next state if we find the right
710	character, or if we need a space character and we get any
711	whitespace character, or if we need a '0' and we get a
712	'1' (this is so that we only need one state to handle
713	``.mri 0'' and ``.mri 1''). */
714	if (ch != '\0'
715	&& (*mri_state == ch
716	\|\| (*mri_state == ' '
717	&& lex[ch] == LEX_IS_WHITESPACE)
718	\|\| (*mri_state == '0'
719	&& ch == '1')))
720	{
721	mri_last_ch = ch;
722	++mri_state;
723	}
724	else if (*mri_state != '\0'
725	\|\| (lex[ch] != LEX_IS_WHITESPACE
726	&& lex[ch] != LEX_IS_NEWLINE))
727	{
728	/* We did not get the expected character, or we didn't
729	get a valid terminating character after seeing the
730	entire pseudo-op, so we must go back to the
731	beginning. */
732	mri_state = NULL;
733	}
734	else
735	{
736	/* We've read the entire pseudo-op. mips_last_ch is
737	either '0' or '1' indicating whether to enter or
738	leave MRI mode. */
739	do_scrub_begin (mri_last_ch == '1');
740	mri_state = NULL;
741
742	/* We continue handling the character as usual. The
743	main gas reader must also handle the .mri pseudo-op
744	to control expression parsing and the like. */
745	}
746	}
747	#endif
748
749	if (ch == EOF)
750	{
751	if (state != 0)
752	{
753	as_warn (_("end of file not at end of a line; newline inserted"));
754	state = 0;
755	PUT ('\n');
756	}
757	goto fromeof;
758	}
759
760	switch (lex[ch])
761	{
762	case LEX_IS_WHITESPACE:
763	do
764	{
765	ch = GET ();
766	}
767	while (ch != EOF && IS_WHITESPACE (ch));
768	if (ch == EOF)
769	goto fromeof;
770
771	if (state == 0)
772	{
773	/* Preserve a single whitespace character at the
774	beginning of a line. */
775	state = 1;
776	UNGET (ch);
777	PUT (' ');
778	break;
779	}
780
781	#ifdef KEEP_WHITE_AROUND_COLON
782	if (lex[ch] == LEX_IS_COLON)
783	{
784	/* Only keep this white if there's no white after the
785	colon. */
786	ch2 = GET ();
787	UNGET (ch2);
788	if (!IS_WHITESPACE (ch2))
789	{
790	state = 9;
791	UNGET (ch);
792	PUT (' ');
793	break;
794	}
795	}
796	#endif
797	if (IS_COMMENT (ch)
798	\|\| ch == '/'
799	\|\| IS_LINE_SEPARATOR (ch))
800	{
801	if (scrub_m68k_mri)
802	{
803	/* In MRI mode, we keep these spaces. */
804	UNGET (ch);
805	PUT (' ');
806	break;
807	}
808	goto recycle;
809	}
810
811	/* If we're in state 2 or 11, we've seen a non-white
812	character followed by whitespace. If the next character
813	is ':', this is whitespace after a label name which we
814	normally must ignore. In MRI mode, though, spaces are
815	not permitted between the label and the colon. */
816	if ((state == 2 \|\| state == 11)
817	&& lex[ch] == LEX_IS_COLON
818	&& ! scrub_m68k_mri)
819	{
820	state = 1;
821	PUT (ch);
822	break;
823	}
824
825	switch (state)
826	{
827	case 0:
828	state++;
829	goto recycle; /* Punted leading sp */
830	case 1:
831	/* We can arrive here if we leave a leading whitespace
832	character at the beginning of a line. */
833	goto recycle;
834	case 2:
835	state = 3;
836	if (to + 1 < toend)
837	{
838	/* Optimize common case by skipping UNGET/GET. */
839	PUT (' '); /* Sp after opco */
840	goto recycle;
841	}
842	UNGET (ch);
843	PUT (' ');
844	break;
845	case 3:
846	if (scrub_m68k_mri)
847	{
848	/* In MRI mode, we keep these spaces. */
849	UNGET (ch);
850	PUT (' ');
851	break;
852	}
853	goto recycle; /* Sp in operands */
854	case 9:
855	case 10:
856	if (scrub_m68k_mri)
857	{
858	/* In MRI mode, we keep these spaces. */
859	state = 3;
860	UNGET (ch);
861	PUT (' ');
862	break;
863	}
864	state = 10; /* Sp after symbol char */
865	goto recycle;
866	case 11:
867	if (LABELS_WITHOUT_COLONS \|\| flag_m68k_mri)
868	state = 1;
869	else
870	{
871	/* We know that ch is not ':', since we tested that
872	case above. Therefore this is not a label, so it
873	must be the opcode, and we've just seen the
874	whitespace after it. */
875	state = 3;
876	}
877	UNGET (ch);
878	PUT (' '); /* Sp after label definition. */
879	break;
880	default:
881	BAD_CASE (state);
882	}
883	break;
884
885	case LEX_IS_TWOCHAR_COMMENT_1ST:
886	ch2 = GET ();
887	if (ch2 == '*')
888	{
889	for (;;)
890	{
891	do
892	{
893	ch2 = GET ();
894	if (ch2 != EOF && IS_NEWLINE (ch2))
895	add_newlines++;
896	}
897	while (ch2 != EOF && ch2 != '*');
898
899	while (ch2 == '*')
900	ch2 = GET ();
901
902	if (ch2 == EOF \|\| ch2 == '/')
903	break;
904
905	/* This UNGET will ensure that we count newlines
906	correctly. */
907	UNGET (ch2);
908	}
909
910	if (ch2 == EOF)
911	as_warn (_("end of file in multiline comment"));
912
913	ch = ' ';
914	goto recycle;
915	}
916	#ifdef DOUBLESLASH_LINE_COMMENTS
917	else if (ch2 == '/')
918	{
919	do
920	{
921	ch = GET ();
922	}
923	while (ch != EOF && !IS_NEWLINE (ch));
924	if (ch == EOF)
925	as_warn ("end of file in comment; newline inserted");
926	state = 0;
927	PUT ('\n');
928	break;
929	}
930	#endif
931	else
932	{
933	if (ch2 != EOF)
934	UNGET (ch2);
935	if (state == 9 \|\| state == 10)
936	state = 3;
937	PUT (ch);
938	}
939	break;
940
941	case LEX_IS_STRINGQUOTE:
942	if (state == 10)
943	{
944	/* Preserve the whitespace in foo "bar" */
945	UNGET (ch);
946	state = 3;
947	PUT (' ');
948
949	/* PUT didn't jump out. We could just break, but we
950	know what will happen, so optimize a bit. */
951	ch = GET ();
952	old_state = 3;
953	}
954	else if (state == 9)
955	old_state = 3;
956	else
957	old_state = state;
958	state = 5;
959	PUT (ch);
960	break;
961
962	#ifndef IEEE_STYLE
963	case LEX_IS_ONECHAR_QUOTE:
964	if (state == 10)
965	{
966	/* Preserve the whitespace in foo 'b' */
967	UNGET (ch);
968	state = 3;
969	PUT (' ');
970	break;
971	}
972	ch = GET ();
973	if (ch == EOF)
974	{
975	as_warn (_("end of file after a one-character quote; \\0 inserted"));
976	ch = 0;
977	}
978	if (ch == '\\')
979	{
980	ch = GET ();
981	if (ch == EOF)
982	{
983	as_warn (_("end of file in escape character"));
984	ch = '\\';
985	}
986	else
987	ch = process_escape (ch);
988	}
989	sprintf (out_buf, "%d", (int) (unsigned char) ch);
990
991	/* None of these 'x constants for us. We want 'x'. */
992	if ((ch = GET ()) != '\'')
993	{
994	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
995	as_warn (_("Missing close quote: (assumed)"));
996	#else
997	if (ch != EOF)
998	UNGET (ch);
999	#endif
1000	}
1001	if (strlen (out_buf) == 1)
1002	{
1003	PUT (out_buf[0]);
1004	break;
1005	}
1006	if (state == 9)
1007	old_state = 3;
1008	else
1009	old_state = state;
1010	state = -1;
1011	out_string = out_buf;
1012	PUT (*out_string++);
1013	break;
1014	#endif
1015
1016	case LEX_IS_COLON:
1017	#ifdef KEEP_WHITE_AROUND_COLON
1018	state = 9;
1019	#else
1020	if (state == 9 \|\| state == 10)
1021	state = 3;
1022	else if (state != 3)
1023	state = 1;
1024	#endif
1025	PUT (ch);
1026	break;
1027
1028	case LEX_IS_NEWLINE:
1029	/* Roll out a bunch of newlines from inside comments, etc. */
1030	if (add_newlines)
1031	{
1032	--add_newlines;
1033	UNGET (ch);
1034	}
1035	/* Fall through. */
1036
1037	case LEX_IS_LINE_SEPARATOR:
1038	state = 0;
1039	PUT (ch);
1040	break;
1041
1042	#ifdef TC_V850
1043	case LEX_IS_DOUBLEDASH_1ST:
1044	ch2 = GET ();
1045	if (ch2 != '-')
1046	{
1047	UNGET (ch2);
1048	goto de_fault;
1049	}
1050	/* Read and skip to end of line. */
1051	do
1052	{
1053	ch = GET ();
1054	}
1055	while (ch != EOF && ch != '\n');
1056	if (ch == EOF)
1057	{
1058	as_warn (_("end of file in comment; newline inserted"));
1059	}
1060	state = 0;
1061	PUT ('\n');
1062	break;
1063	#endif
1064	#ifdef DOUBLEBAR_PARALLEL
1065	case LEX_IS_DOUBLEBAR_1ST:
1066	ch2 = GET ();
1067	if (ch2 != '\|')
1068	{
1069	UNGET (ch2);
1070	goto de_fault;
1071	}
1072	/* Reset back to state 1 and pretend that we are parsing a line from
1073	just after the first white space. */
1074	state = 1;
1075	PUT ('\|');
1076	PUT ('\|');
1077	break;
1078	#endif
1079	case LEX_IS_LINE_COMMENT_START:
1080	/* FIXME-someday: The two character comment stuff was badly
1081	thought out. On i386, we want '/' as line comment start
1082	AND we want C style comments. hence this hack. The
1083	whole lexical process should be reworked. xoxorich. */
1084	if (ch == '/')
1085	{
1086	ch2 = GET ();
1087	if (ch2 == '*')
1088	{
1089	old_state = 3;
1090	state = -2;
1091	break;
1092	}
1093	else
1094	{
1095	UNGET (ch2);
1096	}
1097	} /* bad hack */
1098
1099	if (state == 0 \|\| state == 1) /* Only comment at start of line. */
1100	{
1101	int startch;
1102
1103	startch = ch;
1104
1105	do
1106	{
1107	ch = GET ();
1108	}
1109	while (ch != EOF && IS_WHITESPACE (ch));
1110	if (ch == EOF)
1111	{
1112	as_warn (_("end of file in comment; newline inserted"));
1113	PUT ('\n');
1114	break;
1115	}
1116	if (ch < '0' \|\| ch > '9' \|\| state != 0 \|\| startch != '#')
1117	{
1118	/* Not a cpp line. */
1119	while (ch != EOF && !IS_NEWLINE (ch))
1120	ch = GET ();
1121	if (ch == EOF)
1122	as_warn (_("EOF in Comment: Newline inserted"));
1123	state = 0;
1124	PUT ('\n');
1125	break;
1126	}
1127	/* Looks like `# 123 "filename"' from cpp. */
1128	UNGET (ch);
1129	old_state = 4;
1130	state = -1;
1131	if (scrub_m68k_mri)
1132	out_string = "\tappline ";
1133	else
1134	out_string = "\t.appline ";
1135	PUT (*out_string++);
1136	break;
1137	}
1138
1139	#ifdef TC_D10V
1140	/* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1141	Trap is the only short insn that has a first operand that is
1142	neither register nor label.
1143	We must prevent exef0f \|\|trap #1 to degenerate to exef0f \|\|trap#1 .
1144	We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1145	already LEX_IS_LINE_COMMENT_START. However, it is the
1146	only character in line_comment_chars for d10v, hence we
1147	can recognize it as such. */
1148	/* An alternative approach would be to reset the state to 1 when
1149	we see '\|\|', '<'- or '->', but that seems to be overkill. */
1150	if (state == 10)
1151	PUT (' ');
1152	#endif
1153	/* We have a line comment character which is not at the
1154	start of a line. If this is also a normal comment
1155	character, fall through. Otherwise treat it as a default
1156	character. */
1157	if (strchr (tc_comment_chars, ch) == NULL
1158	&& (! scrub_m68k_mri
1159	\|\| (ch != '!' && ch != '*')))
1160	goto de_fault;
1161	if (scrub_m68k_mri
1162	&& (ch == '!' \|\| ch == '*' \|\| ch == '#')
1163	&& state != 1
1164	&& state != 10)
1165	goto de_fault;
1166	/* Fall through. */
1167	case LEX_IS_COMMENT_START:
1168	#if defined TC_ARM && defined OBJ_ELF
1169	/* On the ARM, `@' is the comment character.
1170	Unfortunately this is also a special character in ELF .symver
1171	directives (and .type, though we deal with those another way).
1172	So we check if this line is such a directive, and treat
1173	the character as default if so. This is a hack. */
1174	if ((symver_state != NULL) && (*symver_state == 0))
1175	goto de_fault;
1176	#endif
1177	#ifdef WARN_COMMENTS
1178	if (!found_comment)
1179	as_where (&found_comment_file, &found_comment);
1180	#endif
1181	do
1182	{
1183	ch = GET ();
1184	}
1185	while (ch != EOF && !IS_NEWLINE (ch));
1186	if (ch == EOF)
1187	as_warn (_("end of file in comment; newline inserted"));
1188	state = 0;
1189	PUT ('\n');
1190	break;
1191
1192	case LEX_IS_SYMBOL_COMPONENT:
1193	if (state == 10)
1194	{
1195	/* This is a symbol character following another symbol
1196	character, with whitespace in between. We skipped
1197	the whitespace earlier, so output it now. */
1198	UNGET (ch);
1199	state = 3;
1200	PUT (' ');
1201	break;
1202	}
1203
1204	if (state == 3)
1205	state = 9;
1206
1207	/* This is a common case. Quickly copy CH and all the
1208	following symbol component or normal characters. */
1209	if (to + 1 < toend
1210	&& mri_state == NULL
1211	#if defined TC_ARM && defined OBJ_ELF
1212	&& symver_state == NULL
1213	#endif
1214	)
1215	{
1216	char *s;
1217	int len;
1218
1219	for (s = from; s < fromend; s++)
1220	{
1221	int type;
1222
1223	ch2 = (unsigned char ) s;
1224	type = lex[ch2];
1225	if (type != 0
1226	&& type != LEX_IS_SYMBOL_COMPONENT)
1227	break;
1228	}
1229	if (s > from)
1230	{
1231	/* Handle the last character normally, for
1232	simplicity. */
1233	--s;
1234	}
1235	len = s - from;
1236	if (len > (toend - to) - 1)
1237	len = (toend - to) - 1;
1238	if (len > 0)
1239	{
1240	PUT (ch);
1241	if (len > 8)
1242	{
1243	memcpy (to, from, len);
1244	to += len;
1245	from += len;
1246	}
1247	else
1248	{
1249	switch (len)
1250	{
1251	case 8: to++ = from++;
1252	case 7: to++ = from++;
1253	case 6: to++ = from++;
1254	case 5: to++ = from++;
1255	case 4: to++ = from++;
1256	case 3: to++ = from++;
1257	case 2: to++ = from++;
1258	case 1: to++ = from++;
1259	}
1260	}
1261	ch = GET ();
1262	}
1263	}
1264
1265	/* Fall through. */
1266	default:
1267	de_fault:
1268	/* Some relatively `normal' character. */
1269	if (state == 0)
1270	{
1271	state = 11; /* Now seeing label definition */
1272	}
1273	else if (state == 1)
1274	{
1275	state = 2; /* Ditto */
1276	}
1277	else if (state == 9)
1278	{
1279	if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1280	state = 3;
1281	}
1282	else if (state == 10)
1283	{
1284	if (ch == '\\')
1285	{
1286	/* Special handling for backslash: a backslash may
1287	be the beginning of a formal parameter (of a
1288	macro) following another symbol character, with
1289	whitespace in between. If that is the case, we
1290	output a space before the parameter. Strictly
1291	speaking, correct handling depends upon what the
1292	macro parameter expands into; if the parameter
1293	expands into something which does not start with
1294	an operand character, then we don't want to keep
1295	the space. We don't have enough information to
1296	make the right choice, so here we are making the
1297	choice which is more likely to be correct. */
1298	PUT (' ');
1299	}
1300
1301	state = 3;
1302	}
1303	PUT (ch);
1304	break;
1305	}
1306	}
1307
1308	/NOTREACHED/
1309
1310	fromeof:
1311	/* We have reached the end of the input. */
1312	return to - tostart;
1313
1314	tofull:
1315	/* The output buffer is full. Save any input we have not yet
1316	processed. */
1317	if (fromend > from)
1318	{
1319	saved_input = from;
1320	saved_input_len = fromend - from;
1321	}
1322	else
1323	saved_input = NULL;
1324
1325	return to - tostart;
1326	}
1327
1328	/* end of app.c */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: