source: trunk/src/gcc/gcc/c-lex.c@ 1392

Last change on this file since 1392 was 1392, checked in by bird, 21 years ago

This commit was generated by cvs2svn to compensate for changes in r1391,
which included commits to RCS files with non-trunk default branches.

  • Property cvs2svn:cvs-rev set to 1.1.1.2
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 28.4 KB
Line 
1/* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to the Free
19Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2002111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
24
25#include "real.h"
26#include "rtl.h"
27#include "tree.h"
28#include "expr.h"
29#include "input.h"
30#include "output.h"
31#include "c-tree.h"
32#include "c-common.h"
33#include "flags.h"
34#include "timevar.h"
35#include "cpplib.h"
36#include "c-pragma.h"
37#include "toplev.h"
38#include "intl.h"
39#include "tm_p.h"
40#include "splay-tree.h"
41#include "debug.h"
42
43#ifdef MULTIBYTE_CHARS
44#include "mbchar.h"
45#include <locale.h>
46#endif /* MULTIBYTE_CHARS */
47
48/* The current line map. */
49static const struct line_map *map;
50
51/* The line used to refresh the lineno global variable after each token. */
52static unsigned int src_lineno;
53
54/* We may keep statistics about how long which files took to compile. */
55static int header_time, body_time;
56static splay_tree file_info_tree;
57
58/* File used for outputting assembler code. */
59extern FILE *asm_out_file;
60
61#undef WCHAR_TYPE_SIZE
62#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
63
64/* Number of bytes in a wide character. */
65#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
66
67int pending_lang_change; /* If we need to switch languages - C++ only */
68int c_header_level; /* depth in C headers - C++ only */
69
70/* Nonzero tells yylex to ignore \ in string constants. */
71static int ignore_escape_flag;
72
73static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
74static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
75static enum integer_type_kind
76 narrowest_unsigned_type PARAMS ((tree, unsigned int));
77static enum integer_type_kind
78 narrowest_signed_type PARAMS ((tree, unsigned int));
79static tree lex_string PARAMS ((const unsigned char *, unsigned int,
80 int));
81static tree lex_charconst PARAMS ((const cpp_token *));
82static void update_header_times PARAMS ((const char *));
83static int dump_one_header PARAMS ((splay_tree_node, void *));
84static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
85static void cb_ident PARAMS ((cpp_reader *, unsigned int,
86 const cpp_string *));
87static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
88static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
89static void cb_define PARAMS ((cpp_reader *, unsigned int,
90 cpp_hashnode *));
91static void cb_undef PARAMS ((cpp_reader *, unsigned int,
92 cpp_hashnode *));
93
94
95const char *
96init_c_lex (filename)
97 const char *filename;
98{
99 struct cpp_callbacks *cb;
100 struct c_fileinfo *toplevel;
101
102 /* Set up filename timing. Must happen before cpp_read_main_file. */
103 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
104 0,
105 (splay_tree_delete_value_fn)free);
106 toplevel = get_fileinfo ("<top level>");
107 if (flag_detailed_statistics)
108 {
109 header_time = 0;
110 body_time = get_run_time ();
111 toplevel->time = body_time;
112 }
113
114#ifdef MULTIBYTE_CHARS
115 /* Change to the native locale for multibyte conversions. */
116 setlocale (LC_CTYPE, "");
117 GET_ENVIRONMENT (literal_codeset, "LANG");
118#endif
119
120 cb = cpp_get_callbacks (parse_in);
121
122 cb->line_change = cb_line_change;
123 cb->ident = cb_ident;
124 cb->file_change = cb_file_change;
125 cb->def_pragma = cb_def_pragma;
126
127 /* Set the debug callbacks if we can use them. */
128 if (debug_info_level == DINFO_LEVEL_VERBOSE
129 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
130 || write_symbols == VMS_AND_DWARF2_DEBUG))
131 {
132 cb->define = cb_define;
133 cb->undef = cb_undef;
134 }
135
136 /* Start it at 0. */
137 lineno = 0;
138
139 return cpp_read_main_file (parse_in, filename, ident_hash);
140}
141
142/* A thin wrapper around the real parser that initializes the
143 integrated preprocessor after debug output has been initialized.
144 Also, make sure the start_source_file debug hook gets called for
145 the primary source file. */
146
147void
148c_common_parse_file (set_yydebug)
149 int set_yydebug ATTRIBUTE_UNUSED;
150{
151#if YYDEBUG != 0
152 yydebug = set_yydebug;
153#else
154 warning ("YYDEBUG not defined");
155#endif
156
157 (*debug_hooks->start_source_file) (lineno, input_filename);
158 cpp_finish_options (parse_in);
159
160 yyparse ();
161 free_parser_stacks ();
162}
163
164struct c_fileinfo *
165get_fileinfo (name)
166 const char *name;
167{
168 splay_tree_node n;
169 struct c_fileinfo *fi;
170
171 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
172 if (n)
173 return (struct c_fileinfo *) n->value;
174
175 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
176 fi->time = 0;
177 fi->interface_only = 0;
178 fi->interface_unknown = 1;
179 splay_tree_insert (file_info_tree, (splay_tree_key) name,
180 (splay_tree_value) fi);
181 return fi;
182}
183
184static void
185update_header_times (name)
186 const char *name;
187{
188 /* Changing files again. This means currently collected time
189 is charged against header time, and body time starts back at 0. */
190 if (flag_detailed_statistics)
191 {
192 int this_time = get_run_time ();
193 struct c_fileinfo *file = get_fileinfo (name);
194 header_time += this_time - body_time;
195 file->time += this_time - body_time;
196 body_time = this_time;
197 }
198}
199
200static int
201dump_one_header (n, dummy)
202 splay_tree_node n;
203 void *dummy ATTRIBUTE_UNUSED;
204{
205 print_time ((const char *) n->key,
206 ((struct c_fileinfo *) n->value)->time);
207 return 0;
208}
209
210void
211dump_time_statistics ()
212{
213 struct c_fileinfo *file = get_fileinfo (input_filename);
214 int this_time = get_run_time ();
215 file->time += this_time - body_time;
216
217 fprintf (stderr, "\n******\n");
218 print_time ("header files (total)", header_time);
219 print_time ("main file (total)", this_time - body_time);
220 fprintf (stderr, "ratio = %g : 1\n",
221 (double)header_time / (double)(this_time - body_time));
222 fprintf (stderr, "\n******\n");
223
224 splay_tree_foreach (file_info_tree, dump_one_header, 0);
225}
226
227static void
228cb_ident (pfile, line, str)
229 cpp_reader *pfile ATTRIBUTE_UNUSED;
230 unsigned int line ATTRIBUTE_UNUSED;
231 const cpp_string *str ATTRIBUTE_UNUSED;
232{
233#ifdef ASM_OUTPUT_IDENT
234 if (! flag_no_ident)
235 {
236 /* Convert escapes in the string. */
237 tree value = lex_string (str->text, str->len, 0);
238 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
239 }
240#endif
241}
242
243/* Called at the start of every non-empty line. TOKEN is the first
244 lexed token on the line. Used for diagnostic line numbers. */
245static void
246cb_line_change (pfile, token, parsing_args)
247 cpp_reader *pfile ATTRIBUTE_UNUSED;
248 const cpp_token *token;
249 int parsing_args;
250{
251 if (token->type == CPP_EOF || parsing_args)
252 return;
253
254 src_lineno = SOURCE_LINE (map, token->line);
255}
256
257static void
258cb_file_change (pfile, new_map)
259 cpp_reader *pfile ATTRIBUTE_UNUSED;
260 const struct line_map *new_map;
261{
262 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
263
264 if (new_map->reason == LC_ENTER)
265 {
266 /* Don't stack the main buffer on the input stack;
267 we already did in compile_file. */
268 if (map == NULL)
269 main_input_filename = new_map->to_file;
270 else
271 {
272 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
273
274 lineno = included_at;
275 push_srcloc (new_map->to_file, 1);
276 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
277#ifndef NO_IMPLICIT_EXTERN_C
278 if (c_header_level)
279 ++c_header_level;
280 else if (new_map->sysp == 2)
281 {
282 c_header_level = 1;
283 ++pending_lang_change;
284 }
285#endif
286 }
287 }
288 else if (new_map->reason == LC_LEAVE)
289 {
290#ifndef NO_IMPLICIT_EXTERN_C
291 if (c_header_level && --c_header_level == 0)
292 {
293 if (new_map->sysp == 2)
294 warning ("badly nested C headers from preprocessor");
295 --pending_lang_change;
296 }
297#endif
298 pop_srcloc ();
299
300 (*debug_hooks->end_source_file) (to_line);
301 }
302
303 update_header_times (new_map->to_file);
304 in_system_header = new_map->sysp != 0;
305 input_filename = new_map->to_file;
306 lineno = to_line;
307 map = new_map;
308
309 /* Hook for C++. */
310 extract_interface_info ();
311}
312
313static void
314cb_def_pragma (pfile, line)
315 cpp_reader *pfile;
316 unsigned int line;
317{
318 /* Issue a warning message if we have been asked to do so. Ignore
319 unknown pragmas in system headers unless an explicit
320 -Wunknown-pragmas has been given. */
321 if (warn_unknown_pragmas > in_system_header)
322 {
323 const unsigned char *space, *name;
324 const cpp_token *s;
325
326 space = name = (const unsigned char *) "";
327 s = cpp_get_token (pfile);
328 if (s->type != CPP_EOF)
329 {
330 space = cpp_token_as_text (pfile, s);
331 s = cpp_get_token (pfile);
332 if (s->type == CPP_NAME)
333 name = cpp_token_as_text (pfile, s);
334 }
335
336 lineno = SOURCE_LINE (map, line);
337 warning ("ignoring #pragma %s %s", space, name);
338 }
339}
340
341/* #define callback for DWARF and DWARF2 debug info. */
342static void
343cb_define (pfile, line, node)
344 cpp_reader *pfile;
345 unsigned int line;
346 cpp_hashnode *node;
347{
348 (*debug_hooks->define) (SOURCE_LINE (map, line),
349 (const char *) cpp_macro_definition (pfile, node));
350}
351
352/* #undef callback for DWARF and DWARF2 debug info. */
353static void
354cb_undef (pfile, line, node)
355 cpp_reader *pfile ATTRIBUTE_UNUSED;
356 unsigned int line;
357 cpp_hashnode *node;
358{
359 (*debug_hooks->undef) (SOURCE_LINE (map, line),
360 (const char *) NODE_NAME (node));
361}
362
363#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Returns 0 for ASCII (0x00 - 0x7f) and, on EBCDIC targets, always.
   Every other value returns 1; an error is reported first when the
   value lies outside the Basic Multilingual Plane or is not listed in
   the table below.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive code point ranges that may appear in an identifier.  */
  static const struct { int lo, hi; } valid[] =
  {
    /* Latin */
    { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
    { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
    { 0x1ea0, 0x1ef9 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
    { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
    { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
    { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
    { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
    { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
    { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao.  0x0e8d was previously written 0x0e0d, which is a Thai
       code point already covered by the Thai ranges above.  */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e8d, 0x0e8d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* CJK Unified Ideographs */
    { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
    { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
    { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
    { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
    { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
    { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
    { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
    { 0x4e00, 0x9fa5 }
  };
  unsigned int i;

  /* ASCII, which runs through 0x7f inclusive.  */
  if (c <= 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof valid / sizeof valid[0]; i++)
    if (c >= valid[i].lo && c <= valid[i].hi)
      return 1;

  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
637
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are appended as a single byte.  Larger values get
   a lead byte carrying the length marker and the topmost payload bits,
   followed by continuation bytes of the form 10xxxxxx, each carrying
   the next six bits of C.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Keep only the six payload bits; without the 0x3f mask the
         higher bits of C would corrupt the 10xxxxxx marker.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
670#endif
671
672
673int
674c_lex (value)
675 tree *value;
676{
677 const cpp_token *tok;
678
679 retry:
680 timevar_push (TV_CPP);
681 do
682 tok = cpp_get_token (parse_in);
683 while (tok->type == CPP_PADDING);
684 timevar_pop (TV_CPP);
685
686 /* The C++ front end does horrible things with the current line
687 number. To ensure an accurate line number, we must reset it
688 every time we return a token. */
689 lineno = src_lineno;
690
691 *value = NULL_TREE;
692 switch (tok->type)
693 {
694 /* Issue this error here, where we can get at tok->val.c. */
695 case CPP_OTHER:
696 if (ISGRAPH (tok->val.c))
697 error ("stray '%c' in program", tok->val.c);
698 else
699 error ("stray '\\%o' in program", tok->val.c);
700 goto retry;
701
702 case CPP_NAME:
703 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
704 break;
705
706 case CPP_NUMBER:
707 {
708 unsigned int flags = cpp_classify_number (parse_in, tok);
709
710 switch (flags & CPP_N_CATEGORY)
711 {
712 case CPP_N_INVALID:
713 /* cpplib has issued an error. */
714 *value = error_mark_node;
715 break;
716
717 case CPP_N_INTEGER:
718 *value = interpret_integer (tok, flags);
719 break;
720
721 case CPP_N_FLOATING:
722 *value = interpret_float (tok, flags);
723 break;
724
725 default:
726 abort ();
727 }
728 }
729 break;
730
731 case CPP_CHAR:
732 case CPP_WCHAR:
733 *value = lex_charconst (tok);
734 break;
735
736 case CPP_STRING:
737 case CPP_WSTRING:
738 *value = lex_string (tok->val.str.text, tok->val.str.len,
739 tok->type == CPP_WSTRING);
740 break;
741
742 /* These tokens should not be visible outside cpplib. */
743 case CPP_HEADER_NAME:
744 case CPP_COMMENT:
745 case CPP_MACRO_ARG:
746 abort ();
747
748 default: break;
749 }
750
751 return tok->type;
752}
753
754/* Returns the narrowest C-visible unsigned type, starting with the
755 minimum specified by FLAGS, that can fit VALUE, or itk_none if
756 there isn't one. */
757static enum integer_type_kind
758narrowest_unsigned_type (value, flags)
759 tree value;
760 unsigned int flags;
761{
762 enum integer_type_kind itk;
763
764 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
765 itk = itk_unsigned_int;
766 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
767 itk = itk_unsigned_long;
768 else
769 itk = itk_unsigned_long_long;
770
771 /* int_fits_type_p must think the type of its first argument is
772 wider than its second argument, or it won't do the proper check. */
773 TREE_TYPE (value) = widest_unsigned_literal_type_node;
774
775 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
776 if (int_fits_type_p (value, integer_types[itk]))
777 return itk;
778
779 return itk_none;
780}
781
782/* Ditto, but narrowest signed type. */
783static enum integer_type_kind
784narrowest_signed_type (value, flags)
785 tree value;
786 unsigned int flags;
787{
788 enum integer_type_kind itk;
789
790 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
791 itk = itk_int;
792 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
793 itk = itk_long;
794 else
795 itk = itk_long_long;
796
797 /* int_fits_type_p must think the type of its first argument is
798 wider than its second argument, or it won't do the proper check. */
799 TREE_TYPE (value) = widest_unsigned_literal_type_node;
800
801 for (; itk < itk_none; itk += 2 /* skip signed types */)
802 if (int_fits_type_p (value, integer_types[itk]))
803 return itk;
804
805 return itk_none;
806}
807
808/* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
809static tree
810interpret_integer (token, flags)
811 const cpp_token *token;
812 unsigned int flags;
813{
814 tree value, type;
815 enum integer_type_kind itk;
816 cpp_num integer;
817 cpp_options *options = cpp_get_options (parse_in);
818
819 integer = cpp_interpret_integer (parse_in, token, flags);
820 integer = cpp_num_sign_extend (integer, options->precision);
821 value = build_int_2_wide (integer.low, integer.high);
822
823 /* The type of a constant with a U suffix is straightforward. */
824 if (flags & CPP_N_UNSIGNED)
825 itk = narrowest_unsigned_type (value, flags);
826 else
827 {
828 /* The type of a potentially-signed integer constant varies
829 depending on the base it's in, the standard in use, and the
830 length suffixes. */
831 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
832 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
833
834 /* In both C89 and C99, octal and hex constants may be signed or
835 unsigned, whichever fits tighter. We do not warn about this
836 choice differing from the traditional choice, as the constant
837 is probably a bit pattern and either way will work. */
838 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
839 itk = MIN (itk_u, itk_s);
840 else
841 {
842 /* In C99, decimal constants are always signed.
843 In C89, decimal constants that don't fit in long have
844 undefined behavior; we try to make them unsigned long.
845 In GCC's extended C89, that last is true of decimal
846 constants that don't fit in long long, too. */
847
848 itk = itk_s;
849 if (itk_s > itk_u && itk_s > itk_long)
850 {
851 if (!flag_isoc99)
852 {
853 if (itk_u < itk_unsigned_long)
854 itk_u = itk_unsigned_long;
855 itk = itk_u;
856 warning ("this decimal constant is unsigned only in ISO C90");
857 }
858 else if (warn_traditional)
859 warning ("this decimal constant would be unsigned in ISO C90");
860 }
861 }
862 }
863
864 if (itk == itk_none)
865 /* cpplib has already issued a warning for overflow. */
866 type = ((flags & CPP_N_UNSIGNED)
867 ? widest_unsigned_literal_type_node
868 : widest_integer_literal_type_node);
869 else
870 type = integer_types[itk];
871
872 if (itk > itk_unsigned_long
873 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
874 && ! in_system_header && ! flag_isoc99)
875 pedwarn ("integer constant is too large for \"%s\" type",
876 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
877
878 TREE_TYPE (value) = type;
879
880 /* Convert imaginary to a complex type. */
881 if (flags & CPP_N_IMAGINARY)
882 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
883
884 return value;
885}
886
887/* Interpret TOKEN, a floating point number with FLAGS as classified
888 by cpplib. */
889static tree
890interpret_float (token, flags)
891 const cpp_token *token;
892 unsigned int flags;
893{
894 tree type;
895 tree value;
896 REAL_VALUE_TYPE real;
897 char *copy;
898 size_t copylen;
899 const char *typename;
900
901 /* FIXME: make %T work in error/warning, then we don't need typename. */
902 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
903 {
904 type = long_double_type_node;
905 typename = "long double";
906 }
907 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
908 || flag_single_precision_constant)
909 {
910 type = float_type_node;
911 typename = "float";
912 }
913 else
914 {
915 type = double_type_node;
916 typename = "double";
917 }
918
919 /* Copy the constant to a nul-terminated buffer. If the constant
920 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
921 can't handle them. */
922 copylen = token->val.str.len;
923 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
924 /* Must be an F or L suffix. */
925 copylen--;
926 if (flags & CPP_N_IMAGINARY)
927 /* I or J suffix. */
928 copylen--;
929
930 copy = alloca (copylen + 1);
931 memcpy (copy, token->val.str.text, copylen);
932 copy[copylen] = '\0';
933
934 real_from_string (&real, copy);
935 real_convert (&real, TYPE_MODE (type), &real);
936
937 /* A diagnostic is required for "soft" overflow by some ISO C
938 testsuites. This is not pedwarn, because some people don't want
939 an error for this.
940 ??? That's a dubious reason... is this a mandatory diagnostic or
941 isn't it? -- zw, 2001-08-21. */
942 if (REAL_VALUE_ISINF (real) && pedantic)
943 warning ("floating constant exceeds range of \"%s\"", typename);
944
945 /* Create a node with determined type and value. */
946 value = build_real (type, real);
947 if (flags & CPP_N_IMAGINARY)
948 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
949
950 return value;
951}
952
953static tree
954lex_string (str, len, wide)
955 const unsigned char *str;
956 unsigned int len;
957 int wide;
958{
959 tree value;
960 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
961 char *q = buf;
962 const unsigned char *p = str, *limit = str + len;
963 cppchar_t c;
964
965#ifdef MULTIBYTE_CHARS
966 /* Reset multibyte conversion state. */
967 (void) local_mbtowc (NULL, NULL, 0);
968#endif
969
970 while (p < limit)
971 {
972#ifdef MULTIBYTE_CHARS
973 wchar_t wc;
974 int char_len;
975
976 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
977 if (char_len == -1)
978 {
979 warning ("ignoring invalid multibyte character");
980 char_len = 1;
981 c = *p++;
982 }
983 else
984 {
985 p += char_len;
986 c = wc;
987 }
988#else
989 c = *p++;
990#endif
991
992 if (c == '\\' && !ignore_escape_flag)
993 c = cpp_parse_escape (parse_in, &p, limit, wide);
994
995 /* Add this single character into the buffer either as a wchar_t,
996 a multibyte sequence, or as a single byte. */
997 if (wide)
998 {
999 unsigned charwidth = TYPE_PRECISION (char_type_node);
1000 unsigned bytemask = (1 << charwidth) - 1;
1001 int byte;
1002
1003 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1004 {
1005 int n;
1006 if (byte >= (int) sizeof (c))
1007 n = 0;
1008 else
1009 n = (c >> (byte * charwidth)) & bytemask;
1010 if (BYTES_BIG_ENDIAN)
1011 q[WCHAR_BYTES - byte - 1] = n;
1012 else
1013 q[byte] = n;
1014 }
1015 q += WCHAR_BYTES;
1016 }
1017#ifdef MULTIBYTE_CHARS
1018 else if (char_len > 1)
1019 {
1020 /* We're dealing with a multibyte character. */
1021 for ( ; char_len >0; --char_len)
1022 {
1023 *q++ = *(p - char_len);
1024 }
1025 }
1026#endif
1027 else
1028 {
1029 *q++ = c;
1030 }
1031 }
1032
1033 /* Terminate the string value, either with a single byte zero
1034 or with a wide zero. */
1035
1036 if (wide)
1037 {
1038 memset (q, 0, WCHAR_BYTES);
1039 q += WCHAR_BYTES;
1040 }
1041 else
1042 {
1043 *q++ = '\0';
1044 }
1045
1046 value = build_string (q - buf, buf);
1047
1048 if (wide)
1049 TREE_TYPE (value) = wchar_array_type_node;
1050 else
1051 TREE_TYPE (value) = char_array_type_node;
1052 return value;
1053}
1054
1055/* Converts a (possibly wide) character constant token into a tree. */
1056static tree
1057lex_charconst (token)
1058 const cpp_token *token;
1059{
1060 cppchar_t result;
1061 tree type, value;
1062 unsigned int chars_seen;
1063 int unsignedp;
1064
1065 result = cpp_interpret_charconst (parse_in, token,
1066 &chars_seen, &unsignedp);
1067
1068 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1069 before possibly widening to HOST_WIDE_INT for build_int_2. */
1070 if (unsignedp || (cppchar_signed_t) result >= 0)
1071 value = build_int_2 (result, 0);
1072 else
1073 value = build_int_2 ((cppchar_signed_t) result, -1);
1074
1075 if (token->type == CPP_WCHAR)
1076 type = wchar_type_node;
1077 /* In C, a character constant has type 'int'.
1078 In C++ 'char', but multi-char charconsts have type 'int'. */
1079 else if ((c_language == clk_c) || chars_seen > 1)
1080 type = integer_type_node;
1081 else
1082 type = char_type_node;
1083
1084 TREE_TYPE (value) = type;
1085 return value;
1086}
Note: See TracBrowser for help on using the repository browser.