1 | /* CPP Library - lexical analysis.
|
---|
2 | Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
|
---|
3 | Contributed by Per Bothner, 1994-95.
|
---|
4 | Based on CCCP program by Paul Rubin, June 1986
|
---|
5 | Adapted to ANSI C, Richard Stallman, Jan 1987
|
---|
6 | Broken out to separate file, Zack Weinberg, Mar 2000
|
---|
7 | Single-pass line tokenization by Neil Booth, April 2000
|
---|
8 |
|
---|
9 | This program is free software; you can redistribute it and/or modify it
|
---|
10 | under the terms of the GNU General Public License as published by the
|
---|
11 | Free Software Foundation; either version 2, or (at your option) any
|
---|
12 | later version.
|
---|
13 |
|
---|
14 | This program is distributed in the hope that it will be useful,
|
---|
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
17 | GNU General Public License for more details.
|
---|
18 |
|
---|
19 | You should have received a copy of the GNU General Public License
|
---|
20 | along with this program; if not, write to the Free Software
|
---|
21 | Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
|
---|
22 |
|
---|
23 | #include "config.h"
|
---|
24 | #include "system.h"
|
---|
25 | #include "cpplib.h"
|
---|
26 | #include "cpphash.h"
|
---|
27 |
|
---|
28 | #ifdef MULTIBYTE_CHARS
|
---|
29 | #include "mbchar.h"
|
---|
30 | #include <locale.h>
|
---|
31 | #endif
|
---|
32 |
|
---|
/* How a token's spelling is obtained.  Tokens with SPELL_STRING store
   their spelling in the token list, and its length in
   token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR     = 1,
  SPELL_IDENT    = 2,
  SPELL_NUMBER   = 3,
  SPELL_STRING   = 4,
  SPELL_NONE     = 5
};

/* One entry of the per-token-type spelling table: the spelling
   category of the token type, and a name string for it.  */
struct token_spelling
{
  enum spell_type category;	/* Which spelling category applies.  */
  const unsigned char *name;	/* Spelling / name text for the type.  */
};
50 |
|
---|
51 | static const unsigned char *const digraph_spellings[] =
|
---|
52 | { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
|
---|
53 |
|
---|
54 | #define OP(e, s) { SPELL_OPERATOR, U s },
|
---|
55 | #define TK(e, s) { s, U STRINGX (e) },
|
---|
56 | static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
|
---|
57 | #undef OP
|
---|
58 | #undef TK
|
---|
59 |
|
---|
60 | #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
|
---|
61 | #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
|
---|
62 | #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
|
---|
63 |
|
---|
64 | static void handle_newline PARAMS ((cpp_reader *));
|
---|
65 | static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
|
---|
66 | static cppchar_t get_effective_char PARAMS ((cpp_reader *));
|
---|
67 |
|
---|
68 | static int skip_block_comment PARAMS ((cpp_reader *));
|
---|
69 | static int skip_line_comment PARAMS ((cpp_reader *));
|
---|
70 | static void adjust_column PARAMS ((cpp_reader *));
|
---|
71 | static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
|
---|
72 | static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
|
---|
73 | static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
|
---|
74 | unsigned int *));
|
---|
75 | static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
|
---|
76 | static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
|
---|
77 | static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
|
---|
78 | static bool trigraph_p PARAMS ((cpp_reader *));
|
---|
79 | static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
|
---|
80 | cppchar_t));
|
---|
81 | static bool continue_after_nul PARAMS ((cpp_reader *));
|
---|
82 | static int name_p PARAMS ((cpp_reader *, const cpp_string *));
|
---|
83 | static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
|
---|
84 | const unsigned char *, cppchar_t *));
|
---|
85 | static tokenrun *next_tokenrun PARAMS ((tokenrun *));
|
---|
86 |
|
---|
87 | static unsigned int hex_digit_value PARAMS ((unsigned int));
|
---|
88 | static _cpp_buff *new_buff PARAMS ((size_t));
|
---|
89 |
|
---|
90 | /* Utility routine:
|
---|
91 |
|
---|
92 | Compares, the token TOKEN to the NUL-terminated string STRING.
|
---|
93 | TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
|
---|
94 | int
|
---|
95 | cpp_ideq (token, string)
|
---|
96 | const cpp_token *token;
|
---|
97 | const char *string;
|
---|
98 | {
|
---|
99 | if (token->type != CPP_NAME)
|
---|
100 | return 0;
|
---|
101 |
|
---|
102 | return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
|
---|
103 | }
|
---|
104 |
|
---|
105 | /* Call when meeting a newline, assumed to be in buffer->cur[-1].
|
---|
106 | Returns with buffer->cur pointing to the character immediately
|
---|
107 | following the newline (combination). */
|
---|
108 | static void
|
---|
109 | handle_newline (pfile)
|
---|
110 | cpp_reader *pfile;
|
---|
111 | {
|
---|
112 | cpp_buffer *buffer = pfile->buffer;
|
---|
113 |
|
---|
114 | /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
|
---|
115 | only accept CR-LF; maybe we should fall back to that behavior? */
|
---|
116 | if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
|
---|
117 | buffer->cur++;
|
---|
118 |
|
---|
119 | buffer->line_base = buffer->cur;
|
---|
120 | buffer->col_adjust = 0;
|
---|
121 | pfile->line++;
|
---|
122 | }
|
---|
123 |
|
---|
124 | /* Subroutine of skip_escaped_newlines; called when a 3-character
|
---|
125 | sequence beginning with "??" is encountered. buffer->cur points to
|
---|
126 | the second '?'.
|
---|
127 |
|
---|
128 | Warn if necessary, and returns true if the sequence forms a
|
---|
129 | trigraph and the trigraph should be honored. */
|
---|
130 | static bool
|
---|
131 | trigraph_p (pfile)
|
---|
132 | cpp_reader *pfile;
|
---|
133 | {
|
---|
134 | cpp_buffer *buffer = pfile->buffer;
|
---|
135 | cppchar_t from_char = buffer->cur[1];
|
---|
136 | bool accept;
|
---|
137 |
|
---|
138 | if (!_cpp_trigraph_map[from_char])
|
---|
139 | return false;
|
---|
140 |
|
---|
141 | accept = CPP_OPTION (pfile, trigraphs);
|
---|
142 |
|
---|
143 | /* Don't warn about trigraphs in comments. */
|
---|
144 | if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
|
---|
145 | {
|
---|
146 | if (accept)
|
---|
147 | cpp_error_with_line (pfile, DL_WARNING,
|
---|
148 | pfile->line, CPP_BUF_COL (buffer) - 1,
|
---|
149 | "trigraph ??%c converted to %c",
|
---|
150 | (int) from_char,
|
---|
151 | (int) _cpp_trigraph_map[from_char]);
|
---|
152 | else if (buffer->cur != buffer->last_Wtrigraphs)
|
---|
153 | {
|
---|
154 | buffer->last_Wtrigraphs = buffer->cur;
|
---|
155 | cpp_error_with_line (pfile, DL_WARNING,
|
---|
156 | pfile->line, CPP_BUF_COL (buffer) - 1,
|
---|
157 | "trigraph ??%c ignored", (int) from_char);
|
---|
158 | }
|
---|
159 | }
|
---|
160 |
|
---|
161 | return accept;
|
---|
162 | }
|
---|
163 |
|
---|
164 | /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
|
---|
165 | lie in buffer->cur[-1]. Returns the next byte, which will be in
|
---|
166 | buffer->cur[-1]. This routine performs preprocessing stages 1 and
|
---|
167 | 2 of the ISO C standard. */
|
---|
168 | static cppchar_t
|
---|
169 | skip_escaped_newlines (pfile)
|
---|
170 | cpp_reader *pfile;
|
---|
171 | {
|
---|
172 | cpp_buffer *buffer = pfile->buffer;
|
---|
173 | cppchar_t next = buffer->cur[-1];
|
---|
174 |
|
---|
175 | /* Only do this if we apply stages 1 and 2. */
|
---|
176 | if (!buffer->from_stage3)
|
---|
177 | {
|
---|
178 | const unsigned char *saved_cur;
|
---|
179 | cppchar_t next1;
|
---|
180 |
|
---|
181 | do
|
---|
182 | {
|
---|
183 | if (next == '?')
|
---|
184 | {
|
---|
185 | if (buffer->cur[0] != '?' || !trigraph_p (pfile))
|
---|
186 | break;
|
---|
187 |
|
---|
188 | /* Translate the trigraph. */
|
---|
189 | next = _cpp_trigraph_map[buffer->cur[1]];
|
---|
190 | buffer->cur += 2;
|
---|
191 | if (next != '\\')
|
---|
192 | break;
|
---|
193 | }
|
---|
194 |
|
---|
195 | if (buffer->cur == buffer->rlimit)
|
---|
196 | break;
|
---|
197 |
|
---|
198 | /* We have a backslash, and room for at least one more
|
---|
199 | character. Skip horizontal whitespace. */
|
---|
200 | saved_cur = buffer->cur;
|
---|
201 | do
|
---|
202 | next1 = *buffer->cur++;
|
---|
203 | while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
|
---|
204 |
|
---|
205 | if (!is_vspace (next1))
|
---|
206 | {
|
---|
207 | buffer->cur = saved_cur;
|
---|
208 | break;
|
---|
209 | }
|
---|
210 |
|
---|
211 | if (saved_cur != buffer->cur - 1
|
---|
212 | && !pfile->state.lexing_comment)
|
---|
213 | cpp_error (pfile, DL_WARNING,
|
---|
214 | "backslash and newline separated by space");
|
---|
215 |
|
---|
216 | handle_newline (pfile);
|
---|
217 | buffer->backup_to = buffer->cur;
|
---|
218 | if (buffer->cur == buffer->rlimit)
|
---|
219 | {
|
---|
220 | cpp_error (pfile, DL_PEDWARN,
|
---|
221 | "backslash-newline at end of file");
|
---|
222 | next = EOF;
|
---|
223 | }
|
---|
224 | else
|
---|
225 | next = *buffer->cur++;
|
---|
226 | }
|
---|
227 | while (next == '\\' || next == '?');
|
---|
228 | }
|
---|
229 |
|
---|
230 | return next;
|
---|
231 | }
|
---|
232 |
|
---|
233 | /* Obtain the next character, after trigraph conversion and skipping
|
---|
234 | an arbitrarily long string of escaped newlines. The common case of
|
---|
235 | no trigraphs or escaped newlines falls through quickly. On return,
|
---|
236 | buffer->backup_to points to where to return to if the character is
|
---|
237 | not to be processed. */
|
---|
238 | static cppchar_t
|
---|
239 | get_effective_char (pfile)
|
---|
240 | cpp_reader *pfile;
|
---|
241 | {
|
---|
242 | cppchar_t next;
|
---|
243 | cpp_buffer *buffer = pfile->buffer;
|
---|
244 |
|
---|
245 | buffer->backup_to = buffer->cur;
|
---|
246 | next = *buffer->cur++;
|
---|
247 | if (__builtin_expect (next == '?' || next == '\\', 0))
|
---|
248 | next = skip_escaped_newlines (pfile);
|
---|
249 |
|
---|
250 | return next;
|
---|
251 | }
|
---|
252 |
|
---|
253 | /* Skip a C-style block comment. We find the end of the comment by
|
---|
254 | seeing if an asterisk is before every '/' we encounter. Returns
|
---|
255 | nonzero if comment terminated by EOF, zero otherwise. */
|
---|
256 | static int
|
---|
257 | skip_block_comment (pfile)
|
---|
258 | cpp_reader *pfile;
|
---|
259 | {
|
---|
260 | cpp_buffer *buffer = pfile->buffer;
|
---|
261 | cppchar_t c = EOF, prevc = EOF;
|
---|
262 |
|
---|
263 | pfile->state.lexing_comment = 1;
|
---|
264 | while (buffer->cur != buffer->rlimit)
|
---|
265 | {
|
---|
266 | prevc = c, c = *buffer->cur++;
|
---|
267 |
|
---|
268 | /* FIXME: For speed, create a new character class of characters
|
---|
269 | of interest inside block comments. */
|
---|
270 | if (c == '?' || c == '\\')
|
---|
271 | c = skip_escaped_newlines (pfile);
|
---|
272 |
|
---|
273 | /* People like decorating comments with '*', so check for '/'
|
---|
274 | instead for efficiency. */
|
---|
275 | if (c == '/')
|
---|
276 | {
|
---|
277 | if (prevc == '*')
|
---|
278 | break;
|
---|
279 |
|
---|
280 | /* Warn about potential nested comments, but not if the '/'
|
---|
281 | comes immediately before the true comment delimiter.
|
---|
282 | Don't bother to get it right across escaped newlines. */
|
---|
283 | if (CPP_OPTION (pfile, warn_comments)
|
---|
284 | && buffer->cur[0] == '*' && buffer->cur[1] != '/')
|
---|
285 | cpp_error_with_line (pfile, DL_WARNING,
|
---|
286 | pfile->line, CPP_BUF_COL (buffer),
|
---|
287 | "\"/*\" within comment");
|
---|
288 | }
|
---|
289 | else if (is_vspace (c))
|
---|
290 | handle_newline (pfile);
|
---|
291 | else if (c == '\t')
|
---|
292 | adjust_column (pfile);
|
---|
293 | }
|
---|
294 |
|
---|
295 | pfile->state.lexing_comment = 0;
|
---|
296 | return c != '/' || prevc != '*';
|
---|
297 | }
|
---|
298 |
|
---|
299 | /* Skip a C++ line comment, leaving buffer->cur pointing to the
|
---|
300 | terminating newline. Handles escaped newlines. Returns nonzero
|
---|
301 | if a multiline comment. */
|
---|
302 | static int
|
---|
303 | skip_line_comment (pfile)
|
---|
304 | cpp_reader *pfile;
|
---|
305 | {
|
---|
306 | cpp_buffer *buffer = pfile->buffer;
|
---|
307 | unsigned int orig_line = pfile->line;
|
---|
308 | cppchar_t c;
|
---|
309 | #ifdef MULTIBYTE_CHARS
|
---|
310 | wchar_t wc;
|
---|
311 | int char_len;
|
---|
312 | #endif
|
---|
313 |
|
---|
314 | pfile->state.lexing_comment = 1;
|
---|
315 | #ifdef MULTIBYTE_CHARS
|
---|
316 | /* Reset multibyte conversion state. */
|
---|
317 | (void) local_mbtowc (NULL, NULL, 0);
|
---|
318 | #endif
|
---|
319 | do
|
---|
320 | {
|
---|
321 | if (buffer->cur == buffer->rlimit)
|
---|
322 | goto at_eof;
|
---|
323 |
|
---|
324 | #ifdef MULTIBYTE_CHARS
|
---|
325 | char_len = local_mbtowc (&wc, (const char *) buffer->cur,
|
---|
326 | buffer->rlimit - buffer->cur);
|
---|
327 | if (char_len == -1)
|
---|
328 | {
|
---|
329 | cpp_error (pfile, DL_WARNING,
|
---|
330 | "ignoring invalid multibyte character");
|
---|
331 | char_len = 1;
|
---|
332 | c = *buffer->cur++;
|
---|
333 | }
|
---|
334 | else
|
---|
335 | {
|
---|
336 | buffer->cur += char_len;
|
---|
337 | c = wc;
|
---|
338 | }
|
---|
339 | #else
|
---|
340 | c = *buffer->cur++;
|
---|
341 | #endif
|
---|
342 | if (c == '?' || c == '\\')
|
---|
343 | c = skip_escaped_newlines (pfile);
|
---|
344 | }
|
---|
345 | while (!is_vspace (c));
|
---|
346 |
|
---|
347 | /* Step back over the newline, except at EOF. */
|
---|
348 | buffer->cur--;
|
---|
349 | at_eof:
|
---|
350 |
|
---|
351 | pfile->state.lexing_comment = 0;
|
---|
352 | return orig_line != pfile->line;
|
---|
353 | }
|
---|
354 |
|
---|
355 | /* pfile->buffer->cur is one beyond the \t character. Update
|
---|
356 | col_adjust so we track the column correctly. */
|
---|
357 | static void
|
---|
358 | adjust_column (pfile)
|
---|
359 | cpp_reader *pfile;
|
---|
360 | {
|
---|
361 | cpp_buffer *buffer = pfile->buffer;
|
---|
362 | unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
|
---|
363 |
|
---|
364 | /* Round it up to multiple of the tabstop, but subtract 1 since the
|
---|
365 | tab itself occupies a character position. */
|
---|
366 | buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
|
---|
367 | - col % CPP_OPTION (pfile, tabstop)) - 1;
|
---|
368 | }
|
---|
369 |
|
---|
370 | /* Skips whitespace, saving the next non-whitespace character.
|
---|
371 | Adjusts pfile->col_adjust to account for tabs. Without this,
|
---|
372 | tokens might be assigned an incorrect column. */
|
---|
373 | static int
|
---|
374 | skip_whitespace (pfile, c)
|
---|
375 | cpp_reader *pfile;
|
---|
376 | cppchar_t c;
|
---|
377 | {
|
---|
378 | cpp_buffer *buffer = pfile->buffer;
|
---|
379 | unsigned int warned = 0;
|
---|
380 |
|
---|
381 | do
|
---|
382 | {
|
---|
383 | /* Horizontal space always OK. */
|
---|
384 | if (c == ' ')
|
---|
385 | ;
|
---|
386 | else if (c == '\t')
|
---|
387 | adjust_column (pfile);
|
---|
388 | /* Just \f \v or \0 left. */
|
---|
389 | else if (c == '\0')
|
---|
390 | {
|
---|
391 | if (buffer->cur - 1 == buffer->rlimit)
|
---|
392 | return 0;
|
---|
393 | if (!warned)
|
---|
394 | {
|
---|
395 | cpp_error (pfile, DL_WARNING, "null character(s) ignored");
|
---|
396 | warned = 1;
|
---|
397 | }
|
---|
398 | }
|
---|
399 | else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
|
---|
400 | cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
|
---|
401 | CPP_BUF_COL (buffer),
|
---|
402 | "%s in preprocessing directive",
|
---|
403 | c == '\f' ? "form feed" : "vertical tab");
|
---|
404 |
|
---|
405 | c = *buffer->cur++;
|
---|
406 | }
|
---|
407 | /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
|
---|
408 | while (is_nvspace (c));
|
---|
409 |
|
---|
410 | buffer->cur--;
|
---|
411 | return 1;
|
---|
412 | }
|
---|
413 |
|
---|
414 | /* See if the characters of a number token are valid in a name (no
|
---|
415 | '.', '+' or '-'). */
|
---|
416 | static int
|
---|
417 | name_p (pfile, string)
|
---|
418 | cpp_reader *pfile;
|
---|
419 | const cpp_string *string;
|
---|
420 | {
|
---|
421 | unsigned int i;
|
---|
422 |
|
---|
423 | for (i = 0; i < string->len; i++)
|
---|
424 | if (!is_idchar (string->text[i]))
|
---|
425 | return 0;
|
---|
426 |
|
---|
427 | return 1;
|
---|
428 | }
|
---|
429 |
|
---|
430 | /* Parse an identifier, skipping embedded backslash-newlines. This is
|
---|
431 | a critical inner loop. The common case is an identifier which has
|
---|
432 | not been split by backslash-newline, does not contain a dollar
|
---|
433 | sign, and has already been scanned (roughly 10:1 ratio of
|
---|
434 | seen:unseen identifiers in normal code; the distribution is
|
---|
435 | Poisson-like). Second most common case is a new identifier, not
|
---|
436 | split and no dollar sign. The other possibilities are rare and
|
---|
437 | have been relegated to parse_slow. */
|
---|
438 | static cpp_hashnode *
|
---|
439 | parse_identifier (pfile)
|
---|
440 | cpp_reader *pfile;
|
---|
441 | {
|
---|
442 | cpp_hashnode *result;
|
---|
443 | const uchar *cur, *base;
|
---|
444 |
|
---|
445 | /* Fast-path loop. Skim over a normal identifier.
|
---|
446 | N.B. ISIDNUM does not include $. */
|
---|
447 | cur = pfile->buffer->cur;
|
---|
448 | while (ISIDNUM (*cur))
|
---|
449 | cur++;
|
---|
450 |
|
---|
451 | /* Check for slow-path cases. */
|
---|
452 | if (*cur == '?' || *cur == '\\' || *cur == '$')
|
---|
453 | {
|
---|
454 | unsigned int len;
|
---|
455 |
|
---|
456 | base = parse_slow (pfile, cur, 0, &len);
|
---|
457 | result = (cpp_hashnode *)
|
---|
458 | ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
|
---|
459 | }
|
---|
460 | else
|
---|
461 | {
|
---|
462 | base = pfile->buffer->cur - 1;
|
---|
463 | pfile->buffer->cur = cur;
|
---|
464 | result = (cpp_hashnode *)
|
---|
465 | ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
|
---|
466 | }
|
---|
467 |
|
---|
468 | /* Rarely, identifiers require diagnostics when lexed.
|
---|
469 | XXX Has to be forced out of the fast path. */
|
---|
470 | if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
|
---|
471 | && !pfile->state.skipping, 0))
|
---|
472 | {
|
---|
473 | /* It is allowed to poison the same identifier twice. */
|
---|
474 | if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
|
---|
475 | cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
|
---|
476 | NODE_NAME (result));
|
---|
477 |
|
---|
478 | /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
|
---|
479 | replacement list of a variadic macro. */
|
---|
480 | if (result == pfile->spec_nodes.n__VA_ARGS__
|
---|
481 | && !pfile->state.va_args_ok)
|
---|
482 | cpp_error (pfile, DL_PEDWARN,
|
---|
483 | "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
|
---|
484 | }
|
---|
485 |
|
---|
486 | return result;
|
---|
487 | }
|
---|
488 |
|
---|
489 | /* Slow path. This handles numbers and identifiers which have been
|
---|
490 | split, or contain dollar signs. The part of the token from
|
---|
491 | PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
|
---|
492 | 1 if it's a number, and 2 if it has a leading period. Returns a
|
---|
493 | pointer to the token's NUL-terminated spelling in permanent
|
---|
494 | storage, and sets PLEN to its length. */
|
---|
495 | static uchar *
|
---|
496 | parse_slow (pfile, cur, number_p, plen)
|
---|
497 | cpp_reader *pfile;
|
---|
498 | const uchar *cur;
|
---|
499 | int number_p;
|
---|
500 | unsigned int *plen;
|
---|
501 | {
|
---|
502 | cpp_buffer *buffer = pfile->buffer;
|
---|
503 | const uchar *base = buffer->cur - 1;
|
---|
504 | struct obstack *stack = &pfile->hash_table->stack;
|
---|
505 | unsigned int c, prevc, saw_dollar = 0;
|
---|
506 |
|
---|
507 | /* Place any leading period. */
|
---|
508 | if (number_p == 2)
|
---|
509 | obstack_1grow (stack, '.');
|
---|
510 |
|
---|
511 | /* Copy the part of the token which is known to be okay. */
|
---|
512 | obstack_grow (stack, base, cur - base);
|
---|
513 |
|
---|
514 | /* Now process the part which isn't. We are looking at one of
|
---|
515 | '$', '\\', or '?' on entry to this loop. */
|
---|
516 | prevc = cur[-1];
|
---|
517 | c = *cur++;
|
---|
518 | buffer->cur = cur;
|
---|
519 | for (;;)
|
---|
520 | {
|
---|
521 | /* Potential escaped newline? */
|
---|
522 | buffer->backup_to = buffer->cur - 1;
|
---|
523 | if (c == '?' || c == '\\')
|
---|
524 | c = skip_escaped_newlines (pfile);
|
---|
525 |
|
---|
526 | if (!is_idchar (c))
|
---|
527 | {
|
---|
528 | if (!number_p)
|
---|
529 | break;
|
---|
530 | if (c != '.' && !VALID_SIGN (c, prevc))
|
---|
531 | break;
|
---|
532 | }
|
---|
533 |
|
---|
534 | /* Handle normal identifier characters in this loop. */
|
---|
535 | do
|
---|
536 | {
|
---|
537 | prevc = c;
|
---|
538 | obstack_1grow (stack, c);
|
---|
539 |
|
---|
540 | if (c == '$')
|
---|
541 | saw_dollar++;
|
---|
542 |
|
---|
543 | c = *buffer->cur++;
|
---|
544 | }
|
---|
545 | while (is_idchar (c));
|
---|
546 | }
|
---|
547 |
|
---|
548 | /* Step back over the unwanted char. */
|
---|
549 | BACKUP ();
|
---|
550 |
|
---|
551 | /* $ is not an identifier character in the standard, but is commonly
|
---|
552 | accepted as an extension. Don't warn about it in skipped
|
---|
553 | conditional blocks. */
|
---|
554 | if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
|
---|
555 | cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
|
---|
556 |
|
---|
557 | /* Identifiers and numbers are null-terminated. */
|
---|
558 | *plen = obstack_object_size (stack);
|
---|
559 | obstack_1grow (stack, '\0');
|
---|
560 | return obstack_finish (stack);
|
---|
561 | }
|
---|
562 |
|
---|
563 | /* Parse a number, beginning with character C, skipping embedded
|
---|
564 | backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
|
---|
565 | before C. Place the result in NUMBER. */
|
---|
566 | static void
|
---|
567 | parse_number (pfile, number, leading_period)
|
---|
568 | cpp_reader *pfile;
|
---|
569 | cpp_string *number;
|
---|
570 | int leading_period;
|
---|
571 | {
|
---|
572 | const uchar *cur;
|
---|
573 |
|
---|
574 | /* Fast-path loop. Skim over a normal number.
|
---|
575 | N.B. ISIDNUM does not include $. */
|
---|
576 | cur = pfile->buffer->cur;
|
---|
577 | while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
|
---|
578 | cur++;
|
---|
579 |
|
---|
580 | /* Check for slow-path cases. */
|
---|
581 | if (*cur == '?' || *cur == '\\' || *cur == '$')
|
---|
582 | number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
|
---|
583 | else
|
---|
584 | {
|
---|
585 | const uchar *base = pfile->buffer->cur - 1;
|
---|
586 | uchar *dest;
|
---|
587 |
|
---|
588 | number->len = cur - base + leading_period;
|
---|
589 | dest = _cpp_unaligned_alloc (pfile, number->len + 1);
|
---|
590 | dest[number->len] = '\0';
|
---|
591 | number->text = dest;
|
---|
592 |
|
---|
593 | if (leading_period)
|
---|
594 | *dest++ = '.';
|
---|
595 | memcpy (dest, base, cur - base);
|
---|
596 | pfile->buffer->cur = cur;
|
---|
597 | }
|
---|
598 | }
|
---|
599 |
|
---|
600 | /* Subroutine of parse_string. */
|
---|
601 | static int
|
---|
602 | unescaped_terminator_p (pfile, dest)
|
---|
603 | cpp_reader *pfile;
|
---|
604 | const unsigned char *dest;
|
---|
605 | {
|
---|
606 | const unsigned char *start, *temp;
|
---|
607 |
|
---|
608 | /* In #include-style directives, terminators are not escapeable. */
|
---|
609 | if (pfile->state.angled_headers)
|
---|
610 | return 1;
|
---|
611 |
|
---|
612 | start = BUFF_FRONT (pfile->u_buff);
|
---|
613 |
|
---|
614 | /* An odd number of consecutive backslashes represents an escaped
|
---|
615 | terminator. */
|
---|
616 | for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
|
---|
617 | ;
|
---|
618 |
|
---|
619 | return ((dest - temp) & 1) == 0;
|
---|
620 | }
|
---|
621 |
|
---|
622 | /* Parses a string, character constant, or angle-bracketed header file
|
---|
623 | name. Handles embedded trigraphs and escaped newlines. The stored
|
---|
624 | string is guaranteed NUL-terminated, but it is not guaranteed that
|
---|
625 | this is the first NUL since embedded NULs are preserved.
|
---|
626 |
|
---|
627 | When this function returns, buffer->cur points to the next
|
---|
628 | character to be processed. */
|
---|
629 | static void
|
---|
630 | parse_string (pfile, token, terminator)
|
---|
631 | cpp_reader *pfile;
|
---|
632 | cpp_token *token;
|
---|
633 | cppchar_t terminator;
|
---|
634 | {
|
---|
635 | cpp_buffer *buffer = pfile->buffer;
|
---|
636 | unsigned char *dest, *limit;
|
---|
637 | cppchar_t c;
|
---|
638 | bool warned_nulls = false;
|
---|
639 | #ifdef MULTIBYTE_CHARS
|
---|
640 | wchar_t wc;
|
---|
641 | int char_len;
|
---|
642 | #endif
|
---|
643 |
|
---|
644 | dest = BUFF_FRONT (pfile->u_buff);
|
---|
645 | limit = BUFF_LIMIT (pfile->u_buff);
|
---|
646 |
|
---|
647 | #ifdef MULTIBYTE_CHARS
|
---|
648 | /* Reset multibyte conversion state. */
|
---|
649 | (void) local_mbtowc (NULL, NULL, 0);
|
---|
650 | #endif
|
---|
651 | for (;;)
|
---|
652 | {
|
---|
653 | /* We need room for another char, possibly the terminating NUL. */
|
---|
654 | if ((size_t) (limit - dest) < 1)
|
---|
655 | {
|
---|
656 | size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
|
---|
657 | _cpp_extend_buff (pfile, &pfile->u_buff, 2);
|
---|
658 | dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
|
---|
659 | limit = BUFF_LIMIT (pfile->u_buff);
|
---|
660 | }
|
---|
661 |
|
---|
662 | #ifdef MULTIBYTE_CHARS
|
---|
663 | char_len = local_mbtowc (&wc, (const char *) buffer->cur,
|
---|
664 | buffer->rlimit - buffer->cur);
|
---|
665 | if (char_len == -1)
|
---|
666 | {
|
---|
667 | cpp_error (pfile, DL_WARNING,
|
---|
668 | "ignoring invalid multibyte character");
|
---|
669 | char_len = 1;
|
---|
670 | c = *buffer->cur++;
|
---|
671 | }
|
---|
672 | else
|
---|
673 | {
|
---|
674 | buffer->cur += char_len;
|
---|
675 | c = wc;
|
---|
676 | }
|
---|
677 | #else
|
---|
678 | c = *buffer->cur++;
|
---|
679 | #endif
|
---|
680 |
|
---|
681 | /* Handle trigraphs, escaped newlines etc. */
|
---|
682 | if (c == '?' || c == '\\')
|
---|
683 | c = skip_escaped_newlines (pfile);
|
---|
684 |
|
---|
685 | if (c == terminator)
|
---|
686 | {
|
---|
687 | if (unescaped_terminator_p (pfile, dest))
|
---|
688 | break;
|
---|
689 | }
|
---|
690 | else if (is_vspace (c))
|
---|
691 | {
|
---|
692 | /* No string literal may extend over multiple lines. In
|
---|
693 | assembly language, suppress the error except for <>
|
---|
694 | includes. This is a kludge around not knowing where
|
---|
695 | comments are. */
|
---|
696 | unterminated:
|
---|
697 | if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
|
---|
698 | cpp_error (pfile, DL_ERROR, "missing terminating %c character",
|
---|
699 | (int) terminator);
|
---|
700 | buffer->cur--;
|
---|
701 | break;
|
---|
702 | }
|
---|
703 | else if (c == '\0')
|
---|
704 | {
|
---|
705 | if (buffer->cur - 1 == buffer->rlimit)
|
---|
706 | goto unterminated;
|
---|
707 | if (!warned_nulls)
|
---|
708 | {
|
---|
709 | warned_nulls = true;
|
---|
710 | cpp_error (pfile, DL_WARNING,
|
---|
711 | "null character(s) preserved in literal");
|
---|
712 | }
|
---|
713 | }
|
---|
714 | #ifdef MULTIBYTE_CHARS
|
---|
715 | if (char_len > 1)
|
---|
716 | {
|
---|
717 | for ( ; char_len > 0; --char_len)
|
---|
718 | *dest++ = (*buffer->cur - char_len);
|
---|
719 | }
|
---|
720 | else
|
---|
721 | #endif
|
---|
722 | *dest++ = c;
|
---|
723 | }
|
---|
724 |
|
---|
725 | *dest = '\0';
|
---|
726 |
|
---|
727 | token->val.str.text = BUFF_FRONT (pfile->u_buff);
|
---|
728 | token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
|
---|
729 | BUFF_FRONT (pfile->u_buff) = dest + 1;
|
---|
730 | }
|
---|
731 |
|
---|
732 | /* The stored comment includes the comment start and any terminator. */
|
---|
733 | static void
|
---|
734 | save_comment (pfile, token, from, type)
|
---|
735 | cpp_reader *pfile;
|
---|
736 | cpp_token *token;
|
---|
737 | const unsigned char *from;
|
---|
738 | cppchar_t type;
|
---|
739 | {
|
---|
740 | unsigned char *buffer;
|
---|
741 | unsigned int len, clen;
|
---|
742 |
|
---|
743 | len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
|
---|
744 |
|
---|
745 | /* C++ comments probably (not definitely) have moved past a new
|
---|
746 | line, which we don't want to save in the comment. */
|
---|
747 | if (is_vspace (pfile->buffer->cur[-1]))
|
---|
748 | len--;
|
---|
749 |
|
---|
750 | /* If we are currently in a directive, then we need to store all
|
---|
751 | C++ comments as C comments internally, and so we need to
|
---|
752 | allocate a little extra space in that case.
|
---|
753 |
|
---|
754 | Note that the only time we encounter a directive here is
|
---|
755 | when we are saving comments in a "#define". */
|
---|
756 | clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
|
---|
757 |
|
---|
758 | buffer = _cpp_unaligned_alloc (pfile, clen);
|
---|
759 |
|
---|
760 | token->type = CPP_COMMENT;
|
---|
761 | token->val.str.len = clen;
|
---|
762 | token->val.str.text = buffer;
|
---|
763 |
|
---|
764 | buffer[0] = '/';
|
---|
765 | memcpy (buffer + 1, from, len - 1);
|
---|
766 |
|
---|
767 | /* Finish conversion to a C comment, if necessary. */
|
---|
768 | if (pfile->state.in_directive && type == '/')
|
---|
769 | {
|
---|
770 | buffer[1] = '*';
|
---|
771 | buffer[clen - 2] = '*';
|
---|
772 | buffer[clen - 1] = '/';
|
---|
773 | }
|
---|
774 | }
|
---|
775 |
|
---|
776 | /* Allocate COUNT tokens for RUN. */
|
---|
777 | void
|
---|
778 | _cpp_init_tokenrun (run, count)
|
---|
779 | tokenrun *run;
|
---|
780 | unsigned int count;
|
---|
781 | {
|
---|
782 | run->base = xnewvec (cpp_token, count);
|
---|
783 | run->limit = run->base + count;
|
---|
784 | run->next = NULL;
|
---|
785 | }
|
---|
786 |
|
---|
787 | /* Returns the next tokenrun, or creates one if there is none. */
|
---|
788 | static tokenrun *
|
---|
789 | next_tokenrun (run)
|
---|
790 | tokenrun *run;
|
---|
791 | {
|
---|
792 | if (run->next == NULL)
|
---|
793 | {
|
---|
794 | run->next = xnew (tokenrun);
|
---|
795 | run->next->prev = run;
|
---|
796 | _cpp_init_tokenrun (run->next, 250);
|
---|
797 | }
|
---|
798 |
|
---|
799 | return run->next;
|
---|
800 | }
|
---|
801 |
|
---|
802 | /* Allocate a single token that is invalidated at the same time as the
|
---|
803 | rest of the tokens on the line. Has its line and col set to the
|
---|
804 | same as the last lexed token, so that diagnostics appear in the
|
---|
805 | right place. */
|
---|
806 | cpp_token *
|
---|
807 | _cpp_temp_token (pfile)
|
---|
808 | cpp_reader *pfile;
|
---|
809 | {
|
---|
810 | cpp_token *old, *result;
|
---|
811 |
|
---|
812 | old = pfile->cur_token - 1;
|
---|
813 | if (pfile->cur_token == pfile->cur_run->limit)
|
---|
814 | {
|
---|
815 | pfile->cur_run = next_tokenrun (pfile->cur_run);
|
---|
816 | pfile->cur_token = pfile->cur_run->base;
|
---|
817 | }
|
---|
818 |
|
---|
819 | result = pfile->cur_token++;
|
---|
820 | result->line = old->line;
|
---|
821 | result->col = old->col;
|
---|
822 | return result;
|
---|
823 | }
|
---|
824 |
|
---|
825 | /* Lex a token into RESULT (external interface). Takes care of issues
|
---|
826 | like directive handling, token lookahead, multiple include
|
---|
827 | optimization and skipping. */
|
---|
828 | const cpp_token *
|
---|
829 | _cpp_lex_token (pfile)
|
---|
830 | cpp_reader *pfile;
|
---|
831 | {
|
---|
832 | cpp_token *result;
|
---|
833 |
|
---|
834 | for (;;)
|
---|
835 | {
|
---|
836 | if (pfile->cur_token == pfile->cur_run->limit)
|
---|
837 | {
|
---|
838 | pfile->cur_run = next_tokenrun (pfile->cur_run);
|
---|
839 | pfile->cur_token = pfile->cur_run->base;
|
---|
840 | }
|
---|
841 |
|
---|
842 | if (pfile->lookaheads)
|
---|
843 | {
|
---|
844 | pfile->lookaheads--;
|
---|
845 | result = pfile->cur_token++;
|
---|
846 | }
|
---|
847 | else
|
---|
848 | result = _cpp_lex_direct (pfile);
|
---|
849 |
|
---|
850 | if (result->flags & BOL)
|
---|
851 | {
|
---|
852 | /* Is this a directive. If _cpp_handle_directive returns
|
---|
853 | false, it is an assembler #. */
|
---|
854 | if (result->type == CPP_HASH
|
---|
855 | /* 6.10.3 p 11: Directives in a list of macro arguments
|
---|
856 | gives undefined behavior. This implementation
|
---|
857 | handles the directive as normal. */
|
---|
858 | && pfile->state.parsing_args != 1
|
---|
859 | && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
|
---|
860 | continue;
|
---|
861 | if (pfile->cb.line_change && !pfile->state.skipping)
|
---|
862 | (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
|
---|
863 | }
|
---|
864 |
|
---|
865 | /* We don't skip tokens in directives. */
|
---|
866 | if (pfile->state.in_directive)
|
---|
867 | break;
|
---|
868 |
|
---|
869 | /* Outside a directive, invalidate controlling macros. At file
|
---|
870 | EOF, _cpp_lex_direct takes care of popping the buffer, so we never
|
---|
871 | get here and MI optimisation works. */
|
---|
872 | pfile->mi_valid = false;
|
---|
873 |
|
---|
874 | if (!pfile->state.skipping || result->type == CPP_EOF)
|
---|
875 | break;
|
---|
876 | }
|
---|
877 |
|
---|
878 | return result;
|
---|
879 | }
|
---|
880 |
|
---|
881 | /* A NUL terminates the current buffer. For ISO preprocessing this is
|
---|
882 | EOF, but for traditional preprocessing it indicates we need a line
|
---|
883 | refill. Returns TRUE to continue preprocessing a new buffer, FALSE
|
---|
884 | to return a CPP_EOF to the caller. */
|
---|
885 | static bool
|
---|
886 | continue_after_nul (pfile)
|
---|
887 | cpp_reader *pfile;
|
---|
888 | {
|
---|
889 | cpp_buffer *buffer = pfile->buffer;
|
---|
890 | bool more = false;
|
---|
891 |
|
---|
892 | buffer->saved_flags = BOL;
|
---|
893 | if (CPP_OPTION (pfile, traditional))
|
---|
894 | {
|
---|
895 | if (pfile->state.in_directive)
|
---|
896 | return false;
|
---|
897 |
|
---|
898 | _cpp_remove_overlay (pfile);
|
---|
899 | more = _cpp_read_logical_line_trad (pfile);
|
---|
900 | _cpp_overlay_buffer (pfile, pfile->out.base,
|
---|
901 | pfile->out.cur - pfile->out.base);
|
---|
902 | pfile->line = pfile->out.first_line;
|
---|
903 | }
|
---|
904 | else
|
---|
905 | {
|
---|
906 | /* Stop parsing arguments with a CPP_EOF. When we finally come
|
---|
907 | back here, do the work of popping the buffer. */
|
---|
908 | if (!pfile->state.parsing_args)
|
---|
909 | {
|
---|
910 | if (buffer->cur != buffer->line_base)
|
---|
911 | {
|
---|
912 | /* Non-empty files should end in a newline. Don't warn
|
---|
913 | for command line and _Pragma buffers. */
|
---|
914 | if (!buffer->from_stage3)
|
---|
915 | cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
|
---|
916 | handle_newline (pfile);
|
---|
917 | }
|
---|
918 |
|
---|
919 | /* Similarly, finish an in-progress directive with CPP_EOF
|
---|
920 | before popping the buffer. */
|
---|
921 | if (!pfile->state.in_directive && buffer->prev)
|
---|
922 | {
|
---|
923 | more = !buffer->return_at_eof;
|
---|
924 | _cpp_pop_buffer (pfile);
|
---|
925 | }
|
---|
926 | }
|
---|
927 | }
|
---|
928 |
|
---|
929 | return more;
|
---|
930 | }
|
---|
931 |
|
---|
/* Lexer helper: read the next effective character; if it is CHAR the
   token type becomes THEN_TYPE, otherwise the character is pushed
   back with BACKUP and the type becomes ELSE_TYPE.  The expansion
   uses the variables `pfile' and `result' of the enclosing scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (get_effective_char (pfile) != CHAR)           \
        {                                               \
          BACKUP ();                                    \
          result->type = ELSE_TYPE;                     \
        }                                               \
      else                                              \
        result->type = THEN_TYPE;                       \
    }                                                   \
  while (0)
|
---|
942 |
|
---|
943 | /* Lex a token into pfile->cur_token, which is also incremented, to
|
---|
944 | get diagnostics pointing to the correct location.
|
---|
945 |
|
---|
946 | Does not handle issues such as token lookahead, multiple-include
|
---|
947 | optimisation, directives, skipping etc. This function is only
|
---|
948 | suitable for use by _cpp_lex_token, and in special cases like
|
---|
949 | lex_expansion_token which doesn't care for any of these issues.
|
---|
950 |
|
---|
951 | When meeting a newline, returns CPP_EOF if parsing a directive,
|
---|
952 | otherwise returns to the start of the token buffer if permissible.
|
---|
953 | Returns the location of the lexed token. */
|
---|
954 | cpp_token *
|
---|
955 | _cpp_lex_direct (pfile)
|
---|
956 | cpp_reader *pfile;
|
---|
957 | {
|
---|
958 | cppchar_t c;
|
---|
959 | cpp_buffer *buffer;
|
---|
960 | const unsigned char *comment_start;
|
---|
961 | cpp_token *result = pfile->cur_token++;
|
---|
962 |
|
---|
963 | fresh_line:
|
---|
964 | buffer = pfile->buffer;
|
---|
965 | result->flags = buffer->saved_flags;
|
---|
966 | buffer->saved_flags = 0;
|
---|
967 | update_tokens_line:
|
---|
968 | result->line = pfile->line;
|
---|
969 |
|
---|
970 | skipped_white:
|
---|
971 | c = *buffer->cur++;
|
---|
972 | result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
|
---|
973 |
|
---|
974 | trigraph:
|
---|
975 | switch (c)
|
---|
976 | {
|
---|
977 | case ' ': case '\t': case '\f': case '\v': case '\0':
|
---|
978 | result->flags |= PREV_WHITE;
|
---|
979 | if (skip_whitespace (pfile, c))
|
---|
980 | goto skipped_white;
|
---|
981 |
|
---|
982 | /* End of buffer. */
|
---|
983 | buffer->cur--;
|
---|
984 | if (continue_after_nul (pfile))
|
---|
985 | goto fresh_line;
|
---|
986 | result->type = CPP_EOF;
|
---|
987 | break;
|
---|
988 |
|
---|
989 | case '\n': case '\r':
|
---|
990 | handle_newline (pfile);
|
---|
991 | buffer->saved_flags = BOL;
|
---|
992 | if (! pfile->state.in_directive)
|
---|
993 | {
|
---|
994 | if (pfile->state.parsing_args == 2)
|
---|
995 | buffer->saved_flags |= PREV_WHITE;
|
---|
996 | if (!pfile->keep_tokens)
|
---|
997 | {
|
---|
998 | pfile->cur_run = &pfile->base_run;
|
---|
999 | result = pfile->base_run.base;
|
---|
1000 | pfile->cur_token = result + 1;
|
---|
1001 | }
|
---|
1002 | goto fresh_line;
|
---|
1003 | }
|
---|
1004 | result->type = CPP_EOF;
|
---|
1005 | break;
|
---|
1006 |
|
---|
1007 | case '?':
|
---|
1008 | case '\\':
|
---|
1009 | /* These could start an escaped newline, or '?' a trigraph. Let
|
---|
1010 | skip_escaped_newlines do all the work. */
|
---|
1011 | {
|
---|
1012 | unsigned int line = pfile->line;
|
---|
1013 |
|
---|
1014 | c = skip_escaped_newlines (pfile);
|
---|
1015 | if (line != pfile->line)
|
---|
1016 | {
|
---|
1017 | buffer->cur--;
|
---|
1018 | /* We had at least one escaped newline of some sort.
|
---|
1019 | Update the token's line and column. */
|
---|
1020 | goto update_tokens_line;
|
---|
1021 | }
|
---|
1022 | }
|
---|
1023 |
|
---|
1024 | /* We are either the original '?' or '\\', or a trigraph. */
|
---|
1025 | if (c == '?')
|
---|
1026 | result->type = CPP_QUERY;
|
---|
1027 | else if (c == '\\')
|
---|
1028 | goto random_char;
|
---|
1029 | else
|
---|
1030 | goto trigraph;
|
---|
1031 | break;
|
---|
1032 |
|
---|
1033 | case '0': case '1': case '2': case '3': case '4':
|
---|
1034 | case '5': case '6': case '7': case '8': case '9':
|
---|
1035 | result->type = CPP_NUMBER;
|
---|
1036 | parse_number (pfile, &result->val.str, 0);
|
---|
1037 | break;
|
---|
1038 |
|
---|
1039 | case 'L':
|
---|
1040 | /* 'L' may introduce wide characters or strings. */
|
---|
1041 | {
|
---|
1042 | const unsigned char *pos = buffer->cur;
|
---|
1043 |
|
---|
1044 | c = get_effective_char (pfile);
|
---|
1045 | if (c == '\'' || c == '"')
|
---|
1046 | {
|
---|
1047 | result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
|
---|
1048 | parse_string (pfile, result, c);
|
---|
1049 | break;
|
---|
1050 | }
|
---|
1051 | buffer->cur = pos;
|
---|
1052 | }
|
---|
1053 | /* Fall through. */
|
---|
1054 |
|
---|
1055 | start_ident:
|
---|
1056 | case '_':
|
---|
1057 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
---|
1058 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
|
---|
1059 | case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
|
---|
1060 | case 's': case 't': case 'u': case 'v': case 'w': case 'x':
|
---|
1061 | case 'y': case 'z':
|
---|
1062 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
---|
1063 | case 'G': case 'H': case 'I': case 'J': case 'K':
|
---|
1064 | case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
|
---|
1065 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
|
---|
1066 | case 'Y': case 'Z':
|
---|
1067 | result->type = CPP_NAME;
|
---|
1068 | result->val.node = parse_identifier (pfile);
|
---|
1069 |
|
---|
1070 | /* Convert named operators to their proper types. */
|
---|
1071 | if (result->val.node->flags & NODE_OPERATOR)
|
---|
1072 | {
|
---|
1073 | result->flags |= NAMED_OP;
|
---|
1074 | result->type = result->val.node->value.operator;
|
---|
1075 | }
|
---|
1076 | break;
|
---|
1077 |
|
---|
1078 | case '\'':
|
---|
1079 | case '"':
|
---|
1080 | result->type = c == '"' ? CPP_STRING: CPP_CHAR;
|
---|
1081 | parse_string (pfile, result, c);
|
---|
1082 | break;
|
---|
1083 |
|
---|
1084 | case '/':
|
---|
1085 | /* A potential block or line comment. */
|
---|
1086 | comment_start = buffer->cur;
|
---|
1087 | c = get_effective_char (pfile);
|
---|
1088 |
|
---|
1089 | if (c == '*')
|
---|
1090 | {
|
---|
1091 | if (skip_block_comment (pfile))
|
---|
1092 | cpp_error (pfile, DL_ERROR, "unterminated comment");
|
---|
1093 | }
|
---|
1094 | else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
|
---|
1095 | || CPP_IN_SYSTEM_HEADER (pfile)))
|
---|
1096 | {
|
---|
1097 | /* Warn about comments only if pedantically GNUC89, and not
|
---|
1098 | in system headers. */
|
---|
1099 | if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
|
---|
1100 | && ! buffer->warned_cplusplus_comments)
|
---|
1101 | {
|
---|
1102 | cpp_error (pfile, DL_PEDWARN,
|
---|
1103 | "C++ style comments are not allowed in ISO C90");
|
---|
1104 | cpp_error (pfile, DL_PEDWARN,
|
---|
1105 | "(this will be reported only once per input file)");
|
---|
1106 | buffer->warned_cplusplus_comments = 1;
|
---|
1107 | }
|
---|
1108 |
|
---|
1109 | if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
|
---|
1110 | cpp_error (pfile, DL_WARNING, "multi-line comment");
|
---|
1111 | }
|
---|
1112 | else if (c == '=')
|
---|
1113 | {
|
---|
1114 | result->type = CPP_DIV_EQ;
|
---|
1115 | break;
|
---|
1116 | }
|
---|
1117 | else
|
---|
1118 | {
|
---|
1119 | BACKUP ();
|
---|
1120 | result->type = CPP_DIV;
|
---|
1121 | break;
|
---|
1122 | }
|
---|
1123 |
|
---|
1124 | if (!pfile->state.save_comments)
|
---|
1125 | {
|
---|
1126 | result->flags |= PREV_WHITE;
|
---|
1127 | goto update_tokens_line;
|
---|
1128 | }
|
---|
1129 |
|
---|
1130 | /* Save the comment as a token in its own right. */
|
---|
1131 | save_comment (pfile, result, comment_start, c);
|
---|
1132 | break;
|
---|
1133 |
|
---|
1134 | case '<':
|
---|
1135 | if (pfile->state.angled_headers)
|
---|
1136 | {
|
---|
1137 | result->type = CPP_HEADER_NAME;
|
---|
1138 | parse_string (pfile, result, '>');
|
---|
1139 | break;
|
---|
1140 | }
|
---|
1141 |
|
---|
1142 | c = get_effective_char (pfile);
|
---|
1143 | if (c == '=')
|
---|
1144 | result->type = CPP_LESS_EQ;
|
---|
1145 | else if (c == '<')
|
---|
1146 | IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
|
---|
1147 | else if (c == '?' && CPP_OPTION (pfile, cplusplus))
|
---|
1148 | IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
|
---|
1149 | else if (c == ':' && CPP_OPTION (pfile, digraphs))
|
---|
1150 | {
|
---|
1151 | result->type = CPP_OPEN_SQUARE;
|
---|
1152 | result->flags |= DIGRAPH;
|
---|
1153 | }
|
---|
1154 | else if (c == '%' && CPP_OPTION (pfile, digraphs))
|
---|
1155 | {
|
---|
1156 | result->type = CPP_OPEN_BRACE;
|
---|
1157 | result->flags |= DIGRAPH;
|
---|
1158 | }
|
---|
1159 | else
|
---|
1160 | {
|
---|
1161 | BACKUP ();
|
---|
1162 | result->type = CPP_LESS;
|
---|
1163 | }
|
---|
1164 | break;
|
---|
1165 |
|
---|
1166 | case '>':
|
---|
1167 | c = get_effective_char (pfile);
|
---|
1168 | if (c == '=')
|
---|
1169 | result->type = CPP_GREATER_EQ;
|
---|
1170 | else if (c == '>')
|
---|
1171 | IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
|
---|
1172 | else if (c == '?' && CPP_OPTION (pfile, cplusplus))
|
---|
1173 | IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
|
---|
1174 | else
|
---|
1175 | {
|
---|
1176 | BACKUP ();
|
---|
1177 | result->type = CPP_GREATER;
|
---|
1178 | }
|
---|
1179 | break;
|
---|
1180 |
|
---|
1181 | case '%':
|
---|
1182 | c = get_effective_char (pfile);
|
---|
1183 | if (c == '=')
|
---|
1184 | result->type = CPP_MOD_EQ;
|
---|
1185 | else if (CPP_OPTION (pfile, digraphs) && c == ':')
|
---|
1186 | {
|
---|
1187 | result->flags |= DIGRAPH;
|
---|
1188 | result->type = CPP_HASH;
|
---|
1189 | if (get_effective_char (pfile) == '%')
|
---|
1190 | {
|
---|
1191 | const unsigned char *pos = buffer->cur;
|
---|
1192 |
|
---|
1193 | if (get_effective_char (pfile) == ':')
|
---|
1194 | result->type = CPP_PASTE;
|
---|
1195 | else
|
---|
1196 | buffer->cur = pos - 1;
|
---|
1197 | }
|
---|
1198 | else
|
---|
1199 | BACKUP ();
|
---|
1200 | }
|
---|
1201 | else if (CPP_OPTION (pfile, digraphs) && c == '>')
|
---|
1202 | {
|
---|
1203 | result->flags |= DIGRAPH;
|
---|
1204 | result->type = CPP_CLOSE_BRACE;
|
---|
1205 | }
|
---|
1206 | else
|
---|
1207 | {
|
---|
1208 | BACKUP ();
|
---|
1209 | result->type = CPP_MOD;
|
---|
1210 | }
|
---|
1211 | break;
|
---|
1212 |
|
---|
1213 | case '.':
|
---|
1214 | result->type = CPP_DOT;
|
---|
1215 | c = get_effective_char (pfile);
|
---|
1216 | if (c == '.')
|
---|
1217 | {
|
---|
1218 | const unsigned char *pos = buffer->cur;
|
---|
1219 |
|
---|
1220 | if (get_effective_char (pfile) == '.')
|
---|
1221 | result->type = CPP_ELLIPSIS;
|
---|
1222 | else
|
---|
1223 | buffer->cur = pos - 1;
|
---|
1224 | }
|
---|
1225 | /* All known character sets have 0...9 contiguous. */
|
---|
1226 | else if (ISDIGIT (c))
|
---|
1227 | {
|
---|
1228 | result->type = CPP_NUMBER;
|
---|
1229 | parse_number (pfile, &result->val.str, 1);
|
---|
1230 | }
|
---|
1231 | else if (c == '*' && CPP_OPTION (pfile, cplusplus))
|
---|
1232 | result->type = CPP_DOT_STAR;
|
---|
1233 | else
|
---|
1234 | BACKUP ();
|
---|
1235 | break;
|
---|
1236 |
|
---|
1237 | case '+':
|
---|
1238 | c = get_effective_char (pfile);
|
---|
1239 | if (c == '+')
|
---|
1240 | result->type = CPP_PLUS_PLUS;
|
---|
1241 | else if (c == '=')
|
---|
1242 | result->type = CPP_PLUS_EQ;
|
---|
1243 | else
|
---|
1244 | {
|
---|
1245 | BACKUP ();
|
---|
1246 | result->type = CPP_PLUS;
|
---|
1247 | }
|
---|
1248 | break;
|
---|
1249 |
|
---|
1250 | case '-':
|
---|
1251 | c = get_effective_char (pfile);
|
---|
1252 | if (c == '>')
|
---|
1253 | {
|
---|
1254 | result->type = CPP_DEREF;
|
---|
1255 | if (CPP_OPTION (pfile, cplusplus))
|
---|
1256 | {
|
---|
1257 | if (get_effective_char (pfile) == '*')
|
---|
1258 | result->type = CPP_DEREF_STAR;
|
---|
1259 | else
|
---|
1260 | BACKUP ();
|
---|
1261 | }
|
---|
1262 | }
|
---|
1263 | else if (c == '-')
|
---|
1264 | result->type = CPP_MINUS_MINUS;
|
---|
1265 | else if (c == '=')
|
---|
1266 | result->type = CPP_MINUS_EQ;
|
---|
1267 | else
|
---|
1268 | {
|
---|
1269 | BACKUP ();
|
---|
1270 | result->type = CPP_MINUS;
|
---|
1271 | }
|
---|
1272 | break;
|
---|
1273 |
|
---|
1274 | case '&':
|
---|
1275 | c = get_effective_char (pfile);
|
---|
1276 | if (c == '&')
|
---|
1277 | result->type = CPP_AND_AND;
|
---|
1278 | else if (c == '=')
|
---|
1279 | result->type = CPP_AND_EQ;
|
---|
1280 | else
|
---|
1281 | {
|
---|
1282 | BACKUP ();
|
---|
1283 | result->type = CPP_AND;
|
---|
1284 | }
|
---|
1285 | break;
|
---|
1286 |
|
---|
1287 | case '|':
|
---|
1288 | c = get_effective_char (pfile);
|
---|
1289 | if (c == '|')
|
---|
1290 | result->type = CPP_OR_OR;
|
---|
1291 | else if (c == '=')
|
---|
1292 | result->type = CPP_OR_EQ;
|
---|
1293 | else
|
---|
1294 | {
|
---|
1295 | BACKUP ();
|
---|
1296 | result->type = CPP_OR;
|
---|
1297 | }
|
---|
1298 | break;
|
---|
1299 |
|
---|
1300 | case ':':
|
---|
1301 | c = get_effective_char (pfile);
|
---|
1302 | if (c == ':' && CPP_OPTION (pfile, cplusplus))
|
---|
1303 | result->type = CPP_SCOPE;
|
---|
1304 | else if (c == '>' && CPP_OPTION (pfile, digraphs))
|
---|
1305 | {
|
---|
1306 | result->flags |= DIGRAPH;
|
---|
1307 | result->type = CPP_CLOSE_SQUARE;
|
---|
1308 | }
|
---|
1309 | else
|
---|
1310 | {
|
---|
1311 | BACKUP ();
|
---|
1312 | result->type = CPP_COLON;
|
---|
1313 | }
|
---|
1314 | break;
|
---|
1315 |
|
---|
1316 | case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
|
---|
1317 | case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
|
---|
1318 | case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
|
---|
1319 | case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
|
---|
1320 | case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
|
---|
1321 |
|
---|
1322 | case '~': result->type = CPP_COMPL; break;
|
---|
1323 | case ',': result->type = CPP_COMMA; break;
|
---|
1324 | case '(': result->type = CPP_OPEN_PAREN; break;
|
---|
1325 | case ')': result->type = CPP_CLOSE_PAREN; break;
|
---|
1326 | case '[': result->type = CPP_OPEN_SQUARE; break;
|
---|
1327 | case ']': result->type = CPP_CLOSE_SQUARE; break;
|
---|
1328 | case '{': result->type = CPP_OPEN_BRACE; break;
|
---|
1329 | case '}': result->type = CPP_CLOSE_BRACE; break;
|
---|
1330 | case ';': result->type = CPP_SEMICOLON; break;
|
---|
1331 |
|
---|
1332 | /* @ is a punctuator in Objective-C. */
|
---|
1333 | case '@': result->type = CPP_ATSIGN; break;
|
---|
1334 |
|
---|
1335 | case '$':
|
---|
1336 | if (CPP_OPTION (pfile, dollars_in_ident))
|
---|
1337 | goto start_ident;
|
---|
1338 | /* Fall through... */
|
---|
1339 |
|
---|
1340 | random_char:
|
---|
1341 | default:
|
---|
1342 | result->type = CPP_OTHER;
|
---|
1343 | result->val.c = c;
|
---|
1344 | break;
|
---|
1345 | }
|
---|
1346 |
|
---|
1347 | return result;
|
---|
1348 | }
|
---|
1349 |
|
---|
1350 | /* An upper bound on the number of bytes needed to spell TOKEN,
|
---|
1351 | including preceding whitespace. */
|
---|
1352 | unsigned int
|
---|
1353 | cpp_token_len (token)
|
---|
1354 | const cpp_token *token;
|
---|
1355 | {
|
---|
1356 | unsigned int len;
|
---|
1357 |
|
---|
1358 | switch (TOKEN_SPELL (token))
|
---|
1359 | {
|
---|
1360 | default: len = 0; break;
|
---|
1361 | case SPELL_NUMBER:
|
---|
1362 | case SPELL_STRING: len = token->val.str.len; break;
|
---|
1363 | case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
|
---|
1364 | }
|
---|
1365 | /* 1 for whitespace, 4 for comment delimiters. */
|
---|
1366 | return len + 5;
|
---|
1367 | }
|
---|
1368 |
|
---|
1369 | /* Write the spelling of a token TOKEN to BUFFER. The buffer must
|
---|
1370 | already contain the enough space to hold the token's spelling.
|
---|
1371 | Returns a pointer to the character after the last character
|
---|
1372 | written. */
|
---|
1373 | unsigned char *
|
---|
1374 | cpp_spell_token (pfile, token, buffer)
|
---|
1375 | cpp_reader *pfile; /* Would be nice to be rid of this... */
|
---|
1376 | const cpp_token *token;
|
---|
1377 | unsigned char *buffer;
|
---|
1378 | {
|
---|
1379 | switch (TOKEN_SPELL (token))
|
---|
1380 | {
|
---|
1381 | case SPELL_OPERATOR:
|
---|
1382 | {
|
---|
1383 | const unsigned char *spelling;
|
---|
1384 | unsigned char c;
|
---|
1385 |
|
---|
1386 | if (token->flags & DIGRAPH)
|
---|
1387 | spelling
|
---|
1388 | = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
|
---|
1389 | else if (token->flags & NAMED_OP)
|
---|
1390 | goto spell_ident;
|
---|
1391 | else
|
---|
1392 | spelling = TOKEN_NAME (token);
|
---|
1393 |
|
---|
1394 | while ((c = *spelling++) != '\0')
|
---|
1395 | *buffer++ = c;
|
---|
1396 | }
|
---|
1397 | break;
|
---|
1398 |
|
---|
1399 | case SPELL_CHAR:
|
---|
1400 | *buffer++ = token->val.c;
|
---|
1401 | break;
|
---|
1402 |
|
---|
1403 | spell_ident:
|
---|
1404 | case SPELL_IDENT:
|
---|
1405 | memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
|
---|
1406 | buffer += NODE_LEN (token->val.node);
|
---|
1407 | break;
|
---|
1408 |
|
---|
1409 | case SPELL_NUMBER:
|
---|
1410 | memcpy (buffer, token->val.str.text, token->val.str.len);
|
---|
1411 | buffer += token->val.str.len;
|
---|
1412 | break;
|
---|
1413 |
|
---|
1414 | case SPELL_STRING:
|
---|
1415 | {
|
---|
1416 | int left, right, tag;
|
---|
1417 | switch (token->type)
|
---|
1418 | {
|
---|
1419 | case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
|
---|
1420 | case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
|
---|
1421 | case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
|
---|
1422 | case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
|
---|
1423 | case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
|
---|
1424 | default:
|
---|
1425 | cpp_error (pfile, DL_ICE, "unknown string token %s\n",
|
---|
1426 | TOKEN_NAME (token));
|
---|
1427 | return buffer;
|
---|
1428 | }
|
---|
1429 | if (tag) *buffer++ = tag;
|
---|
1430 | *buffer++ = left;
|
---|
1431 | memcpy (buffer, token->val.str.text, token->val.str.len);
|
---|
1432 | buffer += token->val.str.len;
|
---|
1433 | *buffer++ = right;
|
---|
1434 | }
|
---|
1435 | break;
|
---|
1436 |
|
---|
1437 | case SPELL_NONE:
|
---|
1438 | cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
|
---|
1439 | break;
|
---|
1440 | }
|
---|
1441 |
|
---|
1442 | return buffer;
|
---|
1443 | }
|
---|
1444 |
|
---|
1445 | /* Returns TOKEN spelt as a null-terminated string. The string is
|
---|
1446 | freed when the reader is destroyed. Useful for diagnostics. */
|
---|
1447 | unsigned char *
|
---|
1448 | cpp_token_as_text (pfile, token)
|
---|
1449 | cpp_reader *pfile;
|
---|
1450 | const cpp_token *token;
|
---|
1451 | {
|
---|
1452 | unsigned int len = cpp_token_len (token);
|
---|
1453 | unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
|
---|
1454 |
|
---|
1455 | end = cpp_spell_token (pfile, token, start);
|
---|
1456 | end[0] = '\0';
|
---|
1457 |
|
---|
1458 | return start;
|
---|
1459 | }
|
---|
1460 |
|
---|
1461 | /* Used by C front ends, which really should move to using
|
---|
1462 | cpp_token_as_text. */
|
---|
1463 | const char *
|
---|
1464 | cpp_type2name (type)
|
---|
1465 | enum cpp_ttype type;
|
---|
1466 | {
|
---|
1467 | return (const char *) token_spellings[type].name;
|
---|
1468 | }
|
---|
1469 |
|
---|
1470 | /* Writes the spelling of token to FP, without any preceding space.
|
---|
1471 | Separated from cpp_spell_token for efficiency - to avoid stdio
|
---|
1472 | double-buffering. */
|
---|
1473 | void
|
---|
1474 | cpp_output_token (token, fp)
|
---|
1475 | const cpp_token *token;
|
---|
1476 | FILE *fp;
|
---|
1477 | {
|
---|
1478 | switch (TOKEN_SPELL (token))
|
---|
1479 | {
|
---|
1480 | case SPELL_OPERATOR:
|
---|
1481 | {
|
---|
1482 | const unsigned char *spelling;
|
---|
1483 | int c;
|
---|
1484 |
|
---|
1485 | if (token->flags & DIGRAPH)
|
---|
1486 | spelling
|
---|
1487 | = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
|
---|
1488 | else if (token->flags & NAMED_OP)
|
---|
1489 | goto spell_ident;
|
---|
1490 | else
|
---|
1491 | spelling = TOKEN_NAME (token);
|
---|
1492 |
|
---|
1493 | c = *spelling;
|
---|
1494 | do
|
---|
1495 | putc (c, fp);
|
---|
1496 | while ((c = *++spelling) != '\0');
|
---|
1497 | }
|
---|
1498 | break;
|
---|
1499 |
|
---|
1500 | case SPELL_CHAR:
|
---|
1501 | putc (token->val.c, fp);
|
---|
1502 | break;
|
---|
1503 |
|
---|
1504 | spell_ident:
|
---|
1505 | case SPELL_IDENT:
|
---|
1506 | fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
|
---|
1507 | break;
|
---|
1508 |
|
---|
1509 | case SPELL_NUMBER:
|
---|
1510 | fwrite (token->val.str.text, 1, token->val.str.len, fp);
|
---|
1511 | break;
|
---|
1512 |
|
---|
1513 | case SPELL_STRING:
|
---|
1514 | {
|
---|
1515 | int left, right, tag;
|
---|
1516 | switch (token->type)
|
---|
1517 | {
|
---|
1518 | case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
|
---|
1519 | case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
|
---|
1520 | case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
|
---|
1521 | case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
|
---|
1522 | case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
|
---|
1523 | default:
|
---|
1524 | fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
|
---|
1525 | return;
|
---|
1526 | }
|
---|
1527 | if (tag) putc (tag, fp);
|
---|
1528 | putc (left, fp);
|
---|
1529 | fwrite (token->val.str.text, 1, token->val.str.len, fp);
|
---|
1530 | putc (right, fp);
|
---|
1531 | }
|
---|
1532 | break;
|
---|
1533 |
|
---|
1534 | case SPELL_NONE:
|
---|
1535 | /* An error, most probably. */
|
---|
1536 | break;
|
---|
1537 | }
|
---|
1538 | }
|
---|
1539 |
|
---|
1540 | /* Compare two tokens. */
|
---|
1541 | int
|
---|
1542 | _cpp_equiv_tokens (a, b)
|
---|
1543 | const cpp_token *a, *b;
|
---|
1544 | {
|
---|
1545 | if (a->type == b->type && a->flags == b->flags)
|
---|
1546 | switch (TOKEN_SPELL (a))
|
---|
1547 | {
|
---|
1548 | default: /* Keep compiler happy. */
|
---|
1549 | case SPELL_OPERATOR:
|
---|
1550 | return 1;
|
---|
1551 | case SPELL_CHAR:
|
---|
1552 | return a->val.c == b->val.c; /* Character. */
|
---|
1553 | case SPELL_NONE:
|
---|
1554 | return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
|
---|
1555 | case SPELL_IDENT:
|
---|
1556 | return a->val.node == b->val.node;
|
---|
1557 | case SPELL_NUMBER:
|
---|
1558 | case SPELL_STRING:
|
---|
1559 | return (a->val.str.len == b->val.str.len
|
---|
1560 | && !memcmp (a->val.str.text, b->val.str.text,
|
---|
1561 | a->val.str.len));
|
---|
1562 | }
|
---|
1563 |
|
---|
1564 | return 0;
|
---|
1565 | }
|
---|
1566 |
|
---|
1567 | /* Returns nonzero if a space should be inserted to avoid an
|
---|
1568 | accidental token paste for output. For simplicity, it is
|
---|
1569 | conservative, and occasionally advises a space where one is not
|
---|
1570 | needed, e.g. "." and ".2". */
|
---|
1571 | int
|
---|
1572 | cpp_avoid_paste (pfile, token1, token2)
|
---|
1573 | cpp_reader *pfile;
|
---|
1574 | const cpp_token *token1, *token2;
|
---|
1575 | {
|
---|
1576 | enum cpp_ttype a = token1->type, b = token2->type;
|
---|
1577 | cppchar_t c;
|
---|
1578 |
|
---|
1579 | if (token1->flags & NAMED_OP)
|
---|
1580 | a = CPP_NAME;
|
---|
1581 | if (token2->flags & NAMED_OP)
|
---|
1582 | b = CPP_NAME;
|
---|
1583 |
|
---|
1584 | c = EOF;
|
---|
1585 | if (token2->flags & DIGRAPH)
|
---|
1586 | c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
|
---|
1587 | else if (token_spellings[b].category == SPELL_OPERATOR)
|
---|
1588 | c = token_spellings[b].name[0];
|
---|
1589 |
|
---|
1590 | /* Quickly get everything that can paste with an '='. */
|
---|
1591 | if ((int) a <= (int) CPP_LAST_EQ && c == '=')
|
---|
1592 | return 1;
|
---|
1593 |
|
---|
1594 | switch (a)
|
---|
1595 | {
|
---|
1596 | case CPP_GREATER: return c == '>' || c == '?';
|
---|
1597 | case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
|
---|
1598 | case CPP_PLUS: return c == '+';
|
---|
1599 | case CPP_MINUS: return c == '-' || c == '>';
|
---|
1600 | case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
|
---|
1601 | case CPP_MOD: return c == ':' || c == '>';
|
---|
1602 | case CPP_AND: return c == '&';
|
---|
1603 | case CPP_OR: return c == '|';
|
---|
1604 | case CPP_COLON: return c == ':' || c == '>';
|
---|
1605 | case CPP_DEREF: return c == '*';
|
---|
1606 | case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
|
---|
1607 | case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
|
---|
1608 | case CPP_NAME: return ((b == CPP_NUMBER
|
---|
1609 | && name_p (pfile, &token2->val.str))
|
---|
1610 | || b == CPP_NAME
|
---|
1611 | || b == CPP_CHAR || b == CPP_STRING); /* L */
|
---|
1612 | case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
|
---|
1613 | || c == '.' || c == '+' || c == '-');
|
---|
1614 | case CPP_OTHER: return (CPP_OPTION (pfile, objc)
|
---|
1615 | && token1->val.c == '@'
|
---|
1616 | && (b == CPP_NAME || b == CPP_STRING));
|
---|
1617 | default: break;
|
---|
1618 | }
|
---|
1619 |
|
---|
1620 | return 0;
|
---|
1621 | }
|
---|
1622 |
|
---|
1623 | /* Output all the remaining tokens on the current line, and a newline
|
---|
1624 | character, to FP. Leading whitespace is removed. If there are
|
---|
1625 | macros, special token padding is not performed. */
|
---|
1626 | void
|
---|
1627 | cpp_output_line (pfile, fp)
|
---|
1628 | cpp_reader *pfile;
|
---|
1629 | FILE *fp;
|
---|
1630 | {
|
---|
1631 | const cpp_token *token;
|
---|
1632 |
|
---|
1633 | token = cpp_get_token (pfile);
|
---|
1634 | while (token->type != CPP_EOF)
|
---|
1635 | {
|
---|
1636 | cpp_output_token (token, fp);
|
---|
1637 | token = cpp_get_token (pfile);
|
---|
1638 | if (token->flags & PREV_WHITE)
|
---|
1639 | putc (' ', fp);
|
---|
1640 | }
|
---|
1641 |
|
---|
1642 | putc ('\n', fp);
|
---|
1643 | }
|
---|
1644 |
|
---|
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
|
---|
1655 |
|
---|
1656 | /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
|
---|
1657 | failure if cpplib is not parsing C++ or C99. Such failure is
|
---|
1658 | silent, and no variables are updated. Otherwise returns 0, and
|
---|
1659 | warns if -Wtraditional.
|
---|
1660 |
|
---|
1661 | [lex.charset]: The character designated by the universal character
|
---|
1662 | name \UNNNNNNNN is that character whose character short name in
|
---|
1663 | ISO/IEC 10646 is NNNNNNNN; the character designated by the
|
---|
1664 | universal character name \uNNNN is that character whose character
|
---|
1665 | short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
|
---|
1666 | for a universal character name is less than 0x20 or in the range
|
---|
1667 | 0x7F-0x9F (inclusive), or if the universal character name
|
---|
1668 | designates a character in the basic source character set, then the
|
---|
1669 | program is ill-formed.
|
---|
1670 |
|
---|
1671 | We assume that wchar_t is Unicode, so we don't need to do any
|
---|
1672 | mapping. Is this ever wrong?
|
---|
1673 |
|
---|
1674 | PC points to the 'u' or 'U', PSTR is points to the byte after PC,
|
---|
1675 | LIMIT is the end of the string or charconst. PSTR is updated to
|
---|
1676 | point after the UCS on return, and the UCS is written into PC. */
|
---|
1677 |
|
---|
1678 | static int
|
---|
1679 | maybe_read_ucs (pfile, pstr, limit, pc)
|
---|
1680 | cpp_reader *pfile;
|
---|
1681 | const unsigned char **pstr;
|
---|
1682 | const unsigned char *limit;
|
---|
1683 | cppchar_t *pc;
|
---|
1684 | {
|
---|
1685 | const unsigned char *p = *pstr;
|
---|
1686 | unsigned int code = 0;
|
---|
1687 | unsigned int c = *pc, length;
|
---|
1688 |
|
---|
1689 | /* Only attempt to interpret a UCS for C++ and C99. */
|
---|
1690 | if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
|
---|
1691 | return 1;
|
---|
1692 |
|
---|
1693 | if (CPP_WTRADITIONAL (pfile))
|
---|
1694 | cpp_error (pfile, DL_WARNING,
|
---|
1695 | "the meaning of '\\%c' is different in traditional C", c);
|
---|
1696 |
|
---|
1697 | length = (c == 'u' ? 4: 8);
|
---|
1698 |
|
---|
1699 | if ((size_t) (limit - p) < length)
|
---|
1700 | {
|
---|
1701 | cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
|
---|
1702 | /* Skip to the end to avoid more diagnostics. */
|
---|
1703 | p = limit;
|
---|
1704 | }
|
---|
1705 | else
|
---|
1706 | {
|
---|
1707 | for (; length; length--, p++)
|
---|
1708 | {
|
---|
1709 | c = *p;
|
---|
1710 | if (ISXDIGIT (c))
|
---|
1711 | code = (code << 4) + hex_digit_value (c);
|
---|
1712 | else
|
---|
1713 | {
|
---|
1714 | cpp_error (pfile, DL_ERROR,
|
---|
1715 | "non-hex digit '%c' in universal-character-name", c);
|
---|
1716 | /* We shouldn't skip in case there are multibyte chars. */
|
---|
1717 | break;
|
---|
1718 | }
|
---|
1719 | }
|
---|
1720 | }
|
---|
1721 |
|
---|
1722 | #ifdef TARGET_EBCDIC
|
---|
1723 | cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
|
---|
1724 | code = 0x3f; /* EBCDIC invalid character */
|
---|
1725 | #else
|
---|
1726 | /* True extended characters are OK. */
|
---|
1727 | if (code >= 0xa0
|
---|
1728 | && !(code & 0x80000000)
|
---|
1729 | && !(code >= 0xD800 && code <= 0xDFFF))
|
---|
1730 | ;
|
---|
1731 | /* The standard permits $, @ and ` to be specified as UCNs. We use
|
---|
1732 | hex escapes so that this also works with EBCDIC hosts. */
|
---|
1733 | else if (code == 0x24 || code == 0x40 || code == 0x60)
|
---|
1734 | ;
|
---|
1735 | /* Don't give another error if one occurred above. */
|
---|
1736 | else if (length == 0)
|
---|
1737 | cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
|
---|
1738 | #endif
|
---|
1739 |
|
---|
1740 | *pstr = p;
|
---|
1741 | *pc = code;
|
---|
1742 | return 0;
|
---|
1743 | }
|
---|
1744 |
|
---|
1745 | /* Returns the value of an escape sequence, truncated to the correct
|
---|
1746 | target precision. PSTR points to the input pointer, which is just
|
---|
1747 | after the backslash. LIMIT is how much text we have. WIDE is true
|
---|
1748 | if the escape sequence is part of a wide character constant or
|
---|
1749 | string literal. Handles all relevant diagnostics. */
|
---|
1750 | cppchar_t
|
---|
1751 | cpp_parse_escape (pfile, pstr, limit, wide)
|
---|
1752 | cpp_reader *pfile;
|
---|
1753 | const unsigned char **pstr;
|
---|
1754 | const unsigned char *limit;
|
---|
1755 | int wide;
|
---|
1756 | {
|
---|
1757 | int unknown = 0;
|
---|
1758 | const unsigned char *str = *pstr;
|
---|
1759 | cppchar_t c, mask;
|
---|
1760 | unsigned int width;
|
---|
1761 |
|
---|
1762 | if (wide)
|
---|
1763 | width = CPP_OPTION (pfile, wchar_precision);
|
---|
1764 | else
|
---|
1765 | width = CPP_OPTION (pfile, char_precision);
|
---|
1766 | if (width < BITS_PER_CPPCHAR_T)
|
---|
1767 | mask = ((cppchar_t) 1 << width) - 1;
|
---|
1768 | else
|
---|
1769 | mask = ~0;
|
---|
1770 |
|
---|
1771 | c = *str++;
|
---|
1772 | switch (c)
|
---|
1773 | {
|
---|
1774 | case '\\': case '\'': case '"': case '?': break;
|
---|
1775 | case 'b': c = TARGET_BS; break;
|
---|
1776 | case 'f': c = TARGET_FF; break;
|
---|
1777 | case 'n': c = TARGET_NEWLINE; break;
|
---|
1778 | case 'r': c = TARGET_CR; break;
|
---|
1779 | case 't': c = TARGET_TAB; break;
|
---|
1780 | case 'v': c = TARGET_VT; break;
|
---|
1781 |
|
---|
1782 | case '(': case '{': case '[': case '%':
|
---|
1783 | /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
|
---|
1784 | '\%' is used to prevent SCCS from getting confused. */
|
---|
1785 | unknown = CPP_PEDANTIC (pfile);
|
---|
1786 | break;
|
---|
1787 |
|
---|
1788 | case 'a':
|
---|
1789 | if (CPP_WTRADITIONAL (pfile))
|
---|
1790 | cpp_error (pfile, DL_WARNING,
|
---|
1791 | "the meaning of '\\a' is different in traditional C");
|
---|
1792 | c = TARGET_BELL;
|
---|
1793 | break;
|
---|
1794 |
|
---|
1795 | case 'e': case 'E':
|
---|
1796 | if (CPP_PEDANTIC (pfile))
|
---|
1797 | cpp_error (pfile, DL_PEDWARN,
|
---|
1798 | "non-ISO-standard escape sequence, '\\%c'", (int) c);
|
---|
1799 | c = TARGET_ESC;
|
---|
1800 | break;
|
---|
1801 |
|
---|
1802 | case 'u': case 'U':
|
---|
1803 | unknown = maybe_read_ucs (pfile, &str, limit, &c);
|
---|
1804 | break;
|
---|
1805 |
|
---|
1806 | case 'x':
|
---|
1807 | if (CPP_WTRADITIONAL (pfile))
|
---|
1808 | cpp_error (pfile, DL_WARNING,
|
---|
1809 | "the meaning of '\\x' is different in traditional C");
|
---|
1810 |
|
---|
1811 | {
|
---|
1812 | cppchar_t i = 0, overflow = 0;
|
---|
1813 | int digits_found = 0;
|
---|
1814 |
|
---|
1815 | while (str < limit)
|
---|
1816 | {
|
---|
1817 | c = *str;
|
---|
1818 | if (! ISXDIGIT (c))
|
---|
1819 | break;
|
---|
1820 | str++;
|
---|
1821 | overflow |= i ^ (i << 4 >> 4);
|
---|
1822 | i = (i << 4) + hex_digit_value (c);
|
---|
1823 | digits_found = 1;
|
---|
1824 | }
|
---|
1825 |
|
---|
1826 | if (!digits_found)
|
---|
1827 | cpp_error (pfile, DL_ERROR,
|
---|
1828 | "\\x used with no following hex digits");
|
---|
1829 |
|
---|
1830 | if (overflow | (i != (i & mask)))
|
---|
1831 | {
|
---|
1832 | cpp_error (pfile, DL_PEDWARN,
|
---|
1833 | "hex escape sequence out of range");
|
---|
1834 | i &= mask;
|
---|
1835 | }
|
---|
1836 | c = i;
|
---|
1837 | }
|
---|
1838 | break;
|
---|
1839 |
|
---|
1840 | case '0': case '1': case '2': case '3':
|
---|
1841 | case '4': case '5': case '6': case '7':
|
---|
1842 | {
|
---|
1843 | size_t count = 0;
|
---|
1844 | cppchar_t i = c - '0';
|
---|
1845 |
|
---|
1846 | while (str < limit && ++count < 3)
|
---|
1847 | {
|
---|
1848 | c = *str;
|
---|
1849 | if (c < '0' || c > '7')
|
---|
1850 | break;
|
---|
1851 | str++;
|
---|
1852 | i = (i << 3) + c - '0';
|
---|
1853 | }
|
---|
1854 |
|
---|
1855 | if (i != (i & mask))
|
---|
1856 | {
|
---|
1857 | cpp_error (pfile, DL_PEDWARN,
|
---|
1858 | "octal escape sequence out of range");
|
---|
1859 | i &= mask;
|
---|
1860 | }
|
---|
1861 | c = i;
|
---|
1862 | }
|
---|
1863 | break;
|
---|
1864 |
|
---|
1865 | default:
|
---|
1866 | unknown = 1;
|
---|
1867 | break;
|
---|
1868 | }
|
---|
1869 |
|
---|
1870 | if (unknown)
|
---|
1871 | {
|
---|
1872 | if (ISGRAPH (c))
|
---|
1873 | cpp_error (pfile, DL_PEDWARN,
|
---|
1874 | "unknown escape sequence '\\%c'", (int) c);
|
---|
1875 | else
|
---|
1876 | cpp_error (pfile, DL_PEDWARN,
|
---|
1877 | "unknown escape sequence: '\\%03o'", (int) c);
|
---|
1878 | }
|
---|
1879 |
|
---|
1880 | if (c > mask)
|
---|
1881 | {
|
---|
1882 | cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
|
---|
1883 | c &= mask;
|
---|
1884 | }
|
---|
1885 |
|
---|
1886 | *pstr = str;
|
---|
1887 | return c;
|
---|
1888 | }
|
---|
1889 |
|
---|
1890 | /* Interpret a (possibly wide) character constant in TOKEN.
|
---|
1891 | WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
|
---|
1892 | points to a variable that is filled in with the number of
|
---|
1893 | characters seen, and UNSIGNEDP to a variable that indicates whether
|
---|
1894 | the result has signed type. */
|
---|
1895 | cppchar_t
|
---|
1896 | cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
|
---|
1897 | cpp_reader *pfile;
|
---|
1898 | const cpp_token *token;
|
---|
1899 | unsigned int *pchars_seen;
|
---|
1900 | int *unsignedp;
|
---|
1901 | {
|
---|
1902 | const unsigned char *str = token->val.str.text;
|
---|
1903 | const unsigned char *limit = str + token->val.str.len;
|
---|
1904 | unsigned int chars_seen = 0;
|
---|
1905 | size_t width, max_chars;
|
---|
1906 | cppchar_t c, mask, result = 0;
|
---|
1907 | bool unsigned_p;
|
---|
1908 |
|
---|
1909 | #ifdef MULTIBYTE_CHARS
|
---|
1910 | (void) local_mbtowc (NULL, NULL, 0);
|
---|
1911 | #endif
|
---|
1912 |
|
---|
1913 | /* Width in bits. */
|
---|
1914 | if (token->type == CPP_CHAR)
|
---|
1915 | {
|
---|
1916 | width = CPP_OPTION (pfile, char_precision);
|
---|
1917 | max_chars = CPP_OPTION (pfile, int_precision) / width;
|
---|
1918 | unsigned_p = CPP_OPTION (pfile, unsigned_char);
|
---|
1919 | }
|
---|
1920 | else
|
---|
1921 | {
|
---|
1922 | width = CPP_OPTION (pfile, wchar_precision);
|
---|
1923 | max_chars = 1;
|
---|
1924 | unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
|
---|
1925 | }
|
---|
1926 |
|
---|
1927 | if (width < BITS_PER_CPPCHAR_T)
|
---|
1928 | mask = ((cppchar_t) 1 << width) - 1;
|
---|
1929 | else
|
---|
1930 | mask = ~0;
|
---|
1931 |
|
---|
1932 | while (str < limit)
|
---|
1933 | {
|
---|
1934 | #ifdef MULTIBYTE_CHARS
|
---|
1935 | wchar_t wc;
|
---|
1936 | int char_len;
|
---|
1937 |
|
---|
1938 | char_len = local_mbtowc (&wc, str, limit - str);
|
---|
1939 | if (char_len == -1)
|
---|
1940 | {
|
---|
1941 | cpp_error (pfile, DL_WARNING,
|
---|
1942 | "ignoring invalid multibyte character");
|
---|
1943 | c = *str++;
|
---|
1944 | }
|
---|
1945 | else
|
---|
1946 | {
|
---|
1947 | str += char_len;
|
---|
1948 | c = wc;
|
---|
1949 | }
|
---|
1950 | #else
|
---|
1951 | c = *str++;
|
---|
1952 | #endif
|
---|
1953 |
|
---|
1954 | if (c == '\\')
|
---|
1955 | c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
|
---|
1956 |
|
---|
1957 | #ifdef MAP_CHARACTER
|
---|
1958 | if (ISPRINT (c))
|
---|
1959 | c = MAP_CHARACTER (c);
|
---|
1960 | #endif
|
---|
1961 |
|
---|
1962 | chars_seen++;
|
---|
1963 |
|
---|
1964 | /* Truncate the character, scale the result and merge the two. */
|
---|
1965 | c &= mask;
|
---|
1966 | if (width < BITS_PER_CPPCHAR_T)
|
---|
1967 | result = (result << width) | c;
|
---|
1968 | else
|
---|
1969 | result = c;
|
---|
1970 | }
|
---|
1971 |
|
---|
1972 | if (chars_seen == 0)
|
---|
1973 | cpp_error (pfile, DL_ERROR, "empty character constant");
|
---|
1974 | else if (chars_seen > 1)
|
---|
1975 | {
|
---|
1976 | /* Multichar charconsts are of type int and therefore signed. */
|
---|
1977 | unsigned_p = 0;
|
---|
1978 |
|
---|
1979 | if (chars_seen > max_chars)
|
---|
1980 | {
|
---|
1981 | chars_seen = max_chars;
|
---|
1982 | cpp_error (pfile, DL_WARNING,
|
---|
1983 | "character constant too long for its type");
|
---|
1984 | }
|
---|
1985 | else if (CPP_OPTION (pfile, warn_multichar))
|
---|
1986 | cpp_error (pfile, DL_WARNING, "multi-character character constant");
|
---|
1987 | }
|
---|
1988 |
|
---|
1989 | /* Sign-extend or truncate the constant to cppchar_t. The value is
|
---|
1990 | in WIDTH bits, but for multi-char charconsts it's value is the
|
---|
1991 | full target type's width. */
|
---|
1992 | if (chars_seen > 1)
|
---|
1993 | width *= max_chars;
|
---|
1994 | if (width < BITS_PER_CPPCHAR_T)
|
---|
1995 | {
|
---|
1996 | mask = ((cppchar_t) 1 << width) - 1;
|
---|
1997 | if (unsigned_p || !(result & (1 << (width - 1))))
|
---|
1998 | result &= mask;
|
---|
1999 | else
|
---|
2000 | result |= ~mask;
|
---|
2001 | }
|
---|
2002 |
|
---|
2003 | *pchars_seen = chars_seen;
|
---|
2004 | *unsignedp = unsigned_p;
|
---|
2005 | return result;
|
---|
2006 | }
|
---|
2007 |
|
---|
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   to its limit.  BUFF is evaluated twice, so it must have no side
   effects.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* MIN_EXTRA is parenthesized so that an argument containing a
   low-precedence operator (e.g. ?:) is not split by the addition.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2022 |
|
---|
2023 | /* Create a new allocation buffer. Place the control block at the end
|
---|
2024 | of the buffer, so that buffer overflows will cause immediate chaos. */
|
---|
2025 | static _cpp_buff *
|
---|
2026 | new_buff (len)
|
---|
2027 | size_t len;
|
---|
2028 | {
|
---|
2029 | _cpp_buff *result;
|
---|
2030 | unsigned char *base;
|
---|
2031 |
|
---|
2032 | if (len < MIN_BUFF_SIZE)
|
---|
2033 | len = MIN_BUFF_SIZE;
|
---|
2034 | len = CPP_ALIGN (len);
|
---|
2035 |
|
---|
2036 | base = xmalloc (len + sizeof (_cpp_buff));
|
---|
2037 | result = (_cpp_buff *) (base + len);
|
---|
2038 | result->base = base;
|
---|
2039 | result->cur = base;
|
---|
2040 | result->limit = base + len;
|
---|
2041 | result->next = NULL;
|
---|
2042 | return result;
|
---|
2043 | }
|
---|
2044 |
|
---|
2045 | /* Place a chain of unwanted allocation buffers on the free list. */
|
---|
2046 | void
|
---|
2047 | _cpp_release_buff (pfile, buff)
|
---|
2048 | cpp_reader *pfile;
|
---|
2049 | _cpp_buff *buff;
|
---|
2050 | {
|
---|
2051 | _cpp_buff *end = buff;
|
---|
2052 |
|
---|
2053 | while (end->next)
|
---|
2054 | end = end->next;
|
---|
2055 | end->next = pfile->free_buffs;
|
---|
2056 | pfile->free_buffs = buff;
|
---|
2057 | }
|
---|
2058 |
|
---|
2059 | /* Return a free buffer of size at least MIN_SIZE. */
|
---|
2060 | _cpp_buff *
|
---|
2061 | _cpp_get_buff (pfile, min_size)
|
---|
2062 | cpp_reader *pfile;
|
---|
2063 | size_t min_size;
|
---|
2064 | {
|
---|
2065 | _cpp_buff *result, **p;
|
---|
2066 |
|
---|
2067 | for (p = &pfile->free_buffs;; p = &(*p)->next)
|
---|
2068 | {
|
---|
2069 | size_t size;
|
---|
2070 |
|
---|
2071 | if (*p == NULL)
|
---|
2072 | return new_buff (min_size);
|
---|
2073 | result = *p;
|
---|
2074 | size = result->limit - result->base;
|
---|
2075 | /* Return a buffer that's big enough, but don't waste one that's
|
---|
2076 | way too big. */
|
---|
2077 | if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
|
---|
2078 | break;
|
---|
2079 | }
|
---|
2080 |
|
---|
2081 | *p = result->next;
|
---|
2082 | result->next = NULL;
|
---|
2083 | result->cur = result->base;
|
---|
2084 | return result;
|
---|
2085 | }
|
---|
2086 |
|
---|
2087 | /* Creates a new buffer with enough space to hold the uncommitted
|
---|
2088 | remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
|
---|
2089 | the excess bytes to the new buffer. Chains the new buffer after
|
---|
2090 | BUFF, and returns the new buffer. */
|
---|
2091 | _cpp_buff *
|
---|
2092 | _cpp_append_extend_buff (pfile, buff, min_extra)
|
---|
2093 | cpp_reader *pfile;
|
---|
2094 | _cpp_buff *buff;
|
---|
2095 | size_t min_extra;
|
---|
2096 | {
|
---|
2097 | size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
|
---|
2098 | _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
|
---|
2099 |
|
---|
2100 | buff->next = new_buff;
|
---|
2101 | memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
|
---|
2102 | return new_buff;
|
---|
2103 | }
|
---|
2104 |
|
---|
2105 | /* Creates a new buffer with enough space to hold the uncommitted
|
---|
2106 | remaining bytes of the buffer pointed to by BUFF, and at least
|
---|
2107 | MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
|
---|
2108 | Chains the new buffer before the buffer pointed to by BUFF, and
|
---|
2109 | updates the pointer to point to the new buffer. */
|
---|
2110 | void
|
---|
2111 | _cpp_extend_buff (pfile, pbuff, min_extra)
|
---|
2112 | cpp_reader *pfile;
|
---|
2113 | _cpp_buff **pbuff;
|
---|
2114 | size_t min_extra;
|
---|
2115 | {
|
---|
2116 | _cpp_buff *new_buff, *old_buff = *pbuff;
|
---|
2117 | size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
|
---|
2118 |
|
---|
2119 | new_buff = _cpp_get_buff (pfile, size);
|
---|
2120 | memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
|
---|
2121 | new_buff->next = old_buff;
|
---|
2122 | *pbuff = new_buff;
|
---|
2123 | }
|
---|
2124 |
|
---|
2125 | /* Free a chain of buffers starting at BUFF. */
|
---|
2126 | void
|
---|
2127 | _cpp_free_buff (buff)
|
---|
2128 | _cpp_buff *buff;
|
---|
2129 | {
|
---|
2130 | _cpp_buff *next;
|
---|
2131 |
|
---|
2132 | for (; buff; buff = next)
|
---|
2133 | {
|
---|
2134 | next = buff->next;
|
---|
2135 | free (buff->base);
|
---|
2136 | }
|
---|
2137 | }
|
---|
2138 |
|
---|
2139 | /* Allocate permanent, unaligned storage of length LEN. */
|
---|
2140 | unsigned char *
|
---|
2141 | _cpp_unaligned_alloc (pfile, len)
|
---|
2142 | cpp_reader *pfile;
|
---|
2143 | size_t len;
|
---|
2144 | {
|
---|
2145 | _cpp_buff *buff = pfile->u_buff;
|
---|
2146 | unsigned char *result = buff->cur;
|
---|
2147 |
|
---|
2148 | if (len > (size_t) (buff->limit - result))
|
---|
2149 | {
|
---|
2150 | buff = _cpp_get_buff (pfile, len);
|
---|
2151 | buff->next = pfile->u_buff;
|
---|
2152 | pfile->u_buff = buff;
|
---|
2153 | result = buff->cur;
|
---|
2154 | }
|
---|
2155 |
|
---|
2156 | buff->cur = result + len;
|
---|
2157 | return result;
|
---|
2158 | }
|
---|
2159 |
|
---|
2160 | /* Allocate permanent, unaligned storage of length LEN from a_buff.
|
---|
2161 | That buffer is used for growing allocations when saving macro
|
---|
2162 | replacement lists in a #define, and when parsing an answer to an
|
---|
2163 | assertion in #assert, #unassert or #if (and therefore possibly
|
---|
2164 | whilst expanding macros). It therefore must not be used by any
|
---|
2165 | code that they might call: specifically the lexer and the guts of
|
---|
2166 | the macro expander.
|
---|
2167 |
|
---|
2168 | All existing other uses clearly fit this restriction: storing
|
---|
2169 | registered pragmas during initialization. */
|
---|
2170 | unsigned char *
|
---|
2171 | _cpp_aligned_alloc (pfile, len)
|
---|
2172 | cpp_reader *pfile;
|
---|
2173 | size_t len;
|
---|
2174 | {
|
---|
2175 | _cpp_buff *buff = pfile->a_buff;
|
---|
2176 | unsigned char *result = buff->cur;
|
---|
2177 |
|
---|
2178 | if (len > (size_t) (buff->limit - result))
|
---|
2179 | {
|
---|
2180 | buff = _cpp_get_buff (pfile, len);
|
---|
2181 | buff->next = pfile->a_buff;
|
---|
2182 | pfile->a_buff = buff;
|
---|
2183 | result = buff->cur;
|
---|
2184 | }
|
---|
2185 |
|
---|
2186 | buff->cur = result + len;
|
---|
2187 | return result;
|
---|
2188 | }
|
---|