/* StreamTokenizer.java -- parses streams of characters into tokens
   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */

package java.io;

/**
 * This class parses streams of characters into tokens. There are a
 * million-zillion flags that can be set to control the parsing, as
 * described under the various method headings.
 * <p>
 * Every character in the range 0x00 to 0xFF carries a set of attributes
 * (whitespace, alphabetic, numeric, quote, comment) that decide how
 * <code>nextToken</code> treats it. Characters above 0xFF are always
 * treated as alphabetic.
 *
 * @author Warren Levy <warrenl@cygnus.com>
 * @date October 25, 1998.
 */
/* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
 * "The Java Language Specification", ISBN 0-201-63451-1
 * plus online API docs for JDK 1.2 beta from http://www.javasoft.com.
 * Status: Believed complete and correct.
 */

public class StreamTokenizer
{
  /** A constant indicating that the end of the stream has been read. */
  public static final int TT_EOF = -1;

  /** A constant indicating that the end of the line has been read. */
  public static final int TT_EOL = '\n';

  /** A constant indicating that a number token has been read. */
  public static final int TT_NUMBER = -2;

  /** A constant indicating that a word token has been read. */
  public static final int TT_WORD = -3;

  /** A constant indicating that no tokens have been read yet. */
  private static final int TT_NONE = -4;

  /**
   * Contains the type of the token read resulting from a call to nextToken.
   * The rules are as follows:
   * <ul>
   * <li>For a token consisting of a single ordinary character, this is the
   *     value of that character.
   * <li>For a quoted string, this is the value of the quote character
   * <li>For a word, this is TT_WORD
   * <li>For a number, this is TT_NUMBER
   * <li>For the end of the line, this is TT_EOL
   * <li>For the end of the stream, this is TT_EOF
   * </ul>
   */
  public int ttype = TT_NONE;

  /** The String associated with word and string tokens. */
  public String sval;

  /** The numeric value associated with number tokens. */
  public double nval;

  /* Indicates whether end-of-line is recognized as a token. */
  private boolean eolSignificant = false;

  /* Indicates whether word tokens are automatically made lower case. */
  private boolean lowerCase = false;

  /* Indicates whether C++ style comments are recognized and skipped. */
  private boolean slashSlash = false;

  /* Indicates whether C style comments are recognized and skipped. */
  private boolean slashStar = false;

  /* Attribute tables of each byte from 0x00 to 0xFF. */
  private final boolean[] whitespace = new boolean[256];
  private final boolean[] alphabetic = new boolean[256];
  private final boolean[] numeric = new boolean[256];
  private final boolean[] quote = new boolean[256];
  private final boolean[] comment = new boolean[256];

  /* The Reader associated with this class (one character of pushback). */
  private final PushbackReader in;

  /* Indicates if a token has been pushed back. */
  private boolean pushedBack = false;

  /* Contains the current line number of the reader. */
  private int lineNumber = 1;

  /**
   * This method reads bytes from an <code>InputStream</code> and tokenizes
   * them. The stream is wrapped in an <code>InputStreamReader</code> using
   * the platform default encoding. For details on how this method operates
   * by default, see <code>StreamTokenizer(Reader)</code>.
   *
   * @param is The <code>InputStream</code> to read from
   *
   * @deprecated Since JDK 1.1.
   */
  public StreamTokenizer(InputStream is)
  {
    this(new InputStreamReader(is));
  }

  /**
   * This method initializes a new <code>StreamTokenizer</code> to read
   * characters from a <code>Reader</code> and parse them. Only characters
   * in the range of 0x0000 to 0x00FF have configurable attributes.
   * <p>
   * This constructor sets up the parsing table to parse the stream in the
   * following manner:
   * <ul>
   * <li>The values 'A' through 'Z', 'a' through 'z' and 0xA0 through 0xFF
   *     are initialized as alphabetic
   * <li>The values 0x00 through 0x20 are initialized as whitespace
   * <li>The values '\'' and '"' are initialized as quote characters
   * <li>'/' is a comment character
   * <li>Numbers will be parsed
   * <li>EOL is not treated as significant
   * <li>C and C++ (//) comments are not recognized
   * </ul>
   *
   * @param r The <code>Reader</code> to read chars from
   */
  public StreamTokenizer(Reader r)
  {
    in = new PushbackReader(r);

    whitespaceChars(0x00, 0x20);
    wordChars('A', 'Z');
    wordChars('a', 'z');
    wordChars(0xA0, 0xFF);
    commentChar('/');
    quoteChar('\'');
    quoteChar('"');
    parseNumbers();
  }

  /**
   * This method makes the specified character a comment character. Any
   * other attribute previously set on the character is cleared, otherwise
   * a character that is also alphabetic or numeric would never be seen as
   * a comment start by <code>nextToken</code>. Values outside the range
   * 0 to 255 are ignored.
   *
   * @param ch The character to set the comment attribute for, passed as
   *           an int
   */
  public void commentChar(int ch)
  {
    if (ch >= 0 && ch <= 255)
      {
        resetChar(ch);
        comment[ch] = true;
      }
  }

  /**
   * This method sets a flag that indicates whether or not the end of line
   * sequence terminates and is a token. This defaults to <code>false</code>.
   *
   * @param flag <code>true</code> if EOL is significant, <code>false</code>
   *             otherwise
   */
  public void eolIsSignificant(boolean flag)
  {
    eolSignificant = flag;
  }

  /**
   * This method returns the current line number. Note that if the
   * <code>pushBack()</code> method is called, it has no effect on the
   * line number returned by this method.
   *
   * @return The current line number
   */
  public int lineno()
  {
    return lineNumber;
  }

  /**
   * This method sets a flag that indicates whether or not alphabetic
   * tokens that are returned should be converted to lower case.
   *
   * @param flag <code>true</code> to convert to lower case,
   *             <code>false</code> otherwise
   */
  public void lowerCaseMode(boolean flag)
  {
    lowerCase = flag;
  }

  private boolean isWhitespace(int ch)
  {
    return (ch >= 0 && ch <= 255 && whitespace[ch]);
  }

  /* Characters above 0xFF have no table entry and count as alphabetic. */
  private boolean isAlphabetic(int ch)
  {
    return ((ch > 255) || (ch >= 0 && alphabetic[ch]));
  }

  private boolean isNumeric(int ch)
  {
    return (ch >= 0 && ch <= 255 && numeric[ch]);
  }

  private boolean isQuote(int ch)
  {
    return (ch >= 0 && ch <= 255 && quote[ch]);
  }

  private boolean isComment(int ch)
  {
    return (ch >= 0 && ch <= 255 && comment[ch]);
  }

  /**
   * This method reads the next token from the stream. It sets the
   * <code>ttype</code> variable to the appropriate token type and
   * returns it. It also can set <code>sval</code> or <code>nval</code>
   * as described below. The parsing strategy is as follows:
   * <ul>
   * <li>Skip any whitespace characters.
   * <li>If a numeric character is encountered, attempt to parse a numeric
   * value. Leading '-' characters indicate a numeric only if followed by
   * another non-'-' numeric. The value of the numeric token is terminated
   * by either the first non-numeric encountered, or the second occurrence of
   * '-' or '.'. The token type returned is TT_NUMBER and <code>nval</code>
   * is set to the value parsed.
   * <li>If an alphabetic character is parsed, all subsequent characters
   * are read until the first non-alphabetic and non-numeric character is
   * encountered. The token type returned is TT_WORD and the value parsed
   * is stored in <code>sval</code>. If lower case mode is set, the token
   * stored in <code>sval</code> is converted to lower case. Any character
   * that is neither alphabetic nor numeric (such as white space, a quote,
   * or a comment character) terminates the word.
   * <li>If a comment character is parsed, then all remaining characters on
   * the current line are skipped and another token is parsed. Any EOL or
   * EOF's encountered are not discarded, but rather terminate the comment.
   * <li>If a quote character is parsed, then all characters up to the
   * second occurrence of the same quote character are parsed into a
   * <code>String</code>. This <code>String</code> is stored as
   * <code>sval</code>, but is not converted to lower case, even if lower case
   * mode is enabled. The token type returned is the value of the quote
   * character encountered. An unescaped end of line or the end of the
   * stream also terminates the string. Any escape sequences
   * (\a (bell), \b (backspace), \t (HTAB), \n (linefeed), \v (VTAB),
   * \f (form feed), \r (carriage return), \" (double quote),
   * \' (single quote), \\ (backslash), \XXX (octal escape)) are converted
   * to the appropriate char values. For any other escaped character the
   * backslash is dropped and the character itself is kept.
   * Unicode characters like ('\ u0000') are not recognized.
   * <li>If the C++ comment sequence "//" is encountered, and the parser
   * is configured to handle that sequence, then the remainder of the line
   * is skipped and another token is read exactly as if a character with
   * the comment attribute was encountered.
   * <li>If the C comment sequence "/*" is encountered, and the parser
   * is configured to handle that sequence, then all characters up to and
   * including the comment terminator sequence are discarded and another
   * token is parsed.
   * <li>If all cases above are not met, then the character is an ordinary
   * character that is parsed as a token by itself. The char encountered
   * is returned as the token type.
   * </ul>
   *
   * @return The token type
   * @exception IOException If an I/O error occurs
   */
  public int nextToken() throws IOException
  {
    if (pushedBack)
      {
        pushedBack = false;
        // A push back before the first read has nothing to replay.
        if (ttype != TT_NONE)
          return ttype;
      }

    sval = null;

    // Each pass scans for one token. Comments produce no token, so they
    // restart the scan; looping (rather than recursing) keeps the stack
    // flat no matter how many comments follow each other.
    while (true)
      {
        int ch;

        // Skip whitespace. Deal with EOL along the way.
        while (isWhitespace(ch = in.read()))
          if (ch == '\n' || ch == '\r')
            {
              lineNumber++;

              // Throw away \n if in combination with \r.
              if (ch == '\r')
                skipLineFeed();
              if (eolSignificant)
                return (ttype = TT_EOL);
            }

        if (ch == '/')
          {
            int next = in.read();
            if (next == '/' && slashSlash)
              {
                skipRestOfLine();
                continue;
              }
            if (next == '*' && slashStar)
              {
                skipBlockComment();
                continue;
              }
            // Not a recognized comment opener: '/' is classified below.
            if (next != TT_EOF)
              in.unread(next);
          }

        if (ch == TT_EOF)
          return (ttype = TT_EOF);
        if (isNumeric(ch))
          return readNumber(ch);
        if (isAlphabetic(ch))
          return readWord(ch);
        if (isComment(ch))
          {
            skipRestOfLine();
            continue;
          }
        if (isQuote(ch))
          return readQuoted(ch);

        // Ordinary character: it is its own token.
        return (ttype = ch);
      }
  }

  /* Consumes a '\n' directly following an already consumed '\r'. */
  private void skipLineFeed() throws IOException
  {
    int next = in.read();
    if (next != '\n' && next != TT_EOF)
      in.unread(next);
  }

  /*
   * Discards characters up to the end of the line. The line terminator is
   * pushed back so the whitespace scan can count the line and report EOL.
   */
  private void skipRestOfLine() throws IOException
  {
    int ch;
    do
      ch = in.read();
    while (ch != '\n' && ch != '\r' && ch != TT_EOF);

    if (ch != TT_EOF)
      in.unread(ch);
  }

  /*
   * Discards a C style comment whose opening sequence was already consumed,
   * counting the lines it spans. An unterminated comment ends at EOF.
   */
  private void skipBlockComment() throws IOException
  {
    while (true)
      {
        int ch = in.read();
        if (ch == TT_EOF)
          return;

        if (ch == '*')
          {
            int next = in.read();
            if (next == '/')
              return;
            // Re-examine it: it may be the '*' of the real terminator.
            if (next != TT_EOF)
              in.unread(next);
          }
        else if (ch == '\n' || ch == '\r')
          {
            lineNumber++;
            if (ch == '\r')
              skipLineFeed();
          }
      }
  }

  /*
   * Parses a number whose first (numeric) character is ch. Returns '-' as
   * an ordinary token when a leading '-' is not followed by a digit or '.'.
   */
  private int readNumber(int ch) throws IOException
  {
    boolean isNegative = false;
    if (ch == '-')
      {
        // Read ahead to see if this is an ordinary '-' rather than numeric.
        ch = in.read();
        if (!isNumeric(ch) || ch == '-')
          {
            if (ch != TT_EOF)
              in.unread(ch);
            return (ttype = '-');
          }
        isNegative = true;
      }

    StringBuffer tokbuf = new StringBuffer();
    tokbuf.append((char) ch);

    // A leading '.' already is the decimal point of this number.
    int decCount = (ch == '.') ? 1 : 0;
    while (isNumeric(ch = in.read()) && ch != '-')
      {
        if (ch == '.' && decCount++ > 0)
          break;
        tokbuf.append((char) ch);
      }

    if (ch != TT_EOF)
      in.unread(ch);

    ttype = TT_NUMBER;
    try
      {
        nval = Double.valueOf(tokbuf.toString()).doubleValue();
      }
    catch (NumberFormatException ignored)
      {
        // Only a token without digits (a lone ".") gets here; it is
        // reported as zero.
        nval = 0.0;
      }
    if (isNegative)
      nval = -nval;
    return ttype;
  }

  /* Parses a word whose first (alphabetic) character is ch. */
  private int readWord(int ch) throws IOException
  {
    StringBuffer tokbuf = new StringBuffer();
    do
      tokbuf.append((char) ch);
    while (isAlphabetic(ch = in.read()) || isNumeric(ch));

    if (ch != TT_EOF)
      in.unread(ch);

    ttype = TT_WORD;
    sval = tokbuf.toString();
    if (lowerCase)
      sval = sval.toLowerCase();
    return ttype;
  }

  /*
   * Parses a quoted string opened by quoteCh. The string ends at the
   * matching quote, at an unescaped line terminator or at EOF.
   */
  private int readQuoted(int quoteCh) throws IOException
  {
    ttype = quoteCh;
    StringBuffer tokbuf = new StringBuffer();

    int ch;
    while ((ch = in.read()) != quoteCh && ch != '\n' && ch != '\r'
           && ch != TT_EOF)
      {
        if (ch == '\\')
          ch = readEscape();
        tokbuf.append((char) ch);
      }

    // Throw away the matching quote char, but keep a line terminator
    // so that the next call can count the line.
    if (ch != quoteCh && ch != TT_EOF)
      in.unread(ch);

    sval = tokbuf.toString();
    return ttype;
  }

  /*
   * Decodes the escape sequence following an already consumed backslash
   * and returns the resulting character value.
   */
  private int readEscape() throws IOException
  {
    int ch = in.read();
    switch (ch)
      {
      case 'a':
        return 0x7;
      case 'b':
        return '\b';
      case 'f':
        return 0xC;
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case 'v':
        return 0xB;
      default:
        break;
      }

    // Quotes, backslash, line terminators and unknown escapes stand for
    // themselves. Nothing may be pushed back here, or the character would
    // be read (and appended) a second time. Note that a backslash directly
    // before the end of the stream yields (char) -1.
    if (ch < '0' || ch > '7')
      return ch;

    // Octal escape: up to three digits, the third one only if the first
    // digit is at most 3 so the value fits in 8 bits.
    int first = ch;
    int value = ch - '0';
    int next = in.read();
    if (next >= '0' && next <= '7')
      {
        value = value * 8 + (next - '0');
        next = in.read();
        if (next >= '0' && next <= '7' && first <= '3')
          {
            value = value * 8 + (next - '0');
            next = in.read();
          }
      }

    // next is the first character that does not belong to the escape.
    if (next != TT_EOF)
      in.unread(next);
    return value;
  }

  /* Clears every attribute of ch; the caller guarantees 0 <= ch <= 255. */
  private void resetChar(int ch)
  {
    whitespace[ch] = alphabetic[ch] = numeric[ch] = quote[ch] = comment[ch] =
      false;
  }

  /**
   * This method makes the specified character an ordinary character. This
   * means that none of the attributes (whitespace, alphabetic, numeric,
   * quote, or comment) will be set on this character. This character will
   * parse as its own token. Values outside the range 0 to 255 are ignored.
   *
   * @param ch The character to make ordinary, passed as an int
   */
  public void ordinaryChar(int ch)
  {
    if (ch >= 0 && ch <= 255)
      resetChar(ch);
  }

  /**
   * This method makes all the characters in the specified range, range
   * terminators included, ordinary. This means that none of the attributes
   * (whitespace, alphabetic, numeric, quote, or comment) will be set on
   * any of the characters in the range. This makes each character in this
   * range parse as its own token. The range is clipped to 0 through 255.
   *
   * @param low The low end of the range of values to make ordinary
   * @param hi The high end of the range of values to make ordinary
   */
  public void ordinaryChars(int low, int hi)
  {
    if (low < 0)
      low = 0;
    if (hi > 255)
      hi = 255;
    for (int i = low; i <= hi; i++)
      resetChar(i);
  }

  /**
   * This method sets the numeric attribute on the characters '0' - '9' and
   * the characters '.' and '-'. Other attributes of these characters are
   * left untouched.
   */
  public void parseNumbers()
  {
    for (int i = 0; i <= 9; i++)
      numeric['0' + i] = true;

    numeric['.'] = true;
    numeric['-'] = true;
  }

  /**
   * Puts the current token back into the StreamTokenizer so
   * <code>nextToken</code> will return the same value on the next call.
   * May cause the lineno method to return an incorrect value
   * if lineno is called before the next call to nextToken.
   */
  public void pushBack()
  {
    pushedBack = true;
  }

  /**
   * This method makes the specified character a quote character. Any
   * other attribute previously set on the character is cleared, otherwise
   * a character that is also alphabetic or numeric would never be seen as
   * a quote by <code>nextToken</code>. Values outside the range 0 to 255
   * are ignored.
   *
   * @param ch The character to set the quote attribute for, passed as an
   *           int.
   */
  public void quoteChar(int ch)
  {
    if (ch >= 0 && ch <= 255)
      {
        resetChar(ch);
        quote[ch] = true;
      }
  }

  /**
   * This method removes all attributes (whitespace, alphabetic, numeric,
   * quote, and comment) from all characters. It is equivalent to calling
   * <code>ordinaryChars(0x00, 0xFF)</code>.
   *
   * @see #ordinaryChars(int, int)
   */
  public void resetSyntax()
  {
    ordinaryChars(0x00, 0xFF);
  }

  /**
   * This method sets a flag that indicates whether or not "C++" language style
   * comments ("//" comments through EOL ) are handled by the parser.
   * If this is <code>true</code> commented out sequences are skipped and
   * ignored by the parser. This defaults to <code>false</code>.
   *
   * @param flag <code>true</code> to recognize and handle "C++" style
   *             comments, <code>false</code> otherwise
   */
  public void slashSlashComments(boolean flag)
  {
    slashSlash = flag;
  }

  /**
   * This method sets a flag that indicates whether or not "C" language style
   * comments (with nesting not allowed) are handled by the parser.
   * If this is <code>true</code> commented out sequences are skipped and
   * ignored by the parser. This defaults to <code>false</code>.
   *
   * @param flag <code>true</code> to recognize and handle "C" style comments,
   *             <code>false</code> otherwise
   */
  public void slashStarComments(boolean flag)
  {
    slashStar = flag;
  }

  /**
   * This method returns the current token value as a <code>String</code> in
   * the form "Token[x], line n", where 'n' is the current line number and
   * 'x' is determined as follows.
   * <p>
   * <ul>
   * <li>If no token has been read, then 'x' is "NOTHING"
   * <li>If <code>ttype</code> is TT_EOF, then 'x' is "EOF"
   * <li>If <code>ttype</code> is TT_EOL, then 'x' is "EOL"
   * <li>If <code>ttype</code> is TT_WORD, then 'x' is <code>sval</code>
   * <li>If <code>ttype</code> is TT_NUMBER, then 'x' is "n=strnval" where
   * 'strnval' is <code>String.valueOf(nval)</code>.
   * <li>If <code>ttype</code> is a quote character, then 'x' is
   * <code>sval</code>
   * <li>For all other cases, 'x' is <code>ttype</code> as a character
   * enclosed in single quotes
   * </ul>
   *
   * @return The description of the current token
   */
  public String toString()
  {
    String tempstr;
    if (ttype == TT_EOF)
      tempstr = "EOF";
    else if (ttype == TT_EOL)
      tempstr = "EOL";
    else if (ttype == TT_WORD)
      tempstr = sval;
    else if (ttype == TT_NUMBER)
      tempstr = "n=" + nval;
    else if (ttype == TT_NONE)
      tempstr = "NOTHING";
    else if (isQuote(ttype))
      tempstr = sval;
    else // must be an ordinary char.
      tempstr = "\'" + (char) ttype + "\'";

    return "Token[" + tempstr + "], line " + lineNumber;
  }

  /**
   * This method sets the whitespace attribute for all characters in the
   * specified range, range terminators included. Any other attribute of
   * these characters is cleared. The range is clipped to 0 through 255.
   *
   * @param low The low end of the range of values to set the whitespace
   *            attribute for
   * @param hi The high end of the range of values to set the whitespace
   *           attribute for
   */
  public void whitespaceChars(int low, int hi)
  {
    if (low < 0)
      low = 0;
    if (hi > 255)
      hi = 255;
    for (int i = low; i <= hi; i++)
      {
        resetChar(i);
        whitespace[i] = true;
      }
  }

  /**
   * This method sets the alphabetic attribute for all characters in the
   * specified range, range terminators included. Other attributes of
   * these characters are left untouched. The range is clipped to 0
   * through 255.
   *
   * @param low The low end of the range of values to set the alphabetic
   *            attribute for
   * @param hi The high end of the range of values to set the alphabetic
   *           attribute for
   */
  public void wordChars(int low, int hi)
  {
    if (low < 0)
      low = 0;
    if (hi > 255)
      hi = 255;
    for (int i = low; i <= hi; i++)
      alphabetic[i] = true;
  }
}