source: trunk/src/script/parser/qscriptlexer.cpp@ 815

Last change on this file since 815 was 769, checked in by Dmitry A. Kuminov, 15 years ago

trunk: Merged in qt 4.6.3 sources from branches/vendor/nokia/qt.

  • Property svn:eol-style set to native
File size: 40.6 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
6**
7** This file is part of the QtScript module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL-ONLY$
10** GNU Lesser General Public License Usage
11** This file may be used under the terms of the GNU Lesser
12** General Public License version 2.1 as published by the Free Software
13** Foundation and appearing in the file LICENSE.LGPL included in the
14** packaging of this file. Please review the following information to
15** ensure the GNU Lesser General Public License version 2.1 requirements
16** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
17**
18** If you have questions regarding the use of this file, please contact
19** Nokia at qt-info@nokia.com.
20** $QT_END_LICENSE$
21**
22****************************************************************************/
23
24#include "qscriptlexer_p.h"
25
26#include "qscriptgrammar_p.h"
27#include <ctype.h>
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31
32QT_BEGIN_NAMESPACE
33
34Q_DECL_IMPORT extern double qstrtod(const char *s00, char const **se, bool *ok);
35
// Skips the first character of a two-character line break ("\r\n" or
// "\n\r") so that the pair is subsequently handled as one line terminator.
// Expands in a context that provides `current`, `next1` and `shift()`.
#define shiftWindowsLineBreak() \
    do { \
        if (current == '\r') { \
            if (next1 == '\n') \
                shift(1); \
        } else if (current == '\n') { \
            if (next1 == '\r') \
                shift(1); \
        } \
    } while (0)
44
// Alias for double; it is the return type of integerFromString() below.
typedef double qsreal; // ###

namespace QScript {
// Defined elsewhere. lex() passes it the recorded text of a hexadecimal or
// octal literal together with the radix (16 or 8).
qsreal integerFromString(const char *buf, int size, int radix);
}
50
51QScript::Lexer::Lexer(QScriptEnginePrivate *eng)
52 : driver(eng),
53 yylineno(0),
54 size8(128), size16(128), restrKeyword(false),
55 stackToken(-1), pos(0),
56 code(0), length(0),
57 bol(true),
58 current(0), next1(0), next2(0), next3(0),
59 err(NoError),
60 check_reserved(true),
61 parenthesesState(IgnoreParentheses),
62 prohibitAutomaticSemicolon(false)
63{
64 // allocate space for read buffers
65 buffer8 = new char[size8];
66 buffer16 = new QChar[size16];
67 pattern = 0;
68 flags = 0;
69
70}
71
72QScript::Lexer::~Lexer()
73{
74 delete [] buffer8;
75 delete [] buffer16;
76}
77
78void QScript::Lexer::setCode(const QString &c, int lineno)
79{
80 errmsg = QString();
81 yylineno = lineno;
82 yycolumn = 1;
83 restrKeyword = false;
84 delimited = false;
85 stackToken = -1;
86 pos = 0;
87 code = c.unicode();
88 length = c.length();
89 bol = true;
90
91 // read first characters
92 current = (length > 0) ? code[0].unicode() : 0;
93 next1 = (length > 1) ? code[1].unicode() : 0;
94 next2 = (length > 2) ? code[2].unicode() : 0;
95 next3 = (length > 3) ? code[3].unicode() : 0;
96}
97
98void QScript::Lexer::shift(uint p)
99{
100 while (p--) {
101 ++pos;
102 ++yycolumn;
103 current = next1;
104 next1 = next2;
105 next2 = next3;
106 next3 = (pos + 3 < length) ? code[pos+3].unicode() : 0;
107 }
108}
109
110void QScript::Lexer::setDone(State s)
111{
112 state = s;
113 done = true;
114}
115
116int QScript::Lexer::findReservedWord(const QChar *c, int size) const
117{
118 switch (size) {
119 case 2: {
120 if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('o'))
121 return QScriptGrammar::T_DO;
122 else if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('f'))
123 return QScriptGrammar::T_IF;
124 else if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n'))
125 return QScriptGrammar::T_IN;
126 } break;
127
128 case 3: {
129 if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('o') && c[2] == QLatin1Char('r'))
130 return QScriptGrammar::T_FOR;
131 else if (c[0] == QLatin1Char('n') && c[1] == QLatin1Char('e') && c[2] == QLatin1Char('w'))
132 return QScriptGrammar::T_NEW;
133 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('r') && c[2] == QLatin1Char('y'))
134 return QScriptGrammar::T_TRY;
135 else if (c[0] == QLatin1Char('v') && c[1] == QLatin1Char('a') && c[2] == QLatin1Char('r'))
136 return QScriptGrammar::T_VAR;
137 else if (check_reserved) {
138 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n') && c[2] == QLatin1Char('t'))
139 return QScriptGrammar::T_RESERVED_WORD;
140 }
141 } break;
142
143 case 4: {
144 if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('a')
145 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('e'))
146 return QScriptGrammar::T_CASE;
147 else if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('l')
148 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('e'))
149 return QScriptGrammar::T_ELSE;
150 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('h')
151 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('s'))
152 return QScriptGrammar::T_THIS;
153 else if (c[0] == QLatin1Char('v') && c[1] == QLatin1Char('o')
154 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('d'))
155 return QScriptGrammar::T_VOID;
156 else if (c[0] == QLatin1Char('w') && c[1] == QLatin1Char('i')
157 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('h'))
158 return QScriptGrammar::T_WITH;
159 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('r')
160 && c[2] == QLatin1Char('u') && c[3] == QLatin1Char('e'))
161 return QScriptGrammar::T_TRUE;
162 else if (c[0] == QLatin1Char('n') && c[1] == QLatin1Char('u')
163 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('l'))
164 return QScriptGrammar::T_NULL;
165 else if (check_reserved) {
166 if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('n')
167 && c[2] == QLatin1Char('u') && c[3] == QLatin1Char('m'))
168 return QScriptGrammar::T_RESERVED_WORD;
169 else if (c[0] == QLatin1Char('b') && c[1] == QLatin1Char('y')
170 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('e'))
171 return QScriptGrammar::T_RESERVED_WORD;
172 else if (c[0] == QLatin1Char('l') && c[1] == QLatin1Char('o')
173 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('g'))
174 return QScriptGrammar::T_RESERVED_WORD;
175 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('h')
176 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('r'))
177 return QScriptGrammar::T_RESERVED_WORD;
178 else if (c[0] == QLatin1Char('g') && c[1] == QLatin1Char('o')
179 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('o'))
180 return QScriptGrammar::T_RESERVED_WORD;
181 }
182 } break;
183
184 case 5: {
185 if (c[0] == QLatin1Char('b') && c[1] == QLatin1Char('r')
186 && c[2] == QLatin1Char('e') && c[3] == QLatin1Char('a')
187 && c[4] == QLatin1Char('k'))
188 return QScriptGrammar::T_BREAK;
189 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('a')
190 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('c')
191 && c[4] == QLatin1Char('h'))
192 return QScriptGrammar::T_CATCH;
193 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('h')
194 && c[2] == QLatin1Char('r') && c[3] == QLatin1Char('o')
195 && c[4] == QLatin1Char('w'))
196 return QScriptGrammar::T_THROW;
197 else if (c[0] == QLatin1Char('w') && c[1] == QLatin1Char('h')
198 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('l')
199 && c[4] == QLatin1Char('e'))
200 return QScriptGrammar::T_WHILE;
201 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('o')
202 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('s')
203 && c[4] == QLatin1Char('t'))
204 return QScriptGrammar::T_CONST;
205 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('a')
206 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('s')
207 && c[4] == QLatin1Char('e'))
208 return QScriptGrammar::T_FALSE;
209 else if (check_reserved) {
210 if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('h')
211 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('r')
212 && c[4] == QLatin1Char('t'))
213 return QScriptGrammar::T_RESERVED_WORD;
214 else if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('u')
215 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('e')
216 && c[4] == QLatin1Char('r'))
217 return QScriptGrammar::T_RESERVED_WORD;
218 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('i')
219 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('a')
220 && c[4] == QLatin1Char('l'))
221 return QScriptGrammar::T_RESERVED_WORD;
222 else if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('l')
223 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('s')
224 && c[4] == QLatin1Char('s'))
225 return QScriptGrammar::T_RESERVED_WORD;
226 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('l')
227 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('a')
228 && c[4] == QLatin1Char('t'))
229 return QScriptGrammar::T_RESERVED_WORD;
230 }
231 } break;
232
233 case 6: {
234 if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('e')
235 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('e')
236 && c[4] == QLatin1Char('t') && c[5] == QLatin1Char('e'))
237 return QScriptGrammar::T_DELETE;
238 else if (c[0] == QLatin1Char('r') && c[1] == QLatin1Char('e')
239 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('u')
240 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('n'))
241 return QScriptGrammar::T_RETURN;
242 else if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('w')
243 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('t')
244 && c[4] == QLatin1Char('c') && c[5] == QLatin1Char('h'))
245 return QScriptGrammar::T_SWITCH;
246 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('y')
247 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('e')
248 && c[4] == QLatin1Char('o') && c[5] == QLatin1Char('f'))
249 return QScriptGrammar::T_TYPEOF;
250 else if (check_reserved) {
251 if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('x')
252 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('o')
253 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('t'))
254 return QScriptGrammar::T_RESERVED_WORD;
255 else if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('t')
256 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('t')
257 && c[4] == QLatin1Char('i') && c[5] == QLatin1Char('c'))
258 return QScriptGrammar::T_RESERVED_WORD;
259 else if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('o')
260 && c[2] == QLatin1Char('u') && c[3] == QLatin1Char('b')
261 && c[4] == QLatin1Char('l') && c[5] == QLatin1Char('e'))
262 return QScriptGrammar::T_RESERVED_WORD;
263 else if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('m')
264 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('o')
265 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('t'))
266 return QScriptGrammar::T_RESERVED_WORD;
267 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('u')
268 && c[2] == QLatin1Char('b') && c[3] == QLatin1Char('l')
269 && c[4] == QLatin1Char('i') && c[5] == QLatin1Char('c'))
270 return QScriptGrammar::T_RESERVED_WORD;
271 else if (c[0] == QLatin1Char('n') && c[1] == QLatin1Char('a')
272 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('i')
273 && c[4] == QLatin1Char('v') && c[5] == QLatin1Char('e'))
274 return QScriptGrammar::T_RESERVED_WORD;
275 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('h')
276 && c[2] == QLatin1Char('r') && c[3] == QLatin1Char('o')
277 && c[4] == QLatin1Char('w') && c[5] == QLatin1Char('s'))
278 return QScriptGrammar::T_RESERVED_WORD;
279 }
280 } break;
281
282 case 7: {
283 if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('e')
284 && c[2] == QLatin1Char('f') && c[3] == QLatin1Char('a')
285 && c[4] == QLatin1Char('u') && c[5] == QLatin1Char('l')
286 && c[6] == QLatin1Char('t'))
287 return QScriptGrammar::T_DEFAULT;
288 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('i')
289 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('a')
290 && c[4] == QLatin1Char('l') && c[5] == QLatin1Char('l')
291 && c[6] == QLatin1Char('y'))
292 return QScriptGrammar::T_FINALLY;
293 else if (check_reserved) {
294 if (c[0] == QLatin1Char('b') && c[1] == QLatin1Char('o')
295 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('l')
296 && c[4] == QLatin1Char('e') && c[5] == QLatin1Char('a')
297 && c[6] == QLatin1Char('n'))
298 return QScriptGrammar::T_RESERVED_WORD;
299 else if (c[0] == QLatin1Char('e') && c[1] == QLatin1Char('x')
300 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('e')
301 && c[4] == QLatin1Char('n') && c[5] == QLatin1Char('d')
302 && c[6] == QLatin1Char('s'))
303 return QScriptGrammar::T_RESERVED_WORD;
304 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('a')
305 && c[2] == QLatin1Char('c') && c[3] == QLatin1Char('k')
306 && c[4] == QLatin1Char('a') && c[5] == QLatin1Char('g')
307 && c[6] == QLatin1Char('e'))
308 return QScriptGrammar::T_RESERVED_WORD;
309 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('r')
310 && c[2] == QLatin1Char('i') && c[3] == QLatin1Char('v')
311 && c[4] == QLatin1Char('a') && c[5] == QLatin1Char('t')
312 && c[6] == QLatin1Char('e'))
313 return QScriptGrammar::T_RESERVED_WORD;
314 }
315 } break;
316
317 case 8: {
318 if (c[0] == QLatin1Char('c') && c[1] == QLatin1Char('o')
319 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('t')
320 && c[4] == QLatin1Char('i') && c[5] == QLatin1Char('n')
321 && c[6] == QLatin1Char('u') && c[7] == QLatin1Char('e'))
322 return QScriptGrammar::T_CONTINUE;
323 else if (c[0] == QLatin1Char('f') && c[1] == QLatin1Char('u')
324 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('c')
325 && c[4] == QLatin1Char('t') && c[5] == QLatin1Char('i')
326 && c[6] == QLatin1Char('o') && c[7] == QLatin1Char('n'))
327 return QScriptGrammar::T_FUNCTION;
328 else if (c[0] == QLatin1Char('d') && c[1] == QLatin1Char('e')
329 && c[2] == QLatin1Char('b') && c[3] == QLatin1Char('u')
330 && c[4] == QLatin1Char('g') && c[5] == QLatin1Char('g')
331 && c[6] == QLatin1Char('e') && c[7] == QLatin1Char('r'))
332 return QScriptGrammar::T_DEBUGGER;
333 else if (check_reserved) {
334 if (c[0] == QLatin1Char('a') && c[1] == QLatin1Char('b')
335 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('t')
336 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('a')
337 && c[6] == QLatin1Char('c') && c[7] == QLatin1Char('t'))
338 return QScriptGrammar::T_RESERVED_WORD;
339 else if (c[0] == QLatin1Char('v') && c[1] == QLatin1Char('o')
340 && c[2] == QLatin1Char('l') && c[3] == QLatin1Char('a')
341 && c[4] == QLatin1Char('t') && c[5] == QLatin1Char('i')
342 && c[6] == QLatin1Char('l') && c[7] == QLatin1Char('e'))
343 return QScriptGrammar::T_RESERVED_WORD;
344 }
345 } break;
346
347 case 9: {
348 if (check_reserved) {
349 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n')
350 && c[2] == QLatin1Char('t') && c[3] == QLatin1Char('e')
351 && c[4] == QLatin1Char('r') && c[5] == QLatin1Char('f')
352 && c[6] == QLatin1Char('a') && c[7] == QLatin1Char('c')
353 && c[8] == QLatin1Char('e'))
354 return QScriptGrammar::T_RESERVED_WORD;
355 else if (c[0] == QLatin1Char('t') && c[1] == QLatin1Char('r')
356 && c[2] == QLatin1Char('a') && c[3] == QLatin1Char('n')
357 && c[4] == QLatin1Char('s') && c[5] == QLatin1Char('i')
358 && c[6] == QLatin1Char('e') && c[7] == QLatin1Char('n')
359 && c[8] == QLatin1Char('t'))
360 return QScriptGrammar::T_RESERVED_WORD;
361 else if (c[0] == QLatin1Char('p') && c[1] == QLatin1Char('r')
362 && c[2] == QLatin1Char('o') && c[3] == QLatin1Char('t')
363 && c[4] == QLatin1Char('e') && c[5] == QLatin1Char('c')
364 && c[6] == QLatin1Char('t') && c[7] == QLatin1Char('e')
365 && c[8] == QLatin1Char('d'))
366 return QScriptGrammar::T_RESERVED_WORD;
367 }
368 } break;
369
370 case 10: {
371 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('n')
372 && c[2] == QLatin1Char('s') && c[3] == QLatin1Char('t')
373 && c[4] == QLatin1Char('a') && c[5] == QLatin1Char('n')
374 && c[6] == QLatin1Char('c') && c[7] == QLatin1Char('e')
375 && c[8] == QLatin1Char('o') && c[9] == QLatin1Char('f'))
376 return QScriptGrammar::T_INSTANCEOF;
377 else if (check_reserved) {
378 if (c[0] == QLatin1Char('i') && c[1] == QLatin1Char('m')
379 && c[2] == QLatin1Char('p') && c[3] == QLatin1Char('l')
380 && c[4] == QLatin1Char('e') && c[5] == QLatin1Char('m')
381 && c[6] == QLatin1Char('e') && c[7] == QLatin1Char('n')
382 && c[8] == QLatin1Char('t') && c[9] == QLatin1Char('s'))
383 return QScriptGrammar::T_RESERVED_WORD;
384 }
385 } break;
386
387 case 12: {
388 if (check_reserved) {
389 if (c[0] == QLatin1Char('s') && c[1] == QLatin1Char('y')
390 && c[2] == QLatin1Char('n') && c[3] == QLatin1Char('c')
391 && c[4] == QLatin1Char('h') && c[5] == QLatin1Char('r')
392 && c[6] == QLatin1Char('o') && c[7] == QLatin1Char('n')
393 && c[8] == QLatin1Char('i') && c[9] == QLatin1Char('z')
394 && c[10] == QLatin1Char('e') && c[11] == QLatin1Char('d'))
395 return QScriptGrammar::T_RESERVED_WORD;
396 }
397 } break;
398
399 } // switch
400
401 return -1;
402}
403
404int QScript::Lexer::lex()
405{
406 int token = 0;
407 state = Start;
408 ushort stringType = 0; // either single or double quotes
409 pos8 = pos16 = 0;
410 done = false;
411 terminator = false;
412
413 // did we push a token on the stack previously ?
414 // (after an automatic semicolon insertion)
415 if (stackToken >= 0) {
416 setDone(Other);
417 token = stackToken;
418 stackToken = -1;
419 }
420
421 while (!done) {
422 switch (state) {
423 case Start:
424 if (isWhiteSpace()) {
425 // do nothing
426 } else if (current == '/' && next1 == '/') {
427 recordStartPos();
428 shift(1);
429 state = InSingleLineComment;
430 } else if (current == '/' && next1 == '*') {
431 recordStartPos();
432 shift(1);
433 state = InMultiLineComment;
434 } else if (current == 0) {
435 syncProhibitAutomaticSemicolon();
436 if (!terminator && !delimited && !prohibitAutomaticSemicolon) {
437 // automatic semicolon insertion if program incomplete
438 token = QScriptGrammar::T_SEMICOLON;
439 stackToken = 0;
440 setDone(Other);
441 } else {
442 setDone(Eof);
443 }
444 } else if (isLineTerminator()) {
445 shiftWindowsLineBreak();
446 yylineno++;
447 yycolumn = 0;
448 bol = true;
449 terminator = true;
450 syncProhibitAutomaticSemicolon();
451 if (restrKeyword) {
452 token = QScriptGrammar::T_SEMICOLON;
453 setDone(Other);
454 }
455 } else if (current == '"' || current == '\'') {
456 recordStartPos();
457 state = InString;
458 stringType = current;
459 } else if (isIdentLetter(current)) {
460 recordStartPos();
461 record16(current);
462 state = InIdentifier;
463 } else if (current == '0') {
464 recordStartPos();
465 record8(current);
466 state = InNum0;
467 } else if (isDecimalDigit(current)) {
468 recordStartPos();
469 record8(current);
470 state = InNum;
471 } else if (current == '.' && isDecimalDigit(next1)) {
472 recordStartPos();
473 record8(current);
474 state = InDecimal;
475 } else {
476 recordStartPos();
477 token = matchPunctuator(current, next1, next2, next3);
478 if (token != -1) {
479 if (terminator && !delimited && !prohibitAutomaticSemicolon
480 && (token == QScriptGrammar::T_PLUS_PLUS
481 || token == QScriptGrammar::T_MINUS_MINUS)) {
482 // automatic semicolon insertion
483 stackToken = token;
484 token = QScriptGrammar::T_SEMICOLON;
485 }
486 setDone(Other);
487 }
488 else {
489 setDone(Bad);
490 err = IllegalCharacter;
491 errmsg = QLatin1String("Illegal character");
492 }
493 }
494 break;
495 case InString:
496 if (current == stringType) {
497 shift(1);
498 setDone(String);
499 } else if (current == 0 || isLineTerminator()) {
500 setDone(Bad);
501 err = UnclosedStringLiteral;
502 errmsg = QLatin1String("Unclosed string at end of line");
503 } else if (current == '\\') {
504 state = InEscapeSequence;
505 } else {
506 record16(current);
507 }
508 break;
509 // Escape Sequences inside of strings
510 case InEscapeSequence:
511 if (isOctalDigit(current)) {
512 if (current >= '0' && current <= '3' &&
513 isOctalDigit(next1) && isOctalDigit(next2)) {
514 record16(convertOctal(current, next1, next2));
515 shift(2);
516 state = InString;
517 } else if (isOctalDigit(current) &&
518 isOctalDigit(next1)) {
519 record16(convertOctal('0', current, next1));
520 shift(1);
521 state = InString;
522 } else if (isOctalDigit(current)) {
523 record16(convertOctal('0', '0', current));
524 state = InString;
525 } else {
526 setDone(Bad);
527 err = IllegalEscapeSequence;
528 errmsg = QLatin1String("Illegal escape squence");
529 }
530 } else if (current == 'x')
531 state = InHexEscape;
532 else if (current == 'u')
533 state = InUnicodeEscape;
534 else {
535 if (isLineTerminator()) {
536 shiftWindowsLineBreak();
537 yylineno++;
538 yycolumn = 0;
539 bol = true;
540 } else {
541 record16(singleEscape(current));
542 }
543 state = InString;
544 }
545 break;
546 case InHexEscape:
547 if (isHexDigit(current) && isHexDigit(next1)) {
548 state = InString;
549 record16(QLatin1Char(convertHex(current, next1)));
550 shift(1);
551 } else if (current == stringType) {
552 record16(QLatin1Char('x'));
553 shift(1);
554 setDone(String);
555 } else {
556 record16(QLatin1Char('x'));
557 record16(current);
558 state = InString;
559 }
560 break;
561 case InUnicodeEscape:
562 if (isHexDigit(current) && isHexDigit(next1) &&
563 isHexDigit(next2) && isHexDigit(next3)) {
564 record16(convertUnicode(current, next1, next2, next3));
565 shift(3);
566 state = InString;
567 } else if (current == stringType) {
568 record16(QLatin1Char('u'));
569 shift(1);
570 setDone(String);
571 } else {
572 setDone(Bad);
573 err = IllegalUnicodeEscapeSequence;
574 errmsg = QLatin1String("Illegal unicode escape sequence");
575 }
576 break;
577 case InSingleLineComment:
578 if (isLineTerminator()) {
579 shiftWindowsLineBreak();
580 yylineno++;
581 yycolumn = 0;
582 terminator = true;
583 bol = true;
584 if (restrKeyword) {
585 token = QScriptGrammar::T_SEMICOLON;
586 setDone(Other);
587 } else
588 state = Start;
589 } else if (current == 0) {
590 setDone(Eof);
591 }
592 break;
593 case InMultiLineComment:
594 if (current == 0) {
595 setDone(Bad);
596 err = UnclosedComment;
597 errmsg = QLatin1String("Unclosed comment at end of file");
598 } else if (isLineTerminator()) {
599 shiftWindowsLineBreak();
600 yylineno++;
601 } else if (current == '*' && next1 == '/') {
602 state = Start;
603 shift(1);
604 }
605 break;
606 case InIdentifier:
607 if (isIdentLetter(current) || isDecimalDigit(current)) {
608 record16(current);
609 break;
610 }
611 setDone(Identifier);
612 break;
613 case InNum0:
614 if (current == 'x' || current == 'X') {
615 record8(current);
616 state = InHex;
617 } else if (current == '.') {
618 record8(current);
619 state = InDecimal;
620 } else if (current == 'e' || current == 'E') {
621 record8(current);
622 state = InExponentIndicator;
623 } else if (isOctalDigit(current)) {
624 record8(current);
625 state = InOctal;
626 } else if (isDecimalDigit(current)) {
627 record8(current);
628 state = InDecimal;
629 } else {
630 setDone(Number);
631 }
632 break;
633 case InHex:
634 if (isHexDigit(current))
635 record8(current);
636 else
637 setDone(Hex);
638 break;
639 case InOctal:
640 if (isOctalDigit(current)) {
641 record8(current);
642 } else if (isDecimalDigit(current)) {
643 record8(current);
644 state = InDecimal;
645 } else {
646 setDone(Octal);
647 }
648 break;
649 case InNum:
650 if (isDecimalDigit(current)) {
651 record8(current);
652 } else if (current == '.') {
653 record8(current);
654 state = InDecimal;
655 } else if (current == 'e' || current == 'E') {
656 record8(current);
657 state = InExponentIndicator;
658 } else {
659 setDone(Number);
660 }
661 break;
662 case InDecimal:
663 if (isDecimalDigit(current)) {
664 record8(current);
665 } else if (current == 'e' || current == 'E') {
666 record8(current);
667 state = InExponentIndicator;
668 } else {
669 setDone(Number);
670 }
671 break;
672 case InExponentIndicator:
673 if (current == '+' || current == '-') {
674 record8(current);
675 } else if (isDecimalDigit(current)) {
676 record8(current);
677 state = InExponent;
678 } else {
679 setDone(Bad);
680 err = IllegalExponentIndicator;
681 errmsg = QLatin1String("Illegal syntax for exponential number");
682 }
683 break;
684 case InExponent:
685 if (isDecimalDigit(current)) {
686 record8(current);
687 } else {
688 setDone(Number);
689 }
690 break;
691 default:
692 Q_ASSERT_X(0, "Lexer::lex", "Unhandled state in switch statement");
693 }
694
695 // move on to the next character
696 if (!done)
697 shift(1);
698 if (state != Start && state != InSingleLineComment)
699 bol = false;
700 }
701
702 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
703 if ((state == Number || state == Octal || state == Hex)
704 && isIdentLetter(current)) {
705 state = Bad;
706 err = IllegalIdentifier;
707 errmsg = QLatin1String("Identifier cannot start with numeric literal");
708 }
709
710 // terminate string
711 buffer8[pos8] = '\0';
712
713 double dval = 0;
714 if (state == Number) {
715 dval = qstrtod(buffer8, 0, 0);
716 } else if (state == Hex) { // scan hex numbers
717 dval = QScript::integerFromString(buffer8, pos8, 16);
718 state = Number;
719 } else if (state == Octal) { // scan octal number
720 dval = QScript::integerFromString(buffer8, pos8, 8);
721 state = Number;
722 }
723
724 restrKeyword = false;
725 delimited = false;
726
727 switch (parenthesesState) {
728 case IgnoreParentheses:
729 break;
730 case CountParentheses:
731 if (token == QScriptGrammar::T_RPAREN) {
732 --parenthesesCount;
733 if (parenthesesCount == 0)
734 parenthesesState = BalancedParentheses;
735 } else if (token == QScriptGrammar::T_LPAREN) {
736 ++parenthesesCount;
737 }
738 break;
739 case BalancedParentheses:
740 parenthesesState = IgnoreParentheses;
741 break;
742 }
743
744 switch (state) {
745 case Eof:
746 return 0;
747 case Other:
748 if(token == QScriptGrammar::T_RBRACE || token == QScriptGrammar::T_SEMICOLON)
749 delimited = true;
750 return token;
751 case Identifier:
752 if ((token = findReservedWord(buffer16, pos16)) < 0) {
753 /* TODO: close leak on parse error. same holds true for String */
754 if (driver) {
755 Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
756 qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
757 } else
758 qsyylval.ustr = 0;
759 return QScriptGrammar::T_IDENTIFIER;
760 }
761 if (token == QScriptGrammar::T_CONTINUE || token == QScriptGrammar::T_BREAK
762 || token == QScriptGrammar::T_RETURN || token == QScriptGrammar::T_THROW) {
763 restrKeyword = true;
764 } else if (token == QScriptGrammar::T_IF || token == QScriptGrammar::T_FOR
765 || token == QScriptGrammar::T_WHILE || token == QScriptGrammar::T_WITH) {
766 parenthesesState = CountParentheses;
767 parenthesesCount = 0;
768 } else if (token == QScriptGrammar::T_DO) {
769 parenthesesState = BalancedParentheses;
770 }
771 return token;
772 case String:
773 if (driver) {
774 Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
775 qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
776 } else
777 qsyylval.ustr = 0;
778 return QScriptGrammar::T_STRING_LITERAL;
779 case Number:
780 qsyylval.dval = dval;
781 return QScriptGrammar::T_NUMERIC_LITERAL;
782 case Bad:
783 return -1;
784 default:
785 Q_ASSERT(!"unhandled numeration value in switch");
786 return -1;
787 }
788}
789
790bool QScript::Lexer::isWhiteSpace() const
791{
792 return (current == ' ' || current == '\t' ||
793 current == 0x0b || current == 0x0c);
794}
795
796bool QScript::Lexer::isLineTerminator() const
797{
798 return (current == '\n' || current == '\r');
799}
800
801bool QScript::Lexer::isIdentLetter(ushort c)
802{
803 /* TODO: allow other legitimate unicode chars */
804 return ((c >= 'a' && c <= 'z')
805 || (c >= 'A' && c <= 'Z')
806 || c == '$'
807 || c == '_');
808}
809
810bool QScript::Lexer::isDecimalDigit(ushort c)
811{
812 return (c >= '0' && c <= '9');
813}
814
815bool QScript::Lexer::isHexDigit(ushort c) const
816{
817 return ((c >= '0' && c <= '9')
818 || (c >= 'a' && c <= 'f')
819 || (c >= 'A' && c <= 'F'));
820}
821
822bool QScript::Lexer::isOctalDigit(ushort c) const
823{
824 return (c >= '0' && c <= '7');
825}
826
827int QScript::Lexer::matchPunctuator(ushort c1, ushort c2,
828 ushort c3, ushort c4)
829{
830 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
831 shift(4);
832 return QScriptGrammar::T_GT_GT_GT_EQ;
833 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
834 shift(3);
835 return QScriptGrammar::T_EQ_EQ_EQ;
836 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
837 shift(3);
838 return QScriptGrammar::T_NOT_EQ_EQ;
839 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
840 shift(3);
841 return QScriptGrammar::T_GT_GT_GT;
842 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
843 shift(3);
844 return QScriptGrammar::T_LT_LT_EQ;
845 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
846 shift(3);
847 return QScriptGrammar::T_GT_GT_EQ;
848 } else if (c1 == '<' && c2 == '=') {
849 shift(2);
850 return QScriptGrammar::T_LE;
851 } else if (c1 == '>' && c2 == '=') {
852 shift(2);
853 return QScriptGrammar::T_GE;
854 } else if (c1 == '!' && c2 == '=') {
855 shift(2);
856 return QScriptGrammar::T_NOT_EQ;
857 } else if (c1 == '+' && c2 == '+') {
858 shift(2);
859 return QScriptGrammar::T_PLUS_PLUS;
860 } else if (c1 == '-' && c2 == '-') {
861 shift(2);
862 return QScriptGrammar::T_MINUS_MINUS;
863 } else if (c1 == '=' && c2 == '=') {
864 shift(2);
865 return QScriptGrammar::T_EQ_EQ;
866 } else if (c1 == '+' && c2 == '=') {
867 shift(2);
868 return QScriptGrammar::T_PLUS_EQ;
869 } else if (c1 == '-' && c2 == '=') {
870 shift(2);
871 return QScriptGrammar::T_MINUS_EQ;
872 } else if (c1 == '*' && c2 == '=') {
873 shift(2);
874 return QScriptGrammar::T_STAR_EQ;
875 } else if (c1 == '/' && c2 == '=') {
876 shift(2);
877 return QScriptGrammar::T_DIVIDE_EQ;
878 } else if (c1 == '&' && c2 == '=') {
879 shift(2);
880 return QScriptGrammar::T_AND_EQ;
881 } else if (c1 == '^' && c2 == '=') {
882 shift(2);
883 return QScriptGrammar::T_XOR_EQ;
884 } else if (c1 == '%' && c2 == '=') {
885 shift(2);
886 return QScriptGrammar::T_REMAINDER_EQ;
887 } else if (c1 == '|' && c2 == '=') {
888 shift(2);
889 return QScriptGrammar::T_OR_EQ;
890 } else if (c1 == '<' && c2 == '<') {
891 shift(2);
892 return QScriptGrammar::T_LT_LT;
893 } else if (c1 == '>' && c2 == '>') {
894 shift(2);
895 return QScriptGrammar::T_GT_GT;
896 } else if (c1 == '&' && c2 == '&') {
897 shift(2);
898 return QScriptGrammar::T_AND_AND;
899 } else if (c1 == '|' && c2 == '|') {
900 shift(2);
901 return QScriptGrammar::T_OR_OR;
902 }
903
904 switch(c1) {
905 case '=': shift(1); return QScriptGrammar::T_EQ;
906 case '>': shift(1); return QScriptGrammar::T_GT;
907 case '<': shift(1); return QScriptGrammar::T_LT;
908 case ',': shift(1); return QScriptGrammar::T_COMMA;
909 case '!': shift(1); return QScriptGrammar::T_NOT;
910 case '~': shift(1); return QScriptGrammar::T_TILDE;
911 case '?': shift(1); return QScriptGrammar::T_QUESTION;
912 case ':': shift(1); return QScriptGrammar::T_COLON;
913 case '.': shift(1); return QScriptGrammar::T_DOT;
914 case '+': shift(1); return QScriptGrammar::T_PLUS;
915 case '-': shift(1); return QScriptGrammar::T_MINUS;
916 case '*': shift(1); return QScriptGrammar::T_STAR;
917 case '/': shift(1); return QScriptGrammar::T_DIVIDE_;
918 case '&': shift(1); return QScriptGrammar::T_AND;
919 case '|': shift(1); return QScriptGrammar::T_OR;
920 case '^': shift(1); return QScriptGrammar::T_XOR;
921 case '%': shift(1); return QScriptGrammar::T_REMAINDER;
922 case '(': shift(1); return QScriptGrammar::T_LPAREN;
923 case ')': shift(1); return QScriptGrammar::T_RPAREN;
924 case '{': shift(1); return QScriptGrammar::T_LBRACE;
925 case '}': shift(1); return QScriptGrammar::T_RBRACE;
926 case '[': shift(1); return QScriptGrammar::T_LBRACKET;
927 case ']': shift(1); return QScriptGrammar::T_RBRACKET;
928 case ';': shift(1); return QScriptGrammar::T_SEMICOLON;
929
930 default: return -1;
931 }
932}
933
934ushort QScript::Lexer::singleEscape(ushort c) const
935{
936 switch(c) {
937 case 'b':
938 return 0x08;
939 case 't':
940 return 0x09;
941 case 'n':
942 return 0x0A;
943 case 'v':
944 return 0x0B;
945 case 'f':
946 return 0x0C;
947 case 'r':
948 return 0x0D;
949 case '"':
950 return 0x22;
951 case '\'':
952 return 0x27;
953 case '\\':
954 return 0x5C;
955 default:
956 return c;
957 }
958}
959
960ushort QScript::Lexer::convertOctal(ushort c1, ushort c2,
961 ushort c3) const
962{
963 return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
964}
965
966unsigned char QScript::Lexer::convertHex(ushort c)
967{
968 if (c >= '0' && c <= '9')
969 return (c - '0');
970 else if (c >= 'a' && c <= 'f')
971 return (c - 'a' + 10);
972 else
973 return (c - 'A' + 10);
974}
975
976unsigned char QScript::Lexer::convertHex(ushort c1, ushort c2)
977{
978 return ((convertHex(c1) << 4) + convertHex(c2));
979}
980
981QChar QScript::Lexer::convertUnicode(ushort c1, ushort c2,
982 ushort c3, ushort c4)
983{
984 return QChar((convertHex(c3) << 4) + convertHex(c4),
985 (convertHex(c1) << 4) + convertHex(c2));
986}
987
988void QScript::Lexer::record8(ushort c)
989{
990 Q_ASSERT(c <= 0xff);
991
992 // enlarge buffer if full
993 if (pos8 >= size8 - 1) {
994 char *tmp = new char[2 * size8];
995 memcpy(tmp, buffer8, size8 * sizeof(char));
996 delete [] buffer8;
997 buffer8 = tmp;
998 size8 *= 2;
999 }
1000
1001 buffer8[pos8++] = (char) c;
1002}
1003
1004void QScript::Lexer::record16(QChar c)
1005{
1006 // enlarge buffer if full
1007 if (pos16 >= size16 - 1) {
1008 QChar *tmp = new QChar[2 * size16];
1009 memcpy(tmp, buffer16, size16 * sizeof(QChar));
1010 delete [] buffer16;
1011 buffer16 = tmp;
1012 size16 *= 2;
1013 }
1014
1015 buffer16[pos16++] = c;
1016}
1017
1018void QScript::Lexer::recordStartPos()
1019{
1020 startlineno = yylineno;
1021 startcolumn = yycolumn;
1022}
1023
1024bool QScript::Lexer::scanRegExp(RegExpBodyPrefix prefix)
1025{
1026 pos16 = 0;
1027 bool lastWasEscape = false;
1028
1029 if (prefix == EqualPrefix)
1030 record16(QLatin1Char('='));
1031
1032 while (1) {
1033 if (isLineTerminator() || current == 0) {
1034 errmsg = QLatin1String("Unterminated regular expression literal");
1035 return false;
1036 }
1037 else if (current != '/' || lastWasEscape == true)
1038 {
1039 record16(current);
1040 lastWasEscape = !lastWasEscape && (current == '\\');
1041 }
1042 else {
1043 if (driver) {
1044 Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
1045 pattern = 0; // driver->intern(buffer16, pos16);
1046 } else
1047 pattern = 0;
1048 pos16 = 0;
1049 shift(1);
1050 break;
1051 }
1052 shift(1);
1053 }
1054
1055 flags = 0;
1056 while (isIdentLetter(current)) {
1057 // current version was remade from this line:
1058 //int flag = QScript::Ecma::RegExp::flagFromChar(current);
1059 //code was "inlined" because it was only one call to this function
1060 int flag;
1061 switch (current) {
1062 case 'g': flag = 0x01; break;
1063 case 'm': flag = 0x02; break;
1064 case 'i': flag = 0x04; break;
1065 default: flag = 0;
1066 }
1067 if (flag == 0) {
1068 errmsg = QString::fromLatin1("Invalid regular expression flag '%0'")
1069 .arg(QChar(current));
1070 return false;
1071 }
1072 flags |= flag;
1073 record16(current);
1074 shift(1);
1075 }
1076
1077 return true;
1078}
1079
1080void QScript::Lexer::syncProhibitAutomaticSemicolon()
1081{
1082 if (parenthesesState == BalancedParentheses) {
1083 // we have seen something like "if (foo)", which means we should
1084 // never insert an automatic semicolon at this point, since it would
1085 // then be expanded into an empty statement (ECMA-262 7.9.1)
1086 prohibitAutomaticSemicolon = true;
1087 parenthesesState = IgnoreParentheses;
1088 } else {
1089 prohibitAutomaticSemicolon = false;
1090 }
1091}
1092
1093QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.