source: trunk/tools/qdoc3/tokenizer.cpp@ 553

Last change on this file since 553 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 23.8 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information (qt-info@nokia.com)
5**
6** This file is part of the tools applications of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at qt-sales@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "config.h"
43#include "tokenizer.h"
44
45#include <qdebug.h>
46#include <qfile.h>
47#include <qhash.h>
48#include <qregexp.h>
49#include <qstring.h>
50
51#include <ctype.h>
52#include <string.h>
53
54QT_BEGIN_NAMESPACE
55
56#define LANGUAGE_CPP "Cpp"
57
58/* qmake ignore Q_OBJECT */
59
/*
 Spellings of the keywords the tokenizer recognizes. getToken() maps
 the entry at index N to the token value Tok_FirstKeyword + N, so the
 order of this table has to be kept in sync with the keyword tokens
 declared in tokenizer.h.
*/
static const char *kwords[] = {
    // C++ keywords
    "char", "class", "const", "double", "enum",
    "explicit", "friend", "inline", "int", "long",
    "namespace", "operator", "private", "protected", "public",
    "short", "signals", "signed", "slots", "static",
    "struct", "template", "typedef", "typename", "union",
    "unsigned", "using", "virtual", "void", "volatile",
    "__int64",

    // Qt macros
    "Q_OBJECT", "Q_OVERRIDE", "Q_PROPERTY",
    "Q_DECLARE_SEQUENTIAL_ITERATOR", "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
    "Q_DECLARE_ASSOCIATIVE_ITERATOR", "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
    "Q_DECLARE_FLAGS",
    "Q_SIGNALS", "Q_SLOTS",
    "QT_COMPAT", "QT_COMPAT_CONSTRUCTOR",
    "QT_DEPRECATED", "QT_MOC_COMPAT", "QT_MODULE",
    "QT3_SUPPORT", "QT3_SUPPORT_CONSTRUCTOR", "QT3_MOC_SUPPORT",
    "QDOC_PROPERTY"
};
87
88static const int KwordHashTableSize = 4096;
89static int kwordHashTable[KwordHashTableSize];
90
91static QHash<QByteArray, bool> *ignoredTokensAndDirectives = 0;
92
93static QRegExp *comment = 0;
94static QRegExp *versionX = 0;
95static QRegExp *definedX = 0;
96
97static QRegExp *defines = 0;
98static QRegExp *falsehoods = 0;
99
100/*
101 This function is a perfect hash function for the 37 keywords of C99
102 (with a hash table size of 512). It should perform well on our
103 Qt-enhanced C++ subset.
104*/
105static int hashKword(const char *s, int len)
106{
107 return (((uchar) s[0]) + (((uchar) s[2]) << 5) +
108 (((uchar) s[len - 1]) << 3)) % KwordHashTableSize;
109}
110
111static void insertKwordIntoHash(const char *s, int number)
112{
113 int k = hashKword(s, strlen(s));
114 while (kwordHashTable[k]) {
115 if (++k == KwordHashTableSize)
116 k = 0;
117 }
118 kwordHashTable[k] = number;
119}
120
121Tokenizer::Tokenizer(const Location& loc, FILE *in)
122{
123 init();
124 QFile file;
125 file.open(in, QIODevice::ReadOnly);
126 yyIn = file.readAll();
127 file.close();
128 yyPos = 0;
129 start(loc);
130}
131
132Tokenizer::Tokenizer(const Location& loc, const QByteArray &in)
133 : yyIn(in)
134{
135 init();
136 yyPos = 0;
137 start(loc);
138}
139
140Tokenizer::~Tokenizer()
141{
142 delete[] yyLexBuf1;
143 delete[] yyLexBuf2;
144}
145
146int Tokenizer::getToken()
147{
148 char *t = yyPrevLex;
149 yyPrevLex = yyLex;
150 yyLex = t;
151
152 while (yyCh != EOF) {
153 yyTokLoc = yyCurLoc;
154 yyLexLen = 0;
155
156 if (isspace(yyCh)) {
157 do {
158 yyCh = getChar();
159 } while (isspace(yyCh));
160 }
161 else if (isalpha(yyCh) || yyCh == '_') {
162 do {
163 yyCh = getChar();
164 } while (isalnum(yyCh) || yyCh == '_');
165
166 int k = hashKword(yyLex, yyLexLen);
167 for (;;) {
168 int i = kwordHashTable[k];
169 if (i == 0) {
170 return Tok_Ident;
171 }
172 else if (i == -1) {
173 if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) {
174 if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive
175 int parenDepth = 0;
176 while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) {
177 if (yyCh == '(')
178 ++parenDepth;
179 else if (yyCh == ')')
180 --parenDepth;
181 yyCh = getChar();
182 }
183 if (yyCh == ')')
184 yyCh = getChar();
185 }
186 break;
187 }
188 }
189 else if (strcmp(yyLex, kwords[i - 1]) == 0) {
190 int ret = (int) Tok_FirstKeyword + i - 1;
191 if (ret != Tok_explicit && ret != Tok_inline && ret != Tok_typename)
192 return ret;
193 break;
194 }
195
196 if (++k == KwordHashTableSize)
197 k = 0;
198 }
199 }
200 else if (isdigit(yyCh)) {
201 do {
202 yyCh = getChar();
203 } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
204 yyCh == '-');
205 return Tok_Number;
206 }
207 else {
208 switch (yyCh) {
209 case '!':
210 case '%':
211 yyCh = getChar();
212 if (yyCh == '=')
213 yyCh = getChar();
214 return Tok_SomeOperator;
215 case '"':
216 yyCh = getChar();
217
218 while (yyCh != EOF && yyCh != '"') {
219 if (yyCh == '\\')
220 yyCh = getChar();
221 yyCh = getChar();
222 }
223 yyCh = getChar();
224
225 if (yyCh == EOF)
226 yyTokLoc.warning(tr("Unterminated C++ string literal"),
227 tr("Maybe you forgot '/*!' at the beginning of the file?"));
228 else
229 return Tok_String;
230 break;
231 case '#':
232 return getTokenAfterPreprocessor();
233 case '&':
234 yyCh = getChar();
235 if (yyCh == '&' || yyCh == '=') {
236 yyCh = getChar();
237 return Tok_SomeOperator;
238 }
239 else {
240 return Tok_Ampersand;
241 }
242 case '\'':
243 yyCh = getChar();
244 if (yyCh == '\\')
245 yyCh = getChar();
246 do {
247 yyCh = getChar();
248 } while (yyCh != EOF && yyCh != '\'');
249
250 if (yyCh == EOF) {
251 yyTokLoc.warning(tr("Unterminated C++ character"
252 " literal"));
253 }
254 else {
255 yyCh = getChar();
256 return Tok_Number;
257 }
258 break;
259 case '(':
260 yyCh = getChar();
261 if (yyNumPreprocessorSkipping == 0)
262 yyParenDepth++;
263 if (isspace(yyCh)) {
264 do {
265 yyCh = getChar();
266 } while (isspace(yyCh));
267 yyLexLen = 1;
268 yyLex[1] = '\0';
269 }
270 if (yyCh == '*') {
271 yyCh = getChar();
272 return Tok_LeftParenAster;
273 }
274 return Tok_LeftParen;
275 case ')':
276 yyCh = getChar();
277 if (yyNumPreprocessorSkipping == 0)
278 yyParenDepth--;
279 return Tok_RightParen;
280 case '*':
281 yyCh = getChar();
282 if (yyCh == '=') {
283 yyCh = getChar();
284 return Tok_SomeOperator;
285 } else {
286 return Tok_Aster;
287 }
288 case '^':
289 yyCh = getChar();
290 if (yyCh == '=') {
291 yyCh = getChar();
292 return Tok_SomeOperator;
293 } else {
294 return Tok_Caret;
295 }
296 case '+':
297 yyCh = getChar();
298 if (yyCh == '+' || yyCh == '=')
299 yyCh = getChar();
300 return Tok_SomeOperator;
301 case ',':
302 yyCh = getChar();
303 return Tok_Comma;
304 case '-':
305 yyCh = getChar();
306 if (yyCh == '-' || yyCh == '=') {
307 yyCh = getChar();
308 } else if (yyCh == '>') {
309 yyCh = getChar();
310 if (yyCh == '*')
311 yyCh = getChar();
312 }
313 return Tok_SomeOperator;
314 case '.':
315 yyCh = getChar();
316 if (yyCh == '*') {
317 yyCh = getChar();
318 } else if (yyCh == '.') {
319 do {
320 yyCh = getChar();
321 } while (yyCh == '.');
322 return Tok_Ellipsis;
323 } else if (isdigit(yyCh)) {
324 do {
325 yyCh = getChar();
326 } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
327 yyCh == '-');
328 return Tok_Number;
329 }
330 return Tok_SomeOperator;
331 case '/':
332 yyCh = getChar();
333 if (yyCh == '/') {
334 do {
335 yyCh = getChar();
336 } while (yyCh != EOF && yyCh != '\n');
337 } else if (yyCh == '*') {
338 bool metDoc = false; // empty doc is no doc
339 bool metSlashAsterBang = false;
340 bool metAster = false;
341 bool metAsterSlash = false;
342
343 yyCh = getChar();
344 if (yyCh == '!')
345 metSlashAsterBang = true;
346
347 while (!metAsterSlash) {
348 if (yyCh == EOF) {
349 yyTokLoc.warning(tr("Unterminated C++ comment"));
350 break;
351 } else {
352 if (yyCh == '*') {
353 metAster = true;
354 } else if (metAster && yyCh == '/') {
355 metAsterSlash = true;
356 } else {
357 metAster = false;
358 if (isgraph(yyCh))
359 metDoc = true;
360 }
361 }
362 yyCh = getChar();
363 }
364 if (metSlashAsterBang && metDoc)
365 return Tok_Doc;
366 else if (yyParenDepth > 0)
367 return Tok_Comment;
368 } else {
369 if (yyCh == '=')
370 yyCh = getChar();
371 return Tok_SomeOperator;
372 }
373 break;
374 case ':':
375 yyCh = getChar();
376 if (yyCh == ':') {
377 yyCh = getChar();
378 return Tok_Gulbrandsen;
379 } else {
380 return Tok_Colon;
381 }
382 case ';':
383 yyCh = getChar();
384 return Tok_Semicolon;
385 case '<':
386 yyCh = getChar();
387 if (yyCh == '<') {
388 yyCh = getChar();
389 if (yyCh == '=')
390 yyCh = getChar();
391 return Tok_SomeOperator;
392 } else if (yyCh == '=') {
393 yyCh = getChar();
394 return Tok_SomeOperator;
395 } else {
396 return Tok_LeftAngle;
397 }
398 case '=':
399 yyCh = getChar();
400 if (yyCh == '=') {
401 yyCh = getChar();
402 return Tok_SomeOperator;
403 } else {
404 return Tok_Equal;
405 }
406 case '>':
407 yyCh = getChar();
408 if (yyCh == '>') {
409 yyCh = getChar();
410 if (yyCh == '=')
411 yyCh = getChar();
412 return Tok_SomeOperator;
413 } else if (yyCh == '=') {
414 yyCh = getChar();
415 return Tok_SomeOperator;
416 } else {
417 return Tok_RightAngle;
418 }
419 case '?':
420 yyCh = getChar();
421 return Tok_SomeOperator;
422 case '[':
423 yyCh = getChar();
424 if (yyNumPreprocessorSkipping == 0)
425 yyBracketDepth++;
426 return Tok_LeftBracket;
427 case '\\':
428 yyCh = getChar();
429 yyCh = getChar(); // skip one character
430 break;
431 case ']':
432 yyCh = getChar();
433 if (yyNumPreprocessorSkipping == 0)
434 yyBracketDepth--;
435 return Tok_RightBracket;
436 case '{':
437 yyCh = getChar();
438 if (yyNumPreprocessorSkipping == 0)
439 yyBraceDepth++;
440 return Tok_LeftBrace;
441 case '}':
442 yyCh = getChar();
443 if (yyNumPreprocessorSkipping == 0)
444 yyBraceDepth--;
445 return Tok_RightBrace;
446 case '|':
447 yyCh = getChar();
448 if (yyCh == '|' || yyCh == '=')
449 yyCh = getChar();
450 return Tok_SomeOperator;
451 case '~':
452 yyCh = getChar();
453 return Tok_Tilde;
454 case '@':
455 yyCh = getChar();
456 return Tok_At;
457 default:
458 // ### We should really prevent qdoc from looking at snippet files rather than
459 // ### suppress warnings when reading them.
460 if (yyNumPreprocessorSkipping == 0 && !yyTokLoc.fileName().endsWith(".qdoc")) {
461 yyTokLoc.warning(tr("Hostile character 0x%1 in C++ source")
462 .arg((uchar)yyCh, 1, 16));
463 }
464 yyCh = getChar();
465 }
466 }
467 }
468
469 if (yyPreprocessorSkipping.count() > 1) {
470 yyTokLoc.warning(tr("Expected #endif before end of file"));
471 // clear it out or we get an infinite loop!
472 while (!yyPreprocessorSkipping.isEmpty()) {
473 popSkipping();
474 }
475 }
476
477 strcpy(yyLex, "end-of-input");
478 yyLexLen = strlen(yyLex);
479 return Tok_Eoi;
480}
481
482void Tokenizer::initialize(const Config &config)
483{
484 QString versionSym = config.getString(CONFIG_VERSIONSYM);
485
486 comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)");
487 comment->setMinimal(true);
488 versionX = new QRegExp("$cannot possibly match^");
489 if (!versionSym.isEmpty())
490 versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym)
491 + ")[ \t]+\"([^\"]*)\"[ \t]*");
492 definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)");
493
494 QStringList d = config.getStringList(CONFIG_DEFINES);
495 d += "qdoc";
496 defines = new QRegExp(d.join("|"));
497 falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join("|"));
498
499 memset(kwordHashTable, 0, sizeof(kwordHashTable));
500 for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++)
501 insertKwordIntoHash(kwords[i], i + 1);
502
503 ignoredTokensAndDirectives = new QHash<QByteArray, bool>;
504
505 QStringList tokens = config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS);
506 foreach (const QString &t, tokens) {
507 const QByteArray tb = t.toAscii();
508 ignoredTokensAndDirectives->insert(tb, false);
509 insertKwordIntoHash(tb.data(), -1);
510 }
511
512 QStringList directives = config.getStringList(LANGUAGE_CPP + Config::dot
513 + CONFIG_IGNOREDIRECTIVES);
514 foreach (const QString &d, directives) {
515 const QByteArray db = d.toAscii();
516 ignoredTokensAndDirectives->insert(db, true);
517 insertKwordIntoHash(db.data(), -1);
518 }
519}
520
521void Tokenizer::terminate()
522{
523 delete comment;
524 comment = 0;
525 delete versionX;
526 versionX = 0;
527 delete definedX;
528 definedX = 0;
529 delete defines;
530 defines = 0;
531 delete falsehoods;
532 falsehoods = 0;
533 delete ignoredTokensAndDirectives;
534 ignoredTokensAndDirectives = 0;
535}
536
537void Tokenizer::init()
538{
539 yyLexBuf1 = new char[(int) yyLexBufSize];
540 yyLexBuf2 = new char[(int) yyLexBufSize];
541 yyPrevLex = yyLexBuf1;
542 yyPrevLex[0] = '\0';
543 yyLex = yyLexBuf2;
544 yyLex[0] = '\0';
545 yyLexLen = 0;
546 yyPreprocessorSkipping.push(false);
547 yyNumPreprocessorSkipping = 0;
548 yyBraceDepth = 0;
549 yyParenDepth = 0;
550 yyBracketDepth = 0;
551 yyCh = '\0';
552 parsingMacro = false;
553}
554
555void Tokenizer::start(const Location& loc)
556{
557 yyTokLoc = loc;
558 yyCurLoc = loc;
559 yyCurLoc.start();
560 strcpy(yyPrevLex, "beginning-of-input");
561 strcpy(yyLex, "beginning-of-input");
562 yyLexLen = strlen(yyLex);
563 yyBraceDepth = 0;
564 yyParenDepth = 0;
565 yyBracketDepth = 0;
566 yyCh = '\0';
567 yyCh = getChar();
568}
569
570/*
571 Returns the next token, if # was met. This function interprets the
572 preprocessor directive, skips over any #ifdef'd out tokens, and returns the
573 token after all of that.
574*/
575int Tokenizer::getTokenAfterPreprocessor()
576{
577 yyCh = getChar();
578 while (isspace(yyCh) && yyCh != '\n')
579 yyCh = getChar();
580
581 /*
582 #directive condition
583 */
584 QString directive;
585 QString condition;
586
587 while (isalpha(yyCh)) {
588 directive += QChar(yyCh);
589 yyCh = getChar();
590 }
591 if (!directive.isEmpty()) {
592 while (yyCh != EOF && yyCh != '\n') {
593 if (yyCh == '\\')
594 yyCh = getChar();
595 condition += yyCh;
596 yyCh = getChar();
597 }
598 condition.replace(*comment, "");
599 condition = condition.simplified();
600
601 /*
602 The #if, #ifdef, #ifndef, #elif, #else, and #endif
603 directives have an effect on the skipping stack. For
604 instance, if the code processed so far is
605
606 #if 1
607 #if 0
608 #if 1
609 // ...
610 #else
611
612 the skipping stack contains, from bottom to top, false true
613 true (assuming 0 is false and 1 is true). If at least one
614 entry of the stack is true, the tokens are skipped.
615
616 This mechanism is simple yet hard to understand.
617 */
618 if (directive[0] == QChar('i')) {
619 if (directive == QString("if"))
620 pushSkipping(!isTrue(condition));
621 else if (directive == QString("ifdef"))
622 pushSkipping(!defines->exactMatch(condition));
623 else if (directive == QString("ifndef"))
624 pushSkipping(defines->exactMatch(condition));
625 } else if (directive[0] == QChar('e')) {
626 if (directive == QString("elif")) {
627 bool old = popSkipping();
628 if (old)
629 pushSkipping(!isTrue(condition));
630 else
631 pushSkipping(true);
632 } else if (directive == QString("else")) {
633 pushSkipping(!popSkipping());
634 } else if (directive == QString("endif")) {
635 popSkipping();
636 }
637 } else if (directive == QString("define")) {
638 if (versionX->exactMatch(condition))
639 yyVersion = versionX->cap(1);
640 }
641 }
642
643 int tok;
644 do {
645 /*
646 We set yyLex now, and after getToken() this will be
647 yyPrevLex. This way, we skip over the preprocessor
648 directive.
649 */
650 qstrcpy(yyLex, yyPrevLex);
651
652 /*
653 If getToken() meets another #, it will call
654 getTokenAfterPreprocessor() once again, which could in turn
655 call getToken() again, etc. Unless there are 10,000 or so
656 preprocessor directives in a row, this shouldn't overflow
657 the stack.
658 */
659 tok = getToken();
660 } while (yyNumPreprocessorSkipping > 0);
661 return tok;
662}
663
664/*
665 Pushes a new skipping value onto the stack. This corresponds to entering a
666 new #if block.
667*/
668void Tokenizer::pushSkipping(bool skip)
669{
670 yyPreprocessorSkipping.push(skip);
671 if (skip)
672 yyNumPreprocessorSkipping++;
673}
674
675/*
676 Pops a skipping value from the stack. This corresponds to reaching a #endif.
677*/
678bool Tokenizer::popSkipping()
679{
680 if (yyPreprocessorSkipping.isEmpty()) {
681 yyTokLoc.warning(tr("Unexpected #elif, #else or #endif"));
682 return true;
683 }
684
685 bool skip = yyPreprocessorSkipping.pop();
686 if (skip)
687 yyNumPreprocessorSkipping--;
688 return skip;
689}
690
691/*
692 Returns true if the condition evaluates as true, otherwise false. The
693 condition is represented by a string. Unsophisticated parsing techniques are
694 used. The preprocessing method could be named StriNg-Oriented PreProcessing,
695 as SNOBOL stands for StriNg-Oriented symBOlic Language.
696*/
697bool Tokenizer::isTrue(const QString &condition)
698{
699 int firstOr = -1;
700 int firstAnd = -1;
701 int parenDepth = 0;
702
703 /*
704 Find the first logical operator at top level, but be careful
705 about precedence. Examples:
706
707 X || Y // the or
708 X || Y || Z // the leftmost or
709 X || Y && Z // the or
710 X && Y || Z // the or
711 (X || Y) && Z // the and
712 */
713 for (int i = 0; i < (int) condition.length() - 1; i++) {
714 QChar ch = condition[i];
715 if (ch == QChar('(')) {
716 parenDepth++;
717 } else if (ch == QChar(')')) {
718 parenDepth--;
719 } else if (parenDepth == 0) {
720 if (condition[i + 1] == ch) {
721 if (ch == QChar('|')) {
722 firstOr = i;
723 break;
724 } else if (ch == QChar('&')) {
725 if (firstAnd == -1)
726 firstAnd = i;
727 }
728 }
729 }
730 }
731 if (firstOr != -1)
732 return isTrue(condition.left(firstOr)) ||
733 isTrue(condition.mid(firstOr + 2));
734 if (firstAnd != -1)
735 return isTrue(condition.left(firstAnd)) &&
736 isTrue(condition.mid(firstAnd + 2));
737
738 QString t = condition.simplified();
739 if (t.isEmpty())
740 return true;
741
742 if (t[0] == QChar('!'))
743 return !isTrue(t.mid(1));
744 if (t[0] == QChar('(') && t.right(1)[0] == QChar(')'))
745 return isTrue(t.mid(1, t.length() - 2));
746
747 if (definedX->exactMatch(t))
748 return defines->exactMatch(definedX->cap(1));
749 else
750 return !falsehoods->exactMatch(t);
751}
752
753QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.