source: trunk/tools/designer/plugins/cppeditor/yyreg.cpp

Last change on this file was 197, checked in by rudi, 14 years ago

Added QtDesigner

File size: 19.5 KB
Line 
1/**********************************************************************
2**
3** Copyright (C) 2005-2007 Trolltech ASA. All rights reserved.
4**
5** This file is part of Qt Designer.
6**
7** This file may be distributed and/or modified under the terms of the
8** GNU General Public License version 2 as published by the Free Software
9** Foundation and appearing in the file LICENSE.GPL included in the
10** packaging of this file.
11**
12** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
13** licenses may use this file in accordance with the Qt Commercial License
14** Agreement provided with the Software.
15**
16** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
17** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18**
19** See http://www.trolltech.com/gpl/ for GPL licensing information.
20** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
21** information about Qt Commercial License Agreements.
22**
23** Contact info@trolltech.com if any conditions of this licensing are
24** not clear to you.
25**
26**********************************************************************/
27
28#include <qregexp.h>
29
30#include <ctype.h>
31#include <stdio.h>
32
33#include "yyreg.h"
34
35/*
36 First comes the tokenizer. We don't need something that knows much
37 about C++. However, we need something that gives tokens from the
38 end of the file to the start, which is tricky.
39
40 If you are not familiar with hand-written tokenizers and parsers,
41 you might want to read other simpler parsers written in the same
42 style:
43
44 $(QTDIR)/src/tools/qregexp.cpp
45 $(QTDIR)/tools/inspector/cppparser.cpp
46
47 You might also want to read Section 2 in the Dragon Book.
48*/
49
/*
  Token codes returned by getToken(). Tok_Boi is what getToken()
  returns once the backwards scan has run off the beginning of the
  input. Tok_Something represents any C++ token that does not interest
  us; it is dangerous to ignore such tokens completely, so they get a
  code of their own.

  The enumerators are listed in their original order, so their numeric
  values are unchanged.
*/
enum {
    // punctuation and generic tokens
    Tok_Boi,
    Tok_Ampersand,
    Tok_Aster,
    Tok_LeftParen,
    Tok_RightParen,
    Tok_Equal,
    Tok_LeftBrace,
    Tok_RightBrace,
    Tok_Semicolon,
    Tok_Colon,
    Tok_LeftAngle,
    Tok_RightAngle,
    Tok_Comma,
    Tok_Ellipsis,
    Tok_Gulbrandsen,    // the "::" scope operator
    Tok_LeftBracket,
    Tok_RightBracket,
    Tok_Tilde,
    Tok_Something,
    Tok_Comment,
    Tok_Ident,

    // keywords the tokenizer tells apart from plain identifiers
    Tok_char,
    Tok_const,
    Tok_double,
    Tok_int,
    Tok_long,
    Tok_operator,
    Tok_short,
    Tok_signed,
    Tok_unsigned
};
63
64/*
65 The following variables store the lexical analyzer state. The best way
66 to understand them is to implement a function myGetToken() that calls
67 getToken(), to add some qDebug() statements in there and then to
68 #define getToken() myGetToken().
69*/
70static QString *yyIn; // the input stream
71static int yyPos; // the position of the current token in yyIn
72static int yyCurPos; // the position of the next lookahead character
73static char *yyLexBuf; // the lexeme buffer
74static const int YYLexBufSize = 65536; // big enough for long comments
75static char *yyLex; // the lexeme itself (a pointer into yyLexBuf)
76static int yyCh; // the lookbehind character
77
78/*
79 Moves back to the previous character in the input stream and
80 updates the tokenizer state. This function is to be used only by
81 getToken(), which provides the right abstraction.
82*/
83static inline void readChar()
84{
85 if ( yyCh == EOF )
86 return;
87
88 if ( yyLex > yyLexBuf )
89 *--yyLex = (char) yyCh;
90
91 if ( yyCurPos < 0 )
92 yyCh = EOF;
93 else
94 yyCh = (*yyIn)[yyCurPos].unicode();
95 yyCurPos--;
96}
97
98/*
99 Sets up the tokenizer.
100*/
101static void startTokenizer( const QString& in )
102{
103 yyIn = new QString;
104 *yyIn = in;
105 yyPos = yyIn->length() - 1;
106 yyCurPos = yyPos;
107 yyLexBuf = new char[YYLexBufSize];
108 yyLex = yyLexBuf + YYLexBufSize - 1;
109 *yyLex = '\0';
110 yyCh = '\0';
111 readChar();
112}
113
114/*
115 Frees resources allocated by the tokenizer.
116*/
117static void stopTokenizer()
118{
119 delete yyIn;
120 delete[] yyLexBuf;
121 yyLexBuf = 0;
122}
123
/*
  Quick-and-dirty hashing for telling apart keywords fast.

  HASH() folds the first character of a word and the word's length
  into one integer that can serve as a switch selector and as a case
  label. CHECK() is meant to be used inside such a case: it breaks out
  of the enclosing switch unless the current lexeme (yyLex) is exactly
  target.
*/
#define HASH( ch, len ) ( ((len) << 8) | (ch) )
#define CHECK( target ) \
    if ( strcmp(yyLex, (target)) != 0 ) \
        break;
132
133/*
134 Returns the previous token in the abstract token stream. The parser
135 deals only with tokens, not with characters.
136*/
137static int getToken()
138{
139 // why "+ 2"? try putting some qDebug()'s and see
140 yyPos = yyCurPos + 2;
141
142 for ( ;; ) {
143 /*
144 See if the previous token is interesting. If it isn't, we
145 will loop anyway an go to the token before the previous
146 token, and so on.
147 */
148
149 yyLex = yyLexBuf + YYLexBufSize - 1;
150 *yyLex = '\0';
151
152 if ( yyCh == EOF ) {
153 break;
154 } else if ( isspace(yyCh) ) {
155 bool metNL = FALSE;
156 do {
157 metNL = ( metNL || yyCh == '\n' );
158 readChar();
159 } while ( isspace(yyCh) );
160
161 if ( metNL ) {
162 /*
163 C++ style comments are tricky. In left-to-right
164 thinking, C++ comments start with "//" and end with
165 '\n'. In right-to-left thinking, they start with a
166 '\n'; but of course not every '\n' starts a comment.
167
168 When we meet the '\n', we look behind, on the same
169 line, for a "//", and if there is one we mess
170 around with the tokenizer state to effectively
171 ignore the comment. Beware of off-by-one and
172 off-by-two bugs when you modify this code by adding
173 qDebug()'s here and there.
174 */
175 if ( yyCurPos >= 0 ) {
176 int lineStart = yyIn->findRev( QChar('\n'), yyCurPos ) + 1;
177 QString line = yyIn->mid( lineStart,
178 yyCurPos - lineStart + 2 );
179 int commentStart = line.find( QString("//") );
180 if ( commentStart != -1 ) {
181 yyCurPos = lineStart + commentStart - 1;
182 yyPos = yyCurPos + 2;
183 readChar();
184 }
185 }
186 }
187 } else if ( isalnum(yyCh) || yyCh == '_' ) {
188 do {
189 readChar();
190 } while ( isalnum(yyCh) || yyCh == '_' );
191
192 switch ( HASH(yyLex[0], strlen(yyLex)) ) {
193 case HASH( 'c', 4 ):
194 CHECK( "char" );
195 return Tok_char;
196 case HASH( 'c', 5 ):
197 CHECK( "const" );
198 return Tok_const;
199 case HASH( 'd', 6 ):
200 CHECK( "double" );
201 return Tok_double;
202 case HASH( 'i', 3 ):
203 CHECK( "int" );
204 return Tok_int;
205 case HASH( 'l', 4 ):
206 CHECK( "long" );
207 return Tok_long;
208 case HASH( 'o', 8 ):
209 CHECK( "operator" );
210 return Tok_operator;
211 case HASH( 's', 5 ):
212 CHECK( "short" );
213 return Tok_short;
214 case HASH( 's', 6 ):
215 CHECK( "signed" );
216 return Tok_signed;
217 case HASH( 'u', 8 ):
218 CHECK( "unsigned" );
219 return Tok_unsigned;
220 }
221 if ( isdigit(*yyLex) )
222 return Tok_Something;
223 else
224 return Tok_Ident;
225 } else {
226 int quote;
227
228 switch ( yyCh ) {
229 case '!':
230 case '%':
231 case '^':
232 case '+':
233 case '-':
234 case '?':
235 case '|':
236 readChar();
237 return Tok_Something;
238 case '"':
239 case '\'':
240 quote = yyCh;
241 readChar();
242
243 while ( yyCh != EOF && yyCh != '\n' ) {
244 if ( yyCh == quote ) {
245 readChar();
246 if ( yyCh != '\\' )
247 break;
248 } else {
249 readChar();
250 }
251 }
252 return Tok_Something;
253 case '&':
254 readChar();
255 if ( yyCh == '&' ) {
256 readChar();
257 return Tok_Something;
258 } else {
259 return Tok_Ampersand;
260 }
261 case '(':
262 readChar();
263 return Tok_LeftParen;
264 case ')':
265 readChar();
266 return Tok_RightParen;
267 case '*':
268 readChar();
269 return Tok_Aster;
270 case ',':
271 readChar();
272 return Tok_Comma;
273 case '.':
274 readChar();
275 if ( yyCh == '.' ) {
276 do {
277 readChar();
278 } while ( yyCh == '.' );
279 return Tok_Ellipsis;
280 } else {
281 return Tok_Something;
282 }
283 case '/':
284 /*
285 C-style comments are symmetric. C++-style comments
286 are handled elsewhere.
287 */
288 readChar();
289 if ( yyCh == '*' ) {
290 bool metAster = FALSE;
291 bool metAsterSlash = FALSE;
292
293 readChar();
294
295 while ( !metAsterSlash ) {
296 if ( yyCh == EOF )
297 break;
298
299 if ( yyCh == '*' )
300 metAster = TRUE;
301 else if ( metAster && yyCh == '/' )
302 metAsterSlash = TRUE;
303 else
304 metAster = FALSE;
305 readChar();
306 }
307 break;
308 // return Tok_Comment;
309 } else {
310 return Tok_Something;
311 }
312 case ':':
313 readChar();
314 if ( yyCh == ':' ) {
315 readChar();
316 return Tok_Gulbrandsen;
317 } else {
318 return Tok_Colon;
319 }
320 case ';':
321 readChar();
322 return Tok_Semicolon;
323 case '<':
324 readChar();
325 return Tok_LeftAngle;
326 case '=':
327 readChar();
328 return Tok_Equal;
329 case '>':
330 readChar();
331 return Tok_RightAngle;
332 case '[':
333 readChar();
334 return Tok_LeftBracket;
335 case ']':
336 readChar();
337 return Tok_RightBracket;
338 case '{':
339 readChar();
340 return Tok_LeftBrace;
341 case '}':
342 readChar();
343 return Tok_RightBrace;
344 case '~':
345 readChar();
346 return Tok_Tilde;
347 default:
348 readChar();
349 }
350 }
351 }
352 return Tok_Boi;
353}
354
355/*
356 Follow the member function(s) of CppFunction.
357*/
358
359/*
360 Returns the prototype for the C++ function, without the semicolon.
361*/
362QString CppFunction::prototype() const
363{
364 QString proto;
365
366 if ( !returnType().isEmpty() )
367 proto = returnType() + QChar( ' ' );
368 proto += scopedName();
369 proto += QChar( '(' );
370 if ( !parameterList().isEmpty() ) {
371 QStringList::ConstIterator p = parameterList().begin();
372 proto += *p;
373 ++p;
374 while ( p != parameterList().end() ) {
375 proto += QString( ", " );
376 proto += *p;
377 ++p;
378 }
379 }
380 proto += QChar( ')' );
381 if ( isConst() )
382 proto += QString( " const" );
383 return proto;
384}
385
/*
  The parser follows. We are not really parsing C++, just trying to
  find the start and end of function definitions.

  One important pitfall is that the parsed code need not be valid.
  Parsing from right to left helps cope with that, as explained in
  comments below.

  In the examples, we will use the symbol @ to stand for the position
  in the token stream. In "int @ x ;", the lookahead token (yyTok) is
  'int'.
*/

static int yyTok = 0; // the current token (0 is Tok_Boi)
400
401/*
402 Returns TRUE if thingy is a constructor or a destructor; otherwise
403 returns FALSE.
404*/
405static bool isCtorOrDtor( const QString& thingy )
406{
407 // e.g., Alpha<a>::Beta<Bar<b, c> >::~Beta
408 QRegExp xtor( QString(
409 "(?:([A-Z_a-z][0-9A-Z_a-z]*)" // class name
410 "(?:<(?:[^>]|<[^>]*>)*>)*" // template arguments
411 "::)+" // many in a row
412 "~?" // ctor or dtor?
413 "\\1") ); // function has same name as class
414 return xtor.exactMatch( thingy );
415}
416
417/*
418 Skips over any template arguments with balanced angle brackets, and
419 returns the skipped material as a string.
420
421 Before: QMap < QString , QValueList < QString > > @ m ;
422 After: QMap @ < QString , QValueList < QString > > m ;
423*/
424static QString matchTemplateAngles()
425{
426 QString t;
427
428 if ( yyTok == Tok_RightAngle ) {
429 int depth = 0;
430 do {
431 if ( yyTok == Tok_RightAngle )
432 depth++;
433 else if ( yyTok == Tok_LeftAngle )
434 depth--;
435 t.prepend( yyLex );
436 yyTok = getToken();
437 } while ( depth > 0 && yyTok != Tok_Boi && yyTok != Tok_LeftBrace );
438 }
439 return t;
440}
441
442/*
443 Similar to matchTemplateAngles(), but for array brackets in parameter
444 data types (as in "int *argv[]").
445*/
446static QString matchArrayBrackets()
447{
448 QString t;
449
450 while ( yyTok == Tok_RightBracket ) {
451 t.prepend( yyLex );
452 yyTok = getToken();
453 if ( yyTok == Tok_Something ) {
454 t.prepend( yyLex );
455 yyTok = getToken();
456 }
457 if ( yyTok != Tok_LeftBracket )
458 return QString::null;
459 t.prepend( yyLex );
460 yyTok = getToken();
461 }
462 return t;
463}
464
465/*
466 Prepends prefix to *type. This operation is in theory trivial, but
467 for the spacing to look good, we have to do something. The original
468 spacing is lost as the input is tokenized.
469*/
470static void prependToType( QString *type, const QString& prefix )
471{
472 if ( !type->isEmpty() && !prefix.isEmpty() ) {
473 QChar left = prefix[(int) prefix.length() - 1];
474 QChar right = (*type)[0];
475
476 if ( left.isLetter() &&
477 (right.isLetter() || right == QChar('*') || right == QChar('&')) )
478 type->prepend( QChar(' ') );
479 }
480 type->prepend( prefix );
481}
482
483static bool isModifier( int tok )
484{
485 return ( tok == Tok_signed || tok == Tok_unsigned ||
486 tok == Tok_short || tok == Tok_long );
487}
488
489/*
490 Parses a data type (backwards as usual) and returns a textual
491 representation of it.
492*/
493static QString matchDataType()
494{
495 QString type;
496
497 while ( yyTok == Tok_Ampersand || yyTok == Tok_Aster ||
498 yyTok == Tok_const ) {
499 prependToType( &type, yyLex );
500 yyTok = getToken();
501 }
502
503 /*
504 This code is really hard to follow... sorry. The loop matches
505 Alpha::Beta::Gamma::...::Omega.
506 */
507 for ( ;; ) {
508 bool modifierMet = FALSE;
509
510 prependToType( &type, matchTemplateAngles() );
511
512 if ( yyTok != Tok_Ident ) {
513 /*
514 People may write 'const unsigned short' or
515 'short unsigned const' or any other permutation.
516 */
517 while ( yyTok == Tok_const || isModifier(yyTok) ) {
518 prependToType( &type, yyLex );
519 yyTok = getToken();
520 if ( yyTok != Tok_const )
521 modifierMet = TRUE;
522 }
523
524 if ( yyTok == Tok_Tilde ) {
525 prependToType( &type, yyLex );
526 yyTok = getToken();
527 }
528 }
529
530 if ( !modifierMet ) {
531 if ( yyTok == Tok_Ellipsis || yyTok == Tok_Ident ||
532 yyTok == Tok_char || yyTok == Tok_int ||
533 yyTok == Tok_double ) {
534 prependToType( &type, yyLex );
535 yyTok = getToken();
536 } else {
537 return QString::null;
538 }
539 } else if ( yyTok == Tok_int || yyTok == Tok_char ||
540 yyTok == Tok_double ) {
541 prependToType( &type, yyLex );
542 yyTok = getToken();
543 }
544
545 while ( yyTok == Tok_const || isModifier(yyTok) ) {
546 prependToType( &type, yyLex );
547 yyTok = getToken();
548 }
549
550 if ( yyTok == Tok_Gulbrandsen ) {
551 prependToType( &type, yyLex );
552 yyTok = getToken();
553 } else {
554 break;
555 }
556 }
557 return type;
558}
559
560/*
561 Parses a function prototype (without the semicolon) and returns an
562 object that stores information about this function.
563*/
564static CppFunction matchFunctionPrototype( bool stripParamNames )
565{
566 CppFunction func;
567#if 0
568 QString documentation;
569#endif
570 QString returnType;
571 QString scopedName;
572 QStringList params;
573 QString qualifier;
574 bool cnst = FALSE;
575
576 if ( yyTok == Tok_const ) {
577 cnst = TRUE;
578 yyTok = getToken();
579 }
580
581 if ( yyTok != Tok_RightParen )
582 return func;
583 yyTok = getToken();
584
585 if ( yyTok != Tok_LeftParen ) {
586 for ( ;; ) {
587 QString brackets = matchArrayBrackets();
588 QString name;
589 if ( yyTok == Tok_Ident ) {
590 name = yyLex;
591 yyTok = getToken();
592 }
593 QString type = matchDataType();
594
595 if ( type.isEmpty() ) {
596 if ( name.isEmpty() )
597 return func;
598 type = name;
599 name = QString::null;
600 }
601 if ( stripParamNames )
602 name = QString::null;
603
604 QString param = type + QChar( ' ' ) + name + brackets;
605 params.prepend( param.stripWhiteSpace() );
606
607 if ( yyTok != Tok_Comma )
608 break;
609 yyTok = getToken();
610 }
611 if ( yyTok != Tok_LeftParen )
612 return func;
613 }
614 yyTok = getToken();
615
616 for ( ;; ) {
617 scopedName.prepend( matchTemplateAngles() );
618
619 if ( yyTok != Tok_Ident ) {
620 // the operator keyword should be close
621 int i = 0;
622 while ( i < 4 && yyTok != Tok_operator ) {
623 scopedName.prepend( yyLex );
624 i++;
625 }
626 if ( yyTok != Tok_operator )
627 return func;
628 }
629 scopedName.prepend( yyLex );
630 yyTok = getToken();
631
632 if ( yyTok != Tok_Gulbrandsen )
633 break;
634 scopedName.prepend( yyLex );
635 yyTok = getToken();
636 }
637
638 if ( !isCtorOrDtor(scopedName) ) {
639 returnType = matchDataType();
640 if ( returnType.isEmpty() )
641 return func;
642 }
643
644 /*
645 The documentation feature is unused so far, since we cannot
646 really distinguist between a normal comment between two
647 functions and one that relates to the following function. One
648 good heuristic is to assume that a comment immediately followed
649 by a function with no blank line in between relates to the
650 function, but there's no easy way to find that out with a
651 tokenizer.
652 */
653#if 0
654 if ( yyTok == Tok_Comment ) {
655 documentation = yyLex;
656 yyTok = getToken();
657 }
658
659 func.setDocumentation( documentation );
660#endif
661 func.setReturnType( returnType );
662 func.setScopedName( scopedName );
663 func.setParameterList( params );
664 func.setConst( cnst );
665 return func;
666}
667
668/*
669 Try to set the body. It's not sufficient to call
670 func->setBody(somewhatBody), as the somewhatBody might be too large.
671 Case in point:
672
673 void foo()
674 {
675 printf( "Hello" );
676 }
677
678 int n;
679
680 void bar()
681 {
682 printf( " world!\n" );
683 }
684
685 The parser first finds bar(). Then it finds "void foo() {" and
686 naively expects the body to extend up to "void bar()". This
687 function's job is to count braces and make sure "int n;" is not
688 counted as part of the body.
689
690 Cases where the closing brace of the body is missing require no
691 special processing.
692*/
693static void setBody( CppFunction *func, const QString& somewhatBody )
694{
695 QString body = somewhatBody;
696
697 int braceDepth = 0;
698 int i = 0;
699 while ( i < (int) body.length() ) {
700 if ( body[i] == QChar('{') ) {
701 braceDepth++;
702 } else if ( body[i] == QChar('}') ) {
703 braceDepth--;
704 if ( braceDepth == 0 ) {
705 body.truncate( i + 1 );
706 break;
707 }
708 }
709 i++;
710 }
711
712 func->setBody( body );
713}
714
715/*
716 Parses a whole C++ file, looking for function definitions. Case in
717 point:
718
719 void foo()
720 {
721 printf( "Hello" );
722
723 void bar()
724 {
725 printf( " world!\n" );
726 }
727
728 The parser looks for left braces and tries to parse a function
729 prototype backwards. First it finds "void bar() {". Then it works
730 up and finds "void foo() {".
731*/
732static void matchTranslationUnit( QValueList<CppFunction> *flist )
733{
734 int endBody = -1;
735 int startBody;
736
737 for ( ;; ) {
738 if ( endBody == -1 )
739 endBody = yyPos;
740
741 while ( yyTok != Tok_Boi && yyTok != Tok_LeftBrace )
742 yyTok = getToken();
743 if ( yyTok == Tok_Boi )
744 break;
745
746 // found a left brace
747 yyTok = getToken();
748 startBody = yyPos;
749 CppFunction func = matchFunctionPrototype( FALSE );
750 if ( !func.scopedName().isEmpty() ) {
751 QString body = yyIn->mid( startBody, endBody - startBody );
752 setBody( &func, body );
753 body = func.body(); // setBody() can change the body
754
755 /*
756 Compute important line numbers.
757 */
758 int functionStartLineNo = 1 + QConstString( yyIn->unicode(), yyPos )
759 .string().contains( QChar('\n') );
760 int startLineNo = functionStartLineNo +
761 QConstString( yyIn->unicode() + yyPos, startBody - yyPos )
762 .string().contains( QChar('\n') );
763 int endLineNo = startLineNo + body.contains( QChar('\n') );
764
765 func.setLineNums( functionStartLineNo, startLineNo, endLineNo );
766 flist->prepend( func );
767 endBody = -1;
768 }
769 }
770}
771
772/*
773 Extracts C++ function from source code and put them in a list.
774*/
775void extractCppFunctions( const QString& code, QValueList<CppFunction> *flist )
776{
777 startTokenizer( code );
778 yyTok = getToken();
779 matchTranslationUnit( flist );
780 stopTokenizer();
781}
782
783/*
784 Returns the prototype with the parameter names removed.
785*/
786QString canonicalCppProto( const QString& proto )
787{
788 startTokenizer( proto );
789 yyTok = getToken();
790 CppFunction func = matchFunctionPrototype( TRUE );
791 stopTokenizer();
792 return func.prototype();
793}
Note: See TracBrowser for help on using the repository browser.