Context Navigation

yyreg.cpp

Last change on this file was 197, checked in by rudi, 14 years ago
Added QtDesigner
File size: 19.5 KB

Line
1	/**********************************************************************
2	**
3	** Copyright (C) 2005-2007 Trolltech ASA. All rights reserved.
4	**
5	** This file is part of Qt Designer.
6	**
7	** This file may be distributed and/or modified under the terms of the
8	** GNU General Public License version 2 as published by the Free Software
9	** Foundation and appearing in the file LICENSE.GPL included in the
10	** packaging of this file.
11	**
12	** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
13	** licenses may use this file in accordance with the Qt Commercial License
14	** Agreement provided with the Software.
15	**
16	** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
17	** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18	**
19	** See http://www.trolltech.com/gpl/ for GPL licensing information.
20	** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
21	** information about Qt Commercial License Agreements.
22	**
23	** Contact info@trolltech.com if any conditions of this licensing are
24	** not clear to you.
25	**
26	**********************************************************************/
27
28	#include <qregexp.h>
29
30	#include <ctype.h>
31	#include <stdio.h>
32
33	#include "yyreg.h"
34
35	/*
36	First comes the tokenizer. We don't need something that knows much
37	about C++. However, we need something that gives tokens from the
38	end of the file to the start, which is tricky.
39
40	If you are not familiar with hand-written tokenizers and parsers,
41	you might want to read other simpler parsers written in the same
42	style:
43
44	$(QTDIR)/src/tools/qregexp.cpp
45	$(QTDIR)/tools/inspector/cppparser.cpp
46
47	You might also want to read Section 2 in the Dragon Book.
48	*/
49
/*
  Token codes returned by getToken(). Only the tokens the parser is
  interested in get a code of their own; any other C++ token is
  reported as Tok_Something, because ignoring tokens completely would
  be dangerous. The values follow declaration order, starting at 0.
*/
enum {
    Tok_Boi,            // start of the input reached (the tokenizer runs backwards)
    Tok_Ampersand,      // &
    Tok_Aster,          // *
    Tok_LeftParen,      // (
    Tok_RightParen,     // )
    Tok_Equal,          // =
    Tok_LeftBrace,      // {
    Tok_RightBrace,     // }
    Tok_Semicolon,      // ;
    Tok_Colon,          // :
    Tok_LeftAngle,      // <
    Tok_RightAngle,     // >
    Tok_Comma,          // ,
    Tok_Ellipsis,       // two or more consecutive dots
    Tok_Gulbrandsen,    // ::
    Tok_LeftBracket,    // [
    Tok_RightBracket,   // ]
    Tok_Tilde,          // ~
    Tok_Something,      // any token that is not interesting in itself
    Tok_Comment,        // reserved for comments
    Tok_Ident,          // identifier that is not one of the keywords below

    // keywords
    Tok_char,
    Tok_const,
    Tok_double,
    Tok_int,
    Tok_long,
    Tok_operator,
    Tok_short,
    Tok_signed,
    Tok_unsigned
};
63
/*
  The following variables store the lexical analyzer state. The best way
  to understand them is to implement a function myGetToken() that calls
  getToken(), to add some qDebug() statements in there and then to
  #define getToken() myGetToken().

  The input is scanned backwards: readChar() decrements yyCurPos, and
  the lexeme is built from the end of yyLexBuf towards its start.
*/
static QString *yyIn; // the input stream (heap copy made by startTokenizer(), deleted by stopTokenizer())
static int yyPos; // the position of the current token in yyIn (getToken() sets it to yyCurPos + 2 on entry)
static int yyCurPos; // the position of the next lookahead character (index into yyIn; may go below 0)
static char *yyLexBuf; // the lexeme buffer (YYLexBufSize bytes, allocated by startTokenizer())
static const int YYLexBufSize = 65536; // big enough for long comments
static char *yyLex; // the lexeme itself (a pointer into yyLexBuf; readChar() moves it down as characters are stored)
static int yyCh; // the lookbehind character (EOF once the start of the input has been passed)
77
78	/*
79	Moves back to the previous character in the input stream and
80	updates the tokenizer state. This function is to be used only by
81	getToken(), which provides the right abstraction.
82	*/
83	static inline void readChar()
84	{
85	if ( yyCh == EOF )
86	return;
87
88	if ( yyLex > yyLexBuf )
89	*--yyLex = (char) yyCh;
90
91	if ( yyCurPos < 0 )
92	yyCh = EOF;
93	else
94	yyCh = (*yyIn)[yyCurPos].unicode();
95	yyCurPos--;
96	}
97
98	/*
99	Sets up the tokenizer.
100	*/
101	static void startTokenizer( const QString& in )
102	{
103	yyIn = new QString;
104	*yyIn = in;
105	yyPos = yyIn->length() - 1;
106	yyCurPos = yyPos;
107	yyLexBuf = new char[YYLexBufSize];
108	yyLex = yyLexBuf + YYLexBufSize - 1;
109	*yyLex = '\0';
110	yyCh = '\0';
111	readChar();
112	}
113
114	/*
115	Frees resources allocated by the tokenizer.
116	*/
117	static void stopTokenizer()
118	{
119	delete yyIn;
120	delete[] yyLexBuf;
121	yyLexBuf = 0;
122	}
123
/*
  Quick-and-dirty hashing for telling keywords apart fast.

  HASH() combines the first character of a word with its length.
  CHECK() is meant to be used directly inside a 'case' of the keyword
  switch in getToken(): it leaves the switch (via 'break') unless the
  current lexeme is exactly target. It cannot be wrapped in
  do { } while ( 0 ), because the 'break' must apply to the enclosing
  switch.
*/
#define HASH( ch, len ) ( ((len) << 8) | (ch) )
#define CHECK( target ) \
    if ( strcmp(yyLex, (target)) != 0 ) \
        break;
132
133	/*
134	Returns the previous token in the abstract token stream. The parser
135	deals only with tokens, not with characters.
136	*/
137	static int getToken()
138	{
139	// why "+ 2"? try putting some qDebug()'s and see
140	yyPos = yyCurPos + 2;
141
142	for ( ;; ) {
143	/*
144	See if the previous token is interesting. If it isn't, we
145	will loop anyway an go to the token before the previous
146	token, and so on.
147	*/
148
149	yyLex = yyLexBuf + YYLexBufSize - 1;
150	*yyLex = '\0';
151
152	if ( yyCh == EOF ) {
153	break;
154	} else if ( isspace(yyCh) ) {
155	bool metNL = FALSE;
156	do {
157	metNL = ( metNL \|\| yyCh == '\n' );
158	readChar();
159	} while ( isspace(yyCh) );
160
161	if ( metNL ) {
162	/*
163	C++ style comments are tricky. In left-to-right
164	thinking, C++ comments start with "//" and end with
165	'\n'. In right-to-left thinking, they start with a
166	'\n'; but of course not every '\n' starts a comment.
167
168	When we meet the '\n', we look behind, on the same
169	line, for a "//", and if there is one we mess
170	around with the tokenizer state to effectively
171	ignore the comment. Beware of off-by-one and
172	off-by-two bugs when you modify this code by adding
173	qDebug()'s here and there.
174	*/
175	if ( yyCurPos >= 0 ) {
176	int lineStart = yyIn->findRev( QChar('\n'), yyCurPos ) + 1;
177	QString line = yyIn->mid( lineStart,
178	yyCurPos - lineStart + 2 );
179	int commentStart = line.find( QString("//") );
180	if ( commentStart != -1 ) {
181	yyCurPos = lineStart + commentStart - 1;
182	yyPos = yyCurPos + 2;
183	readChar();
184	}
185	}
186	}
187	} else if ( isalnum(yyCh) \|\| yyCh == '_' ) {
188	do {
189	readChar();
190	} while ( isalnum(yyCh) \|\| yyCh == '_' );
191
192	switch ( HASH(yyLex[0], strlen(yyLex)) ) {
193	case HASH( 'c', 4 ):
194	CHECK( "char" );
195	return Tok_char;
196	case HASH( 'c', 5 ):
197	CHECK( "const" );
198	return Tok_const;
199	case HASH( 'd', 6 ):
200	CHECK( "double" );
201	return Tok_double;
202	case HASH( 'i', 3 ):
203	CHECK( "int" );
204	return Tok_int;
205	case HASH( 'l', 4 ):
206	CHECK( "long" );
207	return Tok_long;
208	case HASH( 'o', 8 ):
209	CHECK( "operator" );
210	return Tok_operator;
211	case HASH( 's', 5 ):
212	CHECK( "short" );
213	return Tok_short;
214	case HASH( 's', 6 ):
215	CHECK( "signed" );
216	return Tok_signed;
217	case HASH( 'u', 8 ):
218	CHECK( "unsigned" );
219	return Tok_unsigned;
220	}
221	if ( isdigit(*yyLex) )
222	return Tok_Something;
223	else
224	return Tok_Ident;
225	} else {
226	int quote;
227
228	switch ( yyCh ) {
229	case '!':
230	case '%':
231	case '^':
232	case '+':
233	case '-':
234	case '?':
235	case '\|':
236	readChar();
237	return Tok_Something;
238	case '"':
239	case '\'':
240	quote = yyCh;
241	readChar();
242
243	while ( yyCh != EOF && yyCh != '\n' ) {
244	if ( yyCh == quote ) {
245	readChar();
246	if ( yyCh != '\\' )
247	break;
248	} else {
249	readChar();
250	}
251	}
252	return Tok_Something;
253	case '&':
254	readChar();
255	if ( yyCh == '&' ) {
256	readChar();
257	return Tok_Something;
258	} else {
259	return Tok_Ampersand;
260	}
261	case '(':
262	readChar();
263	return Tok_LeftParen;
264	case ')':
265	readChar();
266	return Tok_RightParen;
267	case '*':
268	readChar();
269	return Tok_Aster;
270	case ',':
271	readChar();
272	return Tok_Comma;
273	case '.':
274	readChar();
275	if ( yyCh == '.' ) {
276	do {
277	readChar();
278	} while ( yyCh == '.' );
279	return Tok_Ellipsis;
280	} else {
281	return Tok_Something;
282	}
283	case '/':
284	/*
285	C-style comments are symmetric. C++-style comments
286	are handled elsewhere.
287	*/
288	readChar();
289	if ( yyCh == '*' ) {
290	bool metAster = FALSE;
291	bool metAsterSlash = FALSE;
292
293	readChar();
294
295	while ( !metAsterSlash ) {
296	if ( yyCh == EOF )
297	break;
298
299	if ( yyCh == '*' )
300	metAster = TRUE;
301	else if ( metAster && yyCh == '/' )
302	metAsterSlash = TRUE;
303	else
304	metAster = FALSE;
305	readChar();
306	}
307	break;
308	// return Tok_Comment;
309	} else {
310	return Tok_Something;
311	}
312	case ':':
313	readChar();
314	if ( yyCh == ':' ) {
315	readChar();
316	return Tok_Gulbrandsen;
317	} else {
318	return Tok_Colon;
319	}
320	case ';':
321	readChar();
322	return Tok_Semicolon;
323	case '<':
324	readChar();
325	return Tok_LeftAngle;
326	case '=':
327	readChar();
328	return Tok_Equal;
329	case '>':
330	readChar();
331	return Tok_RightAngle;
332	case '[':
333	readChar();
334	return Tok_LeftBracket;
335	case ']':
336	readChar();
337	return Tok_RightBracket;
338	case '{':
339	readChar();
340	return Tok_LeftBrace;
341	case '}':
342	readChar();
343	return Tok_RightBrace;
344	case '~':
345	readChar();
346	return Tok_Tilde;
347	default:
348	readChar();
349	}
350	}
351	}
352	return Tok_Boi;
353	}
354
/*
  The member function(s) of CppFunction follow.
*/
358
359	/*
360	Returns the prototype for the C++ function, without the semicolon.
361	*/
362	QString CppFunction::prototype() const
363	{
364	QString proto;
365
366	if ( !returnType().isEmpty() )
367	proto = returnType() + QChar( ' ' );
368	proto += scopedName();
369	proto += QChar( '(' );
370	if ( !parameterList().isEmpty() ) {
371	QStringList::ConstIterator p = parameterList().begin();
372	proto += *p;
373	++p;
374	while ( p != parameterList().end() ) {
375	proto += QString( ", " );
376	proto += *p;
377	++p;
378	}
379	}
380	proto += QChar( ')' );
381	if ( isConst() )
382	proto += QString( " const" );
383	return proto;
384	}
385
/*
  The parser follows. We are not really parsing C++, just trying to
  find the start and end of function definitions.

  One important pitfall is that the parsed code need not be valid.
  Parsing from right to left helps cope with that, as explained in
  the comments below.

  In the examples, we will use the symbol @ to stand for the position
  in the token stream. In "int @ x ;", the lookahead token (yyTok) is
  'int'.
*/
398
static int yyTok; // the current token (one of the Tok_* codes, as last returned by getToken())
400
401	/*
402	Returns TRUE if thingy is a constructor or a destructor; otherwise
403	returns FALSE.
404	*/
405	static bool isCtorOrDtor( const QString& thingy )
406	{
407	// e.g., Alpha<a>::Beta<Bar<b, c> >::~Beta
408	QRegExp xtor( QString(
409	"(?:([A-Z_a-z][0-9A-Z_a-z]*)" // class name
410	"(?:<(?:[^>]\|<[^>]>)>)*" // template arguments
411	"::)+" // many in a row
412	"~?" // ctor or dtor?
413	"\\1") ); // function has same name as class
414	return xtor.exactMatch( thingy );
415	}
416
417	/*
418	Skips over any template arguments with balanced angle brackets, and
419	returns the skipped material as a string.
420
421	Before: QMap < QString , QValueList < QString > > @ m ;
422	After: QMap @ < QString , QValueList < QString > > m ;
423	*/
424	static QString matchTemplateAngles()
425	{
426	QString t;
427
428	if ( yyTok == Tok_RightAngle ) {
429	int depth = 0;
430	do {
431	if ( yyTok == Tok_RightAngle )
432	depth++;
433	else if ( yyTok == Tok_LeftAngle )
434	depth--;
435	t.prepend( yyLex );
436	yyTok = getToken();
437	} while ( depth > 0 && yyTok != Tok_Boi && yyTok != Tok_LeftBrace );
438	}
439	return t;
440	}
441
442	/*
443	Similar to matchTemplateAngles(), but for array brackets in parameter
444	data types (as in "int *argv[]").
445	*/
446	static QString matchArrayBrackets()
447	{
448	QString t;
449
450	while ( yyTok == Tok_RightBracket ) {
451	t.prepend( yyLex );
452	yyTok = getToken();
453	if ( yyTok == Tok_Something ) {
454	t.prepend( yyLex );
455	yyTok = getToken();
456	}
457	if ( yyTok != Tok_LeftBracket )
458	return QString::null;
459	t.prepend( yyLex );
460	yyTok = getToken();
461	}
462	return t;
463	}
464
465	/*
466	Prepends prefix to *type. This operation is in theory trivial, but
467	for the spacing to look good, we have to do something. The original
468	spacing is lost as the input is tokenized.
469	*/
470	static void prependToType( QString *type, const QString& prefix )
471	{
472	if ( !type->isEmpty() && !prefix.isEmpty() ) {
473	QChar left = prefix[(int) prefix.length() - 1];
474	QChar right = (*type)[0];
475
476	if ( left.isLetter() &&
477	(right.isLetter() \|\| right == QChar('*') \|\| right == QChar('&')) )
478	type->prepend( QChar(' ') );
479	}
480	type->prepend( prefix );
481	}
482
483	static bool isModifier( int tok )
484	{
485	return ( tok == Tok_signed \|\| tok == Tok_unsigned \|\|
486	tok == Tok_short \|\| tok == Tok_long );
487	}
488
489	/*
490	Parses a data type (backwards as usual) and returns a textual
491	representation of it.
492	*/
493	static QString matchDataType()
494	{
495	QString type;
496
497	while ( yyTok == Tok_Ampersand \|\| yyTok == Tok_Aster \|\|
498	yyTok == Tok_const ) {
499	prependToType( &type, yyLex );
500	yyTok = getToken();
501	}
502
503	/*
504	This code is really hard to follow... sorry. The loop matches
505	Alpha::Beta::Gamma::...::Omega.
506	*/
507	for ( ;; ) {
508	bool modifierMet = FALSE;
509
510	prependToType( &type, matchTemplateAngles() );
511
512	if ( yyTok != Tok_Ident ) {
513	/*
514	People may write 'const unsigned short' or
515	'short unsigned const' or any other permutation.
516	*/
517	while ( yyTok == Tok_const \|\| isModifier(yyTok) ) {
518	prependToType( &type, yyLex );
519	yyTok = getToken();
520	if ( yyTok != Tok_const )
521	modifierMet = TRUE;
522	}
523
524	if ( yyTok == Tok_Tilde ) {
525	prependToType( &type, yyLex );
526	yyTok = getToken();
527	}
528	}
529
530	if ( !modifierMet ) {
531	if ( yyTok == Tok_Ellipsis \|\| yyTok == Tok_Ident \|\|
532	yyTok == Tok_char \|\| yyTok == Tok_int \|\|
533	yyTok == Tok_double ) {
534	prependToType( &type, yyLex );
535	yyTok = getToken();
536	} else {
537	return QString::null;
538	}
539	} else if ( yyTok == Tok_int \|\| yyTok == Tok_char \|\|
540	yyTok == Tok_double ) {
541	prependToType( &type, yyLex );
542	yyTok = getToken();
543	}
544
545	while ( yyTok == Tok_const \|\| isModifier(yyTok) ) {
546	prependToType( &type, yyLex );
547	yyTok = getToken();
548	}
549
550	if ( yyTok == Tok_Gulbrandsen ) {
551	prependToType( &type, yyLex );
552	yyTok = getToken();
553	} else {
554	break;
555	}
556	}
557	return type;
558	}
559
560	/*
561	Parses a function prototype (without the semicolon) and returns an
562	object that stores information about this function.
563	*/
564	static CppFunction matchFunctionPrototype( bool stripParamNames )
565	{
566	CppFunction func;
567	#if 0
568	QString documentation;
569	#endif
570	QString returnType;
571	QString scopedName;
572	QStringList params;
573	QString qualifier;
574	bool cnst = FALSE;
575
576	if ( yyTok == Tok_const ) {
577	cnst = TRUE;
578	yyTok = getToken();
579	}
580
581	if ( yyTok != Tok_RightParen )
582	return func;
583	yyTok = getToken();
584
585	if ( yyTok != Tok_LeftParen ) {
586	for ( ;; ) {
587	QString brackets = matchArrayBrackets();
588	QString name;
589	if ( yyTok == Tok_Ident ) {
590	name = yyLex;
591	yyTok = getToken();
592	}
593	QString type = matchDataType();
594
595	if ( type.isEmpty() ) {
596	if ( name.isEmpty() )
597	return func;
598	type = name;
599	name = QString::null;
600	}
601	if ( stripParamNames )
602	name = QString::null;
603
604	QString param = type + QChar( ' ' ) + name + brackets;
605	params.prepend( param.stripWhiteSpace() );
606
607	if ( yyTok != Tok_Comma )
608	break;
609	yyTok = getToken();
610	}
611	if ( yyTok != Tok_LeftParen )
612	return func;
613	}
614	yyTok = getToken();
615
616	for ( ;; ) {
617	scopedName.prepend( matchTemplateAngles() );
618
619	if ( yyTok != Tok_Ident ) {
620	// the operator keyword should be close
621	int i = 0;
622	while ( i < 4 && yyTok != Tok_operator ) {
623	scopedName.prepend( yyLex );
624	i++;
625	}
626	if ( yyTok != Tok_operator )
627	return func;
628	}
629	scopedName.prepend( yyLex );
630	yyTok = getToken();
631
632	if ( yyTok != Tok_Gulbrandsen )
633	break;
634	scopedName.prepend( yyLex );
635	yyTok = getToken();
636	}
637
638	if ( !isCtorOrDtor(scopedName) ) {
639	returnType = matchDataType();
640	if ( returnType.isEmpty() )
641	return func;
642	}
643
644	/*
645	The documentation feature is unused so far, since we cannot
646	really distinguist between a normal comment between two
647	functions and one that relates to the following function. One
648	good heuristic is to assume that a comment immediately followed
649	by a function with no blank line in between relates to the
650	function, but there's no easy way to find that out with a
651	tokenizer.
652	*/
653	#if 0
654	if ( yyTok == Tok_Comment ) {
655	documentation = yyLex;
656	yyTok = getToken();
657	}
658
659	func.setDocumentation( documentation );
660	#endif
661	func.setReturnType( returnType );
662	func.setScopedName( scopedName );
663	func.setParameterList( params );
664	func.setConst( cnst );
665	return func;
666	}
667
668	/*
669	Try to set the body. It's not sufficient to call
670	func->setBody(somewhatBody), as the somewhatBody might be too large.
671	Case in point:
672
673	void foo()
674	{
675	printf( "Hello" );
676	}
677
678	int n;
679
680	void bar()
681	{
682	printf( " world!\n" );
683	}
684
685	The parser first finds bar(). Then it finds "void foo() {" and
686	naively expects the body to extend up to "void bar()". This
687	function's job is to count braces and make sure "int n;" is not
688	counted as part of the body.
689
690	Cases where the closing brace of the body is missing require no
691	special processing.
692	*/
693	static void setBody( CppFunction *func, const QString& somewhatBody )
694	{
695	QString body = somewhatBody;
696
697	int braceDepth = 0;
698	int i = 0;
699	while ( i < (int) body.length() ) {
700	if ( body[i] == QChar('{') ) {
701	braceDepth++;
702	} else if ( body[i] == QChar('}') ) {
703	braceDepth--;
704	if ( braceDepth == 0 ) {
705	body.truncate( i + 1 );
706	break;
707	}
708	}
709	i++;
710	}
711
712	func->setBody( body );
713	}
714
715	/*
716	Parses a whole C++ file, looking for function definitions. Case in
717	point:
718
719	void foo()
720	{
721	printf( "Hello" );
722
723	void bar()
724	{
725	printf( " world!\n" );
726	}
727
728	The parser looks for left braces and tries to parse a function
729	prototype backwards. First it finds "void bar() {". Then it works
730	up and finds "void foo() {".
731	*/
732	static void matchTranslationUnit( QValueList<CppFunction> *flist )
733	{
734	int endBody = -1;
735	int startBody;
736
737	for ( ;; ) {
738	if ( endBody == -1 )
739	endBody = yyPos;
740
741	while ( yyTok != Tok_Boi && yyTok != Tok_LeftBrace )
742	yyTok = getToken();
743	if ( yyTok == Tok_Boi )
744	break;
745
746	// found a left brace
747	yyTok = getToken();
748	startBody = yyPos;
749	CppFunction func = matchFunctionPrototype( FALSE );
750	if ( !func.scopedName().isEmpty() ) {
751	QString body = yyIn->mid( startBody, endBody - startBody );
752	setBody( &func, body );
753	body = func.body(); // setBody() can change the body
754
755	/*
756	Compute important line numbers.
757	*/
758	int functionStartLineNo = 1 + QConstString( yyIn->unicode(), yyPos )
759	.string().contains( QChar('\n') );
760	int startLineNo = functionStartLineNo +
761	QConstString( yyIn->unicode() + yyPos, startBody - yyPos )
762	.string().contains( QChar('\n') );
763	int endLineNo = startLineNo + body.contains( QChar('\n') );
764
765	func.setLineNums( functionStartLineNo, startLineNo, endLineNo );
766	flist->prepend( func );
767	endBody = -1;
768	}
769	}
770	}
771
/*
  Extracts the C++ functions found in code and puts them into *flist.

  The tokenizer is set up on code, the translation unit is parsed
  backwards by matchTranslationUnit(), which prepends each function it
  finds to *flist, and the tokenizer is shut down again.
*/
void extractCppFunctions( const QString& code, QValueList<CppFunction> *flist )
{
    startTokenizer( code );
    yyTok = getToken(); // prime the parser with the last token of code
    matchTranslationUnit( flist );
    stopTokenizer();
}
782
/*
  Returns the prototype with the parameter names removed.

  proto is parsed backwards with matchFunctionPrototype(), asking it
  to strip the parameter names, and the result is formatted again by
  CppFunction::prototype().
*/
QString canonicalCppProto( const QString& proto )
{
    startTokenizer( proto );
    yyTok = getToken(); // prime the parser with the last token of proto
    CppFunction func = matchFunctionPrototype( TRUE );
    stopTokenizer();
    return func.prototype();
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/tools/designer/plugins/cppeditor/yyreg.cpp

Download in other formats: