source: python/trunk/Lib/pyclbr.py@ 602

Last change on this file since 602 was 391, checked in by dmik, 12 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 13.1 KB
Line 
1"""Parse a Python module and describe its classes and methods.
2
3Parse enough of a Python file to recognize imports and class and
4method definitions, and to find out the superclasses of a class.
5
6The interface consists of a single function:
7 readmodule_ex(module [, path])
8where module is the name of a Python module, and path is an optional
9list of directories where the module is to be searched. If present,
10path is prepended to the system search path sys.path. The return
11value is a dictionary. The keys of the dictionary are the names of
12the classes defined in the module (including classes that are defined
13via the from XXX import YYY construct). The values are class
14instances of the class Class defined here. One special key/value pair
15is present for packages: the key '__path__' has a list as its value
16which contains the package search path.
17
18A class is described by the class Class in this module. Instances
19of this class have the following instance variables:
20 module -- the module name
21 name -- the name of the class
22 super -- a list of super classes (Class instances)
23 methods -- a dictionary of methods
24 file -- the file in which the class was defined
25 lineno -- the line in the file on which the class statement occurred
26The dictionary of methods uses the method names as keys and the line
27numbers on which the method was defined as values.
28If the name of a super class is not recognized, the corresponding
29entry in the list of super classes is not a class instance but a
30string giving the name of the super class. Since import statements
31are recognized and imported modules are scanned as well, this
32shouldn't happen often.
33
34A function is described by the class Function in this module.
35Instances of this class have the following instance variables:
36 module -- the module name
37 name -- the name of the function
38 file -- the file in which the function was defined
39 lineno -- the line in the file on which the def statement occurred
40"""
41
42import sys
43import imp
44import tokenize
45from token import NAME, DEDENT, OP
46from operator import itemgetter
47
48__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
49
50_modules = {} # cache of modules we've seen
51
52# each Python class is represented by an instance of this class
class Class:
    '''Description of one Python class found while scanning a module.

    Instance variables:
        module  -- name of the module the class was found in
        name    -- name of the class
        super   -- list of base classes (the list passed in, or a new
                   empty list when None was passed)
        methods -- dictionary mapping method name to line number
        file    -- file in which the class was defined
        lineno  -- line on which the class statement occurred
    '''
    def __init__(self, module, name, super, file, lineno):
        self.module, self.name = module, name
        # None means "no base classes"; give each instance its own list.
        self.super = [] if super is None else super
        self.methods = {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.

        A later definition of the same name replaces the earlier entry.
        '''
        self.methods[name] = lineno
67
class Function:
    '''Description of one top-level Python function.

    Instance variables:
        module -- name of the module the function was found in
        name   -- name of the function
        file   -- file in which the function was defined
        lineno -- line on which the def statement occurred
    '''
    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
75
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE exactly as readmodule_ex() does, but return only the
    entries whose value is a Class instance; every other entry of the
    scan result is left out.  PATH is an optional list of directories
    searched before sys.path.
    '''
    scanned = _readmodule(module, path or [])
    return {key: value
            for key, value in scanned.items()
            if isinstance(value, Class)}
87
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the module
    and return the dictionary built by _readmodule(): one entry for
    each class and each top-level function found in the module, plus
    a '__path__' entry when MODULE is a package.
    '''
    search_path = path if path else []
    return _readmodule(module, search_path)
96
def _close_scopes(stack, indent):
    # Pop every (object, indent) entry whose indent is >= INDENT, i.e.
    # every class or def that a token at column INDENT has left.
    while stack and stack[-1][1] >= indent:
        del stack[-1]


def _resolve_base(name, known):
    # Map the textual superclass NAME to a known object if possible.
    # KNOWN is the dict of names collected so far for the module being
    # parsed.  For a dotted name, the last two components are taken as
    # module and class and looked up in the module cache.  When nothing
    # matches, NAME itself (a string) is returned.
    if name in known:
        return known[name]
    parts = name.split('.')
    if len(parts) > 1:
        owner = _modules.get(parts[-2])
        if owner is not None and parts[-1] in owner:
            return owner[parts[-1]]
    return name


def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function instances;
    for a package the key '__path__' maps to a one-element list holding
    the package directory.  Results are cached in _modules under the
    full dotted module name.

    Raise ImportError when the module (or a parent package of a dotted
    name) cannot be found.  Errors raised by the tokenizer other than
    StopIteration propagate to the caller; the source file is closed in
    every case.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    f, fname, (_s, _m, ty) = imp.find_module(module, search_path)
    if ty == imp.PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    # Cache before parsing so that recursive imports of this module
    # find the (still incomplete) entry instead of recursing forever.
    _modules[fullmodule] = tree
    if ty != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return tree

    stack = []  # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                # close nested classes and defs
                _close_scopes(stack, start[1])
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                _close_scopes(stack, thisindent)
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent))  # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                _close_scopes(stack, thisindent)
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    # there's a list of superclasses
                    inherit = []
                    level = 1
                    base_tokens = []  # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            base_name = "".join(base_tokens)
                            # "class C():" and a trailing comma produce
                            # an empty name; that is not a superclass.
                            if base_name:
                                inherit.append(
                                    _resolve_base(base_name, tree))
                            base_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_tokens.append(token)
                        # expressions in the base list are not supported
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                for mod, _mod2 in _getnamelist(g):
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  (KeyboardInterrupt
                        # and SystemExit are deliberately not caught.)
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    imported = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for name, asname in names:
                    if name in imported:
                        tree[asname or name] = imported[name]
                    elif name == '*':
                        # don't add names that start with _
                        for key in imported:
                            if key[0] != '_':
                                tree[key] = imported[key]
    except StopIteration:
        # A helper ran off the end of the token stream; keep what we have.
        pass
    finally:
        # Close the source file even when the tokenizer raises.
        f.close()

    return tree
278
def _getnamelist(g):
    # Helper to get a comma-separated list of dotted names plus 'as'
    # clauses.  Return a list of pairs (name, name2) where name2 is
    # the 'as' name, or None if there is no 'as' clause.
    #
    # G is the token iterator, positioned just after 'import'.  The
    # list may be wrapped in parentheses and then span several lines,
    # as in "from m import (a, b as c)".
    names = []
    name, token = _getname(g)
    if name is None and token == '(':
        # Inside parentheses the tokenizer emits NL and COMMENT tokens
        # between the names; filter them out so the rest of the logic
        # sees the same stream as for a one-line list.  The filter is
        # lazy, so it never reads further ahead in G than we ask for.
        g = (tok for tok in g
             if tok[0] not in (tokenize.NL, tokenize.COMMENT))
        name, token = _getname(g)
    while name:
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # Skip anything unexpected up to the next comma or end of line.
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
        name, token = _getname(g)
    return names
298
def _getname(g):
    # Helper to get a dotted name, return a pair (name, token) where
    # name is the dotted name, or None if there was no dotted name,
    # and token is the next input token.
    #
    # G may be any iterator yielding token tuples whose first two items
    # are (type, string).  A lone '*' is accepted as a name.
    # StopIteration from G is not caught here.
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts = [token]
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        # A dot must be followed by another name component.
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)
317
def _main():
    # Main program for testing.
    #
    # Takes one command-line argument: a module name, or the path of a
    # file (a trailing ".py" is stripped and the file's directory is
    # searched first).  Prints the classes, their methods and the
    # top-level functions found, ordered by line number.
    import os
    if len(sys.argv) < 2:
        sys.stderr.write("usage: pyclbr.py module_or_file\n")
        sys.exit(2)
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # Entries without a lineno attribute (such as '__path__') sort first.
    objs = sorted(tree.values(),
                  key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))
342
343if __name__ == "__main__":
344 _main()
Note: See TracBrowser for help on using the repository browser.