| 1 | """Module symbol-table generator""" | 
|---|
| 2 |  | 
|---|
| 3 | from compiler import ast | 
|---|
| 4 | from compiler.consts import SC_LOCAL, SC_GLOBAL_IMPLICIT, SC_GLOBAL_EXPLICIT, \ | 
|---|
| 5 | SC_FREE, SC_CELL, SC_UNKNOWN | 
|---|
| 6 | from compiler.misc import mangle | 
|---|
| 7 | import types | 
|---|
| 8 |  | 
|---|
| 9 |  | 
|---|
| 10 | import sys | 
|---|
| 11 |  | 
|---|
| 12 | MANGLE_LEN = 256 | 
|---|
| 13 |  | 
|---|
| 14 | class Scope: | 
|---|
| 15 | # XXX how much information do I need about each name? | 
|---|
| 16 | def __init__(self, name, module, klass=None): | 
|---|
| 17 | self.name = name | 
|---|
| 18 | self.module = module | 
|---|
| 19 | self.defs = {} | 
|---|
| 20 | self.uses = {} | 
|---|
| 21 | self.globals = {} | 
|---|
| 22 | self.params = {} | 
|---|
| 23 | self.frees = {} | 
|---|
| 24 | self.cells = {} | 
|---|
| 25 | self.children = [] | 
|---|
| 26 | # nested is true if the class could contain free variables, | 
|---|
| 27 | # i.e. if it is nested within another function. | 
|---|
| 28 | self.nested = None | 
|---|
| 29 | self.generator = None | 
|---|
| 30 | self.klass = None | 
|---|
| 31 | if klass is not None: | 
|---|
| 32 | for i in range(len(klass)): | 
|---|
| 33 | if klass[i] != '_': | 
|---|
| 34 | self.klass = klass[i:] | 
|---|
| 35 | break | 
|---|
| 36 |  | 
|---|
| 37 | def __repr__(self): | 
|---|
| 38 | return "<%s: %s>" % (self.__class__.__name__, self.name) | 
|---|
| 39 |  | 
|---|
| 40 | def mangle(self, name): | 
|---|
| 41 | if self.klass is None: | 
|---|
| 42 | return name | 
|---|
| 43 | return mangle(name, self.klass) | 
|---|
| 44 |  | 
|---|
| 45 | def add_def(self, name): | 
|---|
| 46 | self.defs[self.mangle(name)] = 1 | 
|---|
| 47 |  | 
|---|
| 48 | def add_use(self, name): | 
|---|
| 49 | self.uses[self.mangle(name)] = 1 | 
|---|
| 50 |  | 
|---|
| 51 | def add_global(self, name): | 
|---|
| 52 | name = self.mangle(name) | 
|---|
| 53 | if name in self.uses or name in self.defs: | 
|---|
| 54 | pass # XXX warn about global following def/use | 
|---|
| 55 | if name in self.params: | 
|---|
| 56 | raise SyntaxError, "%s in %s is global and parameter" % \ | 
|---|
| 57 | (name, self.name) | 
|---|
| 58 | self.globals[name] = 1 | 
|---|
| 59 | self.module.add_def(name) | 
|---|
| 60 |  | 
|---|
| 61 | def add_param(self, name): | 
|---|
| 62 | name = self.mangle(name) | 
|---|
| 63 | self.defs[name] = 1 | 
|---|
| 64 | self.params[name] = 1 | 
|---|
| 65 |  | 
|---|
| 66 | def get_names(self): | 
|---|
| 67 | d = {} | 
|---|
| 68 | d.update(self.defs) | 
|---|
| 69 | d.update(self.uses) | 
|---|
| 70 | d.update(self.globals) | 
|---|
| 71 | return d.keys() | 
|---|
| 72 |  | 
|---|
| 73 | def add_child(self, child): | 
|---|
| 74 | self.children.append(child) | 
|---|
| 75 |  | 
|---|
| 76 | def get_children(self): | 
|---|
| 77 | return self.children | 
|---|
| 78 |  | 
|---|
| 79 | def DEBUG(self): | 
|---|
| 80 | print >> sys.stderr, self.name, self.nested and "nested" or "" | 
|---|
| 81 | print >> sys.stderr, "\tglobals: ", self.globals | 
|---|
| 82 | print >> sys.stderr, "\tcells: ", self.cells | 
|---|
| 83 | print >> sys.stderr, "\tdefs: ", self.defs | 
|---|
| 84 | print >> sys.stderr, "\tuses: ", self.uses | 
|---|
| 85 | print >> sys.stderr, "\tfrees:", self.frees | 
|---|
| 86 |  | 
|---|
| 87 | def check_name(self, name): | 
|---|
| 88 | """Return scope of name. | 
|---|
| 89 |  | 
|---|
| 90 | The scope of a name could be LOCAL, GLOBAL, FREE, or CELL. | 
|---|
| 91 | """ | 
|---|
| 92 | if name in self.globals: | 
|---|
| 93 | return SC_GLOBAL_EXPLICIT | 
|---|
| 94 | if name in self.cells: | 
|---|
| 95 | return SC_CELL | 
|---|
| 96 | if name in self.defs: | 
|---|
| 97 | return SC_LOCAL | 
|---|
| 98 | if self.nested and (name in self.frees or name in self.uses): | 
|---|
| 99 | return SC_FREE | 
|---|
| 100 | if self.nested: | 
|---|
| 101 | return SC_UNKNOWN | 
|---|
| 102 | else: | 
|---|
| 103 | return SC_GLOBAL_IMPLICIT | 
|---|
| 104 |  | 
|---|
| 105 | def get_free_vars(self): | 
|---|
| 106 | if not self.nested: | 
|---|
| 107 | return () | 
|---|
| 108 | free = {} | 
|---|
| 109 | free.update(self.frees) | 
|---|
| 110 | for name in self.uses.keys(): | 
|---|
| 111 | if name not in self.defs and name not in self.globals: | 
|---|
| 112 | free[name] = 1 | 
|---|
| 113 | return free.keys() | 
|---|
| 114 |  | 
|---|
| 115 | def handle_children(self): | 
|---|
| 116 | for child in self.children: | 
|---|
| 117 | frees = child.get_free_vars() | 
|---|
| 118 | globals = self.add_frees(frees) | 
|---|
| 119 | for name in globals: | 
|---|
| 120 | child.force_global(name) | 
|---|
| 121 |  | 
|---|
| 122 | def force_global(self, name): | 
|---|
| 123 | """Force name to be global in scope. | 
|---|
| 124 |  | 
|---|
| 125 | Some child of the current node had a free reference to name. | 
|---|
| 126 | When the child was processed, it was labelled a free | 
|---|
| 127 | variable.  Now that all its enclosing scope have been | 
|---|
| 128 | processed, the name is known to be a global or builtin.  So | 
|---|
| 129 | walk back down the child chain and set the name to be global | 
|---|
| 130 | rather than free. | 
|---|
| 131 |  | 
|---|
| 132 | Be careful to stop if a child does not think the name is | 
|---|
| 133 | free. | 
|---|
| 134 | """ | 
|---|
| 135 | self.globals[name] = 1 | 
|---|
| 136 | if name in self.frees: | 
|---|
| 137 | del self.frees[name] | 
|---|
| 138 | for child in self.children: | 
|---|
| 139 | if child.check_name(name) == SC_FREE: | 
|---|
| 140 | child.force_global(name) | 
|---|
| 141 |  | 
|---|
| 142 | def add_frees(self, names): | 
|---|
| 143 | """Process list of free vars from nested scope. | 
|---|
| 144 |  | 
|---|
| 145 | Returns a list of names that are either 1) declared global in the | 
|---|
| 146 | parent or 2) undefined in a top-level parent.  In either case, | 
|---|
| 147 | the nested scope should treat them as globals. | 
|---|
| 148 | """ | 
|---|
| 149 | child_globals = [] | 
|---|
| 150 | for name in names: | 
|---|
| 151 | sc = self.check_name(name) | 
|---|
| 152 | if self.nested: | 
|---|
| 153 | if sc == SC_UNKNOWN or sc == SC_FREE \ | 
|---|
| 154 | or isinstance(self, ClassScope): | 
|---|
| 155 | self.frees[name] = 1 | 
|---|
| 156 | elif sc == SC_GLOBAL_IMPLICIT: | 
|---|
| 157 | child_globals.append(name) | 
|---|
| 158 | elif isinstance(self, FunctionScope) and sc == SC_LOCAL: | 
|---|
| 159 | self.cells[name] = 1 | 
|---|
| 160 | elif sc != SC_CELL: | 
|---|
| 161 | child_globals.append(name) | 
|---|
| 162 | else: | 
|---|
| 163 | if sc == SC_LOCAL: | 
|---|
| 164 | self.cells[name] = 1 | 
|---|
| 165 | elif sc != SC_CELL: | 
|---|
| 166 | child_globals.append(name) | 
|---|
| 167 | return child_globals | 
|---|
| 168 |  | 
|---|
| 169 | def get_cell_vars(self): | 
|---|
| 170 | return self.cells.keys() | 
|---|
| 171 |  | 
|---|
| 172 | class ModuleScope(Scope): | 
|---|
| 173 | __super_init = Scope.__init__ | 
|---|
| 174 |  | 
|---|
| 175 | def __init__(self): | 
|---|
| 176 | self.__super_init("global", self) | 
|---|
| 177 |  | 
|---|
| 178 | class FunctionScope(Scope): | 
|---|
| 179 | pass | 
|---|
| 180 |  | 
|---|
| 181 | class GenExprScope(Scope): | 
|---|
| 182 | __super_init = Scope.__init__ | 
|---|
| 183 |  | 
|---|
| 184 | __counter = 1 | 
|---|
| 185 |  | 
|---|
| 186 | def __init__(self, module, klass=None): | 
|---|
| 187 | i = self.__counter | 
|---|
| 188 | self.__counter += 1 | 
|---|
| 189 | self.__super_init("generator expression<%d>"%i, module, klass) | 
|---|
| 190 | self.add_param('.0') | 
|---|
| 191 |  | 
|---|
| 192 | def get_names(self): | 
|---|
| 193 | keys = Scope.get_names(self) | 
|---|
| 194 | return keys | 
|---|
| 195 |  | 
|---|
| 196 | class LambdaScope(FunctionScope): | 
|---|
| 197 | __super_init = Scope.__init__ | 
|---|
| 198 |  | 
|---|
| 199 | __counter = 1 | 
|---|
| 200 |  | 
|---|
| 201 | def __init__(self, module, klass=None): | 
|---|
| 202 | i = self.__counter | 
|---|
| 203 | self.__counter += 1 | 
|---|
| 204 | self.__super_init("lambda.%d" % i, module, klass) | 
|---|
| 205 |  | 
|---|
| 206 | class ClassScope(Scope): | 
|---|
| 207 | __super_init = Scope.__init__ | 
|---|
| 208 |  | 
|---|
| 209 | def __init__(self, name, module): | 
|---|
| 210 | self.__super_init(name, module, name) | 
|---|
| 211 |  | 
|---|
| 212 | class SymbolVisitor: | 
|---|
| 213 | def __init__(self): | 
|---|
| 214 | self.scopes = {} | 
|---|
| 215 | self.klass = None | 
|---|
| 216 |  | 
|---|
| 217 | # node that define new scopes | 
|---|
| 218 |  | 
|---|
| 219 | def visitModule(self, node): | 
|---|
| 220 | scope = self.module = self.scopes[node] = ModuleScope() | 
|---|
| 221 | self.visit(node.node, scope) | 
|---|
| 222 |  | 
|---|
| 223 | visitExpression = visitModule | 
|---|
| 224 |  | 
|---|
| 225 | def visitFunction(self, node, parent): | 
|---|
| 226 | if node.decorators: | 
|---|
| 227 | self.visit(node.decorators, parent) | 
|---|
| 228 | parent.add_def(node.name) | 
|---|
| 229 | for n in node.defaults: | 
|---|
| 230 | self.visit(n, parent) | 
|---|
| 231 | scope = FunctionScope(node.name, self.module, self.klass) | 
|---|
| 232 | if parent.nested or isinstance(parent, FunctionScope): | 
|---|
| 233 | scope.nested = 1 | 
|---|
| 234 | self.scopes[node] = scope | 
|---|
| 235 | self._do_args(scope, node.argnames) | 
|---|
| 236 | self.visit(node.code, scope) | 
|---|
| 237 | self.handle_free_vars(scope, parent) | 
|---|
| 238 |  | 
|---|
| 239 | def visitGenExpr(self, node, parent): | 
|---|
| 240 | scope = GenExprScope(self.module, self.klass); | 
|---|
| 241 | if parent.nested or isinstance(parent, FunctionScope) \ | 
|---|
| 242 | or isinstance(parent, GenExprScope): | 
|---|
| 243 | scope.nested = 1 | 
|---|
| 244 |  | 
|---|
| 245 | self.scopes[node] = scope | 
|---|
| 246 | self.visit(node.code, scope) | 
|---|
| 247 |  | 
|---|
| 248 | self.handle_free_vars(scope, parent) | 
|---|
| 249 |  | 
|---|
| 250 | def visitGenExprInner(self, node, scope): | 
|---|
| 251 | for genfor in node.quals: | 
|---|
| 252 | self.visit(genfor, scope) | 
|---|
| 253 |  | 
|---|
| 254 | self.visit(node.expr, scope) | 
|---|
| 255 |  | 
|---|
| 256 | def visitGenExprFor(self, node, scope): | 
|---|
| 257 | self.visit(node.assign, scope, 1) | 
|---|
| 258 | self.visit(node.iter, scope) | 
|---|
| 259 | for if_ in node.ifs: | 
|---|
| 260 | self.visit(if_, scope) | 
|---|
| 261 |  | 
|---|
| 262 | def visitGenExprIf(self, node, scope): | 
|---|
| 263 | self.visit(node.test, scope) | 
|---|
| 264 |  | 
|---|
| 265 | def visitLambda(self, node, parent, assign=0): | 
|---|
| 266 | # Lambda is an expression, so it could appear in an expression | 
|---|
| 267 | # context where assign is passed.  The transformer should catch | 
|---|
| 268 | # any code that has a lambda on the left-hand side. | 
|---|
| 269 | assert not assign | 
|---|
| 270 |  | 
|---|
| 271 | for n in node.defaults: | 
|---|
| 272 | self.visit(n, parent) | 
|---|
| 273 | scope = LambdaScope(self.module, self.klass) | 
|---|
| 274 | if parent.nested or isinstance(parent, FunctionScope): | 
|---|
| 275 | scope.nested = 1 | 
|---|
| 276 | self.scopes[node] = scope | 
|---|
| 277 | self._do_args(scope, node.argnames) | 
|---|
| 278 | self.visit(node.code, scope) | 
|---|
| 279 | self.handle_free_vars(scope, parent) | 
|---|
| 280 |  | 
|---|
| 281 | def _do_args(self, scope, args): | 
|---|
| 282 | for name in args: | 
|---|
| 283 | if type(name) == types.TupleType: | 
|---|
| 284 | self._do_args(scope, name) | 
|---|
| 285 | else: | 
|---|
| 286 | scope.add_param(name) | 
|---|
| 287 |  | 
|---|
| 288 | def handle_free_vars(self, scope, parent): | 
|---|
| 289 | parent.add_child(scope) | 
|---|
| 290 | scope.handle_children() | 
|---|
| 291 |  | 
|---|
| 292 | def visitClass(self, node, parent): | 
|---|
| 293 | parent.add_def(node.name) | 
|---|
| 294 | for n in node.bases: | 
|---|
| 295 | self.visit(n, parent) | 
|---|
| 296 | scope = ClassScope(node.name, self.module) | 
|---|
| 297 | if parent.nested or isinstance(parent, FunctionScope): | 
|---|
| 298 | scope.nested = 1 | 
|---|
| 299 | if node.doc is not None: | 
|---|
| 300 | scope.add_def('__doc__') | 
|---|
| 301 | scope.add_def('__module__') | 
|---|
| 302 | self.scopes[node] = scope | 
|---|
| 303 | prev = self.klass | 
|---|
| 304 | self.klass = node.name | 
|---|
| 305 | self.visit(node.code, scope) | 
|---|
| 306 | self.klass = prev | 
|---|
| 307 | self.handle_free_vars(scope, parent) | 
|---|
| 308 |  | 
|---|
| 309 | # name can be a def or a use | 
|---|
| 310 |  | 
|---|
| 311 | # XXX a few calls and nodes expect a third "assign" arg that is | 
|---|
| 312 | # true if the name is being used as an assignment.  only | 
|---|
| 313 | # expressions contained within statements may have the assign arg. | 
|---|
| 314 |  | 
|---|
| 315 | def visitName(self, node, scope, assign=0): | 
|---|
| 316 | if assign: | 
|---|
| 317 | scope.add_def(node.name) | 
|---|
| 318 | else: | 
|---|
| 319 | scope.add_use(node.name) | 
|---|
| 320 |  | 
|---|
| 321 | # operations that bind new names | 
|---|
| 322 |  | 
|---|
| 323 | def visitFor(self, node, scope): | 
|---|
| 324 | self.visit(node.assign, scope, 1) | 
|---|
| 325 | self.visit(node.list, scope) | 
|---|
| 326 | self.visit(node.body, scope) | 
|---|
| 327 | if node.else_: | 
|---|
| 328 | self.visit(node.else_, scope) | 
|---|
| 329 |  | 
|---|
| 330 | def visitFrom(self, node, scope): | 
|---|
| 331 | for name, asname in node.names: | 
|---|
| 332 | if name == "*": | 
|---|
| 333 | continue | 
|---|
| 334 | scope.add_def(asname or name) | 
|---|
| 335 |  | 
|---|
| 336 | def visitImport(self, node, scope): | 
|---|
| 337 | for name, asname in node.names: | 
|---|
| 338 | i = name.find(".") | 
|---|
| 339 | if i > -1: | 
|---|
| 340 | name = name[:i] | 
|---|
| 341 | scope.add_def(asname or name) | 
|---|
| 342 |  | 
|---|
| 343 | def visitGlobal(self, node, scope): | 
|---|
| 344 | for name in node.names: | 
|---|
| 345 | scope.add_global(name) | 
|---|
| 346 |  | 
|---|
| 347 | def visitAssign(self, node, scope): | 
|---|
| 348 | """Propagate assignment flag down to child nodes. | 
|---|
| 349 |  | 
|---|
| 350 | The Assign node doesn't itself contains the variables being | 
|---|
| 351 | assigned to.  Instead, the children in node.nodes are visited | 
|---|
| 352 | with the assign flag set to true.  When the names occur in | 
|---|
| 353 | those nodes, they are marked as defs. | 
|---|
| 354 |  | 
|---|
| 355 | Some names that occur in an assignment target are not bound by | 
|---|
| 356 | the assignment, e.g. a name occurring inside a slice.  The | 
|---|
| 357 | visitor handles these nodes specially; they do not propagate | 
|---|
| 358 | the assign flag to their children. | 
|---|
| 359 | """ | 
|---|
| 360 | for n in node.nodes: | 
|---|
| 361 | self.visit(n, scope, 1) | 
|---|
| 362 | self.visit(node.expr, scope) | 
|---|
| 363 |  | 
|---|
| 364 | def visitAssName(self, node, scope, assign=1): | 
|---|
| 365 | scope.add_def(node.name) | 
|---|
| 366 |  | 
|---|
| 367 | def visitAssAttr(self, node, scope, assign=0): | 
|---|
| 368 | self.visit(node.expr, scope, 0) | 
|---|
| 369 |  | 
|---|
| 370 | def visitSubscript(self, node, scope, assign=0): | 
|---|
| 371 | self.visit(node.expr, scope, 0) | 
|---|
| 372 | for n in node.subs: | 
|---|
| 373 | self.visit(n, scope, 0) | 
|---|
| 374 |  | 
|---|
| 375 | def visitSlice(self, node, scope, assign=0): | 
|---|
| 376 | self.visit(node.expr, scope, 0) | 
|---|
| 377 | if node.lower: | 
|---|
| 378 | self.visit(node.lower, scope, 0) | 
|---|
| 379 | if node.upper: | 
|---|
| 380 | self.visit(node.upper, scope, 0) | 
|---|
| 381 |  | 
|---|
| 382 | def visitAugAssign(self, node, scope): | 
|---|
| 383 | # If the LHS is a name, then this counts as assignment. | 
|---|
| 384 | # Otherwise, it's just use. | 
|---|
| 385 | self.visit(node.node, scope) | 
|---|
| 386 | if isinstance(node.node, ast.Name): | 
|---|
| 387 | self.visit(node.node, scope, 1) # XXX worry about this | 
|---|
| 388 | self.visit(node.expr, scope) | 
|---|
| 389 |  | 
|---|
| 390 | # prune if statements if tests are false | 
|---|
| 391 |  | 
|---|
| 392 | _const_types = types.StringType, types.IntType, types.FloatType | 
|---|
| 393 |  | 
|---|
| 394 | def visitIf(self, node, scope): | 
|---|
| 395 | for test, body in node.tests: | 
|---|
| 396 | if isinstance(test, ast.Const): | 
|---|
| 397 | if type(test.value) in self._const_types: | 
|---|
| 398 | if not test.value: | 
|---|
| 399 | continue | 
|---|
| 400 | self.visit(test, scope) | 
|---|
| 401 | self.visit(body, scope) | 
|---|
| 402 | if node.else_: | 
|---|
| 403 | self.visit(node.else_, scope) | 
|---|
| 404 |  | 
|---|
| 405 | # a yield statement signals a generator | 
|---|
| 406 |  | 
|---|
| 407 | def visitYield(self, node, scope): | 
|---|
| 408 | scope.generator = 1 | 
|---|
| 409 | self.visit(node.value, scope) | 
|---|
| 410 |  | 
|---|
| 411 | def list_eq(l1, l2): | 
|---|
| 412 | return sorted(l1) == sorted(l2) | 
|---|
| 413 |  | 
|---|
| 414 | if __name__ == "__main__": | 
|---|
| 415 | import sys | 
|---|
| 416 | from compiler import parseFile, walk | 
|---|
| 417 | import symtable | 
|---|
| 418 |  | 
|---|
| 419 | def get_names(syms): | 
|---|
| 420 | return [s for s in [s.get_name() for s in syms.get_symbols()] | 
|---|
| 421 | if not (s.startswith('_[') or s.startswith('.'))] | 
|---|
| 422 |  | 
|---|
| 423 | for file in sys.argv[1:]: | 
|---|
| 424 | print file | 
|---|
| 425 | f = open(file) | 
|---|
| 426 | buf = f.read() | 
|---|
| 427 | f.close() | 
|---|
| 428 | syms = symtable.symtable(buf, file, "exec") | 
|---|
| 429 | mod_names = get_names(syms) | 
|---|
| 430 | tree = parseFile(file) | 
|---|
| 431 | s = SymbolVisitor() | 
|---|
| 432 | walk(tree, s) | 
|---|
| 433 |  | 
|---|
| 434 | # compare module-level symbols | 
|---|
| 435 | names2 = s.scopes[tree].get_names() | 
|---|
| 436 |  | 
|---|
| 437 | if not list_eq(mod_names, names2): | 
|---|
| 438 | print | 
|---|
| 439 | print "oops", file | 
|---|
| 440 | print sorted(mod_names) | 
|---|
| 441 | print sorted(names2) | 
|---|
| 442 | sys.exit(-1) | 
|---|
| 443 |  | 
|---|
| 444 | d = {} | 
|---|
| 445 | d.update(s.scopes) | 
|---|
| 446 | del d[tree] | 
|---|
| 447 | scopes = d.values() | 
|---|
| 448 | del d | 
|---|
| 449 |  | 
|---|
| 450 | for s in syms.get_symbols(): | 
|---|
| 451 | if s.is_namespace(): | 
|---|
| 452 | l = [sc for sc in scopes | 
|---|
| 453 | if sc.name == s.get_name()] | 
|---|
| 454 | if len(l) > 1: | 
|---|
| 455 | print "skipping", s.get_name() | 
|---|
| 456 | else: | 
|---|
| 457 | if not list_eq(get_names(s.get_namespace()), | 
|---|
| 458 | l[0].get_names()): | 
|---|
| 459 | print s.get_name() | 
|---|
| 460 | print sorted(get_names(s.get_namespace())) | 
|---|
| 461 | print sorted(l[0].get_names()) | 
|---|
| 462 | sys.exit(-1) | 
|---|