Package cheesecake :: Module model
[hide private]
[frames | no frames]

Source Code for Module cheesecake.model

  1  """ 
  2  Code borrowed from Michael Hudson's docextractor package with the author's 
  3  permission. 
  4   
  5  The original code is available at http://codespeak.net/svn/user/mwh/docextractor/. 
  6   
  7  Changes: 
  8    * do not print warnings to stdout (in System.warning) 
  9    * collect all function calls 
 10  """ 
 11   
 12   
 13  from compiler import ast 
 14  import sys 
 15  import os 
 16  import cPickle as pickle 
 17  import __builtin__ 
 18  import sets 
 19   
 20  from compiler.transformer import parse, parseFile 
 21  from compiler.visitor import walk 
 22   
 23  import ast_pp 
 24   
 25   
def get_call_name(node):
    """Return the (possibly dotted) name of the callable invoked by `node`.

    `node` must be an ``ast.CallFunc``; its ``node`` attribute (the called
    expression) is turned into a string such as ``"foo"`` or ``"a.b.c"``.

    Returns None when the called expression is itself a call.
    Raises TypeError when the called expression is of a kind that is not
    handled below.
    """
    assert isinstance(node, ast.CallFunc)

    def get_name(target):
        # Guard clauses are tested in the same order as the original chain.
        if isinstance(target, ast.CallFunc):
            return None
        if isinstance(target, ast.Name):
            return target.name
        if isinstance(target, str):
            return target
        if isinstance(target, tuple):
            if len(target) == 1:
                return target[0]
            # Only the first and the last element take part in the name.
            return "%s.%s" % (get_name(target[0]), target[-1])
        if isinstance(target, ast.Getattr):
            # presumably asList() yields (expression, attribute name) --
            # verify against the compiler.ast documentation.
            return get_name(target.asList())
        raise TypeError("Bad function name type: %s." % target)

    return get_name(node.node)
47
def get_function_calls(node, fc):
    """Collect the names of all calls found below `node` into `fc`.

    `node` is walked recursively through ``getChildren()``; anything that
    is not an ``ast.Node`` is ignored.  For every ``ast.CallFunc`` child
    the name computed by ``get_call_name`` is stored as a key of the
    dictionary `fc` (with the value 1).  `fc` is modified in place and
    nothing is returned.

    Calls whose target cannot be named (``get_call_name`` raises
    TypeError for them, e.g. a call on a subscript or on a literal) are
    skipped instead of aborting the whole traversal; their arguments and
    sub-expressions are still searched for further calls.
    """
    if not isinstance(node, ast.Node):
        return

    for child in node.getChildren():
        if isinstance(child, ast.CallFunc):
            try:
                func_called = get_call_name(child)
            except TypeError:
                # Unnameable call target: nothing sensible to record.
                pass
            else:
                fc[func_called] = 1

        get_function_calls(child, fc)
58 59
class Documentable(object):
    """Base class of every object the model keeps track of.

    An instance knows the system it belongs to, its `prefix` and `name`
    (concatenated by fullName()), its docstring and its parent.  setup()
    adds three containers:

      contents        -- name -> child object
      orderedcontents -- list of children
      _name2fullname  -- local name -> full dotted name
    """

    def __init__(self, system, prefix, name, docstring, parent=None):
        self.system = system
        self.prefix = prefix
        self.name = name
        self.docstring = docstring
        self.parent = parent
        self.setup()

    def setup(self):
        """Create the empty per-object containers."""
        self.contents = {}
        self.orderedcontents = []
        self._name2fullname = {}

    def fullName(self):
        """Return prefix and name joined together."""
        return self.prefix + self.name

    def shortdocstring(self):
        """Return the docstring, abbreviated when longer than 20 characters.

        A false docstring (None or '') is returned unchanged.  Otherwise
        trailing whitespace is removed and long texts are reduced to their
        first and last eight characters joined by '...'.
        """
        text = self.docstring
        if not text:
            return text
        text = text.rstrip()
        if len(text) > 20:
            return text[:8] + '...' + text[-8:]
        return text

    def __repr__(self):
        return "%s %r" % (self.__class__.__name__, self.fullName())

    def name2fullname(self, name):
        """Map a local name to a full name, asking the parent if unknown."""
        known = self._name2fullname
        if name not in known:
            return self.parent.name2fullname(name)
        return known[name]

    def resolveDottedName(self, dottedname, verbose=False):
        """Return the object `dottedname` refers to from here, or None.

        The first component is looked up in the _name2fullname tables of
        this object and its ancestors.  When no ancestor knows it, it is
        looked up directly in system.allobjects and then in the allobjects
        of each system in system.moresystems.  The remaining components
        are followed through `contents`.  With `verbose` set, the outcome
        is printed.
        """
        parts = dottedname.split('.')
        head = parts[0]
        system = self.system

        # Innermost enclosing scope with a local binding for the head.
        scope = self
        while scope is not None and head not in scope._name2fullname:
            scope = scope.parent

        if scope is None:
            # No scope binds the name: treat it as a global full name.
            if head in system.allobjects:
                found = system.allobjects[head]
            else:
                for othersys in system.moresystems:
                    if head in othersys.allobjects:
                        found = othersys.allobjects[head]
                        break
                else:
                    if verbose:
                        print("1 didn't find %r from %r" % (dottedname,
                                                            self.fullName()))
                    return None
        else:
            target = scope._name2fullname[head]
            if target not in system.allobjects:
                if verbose:
                    print("1.5 didn't find %r from %r" % (dottedname,
                                                          self.fullName()))
                return None
            found = system.allobjects[target]

        for part in parts[1:]:
            if part not in found.contents:
                if verbose:
                    print("2 didn't find %r from %r" % (dottedname,
                                                        self.fullName()))
                return None
            found = found.contents[part]

        if verbose:
            print("%s -> %s in %s" % (dottedname, found.fullName(),
                                      self.fullName()))
        return found

    def dottedNameToFullName(self, dottedname):
        """Expand the first component of `dottedname` to its full name.

        The first component is looked up in this object and its ancestors;
        when nobody knows it, `dottedname` is returned unchanged.
        """
        if '.' in dottedname:
            start, rest = dottedname.split('.', 1)
            rest = '.' + rest
        else:
            start, rest = dottedname, ''

        scope = self
        while scope is not None:
            if start in scope._name2fullname:
                return scope._name2fullname[start] + rest
            scope = scope.parent
        return dottedname

    def __getstate__(self):
        """Return a picklable state with object references replaced by names.

        Encoding of the returned dictionary's keys:

          '$' + key -- the value was a Documentable; stored is its full name
          '@' + key -- the value was a non-empty list holding only
                       Documentables and Nones; stored are full names/None
          '!' + key -- the value was a non-empty dict whose values were all
                       Documentables; stored is key -> full name
          key       -- anything else, stored unchanged
        """
        # this is so very, very evil.
        # see doc/extreme-pickling-pain.txt for more.
        state = {}
        for key, value in self.__dict__.items():
            if isinstance(value, Documentable):
                state['$' + key] = value.fullName()
            elif isinstance(value, list) and value:
                foreign = [item for item in value
                           if item is not None
                           and not isinstance(item, Documentable)]
                if foreign:
                    state[key] = value
                else:
                    names = []
                    for item in value:
                        if item is None:
                            names.append(None)
                        else:
                            names.append(item.fullName())
                    state['@' + key] = names
            elif isinstance(value, dict) and value:
                foreign = [item for item in value.values()
                           if not isinstance(item, Documentable)]
                if foreign:
                    state[key] = value
                else:
                    state['!' + key] = dict(
                        [(k, item.fullName()) for k, item in value.items()])
            else:
                state[key] = value
        return state
175
class Package(Documentable):
    """Documentable that stands for a package."""

    kind = "Package"

    def name2fullname(self, name):
        """Always raise NameError: a package performs no name lookup."""
        raise NameError()
180 181
class Module(Documentable):
    """Documentable that stands for a module."""

    kind = "Module"

    def name2fullname(self, name):
        """Return the full name bound to `name` in this module.

        Names without a local binding are returned unchanged; unless the
        name is a builtin, an "optimistic name resolution" warning is
        recorded on the system first.
        """
        bindings = self._name2fullname
        if name in bindings:
            return bindings[name]
        if name not in vars(__builtin__):
            self.system.warning("optimistic name resolution", name)
        return name
192 193
class Class(Documentable):
    """Documentable that stands for a class."""

    kind = "Class"

    def setup(self):
        """Add the (initially empty) inheritance lists to the base setup.

        bases       -- base names as expanded by the visitor
        rawbases    -- base expressions as written in the source
        baseobjects -- filled by System.recordBasesAndSubclasses
        subclasses  -- filled by System.recordBasesAndSubclasses
        """
        super(Class, self).setup()
        for attr in ('bases', 'rawbases', 'baseobjects', 'subclasses'):
            setattr(self, attr, [])
202 203
class Function(Documentable):
    """Documentable that stands for a function or method."""

    kind = "Function"
206 207
class ModuleVistor(object):
    """AST visitor that records one module's contents in a System.

    Classes and functions are pushed onto the system as they are met.
    Function bodies are not visited immediately: they are queued in
    `morenodes` through postpone() for the driver to process later.

    NOTE(review): `self.visit` is not defined in this class; presumably it
    is installed on the instance by compiler.visitor.walk -- confirm.
    """

    def __init__(self, system, modname):
        self.system = system
        self.modname = modname
        # (documentable, AST node) pairs queued by postpone()
        self.morenodes = []

    def default(self, node):
        """Visit every child node of `node`."""
        for child in node.getChildNodes():
            self.visit(child)

    def postpone(self, docable, node):
        """Queue `node` to be visited later with `docable` as context."""
        self.morenodes.append((docable, node))

    def visitModule(self, node):
        """Enter the module, reusing an already registered object if any."""
        if self.system.current and self.modname in self.system.current.contents:
            # The module was registered beforehand inside the current
            # object; fill in its docstring now.
            m = self.system.current.contents[self.modname]
            assert m.docstring is None
            m.docstring = node.doc
            self.system.push(m, node)
            self.default(node)
            self.system.pop(m)
        else:
            if not self.system.current:
                roots = [x for x in self.system.rootobjects
                         if x.name == self.modname]
                if roots:
                    # Exactly one root object may carry this name.
                    mod, = roots
                    self.system.push(mod, node)
                    self.default(node)
                    self.system.pop(mod)
                    return
            self.system.pushModule(self.modname, node.doc)
            self.default(node)
            self.system.popModule()

    def visitClass(self, node):
        """Register a class together with its raw and expanded base names."""
        cls = self.system.pushClass(node.name, node.doc)
        if node.lineno is not None:
            cls.linenumber = node.lineno
        for n in node.bases:
            str_base = ast_pp.pp(n)
            cls.rawbases.append(str_base)
            base = cls.dottedNameToFullName(str_base)
            cls.bases.append(base)
        self.default(node)
        self.system.popClass()

    def visitFrom(self, node):
        """Record the names bound by a ``from ... import ...`` statement."""
        modname = expandModname(self.system, node.modname)
        name2fullname = self.system.current._name2fullname
        for fromname, asname in node.names:
            if fromname == '*':
                self.system.warning("import *", modname)
                if modname not in self.system.allobjects:
                    return
                mod = self.system.allobjects[modname]
                # this might fail if you have an import-* cycle, or if
                # you're just not running the import star finder to
                # save time (not that this is possibly without
                # commenting stuff out yet, but...)
                if isinstance(mod, Package):
                    self.system.warning("import * from a package", modname)
                    return
                # `processed` is only assigned by the directory based
                # pipeline; objects registered in another way do not have
                # the attribute and are treated as not processed instead
                # of raising AttributeError.
                if getattr(mod, 'processed', False):
                    for n in mod.contents:
                        name2fullname[n] = modname + '.' + n
                else:
                    self.system.warning("unresolvable import *", modname)
                return
            if asname is None:
                asname = fromname
            name2fullname[asname] = modname + '.' + fromname

    def visitImport(self, node):
        """Record the names bound by an ``import ...`` statement."""
        name2fullname = self.system.current._name2fullname
        for fromname, asname in node.names:
            fullname = expandModname(self.system, fromname)
            if asname is None:
                # "import a.b.c" binds only "a"; find the part of the
                # expanded name that ends with that first component.
                asname = fromname.split('.', 1)[0]
                parts = fullname.split('.')
                for i, part in enumerate(parts[::-1]):
                    if part == asname:
                        fullname = '.'.join(parts[:len(parts) - i])
                        name2fullname[asname] = fullname
                        break
                else:
                    name2fullname[asname] = '.'.join(parts)
            else:
                name2fullname[asname] = fullname

    def visitFunction(self, node):
        """Register a function, its called names and its argument spec."""
        fc = {}
        get_function_calls(node, fc)
        func = self.system.pushFunction(node.name, node.doc, fc)
        if node.lineno is not None:
            func.linenumber = node.lineno
        # ast.Function has a pretty lame representation of
        # arguments. Let's convert it to a nice concise format
        # somewhat like what inspect.getargspec returns
        argnames = node.argnames[:]
        kwname = starargname = None
        if node.kwargs:
            kwname = argnames.pop(-1)
        if node.varargs:
            starargname = argnames.pop(-1)
        defaults = []
        for default in node.defaults:
            try:
                defaults.append(ast_pp.pp(default))
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                # sys.exc_info() keeps this handler valid for every
                # Python version's except syntax.
                e = sys.exc_info()[1]
                self.system.warning(
                    "unparseable default",
                    "%s: %s %r" % (e.__class__.__name__, e, default))
                defaults.append('???')
        # argh, convert unpacked-arguments from tuples to lists,
        # because that's what getargspec uses and the unit test
        # compares it
        argnames2 = []
        for argname in argnames:
            if isinstance(argname, tuple):
                argname = list(argname)
            argnames2.append(argname)
        func.argspec = (argnames2, starargname, kwname, tuple(defaults))
        self.postpone(func, node.code)
        self.system.popFunction()
# Values taken by System.state, in the order the processing steps of this
# module assign them.
states = ['blank', 'preparse', 'importstarred', 'parsed', 'finalized']
class System(object):
    """Registry of every documented object plus the traversal state.

    Attributes set by __init__:

      current           -- object new definitions are added to, or None
      allobjects        -- full name -> object
      orderedallobjects -- all objects in creation order
      rootobjects       -- objects created while `current` was None
      warnings          -- warning type -> list of (full name, detail)
      importstargraph   -- importer -> [imported], import * edges only
      func_called       -- union of the call names of all functions
      state             -- one of the module level `states`
      moresystems       -- other systems consulted by name resolution;
                           not pickled

    The class attributes below are the factories used to build objects.
    """

    Class = Class
    Module = Module
    Package = Package
    Function = Function
    ModuleVistor = ModuleVistor

    def __init__(self):
        self.current = None
        self._stack = []
        self.allobjects = {}
        self.orderedallobjects = []
        self.rootobjects = []
        self.warnings = {}
        # importstargraph contains edges {importer:[imported]} but only
        # for import * statements
        self.importstargraph = {}
        self.func_called = {}
        self.state = 'blank'
        self.packages = []
        self.moresystems = []
        self.urlprefix = ''

    def _push(self, cls, name, docstring):
        """Create a `cls` object below `current` and make it current."""
        if self.current:
            prefix = self.current.fullName() + '.'
            parent = self.current
        else:
            prefix = ''
            parent = None
        obj = cls(self, prefix, name, docstring, parent)
        if parent:
            parent.orderedcontents.append(obj)
            parent.contents[name] = obj
            parent._name2fullname[name] = obj.fullName()
        else:
            self.rootobjects.append(obj)
        self.current = obj
        self.orderedallobjects.append(obj)
        fullName = obj.fullName()
        if fullName in self.allobjects:
            obj = self.handleDuplicate(obj)
        else:
            self.allobjects[obj.fullName()] = obj
        return obj

    def handleDuplicate(self, obj):
        '''This is called when we see two objects with the same
        .fullName(), for example:

        class C:
            if something:
                def meth(self):
                    implementation 1
            else:
                def meth(self):
                    implementation 2

        The default is that the second definition "wins".
        '''
        # Find the first free "<full name> <i>" slot and move the earlier
        # object there by renaming it.
        i = 0
        fn = obj.fullName()
        while (fn + ' ' + str(i)) in self.allobjects:
            i += 1
        prev = self.allobjects[obj.fullName()]
        prev.name = obj.name + ' ' + str(i)
        self.allobjects[prev.fullName()] = prev
        self.warning("duplicate", self.allobjects[obj.fullName()])
        self.allobjects[obj.fullName()] = obj
        return obj

    def _pop(self, cls):
        """Leave the current object, which must be an instance of `cls`."""
        assert isinstance(self.current, cls)
        self.current = self.current.parent

    def push(self, obj, node=None):
        """Make `obj` current, remembering the previous current object."""
        self._stack.append(self.current)
        self.current = obj

    def pop(self, obj):
        """Undo the push() of `obj`, which must be the current object."""
        assert self.current is obj, "%r is not %r" % (self.current, obj)
        self.current = self._stack.pop()

    def pushClass(self, name, docstring):
        return self._push(self.Class, name, docstring)

    def popClass(self):
        self._pop(self.Class)

    def pushModule(self, name, docstring):
        return self._push(self.Module, name, docstring)

    def popModule(self):
        self._pop(self.Module)

    def pushFunction(self, name, docstring, func_called):
        """Push a function and merge `func_called` into self.func_called."""
        self.func_called.update(func_called)
        return self._push(self.Function, name, docstring)

    def popFunction(self):
        self._pop(self.Function)

    def pushPackage(self, name, docstring):
        return self._push(self.Package, name, docstring)

    def popPackage(self):
        self._pop(self.Package)

    def report(self):
        """Print the object tree, one object per line."""
        for o in self.rootobjects:
            self._report(o, '')

    def _report(self, o, indent):
        print("%s %s" % (indent, o))
        for o2 in o.orderedcontents:
            self._report(o2, indent + ' ')

    def resolveAlias(self, n):
        """Follow module level aliases of the dotted name `n`.

        `n` is split at its last dot into a module name and a member name.
        While the module is a known Module that binds the member name to
        another full name, that name is followed.  Returns the first name
        found in `allobjects`, or the last name reached when the chain
        cannot be followed any further.  A name is always returned.
        """
        # Names already followed; stops alias cycles between modules.
        seen = {}
        while n not in seen:
            seen[n] = True
            if '.' not in n:
                return n
            # Split at the LAST dot so that module names which themselves
            # contain dots ("pkg.mod.Class") are handled.
            mod, clsname = n.rsplit('.', 1)
            if not mod or mod not in self.allobjects:
                return n
            m = self.allobjects[mod]
            if not isinstance(m, Module):
                return n
            if clsname not in m._name2fullname:
                return n
            newname = m.name2fullname(clsname)
            if newname in self.allobjects:
                return newname
            n = newname
        return n

    def resolveAliases(self):
        """Replace unknown base names of all classes by resolved aliases."""
        for ob in self.orderedallobjects:
            if not isinstance(ob, Class):
                continue
            for i, b in enumerate(ob.bases):
                if b not in self.allobjects:
                    ob.bases[i] = self.resolveAlias(b)

    def warning(self, type, detail):
        """Record a warning of kind `type` against the current object."""
        if self.current is not None:
            fn = self.current.fullName()
        else:
            fn = '<None>'
        self.warnings.setdefault(type, []).append((fn, detail))

    def objectsOfType(self, cls):
        """Yield all objects that are instances of `cls`, in creation order."""
        for o in self.orderedallobjects:
            if isinstance(o, cls):
                yield o

    def finalStateComputations(self):
        """Run the computations that need the complete object graph."""
        # NOTE(review): the body of this method is missing from the
        # listing.  recordBasesAndSubclasses() is the only step defined
        # here that nothing else calls -- confirm against the upstream
        # docextractor sources.
        self.recordBasesAndSubclasses()

    def recordBasesAndSubclasses(self):
        """Link every class to its base objects and the bases back to it.

        A base that cannot be resolved is recorded as None in
        `baseobjects`.
        """
        for cls in self.objectsOfType(Class):
            for n in cls.bases:
                o = cls.parent.resolveDottedName(n)
                cls.baseobjects.append(o)
                if o:
                    o.subclasses.append(cls)

    def __getstate__(self):
        """Return the instance dictionary without `moresystems`."""
        state = self.__dict__.copy()
        del state['moresystems']
        return state

    def __setstate__(self, state):
        """Restore the state and turn stored names back into objects.

        Reverses the '$', '@' and '!' key encoding produced by
        Documentable.__getstate__ for every object of this system.
        """
        self.moresystems = []
        # this is so very, very evil.
        # see doc/extreme-pickling-pain.txt for more.
        self.__dict__.update(state)
        for obj in self.orderedallobjects:
            # Iterate over a snapshot: the dictionary is modified below.
            for k, v in list(obj.__dict__.items()):
                if k.startswith('$'):
                    del obj.__dict__[k]
                    obj.__dict__[k[1:]] = self.allobjects[v]
                elif k.startswith('@'):
                    n = []
                    for vv in v:
                        if vv is None:
                            n.append(None)
                        else:
                            n.append(self.allobjects[vv])
                    del obj.__dict__[k]
                    obj.__dict__[k[1:]] = n
                elif k.startswith('!'):
                    n = {}
                    for kk, vv in v.items():
                        n[kk] = self.allobjects[vv]
                    del obj.__dict__[k]
                    obj.__dict__[k[1:]] = n
540 541
def expandModname(system, modname, givewarning=True):
    """Expand a possibly package-local module name to a full name.

    Starting at the innermost Package that encloses system.current, the
    enclosing objects are searched for one whose contents hold the first
    component of `modname`.  When one is found the full name of that
    entry, followed by the remaining components, is returned and, if
    `givewarning` is true, a "local import" warning is recorded.
    Otherwise `modname` is returned as it is.
    """
    if '.' in modname:
        prefix, suffix = modname.split('.', 1)
        suffix = '.' + suffix
    else:
        prefix, suffix = modname, ''

    # Climb to the nearest enclosing package ...
    scope = system.current
    while scope is not None and not isinstance(scope, Package):
        scope = scope.parent
    # ... and from there to the first object holding the prefix.
    while scope is not None and prefix not in scope.contents:
        scope = scope.parent

    if scope is None:
        return prefix + suffix
    if givewarning:
        system.warning("local import", modname)
    return scope.contents[prefix].fullName() + suffix
561
class ImportStarFinder(object):
    """AST visitor that records the ``from x import *`` edges of a module.

    Edges are stored in system.importstargraph under the full name of the
    importing module given to the constructor.
    """

    def __init__(self, system, modfullname):
        self.system = system
        self.modfullname = modfullname

    def visitFrom(self, node):
        """Record the imported module when `node` is a star import."""
        # Only the first imported name is inspected.
        if node.names[0][0] != '*':
            return
        imported = expandModname(self.system, node.modname, False)
        graph = self.system.importstargraph
        graph.setdefault(self.modfullname, []).append(imported)
572
def processModuleAst(ast, name, system):
    """Feed the parse tree `ast` of module `name` into `system`.

    The tree is walked with the system's ModuleVistor.  The nodes the
    visitor postponed are then visited one by one, first in first out,
    each with its documentable pushed as the current object; nodes
    postponed during that phase are processed as well.
    """
    visitor = system.ModuleVistor(system, name)
    walk(ast, visitor)
    while visitor.morenodes:
        docable, postponed = visitor.morenodes.pop(0)
        system.push(docable, postponed)
        visitor.visit(postponed)
        system.pop(docable)
581 582
583 -def fromText(src, modname='<test>', system=None):
584 if system is None: 585 _system = System() 586 else: 587 _system = system 588 processModuleAst(parse(src), modname, _system) 589 if system is None: 590 _system.finalStateComputations() 591 return _system.rootobjects[0]
592 593
def preprocessDirectory(system, dirpath):
    """Register the packages and modules found below `dirpath`.

    A Package named after the last component of `dirpath` is pushed
    (none when that component is empty, e.g. for a path ending in a
    separator).  Sub-directories containing an ``__init__.py`` are
    processed recursively, except for ones named ``test``.  Every ``.py``
    file becomes a Module with `filepath` set and `processed` False.
    Nothing is parsed here.

    Directory entries are handled in sorted order so that the resulting
    object order does not depend on the file system.  The system must be
    in state 'blank' or 'preparse' and is left in state 'preparse'.
    """
    assert system.state in ['blank', 'preparse']
    pkgname = os.path.basename(dirpath)
    if pkgname:
        package = system.pushPackage(pkgname, None)
    else:
        package = None
    # os.listdir() returns the entries in arbitrary order.
    for fname in sorted(os.listdir(dirpath)):
        fullname = os.path.join(dirpath, fname)
        is_subpackage = (
            os.path.isdir(fullname)
            and os.path.exists(os.path.join(fullname, '__init__.py'))
            and fname != 'test')
        if is_subpackage:
            preprocessDirectory(system, fullname)
        elif fname.endswith('.py') and os.path.isfile(fullname):
            # The isfile() test keeps directories that merely carry a
            # ".py" name from being registered as modules.
            modname = os.path.splitext(fname)[0]
            mod = system.pushModule(modname, None)
            mod.filepath = fullname
            mod.processed = False
            system.popModule()
    if package:
        system.popPackage()
    system.state = 'preparse'
613
def findImportStars(system):
    """Collect the ``import *`` edges of all modules of `system`.

    Every Module is parsed from its `filepath` and walked with an
    ImportStarFinder, which fills system.importstargraph.  A module that
    cannot be parsed gets a "cannot parse" warning and is skipped.  The
    system must be in state 'preparse' and is left in state
    'importstarred'.
    """
    assert system.state in ['preparse']
    modlist = list(system.objectsOfType(Module))
    for mod in modlist:
        system.push(mod.parent)
        isf = ImportStarFinder(system, mod.fullName())
        try:
            tree = parseFile(mod.filepath)
        except (SyntaxError, ValueError):
            system.warning("cannot parse", mod.filepath)
        else:
            # Walk only a tree that was really produced for THIS module;
            # after a failed parse there is either no tree at all or the
            # one of the previously handled module.
            walk(tree, isf)
        system.pop(mod.parent)
    system.state = 'importstarred'
627
def extractDocstrings(system):
    """Parse all modules of `system` and record their contents.

    The modules are ordered with toposort() over the import * graph and
    each one is parsed from its `filepath`, processed with
    processModuleAst() and marked as processed.  A module that cannot be
    parsed gets a "cannot parse" warning, is skipped and keeps its
    `processed` flag unchanged.  The system must be in state 'preparse'
    or 'importstarred' and is left in state 'parsed'.
    """
    assert system.state in ['preparse', 'importstarred']
    # and so much more...
    modlist = list(system.objectsOfType(Module))
    newlist = toposort([m.fullName() for m in modlist],
                       system.importstargraph)

    for fullname in newlist:
        mod = system.allobjects[fullname]
        system.push(mod.parent)
        try:
            tree = parseFile(mod.filepath)
        except (SyntaxError, ValueError):
            system.warning("cannot parse", mod.filepath)
        else:
            # Process only a tree that belongs to THIS module; after a
            # failed parse there is either no tree at all or the one of
            # the previously handled module.
            processModuleAst(tree, mod.name, system)
            mod.processed = True
        system.pop(mod.parent)
    system.state = 'parsed'
645
def finalStateComputations(system):
    """Run the system's final computations and mark it 'finalized'.

    The system must be in state 'parsed'.
    """
    assert system.state == 'parsed'
    system.finalStateComputations()
    system.state = 'finalized'
650
def processDirectory(system, dirpath):
    """Run the complete pipeline on the package directory `dirpath`.

    The steps are, in order: preprocessDirectory, findImportStars,
    extractDocstrings and finalStateComputations.
    """
    preprocessDirectory(system, dirpath)
    for step in (findImportStars, extractDocstrings, finalStateComputations):
        step(system)
656
def toposort(input, edges):
    """Return the items of `input` with each item's edge targets first.

    `edges` maps an item to a list of items that have to come before it;
    targets that are not part of `input` are ignored.  Every item appears
    exactly once in the result.  The relative order of unrelated items is
    that of dict.popitem().
    """
    # this doesn't detect cycles in any clever way.
    ordered = []
    pending = dict.fromkeys(input)

    def visit(item):
        # An item is removed from `pending` before it is visited, so a
        # cycle cannot lead back into it.
        for target in edges.get(item, ()):
            if target not in pending:
                continue
            del pending[target]
            visit(target)
        ordered.append(item)

    while pending:
        start, _ = pending.popitem()
        visit(start)
    return ordered
670 671
def main(systemcls, argv):
    """Command line driver.

    With ``-r`` in `argv` the single remaining argument is processed as a
    package directory, the resulting system is pickled to ``da.out`` in
    the working directory and a count per warning type is printed.
    Without ``-r`` every argument is parsed as one module file and the
    object tree is printed.

    `systemcls` is called without arguments to create the system.  `argv`
    itself is left unmodified.
    """
    args = list(argv)  # work on a copy; do not strip -r from the caller's list
    if '-r' in args:
        args.remove('-r')
        assert len(args) == 1
        system = systemcls()
        processDirectory(system, args[0])
        out = open('da.out', 'wb')
        try:
            pickle.dump(system, out, pickle.HIGHEST_PROTOCOL)
        finally:
            # Close explicitly so the pickle is flushed to disk
            # deterministically.
            out.close()
        print("")
        print('warning summary:')
        for k, v in system.warnings.items():
            print("%s %d" % (k, len(v)))
    else:
        system = systemcls()
        for fname in args:
            modname = os.path.splitext(os.path.basename(fname))[0]  # XXX!
            processModuleAst(parseFile(fname), modname, system)
        system.report()
# Script entry point: run the driver on the command line arguments.
if __name__ == '__main__':
    main(System, sys.argv[1:])