| 1 | """Simple code to extract class & function docstrings from a module. | 
|---|
| 2 |  | 
|---|
| 3 | This code is used as an example in the library reference manual in the | 
|---|
| 4 | section on using the parser module.  Refer to the manual for a thorough | 
|---|
| 5 | discussion of the operation of this code. | 
|---|
| 6 | """ | 
|---|
| 7 |  | 
|---|
| 8 | import os | 
|---|
| 9 | import parser | 
|---|
| 10 | import symbol | 
|---|
| 11 | import token | 
|---|
| 12 | import types | 
|---|
| 13 |  | 
|---|
| 14 | from types import ListType, TupleType | 
|---|
| 15 |  | 
|---|
| 16 |  | 
|---|
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo object built from the tuple form of the parse
    tree; the module name is the file's base name without its extension.
    """
    # Close the file explicitly instead of relying on garbage collection
    # to release the handle.
    source_file = open(fileName)
    try:
        source = source_file.read()
    finally:
        source_file.close()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    return ModuleInfo(ast.totuple(), basename)
|---|
| 27 |  | 
|---|
| 28 |  | 
|---|
class SuiteInfoBase:
    """Base class holding the docstring, name and nested definitions of a suite.

    Instances keep two dictionaries, one mapping class names to ClassInfo
    objects and one mapping function names to FunctionInfo objects, filled
    in from a parse tree given as nested tuples.
    """

    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        """Initialize the object, scanning `tree' when one is supplied.

        tree
            Parse tree (nested tuples) to examine.  A false value (None
            or an empty tuple) leaves the object empty.
        """
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Record the docstring and the definitions found directly in `tree'."""
        # extract docstring
        if len(tree) == 2:
            # A single child is compared against the simple_stmt portion
            # of the docstring pattern.
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # Otherwise the node at index 3 is examined (presumably the
            # first statement following NEWLINE and INDENT in an indented
            # suite -- confirm against the grammar).
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to have a node at index 3; previously this raised
            # IndexError.
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() is applied to the text of a STRING token
            # taken from the parse tree; do not feed it anything else.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if not found:
                continue
            cstmt = bindings['compound']
            if cstmt[0] == symbol.funcdef:
                name = cstmt[2][1]
                self._function_info[name] = FunctionInfo(cstmt)
            elif cstmt[0] == symbol.classdef:
                name = cstmt[2][1]
                self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the docstring recorded for this suite ('' if none)."""
        return self._docstring

    def get_name(self):
        """Return the name recorded for this suite ('' if none)."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes defined directly in this suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object for class `name'; raises KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then among the functions.

        Raises KeyError when neither dictionary contains `name'.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
|---|
| 76 |  | 
|---|
| 77 |  | 
|---|
class SuiteFuncInfo:
    #  Mixin supplying accessors for the function dictionary; the class it
    #  is mixed into must provide the `_function_info' attribute.

    def get_function_names(self):
        """Return the names stored in the function dictionary."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the entry stored for `name'; raises KeyError if unknown."""
        functions = self._function_info
        return functions[name]
|---|
| 86 |  | 
|---|
| 87 |  | 
|---|
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition node."""

    def __init__(self, tree = None):
        """Record the function's name and scan its last child node.

        tree
            Definition node as nested tuples; the name is read from
            tree[2][1] and tree[-1] is handed to SuiteInfoBase for
            scanning.  When omitted (or false) the object is left empty
            instead of failing on the subscript.
        """
        body = None
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        SuiteInfoBase.__init__(self, body)
|---|
| 92 |  | 
|---|
| 93 |  | 
|---|
class ClassInfo(SuiteInfoBase):
    """Information about one class definition node."""

    def __init__(self, tree = None):
        """Record the class's name and scan its last child node.

        tree
            Definition node as nested tuples; the name is read from
            tree[2][1] and tree[-1] is handed to SuiteInfoBase for
            scanning.  When omitted (or false) the object is left empty
            instead of failing on the subscript.
        """
        body = None
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        SuiteInfoBase.__init__(self, body)

    def get_method_names(self):
        """Return the names of the functions defined directly in the class."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object for method `name'; raises KeyError if unknown."""
        return self._function_info[name]
|---|
| 104 |  | 
|---|
| 105 |  | 
|---|
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module."""

    def __init__(self, tree = None, name = "<string>"):
        """Record the module name and scan `tree' when one is supplied.

        tree
            Parse tree of the module as nested tuples.

        name
            Name to report for the module.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree:
            # Only the first node of the module may supply its docstring.
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Evaluate the STRING token so the stored value is the
                # string itself rather than its quoted source text, as
                # SuiteInfoBase does for classes and functions.
                # NOTE(review): eval() is applied to parse-tree token text
                # only; do not feed it anything else.
                self._docstring = eval(vars["docstring"])
            else:
                # Discard anything the base class picked up from the node
                # at index 3, which is not the first statement of a module.
                self._docstring = ''
|---|
| 114 |  | 
|---|
| 115 |  | 
|---|
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.  The dictionary is updated in place, so it may hold bindings
    made before a mismatch was detected.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return True, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    try:
        data_length = len(data)
    except TypeError:
        # Data without a length cannot match a tuple pattern.
        return False, vars
    if data_length != len(pattern):
        return False, vars
    # An empty pattern against empty data is a match; initializing the
    # flag also keeps it defined when the loop body never runs.
    same = True
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            break
    return same, vars
|---|
| 153 |  | 
|---|
| 154 |  | 
|---|
#  Pattern for a 'stmt' node whose only child is a 'compound_stmt' node.
#  The child of that 'compound_stmt' is captured under the variable name
#  'compound', which lets compound statements be told apart from simple
#  ones.
#
COMPOUND_STMT_PATTERN = (symbol.stmt, (symbol.compound_stmt, ['compound']))
|---|
| 162 |  | 
|---|
| 163 |  | 
|---|
#  Pattern for a 'stmt' node which *might* be a docstring: a simple
#  statement consisting of a single STRING token followed by a NEWLINE
#  token whose text is empty.  The string's source text is captured under
#  the variable name 'docstring'.  A docstring must be the first statement
#  of its class or function; this pattern does not check that.
#
#  Single-child nodes wrapping the STRING token, listed outermost first.
_docstring_chain = (
    symbol.small_stmt,
    symbol.expr_stmt,
    symbol.testlist,
    symbol.test,
    symbol.and_test,
    symbol.not_test,
    symbol.comparison,
    symbol.expr,
    symbol.xor_expr,
    symbol.and_expr,
    symbol.shift_expr,
    symbol.arith_expr,
    symbol.term,
    symbol.factor,
    symbol.power,
    symbol.atom,
)

#  Wrap the STRING token from the inside out.
_docstring_node = (token.STRING, ['docstring'])
for _docstring_symbol in _docstring_chain[::-1]:
    _docstring_node = (_docstring_symbol, _docstring_node)

DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt, _docstring_node, (token.NEWLINE, '')),
)

#  The helper names are only needed while building the pattern.
del _docstring_chain, _docstring_node, _docstring_symbol
|---|