| 1 | #! /usr/bin/env python | 
|---|
| 2 |  | 
|---|
| 3 | # Released to the public domain, by Tim Peters, 03 October 2000. | 
|---|
| 4 |  | 
|---|
| 5 | """reindent [-d][-r][-n][-v][-h] [ path ... ] | 
|---|
| 6 |  | 
|---|
| 7 | -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files. | 
|---|
| 8 | -r (--recurse)  Recurse.   Search for all .py files in subdirectories too. | 
|---|
| 9 | -n (--nobackup) No backup. Does not make a ".bak" file before reindenting. | 
|---|
| 10 | -v (--verbose)  Verbose.   Print informative msgs; else no output. | 
|---|
| 11 | -h (--help)     Help.      Print this usage information and exit. | 
|---|
| 12 |  | 
|---|
| 13 | Change Python (.py) files to use 4-space indents and no hard tab characters. | 
|---|
| 14 | Also trim excess spaces and tabs from ends of lines, and remove empty lines | 
|---|
| 15 | at the end of files.  Also ensure the last line ends with a newline. | 
|---|
| 16 |  | 
|---|
| 17 | If no paths are given on the command line, reindent operates as a filter, | 
|---|
| 18 | reading a single source file from standard input and writing the transformed | 
|---|
| 19 | source to standard output.  In this case, the -d, -r and -v flags are | 
|---|
| 20 | ignored. | 
|---|
| 21 |  | 
|---|
| 22 | You can pass one or more file and/or directory paths.  When a directory | 
|---|
| 23 | path, all .py files within the directory will be examined, and, if the -r | 
|---|
| 24 | option is given, likewise recursively for subdirectories. | 
|---|
| 25 |  | 
|---|
| 26 | If output is not to standard output, reindent overwrites files in place, | 
|---|
| 27 | after copying the originals to .bak files (unless -n is given).  If it finds nothing to | 
|---|
| 28 | change, the file is left alone.  If reindent does change a file, the changed | 
|---|
| 29 | file is a fixed-point for future runs (i.e., running reindent on the | 
|---|
| 30 | resulting .py file won't change it again). | 
|---|
| 31 |  | 
|---|
| 32 | The hard part of reindenting is figuring out what to do with comment | 
|---|
| 33 | lines.  So long as the input files get a clean bill of health from | 
|---|
| 34 | tabnanny.py, reindent should do a good job. | 
|---|
| 35 |  | 
|---|
| 36 | The backup file is a copy of the one that is being reindented. The ".bak" | 
|---|
| 37 | file is generated with shutil.copyfile(), but some corner cases regarding | 
|---|
| 38 | user/group and permissions could leave the backup file more readable than | 
|---|
| 39 | you'd prefer. You can always use the --nobackup option to prevent this. | 
|---|
| 40 | """ | 
|---|
| 41 |  | 
|---|
# Version string of this script.
__version__ = "1"
|---|
| 43 |  | 
|---|
| 44 | import tokenize | 
|---|
| 45 | import os, shutil | 
|---|
| 46 | import sys | 
|---|
| 47 | import io | 
|---|
| 48 |  | 
|---|
# Module-level option flags.  main() updates them from the command line
# and check() reads them.
verbose = 0        # incremented once per -v/--verbose
recurse = 0        # incremented once per -r/--recurse
dryrun = 0         # incremented once per -d/--dryrun
makebackup = True  # set to False by -n/--nobackup
|---|
| 53 |  | 
|---|
def usage(msg=None):
    """Write an optional message, then the module usage text, to stderr.

    msg -- an error message (or exception object) to show before the usage
           text; when it is None only the usage text is written.
    """
    # sys.stderr.write() is used instead of the Python-2-only
    # "print >> sys.stderr" statement so this runs on Python 2 and 3 alike.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
|---|
| 58 |  | 
|---|
def errprint(*args):
    """Write the str() of each argument to stderr, space-separated.

    A single newline terminates the line, even when no arguments are given.
    """
    for position, item in enumerate(args):
        # Every item except the first is preceded by one blank.
        prefix = " " if position else ""
        sys.stderr.write(prefix + str(item))
    sys.stderr.write("\n")
|---|
| 65 |  | 
|---|
def main():
    """Command-line driver: parse options, then reindent the inputs.

    Options update the module-level flags verbose, recurse, dryrun and
    makebackup.  An option error prints the error plus the usage text and
    returns; -h/--help prints the usage text and returns.

    With no path arguments the source is read from sys.stdin and the
    reindented result is written to sys.stdout.  Otherwise check() is
    called on each path in turn.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        # "except ... as ..." replaces the Python-2-only comma form.
        usage(msg)
        return
    # None of the options takes a value, so the second element is unused.
    for o, _ in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
|---|
| 94 |  | 
|---|
def check(file):
    """Reindent `file`, or the .py files inside it when it is a directory.

    Directory (not a symlink): every entry whose name ends in ".py"
    (case-insensitively) is checked; when the module-level `recurse` flag
    is set, non-symlink subdirectories whose names do not start with "."
    are descended into as well.  Returns None.

    File: returns True when reindenting changes the contents, False when
    it does not.  A changed file is rewritten in place unless `dryrun` is
    set; before rewriting, it is copied to file + ".bak" when `makebackup`
    is true.  Returns None (after reporting through errprint) when the
    file cannot be opened or uses mixed newline conventions.

    Progress messages go to stdout when `verbose` is set.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed."/"unchanged.") is printed
        # later on the same line.
        sys.stdout.write("checking %s ... " % (file,))
    try:
        f = io.open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Close the input even if reading it raises.
    with f:
        r = Reindenter(f)

    newline = r.newlines
    if isinstance(newline, tuple):
        # A tuple means more than one newline convention was seen.
        errprint("%s: mixed newlines detected; cannot process file" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up %s to %s" % (file, bak))
            # Write back using the newline convention found in the input.
            with io.open(file, "w", newline=newline) as out:
                r.write(out)
            if verbose:
                print("wrote new %s" % (file,))
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
|---|
| 146 |  | 
|---|
| 147 | def _rstrip(line, JUNK='\n \t'): | 
|---|
| 148 | """Return line stripped of trailing spaces, tabs, newlines. | 
|---|
| 149 |  | 
|---|
| 150 | Note that line.rstrip() instead also strips sundry control characters, | 
|---|
| 151 | but at least one known Emacs user expects to keep junk like that, not | 
|---|
| 152 | mentioning Barry by name or anything <wink>. | 
|---|
| 153 | """ | 
|---|
| 154 |  | 
|---|
| 155 | i = len(line) | 
|---|
| 156 | while i > 0 and line[i-1] in JUNK: | 
|---|
| 157 | i -= 1 | 
|---|
| 158 | return line[:i] | 
|---|
| 159 |  | 
|---|
class Reindenter:
    """Compute a 4-space-indented version of a Python source file.

    Construct it from a text file object, call run() to compute the
    transformed lines (stored in self.after), then write() to emit them.
    """

    def __init__(self, f):
        """Read all lines from f and prepare the tokenizer state.

        f -- a file object providing readlines() and a `newlines`
             attribute.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        #  create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Compute self.after; return True if it differs from the input."""
        # generate_tokens() is used instead of the callback form
        # tokenize.tokenize(readline, tokeneater), which exists only on
        # Python 2; this loop works on both Python 2 and 3.
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        slinecol is the token's (start line, start column) pair.  It is a
        plain parameter because tuple parameters in a def are
        Python-2-only syntax; positional callers are unaffected.
        """
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
|---|
| 306 |  | 
|---|
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with.

    Only " " counts; a tab or any other character ends the run.
    """
    # lstrip(" ") drops exactly the leading run of spaces, so the length
    # difference is the count the manual loop used to compute.
    return len(line) - len(line.lstrip(" "))
|---|
| 313 |  | 
|---|
# Run the command-line driver only when executed as a script.
if __name__ == "__main__":
    main()
|---|