[2] | 1 | #! /usr/bin/env python
|
---|
| 2 |
|
---|
| 3 | # Released to the public domain, by Tim Peters, 03 October 2000.
|
---|
| 4 |
|
---|
| 5 | """reindent [-d][-r][-v] [ path ... ]
|
---|
| 6 |
|
---|
| 7 | -d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
|
---|
| 8 | -r (--recurse) Recurse. Search for all .py files in subdirectories too.
|
---|
| 9 | -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
|
---|
| 10 | -v (--verbose) Verbose. Print informative msgs; else no output.
|
---|
| 11 | -h (--help) Help. Print this usage information and exit.
|
---|
| 12 |
|
---|
| 13 | Change Python (.py) files to use 4-space indents and no hard tab characters.
|
---|
| 14 | Also trim excess spaces and tabs from ends of lines, and remove empty lines
|
---|
| 15 | at the end of files. Also ensure the last line ends with a newline.
|
---|
| 16 |
|
---|
| 17 | If no paths are given on the command line, reindent operates as a filter,
|
---|
| 18 | reading a single source file from standard input and writing the transformed
|
---|
| 19 | source to standard output. In this case, the -d, -r and -v flags are
|
---|
| 20 | ignored.
|
---|
| 21 |
|
---|
| 22 | You can pass one or more file and/or directory paths. When a directory
|
---|
| 23 | path, all .py files within the directory will be examined, and, if the -r
|
---|
| 24 | option is given, likewise recursively for subdirectories.
|
---|
| 25 |
|
---|
| 26 | If output is not to standard output, reindent overwrites files in place,
|
---|
| 27 | renaming the originals with a .bak extension. If it finds nothing to
|
---|
| 28 | change, the file is left alone. If reindent does change a file, the changed
|
---|
| 29 | file is a fixed-point for future runs (i.e., running reindent on the
|
---|
| 30 | resulting .py file won't change it again).
|
---|
| 31 |
|
---|
| 32 | The hard part of reindenting is figuring out what to do with comment
|
---|
| 33 | lines. So long as the input files get a clean bill of health from
|
---|
| 34 | tabnanny.py, reindent should do a good job.
|
---|
| 35 |
|
---|
| 36 | The backup file is a copy of the one that is being reindented. The ".bak"
|
---|
| 37 | file is generated with shutil.copyfile(), but some corner cases regarding
|
---|
[391] | 38 | user/group and permissions could leave the backup file more readable than
|
---|
[2] | 39 | you'd prefer. You can always use the --nobackup option to prevent this.
|
---|
| 40 | """
|
---|
| 41 |
|
---|
| 42 | __version__ = "1"
|
---|
| 43 |
|
---|
| 44 | import tokenize
|
---|
| 45 | import os, shutil
|
---|
| 46 | import sys
|
---|
[391] | 47 | import io
|
---|
[2] | 48 |
|
---|
# Option state shared by main() and check().  main() bumps the three
# counters once per matching command-line flag and clears makebackup
# when -n/--nobackup is given.
verbose = recurse = dryrun = 0
makebackup = True
| 53 |
|
---|
def usage(msg=None):
    """Write an optional message, then the module docstring, to stderr.

    msg -- if not None, its str() is written on a line of its own before
           the usage text (main() passes the getopt error here).
    """
    # sys.stderr.write() is used instead of the ``print >> stream``
    # statement, which is a syntax error on Python 3; the text written is
    # the same on Python 2.
    if msg is not None:
        sys.stderr.write(str(msg) + "\n")
    sys.stderr.write(str(__doc__) + "\n")
| 58 |
|
---|
def errprint(*args):
    """Write the arguments to stderr on one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and followed by a newline.  With no arguments only the
    newline is written.
    """
    sys.stderr.write(" ".join(str(piece) for piece in args) + "\n")
| 65 |
|
---|
def main():
    """Command-line entry point.

    Parses sys.argv[1:] with getopt and records the flags in the module
    globals verbose, recurse, dryrun and makebackup.  An option error
    prints the error plus the usage text and returns; -h/--help prints the
    usage text and returns.

    With no path arguments the source is read from sys.stdin, reindented,
    and written to sys.stdout.  Otherwise check() is called on each path.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    # "except X as name" (valid since Python 2.6) replaces the
    # Python-2-only "except X, name" spelling.
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout, no backup and no in-place rewrite.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
| 94 |
|
---|
def check(file):
    """Reindent one file, or every eligible entry of a directory.

    file -- path to examine.  If it is a directory (and not a symlink),
            each entry whose name ends in ".py" (case-insensitive) is
            checked; when the global ``recurse`` is set, so is each
            subdirectory that is not a symlink and whose name does not
            start with ".".

    For a regular file: returns True if reindenting changed it, False if
    it was already clean.  Returns None for a directory, for a file that
    cannot be opened, and for a file with mixed newline styles (which is
    reported on stderr and left untouched).

    Unless the global ``dryrun`` is set, a changed file is rewritten in
    place using the newline style detected on input; when the global
    ``makebackup`` is true the original is first copied to file + ".bak".
    Progress messages go to stdout only when the global ``verbose`` is set.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            descend = (recurse and os.path.isdir(fullname) and
                       not os.path.islink(fullname) and
                       not name.startswith("."))
            if descend or name.lower().endswith(".py"):
                check(fullname)
        return

    if verbose:
        # Partial line: the verdict ("changed." / "unchanged.") is
        # appended to it by a later print.
        sys.stdout.write("checking %s ... " % (file,))
    try:
        f = io.open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Close the input even if reading it raises.
    with f:
        r = Reindenter(f)

    # io reports a tuple in .newlines when several newline styles were
    # seen; such a file cannot be written back with a single style.
    newline = r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot process file" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up %s to %s" % (file, bak))
            # Close the output even if writing raises.
            with io.open(file, "w", newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new %s" % (file,))
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
| 146 |
|
---|
| 147 | def _rstrip(line, JUNK='\n \t'):
|
---|
| 148 | """Return line stripped of trailing spaces, tabs, newlines.
|
---|
| 149 |
|
---|
| 150 | Note that line.rstrip() instead also strips sundry control characters,
|
---|
| 151 | but at least one known Emacs user expects to keep junk like that, not
|
---|
| 152 | mentioning Barry by name or anything <wink>.
|
---|
| 153 | """
|
---|
| 154 |
|
---|
| 155 | i = len(line)
|
---|
| 156 | while i > 0 and line[i-1] in JUNK:
|
---|
| 157 | i -= 1
|
---|
| 158 | return line[:i]
|
---|
| 159 |
|
---|
class Reindenter:
    """Rewrite Python source lines to use 4-space indentation.

    Construct with a file-like object providing readlines() and a
    ``newlines`` attribute, call run() to compute the result, then write()
    to emit it.

    Attributes:
    raw      -- the lines exactly as read.
    lines    -- the lines with trailing junk stripped and tabs expanded,
                preceded by a dummy None so indices match tokenize's
                1-based line numbers.
    stats    -- (lineno, indentlevel) pairs collected while tokenizing.
    after    -- the transformed lines; set by run().
    newlines -- the input object's ``newlines`` value after reading.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.  Read only after
        # readlines() above has consumed the input.
        self.newlines = f.newlines

    def run(self):
        """Compute the reindented program into self.after.

        Returns True if the result differs from the raw input lines,
        False otherwise.
        """
        # generate_tokens() exists on both Python 2 and 3, unlike the
        # two-argument callback form of tokenize.tokenize().
        for token in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Indent further, but keep blank lines blank.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Dedent, never eating more than the blanks the
                        # line actually has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines (self.after) to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        start is the token's (line, column) pair; only the line number is
        used.  It is a plain parameter because tuple parameters in a
        signature are not valid on Python 3.
        """
        sline = start[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:  # not endmarker
                self.stats.append((sline, self.level))
| 306 |
|
---|
def getlspace(line):
    """Return how many space characters line starts with.

    Only " " counts; a tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
| 313 |
|
---|
# Run the command-line interface only when executed as a script, not
# when the module is imported.
if __name__ == "__main__":
    main()