1 | #! /usr/bin/env python
|
---|
2 |
|
---|
3 | # Released to the public domain, by Tim Peters, 03 October 2000.
|
---|
4 |
|
---|
5 | """reindent [-d][-r][-v] [ path ... ]
|
---|
6 |
|
---|
7 | -d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
|
---|
8 | -r (--recurse) Recurse. Search for all .py files in subdirectories too.
|
---|
9 | -v (--verbose) Verbose. Print informative msgs; else no output.
|
---|
10 | -h (--help) Help. Print this usage information and exit.
|
---|
11 |
|
---|
12 | Change Python (.py) files to use 4-space indents and no hard tab characters.
|
---|
13 | Also trim excess spaces and tabs from ends of lines, and remove empty lines
|
---|
14 | at the end of files. Also ensure the last line ends with a newline.
|
---|
15 |
|
---|
16 | If no paths are given on the command line, reindent operates as a filter,
|
---|
17 | reading a single source file from standard input and writing the transformed
|
---|
18 | source to standard output. In this case, the -d, -r and -v flags are
|
---|
19 | ignored.
|
---|
20 |
|
---|
21 | You can pass one or more file and/or directory paths. When a directory
|
---|
22 | path, all .py files within the directory will be examined, and, if the -r
|
---|
23 | option is given, likewise recursively for subdirectories.
|
---|
24 |
|
---|
25 | If output is not to standard output, reindent overwrites files in place,
|
---|
26 | renaming the originals with a .bak extension. If it finds nothing to
|
---|
27 | change, the file is left alone. If reindent does change a file, the changed
|
---|
28 | file is a fixed-point for future runs (i.e., running reindent on the
|
---|
29 | resulting .py file won't change it again).
|
---|
30 |
|
---|
31 | The hard part of reindenting is figuring out what to do with comment
|
---|
32 | lines. So long as the input files get a clean bill of health from
|
---|
33 | tabnanny.py, reindent should do a good job.
|
---|
34 | """
|
---|
35 |
|
---|
36 | __version__ = "1"
|
---|
37 |
|
---|
38 | import tokenize
|
---|
39 | import os
|
---|
40 | import sys
|
---|
41 |
|
---|
# Option counters, all starting at zero.  main() bumps one each time the
# matching flag is given: -v/--verbose, -r/--recurse, -d/--dryrun.
verbose = recurse = dryrun = 0
45 |
|
---|
def usage(msg=None):
    """Write an optional message, then the module usage text, to stderr.

    msg -- any object convertible with str() (main() passes the getopt
           error); when it is None only the usage text is written.
    """
    # sys.stderr.write() stands in for the Python-2-only "print >>" statement
    # so this function also parses on Python 3.  The "%s" conversion applies
    # str() to its argument, exactly as print did.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
50 |
|
---|
def errprint(*args):
    """Write the str() of every argument to stderr, space-separated.

    A single newline terminates the output, even when no arguments are
    given.
    """
    for position, item in enumerate(args):
        # Every item except the first is preceded by one blank.
        prefix = " " if position else ""
        sys.stderr.write(prefix + str(item))
    sys.stderr.write("\n")
57 |
|
---|
def main():
    """Parse the command line and reindent the named paths, or stdin.

    Each -d/-r/-v flag (or its long form) increments the matching
    module-level counter.  An option error or -h/--help writes the usage
    text to stderr and returns without processing anything.

    With no path arguments the source is read from sys.stdin and the
    transformed program is written to sys.stdout; otherwise every argument
    is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drvh",
                                   ["dryrun", "recurse", "verbose", "help"])
    except getopt.error as msg:
        # "except ... as" is accepted by Python 2.6+ and 3.x; the older
        # comma form is a SyntaxError on Python 3.
        usage(msg)
        return
    for opt, _value in opts:
        if opt in ('-d', '--dryrun'):
            dryrun += 1
        elif opt in ('-r', '--recurse'):
            recurse += 1
        elif opt in ('-v', '--verbose'):
            verbose += 1
        elif opt in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
84 |
|
---|
def check(file):
    """Reindent one file in place, or walk a directory of .py files.

    file -- path of a file or directory.

    For a directory that is not a symbolic link, every entry whose name
    ends in ".py" (case-insensitively) is checked; sub-directories that are
    not symbolic links are followed only when the module-level recurse
    flag is set.

    For anything else the path is opened and run through Reindenter.  If
    the result differs and the module-level dryrun flag is not set, the
    original is renamed to file + ".bak" (replacing any existing backup)
    and the new text is written to file.  A failure to open the input is
    reported on stderr and the path is skipped.  Progress messages go to
    stdout when the module-level verbose flag is set.
    """
    # Progress output uses sys.stdout.write() rather than the Python-2-only
    # print statement, so the function parses on both Python 2.6+ and 3.x.
    say = sys.stdout.write

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            say("listing directory %s\n" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") printed later
        # finishes this line.
        say("checking %s ... " % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The with-block closes the input even if reading it raises.
    with f:
        r = Reindenter(f)

    if r.run():
        if verbose:
            say("changed.\n")
            if dryrun:
                say("But this is a dry run, so leaving it alone.\n")
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                say("renamed %s to %s\n" % (file, bak))
            # Closed on every path, including a failed write.
            with open(file, "w") as out:
                r.write(out)
            if verbose:
                say("wrote new %s\n" % (file,))
    else:
        if verbose:
            say("unchanged.\n")
128 |
|
---|
def _rstrip(line, JUNK='\n \t'):
    """Return line with trailing spaces, tabs and newlines removed.

    Only the characters listed in JUNK are dropped.  A bare line.rstrip()
    would additionally remove other control characters (form feeds, for
    example), which some editors' users want to keep.
    """
    return line.rstrip(JUNK)
141 |
|
---|
class Reindenter:
    """Compute a 4-space re-indentation of one Python source file.

    The constructor reads all lines from a file-like object, run() builds
    the transformed program in self.after, and write() emits it.
    """

    def __init__(self, f):
        """Read every line from f and set up the tokenizing state.

        f only needs a readlines() method; it is not used again after
        construction.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build the re-indented program in self.after.

        Returns true iff the result differs from the raw input lines.
        Call it once per instance: it consumes the line cursor and appends
        a sentinel entry to self.stats.  Nothing here catches errors raised
        while tokenizing, so they propagate to the caller.
        """
        # generate_tokens() exists on both Python 2 and 3, unlike the
        # callback form tokenize.tokenize(readline, tokeneater), which is
        # Python 2 only.  Each token is a 5-item sequence matching
        # tokeneater's positional parameters.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        after.extend(lines[1:stats[0][0]])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + getlspace(after[jline - 1])
                                        - getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Blank lines stay blank instead of gaining spaces.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never strip more than the line's own indentation.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines to f; run() must have been called."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next prepared line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Record the line number and indent level of each stmt/comment.

        Receives the five fields of one token; start is the token's
        (row, column) pair.  It is taken as an ordinary parameter because
        tuple parameters in a def are a SyntaxError on Python 3.  The
        trailing keyword parameters only bind the tokenize constants as
        locals.
        """
        sline = start[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
284 |
|
---|
def getlspace(line):
    """Return the number of leading blanks (space characters) in line.

    Only " " counts; a tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
291 |
|
---|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()