1 | #! /usr/bin/env python
|
---|
2 |
|
---|
3 | # This file contains a class and a main program that perform three
|
---|
4 | # related (though complementary) formatting operations on Python
|
---|
5 | # programs. When called as "pindent -c", it takes a valid Python
|
---|
6 | # program as input and outputs a version augmented with block-closing
|
---|
7 | # comments. When called as "pindent -d", it assumes its input is a
|
---|
8 | # Python program with block-closing comments and outputs a commentless
|
---|
9 | # version. When called as "pindent -r" it assumes its input is a
|
---|
10 | # Python program with block-closing comments but with its indentation
|
---|
11 | # messed up, and outputs a properly indented version.
|
---|
12 |
|
---|
13 | # A "block-closing comment" is a comment of the form '# end <keyword>'
|
---|
14 | # where <keyword> is the keyword that opened the block. If the
|
---|
15 | # opening keyword is 'def' or 'class', the function or class name may
|
---|
16 | # be repeated in the block-closing comment as well. Here is an
|
---|
17 | # example of a program fully augmented with block-closing comments:
|
---|
18 |
|
---|
19 | # def foobar(a, b):
|
---|
20 | # if a == b:
|
---|
21 | # a = a+1
|
---|
22 | # elif a < b:
|
---|
23 | # b = b-1
|
---|
24 | # if b > a: a = a-1
|
---|
25 | # # end if
|
---|
26 | # else:
|
---|
27 | # print 'oops!'
|
---|
28 | # # end if
|
---|
29 | # # end def foobar
|
---|
30 |
|
---|
31 | # Note that only the last part of an if...elif...else... block needs a
|
---|
32 | # block-closing comment; the same is true for other compound
|
---|
33 | # statements (e.g. try...except). Also note that "short-form" blocks
|
---|
34 | # like the second 'if' in the example must be closed as well;
|
---|
35 | # otherwise the 'else' in the example would be ambiguous (remember
|
---|
36 | # that indentation is not significant when interpreting block-closing
|
---|
37 | # comments).
|
---|
38 |
|
---|
39 | # The operations are idempotent (i.e. applied to their own output
|
---|
40 | # they yield an identical result). Running first "pindent -c" and
|
---|
41 | # then "pindent -r" on a valid Python program produces a program that
|
---|
42 | # is semantically identical to the input (though its indentation may
|
---|
43 | # be different). Running "pindent -d" on that output produces a
|
---|
44 | # program that only differs from the original in indentation.
|
---|
45 |
|
---|
46 | # Other options:
|
---|
47 | # -s stepsize: set the indentation step size (default 8)
|
---|
48 | # -t tabsize : set the number of spaces a tab character is worth (default 8)
|
---|
49 | # -e : expand TABs into spaces
|
---|
50 | # file ... : input file(s) (default standard input)
|
---|
51 | # The results always go to standard output
|
---|
52 |
|
---|
53 | # Caveats:
|
---|
54 | # - comments ending in a backslash will be mistaken for continued lines
|
---|
55 | # - continuations using backslash are always left unchanged
|
---|
56 | # - continuations inside parentheses are not extra indented by -r
|
---|
57 | # but must be indented for -c to work correctly (this breaks
|
---|
58 | # idempotency!)
|
---|
59 | # - continued lines inside triple-quoted strings are totally garbled
|
---|
60 |
|
---|
61 | # Secret feature:
|
---|
62 | # - On input, a block may also be closed with an "end statement" --
|
---|
63 | # this is a block-closing comment without the '#' sign.
|
---|
64 |
|
---|
65 | # Possible improvements:
|
---|
66 | # - check syntax based on transitions in 'next' table
|
---|
67 | # - better error reporting
|
---|
68 | # - better error recovery
|
---|
69 | # - check identifier after class/def
|
---|
70 |
|
---|
71 | # The following wishes need a more complete tokenization of the source:
|
---|
72 | # - Don't get fooled by comments ending in backslash
|
---|
73 | # - reindent continuation lines indicated by backslash
|
---|
74 | # - handle continuation lines inside parentheses/braces/brackets
|
---|
75 | # - handle triple quoted strings spanning lines
|
---|
76 | # - realign comments
|
---|
77 | # - optionally do much more thorough reformatting, a la C indent
|
---|
78 |
|
---|
79 | from __future__ import print_function
|
---|
80 |
|
---|
# Defaults
# Indentation step size; overridden on the command line with -s.
STEPSIZE = 8
# Number of spaces a tab character is worth; overridden with -t.
TABSIZE = 8
# Whether TABs are expanded into spaces on output; switched on with -e.
EXPANDTABS = False
|
---|
85 |
|
---|
86 | import io
|
---|
87 | import re
|
---|
88 | import sys
|
---|
89 |
|
---|
# Keyword table: each block keyword maps to the keywords that may come
# next in the same compound statement ('end' stands for the block-closing
# comment).  The code visible in this file only tests which keys exist.
next = {
    'if': ('elif', 'else', 'end'),
    'elif': ('elif', 'else', 'end'),
    'while': ('else', 'end'),
    'for': ('else', 'end'),
    'try': ('except', 'finally'),
    'except': ('except', 'else', 'finally', 'end'),
    'else': 'end',
    'finally': 'end',
    'with': 'end',
    'def': 'end',
    'class': 'end',
    'end': (),
}

# Keywords that open a new block.
start = ('if', 'while', 'for', 'try', 'with', 'def', 'class')
|
---|
99 |
|
---|
class PythonIndenter:
    """Add, remove, or re-indent by block-closing comments.

    An instance reads Python source line by line from ``fpi`` and writes
    the transformed program to ``fpo``.  The three operations are
    complete() (add '# end <keyword>' comments), delete() (strip them) and
    reformat() (recompute indentation from them).
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        """Set up the input/output streams and the indentation settings.

        fpi        -- object with a readline() method to read source from
        fpo        -- object with a write() method to receive the result
        indentsize -- columns per indentation level used by reformat()
        tabsize    -- number of columns a tab character is worth
        expandtabs -- if true, tabs are expanded to spaces on output
        """
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # A line that starts with a lower-case word, optionally followed
        # by an identifier (the name after 'def' or 'class').
        self.kwprog = re.compile(
                r'^(?:\s|\\\n)*(?P<kw>[a-z]+)'
                r'((?:\s|\\\n)+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # A block-closing line: 'end <keyword> [<name>]', with or without
        # the leading '#'.
        self.endprog = re.compile(
                r'^(?:\s|\\\n)*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Leading blanks and tabs of a line.
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write line to the output, expanding tabs if that was requested."""
        if self.expandtabs:
            self._write(line.expandtabs(self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Return the next physical input line ('' at EOF), counting lines."""
        line = self.fpi.readline()
        if line: self.lineno += 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report a problem on stderr and mark it in the output.

        fmt is %-formatted with args when args are given.  The message is
        written to sys.stderr with the current line number and also
        written to the output as a '### message ###' line.
        """
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Return the next logical line ('' at EOF).

        Physical lines ending in backslash-newline are joined with the
        lines that follow them; the backslashes and newlines are kept.
        """
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line += line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent):
        """Write line re-indented to the given indentation level.

        The existing leading blanks and tabs are dropped and replaced by
        indent*indentsize columns, made of tabs (worth tabsize columns
        each) followed by spaces.  Empty lines get no indentation.
        """
        tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
        i = self.wsprog.match(line).end()
        line = line[i:]
        if line[:1] not in ('\n', '\r', ''):
            line = '\t'*tabs + ' '*spaces + line
        # end if
        self.write(line)
    # end def putline

    def reformat(self):
        """Re-indent the input using its block-closing comments.

        Indentation of the input is ignored; the nesting level is tracked
        on a stack that grows on block-opening keywords and shrinks on
        'end' lines.  Problems are reported through error().
        """
        # One (opening keyword, latest keyword) pair per open block.
        stack = []
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack.pop()[0] != kw2:
                    self.error('unmatched end')
                # end if
                # The closing line sits at the level of the block it closes.
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                # 'in' replaces dict.has_key(), which Python 3 removed.
                if kw in next and stack:
                    # A continuation keyword (elif, else, except, ...)
                    # lines up with the keyword that opened the block.
                    self.putline(line, len(stack)-1)
                    stack[-1] = (stack[-1][0], kw)
                    continue
                # end if
            # end if
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, _ in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input to the output without block-closing lines.

        Block-opening keywords and removed 'end' lines are counted, and a
        warning is written to sys.stderr if the two counts differ.
        """
        begin_counter = 0
        end_counter = 0
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            if self.endprog.match(line):
                end_counter += 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m and m.group('kw') in start:
                begin_counter += 1
            # end if
            self.write(line)
        # end while
        if begin_counter < end_counter:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter > end_counter:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def complete(self):
        """Copy the input to the output, adding block-closing comments.

        The input's indentation decides where blocks end.  A
        '# end <keyword> [<name>]' comment is written wherever an open
        block ends without one; existing closing lines are checked against
        the keyword that opened the block.
        """
        # Saved (currentws, firstkw, lastkw, topid) of enclosing levels.
        stack = []
        # Blank and comment lines held back until the next real line, so
        # that a closing comment triggered by that line precedes them.
        todo = []
        # currentws: whitespace prefix of the current indentation level
        # firstkw:   keyword that opened the statement still open at this
        #            level ('' if none)
        # lastkw:    latest keyword seen in that statement
        # topid:     name after 'def'/'class' for the open statement
        currentws = thisid = firstkw = lastkw = topid = ''
        while True:
            line = self.getline()
            i = self.wsprog.match(line).end()
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    # 'in' replaces dict.has_key(), which Python 3 removed.
                    if thiskw not in next:
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '\r', '#'):
                    # Blank or comment-only line.  '\r' is accepted, as in
                    # putline(), so CRLF blank lines do not look like
                    # dedented statements.
                    todo.append(line)
                    continue
                else:
                    thiskw = ''
                # end if
            # end if
            indentws = line[:i]
            indent = len(indentws.expandtabs(self.tabsize))
            current = len(currentws.expandtabs(self.tabsize))
            # Dedent: close every level deeper than this line.
            while indent < current:
                if firstkw:
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.write(currentws + s)
                    firstkw = lastkw = ''
                # end if
                currentws, firstkw, lastkw, topid = stack.pop()
                current = len(currentws.expandtabs(self.tabsize))
            # end while
            if indent == current and firstkw:
                if thiskw == 'end':
                    # The block already has its closing line; check it.
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    # A plain statement or a new block at the same level
                    # ends the open statement; continuation keywords
                    # (elif, else, except, finally) keep it open.
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.write(currentws + s)
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            if indent > current:
                # Deeper indentation: remember the enclosing level.
                stack.append((currentws, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error
                    thiskw = ''
                # end if
                currentws, firstkw, lastkw, topid = \
                          indentws, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete
# end class PythonIndenter
|
---|
327 |
|
---|
328 | # Simplified user interface
|
---|
329 | # - xxx_filter(input, output): read and write file objects
|
---|
330 | # - xxx_string(s): take and return string object
|
---|
331 | # - xxx_file(filename): process file in place, return true iff changed
|
---|
332 |
|
---|
def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output with
    block-closing comments added (PythonIndenter.complete)."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).complete()
# end def complete_filter
|
---|
338 |
|
---|
def delete_filter(input= sys.stdin, output = sys.stdout,
                  stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output with its
    block-closing comments removed (PythonIndenter.delete)."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).delete()
# end def delete_filter
|
---|
344 |
|
---|
def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output re-indented
    according to its block-closing comments (PythonIndenter.reformat)."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).reformat()
# end def reformat_filter
|
---|
350 |
|
---|
def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return source with block-closing comments added.

    source is the program text as one string; the result is a string of
    the same kind.  The remaining arguments are passed on to
    PythonIndenter.
    """
    # Byte strings go through io.BytesIO, exactly as before.  Any other
    # string type goes through io.StringIO, because io.BytesIO only
    # accepts bytes and would raise TypeError on it.
    buffer_type = io.BytesIO if isinstance(source, bytes) else io.StringIO
    input = buffer_type(source)
    output = buffer_type()
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    pi.complete()
    return output.getvalue()
# end def complete_string
|
---|
358 |
|
---|
def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return source with its block-closing comments removed.

    source is the program text as one string; the result is a string of
    the same kind.  The remaining arguments are passed on to
    PythonIndenter.
    """
    # Byte strings go through io.BytesIO, exactly as before.  Any other
    # string type goes through io.StringIO, because io.BytesIO only
    # accepts bytes and would raise TypeError on it.
    buffer_type = io.BytesIO if isinstance(source, bytes) else io.StringIO
    input = buffer_type(source)
    output = buffer_type()
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    pi.delete()
    return output.getvalue()
# end def delete_string
|
---|
366 |
|
---|
def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return source re-indented according to its block-closing comments.

    source is the program text as one string; the result is a string of
    the same kind.  The remaining arguments are passed on to
    PythonIndenter.
    """
    # Byte strings go through io.BytesIO, exactly as before.  Any other
    # string type goes through io.StringIO, because io.BytesIO only
    # accepts bytes and would raise TypeError on it.
    buffer_type = io.BytesIO if isinstance(source, bytes) else io.StringIO
    input = buffer_type(source)
    output = buffer_type()
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    pi.reformat()
    return output.getvalue()
# end def reformat_string
|
---|
374 |
|
---|
def make_backup(filename):
    """Move filename aside to filename + '~', replacing an earlier backup.

    A failure to remove the old backup or to rename the file is reported
    on standard error and otherwise ignored.
    """
    import os
    backup_name = filename + '~'
    if os.path.lexists(backup_name):
        try:
            os.remove(backup_name)
        except OSError:
            print("Can't remove backup %r" % (backup_name,), file=sys.stderr)
        # end try
    # end if
    try:
        os.rename(filename, backup_name)
    except OSError:
        print("Can't rename %r to %r" % (filename, backup_name), file=sys.stderr)
    # end try
# end def make_backup
|
---|
391 |
|
---|
def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Add block-closing comments to the file in place.

    Returns 0 when the file already equals the completed text.  Otherwise
    the file is backed up with make_backup(), rewritten, and 1 is
    returned.
    """
    with open(filename, 'r') as fp:
        original = fp.read()
    # end with
    completed = complete_string(original, stepsize, tabsize, expandtabs)
    if completed != original:
        make_backup(filename)
        with open(filename, 'w') as fp:
            fp.write(completed)
        # end with
        return 1
    # end if
    return 0
# end def complete_file
|
---|
405 |
|
---|
def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Remove block-closing comments from the file in place.

    Returns 0 when the file contains nothing to remove.  Otherwise the
    file is backed up with make_backup(), rewritten, and 1 is returned.
    """
    with open(filename, 'r') as fp:
        original = fp.read()
    # end with
    stripped = delete_string(original, stepsize, tabsize, expandtabs)
    if stripped != original:
        make_backup(filename)
        with open(filename, 'w') as fp:
            fp.write(stripped)
        # end with
        return 1
    # end if
    return 0
# end def delete_file
|
---|
419 |
|
---|
def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Re-indent the file in place from its block-closing comments.

    Returns 0 when reformatting leaves the text unchanged.  Otherwise the
    file is backed up with make_backup(), rewritten, and 1 is returned.
    """
    with open(filename, 'r') as fp:
        original = fp.read()
    # end with
    reindented = reformat_string(original, stepsize, tabsize, expandtabs)
    if reindented != original:
        make_backup(filename)
        with open(filename, 'w') as fp:
            fp.write(reindented)
        # end with
        return 1
    # end if
    return 0
# end def reformat_file
|
---|
433 |
|
---|
434 | # Test program when called as a script
|
---|
435 |
|
---|
# Help text written to sys.stderr on command-line errors.  The
# %(STEPSIZE)d and %(TABSIZE)d placeholders are filled in from the
# module-level names via vars().
usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c : complete a correctly indented program (add #end directives)
-d : delete #end directives
-r : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()
|
---|
448 |
|
---|
def error_both(op1, op2):
    """Complain that two conflicting actions were given, then exit.

    op1 is the option just seen (e.g. '-d'); op2 is the name of the action
    chosen earlier, whose first letter is shown as its option.  Writes the
    message and the usage text to standard error and exits with status 2.
    """
    message = ''.join(['Error: You can not specify both ', op1,
                       ' and -', op2[0], ' at the same time\n'])
    sys.stderr.write(message)
    sys.stderr.write(usage)
    sys.exit(2)
# end def error_both
|
---|
454 |
|
---|
def test():
    """Run pindent as a command-line program.

    Parses sys.argv with getopt and requires exactly one of -c, -d or -r.
    With no file arguments, or a single '-', the chosen action filters
    standard input to standard output; otherwise each named file is
    processed in place.  Option errors, including a non-integer value for
    -s or -t, are reported on standard error followed by the usage text,
    and the process exits with status 2.
    """
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
    except getopt.error as msg:
        sys.stderr.write('Error: %s\n' % msg)
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
    action_for_option = {'-c': 'complete', '-d': 'delete', '-r': 'reformat'}
    action = None
    stepsize = STEPSIZE
    tabsize = TABSIZE
    expandtabs = EXPANDTABS
    for o, a in opts:
        if o in action_for_option:
            if action: error_both(o, action)
            # end if
            action = action_for_option[o]
        elif o in ('-s', '-t'):
            try:
                value = int(a)
            except ValueError:
                # Treat a bad number like any other option error instead
                # of letting the traceback escape.
                sys.stderr.write(
                        'Error: option %s requires an integer argument, got %r\n'
                        % (o, a))
                sys.stderr.write(usage)
                sys.exit(2)
            # end try
            if o == '-s':
                stepsize = value
            else:
                tabsize = value
            # end if
        elif o == '-e':
            expandtabs = True
        # end if
    # end for
    if not action:
        sys.stderr.write(
                'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
        sys.stderr.write(usage)
        sys.exit(2)
    # end if
    # Explicit dispatch tables instead of eval() on a constructed name.
    filters = {
        'complete': complete_filter,
        'delete': delete_filter,
        'reformat': reformat_filter,
    }
    file_actions = {
        'complete': complete_file,
        'delete': delete_file,
        'reformat': reformat_file,
    }
    if not args or args == ['-']:
        filters[action](sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    else:
        process_file = file_actions[action]
        for filename in args:
            process_file(filename, stepsize, tabsize, expandtabs)
        # end for
    # end if
# end def test
|
---|
505 |
|
---|
# Run the command-line interface only when executed as a script, not when
# imported as a module.
if __name__ == '__main__':
    test()
# end if
|
---|