Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

reindent.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 10.6 KB

Rev	Line
[2]	1	#! /usr/bin/env python
	2
	3	# Released to the public domain, by Tim Peters, 03 October 2000.
	4
	5	"""reindent [-d][-r][-v] [ path ... ]
	6
	7	-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
	8	-r (--recurse) Recurse. Search for all .py files in subdirectories too.
	9	-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
	10	-v (--verbose) Verbose. Print informative msgs; else no output.
	11	-h (--help) Help. Print this usage information and exit.
	12
	13	Change Python (.py) files to use 4-space indents and no hard tab characters.
	14	Also trim excess spaces and tabs from ends of lines, and remove empty lines
	15	at the end of files. Also ensure the last line ends with a newline.
	16
	17	If no paths are given on the command line, reindent operates as a filter,
	18	reading a single source file from standard input and writing the transformed
	19	source to standard output. In this case, the -d, -r and -v flags are
	20	ignored.
	21
	22	You can pass one or more file and/or directory paths. When a directory
	23	path, all .py files within the directory will be examined, and, if the -r
	24	option is given, likewise recursively for subdirectories.
	25
	26	If output is not to standard output, reindent overwrites files in place,
	27	renaming the originals with a .bak extension. If it finds nothing to
	28	change, the file is left alone. If reindent does change a file, the changed
	29	file is a fixed-point for future runs (i.e., running reindent on the
	30	resulting .py file won't change it again).
	31
	32	The hard part of reindenting is figuring out what to do with comment
	33	lines. So long as the input files get a clean bill of health from
	34	tabnanny.py, reindent should do a good job.
	35
	36	The backup file is a copy of the one that is being reindented. The ".bak"
	37	file is generated with shutil.copy(), but some corner cases regarding
[391]	38	user/group and permissions could leave the backup file more readable than
[2]	39	you'd prefer. You can always use the --nobackup option to prevent this.
	40	"""
	41
	42	__version__ = "1"
	43
	44	import tokenize
	45	import os, shutil
	46	import sys
[391]	47	import io
[2]	48
	49	verbose = 0
	50	recurse = 0
	51	dryrun = 0
	52	makebackup = True
	53
	54	def usage(msg=None):
	55	if msg is not None:
	56	print >> sys.stderr, msg
	57	print >> sys.stderr, __doc__
	58
	59	def errprint(*args):
	60	sep = ""
	61	for arg in args:
	62	sys.stderr.write(sep + str(arg))
	63	sep = " "
	64	sys.stderr.write("\n")
	65
	66	def main():
	67	import getopt
	68	global verbose, recurse, dryrun, makebackup
	69	try:
	70	opts, args = getopt.getopt(sys.argv[1:], "drnvh",
	71	["dryrun", "recurse", "nobackup", "verbose", "help"])
	72	except getopt.error, msg:
	73	usage(msg)
	74	return
	75	for o, a in opts:
	76	if o in ('-d', '--dryrun'):
	77	dryrun += 1
	78	elif o in ('-r', '--recurse'):
	79	recurse += 1
	80	elif o in ('-n', '--nobackup'):
	81	makebackup = False
	82	elif o in ('-v', '--verbose'):
	83	verbose += 1
	84	elif o in ('-h', '--help'):
	85	usage()
	86	return
	87	if not args:
	88	r = Reindenter(sys.stdin)
	89	r.run()
	90	r.write(sys.stdout)
	91	return
	92	for arg in args:
	93	check(arg)
	94
	95	def check(file):
	96	if os.path.isdir(file) and not os.path.islink(file):
	97	if verbose:
	98	print "listing directory", file
	99	names = os.listdir(file)
	100	for name in names:
	101	fullname = os.path.join(file, name)
	102	if ((recurse and os.path.isdir(fullname) and
	103	not os.path.islink(fullname) and
	104	not os.path.split(fullname)[1].startswith("."))
	105	or name.lower().endswith(".py")):
	106	check(fullname)
	107	return
	108
	109	if verbose:
	110	print "checking", file, "...",
	111	try:
[391]	112	f = io.open(file)
[2]	113	except IOError, msg:
	114	errprint("%s: I/O Error: %s" % (file, str(msg)))
	115	return
	116
	117	r = Reindenter(f)
	118	f.close()
[391]	119
	120	newline = r.newlines
	121	if isinstance(newline, tuple):
	122	errprint("%s: mixed newlines detected; cannot process file" % file)
	123	return
	124
[2]	125	if r.run():
	126	if verbose:
	127	print "changed."
	128	if dryrun:
	129	print "But this is a dry run, so leaving it alone."
	130	if not dryrun:
	131	bak = file + ".bak"
	132	if makebackup:
	133	shutil.copyfile(file, bak)
	134	if verbose:
	135	print "backed up", file, "to", bak
[391]	136	f = io.open(file, "w", newline=newline)
[2]	137	r.write(f)
	138	f.close()
	139	if verbose:
	140	print "wrote new", file
	141	return True
	142	else:
	143	if verbose:
	144	print "unchanged."
	145	return False
	146
	147	def _rstrip(line, JUNK='\n \t'):
	148	"""Return line stripped of trailing spaces, tabs, newlines.
	149
	150	Note that line.rstrip() instead also strips sundry control characters,
	151	but at least one known Emacs user expects to keep junk like that, not
	152	mentioning Barry by name or anything <wink>.
	153	"""
	154
	155	i = len(line)
	156	while i > 0 and line[i-1] in JUNK:
	157	i -= 1
	158	return line[:i]
	159
	160	class Reindenter:
	161
	162	def __init__(self, f):
	163	self.find_stmt = 1 # next token begins a fresh stmt?
	164	self.level = 0 # current indent level
	165
	166	# Raw file lines.
	167	self.raw = f.readlines()
	168
	169	# File lines, rstripped & tab-expanded. Dummy at start is so
	170	# that we can use tokenize's 1-based line numbering easily.
	171	# Note that a line is all-blank iff it's "\n".
	172	self.lines = [_rstrip(line).expandtabs() + "\n"
	173	for line in self.raw]
	174	self.lines.insert(0, None)
	175	self.index = 1 # index into self.lines of next line
	176
	177	# List of (lineno, indentlevel) pairs, one for each stmt and
	178	# comment line. indentlevel is -1 for comment lines, as a
	179	# signal that tokenize doesn't know what to do about them;
	180	# indeed, they're our headache!
	181	self.stats = []
	182
[391]	183	# Save the newlines found in the file so they can be used to
	184	# create output without mutating the newlines.
	185	self.newlines = f.newlines
	186
[2]	187	def run(self):
	188	tokenize.tokenize(self.getline, self.tokeneater)
	189	# Remove trailing empty lines.
	190	lines = self.lines
	191	while lines and lines[-1] == "\n":
	192	lines.pop()
	193	# Sentinel.
	194	stats = self.stats
	195	stats.append((len(lines), 0))
	196	# Map count of leading spaces to # we want.
	197	have2want = {}
	198	# Program after transformation.
	199	after = self.after = []
	200	# Copy over initial empty lines -- there's nothing to do until
	201	# we see a line with something on it.
	202	i = stats[0][0]
	203	after.extend(lines[1:i])
	204	for i in range(len(stats)-1):
	205	thisstmt, thislevel = stats[i]
	206	nextstmt = stats[i+1][0]
	207	have = getlspace(lines[thisstmt])
	208	want = thislevel * 4
	209	if want < 0:
	210	# A comment line.
	211	if have:
	212	# An indented comment line. If we saw the same
	213	# indentation before, reuse what it most recently
	214	# mapped to.
	215	want = have2want.get(have, -1)
	216	if want < 0:
	217	# Then it probably belongs to the next real stmt.
	218	for j in xrange(i+1, len(stats)-1):
	219	jline, jlevel = stats[j]
	220	if jlevel >= 0:
	221	if have == getlspace(lines[jline]):
	222	want = jlevel * 4
	223	break
	224	if want < 0: # Maybe it's a hanging
	225	# comment like this one,
	226	# in which case we should shift it like its base
	227	# line got shifted.
	228	for j in xrange(i-1, -1, -1):
	229	jline, jlevel = stats[j]
	230	if jlevel >= 0:
	231	want = have + getlspace(after[jline-1]) - \
	232	getlspace(lines[jline])
	233	break
	234	if want < 0:
	235	# Still no luck -- leave it alone.
	236	want = have
	237	else:
	238	want = 0
	239	assert want >= 0
	240	have2want[have] = want
	241	diff = want - have
	242	if diff == 0 or have == 0:
	243	after.extend(lines[thisstmt:nextstmt])
	244	else:
	245	for line in lines[thisstmt:nextstmt]:
	246	if diff > 0:
	247	if line == "\n":
	248	after.append(line)
	249	else:
	250	after.append(" " * diff + line)
	251	else:
	252	remove = min(getlspace(line), -diff)
	253	after.append(line[remove:])
	254	return self.raw != self.after
	255
	256	def write(self, f):
	257	f.writelines(self.after)
	258
	259	# Line-getter for tokenize.
	260	def getline(self):
	261	if self.index >= len(self.lines):
	262	line = ""
	263	else:
	264	line = self.lines[self.index]
	265	self.index += 1
	266	return line
	267
	268	# Line-eater for tokenize.
	269	def tokeneater(self, type, token, (sline, scol), end, line,
	270	INDENT=tokenize.INDENT,
	271	DEDENT=tokenize.DEDENT,
	272	NEWLINE=tokenize.NEWLINE,
	273	COMMENT=tokenize.COMMENT,
	274	NL=tokenize.NL):
	275
	276	if type == NEWLINE:
	277	# A program statement, or ENDMARKER, will eventually follow,
	278	# after some (possibly empty) run of tokens of the form
	279	# (NL \| COMMENT)* (INDENT \| DEDENT+)?
	280	self.find_stmt = 1
	281
	282	elif type == INDENT:
	283	self.find_stmt = 1
	284	self.level += 1
	285
	286	elif type == DEDENT:
	287	self.find_stmt = 1
	288	self.level -= 1
	289
	290	elif type == COMMENT:
	291	if self.find_stmt:
	292	self.stats.append((sline, -1))
	293	# but we're still looking for a new stmt, so leave
	294	# find_stmt alone
	295
	296	elif type == NL:
	297	pass
	298
	299	elif self.find_stmt:
	300	# This is the first "real token" following a NEWLINE, so it
	301	# must be the first token of the next program statement, or an
	302	# ENDMARKER.
	303	self.find_stmt = 0
	304	if line: # not endmarker
	305	self.stats.append((sline, self.level))
	306
	307	# Count number of leading blanks.
	308	def getlspace(line):
	309	i, n = 0, len(line)
	310	while i < n and line[i] == " ":
	311	i += 1
	312	return i
	313
	314	if __name__ == '__main__':
	315	main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Tools/scripts/reindent.py

Download in other formats: