Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

tabnanny.py@ 1538

Last change on this file since 1538 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 11.1 KB

Line
1	#! /usr/bin/env python
2
3	"""The Tab Nanny despises ambiguous indentation. She knows no mercy.
4
5	tabnanny -- Detection of ambiguous indentation
6
7	For the time being this module is intended to be called as a script.
8	However it is possible to import it into an IDE and use the function
9	check() described below.
10
11	Warning: The API provided by this module is likely to change in future
12	releases; such changes may not be backward compatible.
13	"""
14
15	# Released to the public domain, by Tim Peters, 15 April 1998.
16
17	# XXX Note: this is now a standard library module.
18	# XXX The API needs to undergo changes however; the current code is too
19	# XXX script-like. This will be addressed later.
20
21	__version__ = "6"
22
23	import os
24	import sys
25	import getopt
26	import tokenize
# process_tokens() below treats tokenize.NL as a junk token, so the module
# cannot work with a tokenize that does not define it.
if not hasattr(tokenize, "NL"):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

# Module-wide settings, incremented by main() once per -v / -q option and
# read by check() when deciding what to print.
verbose = 0
filename_only = 0
34
def errprint(*args):
    """Write the arguments to stderr as a single line.

    Each argument is converted with str(); the pieces are separated by
    one space and the line is terminated with a newline.
    """
    sys.stderr.write(" ".join(map(str, args)))
    sys.stderr.write("\n")
41
def main():
    """Command-line entry point: parse options, then check each argument.

    Options (each occurrence increments the corresponding module global):
      -q  filename_only
      -v  verbose

    Every remaining argument is handed to check().  An unknown option or
    a missing file argument is reported through errprint() and main()
    returns without checking anything.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    # Neither option takes a value, so only the option name matters.
    for opt, _value in opts:
        if opt == '-q':
            filename_only += 1
        elif opt == '-v':
            verbose += 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-q] [-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)
59
class NannyNag(Exception):
    """Signals that ambiguous indentation was detected.

    Raised by process_tokens(); captured and handled in check().
    """

    def __init__(self, lineno, msg, line):
        # Keep the three pieces of the report as plain attributes.
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number stored at construction."""
        return self.lineno

    def get_msg(self):
        """Return the diagnostic message stored at construction."""
        return self.msg

    def get_line(self):
        """Return the source line stored at construction."""
        return self.line
73
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using print; I/O, tokenizing and indentation
    errors are reported through errprint().  Nothing is returned.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            # Recurse into real (non-symlink) sub-directories, and check
            # anything whose name ends in ".py".
            if ((os.path.isdir(fullname) and not os.path.islink(fullname))
                    or os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    try:
        if verbose > 1:
            print("checking %r ..." % (file,))

        process_tokens(tokenize.generate_tokens(f.readline))

    except tokenize.TokenError as msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    except IndentationError as msg:
        errprint("%r: Indentation Error: %s" % (file, msg))
        return

    except NannyNag as nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print("%r: * Line %d: trouble in tab city! *" % (file, badline))
            print("offending line: %r" % (line,))
            print(nag.get_msg())
        else:
            # Quote names containing a space so the terse output stays
            # unambiguous.
            if ' ' in file:
                file = '"' + file + '"'
            if filename_only:
                print(file)
            else:
                print("%s %s %r" % (file, badline, line))
        return

    finally:
        # Always release the file handle, whichever way we leave.
        f.close()

    if verbose:
        print("%r: Clean bill of health." % (file,))
131
class Whitespace:
    """Normal-form description of the leading whitespace of a string.

    Members:
        raw
            the original string
        n
            the number of leading whitespace characters in raw
        nt
            the number of tabs in raw[:n]
        norm
            the normal form as a pair (count, trailing), where:
            count
                a tuple such that raw[:n] contains count[i]
                instances of S * i + T
            trailing
                the number of trailing spaces in raw[:n]
            It's A Theorem that m.indent_level(t) ==
            n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
        is_simple
            true iff raw[:n] is of the form (T*)(S*)
    """

    # the characters used for space and tab
    S, T = ' \t'

    def __init__(self, ws):
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: spaces seen since the last tab; n: leading whitespace chars;
        # nt: tabs seen so far
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                # grow count so that index b exists
                if b >= len(count):
                    count.extend([0] * (b - len(count) + 1))
                count[b] += 1
                b = 0
            else:
                # first non-whitespace character ends the prefix
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    def longest_run_of_spaces(self):
        """Return the length of the longest contiguous run of spaces
        (whether or not preceding a tab)."""
        count, trailing = self.norm
        return max(len(count)-1, trailing)

    def indent_level(self, tabsize):
        """Return the integer column the prefix reaches for this tabsize."""
        # count, il = self.norm
        # for i in range(len(count)):
        #    if count[i]:
        #        il = il + (i//tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # floor division: plain "/" yields floats (and wrong levels)
            # wherever true division is in effect
            il += i // tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    def equal(self, other):
        """Return true iff self.indent_level(t) == other.indent_level(t)
        for all t >= 1."""
        return self.norm == other.norm

    def not_equal_witness(self, other):
        """Return a list of tuples (ts, i1, i2) such that
        i1 == self.indent_level(ts) != other.indent_level(ts) == i2.

        Intended to be used after not self.equal(other) is known, in which
        case it will return at least one witnessing tab size.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 != i2:
                a.append((ts, i1, i2))
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way. I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    def not_less_witness(self, other):
        """Return a list of tuples (ts, i1, i2) such that
        i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.

        Intended to be used after not self.less(other) is known, in which
        case it will return at least one witnessing tab size.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 >= i2:
                a.append((ts, i1, i2))
        return a
265
def format_witnesses(w):
    """Describe the tab sizes found in a list of witness tuples.

    w is a sequence of tuples whose first item is a tab size.  The result
    is "at tab size" (with an "s" appended when there is more than one
    witness), a space, and the tab sizes joined by ", ".
    """
    label = "at tab size"
    if len(w) > 1:
        label += "s"
    sizes = [str(entry[0]) for entry in w]
    return label + " " + ', '.join(sizes)
272
def process_tokens(tokens):
    """Scan a token stream and raise NannyNag on ambiguous indentation.

    tokens is an iterable of 5-tuples (type, token, start, end, line).
    A stack of Whitespace objects, seeded with the empty prefix, tracks
    the enclosing indentation levels.  Each INDENT must be greater than
    the top of the stack, and the first real token after a NEWLINE or a
    run of DEDENTs must have leading whitespace equal to the top of the
    stack; otherwise NannyNag(lineno, msg, line) is raised.
    """
    junk = (tokenize.COMMENT, tokenize.NL)
    indents = [Whitespace("")]
    check_equal = 0

    for (tok_type, token, start, end, line) in tokens:
        if tok_type == tokenize.NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif tok_type == tokenize.INDENT:
            check_equal = 0
            current = Whitespace(token)
            enclosing = indents[-1]
            if not enclosing.less(current):
                witness = enclosing.not_less_witness(current)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(current)

        elif tok_type == tokenize.DEDENT:
            # there's nothing we need to check here! what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # No assertion on check_equal here: if the last line of the
            # source is indented and lacks a newline, DEDENTs pop out of
            # thin air without a preceding NEWLINE.
            check_equal = 1

            indents.pop()

        elif check_equal and tok_type not in junk:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            current = Whitespace(line)
            enclosing = indents[-1]
            if not enclosing.equal(current):
                witness = enclosing.not_equal_witness(current)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
326
327
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/tabnanny.py@ 1538

Download in other formats: