1 | """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents.
|
---|
2 |
|
---|
3 | Written by Raymond D. Hettinger <python at rcn.com>
|
---|
4 | Copyright (c) 2003 Python Software Foundation. All rights reserved.
|
---|
5 |
|
---|
6 | Designed to catch common markup errors including:
|
---|
7 | * Unbalanced or mismatched parentheses, brackets, and braces.
|
---|
8 | * Unbalanced or mismatched \\begin and \\end blocks.
|
---|
9 | * Misspelled or invalid LaTeX commands.
|
---|
10 | * Use of forward slashes instead of backslashes for commands.
|
---|
11 | * Table line size mismatches.
|
---|
12 |
|
---|
13 | Sample command line usage:
|
---|
14 | python texcheck.py -k chapterheading -m lib/librandomtex *.tex
|
---|
15 |
|
---|
16 | Options:
|
---|
17 | -m Munge parentheses and brackets. [0,n) would normally mismatch.
|
---|
18 | -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
|
---|
19 | -d: Delimiter check only (useful for non-LaTeX files).
|
---|
20 | -h: Help
|
---|
21 | -s lineno: Start at lineno (useful for skipping complex sections).
|
---|
22 | -v: Verbose. Trace the matching of \\begin and \\end blocks.
|
---|
23 | """
|
---|
24 |
|
---|
25 | import re
|
---|
26 | import sys
|
---|
27 | import getopt
|
---|
28 | from itertools import izip, count, islice
|
---|
29 | import glob
|
---|
30 |
|
---|
# Whitespace-separated list of the LaTeX commands accepted as valid.
# checkit() turns this text into a set with cmdstr.split(), so the layout
# and any repeated entries (e.g. \menuselection) do not affect the result.
cmdstr = r"""
\section \module \declaremodule \modulesynopsis \moduleauthor
\sectionauthor \versionadded \code \class \method \begin
\optional \var \ref \end \subsection \lineiii \hline \label
\indexii \textrm \ldots \keyword \stindex \index \item \note
\withsubitem \ttindex \footnote \citetitle \samp \opindex
\noindent \exception \strong \dfn \ctype \obindex \character
\indexiii \function \bifuncindex \refmodule \refbimodindex
\subsubsection \nodename \member \chapter \emph \ASCII \UNIX
\regexp \program \production \token \productioncont \term
\grammartoken \lineii \seemodule \file \EOF \documentclass
\usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
\tableofcontents \kbd \programopt \envvar \refstmodindex
\cfunction \constant \NULL \moreargs \cfuncline \cdata
\textasciicircum \n \ABC \setindexsubitem \versionchanged
\deprecated \seetext \newcommand \POSIX \pep \warning \rfc
\verbatiminput \methodline \textgreater \seetitle \lineiv
\funclineni \ulink \manpage \funcline \dataline \unspecified
\textbackslash \mimetype \mailheader \seepep \textunderscore
\longprogramopt \infinity \plusminus \shortversion \version
\refmodindex \seerfc \makeindex \makemodindex \renewcommand
\indexname \appendix \protect \indexiv \mbox \textasciitilde
\platform \seeurl \leftmargin \labelwidth \localmoduletable
\LaTeX \copyright \memberline \backslash \pi \centerline
\caption \vspace \textwidth \menuselection \textless
\makevar \csimplemacro \menuselection \bfcode \sub \release
\email \kwindex \refexmodindex \filenq \e \menuselection
\exindex \linev \newsgroup \verbatim \setshortversion
\author \authoraddress \paragraph \subparagraph \cmemberline
\textbar \C \seelink
"""
62 |
|
---|
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Pop the latest opener and report it if this closer cannot pair with it.

    c_lineno and c_symbol are the line number and text of the closing
    delimiter just read.  openers is the stack (a list) of pending
    (lineno, symbol) openers; its top entry is consumed whenever one exists.
    pairmap maps a closing punctuation character to the opener characters
    it may close; a closer that is not a key of pairmap must equal the
    opener exactly.

    Problems are printed to standard output.  Nothing is returned.
    """
    if not openers:
        # Nothing is pending, so this closer has no partner at all.
        print("\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol))
        return

    o_lineno, o_symbol = openers.pop()
    acceptable = pairmap.get(c_symbol, [c_symbol])
    if o_symbol not in acceptable:
        print("\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno))
73 |
|
---|
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (an open file works).

    Opts is a mapping of options to option values if any:
        -m          munge parentheses and brackets
        -d          delimiters only checking
        -v          verbose trace of delimiter matching
        -s lineno:  linenumber to start scan (default is 1; smaller
                    values are treated as 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Every finding is printed to standard output.  Always returns 0.
    """

    texcmd = re.compile(r'\\[A-Za-z]+')
    falsetexcmd = re.compile(r'\/([A-Za-z]+)')  # Mismarked with forward slash

    validcmds = set(cmdstr.split())
    validcmds.update('\\' + cmd for cmd in morecmds)

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}  # Munged openers
    else:
        pairmap = {']': '[', ')': '('}  # Normal opener for a given closer
    openpunct = set('([')  # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')
    # The range is A-Z plus a-z; "A-z" would also accept [ \ ] ^ _ and `.
    doubledwords = re.compile(r'(\b[A-Za-z]+\b) \b\1\b')
    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')

    openers = []  # Stack of pending open delimiters
    bracestack = []  # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    delimsonly = '-d' in opts
    verbose = '-v' in opts
    # islice() rejects a negative start, so clamp the first line to 1.
    startline = max(1, int(opts.get('-s', '1')))
    lastline = 0  # Stays 0 when there is nothing to scan

    for lineno, line in enumerate(islice(source, startline - 1, None), startline):
        lastline = lineno
        line = line.rstrip()

        # Check balancing of open/close parenthesis, brackets, and begin/end blocks
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # No newline here: the ' --> ' trace below finishes the
                # line unless matchclose() prints a message in between.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and not delimsonly:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and not delimsonly:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opened, closed in braces.findall(line):
            if opened:
                bracestack.append(lineno)
            elif closed:
                if bracestack:
                    bracestack.pop()
                else:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))

        # Optionally, skip LaTeX specific checks
        if delimsonly:
            continue

        # Warn whenever forward slashes encountered with a LaTeX command.
        # Lines mentioning 822 or .html are skipped entirely to ignore
        # false positives for urls and for /rfc822.
        if '822' not in line and '.html' not in line:
            for cmd in falsetexcmd.findall(line):
                if '\\' + cmd in validcmds:
                    print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd))

        # Check for markup requiring {} for correct spacing
        for cmd in spacingmarkup.findall(line):
            print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno))

        # Validate commands; a \newcommand{\name} registers \name as valid
        nc = line.find(r'\newcommand')
        if nc != -1:
            start = line.find('{', nc)
            end = line.find('}', start) if start != -1 else -1
            # Only register a name when both braces were actually found.
            if end != -1:
                validcmds.add(line[start + 1:end])
        for cmd in texcmd.findall(line):
            if cmd not in validcmds:
                # cmd already carries its leading backslash.
                print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

        # Style guide warnings
        if 'e.g.' in line or 'i.e.' in line:
            print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,))

        for dw in doubledwords.findall(line):
            print(r'Doubled word warning. "%s" on line %d' % (dw, lineno))

    # Anything still on the stacks was opened but never closed.
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    for b_lineno in bracestack:
        print("Unmatched { on line %d" % (b_lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0
195 |
|
---|
def main(args=None):
    """Command line entry point: check every named file, return an exit status.

    args is the argument list without the program name; when it is None,
    sys.argv[1:] is used.  File arguments containing '*' or '?' are
    expanded with glob.glob().

    Returns 0 after printing the module help (-h given, or no arguments at
    all), 1 when there is no file to check, 2 as soon as a file cannot be
    opened, and otherwise the largest value returned by checkit().
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if not arglist:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a fresh list.  Splicing into arglist while
    # enumerating it skipped the entry after a pattern that matched nothing.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)

    if not filenames:
        # Every argument was a wildcard without matches; max([]) would raise.
        print('No files matched: %s' % ' '.join(arglist))
        return 1

    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            # Name the file that actually failed, not the first argument.
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)
231 |
|
---|
if __name__ == '__main__':
    # Run the checker and pass its status to the shell as the exit code.
    exit_status = main()
    sys.exit(exit_status)