1 | #! /usr/bin/env python
|
---|
2 |
|
---|
3 | """Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
|
---|
4 |
|
---|
5 | The output file has an extension of '.bkm' instead of '.out', since hyperref
|
---|
6 | already uses that extension.
|
---|
7 | """
|
---|
8 |
|
---|
9 | import getopt
|
---|
10 | import os
|
---|
11 | import re
|
---|
12 | import string
|
---|
13 | import sys
|
---|
14 |
|
---|
15 |
|
---|
# Each entry is a tuple of:
#
#   Section type, Section #, Title String, Page #, List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.
21 |
|
---|
# Verbose-mode pattern for one \contentsline record of a .toc file.  The
# capture groups are, in order: section type, optional section number
# (taken from \numberline), title string, and page number.
cline_re = r"""^
\\contentsline\ \{([a-z]*)} # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
(.*)} # title string
\{(\d+)}$""" # page number

# Compiled form of cline_re; re.VERBOSE makes the embedded whitespace and
# "#" comments above insignificant.
cline_rx = re.compile(cline_re, re.VERBOSE)
29 |
|
---|
# Direction value meaning "the new entry nests inside the current one".
OUTER_TO_INNER = -1

# Maps (current level, new level) to a direction: OUTER_TO_INNER to descend
# into the sub-entry list of the most recent entry, or a positive number of
# nesting levels to climb back out.  parse_toc() raises BadSectionNesting
# for any pair that is not listed here.
_transition_map = {
    ('chapter', 'section'): OUTER_TO_INNER,
    ('section', 'subsection'): OUTER_TO_INNER,
    ('subsection', 'subsubsection'): OUTER_TO_INNER,
    ('subsubsection', 'subsection'): 1,
    ('subsection', 'section'): 1,
    ('section', 'chapter'): 1,
    ('subsection', 'chapter'): 2,
    ('subsubsection', 'section'): 2,
    ('subsubsection', 'chapter'): 3,
}

# Section types that may change the nesting level; parse_toc() skips an
# entry of any other type unless it matches the current level.
INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")
45 |
|
---|
46 |
|
---|
class BadSectionNesting(Exception):
    """Signals a move between section levels that cannot be followed.

    The instance records the level being left (level), the level being
    entered (newsection), and the path and line number given by the raiser.
    """

    def __init__(self, level, newsection, path, lineno):
        # Keep all four pieces so __str__ can report exactly where the
        # unsupported transition happened.
        (self.level, self.newsection,
         self.path, self.lineno) = (level, newsection, path, lineno)

    def __str__(self):
        details = (self.level, self.newsection, self.path, self.lineno)
        template = "illegal transition from %s to %s at %s (line %s)"
        return template % details
59 |
|
---|
60 |
|
---|
def parse_toc(fp, bigpart=None):
    """Parse a LaTeX .toc file into a nested outline.

    fp is an open file-like object providing readline().  bigpart names
    the outermost sectioning level expected in the file; it defaults to
    'chapter'.

    Returns a list of (section type, section number, title, page number,
    sub-entries) tuples, where sub-entries is a list of the same shape.
    Lines that do not match cline_rx are echoed to stderr, prefixed with
    their line number.  Entries whose type is neither the current level
    nor one of INCLUDED_LEVELS are dropped.

    Raises BadSectionNesting when the file moves between two levels in a
    way _transition_map does not describe, when it descends before any
    outer entry exists, or when it climbs above the outermost level.
    """
    toc = top = []
    # Innermost list first: stack[0] is always the list being appended to.
    stack = [toc]
    level = bigpart or 'chapter'
    # The name is only used in diagnostics; not every file-like object
    # (e.g. an in-memory buffer) has one.
    path = getattr(fp, "name", "<unknown>")
    for lineno, line in enumerate(iter(fp.readline, ""), 1):
        m = cline_rx.match(line)
        if not m:
            # Report the unparsed line together with its line number.
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        title = clean_title(title)
        entry = (stype, snum, title, int(pageno), [])
        if stype == level:
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        try:
            direction = _transition_map[(level, stype)]
        except KeyError:
            raise BadSectionNesting(level, stype, path, lineno)
        if direction == OUTER_TO_INNER:
            if not toc:
                # Nothing to nest under yet, e.g. a section before the
                # first chapter.
                raise BadSectionNesting(level, stype, path, lineno)
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            if direction >= len(stack):
                # Would climb above the outermost list.
                raise BadSectionNesting(level, stype, path, lineno)
            # Climb back out by the requested number of levels.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top
99 |
|
---|
100 |
|
---|
# \hackscore{...} stands for an underscore; \raisebox{...} is layout only.
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
# A control word followed by whitespace.
title_rx = re.compile(r"\\([a-zA-Z])+\s+")


def _strip_macro(match):
    """Return the replacement text for one macro matched by title_rx."""
    macro = match.group(0)
    # \textunderscore carries visible content, so it is kept verbatim.
    if macro.startswith("\\textunderscore"):
        return macro
    return ""


def clean_title(title):
    """Reduce a LaTeX title string to text suitable for a PDF outline.

    Removes \\raisebox{...} groups, turns \\hackscore{...} into "\\_",
    deletes every control word that is followed by whitespace (except
    those starting with \\textunderscore), and finally drops all braces.
    Returns the cleaned string.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    # A single substitution pass visits every macro, including one that
    # directly follows a macro that was just removed.
    title = title_rx.sub(_strip_macro, title)
    # str.replace works on both Python 2 and 3, unlike string.maketrans
    # combined with the two-argument form of str.translate.
    return title.replace("{", "").replace("}", "")
121 |
|
---|
122 |
|
---|
def write_toc(toc, fp):
    """Write every top-level entry of toc, with its descendants, to fp."""
    outermost_layer = 0
    for top_entry in toc:
        write_toc_entry(top_entry, fp, outermost_layer)
126 |
|
---|
def write_toc_entry(entry, fp, layer):
    """Write the \\pdfoutline line for entry to fp, then for its children.

    entry is a (section type, section number, title, page number,
    sub-entries) tuple.  layer is the nesting depth of entry; it is only
    passed on, incremented, to the recursive calls.
    """
    _stype, number, heading, page, children = entry
    line = "\\pdfoutline goto name{page%03d}" % page
    if children:
        # A negative count is written for entries that have sub-entries.
        line = "%s count -%d" % (line, len(children))
    label = "%s %s" % (number, heading) if number else heading
    fp.write("%s {%s}\n" % (line, label))
    for child in children:
        write_toc_entry(child, fp, layer + 1)
138 |
|
---|
139 |
|
---|
def process(ifn, ofn, bigpart=None):
    """Convert the .toc file named ifn into the outline file named ofn.

    bigpart is passed through to parse_toc().  The input is parsed
    completely before the output file is opened, so a parse error does
    not create or truncate ofn.  Both files are closed on return, also
    when an exception is raised.
    """
    with open(ifn) as ifp:
        toc = parse_toc(ifp, bigpart)
    with open(ofn, "w") as ofp:
        write_toc(toc, ofp)
143 |
|
---|
144 |
|
---|
def main():
    """Command-line entry point.

    Accepts an optional "-c bigpart" option followed by one or more file
    names.  Each file name gets the extension ".toc" if it has none, and
    is converted into a file with the same base name and the extension
    ".bkm".  Without file arguments a usage message is written to stderr
    and the process exits with status 2.
    """
    bigpart = None
    opts, args = getopt.getopt(sys.argv[1:], "c:")
    if opts:
        bigpart = opts[0][1]
    if not args:
        # Write the usage text here: the file defines no usage() helper.
        prog = os.path.basename(sys.argv[0])
        sys.stderr.write("usage: %s [-c bigpart] file.toc ...\n" % prog)
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        process(base + (ext or ".toc"), base + ".bkm", bigpart)
157 |
|
---|
158 |
|
---|
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()