1 | #! /usr/bin/env python
|
---|
2 |
|
---|
3 | __version__ = '$Revision: 36356 $'
|
---|
4 |
|
---|
5 | import os.path
|
---|
6 | import re
|
---|
7 | import string
|
---|
8 | import sys
|
---|
9 |
|
---|
10 | from xml.sax.saxutils import quoteattr
|
---|
11 |
|
---|
12 |
|
---|
# Pre-bound str.join helpers.
# bang_join puts "!" between the pieces; null_join concatenates them directly.
null_join = "".join
bang_join = "!".join
15 |
|
---|
def _macro_rule(macro, text):
    # Build the (compiled pattern, replacement) pair for a backslash-prefixed
    # macro name that must end on a word boundary.
    return re.compile(r"\\%s\b" % macro), text


# Ordered (compiled pattern, replacement string) pairs; Node.__init__ applies
# each one in turn to the raw entry string.
REPLACEMENTS = [
    # Hackish way to deal with macros replaced with simple text
    _macro_rule("ABC", "ABC"),
    _macro_rule("ASCII", "ASCII"),
    _macro_rule("Cpp", "C++"),
    _macro_rule("EOF", "EOF"),
    _macro_rule("NULL", "NULL"),
    _macro_rule("POSIX", "POSIX"),
    _macro_rule("UNIX", "Unix"),
    # deal with turds left over from LaTeX2HTML
    (re.compile(r"<#\d+#>"), ""),
]
28 |
|
---|
def _three_way(a, b):
    # Portable equivalent of the cmp() builtin: returns -1, 0 or 1.
    return (a > b) - (a < b)


class Node:
    """One index entry: its link(s), display text, sort key and sequence number.

    Attributes:
        links: list of link strings; starts with the single link passed in.
        seqno: the sequence number exactly as passed to the constructor.
        text:  per-level display strings from split_entry_text().
        key:   per-level sort keys from split_entry_key().
        continuation: class default 0; split_columns() sets it on an
            instance to the number of leading key levels shared with the
            last entry of the preceding column.
    """

    continuation = 0

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        entry = str
        for pattern, replacement in REPLACEMENTS:
            entry = pattern.sub(replacement, entry)
        # build up the text
        self.text = split_entry_text(entry)
        self.key = split_entry_key(entry)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _three_way(self.seqno, other.seqno)

    def __lt__(self, other):
        """Rich-comparison hook so list.sort() also works where __cmp__
        is not consulted (Python 3)."""
        return self.__cmp__(other) < 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        # Compare level by level over the levels both entries have; the key
        # decides first, the display text breaks ties.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        # All shared levels compare equal: fall back to whole-list comparison.
        return (c
                or _three_way(self.key, other.key)
                or _three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return the entry as one line: the links joined by "\\1", then
        "\\1", the "!"-joined text, "###", the sequence number and a
        newline."""
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  bang_join(self.text),
                  self.seqno)
67 |
|
---|
68 |
|
---|
def cmp_part(s1, s2):
    """Three-way compare two strings, mostly ignoring case.

    Returns 0 only when the strings are identical.  Otherwise, a string
    that is a case-insensitive proper prefix of the other sorts first;
    failing that, the lowercased strings are compared, and the original
    strings break any remaining tie.

    Returns -1, 0 or 1.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    # (a > b) - (a < b) is the portable spelling of cmp(a, b).
    return ((l1 > l2) - (l1 < l2)) or ((s1 > s2) - (s1 < s2))
83 |
|
---|
84 |
|
---|
def split_entry(str, which):
    """Split an index entry into one string per level.

    Levels are separated by "!".  Within a level, "@" separates
    alternatives: a level without "@" is used whole, otherwise the
    alternative at index *which* is taken.

    Returns the list of selected strings, one per level.
    """
    selected = []
    for level in str.split('!'):
        fields = level.split('@')
        if len(fields) == 1:
            selected.append(fields[0])
        else:
            selected.append(fields[which])
    return selected
96 |
|
---|
97 |
|
---|
98 | _rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
|
---|
99 | re.IGNORECASE)
|
---|
100 | _rmparens = re.compile(r"\(\)")
|
---|
101 |
|
---|
def split_entry_key(str):
    """Return the list of per-level sort keys for an index entry.

    Each level chosen by split_entry(str, 1) has its <tt> markup removed
    (when _rmtt matches) or is lowercased, then has "()" removed, and
    finally goes through trim_ignored_letters().

    A real list is returned (not a map object) so that callers can use
    len() and indexing on the result.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = null_join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
113 |
|
---|
114 |
|
---|
def split_entry_text(str):
    """Return the list of per-level display strings for an index entry.

    When the entry contains "<" and _rmtt matches it, the <tt> markup is
    removed (its contents are kept) before the entry is handed to
    split_entry().
    """
    text = str
    if '<' in text:
        found = _rmtt.match(text)
        if found:
            text = null_join(found.group(1, 2, 3))
    return split_entry(text, 1)
121 |
|
---|
122 |
|
---|
def load(fp):
    """Read index entries from the open file *fp* and return a list of Nodes.

    Each line is matched against "<link>\\1<entry>###<seqno>"; lines that
    do not match are skipped.  The sequence number is passed to Node as
    the matched string.
    """
    # "\1" in this non-raw string is the character chr(1), the same
    # separator Node.dump() writes -- not a regex back-reference.
    rx = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    for line in fp:
        m = rx.match(line)
        if not m:
            continue
        link, entry, seqno = m.group(1, 2, 3)
        nodes.append(Node(link, entry, seqno))
    return nodes
135 |
|
---|
136 |
|
---|
def trim_ignored_letters(s):
    """Return *s* lowercased, with one leading "$" dropped if present."""
    # ignore $ to keep environment variables with the
    # leading letter from the name
    if s.startswith("$"):
        s = s[1:]
    return s.lower()
144 |
|
---|
def get_first_letter(s):
    """Return the character under which the entry text *s* is grouped.

    Text starting with the "<tex2html_percent_mark>" placeholder is
    grouped under "%"; otherwise the first character of
    trim_ignored_letters(s) is used.
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
150 |
|
---|
151 |
|
---|
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their top-level text.

    Returns a list of (letter, nodes) tuples in input order; a new group
    starts each time the letter differs from that of the preceding node.
    An empty input gives an empty list.
    """
    letter_groups = []
    for node in nodes:
        initial = get_first_letter(node.text[0])
        if not letter_groups or letter_groups[-1][0] != initial:
            letter_groups.append((initial, []))
        letter_groups[-1][1].append(node)
    return letter_groups
168 |
|
---|
169 |
|
---|
def group_symbols(groups):
    """Merge the leading non-identifier groups into one "Symbols" group.

    *groups* is a list of (letter, nodes) tuples and is modified in place.
    Every leading group whose letter is not an ASCII letter or "_" is
    removed; if those groups held any nodes, a single ("Symbols", nodes)
    group is inserted at the front.  An empty list, or a list made up
    solely of symbol groups, is handled without error.
    """
    ident_letters = string.ascii_letters + "_"
    # Count the leading symbol groups, stopping at the end of the list.
    split = 0
    while split < len(groups) and groups[split][0] not in ident_letters:
        split += 1
    entries = []
    for _letter, members in groups[:split]:
        entries += members
    del groups[:split]
    if entries:
        groups.insert(0, ("Symbols", entries))
178 |
|
---|
179 |
|
---|
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Distribute *nodes* over *columns* lists, keeping their order.

    With columns <= 1 the input list itself is returned as the only
    column.  Otherwise the leading columns receive one more node than the
    remaining ones when the nodes do not divide evenly, and missing
    columns are filled with empty lists.  The input list is left
    unmodified; the columns are new lists.

    When an entry continues across a column break, the first node of the
    later column gets its ``continuation`` attribute set to the number of
    leading key levels it shares with the last node of the earlier column.

    Returns the list of columns.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    if numlong:
        colheight += 1
    else:
        numlong = columns
    cols = [nodes[i * colheight:(i + 1) * colheight] for i in range(numlong)]
    # Work on the remainder through a slice rather than deleting from the
    # caller's list.
    rest = nodes[numlong * colheight:]
    colheight -= 1
    if colheight == 0:
        # Nothing is left for the short columns; pad with empty ones.
        cols.extend([[] for _ in range(columns - len(cols))])
    else:
        numshort = len(rest) // colheight
        for i in range(numshort):
            cols.append(rest[i * colheight:(i + 1) * colheight])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for left, right in zip(cols, cols[1:]):
        if not left or not right:
            # An empty column ends the marking pass.
            return cols
        prev = left[-1]
        following = right[0]
        for depth, (a, b) in enumerate(zip(prev.key, following.key)):
            if a != b:
                break
            following.continuation = depth + 1
    return cols
225 |
|
---|
226 |
|
---|
# Whitespace emitted once per nesting level in the generated HTML.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Render *nodes* as nested <dl> lists and return the HTML string.

    Consecutive nodes that share leading text levels are placed under the
    same headings.  A heading is tagged " (continued)" when the node's
    ``continuation`` value is greater than that heading's level index.
    The first link of a node wraps its last text level; each additional
    link is emitted as a "[Link]" anchor.
    """
    open_tag = "<dl compact='compact'>"
    pieces = [open_tag]
    depth = 0
    previous = []
    for node in nodes:
        current = node.text
        # Number of leading levels this entry shares with the previous one.
        shared = 0
        for before, now in zip(previous, current):
            if before != now:
                break
            shared += 1
        if shared > depth:
            pieces.append(open_tag * (shared - depth) + "\n")
        elif shared < depth:
            # Close the lists belonging to levels no longer shared.
            pieces.append("\n")
            pieces.append(depth * DL_LEVEL_INDENT)
            pieces.append("</dl>" * (depth - shared))
        depth = shared
        # Open a heading for every new level above the final one.
        for i in range(shared, len(current) - 1):
            depth += 1
            extra = ""
            if node.continuation > i:
                extra = " (continued)"
            pieces.append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
                          % (current[i], extra, depth * DL_LEVEL_INDENT))
        indent = depth * DL_LEVEL_INDENT
        pieces.append("\n%s<dt>%s%s</a>"
                      % (indent, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            pieces.append(",\n%s %s[Link]</a>" % (indent, link))
        previous = current
    pieces.append("\n")
    pieces.append("</dl>" * (depth + 1))
    return "".join(pieces)
267 |
|
---|
268 |
|
---|
def format_nodes(nodes, columns=1):
    """Return the HTML for *nodes*, laid out in *columns* columns.

    With one column (or fewer) this is just format_column(nodes).
    Otherwise the nodes are split with split_columns() and each column is
    rendered with format_column() inside its own cell of a one-row table,
    every cell getting an equal integer percentage width.
    """
    if columns <= 1:
        return format_column(nodes)
    colwidth = 100 // columns
    strings = ['<table width="100%"><tr valign="top">']
    for col in split_columns(nodes, columns):
        strings.append('<td width="%d%%">\n' % colwidth)
        strings.append(format_column(col))
        strings.append("\n</td>")
    strings.append("\n</tr></table>")
    return null_join(strings)
287 |
|
---|
288 |
|
---|
def format_letter(letter):
    """Return the HTML section heading for the index group *letter*.

    The heading is an <hr /> followed by an <h2> whose id is
    "letter-" + letter (quoted with quoteattr).  "." and "_" get spelled
    out names; any other value is shown capitalized.
    """
    special_names = {
        '.': ". (dot)",
        '_': "_ (underscore)",
    }
    lettername = special_names.get(letter)
    if lettername is None:
        lettername = letter.capitalize()
    anchor = quoteattr("letter-" + letter)
    return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" % (anchor, lettername)
298 |
|
---|
299 |
|
---|
def format_html_letters(nodes, columns, group_symbol_nodes):
    """Return the index HTML split into per-letter sections.

    The nodes are grouped with split_letters(); when *group_symbol_nodes*
    is true the groups are also passed through group_symbols().  The
    result begins with a centered bar of links to each "#letter-..."
    anchor, followed by a heading and the formatted nodes for every group.
    """
    letter_groups = split_letters(nodes)
    if group_symbol_nodes:
        group_symbols(letter_groups)
    jump_links = []
    for letter, _members in letter_groups:
        jump_links.append(
            "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter))
    sections = ["<hr /><center>\n%s</center>\n" % " |\n".join(jump_links)]
    for letter, members in letter_groups:
        sections.append(format_letter(letter))
        sections.append(format_nodes(members, columns))
    return null_join(sections)
313 |
|
---|
def format_html(nodes, columns):
    """Return the index HTML for *nodes* without per-letter sections.

    Delegates directly to format_nodes().
    """
    html = format_nodes(nodes, columns)
    return html
316 |
|
---|
317 |
|
---|
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    A node for which cmp_entry() against the most recently kept node
    returns 0 is dropped, and its first link is appended to that kept
    node's ``links``.  *nodes* is updated in place; nothing is returned.
    """
    if len(nodes) < 2:
        return
    # Build the surviving list in a single pass instead of deleting from
    # the middle of the list on every duplicate.
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    nodes[:] = kept
333 |
|
---|
334 |
|
---|
def dump(nodes, fp):
    """Write each node's dump() string to *fp*, in order."""
    write = fp.write
    for node in nodes:
        write(node.dump())
338 |
|
---|
339 |
|
---|
def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse *nodes* in place, then return the index HTML.

    When *letters* is true the output is split into per-letter sections
    (format_html_letters, which also receives *group_symbol_nodes*);
    otherwise format_html() produces a single list.
    """
    nodes.sort()
    collapse(nodes)
    if not letters:
        return format_html(nodes, columns)
    return format_html_letters(nodes, columns, group_symbol_nodes)
347 |
|
---|
348 |
|
---|
def main():
    """Command-line entry point: build the HTML index from input files.

    Options:
        -c N, --columns=N       number of columns (default 1)
        -l, --letters           split the index into per-letter sections
        -o FILE, --output=FILE  output file; "-" (the default) is stdout
        --group-symbols         group non-letter entries (default)
        --dont-group-symbols    keep non-letter entries in separate groups

    The remaining arguments are input files; "-", which is also the
    default when none are given, reads standard input.  The node count
    (taken before duplicates are collapsed) is reported on stderr when
    the HTML goes to stdout, and on stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" means standard input, mirroring "-" for the output file.
            nodes.extend(load(sys.stdin))
        else:
            fp = open(fn)
            try:
                nodes.extend(load(fp))
            finally:
                fp.close()
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        # Blank line followed by the summary line.
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))


if __name__ == "__main__":
    main()