Context Navigation

buildindex.py

Visit:

Last change on this file was 3225, checked in by bird, 18 years ago
Python 2.5
File size: 10.7 KB

Line
1	#! /usr/bin/env python
2
3	__version__ = '$Revision: 36356 $'
4
5	import os.path
6	import re
7	import string
8	import sys
9
10	from xml.sax.saxutils import quoteattr
11
12
# Bound join methods used throughout the module.
bang_join = "!".join
null_join = "".join

# Ordered (compiled pattern, replacement text) pairs that Node.__init__
# applies, in sequence, to every raw index entry.
REPLACEMENTS = list(
    (re.compile(regex), plain)
    for regex, plain in (
        # Hackish way to deal with macros replaced with simple text
        (r"\\ABC\b", "ABC"),
        (r"\\ASCII\b", "ASCII"),
        (r"\\Cpp\b", "C++"),
        (r"\\EOF\b", "EOF"),
        (r"\\NULL\b", "NULL"),
        (r"\\POSIX\b", "POSIX"),
        (r"\\UNIX\b", "Unix"),
        # deal with turds left over from LaTeX2HTML
        (r"<#\d+#>", ""),
    )
)
28
class Node:
    """A single index entry.

    Attributes:
        links: list of link strings; starts with the one passed to the
            constructor.
        seqno: the sequence number exactly as passed to the constructor.
        text:  the entry's parts as produced by split_entry_text().
        key:   the entry's parts as produced by split_entry_key().
    """

    # Class-level default; split_columns() overrides it per instance on the
    # first node of a column, and format_column() reads it to decide where
    # to print " (continued)".
    continuation = 0

    def __init__(self, link, str, seqno):
        """Build a node from a link, a raw entry string and a sequence number.

        Every REPLACEMENTS substitution is applied to the entry before it is
        split into `text` and `key`.
        """
        self.links = [link]
        self.seqno = seqno
        entry = str
        for macro_rx, plain in REPLACEMENTS:
            entry = macro_rx.sub(plain, entry)
        # build up the text
        self.text = split_entry_text(entry)
        self.key = split_entry_key(entry)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        order = self.cmp_entry(other)
        if order:
            return order
        return cmp(self.seqno, other.seqno)

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        depth = min(len(self.key), len(other.key))
        for level in range(depth):
            # Compare the key part first and fall back to the text part.
            order = cmp_part(self.key[level], other.key[level])
            if not order:
                order = cmp_part(self.text[level], other.text[level])
            if order:
                return order
        # All shared levels compare equal: fall back to whole-list comparison.
        return cmp(self.key, other.key) or cmp(self.text, other.text)

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return the node as one text line: the links and the '!'-joined
        text separated by "\\1" characters, then "###" and the sequence
        number."""
        joined_links = "\1".join(self.links)
        joined_text = bang_join(self.text)
        return "%s\1%s###%s\n" % (joined_links, joined_text, self.seqno)
67
68
def cmp_part(s1, s2):
    """Three-way comparison of two entry parts, mostly ignoring case.

    Returns 0 when the strings are identical.  Otherwise a string whose
    lower-cased form is a proper prefix of the other's sorts first; failing
    that, the lower-cased strings are compared, and the original strings
    are used only to break a tie.

    Returns -1, 0 or 1.
    """
    def _cmp(a, b):
        # Three-way comparison without the cmp() builtin, which Python 3
        # no longer provides.
        return (a > b) - (a < b)

    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    return _cmp(l1, l2) or _cmp(s1, s2)
83
84
def split_entry(str, which):
    """Split an index entry into its '!'-separated parts.

    Each part is further split on '@'.  When that yields more than one
    piece, the piece at index `which` is kept; otherwise the part is kept
    unchanged.  Returns the list of selected pieces.
    """
    pieces_per_part = [part.split('@') for part in str.split('!')]
    return [pieces[0] if len(pieces) == 1 else pieces[which]
            for pieces in pieces_per_part]
96
97
98	_rmtt = re.compile(r"""(.)<tt(?: class=['"][a-z0-9]+["'])?>(.)</tt>(.*)$""",
99	re.IGNORECASE)
100	_rmparens = re.compile(r"")
101
def split_entry_key(str):
    """Return the list of sort-key parts for an index entry.

    Each part selected by split_entry(str, 1) has a <tt> element unwrapped
    if _rmtt matches it (otherwise it is lower-cased), has whatever
    _rmparens matches removed, and is finally passed through
    trim_ignored_letters().
    """
    keys = []
    for part in split_entry(str, 1):
        tt = _rmtt.match(part)
        if tt is not None:
            part = null_join(tt.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
113
114
def split_entry_text(str):
    """Return the list of display-text parts for an index entry.

    If the entry contains '<' and _rmtt matches it, the <tt> element is
    unwrapped first; the result is then split with split_entry(..., 1).
    """
    tt = _rmtt.match(str) if '<' in str else None
    if tt:
        str = null_join(tt.group(1, 2, 3))
    return split_entry(str, 1)
121
122
def load(fp):
    """Read index records from `fp` and return them as a list of Nodes.

    Each record is one line of the form written by Node.dump():
    link, "\\x01", entry text, "###", sequence number.  Lines that do not
    match are skipped.  `fp` only needs a readline() method.
    """
    # The fields are arbitrary-length, so the groups must be "(.*)"; a bare
    # "(.)" only accepts one-character links and entries.  "\x01" is the
    # literal separator character, not a regex backreference.
    rx = re.compile("(.*)\x01(.*)###(.*)$")
    nodes = []
    while True:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, text, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, text, seqno))
    return nodes
135
136
def trim_ignored_letters(s):
    """Return `s` lower-cased, without a leading "$" if it has one."""
    # ignore $ to keep environment variables with the
    # leading letter from the name
    stem = s[1:] if s.startswith("$") else s
    return stem.lower()
144
def get_first_letter(s):
    """Return the character under which the entry text `s` is grouped.

    Text starting with "<tex2html_percent_mark>" is grouped under "%";
    anything else under the first character of trim_ignored_letters(s).
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
150
151
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their top-level text.

    Returns a list of (letter, [nodes...]) tuples in input order.  A new
    group starts whenever get_first_letter(node.text[0]) differs from the
    previous node's letter.  An empty input gives an empty list.
    """
    letter_groups = []
    current = None
    members = None
    for node in nodes:
        initial = get_first_letter(node.text[0])
        if members is None or initial != current:
            # First node, or the letter changed: open a new group.
            current = initial
            members = []
            letter_groups.append((current, members))
        members.append(node)
    return letter_groups
168
169
def group_symbols(groups):
    """Merge the leading non-identifier letter groups into one "Symbols" group.

    `groups` is a list of (letter, nodes) tuples and is modified in place.
    Leading groups whose letter is not an ASCII letter or "_" are removed
    and their nodes are combined, in order, into a single
    ("Symbols", nodes) tuple inserted at the front.  Nothing is inserted
    when there are no such groups.  Returns None.
    """
    ident_letters = string.ascii_letters + "_"
    entries = []
    leading = 0
    # Iterating instead of indexing groups[0] keeps this safe when `groups`
    # is empty or consists solely of symbol groups.
    for letter, members in groups:
        if letter in ident_letters:
            break
        entries.extend(members)
        leading += 1
    del groups[:leading]
    if entries:
        groups.insert(0, ("Symbols", entries))
178
179
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Split `nodes` into `columns` consecutive slices.

    With columns <= 1 the result is [nodes] (the same list object).
    Otherwise the leading columns are one item taller than the trailing
    ones when the length does not divide evenly, and the result is padded
    with empty lists when there are fewer nodes than columns.

    When the first node of a column shares leading key parts with the last
    node of the previous column, its `continuation` attribute is set to
    the number of shared parts.

    `nodes` itself is left untouched: the columns are slices of it.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = [nodes[i * colheight:(i + 1) * colheight] for i in range(numlong)]
    # Work on a slice of what is left rather than deleting from the
    # caller's list.
    rest = nodes[numlong * colheight:]
    colheight = colheight - 1
    if colheight == 0:
        # The long columns held one node each; the remaining ones are empty.
        cols.extend([[] for _ in range(columns - len(cols))])
    else:
        for i in range(len(rest) // colheight):
            cols.append(rest[i * colheight:(i + 1) * colheight])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for left, right in zip(cols, cols[1:]):
        if not left or not right:
            return cols
        last, first = left[-1], right[0]
        shared = 0
        for old, new in zip(last.key, first.key):
            if old != new:
                break
            shared += 1
        if shared:
            first.continuation = shared
    return cols
225
226
# Indentation emitted once per nesting level of the generated <dl> lists.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Render one column of nodes as nested HTML <dl> lists.

    Consecutive nodes that share leading text parts are nested under a
    common <dt>; a shared heading is suffixed with " (continued)" when the
    node's `continuation` exceeds that heading's level.  The first link of
    a node wraps its last text part; further links are added as "[Link]"
    anchors.  Returns the markup as one string.
    """
    open_dl = "<dl compact='compact'>"
    out = [open_dl]
    emit = out.append
    depth = 0
    last_text = []
    for node in nodes:
        text = node.text
        # Count the leading parts shared with the previous entry.
        shared = 0
        for old, new in zip(last_text, text):
            if old != new:
                break
            shared += 1
        if shared > depth:
            emit(open_dl * (shared - depth) + "\n")
        elif depth > shared:
            # Close the lists that the previous entry had opened deeper.
            emit("\n")
            emit(depth * DL_LEVEL_INDENT)
            emit("</dl>" * (depth - shared))
        depth = shared
        # Open a heading (and a nested list) for each new intermediate part.
        for i in range(shared, len(text) - 1):
            depth += 1
            suffix = ""
            if node.continuation > i:
                suffix = " (continued)"
            emit("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
                 % (text[i], suffix, depth * DL_LEVEL_INDENT))
        pad = depth * DL_LEVEL_INDENT
        emit("\n%s<dt>%s%s</a>" % (pad, node.links[0], text[-1]))
        for link in node.links[1:]:
            emit(",\n%s %s[Link]</a>" % (pad, link))
        last_text = text
    emit("\n")
    emit("</dl>" * (depth + 1))
    return "".join(out)
267
268
def format_nodes(nodes, columns=1):
    """Render `nodes` as HTML, optionally laid out in several columns.

    With columns <= 1 the result is simply format_column(nodes).  Otherwise
    the nodes are divided with split_columns() and each column is rendered
    into its own <td> of a one-row, full-width table.
    """
    if columns <= 1:
        return format_column(nodes)
    # Equal percentage width per column (integer division).
    colwidth = 100 // columns
    strings = ['<table width="100%"><tr valign="top">']
    append = strings.append
    for col in split_columns(nodes, columns):
        append('<td width="%d%%">\n' % colwidth)
        append(format_column(col))
        append("\n</td>")
    append("\n</tr></table>")
    return null_join(strings)
287
288
def format_letter(letter):
    """Return the HTML section heading for one letter group.

    The heading is an <hr /> followed by an <h2> whose id is
    "letter-" + letter, quoted as an attribute value.  "." and "_" get a
    descriptive label; any other letter is shown capitalized.
    """
    from xml.sax.saxutils import escape

    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.capitalize()
    # The id goes through quoteattr(); escape the visible text as well so
    # that a "&" or "<" group letter does not produce broken markup.
    return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" \
           % (quoteattr("letter-" + letter), escape(lettername))
298
299
def format_html_letters(nodes, columns, group_symbol_nodes):
    """Render the index with one section per first letter.

    The nodes are grouped with split_letters(); when `group_symbol_nodes`
    is true, group_symbols() is applied to the groups.  The output starts
    with a centered navigation bar linking to every section, followed by
    each section's heading (format_letter) and body (format_nodes).
    """
    letter_groups = split_letters(nodes)
    if group_symbol_nodes:
        group_symbols(letter_groups)
    items = ["<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
             for letter, _ in letter_groups]
    # The separator is a plain "|"; writing it as "\|" would put a literal
    # backslash into the page.
    s = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return null_join(s)
313
def format_html(nodes, columns):
    """Render the index without per-letter sections.

    Delegates straight to format_nodes().
    """
    html = format_nodes(nodes, columns)
    return html
316
317
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    A node for which cmp_entry() against the preceding kept node returns 0
    is dropped, and its first link is appended to that node's `links`.
    `nodes` is updated in place; returns None.
    """
    if len(nodes) < 2:
        return
    # Collect the survivors and write them back once, instead of deleting
    # from the middle of the list on every duplicate.
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    nodes[:] = kept
333
334
def dump(nodes, fp):
    """Write each node's dump() text to `fp`, in order."""
    write = fp.write
    for entry in nodes:
        write(entry.dump())
338
339
def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse `nodes` in place, then render them as HTML.

    When `letters` is true the output is split into per-letter sections
    (format_html_letters); otherwise it is one plain listing (format_html).
    """
    nodes.sort()
    collapse(nodes)
    if not letters:
        return format_html(nodes, columns)
    return format_html_letters(nodes, columns, group_symbol_nodes)
347
348
def main():
    """Command-line entry point.

    Options:
        -c N, --columns=N      number of columns (default 1)
        -l, --letters          split the output into per-letter sections
        -o FILE, --output=FILE write the HTML to FILE ("-", the default,
                               means standard output)
        --group-symbols        merge leading symbol groups (default)
        --dont-group-symbols   keep one group per symbol

    Remaining arguments are input files; "-" (the default when none are
    given) means standard input.  A node count is reported on stderr when
    the HTML goes to stdout, and on stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # Mirror the handling of "-" for output: use the standard stream
            # instead of trying to open a file literally named "-".
            nodes.extend(load(sys.stdin))
            continue
        fp = open(fn)
        try:
            nodes.extend(load(fp))
        finally:
            fp.close()
    # Count before processing: process_nodes() collapses duplicates in place.
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        # Blank line followed by the summary line.
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))


if __name__ == "__main__":
    main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/python/2.5/Doc/tools/buildindex.py

Download in other formats: