Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

conv.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 9.4 KB

Line
1	# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2	# Licensed to PSF under a Contributor Agreement.
3
4	"""Convert graminit.[ch] spit out by pgen to Python code.
5
6	Pgen is the Python parser generator. It is useful to quickly create a
7	parser from a grammar file in Python's grammar notation. But I don't
8	want my parsers to be written in C (yet), so I'm translating the
9	parsing tables to Python data structures and writing a Python parse
10	engine.
11
12	Note that the token numbers are constants determined by the standard
13	Python tokenizer. The standard token module defines these numbers and
14	their names (the names are not used much). The token numbers are
15	hardcoded into the Python tokenizer and into pgen. A Python
16	implementation of the Python tokenizer is also available, in the
17	standard tokenize module.
18
19	On the other hand, symbol numbers (representing the grammar's
20	non-terminals) are assigned by pgen based on the actual grammar
21	input.
22
23	Note: this module is pretty much obsolete; the pgen module generates
24	equivalent grammar tables directly from the Grammar.txt input file
25	without having to invoke the Python pgen C program.
26
27	"""
28
29	# Python imports
30	import re
31
32	# Local imports
33	from pgen2 import grammar, token
34
35
class Converter(grammar.Grammar):
    """Grammar subclass that reads classic pgen output files.

    The run() method reads the tables as produced by the pgen parser
    generator, typically contained in two C files, graminit.h and
    graminit.c.  The other methods are for internal use only.

    See the base class for more documentation.

    """

    def run(self, graminit_h, graminit_c):
        """Load the grammar tables from the text files written by pgen.

        graminit_h and graminit_c are the file names handed to
        parse_graminit_h() and parse_graminit_c() respectively.  The
        header is parsed first because the .c parser cross-checks the
        dfa table against the symbol tables built from the header.

        The return values of the two parse steps are not inspected.

        """
        self.parse_graminit_h(graminit_h)
        self.parse_graminit_c(graminit_c)
        self.finish_off()

    def parse_graminit_h(self, filename):
        """Parse the .h file written by pgen.  (Internal)

        This file is a sequence of #define statements defining the
        nonterminals of the grammar as numbers.  We build two tables
        mapping the numbers to names and back (self.symbol2number and
        self.number2symbol).

        Blank lines are skipped silently; any other line that is not a
        "#define NAME NUMBER" statement is reported on stdout and
        skipped.

        Returns False if the file cannot be opened, True otherwise.
        A symbol or number that is defined twice trips an assert.

        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        self.symbol2number = {}
        self.number2symbol = {}
        with f:
            for lineno, line in enumerate(f, 1):
                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
                if not mo:
                    # Only complain about lines that have content; a
                    # blank line must not fall through to mo.groups().
                    if line.strip():
                        print("%s(%s): can't parse %s" % (filename, lineno,
                                                          line.strip()))
                    continue
                symbol, number = mo.groups()
                number = int(number)
                assert symbol not in self.symbol2number
                assert number not in self.number2symbol
                self.symbol2number[symbol] = number
                self.number2symbol[number] = symbol
        return True

    def parse_graminit_c(self, filename):
        """Parse the .c file written by pgen.  (Internal)

        The file looks as follows.  The first two lines are always this:

            #include "pgenheaders.h"
            #include "grammar.h"

        After that come four blocks:

        1) one or more state definitions
        2) a table defining dfas
        3) a table defining labels
        4) a struct defining the grammar

        A state definition has the following form:
        - one or more arc arrays, each of the form:
            static arc arcs_<n>_<m>[<k>] = {
                    {<i>, <j>},
                    ...
            };
        - followed by a state array, of the form:
            static state states_<s>[<t>] = {
                    {<k>, arcs_<n>_<m>},
                    ...
            };

        On success this sets self.states, self.dfas, self.labels and
        self.start.  It relies on self.symbol2number and
        self.number2symbol, so parse_graminit_h() must have run first.

        Returns False if the file cannot be opened; otherwise the
        method falls off the end (returns None).  Any line that does
        not have the expected shape trips an assert whose argument is
        the (lineno, line) pair at fault.

        """
        try:
            f = open(filename)
        except IOError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        with f:
            # The code below essentially uses f's iterator-ness: every
            # step pulls exactly one more line with next(f).
            lineno = 0

            # Expect the two #include lines
            lineno, line = lineno+1, next(f)
            assert line == '#include "pgenheaders.h"\n', (lineno, line)
            lineno, line = lineno+1, next(f)
            assert line == '#include "grammar.h"\n', (lineno, line)

            # Parse the state definitions
            lineno, line = lineno+1, next(f)
            allarcs = {}    # (n, m) -> list of (i, j) arcs
            states = []     # states[s] is a list of arc lists
            while line.startswith("static arc "):
                # One or more arc arrays ...
                while line.startswith("static arc "):
                    mo = re.match(
                        r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
                    assert mo, (lineno, line)
                    n, m, k = map(int, mo.groups())
                    arcs = []
                    for _ in range(k):
                        lineno, line = lineno+1, next(f)
                        mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                        assert mo, (lineno, line)
                        i, j = map(int, mo.groups())
                        arcs.append((i, j))
                    lineno, line = lineno+1, next(f)
                    assert line == "};\n", (lineno, line)
                    allarcs[(n, m)] = arcs
                    lineno, line = lineno+1, next(f)
                # ... followed by the state array that refers to them.
                mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$",
                              line)
                assert mo, (lineno, line)
                s, t = map(int, mo.groups())
                # State arrays must appear in numerical order.
                assert s == len(states), (lineno, line)
                state = []
                for _ in range(t):
                    lineno, line = lineno+1, next(f)
                    mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                    assert mo, (lineno, line)
                    k, n, m = map(int, mo.groups())
                    arcs = allarcs[n, m]
                    assert k == len(arcs), (lineno, line)
                    state.append(arcs)
                states.append(state)
                lineno, line = lineno+1, next(f)
                assert line == "};\n", (lineno, line)
                lineno, line = lineno+1, next(f)
            self.states = states

            # Parse the dfas; each entry spans two lines
            dfas = {}
            mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            for _ in range(ndfas):
                lineno, line = lineno+1, next(f)
                mo = re.match(
                    r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
                    line)
                assert mo, (lineno, line)
                symbol = mo.group(2)
                number, x, y, z = map(int, mo.group(1, 3, 4, 5))
                assert self.symbol2number[symbol] == number, (lineno, line)
                assert self.number2symbol[number] == symbol, (lineno, line)
                assert x == 0, (lineno, line)
                state = states[z]
                assert y == len(state), (lineno, line)
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
                assert mo, (lineno, line)
                first = {}
                # NOTE: eval() on file content.  The regex above only
                # admits a double-quoted run of \ooo octal escapes, so
                # eval sees nothing but a plain string literal.
                rawbitset = eval(mo.group(1))
                # Expand the bitset: bit j of byte number byteno being
                # set records label byteno*8 + j in the first set.
                for byteno, c in enumerate(rawbitset):
                    byte = ord(c)
                    for j in range(8):
                        if byte & (1 << j):
                            first[byteno*8 + j] = 1
                dfas[number] = (state, first)
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.dfas = dfas

            # Parse the labels
            labels = []
            lineno, line = lineno+1, next(f)
            mo = re.match(r"static label labels\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            for _ in range(nlabels):
                lineno, line = lineno+1, next(f)
                # The second field is either a bare 0 or a quoted word.
                mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
                assert mo, (lineno, line)
                x, y = mo.groups()
                x = int(x)
                if y == "0":
                    y = None
                else:
                    # NOTE: eval() on file content, limited by the
                    # regex to a double-quoted run of word characters.
                    y = eval(y)
                labels.append((x, y))
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.labels = labels

            # Parse the grammar struct
            lineno, line = lineno+1, next(f)
            assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+),$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            assert ndfas == len(self.dfas)
            lineno, line = lineno+1, next(f)
            assert line == "\tdfas,\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+{(\d+), labels},$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            assert nlabels == len(self.labels), (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+)$", line)
            assert mo, (lineno, line)
            start = int(mo.group(1))
            assert start in self.number2symbol, (lineno, line)
            self.start = start
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)

            # Nothing may follow the closing brace of the struct.
            try:
                lineno, line = lineno+1, next(f)
            except StopIteration:
                pass
            else:
                assert 0, (lineno, line)

    def finish_off(self):
        """Create additional useful structures.  (Internal).

        Builds self.keywords and self.tokens from self.labels, so the
        labels table must already have been loaded.

        """
        self.keywords = {}  # map from keyword strings to arc labels
        self.tokens = {}    # map from numeric token values to arc labels
        for ilabel, (toktype, value) in enumerate(self.labels):
            if toktype == token.NAME and value is not None:
                self.keywords[value] = ilabel
            elif value is None:
                self.tokens[toktype] = ilabel

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/lib2to3/pgen2/conv.py

Download in other formats: