# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

# Python imports
import pickle

# Local imports
from . import token, tokenize


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly. The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses. The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass). The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers. Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is an (i, j) pair where i is a label and j is
                     a state number. The DFA number is the index into
                     this list. (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords. The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """
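
    # A purely illustrative sketch of what these tables might hold for a tiny
    # hypothetical grammar (real tables are generated by the pgen/conv
    # subclasses, never written by hand):
    #
    #   symbol2number = {"file_input": 256}
    #   number2symbol = {256: "file_input"}
    #   labels        = [(0, "EMPTY"), (token.NAME, None), (token.NEWLINE, None)]
    #   tokens        = {token.NAME: 1, token.NEWLINE: 2}
    #   states        = [[[(1, 1), (2, 1)],   # state 0: NAME or NEWLINE -> state 1
    #                     [(0, 1)]]]          # state 1: final (arc (0, j), j == 1)
    #   dfas          = {256: (states[0], {1: 1, 2: 1})}  # first set keyed by label
    #   start         = 256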

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        f = open(filename, "wb")
        pickle.dump(self.__dict__, f, 2)
        f.close()

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        f = open(filename, "rb")
        d = pickle.load(f)
        f.close()
        self.__dict__.update(d)
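
    # Illustrative round trip (hypothetical file names; the tables are normally
    # built first by a subclass, e.g. via the pgen module's generate_grammar()):
    #
    #   g = pgen.generate_grammar("Grammar.txt")
    #   g.dump("Grammar.pickle")      # written with pickle protocol 2
    #   g2 = Grammar()
    #   g2.load("Grammar.pickle")     # g2 now carries the same tables as g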

    def copy(self):
        """
        Copy the grammar.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print "s2n"
        pprint(self.symbol2number)
        print "n2s"
        pprint(self.number2symbol)
        print "states"
        pprint(self.states)
        print "dfas"
        pprint(self.dfas)
        print "labels"
        pprint(self.labels)
        print "start", self.start


# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

opmap = {}
for line in opmap_raw.splitlines():
    if line:
        op, name = line.split()
        opmap[op] = getattr(token, name)
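
# For example (illustrative): tokenize reports "!=" and "<>" alike with the
# generic code token.OP; opmap recovers the precise token number the parser
# needs:
#
#   opmap["+="] == token.PLUSEQUAL
#   opmap["<>"] == token.NOTEQUAL    # "<>" and "!=" share one token code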