Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

ElementPath.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 9.3 KB

Line
1	#
2	# ElementTree
3	# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
4	#
5	# limited xpath support for element trees
6	#
7	# history:
8	# 2003-05-23 fl created
9	# 2003-05-28 fl added support for // etc
10	# 2003-08-27 fl fixed parsing of periods in element names
11	# 2007-09-10 fl new selection engine
12	# 2007-09-12 fl fixed parent selector
13	# 2007-09-13 fl added iterfind; changed findall to return a list
14	# 2007-11-30 fl added namespaces support
15	# 2009-10-30 fl added child element value filter
16	#
17	# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
18	#
19	# fredrik@pythonware.com
20	# http://www.pythonware.com
21	#
22	# --------------------------------------------------------------------
23	# The ElementTree toolkit is
24	#
25	# Copyright (c) 1999-2009 by Fredrik Lundh
26	#
27	# By obtaining, using, and/or copying this software and/or its
28	# associated documentation, you agree that you have read, understood,
29	# and will comply with the following terms and conditions:
30	#
31	# Permission to use, copy, modify, and distribute this software and
32	# its associated documentation for any purpose and without fee is
33	# hereby granted, provided that the above copyright notice appears in
34	# all copies, and that both that copyright notice and this permission
35	# notice appear in supporting documentation, and that the name of
36	# Secret Labs AB or the author not be used in advertising or publicity
37	# pertaining to distribution of the software without specific, written
38	# prior permission.
39	#
40	# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
41	# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
42	# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
43	# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
44	# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
45	# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
46	# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
47	# OF THIS SOFTWARE.
48	# --------------------------------------------------------------------
49
50	# Licensed to PSF under a Contributor Agreement.
51	# See http://www.python.org/psf/license for licensing details.
52
53	##
54	# Implementation module for XPath support. There's usually no reason
55	# to import this module directly; the <b>ElementTree</b> does this for
56	# you, if needed.
57	##
58
59	import re
60
# Tokenizer for the supported XPath subset.  Each match is a 2-tuple:
#   group 1 - an operator/punctuation token or a quoted string literal
#   group 2 - a tag name, optionally in "{uri}local" form
# Whitespace matches with both groups empty.  Alternatives are tried in
# order, so the two-character tokens ("::", "//", "..", "()") are listed
# before the single-character class that would otherwise split them.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"   # quoted string, single or double quotes
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"                 # empty argument list, as in last()
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )
72
def xpath_tokenizer(pattern, namespaces=None):
    """Yield (operator, tag) token pairs for an XPath pattern.

    Tags written as "prefix:name" are rewritten to "{uri}name" using the
    *namespaces* prefix map.  Tags already in "{uri}name" form, and tags
    without a colon, are passed through unchanged.

    Raises SyntaxError when a prefix is used but cannot be resolved,
    including when no prefix map was supplied at all.
    """
    for token in xpath_tokenizer_re.findall(pattern):
        name = token[1]
        has_prefix = bool(name) and name[0] != "{" and ":" in name
        if not has_prefix:
            yield token
            continue
        prefix, local = name.split(":", 1)
        try:
            if not namespaces:
                # a missing/empty map is reported like an unknown prefix
                raise KeyError
            uri = namespaces[prefix]
        except KeyError:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
        yield token[0], "{%s}%s" % (uri, local)
86
def get_parent_map(context):
    """Return the child -> parent mapping for the tree under context.root.

    The mapping is built on first use and stored on context.parent_map,
    so later calls with the same context return the same dictionary.
    """
    cached = context.parent_map
    if cached is not None:
        return cached
    mapping = {}
    context.parent_map = mapping
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
95
def prepare_child(next, token):
    """Build a selector yielding direct children whose tag is token[1]."""
    wanted = token[1]

    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag != wanted:
                    continue
                yield child
    return select
104
def prepare_star(next, token):
    """Build a selector yielding every direct child of each input element."""
    def select(context, result):
        for parent in result:
            for child in parent:
                # no tag filter: "*" matches all children
                yield child
    return select
111
def prepare_self(next, token):
    """Build the selector for ".": pass every input element through as is."""
    def select(context, result):
        for current in result:
            yield current
    return select
117
def prepare_descendant(next, token):
    """Build the selector for "//": match descendants at any depth.

    The token after "//" is consumed here.  It must be either "*" or a
    plain tag name; anything else raises SyntaxError.
    """
    token = next()
    kind = token[0]
    if kind == "*":
        tag = "*"
    elif kind:
        raise SyntaxError("invalid descendant")
    else:
        tag = token[1]

    def select(context, result):
        for ancestor in result:
            for node in ancestor.iter(tag):
                # iter() also visits the start element itself; skip it
                if node is ancestor:
                    continue
                yield node
    return select
132
def prepare_parent(next, token):
    """Build the selector for "..": yield each distinct parent once.

    Elements with no entry in the parent map are skipped silently.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        emitted = set()
        for child in result:
            if child not in parents:
                continue
            parent = parents[child]
            if parent in emitted:
                continue
            emitted.add(parent)
            yield parent
    return select
145
def prepare_predicate(next, token):
    """Compile a "[...]" predicate into a filtering selector.

    Tokens are read from *next* up to the closing "]".  The sequence of
    token kinds forms a signature string that picks the predicate type:

      "@-"      [@attribute]
      "@-='"    [@attribute='value']
      "-"       [tag] or [index]  (1-based position)
      "-='"     [tag='value']
      "-()"     [last()]
      "-()-"    [last()-offset]

    Raises SyntaxError for any other form, for a function other than
    last(), for an index below 1, and for a positive offset from last().
    StopIteration from *next* (no closing "]") is left to the caller.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while 1:
        token = next()
        if token[0] == "]":
            break
        if token[0] and token[0][:1] in "'\"":
            # string literal: strip the quotes and use "'" as its kind
            token = "'", token[0][1:-1]
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == "-='" and not re.match(r"\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                for e in elem.findall(tag):
                    if "".join(e.itertext()) == value:
                        yield elem
                        break
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            index = int(predicate[0]) - 1
            if index < 0:
                # [0] would otherwise become index -1 and silently pick
                # the last element
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                try:
                    offset = int(predicate[2])
                except ValueError:
                    raise SyntaxError("unsupported expression")
                if offset > 0:
                    # a positive offset would wrap around to the start
                    # of the sibling list
                    raise SyntaxError(
                        "XPath offset from last() must be negative")
                # the tag token carries its own minus sign ("-1"), so
                # last()-1 maps to list index -2
                index = offset - 1
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    # no parent known, or fewer siblings than the index
                    pass
        return select
    raise SyntaxError("invalid predicate")
225
# Dispatch table: maps the operator part of a token (token[0]) to the
# function that compiles that path step into a selector.  The empty
# string is the operator of a plain tag-name token.
ops = dict([
    ("", prepare_child),
    ("*", prepare_star),
    (".", prepare_self),
    ("..", prepare_parent),
    ("//", prepare_descendant),
    ("[", prepare_predicate),
])

# Cache of compiled selector lists, filled in by iterfind().
_cache = dict()
236
237	class _SelectorContext:
238	parent_map = None
239	def __init__(self, root):
240	self.root = root
241
242	# --------------------------------------------------------------------
243
244	##
245	# Generate all matching objects.
246
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the elements below *elem* matching *path*.

    *path* is compiled into a list of selector functions, which is cached
    and then applied in order starting from [elem].  *namespaces* is an
    optional prefix -> URI map used to expand "prefix:tag" names.

    Raises SyntaxError for an absolute path ("/...") or a malformed path.
    An empty path matches nothing.
    """
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*"  # implicit all (FIXME: keep this?)
    # The compiled selector embeds the expanded namespace URIs, so the
    # prefix map has to be part of the cache key; otherwise a selector
    # compiled for one map would be reused for a different one.
    if namespaces:
        cache_key = (path, tuple(sorted(namespaces.items())))
    else:
        cache_key = (path, None)
    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        tokens = iter(xpath_tokenizer(path, namespaces))

        def next_token():
            # builtin next() works on both Python 2.6+ and Python 3
            return next(tokens)

        try:
            token = next_token()
        except StopIteration:
            # no tokens at all (empty path): nothing can match
            return iter(())
        selector = []
        while 1:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                # a prepare function ran out of tokens mid-step
                raise SyntaxError("invalid path")
            try:
                token = next_token()
                if token[0] == "/":
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result
279
280	##
281	# Find first matching object.
282
def find(elem, path, namespaces=None):
    """Return the first element matching *path*, or None if none match."""
    try:
        # builtin next() instead of the Python-2-only .next() method
        return next(iterfind(elem, path, namespaces))
    except StopIteration:
        return None
288
289	##
290	# Find all matching objects.
291
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
294
295	##
296	# Find text for first matching object.
297
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path*.

    Returns *default* when nothing matches, and "" when the matching
    element has no text.
    """
    try:
        # builtin next() instead of the Python-2-only .next() method
        match = next(iterfind(elem, path, namespaces))
    except StopIteration:
        return default
    return match.text or ""

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/xml/etree/ElementPath.py

Download in other formats: