Changeset 391 for python/trunk/Lib/xml/etree/ElementPath.py
- Timestamp: Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location: python/trunk
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/xml/etree/ElementPath.py
r2 r391 1 1 # 2 2 # ElementTree 3 # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $3 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ 4 4 # 5 5 # limited xpath support for element trees … … 9 9 # 2003-05-28 fl added support for // etc 10 10 # 2003-08-27 fl fixed parsing of periods in element names 11 # 12 # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. 11 # 2007-09-10 fl new selection engine 12 # 2007-09-12 fl fixed parent selector 13 # 2007-09-13 fl added iterfind; changed findall to return a list 14 # 2007-11-30 fl added namespaces support 15 # 2009-10-30 fl added child element value filter 16 # 17 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 13 18 # 14 19 # fredrik@pythonware.com … … 18 23 # The ElementTree toolkit is 19 24 # 20 # Copyright (c) 1999-200 4by Fredrik Lundh25 # Copyright (c) 1999-2009 by Fredrik Lundh 21 26 # 22 27 # By obtaining, using, and/or copying this software and/or its … … 44 49 45 50 # Licensed to PSF under a Contributor Agreement. 46 # See http://www.python.org/ 2.4/license for licensing details.51 # See http://www.python.org/psf/license for licensing details. 47 52 48 53 ## … … 54 59 import re 55 60 56 xpath_tokenizer = re.compile( 57 "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+" 58 ).findall 59 60 class xpath_descendant_or_self: 61 pass 62 63 ## 64 # Wrapper for a compiled XPath. 65 66 class Path: 67 68 ## 69 # Create an Path instance from an XPath expression. 
70 71 def __init__(self, path): 72 tokens = xpath_tokenizer(path) 73 # the current version supports 'path/path'-style expressions only 74 self.path = [] 75 self.tag = None 76 if tokens and tokens[0][0] == "/": 61 xpath_tokenizer_re = re.compile( 62 "(" 63 "'[^']*'|\"[^\"]*\"|" 64 "::|" 65 "//?|" 66 "\.\.|" 67 "\(\)|" 68 "[/.*:\[\]\(\)@=])|" 69 "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" 70 "\s+" 71 ) 72 73 def xpath_tokenizer(pattern, namespaces=None): 74 for token in xpath_tokenizer_re.findall(pattern): 75 tag = token[1] 76 if tag and tag[0] != "{" and ":" in tag: 77 try: 78 prefix, uri = tag.split(":", 1) 79 if not namespaces: 80 raise KeyError 81 yield token[0], "{%s}%s" % (namespaces[prefix], uri) 82 except KeyError: 83 raise SyntaxError("prefix %r not found in prefix map" % prefix) 84 else: 85 yield token 86 87 def get_parent_map(context): 88 parent_map = context.parent_map 89 if parent_map is None: 90 context.parent_map = parent_map = {} 91 for p in context.root.iter(): 92 for e in p: 93 parent_map[e] = p 94 return parent_map 95 96 def prepare_child(next, token): 97 tag = token[1] 98 def select(context, result): 99 for elem in result: 100 for e in elem: 101 if e.tag == tag: 102 yield e 103 return select 104 105 def prepare_star(next, token): 106 def select(context, result): 107 for elem in result: 108 for e in elem: 109 yield e 110 return select 111 112 def prepare_self(next, token): 113 def select(context, result): 114 for elem in result: 115 yield elem 116 return select 117 118 def prepare_descendant(next, token): 119 token = next() 120 if token[0] == "*": 121 tag = "*" 122 elif not token[0]: 123 tag = token[1] 124 else: 125 raise SyntaxError("invalid descendant") 126 def select(context, result): 127 for elem in result: 128 for e in elem.iter(tag): 129 if e is not elem: 130 yield e 131 return select 132 133 def prepare_parent(next, token): 134 def select(context, result): 135 # FIXME: raise error if .. is applied at toplevel? 
136 parent_map = get_parent_map(context) 137 result_map = {} 138 for elem in result: 139 if elem in parent_map: 140 parent = parent_map[elem] 141 if parent not in result_map: 142 result_map[parent] = None 143 yield parent 144 return select 145 146 def prepare_predicate(next, token): 147 # FIXME: replace with real parser!!! refs: 148 # http://effbot.org/zone/simple-iterator-parser.htm 149 # http://javascript.crockford.com/tdop/tdop.html 150 signature = [] 151 predicate = [] 152 while 1: 153 token = next() 154 if token[0] == "]": 155 break 156 if token[0] and token[0][:1] in "'\"": 157 token = "'", token[0][1:-1] 158 signature.append(token[0] or "-") 159 predicate.append(token[1]) 160 signature = "".join(signature) 161 # use signature to determine predicate type 162 if signature == "@-": 163 # [@attribute] predicate 164 key = predicate[1] 165 def select(context, result): 166 for elem in result: 167 if elem.get(key) is not None: 168 yield elem 169 return select 170 if signature == "@-='": 171 # [@attribute='value'] 172 key = predicate[1] 173 value = predicate[-1] 174 def select(context, result): 175 for elem in result: 176 if elem.get(key) == value: 177 yield elem 178 return select 179 if signature == "-" and not re.match("\d+$", predicate[0]): 180 # [tag] 181 tag = predicate[0] 182 def select(context, result): 183 for elem in result: 184 if elem.find(tag) is not None: 185 yield elem 186 return select 187 if signature == "-='" and not re.match("\d+$", predicate[0]): 188 # [tag='value'] 189 tag = predicate[0] 190 value = predicate[-1] 191 def select(context, result): 192 for elem in result: 193 for e in elem.findall(tag): 194 if "".join(e.itertext()) == value: 195 yield elem 196 break 197 return select 198 if signature == "-" or signature == "-()" or signature == "-()-": 199 # [index] or [last()] or [last()-index] 200 if signature == "-": 201 index = int(predicate[0]) - 1 202 else: 203 if predicate[0] != "last": 204 raise SyntaxError("unsupported function") 205 if 
signature == "-()-": 206 try: 207 index = int(predicate[2]) - 1 208 except ValueError: 209 raise SyntaxError("unsupported expression") 210 else: 211 index = -1 212 def select(context, result): 213 parent_map = get_parent_map(context) 214 for elem in result: 215 try: 216 parent = parent_map[elem] 217 # FIXME: what if the selector is "*" ? 218 elems = list(parent.findall(elem.tag)) 219 if elems[index] is elem: 220 yield elem 221 except (IndexError, KeyError): 222 pass 223 return select 224 raise SyntaxError("invalid predicate") 225 226 ops = { 227 "": prepare_child, 228 "*": prepare_star, 229 ".": prepare_self, 230 "..": prepare_parent, 231 "//": prepare_descendant, 232 "[": prepare_predicate, 233 } 234 235 _cache = {} 236 237 class _SelectorContext: 238 parent_map = None 239 def __init__(self, root): 240 self.root = root 241 242 # -------------------------------------------------------------------- 243 244 ## 245 # Generate all matching objects. 246 247 def iterfind(elem, path, namespaces=None): 248 # compile selector pattern 249 if path[-1:] == "/": 250 path = path + "*" # implicit all (FIXME: keep this?) 251 try: 252 selector = _cache[path] 253 except KeyError: 254 if len(_cache) > 100: 255 _cache.clear() 256 if path[:1] == "/": 77 257 raise SyntaxError("cannot use absolute path on element") 78 while tokens: 79 op, tag = tokens.pop(0) 80 if tag or op == "*": 81 self.path.append(tag or op) 82 elif op == ".": 83 pass 84 elif op == "/": 85 self.path.append(xpath_descendant_or_self()) 86 continue 87 else: 88 raise SyntaxError("unsupported path syntax (%s)" % op) 89 if tokens: 90 op, tag = tokens.pop(0) 91 if op != "/": 92 raise SyntaxError( 93 "expected path separator (%s)" % (op or tag) 94 ) 95 if self.path and isinstance(self.path[-1], xpath_descendant_or_self): 96 raise SyntaxError("path cannot end with //") 97 if len(self.path) == 1 and isinstance(self.path[0], type("")): 98 self.tag = self.path[0] 99 100 ## 101 # Find first matching object. 
102 103 def find(self, element): 104 tag = self.tag 105 if tag is None: 106 nodeset = self.findall(element) 107 if not nodeset: 108 return None 109 return nodeset[0] 110 for elem in element: 111 if elem.tag == tag: 112 return elem 113 return None 114 115 ## 116 # Find text for first matching object. 117 118 def findtext(self, element, default=None): 119 tag = self.tag 120 if tag is None: 121 nodeset = self.findall(element) 122 if not nodeset: 123 return default 124 return nodeset[0].text or "" 125 for elem in element: 126 if elem.tag == tag: 127 return elem.text or "" 128 return default 129 130 ## 131 # Find all matching objects. 132 133 def findall(self, element): 134 nodeset = [element] 135 index = 0 258 next = iter(xpath_tokenizer(path, namespaces)).next 259 token = next() 260 selector = [] 136 261 while 1: 137 262 try: 138 path = self.path[index] 139 index = index + 1 140 except IndexError: 141 return nodeset 142 set = [] 143 if isinstance(path, xpath_descendant_or_self): 144 try: 145 tag = self.path[index] 146 if not isinstance(tag, type("")): 147 tag = None 148 else: 149 index = index + 1 150 except IndexError: 151 tag = None # invalid path 152 for node in nodeset: 153 new = list(node.getiterator(tag)) 154 if new and new[0] is node: 155 set.extend(new[1:]) 156 else: 157 set.extend(new) 158 else: 159 for node in nodeset: 160 for node in node: 161 if path == "*" or node.tag == path: 162 set.append(node) 163 if not set: 164 return [] 165 nodeset = set 166 167 _cache = {} 168 169 ## 170 # (Internal) Compile path. 
171 172 def _compile(path): 173 p = _cache.get(path) 174 if p is not None: 175 return p 176 p = Path(path) 177 if len(_cache) >= 100: 178 _cache.clear() 179 _cache[path] = p 180 return p 263 selector.append(ops[token[0]](next, token)) 264 except StopIteration: 265 raise SyntaxError("invalid path") 266 try: 267 token = next() 268 if token[0] == "/": 269 token = next() 270 except StopIteration: 271 break 272 _cache[path] = selector 273 # execute selector pattern 274 result = [elem] 275 context = _SelectorContext(elem) 276 for select in selector: 277 result = select(context, result) 278 return result 181 279 182 280 ## 183 281 # Find first matching object. 184 282 185 def find(element, path): 186 return _compile(path).find(element) 283 def find(elem, path, namespaces=None): 284 try: 285 return iterfind(elem, path, namespaces).next() 286 except StopIteration: 287 return None 288 289 ## 290 # Find all matching objects. 291 292 def findall(elem, path, namespaces=None): 293 return list(iterfind(elem, path, namespaces)) 187 294 188 295 ## 189 296 # Find text for first matching object. 190 297 191 def findtext(element, path, default=None): 192 return _compile(path).findtext(element, default) 193 194 ## 195 # Find all matching objects. 196 197 def findall(element, path): 198 return _compile(path).findall(element) 298 def findtext(elem, path, default=None, namespaces=None): 299 try: 300 elem = iterfind(elem, path, namespaces).next() 301 return elem.text or "" 302 except StopIteration: 303 return default
Note: See TracChangeset for help on using the changeset viewer.