Changeset 391 for python/trunk/Lib/xml
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 17 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/xml/__init__.py
r2 r391 20 20 __all__ = ["dom", "parsers", "sax", "etree"] 21 21 22 # When being checked-out without options, this has the form23 # "<dollar>Revision: x.y </dollar>"24 # When exported using -kv, it is "x.y".25 __version__ = "$Revision: 41660 $".split()[-2:][0]26 27 28 22 _MINIMUM_XMLPLUS_VERSION = (0, 8, 4) 29 23 -
python/trunk/Lib/xml/dom/domreg.py
r2 r391 58 58 elif name: 59 59 return registered[name]() 60 elif os.environ.has_key("PYTHON_DOM"):60 elif "PYTHON_DOM" in os.environ: 61 61 return getDOMImplementation(name = os.environ["PYTHON_DOM"]) 62 62 -
python/trunk/Lib/xml/dom/expatbuilder.py
r2 r391 243 243 doctypeName, publicId, systemId) 244 244 doctype.ownerDocument = self.document 245 self.document.childNodes.append(doctype)245 _append_child(self.document, doctype) 246 246 self.document.doctype = doctype 247 247 if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT: -
python/trunk/Lib/xml/dom/minicompat.py
r2 r391 7 7 # NodeList -- lightest possible NodeList implementation 8 8 # 9 # EmptyNodeList -- lightest possible NodeList that is guara teed to9 # EmptyNodeList -- lightest possible NodeList that is guaranteed to 10 10 # remain empty (immutable) 11 11 # -
python/trunk/Lib/xml/dom/minidom.py
r2 r391 1 """\ 2 minidom.py -- a lightweight DOM implementation. 1 """Simple implementation of the Level 1 DOM. 2 3 Namespaces and other minor Level 2 features are also supported. 3 4 4 5 parse("foo.xml") … … 178 179 for child in self.childNodes: 179 180 if child.nodeType == Node.TEXT_NODE: 180 data = child.data 181 if data and L and L[-1].nodeType == child.nodeType: 181 if not child.data: 182 # empty text node; discard 183 if L: 184 L[-1].nextSibling = child.nextSibling 185 if child.nextSibling: 186 child.nextSibling.previousSibling = child.previousSibling 187 child.unlink() 188 elif L and L[-1].nodeType == child.nodeType: 182 189 # collapse text node 183 190 node = L[-1] 184 191 node.data = node.data + child.data 185 192 node.nextSibling = child.nextSibling 193 if child.nextSibling: 194 child.nextSibling.previousSibling = node 186 195 child.unlink() 187 elif data: 188 if L: 189 L[-1].nextSibling = child 190 child.previousSibling = L[-1] 191 else: 192 child.previousSibling = None 196 else: 193 197 L.append(child) 194 else:195 # empty text node; discard196 child.unlink()197 198 else: 198 if L:199 L[-1].nextSibling = child200 child.previousSibling = L[-1]201 else:202 child.previousSibling = None203 199 L.append(child) 204 200 if child.nodeType == Node.ELEMENT_NODE: 205 201 child.normalize() 206 if L:207 L[-1].nextSibling = None208 202 self.childNodes[:] = L 209 203 … … 299 293 def _write_data(writer, data): 300 294 "Writes datachars to writer." 301 data = data.replace("&", "&").replace("<", "<") 302 data = data.replace("\"", """).replace(">", ">") 303 writer.write(data) 295 if data: 296 data = data.replace("&", "&").replace("<", "<"). 
\ 297 replace("\"", """).replace(">", ">") 298 writer.write(data) 304 299 305 300 def _get_elements_by_tagName_helper(parent, name, rc): … … 363 358 def _get_localName(self): 364 359 return self.nodeName.split(":", 1)[-1] 365 366 def _get_name(self):367 return self.name368 360 369 361 def _get_specified(self): … … 499 491 def has_key(self, key): 500 492 if isinstance(key, StringTypes): 501 return self._attrs.has_key(key)502 else: 503 return self._attrsNS.has_key(key)493 return key in self._attrs 494 else: 495 return key in self._attrsNS 504 496 505 497 def keys(self): … … 783 775 784 776 def hasAttribute(self, name): 785 return self._attrs.has_key(name)777 return name in self._attrs 786 778 787 779 def hasAttributeNS(self, namespaceURI, localName): 788 return self._attrsNS.has_key((namespaceURI, localName))780 return (namespaceURI, localName) in self._attrsNS 789 781 790 782 def getElementsByTagName(self, name): … … 813 805 writer.write("\"") 814 806 if self.childNodes: 815 writer.write(">%s"%(newl)) 816 for node in self.childNodes: 817 node.writexml(writer,indent+addindent,addindent,newl) 818 writer.write("%s</%s>%s" % (indent,self.tagName,newl)) 807 writer.write(">") 808 if (len(self.childNodes) == 1 and 809 self.childNodes[0].nodeType == Node.TEXT_NODE): 810 self.childNodes[0].writexml(writer, '', '', '') 811 else: 812 writer.write(newl) 813 for node in self.childNodes: 814 node.writexml(writer, indent+addindent, addindent, newl) 815 writer.write(indent) 816 writer.write("</%s>%s" % (self.tagName, newl)) 819 817 else: 820 818 writer.write("/>%s"%(newl)) … … 897 895 raise xml.dom.NotFoundErr( 898 896 self.nodeName + " nodes do not have children") 897 898 def normalize(self): 899 # For childless nodes, normalize() has nothing to do. 
900 pass 899 901 900 902 def replaceChild(self, newChild, oldChild): … … 1034 1036 1035 1037 def writexml(self, writer, indent="", addindent="", newl=""): 1036 _write_data(writer, "%s%s%s" %(indent, self.data, newl))1038 _write_data(writer, "%s%s%s" % (indent, self.data, newl)) 1037 1039 1038 1040 # DOM Level 3 (WD 9 April 2002) … … 1344 1346 _features = [("core", "1.0"), 1345 1347 ("core", "2.0"), 1346 ("core", "3.0"),1347 1348 ("core", None), 1348 1349 ("xml", "1.0"), 1349 1350 ("xml", "2.0"), 1350 ("xml", "3.0"),1351 1351 ("xml", None), 1352 1352 ("ls-load", "3.0"), … … 1451 1451 1452 1452 def isId(self, aname): 1453 """Returns true iff the named attrib te is a DTD-style ID."""1453 """Returns true iff the named attribute is a DTD-style ID.""" 1454 1454 return False 1455 1455 … … 1880 1880 else: 1881 1881 # Note the cloning of Document and DocumentType nodes is 1882 # implemen etation specific. minidom handles those cases1882 # implementation specific. minidom handles those cases 1883 1883 # directly in the cloneNode() methods. 1884 1884 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) -
python/trunk/Lib/xml/dom/xmlbuilder.py
r2 r391 92 92 def canSetFeature(self, name, state): 93 93 key = (_name_xform(name), state and 1 or 0) 94 return self._settings.has_key(key)94 return key in self._settings 95 95 96 96 # This dictionary maps from (feature,value) to a list of … … 248 248 def _guess_media_encoding(self, source): 249 249 info = source.byteStream.info() 250 if info.has_key("Content-Type"):250 if "Content-Type" in info: 251 251 for param in info.getplist(): 252 252 if param.startswith("charset="): -
python/trunk/Lib/xml/etree/ElementInclude.py
r2 r391 1 1 # 2 2 # ElementTree 3 # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $3 # $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ 4 4 # 5 5 # limited xinclude support for element trees … … 17 17 # The ElementTree toolkit is 18 18 # 19 # Copyright (c) 1999-200 4by Fredrik Lundh19 # Copyright (c) 1999-2008 by Fredrik Lundh 20 20 # 21 21 # By obtaining, using, and/or copying this software and/or its … … 43 43 44 44 # Licensed to PSF under a Contributor Agreement. 45 # See http://www.python.org/ 2.4/license for licensing details.45 # See http://www.python.org/psf/license for licensing details. 46 46 47 47 ## … … 50 50 51 51 import copy 52 import ElementTree52 from . import ElementTree 53 53 54 54 XINCLUDE = "{http://www.w3.org/2001/XInclude}" … … 126 126 if i: 127 127 node = elem[i-1] 128 node.tail = (node.tail or "") + text 128 node.tail = (node.tail or "") + text + (e.tail or "") 129 129 else: 130 130 elem.text = (elem.text or "") + text + (e.tail or "") -
python/trunk/Lib/xml/etree/ElementPath.py
r2 r391 1 1 # 2 2 # ElementTree 3 # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $3 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ 4 4 # 5 5 # limited xpath support for element trees … … 9 9 # 2003-05-28 fl added support for // etc 10 10 # 2003-08-27 fl fixed parsing of periods in element names 11 # 12 # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. 11 # 2007-09-10 fl new selection engine 12 # 2007-09-12 fl fixed parent selector 13 # 2007-09-13 fl added iterfind; changed findall to return a list 14 # 2007-11-30 fl added namespaces support 15 # 2009-10-30 fl added child element value filter 16 # 17 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 13 18 # 14 19 # fredrik@pythonware.com … … 18 23 # The ElementTree toolkit is 19 24 # 20 # Copyright (c) 1999-200 4by Fredrik Lundh25 # Copyright (c) 1999-2009 by Fredrik Lundh 21 26 # 22 27 # By obtaining, using, and/or copying this software and/or its … … 44 49 45 50 # Licensed to PSF under a Contributor Agreement. 46 # See http://www.python.org/ 2.4/license for licensing details.51 # See http://www.python.org/psf/license for licensing details. 47 52 48 53 ## … … 54 59 import re 55 60 56 xpath_tokenizer = re.compile( 57 "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+" 58 ).findall 59 60 class xpath_descendant_or_self: 61 pass 62 63 ## 64 # Wrapper for a compiled XPath. 65 66 class Path: 67 68 ## 69 # Create an Path instance from an XPath expression. 
70 71 def __init__(self, path): 72 tokens = xpath_tokenizer(path) 73 # the current version supports 'path/path'-style expressions only 74 self.path = [] 75 self.tag = None 76 if tokens and tokens[0][0] == "/": 61 xpath_tokenizer_re = re.compile( 62 "(" 63 "'[^']*'|\"[^\"]*\"|" 64 "::|" 65 "//?|" 66 "\.\.|" 67 "\(\)|" 68 "[/.*:\[\]\(\)@=])|" 69 "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" 70 "\s+" 71 ) 72 73 def xpath_tokenizer(pattern, namespaces=None): 74 for token in xpath_tokenizer_re.findall(pattern): 75 tag = token[1] 76 if tag and tag[0] != "{" and ":" in tag: 77 try: 78 prefix, uri = tag.split(":", 1) 79 if not namespaces: 80 raise KeyError 81 yield token[0], "{%s}%s" % (namespaces[prefix], uri) 82 except KeyError: 83 raise SyntaxError("prefix %r not found in prefix map" % prefix) 84 else: 85 yield token 86 87 def get_parent_map(context): 88 parent_map = context.parent_map 89 if parent_map is None: 90 context.parent_map = parent_map = {} 91 for p in context.root.iter(): 92 for e in p: 93 parent_map[e] = p 94 return parent_map 95 96 def prepare_child(next, token): 97 tag = token[1] 98 def select(context, result): 99 for elem in result: 100 for e in elem: 101 if e.tag == tag: 102 yield e 103 return select 104 105 def prepare_star(next, token): 106 def select(context, result): 107 for elem in result: 108 for e in elem: 109 yield e 110 return select 111 112 def prepare_self(next, token): 113 def select(context, result): 114 for elem in result: 115 yield elem 116 return select 117 118 def prepare_descendant(next, token): 119 token = next() 120 if token[0] == "*": 121 tag = "*" 122 elif not token[0]: 123 tag = token[1] 124 else: 125 raise SyntaxError("invalid descendant") 126 def select(context, result): 127 for elem in result: 128 for e in elem.iter(tag): 129 if e is not elem: 130 yield e 131 return select 132 133 def prepare_parent(next, token): 134 def select(context, result): 135 # FIXME: raise error if .. is applied at toplevel? 
136 parent_map = get_parent_map(context) 137 result_map = {} 138 for elem in result: 139 if elem in parent_map: 140 parent = parent_map[elem] 141 if parent not in result_map: 142 result_map[parent] = None 143 yield parent 144 return select 145 146 def prepare_predicate(next, token): 147 # FIXME: replace with real parser!!! refs: 148 # http://effbot.org/zone/simple-iterator-parser.htm 149 # http://javascript.crockford.com/tdop/tdop.html 150 signature = [] 151 predicate = [] 152 while 1: 153 token = next() 154 if token[0] == "]": 155 break 156 if token[0] and token[0][:1] in "'\"": 157 token = "'", token[0][1:-1] 158 signature.append(token[0] or "-") 159 predicate.append(token[1]) 160 signature = "".join(signature) 161 # use signature to determine predicate type 162 if signature == "@-": 163 # [@attribute] predicate 164 key = predicate[1] 165 def select(context, result): 166 for elem in result: 167 if elem.get(key) is not None: 168 yield elem 169 return select 170 if signature == "@-='": 171 # [@attribute='value'] 172 key = predicate[1] 173 value = predicate[-1] 174 def select(context, result): 175 for elem in result: 176 if elem.get(key) == value: 177 yield elem 178 return select 179 if signature == "-" and not re.match("\d+$", predicate[0]): 180 # [tag] 181 tag = predicate[0] 182 def select(context, result): 183 for elem in result: 184 if elem.find(tag) is not None: 185 yield elem 186 return select 187 if signature == "-='" and not re.match("\d+$", predicate[0]): 188 # [tag='value'] 189 tag = predicate[0] 190 value = predicate[-1] 191 def select(context, result): 192 for elem in result: 193 for e in elem.findall(tag): 194 if "".join(e.itertext()) == value: 195 yield elem 196 break 197 return select 198 if signature == "-" or signature == "-()" or signature == "-()-": 199 # [index] or [last()] or [last()-index] 200 if signature == "-": 201 index = int(predicate[0]) - 1 202 else: 203 if predicate[0] != "last": 204 raise SyntaxError("unsupported function") 205 if 
signature == "-()-": 206 try: 207 index = int(predicate[2]) - 1 208 except ValueError: 209 raise SyntaxError("unsupported expression") 210 else: 211 index = -1 212 def select(context, result): 213 parent_map = get_parent_map(context) 214 for elem in result: 215 try: 216 parent = parent_map[elem] 217 # FIXME: what if the selector is "*" ? 218 elems = list(parent.findall(elem.tag)) 219 if elems[index] is elem: 220 yield elem 221 except (IndexError, KeyError): 222 pass 223 return select 224 raise SyntaxError("invalid predicate") 225 226 ops = { 227 "": prepare_child, 228 "*": prepare_star, 229 ".": prepare_self, 230 "..": prepare_parent, 231 "//": prepare_descendant, 232 "[": prepare_predicate, 233 } 234 235 _cache = {} 236 237 class _SelectorContext: 238 parent_map = None 239 def __init__(self, root): 240 self.root = root 241 242 # -------------------------------------------------------------------- 243 244 ## 245 # Generate all matching objects. 246 247 def iterfind(elem, path, namespaces=None): 248 # compile selector pattern 249 if path[-1:] == "/": 250 path = path + "*" # implicit all (FIXME: keep this?) 251 try: 252 selector = _cache[path] 253 except KeyError: 254 if len(_cache) > 100: 255 _cache.clear() 256 if path[:1] == "/": 77 257 raise SyntaxError("cannot use absolute path on element") 78 while tokens: 79 op, tag = tokens.pop(0) 80 if tag or op == "*": 81 self.path.append(tag or op) 82 elif op == ".": 83 pass 84 elif op == "/": 85 self.path.append(xpath_descendant_or_self()) 86 continue 87 else: 88 raise SyntaxError("unsupported path syntax (%s)" % op) 89 if tokens: 90 op, tag = tokens.pop(0) 91 if op != "/": 92 raise SyntaxError( 93 "expected path separator (%s)" % (op or tag) 94 ) 95 if self.path and isinstance(self.path[-1], xpath_descendant_or_self): 96 raise SyntaxError("path cannot end with //") 97 if len(self.path) == 1 and isinstance(self.path[0], type("")): 98 self.tag = self.path[0] 99 100 ## 101 # Find first matching object. 
102 103 def find(self, element): 104 tag = self.tag 105 if tag is None: 106 nodeset = self.findall(element) 107 if not nodeset: 108 return None 109 return nodeset[0] 110 for elem in element: 111 if elem.tag == tag: 112 return elem 113 return None 114 115 ## 116 # Find text for first matching object. 117 118 def findtext(self, element, default=None): 119 tag = self.tag 120 if tag is None: 121 nodeset = self.findall(element) 122 if not nodeset: 123 return default 124 return nodeset[0].text or "" 125 for elem in element: 126 if elem.tag == tag: 127 return elem.text or "" 128 return default 129 130 ## 131 # Find all matching objects. 132 133 def findall(self, element): 134 nodeset = [element] 135 index = 0 258 next = iter(xpath_tokenizer(path, namespaces)).next 259 token = next() 260 selector = [] 136 261 while 1: 137 262 try: 138 path = self.path[index] 139 index = index + 1 140 except IndexError: 141 return nodeset 142 set = [] 143 if isinstance(path, xpath_descendant_or_self): 144 try: 145 tag = self.path[index] 146 if not isinstance(tag, type("")): 147 tag = None 148 else: 149 index = index + 1 150 except IndexError: 151 tag = None # invalid path 152 for node in nodeset: 153 new = list(node.getiterator(tag)) 154 if new and new[0] is node: 155 set.extend(new[1:]) 156 else: 157 set.extend(new) 158 else: 159 for node in nodeset: 160 for node in node: 161 if path == "*" or node.tag == path: 162 set.append(node) 163 if not set: 164 return [] 165 nodeset = set 166 167 _cache = {} 168 169 ## 170 # (Internal) Compile path. 
171 172 def _compile(path): 173 p = _cache.get(path) 174 if p is not None: 175 return p 176 p = Path(path) 177 if len(_cache) >= 100: 178 _cache.clear() 179 _cache[path] = p 180 return p 263 selector.append(ops[token[0]](next, token)) 264 except StopIteration: 265 raise SyntaxError("invalid path") 266 try: 267 token = next() 268 if token[0] == "/": 269 token = next() 270 except StopIteration: 271 break 272 _cache[path] = selector 273 # execute selector pattern 274 result = [elem] 275 context = _SelectorContext(elem) 276 for select in selector: 277 result = select(context, result) 278 return result 181 279 182 280 ## 183 281 # Find first matching object. 184 282 185 def find(element, path): 186 return _compile(path).find(element) 283 def find(elem, path, namespaces=None): 284 try: 285 return iterfind(elem, path, namespaces).next() 286 except StopIteration: 287 return None 288 289 ## 290 # Find all matching objects. 291 292 def findall(elem, path, namespaces=None): 293 return list(iterfind(elem, path, namespaces)) 187 294 188 295 ## 189 296 # Find text for first matching object. 190 297 191 def findtext(element, path, default=None): 192 return _compile(path).findtext(element, default) 193 194 ## 195 # Find all matching objects. 196 197 def findall(element, path): 198 return _compile(path).findall(element) 298 def findtext(elem, path, default=None, namespaces=None): 299 try: 300 elem = iterfind(elem, path, namespaces).next() 301 return elem.text or "" 302 except StopIteration: 303 return default -
python/trunk/Lib/xml/etree/ElementTree.py
r2 r391 1 1 # 2 2 # ElementTree 3 # $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $ 4 # 5 # light-weight XML support for Python 1.5.2 and later. 6 # 7 # history: 8 # 2001-10-20 fl created (from various sources) 9 # 2001-11-01 fl return root from parse method 10 # 2002-02-16 fl sort attributes in lexical order 11 # 2002-04-06 fl TreeBuilder refactoring, added PythonDoc markup 12 # 2002-05-01 fl finished TreeBuilder refactoring 13 # 2002-07-14 fl added basic namespace support to ElementTree.write 14 # 2002-07-25 fl added QName attribute support 15 # 2002-10-20 fl fixed encoding in write 16 # 2002-11-24 fl changed default encoding to ascii; fixed attribute encoding 17 # 2002-11-27 fl accept file objects or file names for parse/write 18 # 2002-12-04 fl moved XMLTreeBuilder back to this module 19 # 2003-01-11 fl fixed entity encoding glitch for us-ascii 20 # 2003-02-13 fl added XML literal factory 21 # 2003-02-21 fl added ProcessingInstruction/PI factory 22 # 2003-05-11 fl added tostring/fromstring helpers 23 # 2003-05-26 fl added ElementPath support 24 # 2003-07-05 fl added makeelement factory method 25 # 2003-07-28 fl added more well-known namespace prefixes 26 # 2003-08-15 fl fixed typo in ElementTree.findtext (Thomas Dartsch) 27 # 2003-09-04 fl fall back on emulator if ElementPath is not installed 28 # 2003-10-31 fl markup updates 29 # 2003-11-15 fl fixed nested namespace bug 30 # 2004-03-28 fl added XMLID helper 31 # 2004-06-02 fl added default support to findtext 32 # 2004-06-08 fl fixed encoding of non-ascii element/attribute names 33 # 2004-08-23 fl take advantage of post-2.1 expat features 34 # 2005-02-01 fl added iterparse implementation 35 # 2005-03-02 fl fixed iterparse support for pre-2.2 versions 36 # 37 # Copyright (c) 1999-2005 by Fredrik Lundh. All rights reserved. 3 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ 4 # 5 # light-weight XML support for Python 2.3 and later. 
6 # 7 # history (since 1.2.6): 8 # 2005-11-12 fl added tostringlist/fromstringlist helpers 9 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox 10 # 2006-07-05 fl removed support for 2.1 and earlier 11 # 2007-06-21 fl added deprecation/future warnings 12 # 2007-08-25 fl added doctype hook, added parser version attribute etc 13 # 2007-08-26 fl added new serializer code (better namespace handling, etc) 14 # 2007-08-27 fl warn for broken /tag searches on tree level 15 # 2007-09-02 fl added html/text methods to serializer (experimental) 16 # 2007-09-05 fl added method argument to tostring/tostringlist 17 # 2007-09-06 fl improved error handling 18 # 2007-09-13 fl added itertext, iterfind; assorted cleanups 19 # 2007-12-15 fl added C14N hooks, copy method (experimental) 20 # 21 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 38 22 # 39 23 # fredrik@pythonware.com … … 43 27 # The ElementTree toolkit is 44 28 # 45 # Copyright (c) 1999-200 5by Fredrik Lundh29 # Copyright (c) 1999-2008 by Fredrik Lundh 46 30 # 47 31 # By obtaining, using, and/or copying this software and/or its … … 69 53 70 54 # Licensed to PSF under a Contributor Agreement. 71 # See http://www.python.org/ 2.4/license for licensing details.55 # See http://www.python.org/psf/license for licensing details. 
72 56 73 57 __all__ = [ … … 76 60 "dump", 77 61 "Element", "ElementTree", 78 "fromstring", 62 "fromstring", "fromstringlist", 79 63 "iselement", "iterparse", 80 "parse", 64 "parse", "ParseError", 81 65 "PI", "ProcessingInstruction", 82 66 "QName", 83 67 "SubElement", 84 "tostring", 68 "tostring", "tostringlist", 85 69 "TreeBuilder", 86 "VERSION", "XML", 70 "VERSION", 71 "XML", 87 72 "XMLParser", "XMLTreeBuilder", 88 73 ] 74 75 VERSION = "1.3.0" 89 76 90 77 ## … … 103 90 # </ul> 104 91 # 105 # To create an element instance, use the {@link #Element} or {@link106 # #SubElement} factory functions.92 # To create an element instance, use the {@link #Element} constructor 93 # or the {@link #SubElement} factory function. 107 94 # <p> 108 95 # The {@link #ElementTree} class can be used to wrap an element … … 110 97 ## 111 98 112 import string, sys, re 113 114 class _SimpleElementPath: 99 import sys 100 import re 101 import warnings 102 103 104 class _SimpleElementPath(object): 115 105 # emulate pre-1.2 find/findtext/findall behaviour 116 def find(self, element, tag ):106 def find(self, element, tag, namespaces=None): 117 107 for elem in element: 118 108 if elem.tag == tag: 119 109 return elem 120 110 return None 121 def findtext(self, element, tag, default=None): 111 def findtext(self, element, tag, default=None, namespaces=None): 112 elem = self.find(element, tag) 113 if elem is None: 114 return default 115 return elem.text or "" 116 def iterfind(self, element, tag, namespaces=None): 117 if tag[:3] == ".//": 118 for elem in element.iter(tag[3:]): 119 yield elem 122 120 for elem in element: 123 121 if elem.tag == tag: 124 return elem.text or "" 125 return default 126 def findall(self, element, tag): 127 if tag[:3] == ".//": 128 return element.getiterator(tag[3:]) 129 result = [] 130 for elem in element: 131 if elem.tag == tag: 132 result.append(elem) 133 return result 122 yield elem 123 def findall(self, element, tag, namespaces=None): 124 return list(self.iterfind(element, 
tag, namespaces)) 134 125 135 126 try: 136 import ElementPath127 from . import ElementPath 137 128 except ImportError: 138 # FIXME: issue warning in this case?139 129 ElementPath = _SimpleElementPath() 140 130 141 # TODO: add support for custom namespace resolvers/default namespaces 142 # TODO: add improved support for incremental parsing 143 144 VERSION = "1.2.6" 145 146 ## 147 # Internal element class. This class defines the Element interface, 148 # and provides a reference implementation of this interface. 131 ## 132 # Parser error. This is a subclass of <b>SyntaxError</b>. 149 133 # <p> 150 # You should not create instances of this class directly. Use the 151 # appropriate factory functions instead, such as {@link #Element} 152 # and {@link #SubElement}. 153 # 134 # In addition to the exception value, an exception instance contains a 135 # specific exception code in the <b>code</b> attribute, and the line and 136 # column of the error in the <b>position</b> attribute. 137 138 class ParseError(SyntaxError): 139 pass 140 141 # -------------------------------------------------------------------- 142 143 ## 144 # Checks if an object appears to be a valid element object. 145 # 146 # @param An element instance. 147 # @return A true value if this is an element object. 148 # @defreturn flag 149 150 def iselement(element): 151 # FIXME: not sure about this; might be a better idea to look 152 # for tag/attrib/text attributes 153 return isinstance(element, Element) or hasattr(element, "tag") 154 155 ## 156 # Element class. This class defines the Element interface, and 157 # provides a reference implementation of this interface. 158 # <p> 159 # The element name, attribute names, and attribute values can be 160 # either ASCII strings (ordinary Python strings containing only 7-bit 161 # ASCII characters) or Unicode strings. 162 # 163 # @param tag The element name. 164 # @param attrib An optional dictionary, containing element attributes. 
165 # @param **extra Additional attributes, given as keyword arguments. 154 166 # @see Element 155 167 # @see SubElement … … 157 169 # @see ProcessingInstruction 158 170 159 class _ElementInterface:171 class Element(object): 160 172 # <tag attrib>text<child/>...</tag>tail 161 173 … … 167 179 ## 168 180 # (Attribute) Element attribute dictionary. Where possible, use 169 # {@link # _ElementInterface.get},170 # {@link # _ElementInterface.set},171 # {@link # _ElementInterface.keys}, and172 # {@link # _ElementInterface.items} to access181 # {@link #Element.get}, 182 # {@link #Element.set}, 183 # {@link #Element.keys}, and 184 # {@link #Element.items} to access 173 185 # element attributes. 174 186 … … 177 189 ## 178 190 # (Attribute) Text before first subelement. This is either a 179 # string or the value None, if there was no text. 191 # string or the value None. Note that if there was no text, this 192 # attribute may be either None or an empty string, depending on 193 # the parser. 180 194 181 195 text = None … … 184 198 # (Attribute) Text after this element's end tag, but before the 185 199 # next sibling element's start tag. This is either a string or 186 # the value None, if there was no text. 200 # the value None. Note that if there was no text, this attribute 201 # may be either None or an empty string, depending on the parser. 187 202 188 203 tail = None # text after end tag, if any 189 204 190 def __init__(self, tag, attrib): 205 # constructor 206 207 def __init__(self, tag, attrib={}, **extra): 208 attrib = attrib.copy() 209 attrib.update(extra) 191 210 self.tag = tag 192 211 self.attrib = attrib … … 194 213 195 214 def __repr__(self): 196 return "<Element %s at %x>" % (self.tag, id(self))215 return "<Element %s at 0x%x>" % (repr(self.tag), id(self)) 197 216 198 217 ## … … 204 223 205 224 def makeelement(self, tag, attrib): 206 return Element(tag, attrib) 207 208 ## 209 # Returns the number of subelements. 
225 return self.__class__(tag, attrib) 226 227 ## 228 # (Experimental) Copies the current element. This creates a 229 # shallow copy; subelements will be shared with the original tree. 230 # 231 # @return A new element instance. 232 233 def copy(self): 234 elem = self.makeelement(self.tag, self.attrib) 235 elem.text = self.text 236 elem.tail = self.tail 237 elem[:] = self 238 return elem 239 240 ## 241 # Returns the number of subelements. Note that this only counts 242 # full elements; to check if there's any content in an element, you 243 # have to check both the length and the <b>text</b> attribute. 210 244 # 211 245 # @return The number of subelements. … … 214 248 return len(self._children) 215 249 216 ## 217 # Returns the given subelement. 250 def __nonzero__(self): 251 warnings.warn( 252 "The behavior of this method will change in future versions. " 253 "Use specific 'len(elem)' or 'elem is not None' test instead.", 254 FutureWarning, stacklevel=2 255 ) 256 return len(self._children) != 0 # emulate old behaviour, for now 257 258 ## 259 # Returns the given subelement, by index. 218 260 # 219 261 # @param index What subelement to return. … … 225 267 226 268 ## 227 # Replaces the given subelement .269 # Replaces the given subelement, by index. 228 270 # 229 271 # @param index What subelement to replace. 230 272 # @param element The new element value. 231 273 # @exception IndexError If the given element does not exist. 232 # @exception AssertionError If element is not a valid object.233 274 234 275 def __setitem__(self, index, element): 235 assert iselement(element) 276 # if isinstance(index, slice): 277 # for elt in element: 278 # assert iselement(elt) 279 # else: 280 # assert iselement(element) 236 281 self._children[index] = element 237 282 238 283 ## 239 # Deletes the given subelement .284 # Deletes the given subelement, by index. 240 285 # 241 286 # @param index What subelement to delete. 
… … 246 291 247 292 ## 248 # Returns a list containing subelements in the given range.249 # 250 # @param start The first subelement to return.251 # @param stop The first subelement that shouldn't be returned.252 # @return A sequence object containing subelements.253 254 def __getslice__(self, start, stop): 255 return self._children[start:stop]256 257 ##258 # Replaces a number of subelements with elements from a sequence. 259 # 260 # @param start The first subelement to replace.261 # @param stop The first subelement that shouldn't be replaced.293 # Adds a subelement to the end of this element. In document order, 294 # the new element will appear after the last existing subelement (or 295 # directly after the text, if it's the first subelement), but before 296 # the end tag for this element. 297 # 298 # @param element The element to add. 299 300 def append(self, element): 301 # assert iselement(element) 302 self._children.append(element) 303 304 ## 305 # Appends subelements from a sequence. 306 # 262 307 # @param elements A sequence object with zero or more elements. 263 # @exception AssertionError If a sequence member is not a valid object. 264 265 def __setslice__(self, start, stop, elements): 266 for element in elements: 267 assert iselement(element) 268 self._children[start:stop] = list(elements) 269 270 ## 271 # Deletes a number of subelements. 272 # 273 # @param start The first subelement to delete. 274 # @param stop The first subelement to leave in there. 275 276 def __delslice__(self, start, stop): 277 del self._children[start:stop] 278 279 ## 280 # Adds a subelement to the end of this element. 281 # 282 # @param element The element to add. 283 # @exception AssertionError If a sequence member is not a valid object. 
284 285 def append(self, element): 286 assert iselement(element) 287 self._children.append(element) 308 # @since 1.3 309 310 def extend(self, elements): 311 # for element in elements: 312 # assert iselement(element) 313 self._children.extend(elements) 288 314 289 315 ## … … 291 317 # 292 318 # @param index Where to insert the new subelement. 293 # @exception AssertionError If the element is not a valid object.294 319 295 320 def insert(self, index, element): 296 assert iselement(element)321 # assert iselement(element) 297 322 self._children.insert(index, element) 298 323 … … 300 325 # Removes a matching subelement. Unlike the <b>find</b> methods, 301 326 # this method compares elements based on identity, not on tag 302 # value or contents. 327 # value or contents. To remove subelements by other means, the 328 # easiest way is often to use a list comprehension to select what 329 # elements to keep, and use slice assignment to update the parent 330 # element. 303 331 # 304 332 # @param element What element to remove. 305 333 # @exception ValueError If a matching element could not be found. 306 # @exception AssertionError If the element is not a valid object.307 334 308 335 def remove(self, element): 309 assert iselement(element)336 # assert iselement(element) 310 337 self._children.remove(element) 311 338 312 339 ## 313 # Returns all subelements. The elements are returned in document314 # order.340 # (Deprecated) Returns all subelements. The elements are returned 341 # in document order. 315 342 # 316 343 # @return A list of subelements. … … 318 345 319 346 def getchildren(self): 347 warnings.warn( 348 "This method will be removed in future versions. " 349 "Use 'list(elem)' or iteration over elem instead.", 350 DeprecationWarning, stacklevel=2 351 ) 320 352 return self._children 321 353 … … 324 356 # 325 357 # @param path What element to look for. 358 # @keyparam namespaces Optional namespace prefix map. 
326 359 # @return The first matching element, or None if no element was found. 327 360 # @defreturn Element or None 328 361 329 def find(self, path ):330 return ElementPath.find(self, path )362 def find(self, path, namespaces=None): 363 return ElementPath.find(self, path, namespaces) 331 364 332 365 ## … … 335 368 # @param path What element to look for. 336 369 # @param default What to return if the element was not found. 370 # @keyparam namespaces Optional namespace prefix map. 337 371 # @return The text content of the first matching element, or the 338 372 # default value no element was found. Note that if the element 339 # hasis found, but has no text content, this method returns an373 # is found, but has no text content, this method returns an 340 374 # empty string. 341 375 # @defreturn string 342 376 343 def findtext(self, path, default=None ):344 return ElementPath.findtext(self, path, default )377 def findtext(self, path, default=None, namespaces=None): 378 return ElementPath.findtext(self, path, default, namespaces) 345 379 346 380 ## … … 348 382 # 349 383 # @param path What element to look for. 350 # @return A list or iterator containing all matching elements, 384 # @keyparam namespaces Optional namespace prefix map. 385 # @return A list or other sequence containing all matching elements, 351 386 # in document order. 352 387 # @defreturn list of Element instances 353 388 354 def findall(self, path): 355 return ElementPath.findall(self, path) 389 def findall(self, path, namespaces=None): 390 return ElementPath.findall(self, path, namespaces) 391 392 ## 393 # Finds all matching subelements, by tag name or path. 394 # 395 # @param path What element to look for. 396 # @keyparam namespaces Optional namespace prefix map. 397 # @return An iterator or sequence containing all matching elements, 398 # in document order. 
399 # @defreturn a generated sequence of Element instances 400 401 def iterfind(self, path, namespaces=None): 402 return ElementPath.iterfind(self, path, namespaces) 356 403 357 404 ## 358 405 # Resets an element. This function removes all subelements, clears 359 # all attributes, and sets the text and tail attributes to None. 406 # all attributes, and sets the <b>text</b> and <b>tail</b> attributes 407 # to None. 360 408 361 409 def clear(self): … … 365 413 366 414 ## 367 # Gets an element attribute. 415 # Gets an element attribute. Equivalent to <b>attrib.get</b>, but 416 # some implementations may handle this a bit more efficiently. 368 417 # 369 418 # @param key What attribute to look for. … … 377 426 378 427 ## 379 # Sets an element attribute. 428 # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>, 429 # but some implementations may handle this a bit more efficiently. 380 430 # 381 431 # @param key What attribute to set. … … 388 438 # Gets a list of attribute names. The names are returned in an 389 439 # arbitrary order (just like for an ordinary Python dictionary). 440 # Equivalent to <b>attrib.keys()</b>. 390 441 # 391 442 # @return A list of element attribute names. … … 397 448 ## 398 449 # Gets element attributes, as a sequence. The attributes are 399 # returned in an arbitrary order. 450 # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>. 400 451 # 401 452 # @return A list of (name, value) tuples for all attributes. … … 410 461 # with a matching tag. 411 462 # <p> 412 # If the tree structure is modified during iteration, the result 413 # is undefined. 463 # If the tree structure is modified during iteration, new or removed 464 # elements may or may not be included. To get a stable set, use the 465 # list() function on the iterator, and loop over the resulting list. 414 466 # 415 467 # @param tag What tags to look for (default is to return all elements). 
416 # @return A list or iterator containing all the matching elements. 417 # @defreturn list or iterator 418 419 def getiterator(self, tag=None): 420 nodes = [] 468 # @return An iterator containing all the matching elements. 469 # @defreturn iterator 470 471 def iter(self, tag=None): 421 472 if tag == "*": 422 473 tag = None 423 474 if tag is None or self.tag == tag: 424 nodes.append(self) 425 for node in self._children: 426 nodes.extend(node.getiterator(tag)) 427 return nodes 475 yield self 476 for e in self._children: 477 for e in e.iter(tag): 478 yield e 479 480 # compatibility 481 def getiterator(self, tag=None): 482 # Change for a DeprecationWarning in 1.4 483 warnings.warn( 484 "This method will be removed in future versions. " 485 "Use 'elem.iter()' or 'list(elem.iter())' instead.", 486 PendingDeprecationWarning, stacklevel=2 487 ) 488 return list(self.iter(tag)) 489 490 ## 491 # Creates a text iterator. The iterator loops over this element 492 # and all subelements, in document order, and returns all inner 493 # text. 494 # 495 # @return An iterator containing all inner text. 496 # @defreturn iterator 497 498 def itertext(self): 499 tag = self.tag 500 if not isinstance(tag, basestring) and tag is not None: 501 return 502 if self.text: 503 yield self.text 504 for e in self: 505 for s in e.itertext(): 506 yield s 507 if e.tail: 508 yield e.tail 428 509 429 510 # compatibility 430 _Element = _ElementInterface 431 432 ## 433 # Element factory. This function returns an object implementing the 434 # standard Element interface. The exact class or type of that object 435 # is implementation dependent, but it will always be compatible with 436 # the {@link #_ElementInterface} class in this module. 437 # <p> 438 # The element name, attribute names, and attribute values can be 439 # either 8-bit ASCII strings or Unicode strings. 440 # 441 # @param tag The element name. 442 # @param attrib An optional dictionary, containing element attributes. 
443 # @param **extra Additional attributes, given as keyword arguments. 444 # @return An element instance. 445 # @defreturn Element 446 447 def Element(tag, attrib={}, **extra): 448 attrib = attrib.copy() 449 attrib.update(extra) 450 return _ElementInterface(tag, attrib) 511 _Element = _ElementInterface = Element 451 512 452 513 ## … … 473 534 ## 474 535 # Comment element factory. This factory function creates a special 475 # element that will be serialized as an XML comment. 536 # element that will be serialized as an XML comment by the standard 537 # serializer. 476 538 # <p> 477 539 # The comment string can be either an 8-bit ASCII string or a Unicode … … 489 551 ## 490 552 # PI element factory. This factory function creates a special element 491 # that will be serialized as an XML processing instruction. 553 # that will be serialized as an XML processing instruction by the standard 554 # serializer. 492 555 # 493 556 # @param target A string containing the PI target. … … 515 578 # @return An opaque object, representing the QName. 516 579 517 class QName :580 class QName(object): 518 581 def __init__(self, text_or_uri, tag=None): 519 582 if tag: … … 529 592 return cmp(self.text, other) 530 593 594 # -------------------------------------------------------------------- 595 531 596 ## 532 597 # ElementTree wrapper class. This class represents an entire element … … 535 600 # 536 601 # @param element Optional root element. 537 # @keyparam file Optional file handle or name. If given, the602 # @keyparam file Optional file handle or file name. If given, the 538 603 # tree is initialized with the contents of this XML file. 
539 604 540 class ElementTree :605 class ElementTree(object): 541 606 542 607 def __init__(self, element=None, file=None): 543 assert element is None or iselement(element)608 # assert element is None or iselement(element) 544 609 self._root = element # first node 545 610 if file: … … 563 628 564 629 def _setroot(self, element): 565 assert iselement(element)630 # assert iselement(element) 566 631 self._root = element 567 632 … … 569 634 # Loads an external XML document into this element tree. 570 635 # 571 # @param source A file name or file object. 572 # @param parser An optional parser instance. If not given, the 573 # standard {@link XMLTreeBuilder} parser is used. 636 # @param source A file name or file object. If a file object is 637 # given, it only has to implement a <b>read(n)</b> method. 638 # @keyparam parser An optional parser instance. If not given, the 639 # standard {@link XMLParser} parser is used. 574 640 # @return The document root element. 575 641 # @defreturn Element 642 # @exception ParseError If the parser fails to parse the document. 
576 643 577 644 def parse(self, source, parser=None): 645 close_source = False 578 646 if not hasattr(source, "read"): 579 647 source = open(source, "rb") 580 if not parser: 581 parser = XMLTreeBuilder() 582 while 1: 583 data = source.read(32768) 584 if not data: 585 break 586 parser.feed(data) 587 self._root = parser.close() 588 return self._root 648 close_source = True 649 try: 650 if not parser: 651 parser = XMLParser(target=TreeBuilder()) 652 while 1: 653 data = source.read(65536) 654 if not data: 655 break 656 parser.feed(data) 657 self._root = parser.close() 658 return self._root 659 finally: 660 if close_source: 661 source.close() 589 662 590 663 ## … … 596 669 # @defreturn iterator 597 670 671 def iter(self, tag=None): 672 # assert self._root is not None 673 return self._root.iter(tag) 674 675 # compatibility 598 676 def getiterator(self, tag=None): 599 assert self._root is not None 600 return self._root.getiterator(tag) 601 602 ## 603 # Finds the first toplevel element with given tag. 604 # Same as getroot().find(path). 677 # Change for a DeprecationWarning in 1.4 678 warnings.warn( 679 "This method will be removed in future versions. " 680 "Use 'tree.iter()' or 'list(tree.iter())' instead.", 681 PendingDeprecationWarning, stacklevel=2 682 ) 683 return list(self.iter(tag)) 684 685 ## 686 # Same as getroot().find(path), starting at the root of the 687 # tree. 605 688 # 606 689 # @param path What element to look for. 690 # @keyparam namespaces Optional namespace prefix map. 607 691 # @return The first matching element, or None if no element was found. 608 692 # @defreturn Element or None 609 693 610 def find(self, path ):611 assert self._root is not None694 def find(self, path, namespaces=None): 695 # assert self._root is not None 612 696 if path[:1] == "/": 613 697 path = "." + path 614 return self._root.find(path) 615 616 ## 617 # Finds the element text for the first toplevel element with given 618 # tag. Same as getroot().findtext(path). 
619 # 620 # @param path What toplevel element to look for. 698 warnings.warn( 699 "This search is broken in 1.3 and earlier, and will be " 700 "fixed in a future version. If you rely on the current " 701 "behaviour, change it to %r" % path, 702 FutureWarning, stacklevel=2 703 ) 704 return self._root.find(path, namespaces) 705 706 ## 707 # Same as getroot().findtext(path), starting at the root of the tree. 708 # 709 # @param path What element to look for. 621 710 # @param default What to return if the element was not found. 711 # @keyparam namespaces Optional namespace prefix map. 622 712 # @return The text content of the first matching element, or the 623 713 # default value no element was found. Note that if the element 624 # hasis found, but has no text content, this method returns an714 # is found, but has no text content, this method returns an 625 715 # empty string. 626 716 # @defreturn string 627 717 628 def findtext(self, path, default=None ):629 assert self._root is not None718 def findtext(self, path, default=None, namespaces=None): 719 # assert self._root is not None 630 720 if path[:1] == "/": 631 721 path = "." + path 632 return self._root.findtext(path, default) 633 634 ## 635 # Finds all toplevel elements with the given tag. 636 # Same as getroot().findall(path). 722 warnings.warn( 723 "This search is broken in 1.3 and earlier, and will be " 724 "fixed in a future version. If you rely on the current " 725 "behaviour, change it to %r" % path, 726 FutureWarning, stacklevel=2 727 ) 728 return self._root.findtext(path, default, namespaces) 729 730 ## 731 # Same as getroot().findall(path), starting at the root of the tree. 637 732 # 638 733 # @param path What element to look for. 734 # @keyparam namespaces Optional namespace prefix map. 639 735 # @return A list or iterator containing all matching elements, 640 736 # in document order. 
641 737 # @defreturn list of Element instances 642 738 643 def findall(self, path ):644 assert self._root is not None739 def findall(self, path, namespaces=None): 740 # assert self._root is not None 645 741 if path[:1] == "/": 646 742 path = "." + path 647 return self._root.findall(path) 743 warnings.warn( 744 "This search is broken in 1.3 and earlier, and will be " 745 "fixed in a future version. If you rely on the current " 746 "behaviour, change it to %r" % path, 747 FutureWarning, stacklevel=2 748 ) 749 return self._root.findall(path, namespaces) 750 751 ## 752 # Finds all matching subelements, by tag name or path. 753 # Same as getroot().iterfind(path). 754 # 755 # @param path What element to look for. 756 # @keyparam namespaces Optional namespace prefix map. 757 # @return An iterator or sequence containing all matching elements, 758 # in document order. 759 # @defreturn a generated sequence of Element instances 760 761 def iterfind(self, path, namespaces=None): 762 # assert self._root is not None 763 if path[:1] == "/": 764 path = "." + path 765 warnings.warn( 766 "This search is broken in 1.3 and earlier, and will be " 767 "fixed in a future version. If you rely on the current " 768 "behaviour, change it to %r" % path, 769 FutureWarning, stacklevel=2 770 ) 771 return self._root.iterfind(path, namespaces) 648 772 649 773 ## 650 774 # Writes the element tree to a file, as XML. 651 775 # 776 # @def write(file, **options) 652 777 # @param file A file name, or a file object opened for writing. 653 # @param encoding Optional output encoding (default is US-ASCII). 654 655 def write(self, file, encoding="us-ascii"): 656 assert self._root is not None 657 if not hasattr(file, "write"): 658 file = open(file, "wb") 778 # @param **options Options, given as keyword arguments. 779 # @keyparam encoding Optional output encoding (default is US-ASCII). 780 # @keyparam xml_declaration Controls if an XML declaration should 781 # be added to the file. 
Use False for never, True for always, 782 # None for only if not US-ASCII or UTF-8. None is default. 783 # @keyparam default_namespace Sets the default XML namespace (for "xmlns"). 784 # @keyparam method Optional output method ("xml", "html", "text" or 785 # "c14n"; default is "xml"). 786 787 def write(self, file_or_filename, 788 # keyword arguments 789 encoding=None, 790 xml_declaration=None, 791 default_namespace=None, 792 method=None): 793 # assert self._root is not None 794 if not method: 795 method = "xml" 796 elif method not in _serialize: 797 # FIXME: raise an ImportError for c14n if ElementC14N is missing? 798 raise ValueError("unknown method %r" % method) 799 if hasattr(file_or_filename, "write"): 800 file = file_or_filename 801 else: 802 file = open(file_or_filename, "wb") 803 write = file.write 659 804 if not encoding: 660 encoding = "us-ascii" 661 elif encoding != "utf-8" and encoding != "us-ascii": 662 file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding) 663 self._write(file, self._root, encoding, {}) 664 665 def _write(self, file, node, encoding, namespaces): 666 # write XML to file 667 tag = node.tag 668 if tag is Comment: 669 file.write("<!-- %s -->" % _escape_cdata(node.text, encoding)) 670 elif tag is ProcessingInstruction: 671 file.write("<?%s?>" % _escape_cdata(node.text, encoding)) 805 if method == "c14n": 806 encoding = "utf-8" 807 else: 808 encoding = "us-ascii" 809 elif xml_declaration or (xml_declaration is None and 810 encoding not in ("utf-8", "us-ascii")): 811 if method == "xml": 812 write("<?xml version='1.0' encoding='%s'?>\n" % encoding) 813 if method == "text": 814 _serialize_text(write, self._root, encoding) 672 815 else: 673 items = node.items() 674 xmlns_items = [] # new namespaces in this scope 675 try: 676 if isinstance(tag, QName) or tag[:1] == "{": 677 tag, xmlns = fixtag(tag, namespaces) 678 if xmlns: xmlns_items.append(xmlns) 679 except TypeError: 680 _raise_serialization_error(tag) 681 file.write("<" + 
_encode(tag, encoding)) 682 if items or xmlns_items: 683 items.sort() # lexical order 684 for k, v in items: 685 try: 686 if isinstance(k, QName) or k[:1] == "{": 687 k, xmlns = fixtag(k, namespaces) 688 if xmlns: xmlns_items.append(xmlns) 689 except TypeError: 690 _raise_serialization_error(k) 691 try: 692 if isinstance(v, QName): 693 v, xmlns = fixtag(v, namespaces) 694 if xmlns: xmlns_items.append(xmlns) 695 except TypeError: 696 _raise_serialization_error(v) 697 file.write(" %s=\"%s\"" % (_encode(k, encoding), 698 _escape_attrib(v, encoding))) 699 for k, v in xmlns_items: 700 file.write(" %s=\"%s\"" % (_encode(k, encoding), 701 _escape_attrib(v, encoding))) 702 if node.text or len(node): 703 file.write(">") 704 if node.text: 705 file.write(_escape_cdata(node.text, encoding)) 706 for n in node: 707 self._write(file, n, encoding, namespaces) 708 file.write("</" + _encode(tag, encoding) + ">") 816 qnames, namespaces = _namespaces( 817 self._root, encoding, default_namespace 818 ) 819 serialize = _serialize[method] 820 serialize(write, self._root, encoding, qnames, namespaces) 821 if file_or_filename is not file: 822 file.close() 823 824 def write_c14n(self, file): 825 # lxml.etree compatibility. 
use output method instead 826 return self.write(file, method="c14n") 827 828 # -------------------------------------------------------------------- 829 # serialization support 830 831 def _namespaces(elem, encoding, default_namespace=None): 832 # identify namespaces used in this tree 833 834 # maps qnames to *encoded* prefix:local names 835 qnames = {None: None} 836 837 # maps uri:s to prefixes 838 namespaces = {} 839 if default_namespace: 840 namespaces[default_namespace] = "" 841 842 def encode(text): 843 return text.encode(encoding) 844 845 def add_qname(qname): 846 # calculate serialized qname representation 847 try: 848 if qname[:1] == "{": 849 uri, tag = qname[1:].rsplit("}", 1) 850 prefix = namespaces.get(uri) 851 if prefix is None: 852 prefix = _namespace_map.get(uri) 853 if prefix is None: 854 prefix = "ns%d" % len(namespaces) 855 if prefix != "xml": 856 namespaces[uri] = prefix 857 if prefix: 858 qnames[qname] = encode("%s:%s" % (prefix, tag)) 859 else: 860 qnames[qname] = encode(tag) # default element 709 861 else: 710 file.write(" />") 711 for k, v in xmlns_items: 712 del namespaces[v] 713 if node.tail: 714 file.write(_escape_cdata(node.tail, encoding)) 862 if default_namespace: 863 # FIXME: can this be handled in XML 1.0? 
864 raise ValueError( 865 "cannot use non-qualified names with " 866 "default_namespace option" 867 ) 868 qnames[qname] = encode(qname) 869 except TypeError: 870 _raise_serialization_error(qname) 871 872 # populate qname and namespaces table 873 try: 874 iterate = elem.iter 875 except AttributeError: 876 iterate = elem.getiterator # cET compatibility 877 for elem in iterate(): 878 tag = elem.tag 879 if isinstance(tag, QName): 880 if tag.text not in qnames: 881 add_qname(tag.text) 882 elif isinstance(tag, basestring): 883 if tag not in qnames: 884 add_qname(tag) 885 elif tag is not None and tag is not Comment and tag is not PI: 886 _raise_serialization_error(tag) 887 for key, value in elem.items(): 888 if isinstance(key, QName): 889 key = key.text 890 if key not in qnames: 891 add_qname(key) 892 if isinstance(value, QName) and value.text not in qnames: 893 add_qname(value.text) 894 text = elem.text 895 if isinstance(text, QName) and text.text not in qnames: 896 add_qname(text.text) 897 return qnames, namespaces 898 899 def _serialize_xml(write, elem, encoding, qnames, namespaces): 900 tag = elem.tag 901 text = elem.text 902 if tag is Comment: 903 write("<!--%s-->" % _encode(text, encoding)) 904 elif tag is ProcessingInstruction: 905 write("<?%s?>" % _encode(text, encoding)) 906 else: 907 tag = qnames[tag] 908 if tag is None: 909 if text: 910 write(_escape_cdata(text, encoding)) 911 for e in elem: 912 _serialize_xml(write, e, encoding, qnames, None) 913 else: 914 write("<" + tag) 915 items = elem.items() 916 if items or namespaces: 917 if namespaces: 918 for v, k in sorted(namespaces.items(), 919 key=lambda x: x[1]): # sort on prefix 920 if k: 921 k = ":" + k 922 write(" xmlns%s=\"%s\"" % ( 923 k.encode(encoding), 924 _escape_attrib(v, encoding) 925 )) 926 for k, v in sorted(items): # lexical order 927 if isinstance(k, QName): 928 k = k.text 929 if isinstance(v, QName): 930 v = qnames[v.text] 931 else: 932 v = _escape_attrib(v, encoding) 933 write(" %s=\"%s\"" % 
(qnames[k], v)) 934 if text or len(elem): 935 write(">") 936 if text: 937 write(_escape_cdata(text, encoding)) 938 for e in elem: 939 _serialize_xml(write, e, encoding, qnames, None) 940 write("</" + tag + ">") 941 else: 942 write(" />") 943 if elem.tail: 944 write(_escape_cdata(elem.tail, encoding)) 945 946 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", 947 "img", "input", "isindex", "link", "meta", "param") 948 949 try: 950 HTML_EMPTY = set(HTML_EMPTY) 951 except NameError: 952 pass 953 954 def _serialize_html(write, elem, encoding, qnames, namespaces): 955 tag = elem.tag 956 text = elem.text 957 if tag is Comment: 958 write("<!--%s-->" % _escape_cdata(text, encoding)) 959 elif tag is ProcessingInstruction: 960 write("<?%s?>" % _escape_cdata(text, encoding)) 961 else: 962 tag = qnames[tag] 963 if tag is None: 964 if text: 965 write(_escape_cdata(text, encoding)) 966 for e in elem: 967 _serialize_html(write, e, encoding, qnames, None) 968 else: 969 write("<" + tag) 970 items = elem.items() 971 if items or namespaces: 972 if namespaces: 973 for v, k in sorted(namespaces.items(), 974 key=lambda x: x[1]): # sort on prefix 975 if k: 976 k = ":" + k 977 write(" xmlns%s=\"%s\"" % ( 978 k.encode(encoding), 979 _escape_attrib(v, encoding) 980 )) 981 for k, v in sorted(items): # lexical order 982 if isinstance(k, QName): 983 k = k.text 984 if isinstance(v, QName): 985 v = qnames[v.text] 986 else: 987 v = _escape_attrib_html(v, encoding) 988 # FIXME: handle boolean attributes 989 write(" %s=\"%s\"" % (qnames[k], v)) 990 write(">") 991 ltag = tag.lower() 992 if text: 993 if ltag == "script" or ltag == "style": 994 write(_encode(text, encoding)) 995 else: 996 write(_escape_cdata(text, encoding)) 997 for e in elem: 998 _serialize_html(write, e, encoding, qnames, None) 999 if ltag not in HTML_EMPTY: 1000 write("</" + tag + ">") 1001 if elem.tail: 1002 write(_escape_cdata(elem.tail, encoding)) 1003 1004 def _serialize_text(write, elem, encoding): 1005 for 
part in elem.itertext(): 1006 write(part.encode(encoding)) 1007 if elem.tail: 1008 write(elem.tail.encode(encoding)) 1009 1010 _serialize = { 1011 "xml": _serialize_xml, 1012 "html": _serialize_html, 1013 "text": _serialize_text, 1014 # this optional method is imported at the end of the module 1015 # "c14n": _serialize_c14n, 1016 } 1017 1018 ## 1019 # Registers a namespace prefix. The registry is global, and any 1020 # existing mapping for either the given prefix or the namespace URI 1021 # will be removed. 1022 # 1023 # @param prefix Namespace prefix. 1024 # @param uri Namespace uri. Tags and attributes in this namespace 1025 # will be serialized with the given prefix, if at all possible. 1026 # @exception ValueError If the prefix is reserved, or is otherwise 1027 # invalid. 1028 1029 def register_namespace(prefix, uri): 1030 if re.match("ns\d+$", prefix): 1031 raise ValueError("Prefix format reserved for internal use") 1032 for k, v in _namespace_map.items(): 1033 if k == uri or v == prefix: 1034 del _namespace_map[k] 1035 _namespace_map[uri] = prefix 1036 1037 _namespace_map = { 1038 # "well-known" namespace prefixes 1039 "http://www.w3.org/XML/1998/namespace": "xml", 1040 "http://www.w3.org/1999/xhtml": "html", 1041 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", 1042 "http://schemas.xmlsoap.org/wsdl/": "wsdl", 1043 # xml schema 1044 "http://www.w3.org/2001/XMLSchema": "xs", 1045 "http://www.w3.org/2001/XMLSchema-instance": "xsi", 1046 # dublin core 1047 "http://purl.org/dc/elements/1.1/": "dc", 1048 } 1049 1050 def _raise_serialization_error(text): 1051 raise TypeError( 1052 "cannot serialize %r (type %s)" % (text, type(text).__name__) 1053 ) 1054 1055 def _encode(text, encoding): 1056 try: 1057 return text.encode(encoding, "xmlcharrefreplace") 1058 except (TypeError, AttributeError): 1059 _raise_serialization_error(text) 1060 1061 def _escape_cdata(text, encoding): 1062 # escape character data 1063 try: 1064 # it's worth avoiding do-nothing calls for 
strings that are 1065 # shorter than 500 character, or so. assume that's, by far, 1066 # the most common case in most applications. 1067 if "&" in text: 1068 text = text.replace("&", "&amp;") 1069 if "<" in text: 1070 text = text.replace("<", "&lt;") 1071 if ">" in text: 1072 text = text.replace(">", "&gt;") 1073 return text.encode(encoding, "xmlcharrefreplace") 1074 except (TypeError, AttributeError): 1075 _raise_serialization_error(text) 1076 1077 def _escape_attrib(text, encoding): 1078 # escape attribute value 1079 try: 1080 if "&" in text: 1081 text = text.replace("&", "&amp;") 1082 if "<" in text: 1083 text = text.replace("<", "&lt;") 1084 if ">" in text: 1085 text = text.replace(">", "&gt;") 1086 if "\"" in text: 1087 text = text.replace("\"", "&quot;") 1088 if "\n" in text: 1089 text = text.replace("\n", "&#10;") 1090 return text.encode(encoding, "xmlcharrefreplace") 1091 except (TypeError, AttributeError): 1092 _raise_serialization_error(text) 1093 1094 def _escape_attrib_html(text, encoding): 1095 # escape attribute value 1096 try: 1097 if "&" in text: 1098 text = text.replace("&", "&amp;") 1099 if ">" in text: 1100 text = text.replace(">", "&gt;") 1101 if "\"" in text: 1102 text = text.replace("\"", "&quot;") 1103 return text.encode(encoding, "xmlcharrefreplace") 1104 except (TypeError, AttributeError): 1105 _raise_serialization_error(text) 715 1106 716 1107 # -------------------------------------------------------------------- 717 # helpers 718 719 ## 720 # Checks if an object appears to be a valid element object. 721 # 722 # @param An element instance. 723 # @return A true value if this is an element object. 724 # @defreturn flag 725 726 def iselement(element): 727 # FIXME: not sure about this; might be a better idea to look 728 # for tag/attrib/text attributes 729 return isinstance(element, _ElementInterface) or hasattr(element, "tag") 1108 1109 ## 1110 # Generates a string representation of an XML element, including all 1111 # subelements. 1112 # 1113 # @param element An Element instance. 
1114 # @keyparam encoding Optional output encoding (default is US-ASCII). 1115 # @keyparam method Optional output method ("xml", "html", "text" or 1116 # "c14n"; default is "xml"). 1117 # @return An encoded string containing the XML data. 1118 # @defreturn string 1119 1120 def tostring(element, encoding=None, method=None): 1121 class dummy: 1122 pass 1123 data = [] 1124 file = dummy() 1125 file.write = data.append 1126 ElementTree(element).write(file, encoding, method=method) 1127 return "".join(data) 1128 1129 ## 1130 # Generates a string representation of an XML element, including all 1131 # subelements. The string is returned as a sequence of string fragments. 1132 # 1133 # @param element An Element instance. 1134 # @keyparam encoding Optional output encoding (default is US-ASCII). 1135 # @keyparam method Optional output method ("xml", "html", "text" or 1136 # "c14n"; default is "xml"). 1137 # @return A sequence object containing the XML data. 1138 # @defreturn sequence 1139 # @since 1.3 1140 1141 def tostringlist(element, encoding=None, method=None): 1142 class dummy: 1143 pass 1144 data = [] 1145 file = dummy() 1146 file.write = data.append 1147 ElementTree(element).write(file, encoding, method=method) 1148 # FIXME: merge small fragments into larger parts 1149 return data 730 1150 731 1151 ## … … 747 1167 sys.stdout.write("\n") 748 1168 749 def _encode(s, encoding): 750 try: 751 return s.encode(encoding) 752 except AttributeError: 753 return s # 1.5.2: assume the string uses the right encoding 754 755 if sys.version[:3] == "1.5": 756 _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2 757 else: 758 _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"')) 759 760 _escape_map = { 761 "&": "&amp;", 762 "<": "&lt;", 763 ">": "&gt;", 764 '"': "&quot;", 765 } 766 767 _namespace_map = { 768 # "well-known" namespace prefixes 769 "http://www.w3.org/XML/1998/namespace": "xml", 770 "http://www.w3.org/1999/xhtml": "html", 771 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", 772 
"http://schemas.xmlsoap.org/wsdl/": "wsdl", 773 } 774 775 def _raise_serialization_error(text): 776 raise TypeError( 777 "cannot serialize %r (type %s)" % (text, type(text).__name__) 778 ) 779 780 def _encode_entity(text, pattern=_escape): 781 # map reserved and non-ascii characters to numerical entities 782 def escape_entities(m, map=_escape_map): 783 out = [] 784 append = out.append 785 for char in m.group(): 786 text = map.get(char) 787 if text is None: 788 text = "&#%d;" % ord(char) 789 append(text) 790 return string.join(out, "") 791 try: 792 return _encode(pattern.sub(escape_entities, text), "ascii") 793 except TypeError: 794 _raise_serialization_error(text) 795 796 # 797 # the following functions assume an ascii-compatible encoding 798 # (or "utf-16") 799 800 def _escape_cdata(text, encoding=None, replace=string.replace): 801 # escape character data 802 try: 803 if encoding: 804 try: 805 text = _encode(text, encoding) 806 except UnicodeError: 807 return _encode_entity(text) 808 text = replace(text, "&", "&amp;") 809 text = replace(text, "<", "&lt;") 810 text = replace(text, ">", "&gt;") 811 return text 812 except (TypeError, AttributeError): 813 _raise_serialization_error(text) 814 815 def _escape_attrib(text, encoding=None, replace=string.replace): 816 # escape attribute value 817 try: 818 if encoding: 819 try: 820 text = _encode(text, encoding) 821 except UnicodeError: 822 return _encode_entity(text) 823 text = replace(text, "&", "&amp;") 824 text = replace(text, "'", "&apos;") # FIXME: overkill 825 text = replace(text, "\"", "&quot;") 826 text = replace(text, "<", "&lt;") 827 text = replace(text, ">", "&gt;") 828 return text 829 except (TypeError, AttributeError): 830 _raise_serialization_error(text) 831 832 def fixtag(tag, namespaces): 833 # given a decorated tag (of the form {uri}tag), return prefixed 834 # tag and namespace declaration, if any 835 if isinstance(tag, QName): 836 tag = tag.text 837 namespace_uri, tag = string.split(tag[1:], "}", 1) 838 prefix = 
namespaces.get(namespace_uri) 839 if prefix is None: 840 prefix = _namespace_map.get(namespace_uri) 841 if prefix is None: 842 prefix = "ns%d" % len(namespaces) 843 namespaces[namespace_uri] = prefix 844 if prefix == "xml": 845 xmlns = None 846 else: 847 xmlns = ("xmlns:%s" % prefix, namespace_uri) 848 else: 849 xmlns = None 850 return "%s:%s" % (prefix, tag), xmlns 1169 # -------------------------------------------------------------------- 1170 # parsing 851 1171 852 1172 ## … … 855 1175 # @param source A filename or file object containing XML data. 856 1176 # @param parser An optional parser instance. If not given, the 857 # standard {@link XML TreeBuilder} parser is used.1177 # standard {@link XMLParser} parser is used. 858 1178 # @return An ElementTree instance 859 1179 … … 870 1190 # @param events A list of events to report back. If omitted, only "end" 871 1191 # events are reported. 1192 # @param parser An optional parser instance. If not given, the 1193 # standard {@link XMLParser} parser is used. 872 1194 # @return A (event, elem) iterator. 
873 1195 874 class iterparse: 875 876 def __init__(self, source, events=None): 877 if not hasattr(source, "read"): 878 source = open(source, "rb") 1196 def iterparse(source, events=None, parser=None): 1197 close_source = False 1198 if not hasattr(source, "read"): 1199 source = open(source, "rb") 1200 close_source = True 1201 if not parser: 1202 parser = XMLParser(target=TreeBuilder()) 1203 return _IterParseIterator(source, events, parser, close_source) 1204 1205 class _IterParseIterator(object): 1206 1207 def __init__(self, source, events, parser, close_source=False): 879 1208 self._file = source 1209 self._close_file = close_source 880 1210 self._events = [] 881 1211 self._index = 0 1212 self._error = None 882 1213 self.root = self._root = None 883 self._parser = XMLTreeBuilder()1214 self._parser = parser 884 1215 # wire up the parser for event reporting 885 1216 parser = self._parser._parser … … 909 1240 def handler(prefix, uri, event=event, append=append): 910 1241 try: 911 uri = _encode(uri,"ascii")1242 uri = (uri or "").encode("ascii") 912 1243 except UnicodeError: 913 1244 pass 914 append((event, (prefix or "", uri )))1245 append((event, (prefix or "", uri or ""))) 915 1246 parser.StartNamespaceDeclHandler = handler 916 1247 elif event == "end-ns": … … 918 1249 append((event, None)) 919 1250 parser.EndNamespaceDeclHandler = handler 1251 else: 1252 raise ValueError("unknown event %r" % event) 920 1253 921 1254 def next(self): … … 923 1256 try: 924 1257 item = self._events[self._index] 1258 self._index += 1 1259 return item 925 1260 except IndexError: 926 if self._parser is None: 927 self.root = self._root 928 try: 929 raise StopIteration 930 except NameError: 931 raise IndexError 932 # load event buffer 933 del self._events[:] 934 self._index = 0 935 data = self._file.read(16384) 936 if data: 1261 pass 1262 if self._error: 1263 e = self._error 1264 self._error = None 1265 raise e 1266 if self._parser is None: 1267 self.root = self._root 1268 if 
self._close_file: 1269 self._file.close() 1270 raise StopIteration 1271 # load event buffer 1272 del self._events[:] 1273 self._index = 0 1274 data = self._file.read(16384) 1275 if data: 1276 try: 937 1277 self._parser.feed(data) 938 else: 939 self._root = self._parser.close() 940 self._parser = None 1278 except SyntaxError as exc: 1279 self._error = exc 941 1280 else: 942 self._index = self._index + 1 943 return item 944 945 try: 946 iter 947 def __iter__(self): 948 return self 949 except NameError: 950 def __getitem__(self, index): 951 return self.next() 1281 self._root = self._parser.close() 1282 self._parser = None 1283 1284 def __iter__(self): 1285 return self 952 1286 953 1287 ## … … 956 1290 # 957 1291 # @param source A string containing XML data. 1292 # @param parser An optional parser instance. If not given, the 1293 # standard {@link XMLParser} parser is used. 958 1294 # @return An Element instance. 959 1295 # @defreturn Element 960 1296 961 def XML(text): 962 parser = XMLTreeBuilder() 1297 def XML(text, parser=None): 1298 if not parser: 1299 parser = XMLParser(target=TreeBuilder()) 963 1300 parser.feed(text) 964 1301 return parser.close() … … 969 1306 # 970 1307 # @param source A string containing XML data. 1308 # @param parser An optional parser instance. If not given, the 1309 # standard {@link XMLParser} parser is used. 971 1310 # @return A tuple containing an Element instance and a dictionary. 972 1311 # @defreturn (Element, dictionary) 973 1312 974 def XMLID(text): 975 parser = XMLTreeBuilder() 1313 def XMLID(text, parser=None): 1314 if not parser: 1315 parser = XMLParser(target=TreeBuilder()) 976 1316 parser.feed(text) 977 1317 tree = parser.close() 978 1318 ids = {} 979 for elem in tree. getiterator():1319 for elem in tree.iter(): 980 1320 id = elem.get("id") 981 1321 if id: … … 994 1334 995 1335 ## 996 # Generates a string representation of an XML element, including all 997 # subelements. 998 # 999 # @param element An Element instance. 
1000 # @return An encoded string containing the XML data. 1001 # @defreturn string 1002 1003 def tostring(element, encoding=None): 1004 class dummy: 1005 pass 1006 data = [] 1007 file = dummy() 1008 file.write = data.append 1009 ElementTree(element).write(file, encoding) 1010 return string.join(data, "") 1336 # Parses an XML document from a sequence of string fragments. 1337 # 1338 # @param sequence A list or other sequence containing XML data fragments. 1339 # @param parser An optional parser instance. If not given, the 1340 # standard {@link XMLParser} parser is used. 1341 # @return An Element instance. 1342 # @defreturn Element 1343 # @since 1.3 1344 1345 def fromstringlist(sequence, parser=None): 1346 if not parser: 1347 parser = XMLParser(target=TreeBuilder()) 1348 for text in sequence: 1349 parser.feed(text) 1350 return parser.close() 1351 1352 # -------------------------------------------------------------------- 1011 1353 1012 1354 ## … … 1021 1363 # is called to create new Element instances, as necessary. 1022 1364 1023 class TreeBuilder :1365 class TreeBuilder(object): 1024 1366 1025 1367 def __init__(self, element_factory=None): … … 1029 1371 self._tail = None # true if we're after an end tag 1030 1372 if element_factory is None: 1031 element_factory = _ElementInterface1373 element_factory = Element 1032 1374 self._factory = element_factory 1033 1375 1034 1376 ## 1035 # Flushes the parser buffers, and returns the toplevel documen1377 # Flushes the builder buffers, and returns the toplevel document 1036 1378 # element. 
1037 1379 # … … 1041 1383 def close(self): 1042 1384 assert len(self._elem) == 0, "missing end tags" 1043 assert self._last !=None, "missing toplevel element"1385 assert self._last is not None, "missing toplevel element" 1044 1386 return self._last 1045 1387 … … 1047 1389 if self._data: 1048 1390 if self._last is not None: 1049 text = string.join(self._data, "")1391 text = "".join(self._data) 1050 1392 if self._tail: 1051 1393 assert self._last.tail is None, "internal error (tail)" … … 1106 1448 # @keyparam html Predefine HTML entities. This flag is not supported 1107 1449 # by the current implementation. 1450 # @keyparam encoding Optional encoding. If given, the value overrides 1451 # the encoding specified in the XML file. 1108 1452 # @see #ElementTree 1109 1453 # @see #TreeBuilder 1110 1454 1111 class XML TreeBuilder:1112 1113 def __init__(self, html=0, target=None ):1455 class XMLParser(object): 1456 1457 def __init__(self, html=0, target=None, encoding=None): 1114 1458 try: 1115 1459 from xml.parsers import expat 1116 1460 except ImportError: 1117 raise ImportError( 1118 "No module named expat; use SimpleXMLTreeBuilder instead" 1119 ) 1120 self._parser = parser = expat.ParserCreate(None, "}") 1461 try: 1462 import pyexpat as expat 1463 except ImportError: 1464 raise ImportError( 1465 "No module named expat; use SimpleXMLTreeBuilder instead" 1466 ) 1467 parser = expat.ParserCreate(encoding, "}") 1121 1468 if target is None: 1122 1469 target = TreeBuilder() 1123 self._target = target 1470 # underscored names are provided for compatibility only 1471 self.parser = self._parser = parser 1472 self.target = self._target = target 1473 self._error = expat.error 1124 1474 self._names = {} # name memo cache 1125 1475 # callbacks … … 1128 1478 parser.EndElementHandler = self._end 1129 1479 parser.CharacterDataHandler = self._data 1480 # optional callbacks 1481 parser.CommentHandler = self._comment 1482 parser.ProcessingInstructionHandler = self._pi 1130 1483 # let expat 
do the buffering, if supported 1131 1484 try: … … 1140 1493 except AttributeError: 1141 1494 pass 1142 encoding = None1143 if not parser.returns_unicode:1144 encoding = "utf-8"1145 # target.xml(encoding, None)1146 1495 self._doctype = None 1147 1496 self.entity = {} 1497 try: 1498 self.version = "Expat %d.%d.%d" % expat.version_info 1499 except AttributeError: 1500 pass # unknown 1501 1502 def _raiseerror(self, value): 1503 err = ParseError(value) 1504 err.code = value.code 1505 err.position = value.lineno, value.offset 1506 raise err 1148 1507 1149 1508 def _fixtext(self, text): 1150 1509 # convert text string to ascii, if possible 1151 1510 try: 1152 return _encode(text,"ascii")1511 return text.encode("ascii") 1153 1512 except UnicodeError: 1154 1513 return text … … 1167 1526 def _start(self, tag, attrib_in): 1168 1527 fixname = self._fixname 1528 fixtext = self._fixtext 1169 1529 tag = fixname(tag) 1170 1530 attrib = {} 1171 1531 for key, value in attrib_in.items(): 1172 attrib[fixname(key)] = self._fixtext(value)1173 return self. _target.start(tag, attrib)1532 attrib[fixname(key)] = fixtext(value) 1533 return self.target.start(tag, attrib) 1174 1534 1175 1535 def _start_list(self, tag, attrib_in): 1176 1536 fixname = self._fixname 1537 fixtext = self._fixtext 1177 1538 tag = fixname(tag) 1178 1539 attrib = {} 1179 1540 if attrib_in: 1180 1541 for i in range(0, len(attrib_in), 2): 1181 attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])1182 return self. _target.start(tag, attrib)1542 attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1]) 1543 return self.target.start(tag, attrib) 1183 1544 1184 1545 def _data(self, text): 1185 return self. 
_target.data(self._fixtext(text))1546 return self.target.data(self._fixtext(text)) 1186 1547 1187 1548 def _end(self, tag): 1188 return self._target.end(self._fixname(tag)) 1549 return self.target.end(self._fixname(tag)) 1550 1551 def _comment(self, data): 1552 try: 1553 comment = self.target.comment 1554 except AttributeError: 1555 pass 1556 else: 1557 return comment(self._fixtext(data)) 1558 1559 def _pi(self, target, data): 1560 try: 1561 pi = self.target.pi 1562 except AttributeError: 1563 pass 1564 else: 1565 return pi(self._fixtext(target), self._fixtext(data)) 1189 1566 1190 1567 def _default(self, text): … … 1193 1570 # deal with undefined entities 1194 1571 try: 1195 self. _target.data(self.entity[text[1:-1]])1572 self.target.data(self.entity[text[1:-1]]) 1196 1573 except KeyError: 1197 1574 from xml.parsers import expat 1198 raiseexpat.error(1575 err = expat.error( 1199 1576 "undefined entity %s: line %d, column %d" % 1200 1577 (text, self._parser.ErrorLineNumber, 1201 1578 self._parser.ErrorColumnNumber) 1202 1579 ) 1580 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY 1581 err.lineno = self._parser.ErrorLineNumber 1582 err.offset = self._parser.ErrorColumnNumber 1583 raise err 1203 1584 elif prefix == "<" and text[:9] == "<!DOCTYPE": 1204 1585 self._doctype = [] # inside a doctype declaration … … 1208 1589 self._doctype = None 1209 1590 return 1210 text = string.strip(text)1591 text = text.strip() 1211 1592 if not text: 1212 1593 return … … 1224 1605 if pubid: 1225 1606 pubid = pubid[1:-1] 1226 self.doctype(name, pubid, system[1:-1]) 1607 if hasattr(self.target, "doctype"): 1608 self.target.doctype(name, pubid, system[1:-1]) 1609 elif self.doctype is not self._XMLParser__doctype: 1610 # warn about deprecated call 1611 self._XMLParser__doctype(name, pubid, system[1:-1]) 1612 self.doctype(name, pubid, system[1:-1]) 1227 1613 self._doctype = None 1228 1614 1229 1615 ## 1230 # Handles a doctype declaration.1616 # (Deprecated) Handles a doctype declaration. 
1231 1617 # 1232 1618 # @param name Doctype name. … … 1235 1621 1236 1622 def doctype(self, name, pubid, system): 1237 pass 1623 """This method of XMLParser is deprecated.""" 1624 warnings.warn( 1625 "This method of XMLParser is deprecated. Define doctype() " 1626 "method on the TreeBuilder target.", 1627 DeprecationWarning, 1628 ) 1629 1630 # sentinel, if doctype is redefined in a subclass 1631 __doctype = doctype 1238 1632 1239 1633 ## … … 1243 1637 1244 1638 def feed(self, data): 1245 self._parser.Parse(data, 0) 1639 try: 1640 self._parser.Parse(data, 0) 1641 except self._error, v: 1642 self._raiseerror(v) 1246 1643 1247 1644 ## … … 1252 1649 1253 1650 def close(self): 1254 self._parser.Parse("", 1) # end of data 1255 tree = self._target.close() 1256 del self._target, self._parser # get rid of circular references 1651 try: 1652 self._parser.Parse("", 1) # end of data 1653 except self._error, v: 1654 self._raiseerror(v) 1655 tree = self.target.close() 1656 del self.target, self._parser # get rid of circular references 1257 1657 return tree 1258 1658 1259 1659 # compatibility 1260 XMLParser = XMLTreeBuilder 1660 XMLTreeBuilder = XMLParser 1661 1662 # workaround circular import. 1663 try: 1664 from ElementC14N import _serialize_c14n 1665 _serialize["c14n"] = _serialize_c14n 1666 except ImportError: 1667 pass -
python/trunk/Lib/xml/etree/__init__.py
r2 r391 1 # $Id: __init__.py 1821 2004-06-03 16:57:49Z fredrik $1 # $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $ 2 2 # elementtree package 3 3 … … 5 5 # The ElementTree toolkit is 6 6 # 7 # Copyright (c) 1999-200 4by Fredrik Lundh7 # Copyright (c) 1999-2008 by Fredrik Lundh 8 8 # 9 9 # By obtaining, using, and/or copying this software and/or its … … 31 31 32 32 # Licensed to PSF under a Contributor Agreement. 33 # See http://www.python.org/ 2.4/license for licensing details.33 # See http://www.python.org/psf/license for licensing details. -
python/trunk/Lib/xml/sax/__init__.py
r2 r391 60 60 61 61 import os, sys 62 if os.environ.has_key("PY_SAX_PARSER"):62 if "PY_SAX_PARSER" in os.environ: 63 63 default_parser_list = os.environ["PY_SAX_PARSER"].split(",") 64 64 del os -
python/trunk/Lib/xml/sax/_exceptions.py
r2 r391 13 13 functionality, or to add localization. Note that although you will 14 14 receive a SAXException as the argument to the handlers in the 15 ErrorHandler interface, you are not actually required to throw15 ErrorHandler interface, you are not actually required to raise 16 16 the exception; instead, you can simply read the information in 17 17 it.""" … … 51 51 receive a SAXParseException as the argument to the handlers in the 52 52 ErrorHandler interface, the application is not actually required 53 to throwthe exception; instead, it can simply read the53 to raise the exception; instead, it can simply read the 54 54 information in it and take a different action. 55 55 … … 63 63 64 64 # We need to cache this stuff at construction time. 65 # If this exception is thrown, the objects through which we must65 # If this exception is raised, the objects through which we must 66 66 # traverse to get this information may be deleted by the time 67 67 # it gets caught. -
python/trunk/Lib/xml/sax/expatreader.py
r2 r391 109 109 def prepareParser(self, source): 110 110 if source.getSystemId() is not None: 111 self._parser.SetBase(source.getSystemId()) 111 base = source.getSystemId() 112 if isinstance(base, unicode): 113 base = base.encode('utf-8') 114 self._parser.SetBase(base) 112 115 113 116 # Redefined setContentHandler to allow changing handlers during parsing … … 408 411 409 412 if __name__ == "__main__": 410 import xml.sax 413 import xml.sax.saxutils 411 414 p = create_parser() 412 p.setContentHandler(xml.sax. XMLGenerator())415 p.setContentHandler(xml.sax.saxutils.XMLGenerator()) 413 416 p.setErrorHandler(xml.sax.ErrorHandler()) 414 p.parse(" ../../../hamlet.xml")417 p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml") -
python/trunk/Lib/xml/sax/handler.py
r2 r391 7 7 of the interfaces. 8 8 9 $Id : handler.py 35816 2004-05-06 03:47:48Z fdrake$9 $Id$ 10 10 """ 11 11 -
python/trunk/Lib/xml/sax/saxutils.py
r2 r391 5 5 6 6 import os, urlparse, urllib, types 7 import io 8 import sys 7 9 import handler 8 10 import xmlreader … … 12 14 except AttributeError: 13 15 _StringTypes = [types.StringType] 14 15 # See whether the xmlcharrefreplace error handler is16 # supported17 try:18 from codecs import xmlcharrefreplace_errors19 _error_handling = "xmlcharrefreplace"20 del xmlcharrefreplace_errors21 except ImportError:22 _error_handling = "strict"23 16 24 17 def __dict_replace(s, d): … … 82 75 83 76 77 def _gettextwriter(out, encoding): 78 if out is None: 79 import sys 80 out = sys.stdout 81 82 if isinstance(out, io.RawIOBase): 83 buffer = io.BufferedIOBase(out) 84 # Keep the original file open when the TextIOWrapper is 85 # destroyed 86 buffer.close = lambda: None 87 else: 88 # This is to handle passed objects that aren't in the 89 # IOBase hierarchy, but just have a write method 90 buffer = io.BufferedIOBase() 91 buffer.writable = lambda: True 92 buffer.write = out.write 93 try: 94 # TextIOWrapper uses this methods to determine 95 # if BOM (for UTF-16, etc) should be added 96 buffer.seekable = out.seekable 97 buffer.tell = out.tell 98 except AttributeError: 99 pass 100 # wrap a binary writer with TextIOWrapper 101 class UnbufferedTextIOWrapper(io.TextIOWrapper): 102 def write(self, s): 103 super(UnbufferedTextIOWrapper, self).write(s) 104 self.flush() 105 return UnbufferedTextIOWrapper(buffer, encoding=encoding, 106 errors='xmlcharrefreplace', 107 newline='\n') 108 84 109 class XMLGenerator(handler.ContentHandler): 85 110 86 111 def __init__(self, out=None, encoding="iso-8859-1"): 87 if out is None:88 import sys89 out = sys.stdout90 112 handler.ContentHandler.__init__(self) 91 self._out = out 113 out = _gettextwriter(out, encoding) 114 self._write = out.write 115 self._flush = out.flush 92 116 self._ns_contexts = [{}] # contains uri -> prefix dicts 93 117 self._current_context = self._ns_contexts[-1] … … 95 119 self._encoding = encoding 96 120 97 def _write(self, text):98 if 
isinstance(text, str):99 self._out.write(text)100 else:101 self._out.write(text.encode(self._encoding, _error_handling))102 103 121 def _qname(self, name): 104 122 """Builds a qualified name from a (ns_url, localname) pair""" 105 123 if name[0]: 124 # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is 125 # bound by definition to http://www.w3.org/XML/1998/namespace. It 126 # does not need to be declared and will not usually be found in 127 # self._current_context. 128 if 'http://www.w3.org/XML/1998/namespace' == name[0]: 129 return 'xml:' + name[1] 106 130 # The name is in a non-empty namespace 107 131 prefix = self._current_context[name[0]] … … 115 139 116 140 def startDocument(self): 117 self._write( '<?xml version="1.0" encoding="%s"?>\n' %141 self._write(u'<?xml version="1.0" encoding="%s"?>\n' % 118 142 self._encoding) 143 144 def endDocument(self): 145 self._flush() 119 146 120 147 def startPrefixMapping(self, prefix, uri): … … 128 155 129 156 def startElement(self, name, attrs): 130 self._write( '<' + name)157 self._write(u'<' + name) 131 158 for (name, value) in attrs.items(): 132 self._write( ' %s=%s' % (name, quoteattr(value)))133 self._write( '>')159 self._write(u' %s=%s' % (name, quoteattr(value))) 160 self._write(u'>') 134 161 135 162 def endElement(self, name): 136 self._write( '</%s>' % name)163 self._write(u'</%s>' % name) 137 164 138 165 def startElementNS(self, name, qname, attrs): 139 self._write( '<' + self._qname(name))166 self._write(u'<' + self._qname(name)) 140 167 141 168 for prefix, uri in self._undeclared_ns_maps: 142 169 if prefix: 143 self._ out.write(' xmlns:%s="%s"' % (prefix, uri))170 self._write(u' xmlns:%s="%s"' % (prefix, uri)) 144 171 else: 145 self._ out.write(' xmlns="%s"' % uri)172 self._write(u' xmlns="%s"' % uri) 146 173 self._undeclared_ns_maps = [] 147 174 148 175 for (name, value) in attrs.items(): 149 self._write( ' %s=%s' % (self._qname(name), quoteattr(value)))150 self._write( '>')176 self._write(u' %s=%s' 
% (self._qname(name), quoteattr(value))) 177 self._write(u'>') 151 178 152 179 def endElementNS(self, name, qname): 153 self._write( '</%s>' % self._qname(name))180 self._write(u'</%s>' % self._qname(name)) 154 181 155 182 def characters(self, content): 183 if not isinstance(content, unicode): 184 content = unicode(content, self._encoding) 156 185 self._write(escape(content)) 157 186 158 187 def ignorableWhitespace(self, content): 188 if not isinstance(content, unicode): 189 content = unicode(content, self._encoding) 159 190 self._write(content) 160 191 161 192 def processingInstruction(self, target, data): 162 self._write( '<?%s %s?>' % (target, data))193 self._write(u'<?%s %s?>' % (target, data)) 163 194 164 195 … … 288 319 289 320 if source.getByteStream() is None: 290 sysid = source.getSystemId() 291 basehead = os.path.dirname(os.path.normpath(base)) 292 sysidfilename = os.path.join(basehead, sysid) 293 if os.path.isfile(sysidfilename): 321 try: 322 sysid = source.getSystemId() 323 basehead = os.path.dirname(os.path.normpath(base)) 324 encoding = sys.getfilesystemencoding() 325 if isinstance(sysid, unicode): 326 if not isinstance(basehead, unicode): 327 try: 328 basehead = basehead.decode(encoding) 329 except UnicodeDecodeError: 330 sysid = sysid.encode(encoding) 331 else: 332 if isinstance(basehead, unicode): 333 try: 334 sysid = sysid.decode(encoding) 335 except UnicodeDecodeError: 336 basehead = basehead.encode(encoding) 337 sysidfilename = os.path.join(basehead, sysid) 338 isfile = os.path.isfile(sysidfilename) 339 except UnicodeError: 340 isfile = False 341 if isfile: 294 342 source.setSystemId(sysidfilename) 295 343 f = open(sysidfilename, "rb") 296 344 else: 297 source.setSystemId(urlparse.urljoin(base, s ysid))345 source.setSystemId(urlparse.urljoin(base, source.getSystemId())) 298 346 f = urllib.urlopen(source.getSystemId()) 299 347 -
python/trunk/Lib/xml/sax/xmlreader.py
r2 r391 69 69 SAX parsers are not required to provide localization for errors 70 70 and warnings; if they cannot support the requested locale, 71 however, they must throwa SAX exception. Applications may71 however, they must raise a SAX exception. Applications may 72 72 request a locale change in the middle of a parse.""" 73 73 raise SAXNotSupportedException("Locale support not implemented") … … 323 323 324 324 def __contains__(self, name): 325 return self._attrs.has_key(name)325 return name in self._attrs 326 326 327 327 def get(self, name, alternative=None):
Note:
See TracChangeset
for help on using the changeset viewer.