source: python/trunk/Lib/xml/dom/minidom.py

Last change on this file was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 64.7 KB
Line 
1"""Simple implementation of the Level 1 DOM.
2
3Namespaces and other minor Level 2 features are also supported.
4
5parse("foo.xml")
6
7parseString("<foo><bar/></foo>")
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14 interface
15 * SAX 2 namespaces
16"""
17
18import xml.dom
19
20from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
21from xml.dom.minicompat import *
22from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
23
# Node types consulted by the ID-cache invalidation checks in the child
# manipulation methods: when a node of one of these types is added or
# removed, the ID cache is cleared.  The list isn't actually complete,
# since the nodes being checked will never be the DOCUMENT_NODE or
# DOCUMENT_FRAGMENT_NODE.  (The node being checked is the node being
# added or removed, not the node being modified.)
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
31
32
class Node(xml.dom.Node):
    """Base class shared by the minidom node classes.

    Provides the parent/sibling links, the child-list manipulation
    methods, the serialisation entry points and the DOM Level 3
    user-data API.  The child-handling methods expect the concrete class
    to supply ``childNodes`` (a mutable sequence) and
    ``_child_node_types`` (the node types accepted as children).
    """

    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __nonzero__(self):
        """A node is always true, even when a subclass defines __len__."""
        return True

    # Python 3 looks up __bool__ instead of __nonzero__; without the alias
    # an empty text node (which defines __len__) would evaluate as false.
    __bool__ = __nonzero__

    def toxml(self, encoding = None):
        """Return the node serialised as XML without added whitespace."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Return the node serialised as XML.

        indent   -- the indentation string to prepend, per level
        newl     -- the newline string to append
        encoding -- when not None, output is routed through that codec's
                    stream writer
        """
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True when the node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild (append when refChild is None).

        A DocumentFragment is replaced by its children.  Raises
        HierarchyRequestErr when newChild's type is not accepted here and
        NotFoundErr when refChild is not a child of this node.  Returns
        newChild.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Iterate over a snapshot: each insertion removes the child
            # from the fragment's own child list.
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        # Validate the reference node *before* touching the tree so that a
        # failed call does not leave newChild detached from its old parent.
        if refChild is not None and refChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild is refChild:
            # Inserting a child before itself leaves the tree unchanged;
            # detaching it first would make the reference node vanish.
            return newChild
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                index = self.childNodes.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A DocumentFragment is replaced by its children.  Raises
        HierarchyRequestErr when the node type is not accepted here.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace oldChild with newChild and return oldChild.

        Returns None when both arguments are the same node.  Raises
        HierarchyRequestErr when newChild's type is not accepted here and
        NotFoundErr when oldChild is not a child of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        # Check oldChild first: detaching newChild and then failing would
        # leave the caller with a silently modified tree.
        if oldChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # The index is computed after the detach because newChild may have
        # been an earlier sibling of oldChild.
        try:
            index = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling is not None:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling is not None:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr when oldChild is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Element children are normalized recursively.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling is not None:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling is not None:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        # Slice assignment keeps the identity of the existing child list.
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node; copy the subtree too when deep."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about a feature."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True when other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self when the feature is supported, otherwise None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None when there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key and return the previous data.

        Passing None as data removes an existing entry.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Notify every registered user-data handler of an operation."""
        if hasattr(self, "_user_data"):
            for key, (data, handler) in self._user_data.items():
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Break the links of this subtree so it holds no reference cycles."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
273
274
def _append_child(self, node):
    """Link node in as the last child of self without any validation.

    Fast path for DOM builders: no type checks, no ID-cache handling, and
    the attributes are written straight into the instance dictionaries.
    """
    siblings = self.childNodes
    previous = siblings[-1] if siblings else None
    if previous is not None:
        node.__dict__["previousSibling"] = previous
        previous.__dict__["nextSibling"] = node
    siblings.append(node)
    node.__dict__["parentNode"] = self
284
def _in_document(node):
    """Return True iff node, or one of its ancestors, is a document node."""
    current = node
    while True:
        if current is None:
            return False
        if current.nodeType == Node.DOCUMENT_NODE:
            return True
        current = current.parentNode
292
def _write_data(writer, data):
    "Writes datachars to writer."
    if not data:
        # Nothing is written for empty or missing data.
        return
    escaped = data
    # "&" has to be handled first so the entities produced by the later
    # replacements are not escaped a second time.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"),
                        ("\"", "&quot;"), (">", "&gt;")):
        escaped = escaped.replace(raw, entity)
    writer.write(escaped)
299
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect, in document order, the descendant elements named name.

    "*" matches every element.  Matches are appended to rc, which is also
    returned.
    """
    match_any = (name == "*")
    for child in parent.childNodes:
        is_element = child.nodeType == Node.ELEMENT_NODE
        if is_element and (match_any or child.tagName == name):
            rc.append(child)
        # Every child is descended into, whatever its node type.
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
307
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect descendant elements matching a namespace URI and local name.

    "*" acts as a wildcard for either criterion.  Matches are appended to
    rc in document order and rc is returned.
    """
    any_local = (localName == "*")
    any_ns = (nsURI == "*")
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            # Only elements are tested and descended into.
            continue
        if ((any_local or child.localName == localName) and
            (any_ns or child.namespaceURI == nsURI)):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
316
class DocumentFragment(Node):
    """Node of type DOCUMENT_FRAGMENT_NODE that starts with no children.

    It has no value, no attributes and no parent of its own.
    """
    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None
    # Node types accepted as direct children of a fragment.
    _child_node_types = (Node.ELEMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.NOTATION_NODE)

    def __init__(self):
        # Every fragment gets its own, initially empty, child list.
        self.childNodes = NodeList()
333
334
class Attr(Node):
    """Attribute node.

    The value is kept both on the attribute itself (``value`` and
    ``nodeValue``) and in a single Text child.  Assignments to the name
    and value attributes are intercepted by __setattr__ so the aliases
    stay in sync and the owner element's ID cache is invalidated.
    """
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create an attribute named qName.

        localName is accepted for interface compatibility but not stored;
        the local name is derived from the qualified name on demand.
        """
        # skip setattr for performance
        d = self.__dict__
        d["nodeName"] = d["name"] = qName
        d["namespaceURI"] = namespaceURI
        d["prefix"] = prefix
        d['childNodes'] = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _text_child(self):
        """Return the Text child mirroring the value, recreating it if
        unlink() has discarded it."""
        children = self.childNodes
        if not children:
            # unlink() empties childNodes, yet the attribute stays usable
            # afterwards (Element.removeAttributeNode hands it back), so
            # the value holder is restored lazily.
            children.append(Text())
        return children[0]

    def _get_localName(self):
        return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def __setattr__(self, name, value):
        d = self.__dict__
        if name in ("value", "nodeValue"):
            d["value"] = d["nodeValue"] = value
            d2 = self._text_child().__dict__
            d2["data"] = d2["nodeValue"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        elif name in ("name", "nodeName"):
            d["name"] = d["nodeName"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        else:
            d[name] = value

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild the qualified name from it.

        Raises NamespaceErr when "xmlns" is used with a namespace other
        than the XMLNS namespace.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        d = self.__dict__
        d['prefix'] = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        d['nodeName'] = d['name'] = newName

    def _set_value(self, value):
        d = self.__dict__
        d['value'] = d['nodeValue'] = value
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self._text_child().data = value

    def unlink(self):
        """Detach the attribute from its owner element and drop its
        children."""
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # Forget the former owner.  A stale reference made a second
            # unlink() fail on the missing dictionary entries and kept
            # the attribute from being attached to another element.
            self.__dict__['ownerElement'] = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
452
453
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        """Wrap the qualified-name and (namespaceURI, localName) indexes
        of ownerElement."""
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position index, or None if out of range."""
        try:
            # list() keeps the lookup indexable when keys() returns a
            # view rather than a list.
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.nodeName, node.value)
                for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def has_key(self, key):
        """Test for a qualified name (string) or a (namespaceURI,
        localName) key."""
        if isinstance(key, StringTypes):
            return key in self._attrs
        else:
            return key in self._attrsNS

    # Without this the "in" operator falls back to integer indexing
    # through __getitem__, which raises KeyError instead of answering.
    __contains__ = has_key

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    __hash__ = None # Mutable type can't be correctly hashed
    def __cmp__(self, other):
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    def __getitem__(self, attname_or_tuple):
        """Look up by qualified name or by (namespaceURI, localName)."""
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set an attribute from a string value or install an Attr node.

        Raises TypeError for any other kind of value.
        """
        if isinstance(value, StringTypes):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                # Call syntax: the "raise Class, message" statement form
                # is not accepted by every Python version.
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called name.

        Raises NotFoundErr when there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute with the given namespace URI
        and local name.

        Raises NotFoundErr when there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Install an Attr node and return the node previously stored
        under the same name (None when there was none).

        Raises HierarchyRequestErr when node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        # Never unlink the node that is being installed: unlinking
        # discards its children.
        if old is not None and old is not node:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
607
608
class TypeInfo(object):
    """Pairs a schema type name with the namespace it comes from."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.namespace, self.name = namespace, name

    def __repr__(self):
        # The namespace is only mentioned when it is set (truthy).
        if not self.namespace:
            return "<TypeInfo %r>" % self.name
        return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace

# Shared instance whose name and namespace are both None.
_no_type = TypeInfo(None, None)
629
class Element(Node):
    """Element node.

    Attributes are stored as Attr nodes in two dictionaries, one keyed by
    qualified name and one keyed by (namespaceURI, localName).
    """
    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        """Create an element; localName is accepted but not stored (the
        local name is derived from tagName on demand)."""
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def _get_localName(self):
        return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink every attribute, then the element and its subtree."""
        # Iterate over a snapshot: Attr.unlink() deletes the attribute
        # from self._attrs while the loop is running.
        for attr in list(self._attrs.values()):
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the attribute value, or "" when it does not exist."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Namespace-aware getAttribute; "" when it does not exist."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Create the attribute attname or update its value."""
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            # for performance
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Namespace-aware setAttribute; also updates the prefix of an
        existing attribute when the qualified name uses another one."""
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            # for performance
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            d = attr.__dict__
            d["prefix"] = prefix
            d["nodeName"] = qualifiedName
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            d = attr.__dict__
            if value != attr.value:
                d["value"] = d["nodeValue"] = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                d["prefix"] = prefix
                d["nodeName"] = qualifiedName

    def getAttributeNode(self, attrname):
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr, replacing any attribute it collides with.

        Returns the replaced attribute, if any.  Raises InuseAttributeErr
        when attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        if old1 is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old1
        if old2 is not attr:
            return old2

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the attribute called name; NotFoundErr if missing."""
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Namespace-aware removeAttribute; NotFoundErr if missing."""
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Detach the attribute node from this element and return it.

        Raises NotFoundErr when node is None or no attribute with its
        name is present.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        # The DOM defines the removed Attr as the return value; earlier
        # this method returned None.
        return node

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the element, its attributes and its subtree to writer."""
        # indent = current indentation
        # addindent = indentation to add to higher levels
        # newl = newline string
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        # Attributes are emitted in sorted name order; sorted() accepts
        # whatever iterable keys() returns.
        a_names = sorted(attrs.keys())

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                self.childNodes[0].nodeType == Node.TEXT_NODE):
                # A lone text child is written inline, without the extra
                # indentation and newlines.
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent+addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True when the element carries at least one attribute."""
        return bool(self._attrs)

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Mark idAttr as an ID attribute of this element.

        Raises NotFoundErr when idAttr is None or owned by another
        element, and NoModificationAllowedErr when the element sits
        inside an entity reference.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr.__dict__['_is_id'] = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)

defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
854
855
def _set_attribute_node(element, attr):
    """Register attr in both attribute indexes of element and make
    element its owner."""
    _clear_id_cache(element)
    by_name, by_ns = element._attrs, element._attrsNS
    by_name[attr.name] = attr
    ns_key = (attr.namespaceURI, attr.localName)
    by_ns[ns_key] = attr

    # This creates a circular reference, but Element.unlink()
    # breaks the cycle since the references to the attribute
    # dictionaries are tossed.
    attr.__dict__['ownerElement'] = element
865
866
class Childless:
    """Mixin for node types that never have children.

    The child accessors report "no children" and the child-manipulating
    methods raise instead of running the general Node machinery.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def appendChild(self, node):
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def hasChildNodes(self):
        return False

    def insertBefore(self, newChild, refChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def normalize(self):
        # For childless nodes, normalize() has nothing to do.
        return None

    def replaceChild(self, newChild, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
905
906
class ProcessingInstruction(Childless, Node):
    """Processing instruction node.

    ``target`` doubles as ``nodeName`` and ``data`` as ``nodeValue``;
    assigning either name of a pair updates both.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        # Each assignment goes through __setattr__, which fills in the
        # paired alias as well.
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def _get_data(self):
        return self.data
    def _set_data(self, value):
        state = self.__dict__
        state['data'] = state['nodeValue'] = value

    def _get_target(self):
        return self.target
    def _set_target(self, value):
        state = self.__dict__
        state['target'] = state['nodeName'] = value

    def __setattr__(self, name, value):
        state = self.__dict__
        if name in ("data", "nodeValue"):
            state['data'] = state['nodeValue'] = value
        elif name in ("target", "nodeName"):
            state['target'] = state['nodeName'] = value
        else:
            state[name] = value

    def writexml(self, writer, indent="", addindent="", newl=""):
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
936
937
class CharacterData(Childless, Node):
    """Shared behaviour of the nodes that carry character data.

    ``data`` and ``nodeValue`` are two names for the same string; the
    editing methods take an offset (and a count) into that string.
    """

    def _get_length(self):
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self.__dict__['data']
    def _set_data(self, data):
        d = self.__dict__
        d['data'] = d['nodeValue'] = data

    _get_nodeValue = _get_data
    _set_nodeValue = _set_data

    def __setattr__(self, name, value):
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        else:
            self.__dict__[name] = value

    def __repr__(self):
        data = self.data
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_range(self, offset, count=0):
        """Raise IndexSizeErr unless 0 <= offset <= len(data) and
        count >= 0.

        An offset equal to the length of the data is legal: the DOM only
        rejects offsets *greater than* the length, and Text.splitText()
        applies the same limit.  Rejecting it made it impossible to
        insert at the end of the data or into empty data.
        """
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")

    def substringData(self, offset, count):
        """Return up to count characters starting at offset."""
        self._check_range(offset, count)
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append arg to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert arg in front of the character at offset."""
        self._check_range(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Delete up to count characters starting at offset."""
        self._check_range(offset, count)
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to count characters starting at offset with arg."""
        self._check_range(offset, count)
        # A count of zero is a plain insertion, so arg must not be
        # dropped in that case; only a call that changes nothing is
        # skipped.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])

defproperty(CharacterData, "length", doc="Length of the string data.")
1010
1011
class Text(CharacterData):
    """Text node."""
    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, CharacterData is an old-style class
    # __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split the node at offset and return the new node holding the
        tail.

        When this node has a parent, the new node is inserted right after
        it.  Raises IndexSizeErr when offset lies outside the data.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        next = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if next is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, next)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Concatenate the data of this node and of the text and CDATA
        nodes directly adjacent to it."""
        L = [self.data]
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.insert(0, n.data)
                n = n.previousSibling
            else:
                break
        n = self.nextSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.append(n.data)
                n = n.nextSibling
            else:
                break
        return ''.join(L)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings by
        content.

        Returns this node, now holding content, or None when content is
        empty and the node has been removed.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        parent = self.parentNode
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                next = n.previousSibling
                parent.removeChild(n)
                n = next
            else:
                break
        # Remember the following sibling before this node is detached.
        n = self.nextSibling
        if not content and parent is not None:
            # A node without a parent has nothing to be removed from;
            # calling removeChild on None raised AttributeError.
            parent.removeChild(self)
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                next = n.nextSibling
                parent.removeChild(n)
                n = next
            else:
                break
        if content:
            d = self.__dict__
            d['data'] = content
            d['nodeValue'] = content
            return self
        else:
            return None

    def _get_isWhitespaceInElementContent(self):
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        else:
            return info.isElementContent()

defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1107
1108
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element node,
    or None when the parent chain ends without reaching one."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1116
def _get_containing_entref(node):
    """Return the nearest ancestor of *node* that is an entity-reference
    node, or None when the parent chain ends without reaching one."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1124
1125
1126class Comment(Childless, CharacterData):
1127 nodeType = Node.COMMENT_NODE
1128 nodeName = "#comment"
1129
1130 def __init__(self, data):
1131 self.data = self.nodeValue = data
1132
1133 def writexml(self, writer, indent="", addindent="", newl=""):
1134 if "--" in self.data:
1135 raise ValueError("'--' is not allowed in a comment node")
1136 writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
1137
1138
class CDATASection(Text):
    """A DOM CDATA section, serialized as ``<![CDATA[data]]>``."""
    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class
    # __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the section to *writer* without any indentation.

        Raises ValueError when the data contains the terminator "]]>".
        *indent*, *addindent* and *newl* are accepted for signature
        compatibility and are not used.
        """
        if "]]>" in self.data:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % self.data)
1152
1153
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """
    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        """Return the number of nodes in the map."""
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, or None."""
        for n in self._seq:
            if n.nodeName == name:
                return n
        return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching both *namespaceURI* and
        *localName*, or None."""
        for n in self._seq:
            if n.namespaceURI == namespaceURI and n.localName == localName:
                return n
        return None

    def __getitem__(self, name_or_tuple):
        """Look a node up by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError carrying the requested key when nothing matches.
        """
        if isinstance(name_or_tuple, tuple):
            node = self.getNamedItemNS(*name_or_tuple)
        else:
            node = self.getNamedItem(name_or_tuple)
        if node is None:
            # Instantiate explicitly: the old "raise KeyError, key" form
            # spread a tuple key over the exception arguments.
            raise KeyError(name_or_tuple)
        return node

    def item(self, index):
        """Return the node at *index*, or None when *index* is negative
        or past the end."""
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    # Explicit state hooks: the class uses __slots__, so the backing
    # sequence is handed over as a one-element list.
    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]

defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1218
1219
class Identified:
    """Mix-in class that supports the publicId and systemId attributes."""

    # XXX this does not work, this is an old-style class
    # __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        # Called by the concrete node classes to store both identifiers.
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1235
class DocumentType(Identified, Childless, Node):
    """The <!DOCTYPE ...> declaration of a document.

    Carries the doctype name, the public/system identifiers, the raw
    internal subset and read-only maps of declared entities and
    notations.
    """
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part of a prefixed name is kept.
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        # Falls back to the class-level None when no name was given.
        self.nodeName = self.name

    def _get_internalSubset(self):
        """Return the internal subset text, or None when there is none."""
        return self.internalSubset

    def cloneNode(self, deep):
        """Clone a doctype that is not owned by any document.

        Returns None when the node already has an owner document.  The
        clone receives the name; with *deep* true, copies of all
        notations and entities are created as well.  User-data handlers
        are notified with NODE_CLONED for every copied node.
        """
        if self.ownerDocument is None:
            # it's ok
            # NOTE(review): publicId, systemId and internalSubset are not
            # copied to the clone here -- confirm whether that is wanted.
            clone = DocumentType(None)
            clone.name = self.name
            clone.nodeName = self.name
            operation = xml.dom.UserDataHandler.NODE_CLONED
            if deep:
                clone.entities._seq = []
                clone.notations._seq = []
                for n in self.notations._seq:
                    notation = Notation(n.nodeName, n.publicId, n.systemId)
                    clone.notations._seq.append(notation)
                    n._call_user_data_handler(operation, n, notation)
                for e in self.entities._seq:
                    entity = Entity(e.nodeName, e.publicId, e.systemId,
                                    e.notationName)
                    entity.actualEncoding = e.actualEncoding
                    entity.encoding = e.encoding
                    entity.version = e.version
                    clone.entities._seq.append(entity)
                    # The source of this event is the entity itself; the
                    # former code passed the leftover notation variable.
                    e._call_user_data_handler(operation, e, entity)
            self._call_user_data_handler(operation, self, clone)
            return clone
        else:
            return None

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the declaration to *writer*.

        A PUBLIC or SYSTEM clause is added when the matching identifier
        is set, and the internal subset is emitted in brackets when it
        is not None.  *indent* and *addindent* are not used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s PUBLIC '%s'%s '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1295
class Entity(Identified, Node):
    """An entity declared in a document type.

    Its child list is created empty and cannot be changed through the
    usual child-manipulation methods, all of which raise
    xml.dom.HierarchyRequestErr.
    """
    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    # Plain accessors for the encoding/version attributes.

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    # Every attempt to modify the children is refused.

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1335
class Notation(Identified, Childless, Node):
    """A notation declared in a document type: a name plus its
    public and system identifiers."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1343
1344
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and document types, plus feature queries."""

    # (feature, version) pairs; None stands for "any version".
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Tell whether *feature* (case-insensitive) is supported in
        *version*; an empty version string is treated like None."""
        wanted = None if version == "" else version
        return (feature.lower(), wanted) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a doctype and root element.

        When all three arguments are None an empty document is returned.
        Otherwise a root element named *qualifiedName* is created in
        *namespaceURI*.

        Raises WrongDocumentErr when *doctype* already has a parent,
        InvalidCharacterErr when a root element is required but no name
        was given, and NamespaceErr for an illegal prefix/namespace
        combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        # Only the "everything is None" call yields a document without
        # a document element.
        add_root_element = (namespaceURI is not None
                            or qualifiedName is not None
                            or doctype is not None)

        if add_root_element:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces
                # raises InvalidCharacterErr, and since SyntaxErr is not
                # listed for createDocument, that seems to be the better
                # choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without
                # a document element.  Otherwise, if doctype or
                # namespaceURI are not None, we are back at the problem
                # above.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            xml_ns = "http://www.w3.org/XML/1998/namespace"
            if prefix == "xml" and namespaceURI != xml_ns:
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            element = doc.createElementNS(namespaceURI, qualifiedName)
            # The doctype goes in before the root element.
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(element)

        if doctype:
            doctype.parentNode = doc
            doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        doctype = DocumentType(qualifiedName)
        doctype.publicId = publicId
        doctype.systemId = systemId
        return doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object when *feature* is supported in any
        version, otherwise None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1423
class ElementInfo(object):
    """Object that represents content-model information for an element.

    This implementation is not expected to be used in practice; DOM
    builders should provide implementations which do the right thing
    using information available to it.

    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    # The default implementation knows nothing about declared types:
    # both lookups hand back the shared "no type" object.

    def getAttributeType(self, aname):
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        return _no_type

    def isElementContent(self):
        """Always False in this default implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    # The only state is the tag name, so it is used directly as the
    # state object.

    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1465
def _clear_id_cache(node):
    """Drop the cached ID lookups of the document *node* belongs to.

    *node* may be the document itself or a node for which _in_document()
    is true; for any other node nothing happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1473
class Document(Node, DocumentLS):
    """The root node of a DOM tree and the factory for its nodes.

    Besides its children, a document keeps element content-model
    information (``_elem_info``) and a lazily filled cache used by
    getElementById().
    """
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None
    previousSibling = nextSibling = None

    implementation = DOMImplementation()

    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    _magic_id_count = 0

    def __init__(self):
        self.childNodes = NodeList()
        # mapping of (namespaceURI, localName) -> ElementInfo
        #        and tagName -> ElementInfo
        self._elem_info = {}
        self._id_cache = {}
        self._id_search_stack = None

    def _get_elem_info(self, element):
        """Return the ElementInfo registered for *element*, or None.

        Namespaced elements are looked up by (namespaceURI, localName),
        all others by tagName.
        """
        if element.namespaceURI:
            key = element.namespaceURI, element.localName
        else:
            key = element.tagName
        return self._elem_info.get(key)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_doctype(self):
        return self.doctype

    def _get_documentURI(self):
        return self.documentURI

    def _get_encoding(self):
        return self.encoding

    def _get_errorHandler(self):
        return self.errorHandler

    def _get_standalone(self):
        return self.standalone

    def _get_strictErrorChecking(self):
        return self.strictErrorChecking

    def _get_version(self):
        return self.version

    def appendChild(self, node):
        """Append *node* as the last child of the document.

        Raises xml.dom.HierarchyRequestErr when the node type is not
        allowed at document level or when a second document element
        would result.
        """
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            # This needs to be done before the next test since this
            # may *be* the document element, in which case it should
            # end up re-ordered to the end.
            node.parentNode.removeChild(node)

        if node.nodeType == Node.ELEMENT_NODE \
           and self._get_documentElement():
            raise xml.dom.HierarchyRequestErr(
                "two document elements disallowed")
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach *oldChild* from the document and return it.

        Raises xml.dom.NotFoundErr when *oldChild* is not a child of
        this document.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Splice the neighbours together so that neither of them keeps
        # pointing at the node that was just removed.
        before = oldChild.previousSibling
        after = oldChild.nextSibling
        if before is not None:
            before.nextSibling = after
        if after is not None:
            after.previousSibling = before
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        # Cached ID lookups may still refer to the removed subtree.
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        # documentElement is computed from childNodes on every access,
        # so there is nothing to reset for it.
        return oldChild

    def _get_documentElement(self):
        """Return the first element child, or None when there is none."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node
        return None

    def unlink(self):
        """Unlink the doctype (if any), drop the reference to it, then
        run Node.unlink on the document itself."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    def cloneNode(self, deep):
        """Return a deep copy of the document; None when *deep* is false.

        encoding, standalone and version are copied, every child is
        cloned into the new document, and user-data handlers are
        notified with NODE_CLONED.
        """
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            clone.childNodes.append(childclone)
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone

    # Factory methods: each creates a node owned by this document.

    def createDocumentFragment(self):
        d = DocumentFragment()
        d.ownerDocument = self
        return d

    def createElement(self, tagName):
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createTextNode(self, data):
        """Create a Text node; raises TypeError for non-string *data*."""
        if not isinstance(data, StringTypes):
            raise TypeError("node contents must be a string")
        t = Text()
        t.data = data
        t.ownerDocument = self
        return t

    def createCDATASection(self, data):
        """Create a CDATASection; raises TypeError for non-string *data*."""
        if not isinstance(data, StringTypes):
            raise TypeError("node contents must be a string")
        c = CDATASection()
        c.data = data
        c.ownerDocument = self
        return c

    def createComment(self, data):
        c = Comment(data)
        c.ownerDocument = self
        return c

    def createProcessingInstruction(self, target, data):
        p = ProcessingInstruction(target, data)
        p.ownerDocument = self
        return p

    def createAttribute(self, qName):
        a = Attr(qName)
        a.ownerDocument = self
        a.value = ""
        return a

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        a = Attr(qualifiedName, namespaceURI, localName, prefix)
        a.ownerDocument = self
        a.value = ""
        return a

    # A couple of implementation-specific helpers to create node types
    # not supported by the W3C DOM specs:

    def _create_entity(self, name, publicId, systemId, notationName):
        e = Entity(name, publicId, systemId, notationName)
        e.ownerDocument = self
        return e

    def _create_notation(self, name, publicId, systemId):
        n = Notation(name, publicId, systemId)
        n.ownerDocument = self
        return n

    def getElementById(self, id):
        """Return the element whose ID attribute equals *id*, or None.

        Results are cached in ``_id_cache``.  The tree walk is resumable:
        ``_id_search_stack`` holds the nodes still to be visited, so a
        later call continues where the previous one stopped.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            root = self.documentElement
            # A document without a root element has nothing to search;
            # pushing None would fail on node.childNodes below.
            stack = [root] if root is not None else []
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            break
            elif node._magic_id_nodes:
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                break
        return result

    def getElementsByTagName(self, name):
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def isSupported(self, feature, version):
        return self.implementation.hasFeature(feature, version)

    def importNode(self, node, deep):
        """Return a copy of *node* owned by this document.

        Raises xml.dom.NotSupportedErr for document and document type
        nodes.
        """
        if node.nodeType == Node.DOCUMENT_NODE:
            raise xml.dom.NotSupportedErr("cannot import document nodes")
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            raise xml.dom.NotSupportedErr("cannot import document type nodes")
        return _clone_node(node, deep, self)

    def writexml(self, writer, indent="", addindent="", newl="",
                 encoding = None):
        """Write the XML declaration followed by all children.

        The declaration names *encoding* only when one is given.
        """
        if encoding is None:
            writer.write('<?xml version="1.0" ?>'+newl)
        else:
            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)

    # DOM Level 3 (WD 9 April 2002)

    def renameNode(self, n, namespaceURI, name):
        """Rename element or attribute *n* in place and return it.

        Raises WrongDocumentErr when *n* belongs to another document,
        NotSupportedErr for other node types and NamespaceErr for an
        illegal use of "xmlns".
        """
        if n.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr(
                "cannot rename nodes from other documents;\n"
                "expected %s,\nfound %s" % (self, n.ownerDocument))
        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
            raise xml.dom.NotSupportedErr(
                "renameNode() only applies to element and attribute nodes")
        if namespaceURI != EMPTY_NAMESPACE:
            if ':' in name:
                prefix, localName = name.split(':', 1)
                if ( prefix == "xmlns"
                     and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xmlns' prefix")
            else:
                if ( name == "xmlns"
                     and namespaceURI != xml.dom.XMLNS_NAMESPACE
                     and n.nodeType == Node.ATTRIBUTE_NODE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of the 'xmlns' attribute")
                prefix = None
                localName = name
        else:
            prefix = None
            localName = None
        if n.nodeType == Node.ATTRIBUTE_NODE:
            # Detach an owned attribute while its names change; it is
            # attached again (and re-flagged as ID) further down.
            element = n.ownerElement
            if element is not None:
                is_id = n._is_id
                element.removeAttributeNode(n)
        else:
            element = None
        # avoid __setattr__
        d = n.__dict__
        d['prefix'] = prefix
        d['localName'] = localName
        d['namespaceURI'] = namespaceURI
        d['nodeName'] = name
        if n.nodeType == Node.ELEMENT_NODE:
            d['tagName'] = name
        else:
            # attribute node
            d['name'] = name
            if element is not None:
                element.setAttributeNode(n)
                if is_id:
                    element.setIdAttributeNode(n)
        # It's not clear from a semantic perspective whether we should
        # call the user data handlers for the NODE_RENAMED event since
        # we're re-using the existing node.  The draft spec has been
        # interpreted as meaning "no, don't call the handler unless a
        # new node is created."
        return n

defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1810
1811
def _clone_node(node, deep, newOwnerDocument):
    """
    Clone a node and give it the new owner document.
    Called by Node.cloneNode and Document.importNode

    The user-data operation reported is NODE_CLONED when *node* already
    belongs to *newOwnerDocument* and NODE_IMPORTED otherwise.  Children
    are copied only when *deep* is true.  Raises xml.dom.NotSupportedErr
    for node types that cannot be cloned here.
    """
    if node.ownerDocument.isSameNode(newOwnerDocument):
        operation = xml.dom.UserDataHandler.NODE_CLONED
    else:
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
    if node.nodeType == Node.ELEMENT_NODE:
        clone = newOwnerDocument.createElementNS(node.namespaceURI,
                                                 node.nodeName)
        for attr in node.attributes.values():
            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
            a.specified = attr.specified

        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
        clone = newOwnerDocument.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.TEXT_NODE:
        clone = newOwnerDocument.createTextNode(node.data)
    elif node.nodeType == Node.CDATA_SECTION_NODE:
        clone = newOwnerDocument.createCDATASection(node.data)
    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        clone = newOwnerDocument.createProcessingInstruction(node.target,
                                                             node.data)
    elif node.nodeType == Node.COMMENT_NODE:
        clone = newOwnerDocument.createComment(node.data)
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
                                                   node.nodeName)
        clone.specified = True
        clone.value = node.value
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        assert node.ownerDocument is not newOwnerDocument
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
        clone = newOwnerDocument.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = newOwnerDocument
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in node.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                notation.ownerDocument = newOwnerDocument
                clone.notations._seq.append(notation)
                if hasattr(n, '_call_user_data_handler'):
                    n._call_user_data_handler(operation, n, notation)
            for e in node.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                entity.ownerDocument = newOwnerDocument
                clone.entities._seq.append(entity)
                if hasattr(e, '_call_user_data_handler'):
                    # The event source is the entity being copied; the
                    # former code passed the leftover notation variable,
                    # which is undefined when there are no notations.
                    e._call_user_data_handler(operation, e, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone
1892
1893
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon.

    Returns the two-item list [prefix, localName] when a colon is
    present and the tuple (None, qualifiedName) otherwise.
    """
    pieces = qualifiedName.split(':', 1)
    if len(pieces) != 2:
        return (None, pieces[0])
    return pieces
1900
1901
def _get_StringIO():
    """Return a fresh, empty in-memory text buffer."""
    # we can't use cStringIO since it doesn't support Unicode strings
    try:
        from StringIO import StringIO
    except ImportError:
        # Interpreters without the StringIO module provide the text
        # buffer as io.StringIO instead.
        from io import StringIO
    return StringIO()
1906
def _do_pulldom_parse(func, args, kwargs):
    """Build a complete DOM through a pulldom-style event stream.

    *func* is called with *args* and *kwargs* to obtain the stream.  The
    node of its first event is expanded into a full tree, the stream is
    cleared, and that node is returned.
    """
    stream = func(*args, **kwargs)
    _event_type, document = stream.getEvent()
    stream.expandNode(document)
    stream.clear()
    return document
1913
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object."""
    # An explicit parser or buffer size selects the pulldom route;
    # the plain call goes straight to expatbuilder.
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1923
def parseString(string, parser=None):
    """Parse a file into a DOM from a string."""
    # Only an explicit parser needs the pulldom route.
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1933
def getDOMImplementation(features=None):
    """Return the shared DOMImplementation, or None when it lacks any
    of the requested features.

    *features* may be a feature string or a sequence of
    (feature, version) pairs; a false value requests nothing.
    """
    implementation = Document.implementation
    if not features:
        return implementation
    if isinstance(features, StringTypes):
        features = domreg._parse_feature_string(features)
    if not all(Document.implementation.hasFeature(f, v)
               for f, v in features):
        return None
    return implementation
Note: See TracBrowser for help on using the repository browser.