Ignore:
Timestamp:
Mar 19, 2014, 11:31:01 PM (11 years ago)
Author:
dmik
Message:

python: Merge vendor 2.7.6 to trunk.

Location:
python/trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • python/trunk

  • python/trunk/Lib/xml/etree/ElementInclude.py

    r2 r391  
    11#
    22# ElementTree
    3 # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
     3# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
    44#
    55# limited xinclude support for element trees
     
    1717# The ElementTree toolkit is
    1818#
    19 # Copyright (c) 1999-2004 by Fredrik Lundh
     19# Copyright (c) 1999-2008 by Fredrik Lundh
    2020#
    2121# By obtaining, using, and/or copying this software and/or its
     
    4343
    4444# Licensed to PSF under a Contributor Agreement.
    45 # See http://www.python.org/2.4/license for licensing details.
     45# See http://www.python.org/psf/license for licensing details.
    4646
    4747##
     
    5050
    5151import copy
    52 import ElementTree
     52from . import ElementTree
    5353
    5454XINCLUDE = "{http://www.w3.org/2001/XInclude}"
     
    126126                if i:
    127127                    node = elem[i-1]
    128                     node.tail = (node.tail or "") + text
     128                    node.tail = (node.tail or "") + text + (e.tail or "")
    129129                else:
    130130                    elem.text = (elem.text or "") + text + (e.tail or "")
  • python/trunk/Lib/xml/etree/ElementPath.py

    r2 r391  
    11#
    22# ElementTree
    3 # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
     3# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
    44#
    55# limited xpath support for element trees
     
    99# 2003-05-28 fl   added support for // etc
    1010# 2003-08-27 fl   fixed parsing of periods in element names
    11 #
    12 # Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved.
     11# 2007-09-10 fl   new selection engine
     12# 2007-09-12 fl   fixed parent selector
     13# 2007-09-13 fl   added iterfind; changed findall to return a list
     14# 2007-11-30 fl   added namespaces support
     15# 2009-10-30 fl   added child element value filter
     16#
     17# Copyright (c) 2003-2009 by Fredrik Lundh.  All rights reserved.
    1318#
    1419# fredrik@pythonware.com
     
    1823# The ElementTree toolkit is
    1924#
    20 # Copyright (c) 1999-2004 by Fredrik Lundh
     25# Copyright (c) 1999-2009 by Fredrik Lundh
    2126#
    2227# By obtaining, using, and/or copying this software and/or its
     
    4449
    4550# Licensed to PSF under a Contributor Agreement.
    46 # See http://www.python.org/2.4/license for licensing details.
     51# See http://www.python.org/psf/license for licensing details.
    4752
    4853##
     
    5459import re
    5560
    56 xpath_tokenizer = re.compile(
    57     "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
    58     ).findall
    59 
    60 class xpath_descendant_or_self:
    61     pass
    62 
    63 ##
    64 # Wrapper for a compiled XPath.
    65 
    66 class Path:
    67 
    68     ##
    69     # Create an Path instance from an XPath expression.
    70 
    71     def __init__(self, path):
    72         tokens = xpath_tokenizer(path)
    73         # the current version supports 'path/path'-style expressions only
    74         self.path = []
    75         self.tag = None
    76         if tokens and tokens[0][0] == "/":
     61xpath_tokenizer_re = re.compile(
     62    "("
     63    "'[^']*'|\"[^\"]*\"|"
     64    "::|"
     65    "//?|"
     66    "\.\.|"
     67    "\(\)|"
     68    "[/.*:\[\]\(\)@=])|"
     69    "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
     70    "\s+"
     71    )
     72
     73def xpath_tokenizer(pattern, namespaces=None):
     74    for token in xpath_tokenizer_re.findall(pattern):
     75        tag = token[1]
     76        if tag and tag[0] != "{" and ":" in tag:
     77            try:
     78                prefix, uri = tag.split(":", 1)
     79                if not namespaces:
     80                    raise KeyError
     81                yield token[0], "{%s}%s" % (namespaces[prefix], uri)
     82            except KeyError:
     83                raise SyntaxError("prefix %r not found in prefix map" % prefix)
     84        else:
     85            yield token
     86
     87def get_parent_map(context):
     88    parent_map = context.parent_map
     89    if parent_map is None:
     90        context.parent_map = parent_map = {}
     91        for p in context.root.iter():
     92            for e in p:
     93                parent_map[e] = p
     94    return parent_map
     95
     96def prepare_child(next, token):
     97    tag = token[1]
     98    def select(context, result):
     99        for elem in result:
     100            for e in elem:
     101                if e.tag == tag:
     102                    yield e
     103    return select
     104
     105def prepare_star(next, token):
     106    def select(context, result):
     107        for elem in result:
     108            for e in elem:
     109                yield e
     110    return select
     111
     112def prepare_self(next, token):
     113    def select(context, result):
     114        for elem in result:
     115            yield elem
     116    return select
     117
     118def prepare_descendant(next, token):
     119    token = next()
     120    if token[0] == "*":
     121        tag = "*"
     122    elif not token[0]:
     123        tag = token[1]
     124    else:
     125        raise SyntaxError("invalid descendant")
     126    def select(context, result):
     127        for elem in result:
     128            for e in elem.iter(tag):
     129                if e is not elem:
     130                    yield e
     131    return select
     132
     133def prepare_parent(next, token):
     134    def select(context, result):
     135        # FIXME: raise error if .. is applied at toplevel?
     136        parent_map = get_parent_map(context)
     137        result_map = {}
     138        for elem in result:
     139            if elem in parent_map:
     140                parent = parent_map[elem]
     141                if parent not in result_map:
     142                    result_map[parent] = None
     143                    yield parent
     144    return select
     145
     146def prepare_predicate(next, token):
     147    # FIXME: replace with real parser!!! refs:
     148    # http://effbot.org/zone/simple-iterator-parser.htm
     149    # http://javascript.crockford.com/tdop/tdop.html
     150    signature = []
     151    predicate = []
     152    while 1:
     153        token = next()
     154        if token[0] == "]":
     155            break
     156        if token[0] and token[0][:1] in "'\"":
     157            token = "'", token[0][1:-1]
     158        signature.append(token[0] or "-")
     159        predicate.append(token[1])
     160    signature = "".join(signature)
     161    # use signature to determine predicate type
     162    if signature == "@-":
     163        # [@attribute] predicate
     164        key = predicate[1]
     165        def select(context, result):
     166            for elem in result:
     167                if elem.get(key) is not None:
     168                    yield elem
     169        return select
     170    if signature == "@-='":
     171        # [@attribute='value']
     172        key = predicate[1]
     173        value = predicate[-1]
     174        def select(context, result):
     175            for elem in result:
     176                if elem.get(key) == value:
     177                    yield elem
     178        return select
     179    if signature == "-" and not re.match("\d+$", predicate[0]):
     180        # [tag]
     181        tag = predicate[0]
     182        def select(context, result):
     183            for elem in result:
     184                if elem.find(tag) is not None:
     185                    yield elem
     186        return select
     187    if signature == "-='" and not re.match("\d+$", predicate[0]):
     188        # [tag='value']
     189        tag = predicate[0]
     190        value = predicate[-1]
     191        def select(context, result):
     192            for elem in result:
     193                for e in elem.findall(tag):
     194                    if "".join(e.itertext()) == value:
     195                        yield elem
     196                        break
     197        return select
     198    if signature == "-" or signature == "-()" or signature == "-()-":
     199        # [index] or [last()] or [last()-index]
     200        if signature == "-":
     201            index = int(predicate[0]) - 1
     202        else:
     203            if predicate[0] != "last":
     204                raise SyntaxError("unsupported function")
     205            if signature == "-()-":
     206                try:
     207                    index = int(predicate[2]) - 1
     208                except ValueError:
     209                    raise SyntaxError("unsupported expression")
     210            else:
     211                index = -1
     212        def select(context, result):
     213            parent_map = get_parent_map(context)
     214            for elem in result:
     215                try:
     216                    parent = parent_map[elem]
     217                    # FIXME: what if the selector is "*" ?
     218                    elems = list(parent.findall(elem.tag))
     219                    if elems[index] is elem:
     220                        yield elem
     221                except (IndexError, KeyError):
     222                    pass
     223        return select
     224    raise SyntaxError("invalid predicate")
     225
     226ops = {
     227    "": prepare_child,
     228    "*": prepare_star,
     229    ".": prepare_self,
     230    "..": prepare_parent,
     231    "//": prepare_descendant,
     232    "[": prepare_predicate,
     233    }
     234
     235_cache = {}
     236
     237class _SelectorContext:
     238    parent_map = None
     239    def __init__(self, root):
     240        self.root = root
     241
     242# --------------------------------------------------------------------
     243
     244##
     245# Generate all matching objects.
     246
     247def iterfind(elem, path, namespaces=None):
     248    # compile selector pattern
     249    if path[-1:] == "/":
     250        path = path + "*" # implicit all (FIXME: keep this?)
     251    try:
     252        selector = _cache[path]
     253    except KeyError:
     254        if len(_cache) > 100:
     255            _cache.clear()
     256        if path[:1] == "/":
    77257            raise SyntaxError("cannot use absolute path on element")
    78         while tokens:
    79             op, tag = tokens.pop(0)
    80             if tag or op == "*":
    81                 self.path.append(tag or op)
    82             elif op == ".":
    83                 pass
    84             elif op == "/":
    85                 self.path.append(xpath_descendant_or_self())
    86                 continue
    87             else:
    88                 raise SyntaxError("unsupported path syntax (%s)" % op)
    89             if tokens:
    90                 op, tag = tokens.pop(0)
    91                 if op != "/":
    92                     raise SyntaxError(
    93                         "expected path separator (%s)" % (op or tag)
    94                         )
    95         if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
    96             raise SyntaxError("path cannot end with //")
    97         if len(self.path) == 1 and isinstance(self.path[0], type("")):
    98             self.tag = self.path[0]
    99 
    100     ##
    101     # Find first matching object.
    102 
    103     def find(self, element):
    104         tag = self.tag
    105         if tag is None:
    106             nodeset = self.findall(element)
    107             if not nodeset:
    108                 return None
    109             return nodeset[0]
    110         for elem in element:
    111             if elem.tag == tag:
    112                 return elem
    113         return None
    114 
    115     ##
    116     # Find text for first matching object.
    117 
    118     def findtext(self, element, default=None):
    119         tag = self.tag
    120         if tag is None:
    121             nodeset = self.findall(element)
    122             if not nodeset:
    123                 return default
    124             return nodeset[0].text or ""
    125         for elem in element:
    126             if elem.tag == tag:
    127                 return elem.text or ""
    128         return default
    129 
    130     ##
    131     # Find all matching objects.
    132 
    133     def findall(self, element):
    134         nodeset = [element]
    135         index = 0
     258        next = iter(xpath_tokenizer(path, namespaces)).next
     259        token = next()
     260        selector = []
    136261        while 1:
    137262            try:
    138                 path = self.path[index]
    139                 index = index + 1
    140             except IndexError:
    141                 return nodeset
    142             set = []
    143             if isinstance(path, xpath_descendant_or_self):
    144                 try:
    145                     tag = self.path[index]
    146                     if not isinstance(tag, type("")):
    147                         tag = None
    148                     else:
    149                         index = index + 1
    150                 except IndexError:
    151                     tag = None # invalid path
    152                 for node in nodeset:
    153                     new = list(node.getiterator(tag))
    154                     if new and new[0] is node:
    155                         set.extend(new[1:])
    156                     else:
    157                         set.extend(new)
    158             else:
    159                 for node in nodeset:
    160                     for node in node:
    161                         if path == "*" or node.tag == path:
    162                             set.append(node)
    163             if not set:
    164                 return []
    165             nodeset = set
    166 
    167 _cache = {}
    168 
    169 ##
    170 # (Internal) Compile path.
    171 
    172 def _compile(path):
    173     p = _cache.get(path)
    174     if p is not None:
    175         return p
    176     p = Path(path)
    177     if len(_cache) >= 100:
    178         _cache.clear()
    179     _cache[path] = p
    180     return p
     263                selector.append(ops[token[0]](next, token))
     264            except StopIteration:
     265                raise SyntaxError("invalid path")
     266            try:
     267                token = next()
     268                if token[0] == "/":
     269                    token = next()
     270            except StopIteration:
     271                break
     272        _cache[path] = selector
     273    # execute selector pattern
     274    result = [elem]
     275    context = _SelectorContext(elem)
     276    for select in selector:
     277        result = select(context, result)
     278    return result
    181279
    182280##
    183281# Find first matching object.
    184282
    185 def find(element, path):
    186     return _compile(path).find(element)
     283def find(elem, path, namespaces=None):
     284    try:
     285        return iterfind(elem, path, namespaces).next()
     286    except StopIteration:
     287        return None
     288
     289##
     290# Find all matching objects.
     291
     292def findall(elem, path, namespaces=None):
     293    return list(iterfind(elem, path, namespaces))
    187294
    188295##
    189296# Find text for first matching object.
    190297
    191 def findtext(element, path, default=None):
    192     return _compile(path).findtext(element, default)
    193 
    194 ##
    195 # Find all matching objects.
    196 
    197 def findall(element, path):
    198     return _compile(path).findall(element)
     298def findtext(elem, path, default=None, namespaces=None):
     299    try:
     300        elem = iterfind(elem, path, namespaces).next()
     301        return elem.text or ""
     302    except StopIteration:
     303        return default
  • python/trunk/Lib/xml/etree/ElementTree.py

    r2 r391  
    11#
    22# ElementTree
    3 # $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $
    4 #
    5 # light-weight XML support for Python 1.5.2 and later.
    6 #
    7 # history:
    8 # 2001-10-20 fl   created (from various sources)
    9 # 2001-11-01 fl   return root from parse method
    10 # 2002-02-16 fl   sort attributes in lexical order
    11 # 2002-04-06 fl   TreeBuilder refactoring, added PythonDoc markup
    12 # 2002-05-01 fl   finished TreeBuilder refactoring
    13 # 2002-07-14 fl   added basic namespace support to ElementTree.write
    14 # 2002-07-25 fl   added QName attribute support
    15 # 2002-10-20 fl   fixed encoding in write
    16 # 2002-11-24 fl   changed default encoding to ascii; fixed attribute encoding
    17 # 2002-11-27 fl   accept file objects or file names for parse/write
    18 # 2002-12-04 fl   moved XMLTreeBuilder back to this module
    19 # 2003-01-11 fl   fixed entity encoding glitch for us-ascii
    20 # 2003-02-13 fl   added XML literal factory
    21 # 2003-02-21 fl   added ProcessingInstruction/PI factory
    22 # 2003-05-11 fl   added tostring/fromstring helpers
    23 # 2003-05-26 fl   added ElementPath support
    24 # 2003-07-05 fl   added makeelement factory method
    25 # 2003-07-28 fl   added more well-known namespace prefixes
    26 # 2003-08-15 fl   fixed typo in ElementTree.findtext (Thomas Dartsch)
    27 # 2003-09-04 fl   fall back on emulator if ElementPath is not installed
    28 # 2003-10-31 fl   markup updates
    29 # 2003-11-15 fl   fixed nested namespace bug
    30 # 2004-03-28 fl   added XMLID helper
    31 # 2004-06-02 fl   added default support to findtext
    32 # 2004-06-08 fl   fixed encoding of non-ascii element/attribute names
    33 # 2004-08-23 fl   take advantage of post-2.1 expat features
    34 # 2005-02-01 fl   added iterparse implementation
    35 # 2005-03-02 fl   fixed iterparse support for pre-2.2 versions
    36 #
    37 # Copyright (c) 1999-2005 by Fredrik Lundh.  All rights reserved.
     3# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
     4#
     5# light-weight XML support for Python 2.3 and later.
     6#
     7# history (since 1.2.6):
     8# 2005-11-12 fl   added tostringlist/fromstringlist helpers
     9# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
     10# 2006-07-05 fl   removed support for 2.1 and earlier
     11# 2007-06-21 fl   added deprecation/future warnings
     12# 2007-08-25 fl   added doctype hook, added parser version attribute etc
     13# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
     14# 2007-08-27 fl   warn for broken /tag searches on tree level
     15# 2007-09-02 fl   added html/text methods to serializer (experimental)
     16# 2007-09-05 fl   added method argument to tostring/tostringlist
     17# 2007-09-06 fl   improved error handling
     18# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
     19# 2007-12-15 fl   added C14N hooks, copy method (experimental)
     20#
     21# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
    3822#
    3923# fredrik@pythonware.com
     
    4327# The ElementTree toolkit is
    4428#
    45 # Copyright (c) 1999-2005 by Fredrik Lundh
     29# Copyright (c) 1999-2008 by Fredrik Lundh
    4630#
    4731# By obtaining, using, and/or copying this software and/or its
     
    6953
    7054# Licensed to PSF under a Contributor Agreement.
    71 # See http://www.python.org/2.4/license for licensing details.
     55# See http://www.python.org/psf/license for licensing details.
    7256
    7357__all__ = [
     
    7660    "dump",
    7761    "Element", "ElementTree",
    78     "fromstring",
     62    "fromstring", "fromstringlist",
    7963    "iselement", "iterparse",
    80     "parse",
     64    "parse", "ParseError",
    8165    "PI", "ProcessingInstruction",
    8266    "QName",
    8367    "SubElement",
    84     "tostring",
     68    "tostring", "tostringlist",
    8569    "TreeBuilder",
    86     "VERSION", "XML",
     70    "VERSION",
     71    "XML",
    8772    "XMLParser", "XMLTreeBuilder",
    8873    ]
     74
     75VERSION = "1.3.0"
    8976
    9077##
     
    10390# </ul>
    10491#
    105 # To create an element instance, use the {@link #Element} or {@link
    106 # #SubElement} factory functions.
     92# To create an element instance, use the {@link #Element} constructor
     93# or the {@link #SubElement} factory function.
    10794# <p>
    10895# The {@link #ElementTree} class can be used to wrap an element
     
    11097##
    11198
    112 import string, sys, re
    113 
    114 class _SimpleElementPath:
     99import sys
     100import re
     101import warnings
     102
     103
     104class _SimpleElementPath(object):
    115105    # emulate pre-1.2 find/findtext/findall behaviour
    116     def find(self, element, tag):
     106    def find(self, element, tag, namespaces=None):
    117107        for elem in element:
    118108            if elem.tag == tag:
    119109                return elem
    120110        return None
    121     def findtext(self, element, tag, default=None):
     111    def findtext(self, element, tag, default=None, namespaces=None):
     112        elem = self.find(element, tag)
     113        if elem is None:
     114            return default
     115        return elem.text or ""
     116    def iterfind(self, element, tag, namespaces=None):
     117        if tag[:3] == ".//":
     118            for elem in element.iter(tag[3:]):
     119                yield elem
    122120        for elem in element:
    123121            if elem.tag == tag:
    124                 return elem.text or ""
    125         return default
    126     def findall(self, element, tag):
    127         if tag[:3] == ".//":
    128             return element.getiterator(tag[3:])
    129         result = []
    130         for elem in element:
    131             if elem.tag == tag:
    132                 result.append(elem)
    133         return result
     122                yield elem
     123    def findall(self, element, tag, namespaces=None):
     124        return list(self.iterfind(element, tag, namespaces))
    134125
    135126try:
    136     import ElementPath
     127    from . import ElementPath
    137128except ImportError:
    138     # FIXME: issue warning in this case?
    139129    ElementPath = _SimpleElementPath()
    140130
    141 # TODO: add support for custom namespace resolvers/default namespaces
    142 # TODO: add improved support for incremental parsing
    143 
    144 VERSION = "1.2.6"
    145 
    146 ##
    147 # Internal element class.  This class defines the Element interface,
    148 # and provides a reference implementation of this interface.
     131##
     132# Parser error.  This is a subclass of <b>SyntaxError</b>.
    149133# <p>
    150 # You should not create instances of this class directly.  Use the
    151 # appropriate factory functions instead, such as {@link #Element}
    152 # and {@link #SubElement}.
    153 #
     134# In addition to the exception value, an exception instance contains a
     135# specific exception code in the <b>code</b> attribute, and the line and
     136# column of the error in the <b>position</b> attribute.
     137
     138class ParseError(SyntaxError):
     139    pass
     140
     141# --------------------------------------------------------------------
     142
     143##
     144# Checks if an object appears to be a valid element object.
     145#
     146# @param An element instance.
     147# @return A true value if this is an element object.
     148# @defreturn flag
     149
     150def iselement(element):
     151    # FIXME: not sure about this; might be a better idea to look
     152    # for tag/attrib/text attributes
     153    return isinstance(element, Element) or hasattr(element, "tag")
     154
     155##
     156# Element class.  This class defines the Element interface, and
     157# provides a reference implementation of this interface.
     158# <p>
     159# The element name, attribute names, and attribute values can be
     160# either ASCII strings (ordinary Python strings containing only 7-bit
     161# ASCII characters) or Unicode strings.
     162#
     163# @param tag The element name.
     164# @param attrib An optional dictionary, containing element attributes.
     165# @param **extra Additional attributes, given as keyword arguments.
    154166# @see Element
    155167# @see SubElement
     
    157169# @see ProcessingInstruction
    158170
    159 class _ElementInterface:
     171class Element(object):
    160172    # <tag attrib>text<child/>...</tag>tail
    161173
     
    167179    ##
    168180    # (Attribute) Element attribute dictionary.  Where possible, use
    169     # {@link #_ElementInterface.get},
    170     # {@link #_ElementInterface.set},
    171     # {@link #_ElementInterface.keys}, and
    172     # {@link #_ElementInterface.items} to access
     181    # {@link #Element.get},
     182    # {@link #Element.set},
     183    # {@link #Element.keys}, and
     184    # {@link #Element.items} to access
    173185    # element attributes.
    174186
     
    177189    ##
    178190    # (Attribute) Text before first subelement.  This is either a
    179     # string or the value None, if there was no text.
     191    # string or the value None.  Note that if there was no text, this
     192    # attribute may be either None or an empty string, depending on
     193    # the parser.
    180194
    181195    text = None
     
    184198    # (Attribute) Text after this element's end tag, but before the
    185199    # next sibling element's start tag.  This is either a string or
    186     # the value None, if there was no text.
     200    # the value None.  Note that if there was no text, this attribute
     201    # may be either None or an empty string, depending on the parser.
    187202
    188203    tail = None # text after end tag, if any
    189204
    190     def __init__(self, tag, attrib):
     205    # constructor
     206
     207    def __init__(self, tag, attrib={}, **extra):
     208        attrib = attrib.copy()
     209        attrib.update(extra)
    191210        self.tag = tag
    192211        self.attrib = attrib
     
    194213
    195214    def __repr__(self):
    196         return "<Element %s at %x>" % (self.tag, id(self))
     215        return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
    197216
    198217    ##
     
    204223
    205224    def makeelement(self, tag, attrib):
    206         return Element(tag, attrib)
    207 
    208     ##
    209     # Returns the number of subelements.
     225        return self.__class__(tag, attrib)
     226
     227    ##
     228    # (Experimental) Copies the current element.  This creates a
     229    # shallow copy; subelements will be shared with the original tree.
     230    #
     231    # @return A new element instance.
     232
     233    def copy(self):
     234        elem = self.makeelement(self.tag, self.attrib)
     235        elem.text = self.text
     236        elem.tail = self.tail
     237        elem[:] = self
     238        return elem
     239
     240    ##
     241    # Returns the number of subelements.  Note that this only counts
     242    # full elements; to check if there's any content in an element, you
     243    # have to check both the length and the <b>text</b> attribute.
    210244    #
    211245    # @return The number of subelements.
     
    214248        return len(self._children)
    215249
    216     ##
    217     # Returns the given subelement.
     250    def __nonzero__(self):
     251        warnings.warn(
     252            "The behavior of this method will change in future versions.  "
     253            "Use specific 'len(elem)' or 'elem is not None' test instead.",
     254            FutureWarning, stacklevel=2
     255            )
     256        return len(self._children) != 0 # emulate old behaviour, for now
     257
     258    ##
     259    # Returns the given subelement, by index.
    218260    #
    219261    # @param index What subelement to return.
     
    225267
    226268    ##
    227     # Replaces the given subelement.
     269    # Replaces the given subelement, by index.
    228270    #
    229271    # @param index What subelement to replace.
    230272    # @param element The new element value.
    231273    # @exception IndexError If the given element does not exist.
    232     # @exception AssertionError If element is not a valid object.
    233274
    234275    def __setitem__(self, index, element):
    235         assert iselement(element)
     276        # if isinstance(index, slice):
     277        #     for elt in element:
     278        #         assert iselement(elt)
     279        # else:
     280        #     assert iselement(element)
    236281        self._children[index] = element
    237282
    238283    ##
    239     # Deletes the given subelement.
     284    # Deletes the given subelement, by index.
    240285    #
    241286    # @param index What subelement to delete.
     
    246291
    247292    ##
    248     # Returns a list containing subelements in the given range.
    249     #
    250     # @param start The first subelement to return.
    251     # @param stop The first subelement that shouldn't be returned.
    252     # @return A sequence object containing subelements.
    253 
    254     def __getslice__(self, start, stop):
    255         return self._children[start:stop]
    256 
    257     ##
    258     # Replaces a number of subelements with elements from a sequence.
    259     #
    260     # @param start The first subelement to replace.
    261     # @param stop The first subelement that shouldn't be replaced.
     293    # Adds a subelement to the end of this element.  In document order,
     294    # the new element will appear after the last existing subelement (or
     295    # directly after the text, if it's the first subelement), but before
     296    # the end tag for this element.
     297    #
     298    # @param element The element to add.
     299
     300    def append(self, element):
     301        # assert iselement(element)
     302        self._children.append(element)
     303
     304    ##
     305    # Appends subelements from a sequence.
     306    #
    262307    # @param elements A sequence object with zero or more elements.
    263     # @exception AssertionError If a sequence member is not a valid object.
    264 
    265     def __setslice__(self, start, stop, elements):
    266         for element in elements:
    267             assert iselement(element)
    268         self._children[start:stop] = list(elements)
    269 
    270     ##
    271     # Deletes a number of subelements.
    272     #
    273     # @param start The first subelement to delete.
    274     # @param stop The first subelement to leave in there.
    275 
    276     def __delslice__(self, start, stop):
    277         del self._children[start:stop]
    278 
    279     ##
    280     # Adds a subelement to the end of this element.
    281     #
    282     # @param element The element to add.
    283     # @exception AssertionError If a sequence member is not a valid object.
    284 
    285     def append(self, element):
    286         assert iselement(element)
    287         self._children.append(element)
     308    # @since 1.3
     309
     310    def extend(self, elements):
     311        # for element in elements:
     312        #     assert iselement(element)
     313        self._children.extend(elements)
    288314
    289315    ##
     
    291317    #
    292318    # @param index Where to insert the new subelement.
    293     # @exception AssertionError If the element is not a valid object.
    294319
    295320    def insert(self, index, element):
    296         assert iselement(element)
     321        # assert iselement(element)
    297322        self._children.insert(index, element)
    298323
     
    300325    # Removes a matching subelement.  Unlike the <b>find</b> methods,
    301326    # this method compares elements based on identity, not on tag
    302     # value or contents.
     327    # value or contents.  To remove subelements by other means, the
     328    # easiest way is often to use a list comprehension to select what
     329    # elements to keep, and use slice assignment to update the parent
     330    # element.
    303331    #
    304332    # @param element What element to remove.
    305333    # @exception ValueError If a matching element could not be found.
    306     # @exception AssertionError If the element is not a valid object.
    307334
    308335    def remove(self, element):
    309         assert iselement(element)
     336        # assert iselement(element)
    310337        self._children.remove(element)
    311338
    312339    ##
    313     # Returns all subelements.  The elements are returned in document
    314     # order.
     340    # (Deprecated) Returns all subelements.  The elements are returned
     341    # in document order.
    315342    #
    316343    # @return A list of subelements.
     
    318345
    319346    def getchildren(self):
     347        warnings.warn(
     348            "This method will be removed in future versions.  "
     349            "Use 'list(elem)' or iteration over elem instead.",
     350            DeprecationWarning, stacklevel=2
     351            )
    320352        return self._children
    321353
     
    324356    #
    325357    # @param path What element to look for.
     358    # @keyparam namespaces Optional namespace prefix map.
    326359    # @return The first matching element, or None if no element was found.
    327360    # @defreturn Element or None
    328361
    329     def find(self, path):
    330         return ElementPath.find(self, path)
     362    def find(self, path, namespaces=None):
     363        return ElementPath.find(self, path, namespaces)
    331364
    332365    ##
     
    335368    # @param path What element to look for.
    336369    # @param default What to return if the element was not found.
     370    # @keyparam namespaces Optional namespace prefix map.
    337371    # @return The text content of the first matching element, or the
    338372    #     default value if no element was found.  Note that if the element
    339     #     has is found, but has no text content, this method returns an
     373    #     is found, but has no text content, this method returns an
    340374    #     empty string.
    341375    # @defreturn string
    342376
    343     def findtext(self, path, default=None):
    344         return ElementPath.findtext(self, path, default)
     377    def findtext(self, path, default=None, namespaces=None):
     378        return ElementPath.findtext(self, path, default, namespaces)
    345379
    346380    ##
     
    348382    #
    349383    # @param path What element to look for.
    350     # @return A list or iterator containing all matching elements,
     384    # @keyparam namespaces Optional namespace prefix map.
     385    # @return A list or other sequence containing all matching elements,
    351386    #    in document order.
    352387    # @defreturn list of Element instances
    353388
    354     def findall(self, path):
    355         return ElementPath.findall(self, path)
     389    def findall(self, path, namespaces=None):
     390        return ElementPath.findall(self, path, namespaces)
     391
     392    ##
     393    # Finds all matching subelements, by tag name or path.
     394    #
     395    # @param path What element to look for.
     396    # @keyparam namespaces Optional namespace prefix map.
     397    # @return An iterator or sequence containing all matching elements,
     398    #    in document order.
     399    # @defreturn a generated sequence of Element instances
     400
     401    def iterfind(self, path, namespaces=None):
     402        return ElementPath.iterfind(self, path, namespaces)
    356403
    357404    ##
    358405    # Resets an element.  This function removes all subelements, clears
    359     # all attributes, and sets the text and tail attributes to None.
     406    # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
     407    # to None.
    360408
    361409    def clear(self):
     
    365413
    366414    ##
    367     # Gets an element attribute.
     415    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
     416    # some implementations may handle this a bit more efficiently.
    368417    #
    369418    # @param key What attribute to look for.
     
    377426
    378427    ##
    379     # Sets an element attribute.
     428    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
     429    # but some implementations may handle this a bit more efficiently.
    380430    #
    381431    # @param key What attribute to set.
     
    388438    # Gets a list of attribute names.  The names are returned in an
    389439    # arbitrary order (just like for an ordinary Python dictionary).
     440    # Equivalent to <b>attrib.keys()</b>.
    390441    #
    391442    # @return A list of element attribute names.
     
    397448    ##
    398449    # Gets element attributes, as a sequence.  The attributes are
    399     # returned in an arbitrary order.
     450    # returned in an arbitrary order.  Equivalent to <b>attrib.items()</b>.
    400451    #
    401452    # @return A list of (name, value) tuples for all attributes.
     
    410461    # with a matching tag.
    411462    # <p>
    412     # If the tree structure is modified during iteration, the result
    413     # is undefined.
     463    # If the tree structure is modified during iteration, new or removed
     464    # elements may or may not be included.  To get a stable set, use the
     465    # list() function on the iterator, and loop over the resulting list.
    414466    #
    415467    # @param tag What tags to look for (default is to return all elements).
    416     # @return A list or iterator containing all the matching elements.
    417     # @defreturn list or iterator
    418 
    419     def getiterator(self, tag=None):
    420         nodes = []
     468    # @return An iterator containing all the matching elements.
     469    # @defreturn iterator
     470
     471    def iter(self, tag=None):
    421472        if tag == "*":
    422473            tag = None
    423474        if tag is None or self.tag == tag:
    424             nodes.append(self)
    425         for node in self._children:
    426             nodes.extend(node.getiterator(tag))
    427         return nodes
     475            yield self
     476        for e in self._children:
     477            for e in e.iter(tag):
     478                yield e
     479
     480    # compatibility
     481    def getiterator(self, tag=None):
     482        # Change for a DeprecationWarning in 1.4
     483        warnings.warn(
     484            "This method will be removed in future versions.  "
     485            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
     486            PendingDeprecationWarning, stacklevel=2
     487        )
     488        return list(self.iter(tag))
     489
     490    ##
     491    # Creates a text iterator.  The iterator loops over this element
     492    # and all subelements, in document order, and returns all inner
     493    # text.
     494    #
     495    # @return An iterator containing all inner text.
     496    # @defreturn iterator
     497
     498    def itertext(self):
     499        tag = self.tag
     500        if not isinstance(tag, basestring) and tag is not None:
     501            return
     502        if self.text:
     503            yield self.text
     504        for e in self:
     505            for s in e.itertext():
     506                yield s
     507            if e.tail:
     508                yield e.tail
    428509
    429510# compatibility
    430 _Element = _ElementInterface
    431 
    432 ##
    433 # Element factory.  This function returns an object implementing the
    434 # standard Element interface.  The exact class or type of that object
    435 # is implementation dependent, but it will always be compatible with
    436 # the {@link #_ElementInterface} class in this module.
    437 # <p>
    438 # The element name, attribute names, and attribute values can be
    439 # either 8-bit ASCII strings or Unicode strings.
    440 #
    441 # @param tag The element name.
    442 # @param attrib An optional dictionary, containing element attributes.
    443 # @param **extra Additional attributes, given as keyword arguments.
    444 # @return An element instance.
    445 # @defreturn Element
    446 
    447 def Element(tag, attrib={}, **extra):
    448     attrib = attrib.copy()
    449     attrib.update(extra)
    450     return _ElementInterface(tag, attrib)
     511_Element = _ElementInterface = Element
    451512
    452513##
     
    473534##
    474535# Comment element factory.  This factory function creates a special
    475 # element that will be serialized as an XML comment.
     536# element that will be serialized as an XML comment by the standard
     537# serializer.
    476538# <p>
    477539# The comment string can be either an 8-bit ASCII string or a Unicode
     
    489551##
    490552# PI element factory.  This factory function creates a special element
    491 # that will be serialized as an XML processing instruction.
     553# that will be serialized as an XML processing instruction by the standard
     554# serializer.
    492555#
    493556# @param target A string containing the PI target.
     
    515578# @return An opaque object, representing the QName.
    516579
    517 class QName:
     580class QName(object):
    518581    def __init__(self, text_or_uri, tag=None):
    519582        if tag:
     
    529592        return cmp(self.text, other)
    530593
     594# --------------------------------------------------------------------
     595
    531596##
    532597# ElementTree wrapper class.  This class represents an entire element
     
    535600#
    536601# @param element Optional root element.
    537 # @keyparam file Optional file handle or name.  If given, the
     602# @keyparam file Optional file handle or file name.  If given, the
    538603#     tree is initialized with the contents of this XML file.
    539604
    540 class ElementTree:
     605class ElementTree(object):
    541606
    542607    def __init__(self, element=None, file=None):
    543         assert element is None or iselement(element)
     608        # assert element is None or iselement(element)
    544609        self._root = element # first node
    545610        if file:
     
    563628
    564629    def _setroot(self, element):
    565         assert iselement(element)
     630        # assert iselement(element)
    566631        self._root = element
    567632
     
    569634    # Loads an external XML document into this element tree.
    570635    #
    571     # @param source A file name or file object.
    572     # @param parser An optional parser instance.  If not given, the
    573     #     standard {@link XMLTreeBuilder} parser is used.
     636    # @param source A file name or file object.  If a file object is
     637    #     given, it only has to implement a <b>read(n)</b> method.
     638    # @keyparam parser An optional parser instance.  If not given, the
     639    #     standard {@link XMLParser} parser is used.
    574640    # @return The document root element.
    575641    # @defreturn Element
     642    # @exception ParseError If the parser fails to parse the document.
    576643
    577644    def parse(self, source, parser=None):
     645        close_source = False
    578646        if not hasattr(source, "read"):
    579647            source = open(source, "rb")
    580         if not parser:
    581             parser = XMLTreeBuilder()
    582         while 1:
    583             data = source.read(32768)
    584             if not data:
    585                 break
    586             parser.feed(data)
    587         self._root = parser.close()
    588         return self._root
     648            close_source = True
     649        try:
     650            if not parser:
     651                parser = XMLParser(target=TreeBuilder())
     652            while 1:
     653                data = source.read(65536)
     654                if not data:
     655                    break
     656                parser.feed(data)
     657            self._root = parser.close()
     658            return self._root
     659        finally:
     660            if close_source:
     661                source.close()
    589662
    590663    ##
     
    596669    # @defreturn iterator
    597670
     671    def iter(self, tag=None):
     672        # assert self._root is not None
     673        return self._root.iter(tag)
     674
     675    # compatibility
    598676    def getiterator(self, tag=None):
    599         assert self._root is not None
    600         return self._root.getiterator(tag)
    601 
    602     ##
    603     # Finds the first toplevel element with given tag.
    604     # Same as getroot().find(path).
     677        # Change for a DeprecationWarning in 1.4
     678        warnings.warn(
     679            "This method will be removed in future versions.  "
     680            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
     681            PendingDeprecationWarning, stacklevel=2
     682        )
     683        return list(self.iter(tag))
     684
     685    ##
     686    # Same as getroot().find(path), starting at the root of the
     687    # tree.
    605688    #
    606689    # @param path What element to look for.
     690    # @keyparam namespaces Optional namespace prefix map.
    607691    # @return The first matching element, or None if no element was found.
    608692    # @defreturn Element or None
    609693
    610     def find(self, path):
    611         assert self._root is not None
     694    def find(self, path, namespaces=None):
     695        # assert self._root is not None
    612696        if path[:1] == "/":
    613697            path = "." + path
    614         return self._root.find(path)
    615 
    616     ##
    617     # Finds the element text for the first toplevel element with given
    618     # tag.  Same as getroot().findtext(path).
    619     #
    620     # @param path What toplevel element to look for.
     698            warnings.warn(
     699                "This search is broken in 1.3 and earlier, and will be "
     700                "fixed in a future version.  If you rely on the current "
     701                "behaviour, change it to %r" % path,
     702                FutureWarning, stacklevel=2
     703                )
     704        return self._root.find(path, namespaces)
     705
     706    ##
     707    # Same as getroot().findtext(path), starting at the root of the tree.
     708    #
     709    # @param path What element to look for.
    621710    # @param default What to return if the element was not found.
     711    # @keyparam namespaces Optional namespace prefix map.
    622712    # @return The text content of the first matching element, or the
    623713    #     default value if no element was found.  Note that if the element
    624     #     has is found, but has no text content, this method returns an
     714    #     is found, but has no text content, this method returns an
    625715    #     empty string.
    626716    # @defreturn string
    627717
    628     def findtext(self, path, default=None):
    629         assert self._root is not None
     718    def findtext(self, path, default=None, namespaces=None):
     719        # assert self._root is not None
    630720        if path[:1] == "/":
    631721            path = "." + path
    632         return self._root.findtext(path, default)
    633 
    634     ##
    635     # Finds all toplevel elements with the given tag.
    636     # Same as getroot().findall(path).
     722            warnings.warn(
     723                "This search is broken in 1.3 and earlier, and will be "
     724                "fixed in a future version.  If you rely on the current "
     725                "behaviour, change it to %r" % path,
     726                FutureWarning, stacklevel=2
     727                )
     728        return self._root.findtext(path, default, namespaces)
     729
     730    ##
     731    # Same as getroot().findall(path), starting at the root of the tree.
    637732    #
    638733    # @param path What element to look for.
     734    # @keyparam namespaces Optional namespace prefix map.
    639735    # @return A list or iterator containing all matching elements,
    640736    #    in document order.
    641737    # @defreturn list of Element instances
    642738
    643     def findall(self, path):
    644         assert self._root is not None
     739    def findall(self, path, namespaces=None):
     740        # assert self._root is not None
    645741        if path[:1] == "/":
    646742            path = "." + path
    647         return self._root.findall(path)
     743            warnings.warn(
     744                "This search is broken in 1.3 and earlier, and will be "
     745                "fixed in a future version.  If you rely on the current "
     746                "behaviour, change it to %r" % path,
     747                FutureWarning, stacklevel=2
     748                )
     749        return self._root.findall(path, namespaces)
     750
     751    ##
     752    # Finds all matching subelements, by tag name or path.
     753    # Same as getroot().iterfind(path).
     754    #
     755    # @param path What element to look for.
     756    # @keyparam namespaces Optional namespace prefix map.
     757    # @return An iterator or sequence containing all matching elements,
     758    #    in document order.
     759    # @defreturn a generated sequence of Element instances
     760
     761    def iterfind(self, path, namespaces=None):
     762        # assert self._root is not None
     763        if path[:1] == "/":
     764            path = "." + path
     765            warnings.warn(
     766                "This search is broken in 1.3 and earlier, and will be "
     767                "fixed in a future version.  If you rely on the current "
     768                "behaviour, change it to %r" % path,
     769                FutureWarning, stacklevel=2
     770                )
     771        return self._root.iterfind(path, namespaces)
    648772
    649773    ##
    650774    # Writes the element tree to a file, as XML.
    651775    #
     776    # @def write(file, **options)
    652777    # @param file A file name, or a file object opened for writing.
    653     # @param encoding Optional output encoding (default is US-ASCII).
    654 
    655     def write(self, file, encoding="us-ascii"):
    656         assert self._root is not None
    657         if not hasattr(file, "write"):
    658             file = open(file, "wb")
     778    # @param **options Options, given as keyword arguments.
     779    # @keyparam encoding Optional output encoding (default is US-ASCII).
     780    # @keyparam xml_declaration Controls if an XML declaration should
     781    #     be added to the file.  Use False for never, True for always,
     782    #     None for only if not US-ASCII or UTF-8.  None is default.
     783    # @keyparam default_namespace Sets the default XML namespace (for "xmlns").
     784    # @keyparam method Optional output method ("xml", "html", "text" or
     785    #     "c14n"; default is "xml").
     786
     787    def write(self, file_or_filename,
     788              # keyword arguments
     789              encoding=None,
     790              xml_declaration=None,
     791              default_namespace=None,
     792              method=None):
     793        # assert self._root is not None
     794        if not method:
     795            method = "xml"
     796        elif method not in _serialize:
     797            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
     798            raise ValueError("unknown method %r" % method)
     799        if hasattr(file_or_filename, "write"):
     800            file = file_or_filename
     801        else:
     802            file = open(file_or_filename, "wb")
     803        write = file.write
    659804        if not encoding:
    660             encoding = "us-ascii"
    661         elif encoding != "utf-8" and encoding != "us-ascii":
    662             file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
    663         self._write(file, self._root, encoding, {})
    664 
    665     def _write(self, file, node, encoding, namespaces):
    666         # write XML to file
    667         tag = node.tag
    668         if tag is Comment:
    669             file.write("<!-- %s -->" % _escape_cdata(node.text, encoding))
    670         elif tag is ProcessingInstruction:
    671             file.write("<?%s?>" % _escape_cdata(node.text, encoding))
     805            if method == "c14n":
     806                encoding = "utf-8"
     807            else:
     808                encoding = "us-ascii"
     809        elif xml_declaration or (xml_declaration is None and
     810                                 encoding not in ("utf-8", "us-ascii")):
     811            if method == "xml":
     812                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
     813        if method == "text":
     814            _serialize_text(write, self._root, encoding)
    672815        else:
    673             items = node.items()
    674             xmlns_items = [] # new namespaces in this scope
    675             try:
    676                 if isinstance(tag, QName) or tag[:1] == "{":
    677                     tag, xmlns = fixtag(tag, namespaces)
    678                     if xmlns: xmlns_items.append(xmlns)
    679             except TypeError:
    680                 _raise_serialization_error(tag)
    681             file.write("<" + _encode(tag, encoding))
    682             if items or xmlns_items:
    683                 items.sort() # lexical order
    684                 for k, v in items:
    685                     try:
    686                         if isinstance(k, QName) or k[:1] == "{":
    687                             k, xmlns = fixtag(k, namespaces)
    688                             if xmlns: xmlns_items.append(xmlns)
    689                     except TypeError:
    690                         _raise_serialization_error(k)
    691                     try:
    692                         if isinstance(v, QName):
    693                             v, xmlns = fixtag(v, namespaces)
    694                             if xmlns: xmlns_items.append(xmlns)
    695                     except TypeError:
    696                         _raise_serialization_error(v)
    697                     file.write(" %s=\"%s\"" % (_encode(k, encoding),
    698                                                _escape_attrib(v, encoding)))
    699                 for k, v in xmlns_items:
    700                     file.write(" %s=\"%s\"" % (_encode(k, encoding),
    701                                                _escape_attrib(v, encoding)))
    702             if node.text or len(node):
    703                 file.write(">")
    704                 if node.text:
    705                     file.write(_escape_cdata(node.text, encoding))
    706                 for n in node:
    707                     self._write(file, n, encoding, namespaces)
    708                 file.write("</" + _encode(tag, encoding) + ">")
     816            qnames, namespaces = _namespaces(
     817                self._root, encoding, default_namespace
     818                )
     819            serialize = _serialize[method]
     820            serialize(write, self._root, encoding, qnames, namespaces)
     821        if file_or_filename is not file:
     822            file.close()
     823
     824    def write_c14n(self, file):
     825        # lxml.etree compatibility.  use output method instead
     826        return self.write(file, method="c14n")
     827
     828# --------------------------------------------------------------------
     829# serialization support
     830
     831def _namespaces(elem, encoding, default_namespace=None):
     832    # identify namespaces used in this tree
     833
     834    # maps qnames to *encoded* prefix:local names
     835    qnames = {None: None}
     836
     837    # maps uri:s to prefixes
     838    namespaces = {}
     839    if default_namespace:
     840        namespaces[default_namespace] = ""
     841
     842    def encode(text):
     843        return text.encode(encoding)
     844
     845    def add_qname(qname):
     846        # calculate serialized qname representation
     847        try:
     848            if qname[:1] == "{":
     849                uri, tag = qname[1:].rsplit("}", 1)
     850                prefix = namespaces.get(uri)
     851                if prefix is None:
     852                    prefix = _namespace_map.get(uri)
     853                    if prefix is None:
     854                        prefix = "ns%d" % len(namespaces)
     855                    if prefix != "xml":
     856                        namespaces[uri] = prefix
     857                if prefix:
     858                    qnames[qname] = encode("%s:%s" % (prefix, tag))
     859                else:
     860                    qnames[qname] = encode(tag) # default element
    709861            else:
    710                 file.write(" />")
    711             for k, v in xmlns_items:
    712                 del namespaces[v]
    713         if node.tail:
    714             file.write(_escape_cdata(node.tail, encoding))
     862                if default_namespace:
     863                    # FIXME: can this be handled in XML 1.0?
     864                    raise ValueError(
     865                        "cannot use non-qualified names with "
     866                        "default_namespace option"
     867                        )
     868                qnames[qname] = encode(qname)
     869        except TypeError:
     870            _raise_serialization_error(qname)
     871
     872    # populate qname and namespaces table
     873    try:
     874        iterate = elem.iter
     875    except AttributeError:
     876        iterate = elem.getiterator # cET compatibility
     877    for elem in iterate():
     878        tag = elem.tag
     879        if isinstance(tag, QName):
     880            if tag.text not in qnames:
     881                add_qname(tag.text)
     882        elif isinstance(tag, basestring):
     883            if tag not in qnames:
     884                add_qname(tag)
     885        elif tag is not None and tag is not Comment and tag is not PI:
     886            _raise_serialization_error(tag)
     887        for key, value in elem.items():
     888            if isinstance(key, QName):
     889                key = key.text
     890            if key not in qnames:
     891                add_qname(key)
     892            if isinstance(value, QName) and value.text not in qnames:
     893                add_qname(value.text)
     894        text = elem.text
     895        if isinstance(text, QName) and text.text not in qnames:
     896            add_qname(text.text)
     897    return qnames, namespaces
     898
     899def _serialize_xml(write, elem, encoding, qnames, namespaces):
     900    tag = elem.tag
     901    text = elem.text
     902    if tag is Comment:
     903        write("<!--%s-->" % _encode(text, encoding))
     904    elif tag is ProcessingInstruction:
     905        write("<?%s?>" % _encode(text, encoding))
     906    else:
     907        tag = qnames[tag]
     908        if tag is None:
     909            if text:
     910                write(_escape_cdata(text, encoding))
     911            for e in elem:
     912                _serialize_xml(write, e, encoding, qnames, None)
     913        else:
     914            write("<" + tag)
     915            items = elem.items()
     916            if items or namespaces:
     917                if namespaces:
     918                    for v, k in sorted(namespaces.items(),
     919                                       key=lambda x: x[1]):  # sort on prefix
     920                        if k:
     921                            k = ":" + k
     922                        write(" xmlns%s=\"%s\"" % (
     923                            k.encode(encoding),
     924                            _escape_attrib(v, encoding)
     925                            ))
     926                for k, v in sorted(items):  # lexical order
     927                    if isinstance(k, QName):
     928                        k = k.text
     929                    if isinstance(v, QName):
     930                        v = qnames[v.text]
     931                    else:
     932                        v = _escape_attrib(v, encoding)
     933                    write(" %s=\"%s\"" % (qnames[k], v))
     934            if text or len(elem):
     935                write(">")
     936                if text:
     937                    write(_escape_cdata(text, encoding))
     938                for e in elem:
     939                    _serialize_xml(write, e, encoding, qnames, None)
     940                write("</" + tag + ">")
     941            else:
     942                write(" />")
     943    if elem.tail:
     944        write(_escape_cdata(elem.tail, encoding))
     945
     946HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
     947              "img", "input", "isindex", "link", "meta", "param")
     948
     949try:
     950    HTML_EMPTY = set(HTML_EMPTY)
     951except NameError:
     952    pass
     953
     954def _serialize_html(write, elem, encoding, qnames, namespaces):
     955    tag = elem.tag
     956    text = elem.text
     957    if tag is Comment:
     958        write("<!--%s-->" % _escape_cdata(text, encoding))
     959    elif tag is ProcessingInstruction:
     960        write("<?%s?>" % _escape_cdata(text, encoding))
     961    else:
     962        tag = qnames[tag]
     963        if tag is None:
     964            if text:
     965                write(_escape_cdata(text, encoding))
     966            for e in elem:
     967                _serialize_html(write, e, encoding, qnames, None)
     968        else:
     969            write("<" + tag)
     970            items = elem.items()
     971            if items or namespaces:
     972                if namespaces:
     973                    for v, k in sorted(namespaces.items(),
     974                                       key=lambda x: x[1]):  # sort on prefix
     975                        if k:
     976                            k = ":" + k
     977                        write(" xmlns%s=\"%s\"" % (
     978                            k.encode(encoding),
     979                            _escape_attrib(v, encoding)
     980                            ))
     981                for k, v in sorted(items):  # lexical order
     982                    if isinstance(k, QName):
     983                        k = k.text
     984                    if isinstance(v, QName):
     985                        v = qnames[v.text]
     986                    else:
     987                        v = _escape_attrib_html(v, encoding)
     988                    # FIXME: handle boolean attributes
     989                    write(" %s=\"%s\"" % (qnames[k], v))
     990            write(">")
     991            ltag = tag.lower()
     992            if text:
     993                if ltag == "script" or ltag == "style":
     994                    write(_encode(text, encoding))
     995                else:
     996                    write(_escape_cdata(text, encoding))
     997            for e in elem:
     998                _serialize_html(write, e, encoding, qnames, None)
     999            if ltag not in HTML_EMPTY:
     1000                write("</" + tag + ">")
     1001    if elem.tail:
     1002        write(_escape_cdata(elem.tail, encoding))
     1003
     1004def _serialize_text(write, elem, encoding):
     1005    for part in elem.itertext():
     1006        write(part.encode(encoding))
     1007    if elem.tail:
     1008        write(elem.tail.encode(encoding))
     1009
     1010_serialize = {
     1011    "xml": _serialize_xml,
     1012    "html": _serialize_html,
     1013    "text": _serialize_text,
     1014# this optional method is imported at the end of the module
     1015#   "c14n": _serialize_c14n,
     1016}
     1017
     1018##
     1019# Registers a namespace prefix.  The registry is global, and any
     1020# existing mapping for either the given prefix or the namespace URI
     1021# will be removed.
     1022#
     1023# @param prefix Namespace prefix.
     1024# @param uri Namespace uri.  Tags and attributes in this namespace
     1025#     will be serialized with the given prefix, if at all possible.
     1026# @exception ValueError If the prefix is reserved, or is otherwise
     1027#     invalid.
     1028
     1029def register_namespace(prefix, uri):
     1030    if re.match("ns\d+$", prefix):
     1031        raise ValueError("Prefix format reserved for internal use")
     1032    for k, v in _namespace_map.items():
     1033        if k == uri or v == prefix:
     1034            del _namespace_map[k]
     1035    _namespace_map[uri] = prefix
     1036
     1037_namespace_map = {
     1038    # "well-known" namespace prefixes
     1039    "http://www.w3.org/XML/1998/namespace": "xml",
     1040    "http://www.w3.org/1999/xhtml": "html",
     1041    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
     1042    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
     1043    # xml schema
     1044    "http://www.w3.org/2001/XMLSchema": "xs",
     1045    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
     1046    # dublin core
     1047    "http://purl.org/dc/elements/1.1/": "dc",
     1048}
     1049
     1050def _raise_serialization_error(text):
     1051    raise TypeError(
     1052        "cannot serialize %r (type %s)" % (text, type(text).__name__)
     1053        )
     1054
     1055def _encode(text, encoding):
     1056    try:
     1057        return text.encode(encoding, "xmlcharrefreplace")
     1058    except (TypeError, AttributeError):
     1059        _raise_serialization_error(text)
     1060
     1061def _escape_cdata(text, encoding):
     1062    # escape character data
     1063    try:
     1064        # it's worth avoiding do-nothing calls for strings that are
     1065        # shorter than 500 character, or so.  assume that's, by far,
     1066        # the most common case in most applications.
     1067        if "&" in text:
     1068            text = text.replace("&", "&amp;")
     1069        if "<" in text:
     1070            text = text.replace("<", "&lt;")
     1071        if ">" in text:
     1072            text = text.replace(">", "&gt;")
     1073        return text.encode(encoding, "xmlcharrefreplace")
     1074    except (TypeError, AttributeError):
     1075        _raise_serialization_error(text)
     1076
     1077def _escape_attrib(text, encoding):
     1078    # escape attribute value
     1079    try:
     1080        if "&" in text:
     1081            text = text.replace("&", "&amp;")
     1082        if "<" in text:
     1083            text = text.replace("<", "&lt;")
     1084        if ">" in text:
     1085            text = text.replace(">", "&gt;")
     1086        if "\"" in text:
     1087            text = text.replace("\"", "&quot;")
     1088        if "\n" in text:
     1089            text = text.replace("\n", "&#10;")
     1090        return text.encode(encoding, "xmlcharrefreplace")
     1091    except (TypeError, AttributeError):
     1092        _raise_serialization_error(text)
     1093
     1094def _escape_attrib_html(text, encoding):
     1095    # escape attribute value
     1096    try:
     1097        if "&" in text:
     1098            text = text.replace("&", "&amp;")
     1099        if ">" in text:
     1100            text = text.replace(">", "&gt;")
     1101        if "\"" in text:
     1102            text = text.replace("\"", "&quot;")
     1103        return text.encode(encoding, "xmlcharrefreplace")
     1104    except (TypeError, AttributeError):
     1105        _raise_serialization_error(text)
    7151106
    7161107# --------------------------------------------------------------------
    717 # helpers
    718 
    719 ##
    720 # Checks if an object appears to be a valid element object.
    721 #
    722 # @param An element instance.
    723 # @return A true value if this is an element object.
    724 # @defreturn flag
    725 
    726 def iselement(element):
    727     # FIXME: not sure about this; might be a better idea to look
    728     # for tag/attrib/text attributes
    729     return isinstance(element, _ElementInterface) or hasattr(element, "tag")
     1108
     1109##
     1110# Generates a string representation of an XML element, including all
     1111# subelements.
     1112#
     1113# @param element An Element instance.
     1114# @keyparam encoding Optional output encoding (default is US-ASCII).
     1115# @keyparam method Optional output method ("xml", "html", "text" or
     1116#     "c14n"; default is "xml").
     1117# @return An encoded string containing the XML data.
     1118# @defreturn string
     1119
     1120def tostring(element, encoding=None, method=None):
     1121    class dummy:
     1122        pass
     1123    data = []
     1124    file = dummy()
     1125    file.write = data.append
     1126    ElementTree(element).write(file, encoding, method=method)
     1127    return "".join(data)
     1128
     1129##
     1130# Generates a string representation of an XML element, including all
     1131# subelements.  The string is returned as a sequence of string fragments.
     1132#
     1133# @param element An Element instance.
     1134# @keyparam encoding Optional output encoding (default is US-ASCII).
     1135# @keyparam method Optional output method ("xml", "html", "text" or
     1136#     "c14n"; default is "xml").
     1137# @return A sequence object containing the XML data.
     1138# @defreturn sequence
     1139# @since 1.3
     1140
     1141def tostringlist(element, encoding=None, method=None):
     1142    class dummy:
     1143        pass
     1144    data = []
     1145    file = dummy()
     1146    file.write = data.append
     1147    ElementTree(element).write(file, encoding, method=method)
     1148    # FIXME: merge small fragments into larger parts
     1149    return data
    7301150
    7311151##
     
    7471167        sys.stdout.write("\n")
    7481168
    749 def _encode(s, encoding):
    750     try:
    751         return s.encode(encoding)
    752     except AttributeError:
    753         return s # 1.5.2: assume the string uses the right encoding
    754 
    755 if sys.version[:3] == "1.5":
    756     _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2
    757 else:
    758     _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
    759 
    760 _escape_map = {
    761     "&": "&amp;",
    762     "<": "&lt;",
    763     ">": "&gt;",
    764     '"': "&quot;",
    765 }
    766 
    767 _namespace_map = {
    768     # "well-known" namespace prefixes
    769     "http://www.w3.org/XML/1998/namespace": "xml",
    770     "http://www.w3.org/1999/xhtml": "html",
    771     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    772     "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    773 }
    774 
    775 def _raise_serialization_error(text):
    776     raise TypeError(
    777         "cannot serialize %r (type %s)" % (text, type(text).__name__)
    778         )
    779 
    780 def _encode_entity(text, pattern=_escape):
    781     # map reserved and non-ascii characters to numerical entities
    782     def escape_entities(m, map=_escape_map):
    783         out = []
    784         append = out.append
    785         for char in m.group():
    786             text = map.get(char)
    787             if text is None:
    788                 text = "&#%d;" % ord(char)
    789             append(text)
    790         return string.join(out, "")
    791     try:
    792         return _encode(pattern.sub(escape_entities, text), "ascii")
    793     except TypeError:
    794         _raise_serialization_error(text)
    795 
    796 #
    797 # the following functions assume an ascii-compatible encoding
    798 # (or "utf-16")
    799 
    800 def _escape_cdata(text, encoding=None, replace=string.replace):
    801     # escape character data
    802     try:
    803         if encoding:
    804             try:
    805                 text = _encode(text, encoding)
    806             except UnicodeError:
    807                 return _encode_entity(text)
    808         text = replace(text, "&", "&amp;")
    809         text = replace(text, "<", "&lt;")
    810         text = replace(text, ">", "&gt;")
    811         return text
    812     except (TypeError, AttributeError):
    813         _raise_serialization_error(text)
    814 
    815 def _escape_attrib(text, encoding=None, replace=string.replace):
    816     # escape attribute value
    817     try:
    818         if encoding:
    819             try:
    820                 text = _encode(text, encoding)
    821             except UnicodeError:
    822                 return _encode_entity(text)
    823         text = replace(text, "&", "&amp;")
    824         text = replace(text, "'", "&apos;") # FIXME: overkill
    825         text = replace(text, "\"", "&quot;")
    826         text = replace(text, "<", "&lt;")
    827         text = replace(text, ">", "&gt;")
    828         return text
    829     except (TypeError, AttributeError):
    830         _raise_serialization_error(text)
    831 
    832 def fixtag(tag, namespaces):
    833     # given a decorated tag (of the form {uri}tag), return prefixed
    834     # tag and namespace declaration, if any
    835     if isinstance(tag, QName):
    836         tag = tag.text
    837     namespace_uri, tag = string.split(tag[1:], "}", 1)
    838     prefix = namespaces.get(namespace_uri)
    839     if prefix is None:
    840         prefix = _namespace_map.get(namespace_uri)
    841         if prefix is None:
    842             prefix = "ns%d" % len(namespaces)
    843         namespaces[namespace_uri] = prefix
    844         if prefix == "xml":
    845             xmlns = None
    846         else:
    847             xmlns = ("xmlns:%s" % prefix, namespace_uri)
    848     else:
    849         xmlns = None
    850     return "%s:%s" % (prefix, tag), xmlns
     1169# --------------------------------------------------------------------
     1170# parsing
    8511171
    8521172##
     
    8551175# @param source A filename or file object containing XML data.
    8561176# @param parser An optional parser instance.  If not given, the
    857 #     standard {@link XMLTreeBuilder} parser is used.
     1177#     standard {@link XMLParser} parser is used.
    8581178# @return An ElementTree instance
    8591179
     
    8701190# @param events A list of events to report back.  If omitted, only "end"
    8711191#     events are reported.
     1192# @param parser An optional parser instance.  If not given, the
     1193#     standard {@link XMLParser} parser is used.
    8721194# @return A (event, elem) iterator.
    8731195
    874 class iterparse:
    875 
    876     def __init__(self, source, events=None):
    877         if not hasattr(source, "read"):
    878             source = open(source, "rb")
     1196def iterparse(source, events=None, parser=None):
     1197    close_source = False
     1198    if not hasattr(source, "read"):
     1199        source = open(source, "rb")
     1200        close_source = True
     1201    if not parser:
     1202        parser = XMLParser(target=TreeBuilder())
     1203    return _IterParseIterator(source, events, parser, close_source)
     1204
     1205class _IterParseIterator(object):
     1206
     1207    def __init__(self, source, events, parser, close_source=False):
    8791208        self._file = source
     1209        self._close_file = close_source
    8801210        self._events = []
    8811211        self._index = 0
     1212        self._error = None
    8821213        self.root = self._root = None
    883         self._parser = XMLTreeBuilder()
     1214        self._parser = parser
    8841215        # wire up the parser for event reporting
    8851216        parser = self._parser._parser
     
    9091240                def handler(prefix, uri, event=event, append=append):
    9101241                    try:
    911                         uri = _encode(uri, "ascii")
     1242                        uri = (uri or "").encode("ascii")
    9121243                    except UnicodeError:
    9131244                        pass
    914                     append((event, (prefix or "", uri)))
     1245                    append((event, (prefix or "", uri or "")))
    9151246                parser.StartNamespaceDeclHandler = handler
    9161247            elif event == "end-ns":
     
    9181249                    append((event, None))
    9191250                parser.EndNamespaceDeclHandler = handler
     1251            else:
     1252                raise ValueError("unknown event %r" % event)
    9201253
    9211254    def next(self):
     
    9231256            try:
    9241257                item = self._events[self._index]
     1258                self._index += 1
     1259                return item
    9251260            except IndexError:
    926                 if self._parser is None:
    927                     self.root = self._root
    928                     try:
    929                         raise StopIteration
    930                     except NameError:
    931                         raise IndexError
    932                 # load event buffer
    933                 del self._events[:]
    934                 self._index = 0
    935                 data = self._file.read(16384)
    936                 if data:
     1261                pass
     1262            if self._error:
     1263                e = self._error
     1264                self._error = None
     1265                raise e
     1266            if self._parser is None:
     1267                self.root = self._root
     1268                if self._close_file:
     1269                    self._file.close()
     1270                raise StopIteration
     1271            # load event buffer
     1272            del self._events[:]
     1273            self._index = 0
     1274            data = self._file.read(16384)
     1275            if data:
     1276                try:
    9371277                    self._parser.feed(data)
    938                 else:
    939                     self._root = self._parser.close()
    940                     self._parser = None
     1278                except SyntaxError as exc:
     1279                    self._error = exc
    9411280            else:
    942                 self._index = self._index + 1
    943                 return item
    944 
    945     try:
    946         iter
    947         def __iter__(self):
    948             return self
    949     except NameError:
    950         def __getitem__(self, index):
    951             return self.next()
     1281                self._root = self._parser.close()
     1282                self._parser = None
     1283
     1284    def __iter__(self):
     1285        return self
    9521286
    9531287##
     
    9561290#
    9571291# @param source A string containing XML data.
     1292# @param parser An optional parser instance.  If not given, the
     1293#     standard {@link XMLParser} parser is used.
    9581294# @return An Element instance.
    9591295# @defreturn Element
    9601296
    961 def XML(text):
    962     parser = XMLTreeBuilder()
     1297def XML(text, parser=None):
     1298    if not parser:
     1299        parser = XMLParser(target=TreeBuilder())
    9631300    parser.feed(text)
    9641301    return parser.close()
     
    9691306#
    9701307# @param source A string containing XML data.
     1308# @param parser An optional parser instance.  If not given, the
     1309#     standard {@link XMLParser} parser is used.
    9711310# @return A tuple containing an Element instance and a dictionary.
    9721311# @defreturn (Element, dictionary)
    9731312
    974 def XMLID(text):
    975     parser = XMLTreeBuilder()
     1313def XMLID(text, parser=None):
     1314    if not parser:
     1315        parser = XMLParser(target=TreeBuilder())
    9761316    parser.feed(text)
    9771317    tree = parser.close()
    9781318    ids = {}
    979     for elem in tree.getiterator():
     1319    for elem in tree.iter():
    9801320        id = elem.get("id")
    9811321        if id:
     
    9941334
    9951335##
    996 # Generates a string representation of an XML element, including all
    997 # subelements.
    998 #
    999 # @param element An Element instance.
    1000 # @return An encoded string containing the XML data.
    1001 # @defreturn string
    1002 
    1003 def tostring(element, encoding=None):
    1004     class dummy:
    1005         pass
    1006     data = []
    1007     file = dummy()
    1008     file.write = data.append
    1009     ElementTree(element).write(file, encoding)
    1010     return string.join(data, "")
     1336# Parses an XML document from a sequence of string fragments.
     1337#
     1338# @param sequence A list or other sequence containing XML data fragments.
     1339# @param parser An optional parser instance.  If not given, the
     1340#     standard {@link XMLParser} parser is used.
     1341# @return An Element instance.
     1342# @defreturn Element
     1343# @since 1.3
     1344
     1345def fromstringlist(sequence, parser=None):
     1346    if not parser:
     1347        parser = XMLParser(target=TreeBuilder())
     1348    for text in sequence:
     1349        parser.feed(text)
     1350    return parser.close()
     1351
     1352# --------------------------------------------------------------------
    10111353
    10121354##
     
    10211363#    is called to create new Element instances, as necessary.
    10221364
    1023 class TreeBuilder:
     1365class TreeBuilder(object):
    10241366
    10251367    def __init__(self, element_factory=None):
     
    10291371        self._tail = None # true if we're after an end tag
    10301372        if element_factory is None:
    1031             element_factory = _ElementInterface
     1373            element_factory = Element
    10321374        self._factory = element_factory
    10331375
    10341376    ##
    1035     # Flushes the parser buffers, and returns the toplevel documen
     1377    # Flushes the builder buffers, and returns the toplevel document
    10361378    # element.
    10371379    #
     
    10411383    def close(self):
    10421384        assert len(self._elem) == 0, "missing end tags"
    1043         assert self._last != None, "missing toplevel element"
     1385        assert self._last is not None, "missing toplevel element"
    10441386        return self._last
    10451387
     
    10471389        if self._data:
    10481390            if self._last is not None:
    1049                 text = string.join(self._data, "")
     1391                text = "".join(self._data)
    10501392                if self._tail:
    10511393                    assert self._last.tail is None, "internal error (tail)"
     
    11061448# @keyparam html Predefine HTML entities.  This flag is not supported
    11071449#     by the current implementation.
     1450# @keyparam encoding Optional encoding.  If given, the value overrides
     1451#     the encoding specified in the XML file.
    11081452# @see #ElementTree
    11091453# @see #TreeBuilder
    11101454
    1111 class XMLTreeBuilder:
    1112 
    1113     def __init__(self, html=0, target=None):
     1455class XMLParser(object):
     1456
     1457    def __init__(self, html=0, target=None, encoding=None):
    11141458        try:
    11151459            from xml.parsers import expat
    11161460        except ImportError:
    1117             raise ImportError(
    1118                 "No module named expat; use SimpleXMLTreeBuilder instead"
    1119                 )
    1120         self._parser = parser = expat.ParserCreate(None, "}")
     1461            try:
     1462                import pyexpat as expat
     1463            except ImportError:
     1464                raise ImportError(
     1465                    "No module named expat; use SimpleXMLTreeBuilder instead"
     1466                    )
     1467        parser = expat.ParserCreate(encoding, "}")
    11211468        if target is None:
    11221469            target = TreeBuilder()
    1123         self._target = target
     1470        # underscored names are provided for compatibility only
     1471        self.parser = self._parser = parser
     1472        self.target = self._target = target
     1473        self._error = expat.error
    11241474        self._names = {} # name memo cache
    11251475        # callbacks
     
    11281478        parser.EndElementHandler = self._end
    11291479        parser.CharacterDataHandler = self._data
     1480        # optional callbacks
     1481        parser.CommentHandler = self._comment
     1482        parser.ProcessingInstructionHandler = self._pi
    11301483        # let expat do the buffering, if supported
    11311484        try:
     
    11401493        except AttributeError:
    11411494            pass
    1142         encoding = None
    1143         if not parser.returns_unicode:
    1144             encoding = "utf-8"
    1145         # target.xml(encoding, None)
    11461495        self._doctype = None
    11471496        self.entity = {}
     1497        try:
     1498            self.version = "Expat %d.%d.%d" % expat.version_info
     1499        except AttributeError:
     1500            pass # unknown
     1501
     1502    def _raiseerror(self, value):
     1503        err = ParseError(value)
     1504        err.code = value.code
     1505        err.position = value.lineno, value.offset
     1506        raise err
    11481507
    11491508    def _fixtext(self, text):
    11501509        # convert text string to ascii, if possible
    11511510        try:
    1152             return _encode(text, "ascii")
     1511            return text.encode("ascii")
    11531512        except UnicodeError:
    11541513            return text
     
    11671526    def _start(self, tag, attrib_in):
    11681527        fixname = self._fixname
     1528        fixtext = self._fixtext
    11691529        tag = fixname(tag)
    11701530        attrib = {}
    11711531        for key, value in attrib_in.items():
    1172             attrib[fixname(key)] = self._fixtext(value)
    1173         return self._target.start(tag, attrib)
     1532            attrib[fixname(key)] = fixtext(value)
     1533        return self.target.start(tag, attrib)
    11741534
    11751535    def _start_list(self, tag, attrib_in):
    11761536        fixname = self._fixname
     1537        fixtext = self._fixtext
    11771538        tag = fixname(tag)
    11781539        attrib = {}
    11791540        if attrib_in:
    11801541            for i in range(0, len(attrib_in), 2):
    1181                 attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
    1182         return self._target.start(tag, attrib)
     1542                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
     1543        return self.target.start(tag, attrib)
    11831544
    11841545    def _data(self, text):
    1185         return self._target.data(self._fixtext(text))
     1546        return self.target.data(self._fixtext(text))
    11861547
    11871548    def _end(self, tag):
    1188         return self._target.end(self._fixname(tag))
     1549        return self.target.end(self._fixname(tag))
     1550
     1551    def _comment(self, data):
     1552        try:
     1553            comment = self.target.comment
     1554        except AttributeError:
     1555            pass
     1556        else:
     1557            return comment(self._fixtext(data))
     1558
     1559    def _pi(self, target, data):
     1560        try:
     1561            pi = self.target.pi
     1562        except AttributeError:
     1563            pass
     1564        else:
     1565            return pi(self._fixtext(target), self._fixtext(data))
    11891566
    11901567    def _default(self, text):
     
    11931570            # deal with undefined entities
    11941571            try:
    1195                 self._target.data(self.entity[text[1:-1]])
     1572                self.target.data(self.entity[text[1:-1]])
    11961573            except KeyError:
    11971574                from xml.parsers import expat
    1198                 raise expat.error(
     1575                err = expat.error(
    11991576                    "undefined entity %s: line %d, column %d" %
    12001577                    (text, self._parser.ErrorLineNumber,
    12011578                    self._parser.ErrorColumnNumber)
    12021579                    )
     1580                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
     1581                err.lineno = self._parser.ErrorLineNumber
     1582                err.offset = self._parser.ErrorColumnNumber
     1583                raise err
    12031584        elif prefix == "<" and text[:9] == "<!DOCTYPE":
    12041585            self._doctype = [] # inside a doctype declaration
     
    12081589                self._doctype = None
    12091590                return
    1210             text = string.strip(text)
     1591            text = text.strip()
    12111592            if not text:
    12121593                return
     
    12241605                if pubid:
    12251606                    pubid = pubid[1:-1]
    1226                 self.doctype(name, pubid, system[1:-1])
     1607                if hasattr(self.target, "doctype"):
     1608                    self.target.doctype(name, pubid, system[1:-1])
     1609                elif self.doctype is not self._XMLParser__doctype:
     1610                    # warn about deprecated call
     1611                    self._XMLParser__doctype(name, pubid, system[1:-1])
     1612                    self.doctype(name, pubid, system[1:-1])
    12271613                self._doctype = None
    12281614
    12291615    ##
    1230     # Handles a doctype declaration.
     1616    # (Deprecated) Handles a doctype declaration.
    12311617    #
    12321618    # @param name Doctype name.
     
    12351621
    12361622    def doctype(self, name, pubid, system):
    1237         pass
     1623        """This method of XMLParser is deprecated."""
     1624        warnings.warn(
     1625            "This method of XMLParser is deprecated.  Define doctype() "
     1626            "method on the TreeBuilder target.",
     1627            DeprecationWarning,
     1628            )
     1629
     1630    # sentinel, if doctype is redefined in a subclass
     1631    __doctype = doctype
    12381632
    12391633    ##
     
    12431637
    12441638    def feed(self, data):
    1245         self._parser.Parse(data, 0)
     1639        try:
     1640            self._parser.Parse(data, 0)
     1641        except self._error, v:
     1642            self._raiseerror(v)
    12461643
    12471644    ##
     
    12521649
    12531650    def close(self):
    1254         self._parser.Parse("", 1) # end of data
    1255         tree = self._target.close()
    1256         del self._target, self._parser # get rid of circular references
     1651        try:
     1652            self._parser.Parse("", 1) # end of data
     1653        except self._error, v:
     1654            self._raiseerror(v)
     1655        tree = self.target.close()
     1656        del self.target, self._parser # get rid of circular references
    12571657        return tree
    12581658
    12591659# compatibility
    1260 XMLParser = XMLTreeBuilder
     1660XMLTreeBuilder = XMLParser
     1661
     1662# workaround circular import.
     1663try:
     1664    from ElementC14N import _serialize_c14n
     1665    _serialize["c14n"] = _serialize_c14n
     1666except ImportError:
     1667    pass
  • python/trunk/Lib/xml/etree/__init__.py

    r2 r391  
    1 # $Id: __init__.py 1821 2004-06-03 16:57:49Z fredrik $
     1# $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $
    22# elementtree package
    33
     
    55# The ElementTree toolkit is
    66#
    7 # Copyright (c) 1999-2004 by Fredrik Lundh
     7# Copyright (c) 1999-2008 by Fredrik Lundh
    88#
    99# By obtaining, using, and/or copying this software and/or its
     
    3131
    3232# Licensed to PSF under a Contributor Agreement.
    33 # See http://www.python.org/2.4/license for licensing details.
     33# See http://www.python.org/psf/license for licensing details.
Note: See TracChangeset for help on using the changeset viewer.