root / ncclient / ElementTree.py @ 53e1a046
History | View | Annotate | Download (43.1 kB)
1 | 53e1a046 | Leonidas Poulopoulos | #
|
---|---|---|---|
2 | 53e1a046 | Leonidas Poulopoulos | # ElementTree
|
3 | 53e1a046 | Leonidas Poulopoulos | # $Id: ElementTree.py 3224 2007-08-27 21:23:39Z fredrik $
|
4 | 53e1a046 | Leonidas Poulopoulos | #
|
5 | 53e1a046 | Leonidas Poulopoulos | # light-weight XML support for Python 1.5.2 and later.
|
6 | 53e1a046 | Leonidas Poulopoulos | #
|
7 | 53e1a046 | Leonidas Poulopoulos | # history:
|
8 | 53e1a046 | Leonidas Poulopoulos | # 2001-10-20 fl created (from various sources)
|
9 | 53e1a046 | Leonidas Poulopoulos | # 2001-11-01 fl return root from parse method
|
10 | 53e1a046 | Leonidas Poulopoulos | # 2002-02-16 fl sort attributes in lexical order
|
11 | 53e1a046 | Leonidas Poulopoulos | # 2002-04-06 fl TreeBuilder refactoring, added PythonDoc markup
|
12 | 53e1a046 | Leonidas Poulopoulos | # 2002-05-01 fl finished TreeBuilder refactoring
|
13 | 53e1a046 | Leonidas Poulopoulos | # 2002-07-14 fl added basic namespace support to ElementTree.write
|
14 | 53e1a046 | Leonidas Poulopoulos | # 2002-07-25 fl added QName attribute support
|
15 | 53e1a046 | Leonidas Poulopoulos | # 2002-10-20 fl fixed encoding in write
|
16 | 53e1a046 | Leonidas Poulopoulos | # 2002-11-24 fl changed default encoding to ascii; fixed attribute encoding
|
17 | 53e1a046 | Leonidas Poulopoulos | # 2002-11-27 fl accept file objects or file names for parse/write
|
18 | 53e1a046 | Leonidas Poulopoulos | # 2002-12-04 fl moved XMLTreeBuilder back to this module
|
19 | 53e1a046 | Leonidas Poulopoulos | # 2003-01-11 fl fixed entity encoding glitch for us-ascii
|
20 | 53e1a046 | Leonidas Poulopoulos | # 2003-02-13 fl added XML literal factory
|
21 | 53e1a046 | Leonidas Poulopoulos | # 2003-02-21 fl added ProcessingInstruction/PI factory
|
22 | 53e1a046 | Leonidas Poulopoulos | # 2003-05-11 fl added tostring/fromstring helpers
|
23 | 53e1a046 | Leonidas Poulopoulos | # 2003-05-26 fl added ElementPath support
|
24 | 53e1a046 | Leonidas Poulopoulos | # 2003-07-05 fl added makeelement factory method
|
25 | 53e1a046 | Leonidas Poulopoulos | # 2003-07-28 fl added more well-known namespace prefixes
|
26 | 53e1a046 | Leonidas Poulopoulos | # 2003-08-15 fl fixed typo in ElementTree.findtext (Thomas Dartsch)
|
27 | 53e1a046 | Leonidas Poulopoulos | # 2003-09-04 fl fall back on emulator if ElementPath is not installed
|
28 | 53e1a046 | Leonidas Poulopoulos | # 2003-10-31 fl markup updates
|
29 | 53e1a046 | Leonidas Poulopoulos | # 2003-11-15 fl fixed nested namespace bug
|
30 | 53e1a046 | Leonidas Poulopoulos | # 2004-03-28 fl added XMLID helper
|
31 | 53e1a046 | Leonidas Poulopoulos | # 2004-06-02 fl added default support to findtext
|
32 | 53e1a046 | Leonidas Poulopoulos | # 2004-06-08 fl fixed encoding of non-ascii element/attribute names
|
33 | 53e1a046 | Leonidas Poulopoulos | # 2004-08-23 fl take advantage of post-2.1 expat features
|
34 | 53e1a046 | Leonidas Poulopoulos | # 2005-02-01 fl added iterparse implementation
|
35 | 53e1a046 | Leonidas Poulopoulos | # 2005-03-02 fl fixed iterparse support for pre-2.2 versions
|
36 | 53e1a046 | Leonidas Poulopoulos | # 2006-11-18 fl added parser support for IronPython (ElementIron)
|
37 | 53e1a046 | Leonidas Poulopoulos | # 2007-08-27 fl fixed newlines in attributes
|
38 | 53e1a046 | Leonidas Poulopoulos | #
|
39 | 53e1a046 | Leonidas Poulopoulos | # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
|
40 | 53e1a046 | Leonidas Poulopoulos | #
|
41 | 53e1a046 | Leonidas Poulopoulos | # fredrik@pythonware.com
|
42 | 53e1a046 | Leonidas Poulopoulos | # http://www.pythonware.com
|
43 | 53e1a046 | Leonidas Poulopoulos | #
|
44 | 53e1a046 | Leonidas Poulopoulos | # --------------------------------------------------------------------
|
45 | 53e1a046 | Leonidas Poulopoulos | # The ElementTree toolkit is
|
46 | 53e1a046 | Leonidas Poulopoulos | #
|
47 | 53e1a046 | Leonidas Poulopoulos | # Copyright (c) 1999-2007 by Fredrik Lundh
|
48 | 53e1a046 | Leonidas Poulopoulos | #
|
49 | 53e1a046 | Leonidas Poulopoulos | # By obtaining, using, and/or copying this software and/or its
|
50 | 53e1a046 | Leonidas Poulopoulos | # associated documentation, you agree that you have read, understood,
|
51 | 53e1a046 | Leonidas Poulopoulos | # and will comply with the following terms and conditions:
|
52 | 53e1a046 | Leonidas Poulopoulos | #
|
53 | 53e1a046 | Leonidas Poulopoulos | # Permission to use, copy, modify, and distribute this software and
|
54 | 53e1a046 | Leonidas Poulopoulos | # its associated documentation for any purpose and without fee is
|
55 | 53e1a046 | Leonidas Poulopoulos | # hereby granted, provided that the above copyright notice appears in
|
56 | 53e1a046 | Leonidas Poulopoulos | # all copies, and that both that copyright notice and this permission
|
57 | 53e1a046 | Leonidas Poulopoulos | # notice appear in supporting documentation, and that the name of
|
58 | 53e1a046 | Leonidas Poulopoulos | # Secret Labs AB or the author not be used in advertising or publicity
|
59 | 53e1a046 | Leonidas Poulopoulos | # pertaining to distribution of the software without specific, written
|
60 | 53e1a046 | Leonidas Poulopoulos | # prior permission.
|
61 | 53e1a046 | Leonidas Poulopoulos | #
|
62 | 53e1a046 | Leonidas Poulopoulos | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
63 | 53e1a046 | Leonidas Poulopoulos | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
64 | 53e1a046 | Leonidas Poulopoulos | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
65 | 53e1a046 | Leonidas Poulopoulos | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
66 | 53e1a046 | Leonidas Poulopoulos | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
67 | 53e1a046 | Leonidas Poulopoulos | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
68 | 53e1a046 | Leonidas Poulopoulos | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
69 | 53e1a046 | Leonidas Poulopoulos | # OF THIS SOFTWARE.
|
70 | 53e1a046 | Leonidas Poulopoulos | # --------------------------------------------------------------------
|
71 | 53e1a046 | Leonidas Poulopoulos | |
# Names exported by "from ElementTree import *".
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "iselement",
    "iterparse",
    "parse",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLTreeBuilder",
    ]

# Parser API override hook (None = use default).  Note that this
# placeholder is rebound further down in the module, once the default
# parser API class has been defined.
parser_api = None

# TODO: add support for custom namespace resolvers/default namespaces
# TODO: add improved support for incremental parsing

# Version of the ElementTree toolkit this module corresponds to.
VERSION = "1.2.7"
|
96 | 53e1a046 | Leonidas Poulopoulos | |
97 | 53e1a046 | Leonidas Poulopoulos | ##
|
98 | 53e1a046 | Leonidas Poulopoulos | # The <b>Element</b> type is a flexible container object, designed to
|
99 | 53e1a046 | Leonidas Poulopoulos | # store hierarchical data structures in memory. The type can be
|
100 | 53e1a046 | Leonidas Poulopoulos | # described as a cross between a list and a dictionary.
|
101 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
102 | 53e1a046 | Leonidas Poulopoulos | # Each element has a number of properties associated with it:
|
103 | 53e1a046 | Leonidas Poulopoulos | # <ul>
|
104 | 53e1a046 | Leonidas Poulopoulos | # <li>a <i>tag</i>. This is a string identifying what kind of data
|
105 | 53e1a046 | Leonidas Poulopoulos | # this element represents (the element type, in other words).</li>
|
106 | 53e1a046 | Leonidas Poulopoulos | # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
|
107 | 53e1a046 | Leonidas Poulopoulos | # <li>a <i>text</i> string.</li>
|
108 | 53e1a046 | Leonidas Poulopoulos | # <li>an optional <i>tail</i> string.</li>
|
109 | 53e1a046 | Leonidas Poulopoulos | # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
|
110 | 53e1a046 | Leonidas Poulopoulos | # </ul>
|
111 | 53e1a046 | Leonidas Poulopoulos | #
|
112 | 53e1a046 | Leonidas Poulopoulos | # To create an element instance, use the {@link #Element} or {@link
|
113 | 53e1a046 | Leonidas Poulopoulos | # #SubElement} factory functions.
|
114 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
115 | 53e1a046 | Leonidas Poulopoulos | # The {@link #ElementTree} class can be used to wrap an element
|
116 | 53e1a046 | Leonidas Poulopoulos | # structure, and convert it from and to XML.
|
117 | 53e1a046 | Leonidas Poulopoulos | ##
|
118 | 53e1a046 | Leonidas Poulopoulos | |
119 | 53e1a046 | Leonidas Poulopoulos | |
120 | 53e1a046 | Leonidas Poulopoulos | import sys, re |
121 | 53e1a046 | Leonidas Poulopoulos | |
class _StringEmulation(object):
    # Minimal stand-in for the "string" module: exposes join, replace,
    # split and strip as function-style callables that simply forward
    # to the corresponding str methods.

    def join(self, seq, sep):
        # note the argument order: the sequence first, then the separator
        return sep.join(seq)

    def replace(self, text, *args):
        return text.replace(*args)

    def split(self, text, *args):
        return text.split(*args)

    def strip(self, text, *args):
        return text.strip(*args)

try:
    import string
except ImportError:
    # emulate string module under IronPython.  Only a failed import
    # triggers the fallback; anything else (KeyboardInterrupt, SystemExit,
    # genuine bugs) is allowed to propagate instead of being swallowed.
    string = _StringEmulation()
136 | 53e1a046 | Leonidas Poulopoulos | |
class _SimpleElementPath:
    # Fallback used when the ElementPath module cannot be imported.
    # Emulates the pre-1.2 find/findtext/findall behaviour: only direct
    # children are compared, by exact tag, except for the ".//" prefix
    # which findall hands over to the element's own getiterator method.

    def find(self, element, tag):
        # first direct child with a matching tag, or None
        for child in element:
            if child.tag != tag:
                continue
            return child
        return None

    def findtext(self, element, tag, default=None):
        # text of the first direct child with a matching tag; a match
        # without text gives "", no match at all gives the default
        for child in element:
            if child.tag != tag:
                continue
            content = child.text
            if content:
                return content
            return ""
        return default

    def findall(self, element, tag):
        # ".//name" searches the whole subtree via getiterator; anything
        # else collects the matching direct children in document order
        prefix, rest = tag[:3], tag[3:]
        if prefix == ".//":
            return element.getiterator(rest)
        return [child for child in element if child.tag == tag]
|
157 | 53e1a046 | Leonidas Poulopoulos | |
158 | 53e1a046 | Leonidas Poulopoulos | try:
|
159 | 53e1a046 | Leonidas Poulopoulos | import ElementPath |
160 | 53e1a046 | Leonidas Poulopoulos | except ImportError: |
161 | 53e1a046 | Leonidas Poulopoulos | # FIXME: issue warning in this case?
|
162 | 53e1a046 | Leonidas Poulopoulos | ElementPath = _SimpleElementPath() |
163 | 53e1a046 | Leonidas Poulopoulos | |
class DefaultParserAPI:
    # Default implementation of the parser API: the object the
    # module-level parser_api hook is bound to right below this class.

    ##
    # Feeds an XML source to a parser, in 32k chunks.
    #
    # @param source A file name, or any object with a read method.
    # @param parser Optional parser object providing feed and close
    #     methods.  If omitted (or false), an XMLTreeBuilder is created.
    # @return Whatever the parser's close method returns.

    def parse(self, source, parser=None):
        # only close the file if we opened it ourselves; a file object
        # handed in by the caller remains the caller's responsibility
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if not parser:
                parser = XMLTreeBuilder()
            while 1:
                data = source.read(32768)
                if not data:
                    break
                parser.feed(data)
            return parser.close()
        finally:
            # runs on success and on parse errors alike, so the handle
            # is never leaked
            if close_source:
                source.close()

    ##
    # Delegates to the module's _iterparse implementation.
    #
    # @param source Passed through unchanged.
    # @param events Passed through unchanged.
    # @return Whatever _iterparse returns.

    def iterparse(self, source, events):
        return _iterparse(source, events)

    ##
    # Parses a complete document held in a string.
    #
    # @param text The XML data to feed to a fresh XMLTreeBuilder.
    # @return Whatever the builder's close method returns.

    def fromstring(self, text):
        parser = XMLTreeBuilder()
        parser.feed(text)
        return parser.close()

parser_api = default_parser_api = DefaultParserAPI()
187 | 53e1a046 | Leonidas Poulopoulos | |
188 | 53e1a046 | Leonidas Poulopoulos | ##
|
189 | 53e1a046 | Leonidas Poulopoulos | # Internal element class. This class defines the Element interface,
|
190 | 53e1a046 | Leonidas Poulopoulos | # and provides a reference implementation of this interface.
|
191 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
192 | 53e1a046 | Leonidas Poulopoulos | # You should not create instances of this class directly. Use the
|
193 | 53e1a046 | Leonidas Poulopoulos | # appropriate factory functions instead, such as {@link #Element}
|
194 | 53e1a046 | Leonidas Poulopoulos | # and {@link #SubElement}.
|
195 | 53e1a046 | Leonidas Poulopoulos | #
|
196 | 53e1a046 | Leonidas Poulopoulos | # @see Element
|
197 | 53e1a046 | Leonidas Poulopoulos | # @see SubElement
|
198 | 53e1a046 | Leonidas Poulopoulos | # @see Comment
|
199 | 53e1a046 | Leonidas Poulopoulos | # @see ProcessingInstruction
|
200 | 53e1a046 | Leonidas Poulopoulos | |
201 | 53e1a046 | Leonidas Poulopoulos | class _ElementInterface: |
202 | 53e1a046 | Leonidas Poulopoulos | # <tag attrib>text<child/>...</tag>tail
|
203 | 53e1a046 | Leonidas Poulopoulos | |
204 | 53e1a046 | Leonidas Poulopoulos | ##
|
205 | 53e1a046 | Leonidas Poulopoulos | # (Attribute) Element tag.
|
206 | 53e1a046 | Leonidas Poulopoulos | |
207 | 53e1a046 | Leonidas Poulopoulos | tag = None
|
208 | 53e1a046 | Leonidas Poulopoulos | |
209 | 53e1a046 | Leonidas Poulopoulos | ##
|
210 | 53e1a046 | Leonidas Poulopoulos | # (Attribute) Element attribute dictionary. Where possible, use
|
211 | 53e1a046 | Leonidas Poulopoulos | # {@link #_ElementInterface.get},
|
212 | 53e1a046 | Leonidas Poulopoulos | # {@link #_ElementInterface.set},
|
213 | 53e1a046 | Leonidas Poulopoulos | # {@link #_ElementInterface.keys}, and
|
214 | 53e1a046 | Leonidas Poulopoulos | # {@link #_ElementInterface.items} to access
|
215 | 53e1a046 | Leonidas Poulopoulos | # element attributes.
|
216 | 53e1a046 | Leonidas Poulopoulos | |
217 | 53e1a046 | Leonidas Poulopoulos | attrib = None
|
218 | 53e1a046 | Leonidas Poulopoulos | |
219 | 53e1a046 | Leonidas Poulopoulos | ##
|
220 | 53e1a046 | Leonidas Poulopoulos | # (Attribute) Text before first subelement. This is either a
|
221 | 53e1a046 | Leonidas Poulopoulos | # string or the value None, if there was no text.
|
222 | 53e1a046 | Leonidas Poulopoulos | |
223 | 53e1a046 | Leonidas Poulopoulos | text = None
|
224 | 53e1a046 | Leonidas Poulopoulos | |
225 | 53e1a046 | Leonidas Poulopoulos | ##
|
226 | 53e1a046 | Leonidas Poulopoulos | # (Attribute) Text after this element's end tag, but before the
|
227 | 53e1a046 | Leonidas Poulopoulos | # next sibling element's start tag. This is either a string or
|
228 | 53e1a046 | Leonidas Poulopoulos | # the value None, if there was no text.
|
229 | 53e1a046 | Leonidas Poulopoulos | |
230 | 53e1a046 | Leonidas Poulopoulos | tail = None # text after end tag, if any |
231 | 53e1a046 | Leonidas Poulopoulos | |
232 | 53e1a046 | Leonidas Poulopoulos | def __init__(self, tag, attrib): |
233 | 53e1a046 | Leonidas Poulopoulos | self.tag = tag
|
234 | 53e1a046 | Leonidas Poulopoulos | self.attrib = attrib
|
235 | 53e1a046 | Leonidas Poulopoulos | self._children = []
|
236 | 53e1a046 | Leonidas Poulopoulos | |
237 | 53e1a046 | Leonidas Poulopoulos | def __repr__(self): |
238 | 53e1a046 | Leonidas Poulopoulos | return "<Element %s at %x>" % (self.tag, id(self)) |
239 | 53e1a046 | Leonidas Poulopoulos | |
240 | 53e1a046 | Leonidas Poulopoulos | ##
|
241 | 53e1a046 | Leonidas Poulopoulos | # Creates a new element object of the same type as this element.
|
242 | 53e1a046 | Leonidas Poulopoulos | #
|
243 | 53e1a046 | Leonidas Poulopoulos | # @param tag Element tag.
|
244 | 53e1a046 | Leonidas Poulopoulos | # @param attrib Element attributes, given as a dictionary.
|
245 | 53e1a046 | Leonidas Poulopoulos | # @return A new element instance.
|
246 | 53e1a046 | Leonidas Poulopoulos | |
247 | 53e1a046 | Leonidas Poulopoulos | def makeelement(self, tag, attrib): |
248 | 53e1a046 | Leonidas Poulopoulos | return Element(tag, attrib)
|
249 | 53e1a046 | Leonidas Poulopoulos | |
250 | 53e1a046 | Leonidas Poulopoulos | ##
|
251 | 53e1a046 | Leonidas Poulopoulos | # Returns the number of subelements.
|
252 | 53e1a046 | Leonidas Poulopoulos | #
|
253 | 53e1a046 | Leonidas Poulopoulos | # @return The number of subelements.
|
254 | 53e1a046 | Leonidas Poulopoulos | |
255 | 53e1a046 | Leonidas Poulopoulos | def __len__(self): |
256 | 53e1a046 | Leonidas Poulopoulos | return len(self._children) |
257 | 53e1a046 | Leonidas Poulopoulos | |
258 | 53e1a046 | Leonidas Poulopoulos | ##
|
259 | 53e1a046 | Leonidas Poulopoulos | # Returns the given subelement.
|
260 | 53e1a046 | Leonidas Poulopoulos | #
|
261 | 53e1a046 | Leonidas Poulopoulos | # @param index What subelement to return.
|
262 | 53e1a046 | Leonidas Poulopoulos | # @return The given subelement.
|
263 | 53e1a046 | Leonidas Poulopoulos | # @exception IndexError If the given element does not exist.
|
264 | 53e1a046 | Leonidas Poulopoulos | |
265 | 53e1a046 | Leonidas Poulopoulos | def __getitem__(self, index): |
266 | 53e1a046 | Leonidas Poulopoulos | return self._children[index] |
267 | 53e1a046 | Leonidas Poulopoulos | |
268 | 53e1a046 | Leonidas Poulopoulos | ##
|
269 | 53e1a046 | Leonidas Poulopoulos | # Replaces the given subelement.
|
270 | 53e1a046 | Leonidas Poulopoulos | #
|
271 | 53e1a046 | Leonidas Poulopoulos | # @param index What subelement to replace.
|
272 | 53e1a046 | Leonidas Poulopoulos | # @param element The new element value.
|
273 | 53e1a046 | Leonidas Poulopoulos | # @exception IndexError If the given element does not exist.
|
274 | 53e1a046 | Leonidas Poulopoulos | # @exception AssertionError If element is not a valid object.
|
275 | 53e1a046 | Leonidas Poulopoulos | |
276 | 53e1a046 | Leonidas Poulopoulos | def __setitem__(self, index, element): |
277 | 53e1a046 | Leonidas Poulopoulos | assert iselement(element)
|
278 | 53e1a046 | Leonidas Poulopoulos | self._children[index] = element
|
279 | 53e1a046 | Leonidas Poulopoulos | |
280 | 53e1a046 | Leonidas Poulopoulos | ##
|
281 | 53e1a046 | Leonidas Poulopoulos | # Deletes the given subelement.
|
282 | 53e1a046 | Leonidas Poulopoulos | #
|
283 | 53e1a046 | Leonidas Poulopoulos | # @param index What subelement to delete.
|
284 | 53e1a046 | Leonidas Poulopoulos | # @exception IndexError If the given element does not exist.
|
285 | 53e1a046 | Leonidas Poulopoulos | |
286 | 53e1a046 | Leonidas Poulopoulos | def __delitem__(self, index): |
287 | 53e1a046 | Leonidas Poulopoulos | del self._children[index] |
288 | 53e1a046 | Leonidas Poulopoulos | |
289 | 53e1a046 | Leonidas Poulopoulos | ##
|
290 | 53e1a046 | Leonidas Poulopoulos | # Returns a list containing subelements in the given range.
|
291 | 53e1a046 | Leonidas Poulopoulos | #
|
292 | 53e1a046 | Leonidas Poulopoulos | # @param start The first subelement to return.
|
293 | 53e1a046 | Leonidas Poulopoulos | # @param stop The first subelement that shouldn't be returned.
|
294 | 53e1a046 | Leonidas Poulopoulos | # @return A sequence object containing subelements.
|
295 | 53e1a046 | Leonidas Poulopoulos | |
296 | 53e1a046 | Leonidas Poulopoulos | def __getslice__(self, start, stop): |
297 | 53e1a046 | Leonidas Poulopoulos | return self._children[start:stop] |
298 | 53e1a046 | Leonidas Poulopoulos | |
299 | 53e1a046 | Leonidas Poulopoulos | ##
|
300 | 53e1a046 | Leonidas Poulopoulos | # Replaces a number of subelements with elements from a sequence.
|
301 | 53e1a046 | Leonidas Poulopoulos | #
|
302 | 53e1a046 | Leonidas Poulopoulos | # @param start The first subelement to replace.
|
303 | 53e1a046 | Leonidas Poulopoulos | # @param stop The first subelement that shouldn't be replaced.
|
304 | 53e1a046 | Leonidas Poulopoulos | # @param elements A sequence object with zero or more elements.
|
305 | 53e1a046 | Leonidas Poulopoulos | # @exception AssertionError If a sequence member is not a valid object.
|
306 | 53e1a046 | Leonidas Poulopoulos | |
307 | 53e1a046 | Leonidas Poulopoulos | def __setslice__(self, start, stop, elements): |
308 | 53e1a046 | Leonidas Poulopoulos | for element in elements: |
309 | 53e1a046 | Leonidas Poulopoulos | assert iselement(element)
|
310 | 53e1a046 | Leonidas Poulopoulos | self._children[start:stop] = list(elements) |
311 | 53e1a046 | Leonidas Poulopoulos | |
312 | 53e1a046 | Leonidas Poulopoulos | ##
|
313 | 53e1a046 | Leonidas Poulopoulos | # Deletes a number of subelements.
|
314 | 53e1a046 | Leonidas Poulopoulos | #
|
315 | 53e1a046 | Leonidas Poulopoulos | # @param start The first subelement to delete.
|
316 | 53e1a046 | Leonidas Poulopoulos | # @param stop The first subelement to leave in there.
|
317 | 53e1a046 | Leonidas Poulopoulos | |
318 | 53e1a046 | Leonidas Poulopoulos | def __delslice__(self, start, stop): |
319 | 53e1a046 | Leonidas Poulopoulos | del self._children[start:stop] |
320 | 53e1a046 | Leonidas Poulopoulos | |
321 | 53e1a046 | Leonidas Poulopoulos | ##
|
322 | 53e1a046 | Leonidas Poulopoulos | # Adds a subelement to the end of this element.
|
323 | 53e1a046 | Leonidas Poulopoulos | #
|
324 | 53e1a046 | Leonidas Poulopoulos | # @param element The element to add.
|
325 | 53e1a046 | Leonidas Poulopoulos | # @exception AssertionError If a sequence member is not a valid object.
|
326 | 53e1a046 | Leonidas Poulopoulos | |
327 | 53e1a046 | Leonidas Poulopoulos | def append(self, element): |
328 | 53e1a046 | Leonidas Poulopoulos | assert iselement(element)
|
329 | 53e1a046 | Leonidas Poulopoulos | self._children.append(element)
|
330 | 53e1a046 | Leonidas Poulopoulos | |
331 | 53e1a046 | Leonidas Poulopoulos | ##
|
332 | 53e1a046 | Leonidas Poulopoulos | # Inserts a subelement at the given position in this element.
|
333 | 53e1a046 | Leonidas Poulopoulos | #
|
334 | 53e1a046 | Leonidas Poulopoulos | # @param index Where to insert the new subelement.
|
335 | 53e1a046 | Leonidas Poulopoulos | # @exception AssertionError If the element is not a valid object.
|
336 | 53e1a046 | Leonidas Poulopoulos | |
337 | 53e1a046 | Leonidas Poulopoulos | def insert(self, index, element): |
338 | 53e1a046 | Leonidas Poulopoulos | assert iselement(element)
|
339 | 53e1a046 | Leonidas Poulopoulos | self._children.insert(index, element)
|
340 | 53e1a046 | Leonidas Poulopoulos | |
341 | 53e1a046 | Leonidas Poulopoulos | ##
|
342 | 53e1a046 | Leonidas Poulopoulos | # Removes a matching subelement. Unlike the <b>find</b> methods,
|
343 | 53e1a046 | Leonidas Poulopoulos | # this method compares elements based on identity, not on tag
|
344 | 53e1a046 | Leonidas Poulopoulos | # value or contents.
|
345 | 53e1a046 | Leonidas Poulopoulos | #
|
346 | 53e1a046 | Leonidas Poulopoulos | # @param element What element to remove.
|
347 | 53e1a046 | Leonidas Poulopoulos | # @exception ValueError If a matching element could not be found.
|
348 | 53e1a046 | Leonidas Poulopoulos | # @exception AssertionError If the element is not a valid object.
|
349 | 53e1a046 | Leonidas Poulopoulos | |
350 | 53e1a046 | Leonidas Poulopoulos | def remove(self, element): |
351 | 53e1a046 | Leonidas Poulopoulos | assert iselement(element)
|
352 | 53e1a046 | Leonidas Poulopoulos | self._children.remove(element)
|
353 | 53e1a046 | Leonidas Poulopoulos | |
354 | 53e1a046 | Leonidas Poulopoulos | ##
|
355 | 53e1a046 | Leonidas Poulopoulos | # Returns all subelements. The elements are returned in document
|
356 | 53e1a046 | Leonidas Poulopoulos | # order.
|
357 | 53e1a046 | Leonidas Poulopoulos | #
|
358 | 53e1a046 | Leonidas Poulopoulos | # @return A list of subelements.
|
359 | 53e1a046 | Leonidas Poulopoulos | # @defreturn list of Element instances
|
360 | 53e1a046 | Leonidas Poulopoulos | |
361 | 53e1a046 | Leonidas Poulopoulos | def getchildren(self): |
362 | 53e1a046 | Leonidas Poulopoulos | return self._children |
363 | 53e1a046 | Leonidas Poulopoulos | |
364 | 53e1a046 | Leonidas Poulopoulos | ##
|
365 | 53e1a046 | Leonidas Poulopoulos | # Finds the first matching subelement, by tag name or path.
|
366 | 53e1a046 | Leonidas Poulopoulos | #
|
367 | 53e1a046 | Leonidas Poulopoulos | # @param path What element to look for.
|
368 | 53e1a046 | Leonidas Poulopoulos | # @return The first matching element, or None if no element was found.
|
369 | 53e1a046 | Leonidas Poulopoulos | # @defreturn Element or None
|
370 | 53e1a046 | Leonidas Poulopoulos | |
371 | 53e1a046 | Leonidas Poulopoulos | def find(self, path): |
372 | 53e1a046 | Leonidas Poulopoulos | return ElementPath.find(self, path) |
373 | 53e1a046 | Leonidas Poulopoulos | |
374 | 53e1a046 | Leonidas Poulopoulos | ##
|
375 | 53e1a046 | Leonidas Poulopoulos | # Finds text for the first matching subelement, by tag name or path.
|
376 | 53e1a046 | Leonidas Poulopoulos | #
|
377 | 53e1a046 | Leonidas Poulopoulos | # @param path What element to look for.
|
378 | 53e1a046 | Leonidas Poulopoulos | # @param default What to return if the element was not found.
|
379 | 53e1a046 | Leonidas Poulopoulos | # @return The text content of the first matching element, or the
|
380 | 53e1a046 | Leonidas Poulopoulos | # default value no element was found. Note that if the element
|
381 | 53e1a046 | Leonidas Poulopoulos | # has is found, but has no text content, this method returns an
|
382 | 53e1a046 | Leonidas Poulopoulos | # empty string.
|
383 | 53e1a046 | Leonidas Poulopoulos | # @defreturn string
|
384 | 53e1a046 | Leonidas Poulopoulos | |
385 | 53e1a046 | Leonidas Poulopoulos | def findtext(self, path, default=None): |
386 | 53e1a046 | Leonidas Poulopoulos | return ElementPath.findtext(self, path, default) |
387 | 53e1a046 | Leonidas Poulopoulos | |
388 | 53e1a046 | Leonidas Poulopoulos | ##
|
389 | 53e1a046 | Leonidas Poulopoulos | # Finds all matching subelements, by tag name or path.
|
390 | 53e1a046 | Leonidas Poulopoulos | #
|
391 | 53e1a046 | Leonidas Poulopoulos | # @param path What element to look for.
|
392 | 53e1a046 | Leonidas Poulopoulos | # @return A list or iterator containing all matching elements,
|
393 | 53e1a046 | Leonidas Poulopoulos | # in document order.
|
394 | 53e1a046 | Leonidas Poulopoulos | # @defreturn list of Element instances
|
395 | 53e1a046 | Leonidas Poulopoulos | |
396 | 53e1a046 | Leonidas Poulopoulos | def findall(self, path): |
397 | 53e1a046 | Leonidas Poulopoulos | return ElementPath.findall(self, path) |
398 | 53e1a046 | Leonidas Poulopoulos | |
399 | 53e1a046 | Leonidas Poulopoulos | ##
|
400 | 53e1a046 | Leonidas Poulopoulos | # Resets an element. This function removes all subelements, clears
|
401 | 53e1a046 | Leonidas Poulopoulos | # all attributes, and sets the text and tail attributes to None.
|
402 | 53e1a046 | Leonidas Poulopoulos | |
403 | 53e1a046 | Leonidas Poulopoulos | def clear(self): |
404 | 53e1a046 | Leonidas Poulopoulos | self.attrib.clear()
|
405 | 53e1a046 | Leonidas Poulopoulos | self._children = []
|
406 | 53e1a046 | Leonidas Poulopoulos | self.text = self.tail = None |
407 | 53e1a046 | Leonidas Poulopoulos | |
408 | 53e1a046 | Leonidas Poulopoulos | ##
|
409 | 53e1a046 | Leonidas Poulopoulos | # Gets an element attribute.
|
410 | 53e1a046 | Leonidas Poulopoulos | #
|
411 | 53e1a046 | Leonidas Poulopoulos | # @param key What attribute to look for.
|
412 | 53e1a046 | Leonidas Poulopoulos | # @param default What to return if the attribute was not found.
|
413 | 53e1a046 | Leonidas Poulopoulos | # @return The attribute value, or the default value, if the
|
414 | 53e1a046 | Leonidas Poulopoulos | # attribute was not found.
|
415 | 53e1a046 | Leonidas Poulopoulos | # @defreturn string or None
|
416 | 53e1a046 | Leonidas Poulopoulos | |
417 | 53e1a046 | Leonidas Poulopoulos | def get(self, key, default=None): |
418 | 53e1a046 | Leonidas Poulopoulos | return self.attrib.get(key, default) |
419 | 53e1a046 | Leonidas Poulopoulos | |
420 | 53e1a046 | Leonidas Poulopoulos | ##
|
421 | 53e1a046 | Leonidas Poulopoulos | # Sets an element attribute.
|
422 | 53e1a046 | Leonidas Poulopoulos | #
|
423 | 53e1a046 | Leonidas Poulopoulos | # @param key What attribute to set.
|
424 | 53e1a046 | Leonidas Poulopoulos | # @param value The attribute value.
|
425 | 53e1a046 | Leonidas Poulopoulos | |
426 | 53e1a046 | Leonidas Poulopoulos | def set(self, key, value): |
427 | 53e1a046 | Leonidas Poulopoulos | self.attrib[key] = value
|
428 | 53e1a046 | Leonidas Poulopoulos | |
429 | 53e1a046 | Leonidas Poulopoulos | ##
|
430 | 53e1a046 | Leonidas Poulopoulos | # Gets a list of attribute names. The names are returned in an
|
431 | 53e1a046 | Leonidas Poulopoulos | # arbitrary order (just like for an ordinary Python dictionary).
|
432 | 53e1a046 | Leonidas Poulopoulos | #
|
433 | 53e1a046 | Leonidas Poulopoulos | # @return A list of element attribute names.
|
434 | 53e1a046 | Leonidas Poulopoulos | # @defreturn list of strings
|
435 | 53e1a046 | Leonidas Poulopoulos | |
436 | 53e1a046 | Leonidas Poulopoulos | def keys(self): |
437 | 53e1a046 | Leonidas Poulopoulos | return self.attrib.keys() |
438 | 53e1a046 | Leonidas Poulopoulos | |
439 | 53e1a046 | Leonidas Poulopoulos | ##
|
440 | 53e1a046 | Leonidas Poulopoulos | # Gets element attributes, as a sequence. The attributes are
|
441 | 53e1a046 | Leonidas Poulopoulos | # returned in an arbitrary order.
|
442 | 53e1a046 | Leonidas Poulopoulos | #
|
443 | 53e1a046 | Leonidas Poulopoulos | # @return A list of (name, value) tuples for all attributes.
|
444 | 53e1a046 | Leonidas Poulopoulos | # @defreturn list of (string, string) tuples
|
445 | 53e1a046 | Leonidas Poulopoulos | |
446 | 53e1a046 | Leonidas Poulopoulos | def items(self): |
447 | 53e1a046 | Leonidas Poulopoulos | return self.attrib.items() |
448 | 53e1a046 | Leonidas Poulopoulos | |
449 | 53e1a046 | Leonidas Poulopoulos | ##
|
450 | 53e1a046 | Leonidas Poulopoulos | # Creates a tree iterator. The iterator loops over this element
|
451 | 53e1a046 | Leonidas Poulopoulos | # and all subelements, in document order, and returns all elements
|
452 | 53e1a046 | Leonidas Poulopoulos | # with a matching tag.
|
453 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
454 | 53e1a046 | Leonidas Poulopoulos | # If the tree structure is modified during iteration, the result
|
455 | 53e1a046 | Leonidas Poulopoulos | # is undefined.
|
456 | 53e1a046 | Leonidas Poulopoulos | #
|
457 | 53e1a046 | Leonidas Poulopoulos | # @param tag What tags to look for (default is to return all elements).
|
458 | 53e1a046 | Leonidas Poulopoulos | # @return A list or iterator containing all the matching elements.
|
459 | 53e1a046 | Leonidas Poulopoulos | # @defreturn list or iterator
|
460 | 53e1a046 | Leonidas Poulopoulos | |
def getiterator(self, tag=None):
    # "*" is the wildcard spelling of "match every tag".
    wanted = tag
    if wanted == "*":
        wanted = None
    # This element comes first (document order), then each subtree in turn.
    include_self = wanted is None or self.tag == wanted
    if include_self:
        matches = [self]
    else:
        matches = []
    for child in self._children:
        matches.extend(child.getiterator(wanted))
    return matches
470 | 53e1a046 | Leonidas Poulopoulos | |
# compatibility: _Element is a second name bound to the same class object as
# _ElementInterface (presumably kept for callers that still use the older
# name -- TODO confirm).
_Element = _ElementInterface
473 | 53e1a046 | Leonidas Poulopoulos | |
474 | 53e1a046 | Leonidas Poulopoulos | ##
|
475 | 53e1a046 | Leonidas Poulopoulos | # Element factory. This function returns an object implementing the
|
476 | 53e1a046 | Leonidas Poulopoulos | # standard Element interface. The exact class or type of that object
|
477 | 53e1a046 | Leonidas Poulopoulos | # is implementation dependent, but it will always be compatible with
|
478 | 53e1a046 | Leonidas Poulopoulos | # the {@link #_ElementInterface} class in this module.
|
479 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
480 | 53e1a046 | Leonidas Poulopoulos | # The element name, attribute names, and attribute values can be
|
481 | 53e1a046 | Leonidas Poulopoulos | # either 8-bit ASCII strings or Unicode strings.
|
482 | 53e1a046 | Leonidas Poulopoulos | #
|
483 | 53e1a046 | Leonidas Poulopoulos | # @param tag The element name.
|
484 | 53e1a046 | Leonidas Poulopoulos | # @param attrib An optional dictionary, containing element attributes.
|
485 | 53e1a046 | Leonidas Poulopoulos | # @param **extra Additional attributes, given as keyword arguments.
|
486 | 53e1a046 | Leonidas Poulopoulos | # @return An element instance.
|
487 | 53e1a046 | Leonidas Poulopoulos | # @defreturn Element
|
488 | 53e1a046 | Leonidas Poulopoulos | |
def Element(tag, attrib={}, **extra):
    # Work on a private copy, so neither the caller's dictionary nor the
    # shared default argument is ever modified by the keyword extras.
    attributes = attrib.copy()
    attributes.update(extra)
    element = _ElementInterface(tag, attributes)
    return element
493 | 53e1a046 | Leonidas Poulopoulos | |
494 | 53e1a046 | Leonidas Poulopoulos | ##
|
495 | 53e1a046 | Leonidas Poulopoulos | # Subelement factory. This function creates an element instance, and
|
496 | 53e1a046 | Leonidas Poulopoulos | # appends it to an existing element.
|
497 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
498 | 53e1a046 | Leonidas Poulopoulos | # The element name, attribute names, and attribute values can be
|
499 | 53e1a046 | Leonidas Poulopoulos | # either 8-bit ASCII strings or Unicode strings.
|
500 | 53e1a046 | Leonidas Poulopoulos | #
|
501 | 53e1a046 | Leonidas Poulopoulos | # @param parent The parent element.
|
502 | 53e1a046 | Leonidas Poulopoulos | # @param tag The subelement name.
|
503 | 53e1a046 | Leonidas Poulopoulos | # @param attrib An optional dictionary, containing element attributes.
|
504 | 53e1a046 | Leonidas Poulopoulos | # @param **extra Additional attributes, given as keyword arguments.
|
505 | 53e1a046 | Leonidas Poulopoulos | # @return An element instance.
|
506 | 53e1a046 | Leonidas Poulopoulos | # @defreturn Element
|
507 | 53e1a046 | Leonidas Poulopoulos | |
def SubElement(parent, tag, attrib={}, **extra):
    # Merge the keyword extras into a private copy of the attributes.
    attributes = attrib.copy()
    attributes.update(extra)
    # The parent builds the node, so the child is whatever element type the
    # parent's makeelement() produces; it is then attached as the last child.
    child = parent.makeelement(tag, attributes)
    parent.append(child)
    return child
514 | 53e1a046 | Leonidas Poulopoulos | |
515 | 53e1a046 | Leonidas Poulopoulos | ##
|
516 | 53e1a046 | Leonidas Poulopoulos | # Comment element factory. This factory function creates a special
|
517 | 53e1a046 | Leonidas Poulopoulos | # element that will be serialized as an XML comment.
|
518 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
519 | 53e1a046 | Leonidas Poulopoulos | # The comment string can be either an 8-bit ASCII string or a Unicode
|
520 | 53e1a046 | Leonidas Poulopoulos | # string.
|
521 | 53e1a046 | Leonidas Poulopoulos | #
|
522 | 53e1a046 | Leonidas Poulopoulos | # @param text A string containing the comment string.
|
523 | 53e1a046 | Leonidas Poulopoulos | # @return An element instance, representing a comment.
|
524 | 53e1a046 | Leonidas Poulopoulos | # @defreturn Element
|
525 | 53e1a046 | Leonidas Poulopoulos | |
def Comment(text=None):
    # The factory function itself is used as the tag of the new element;
    # the comment string is carried in the element's text.
    comment = Element(Comment)
    comment.text = text
    return comment
530 | 53e1a046 | Leonidas Poulopoulos | |
531 | 53e1a046 | Leonidas Poulopoulos | ##
|
532 | 53e1a046 | Leonidas Poulopoulos | # PI element factory. This factory function creates a special element
|
533 | 53e1a046 | Leonidas Poulopoulos | # that will be serialized as an XML processing instruction.
|
534 | 53e1a046 | Leonidas Poulopoulos | #
|
535 | 53e1a046 | Leonidas Poulopoulos | # @param target A string containing the PI target.
|
536 | 53e1a046 | Leonidas Poulopoulos | # @param text A string containing the PI contents, if any.
|
537 | 53e1a046 | Leonidas Poulopoulos | # @return An element instance, representing a PI.
|
538 | 53e1a046 | Leonidas Poulopoulos | # @defreturn Element
|
539 | 53e1a046 | Leonidas Poulopoulos | |
def ProcessingInstruction(target, text=None):
    # The factory function itself is used as the tag of the new element.
    pi = Element(ProcessingInstruction)
    # The element text is the target, followed by a space and the
    # contents when contents were given.
    if text:
        pi.text = target + " " + text
    else:
        pi.text = target
    return pi

# Short alias for the factory above.
PI = ProcessingInstruction
548 | 53e1a046 | Leonidas Poulopoulos | |
549 | 53e1a046 | Leonidas Poulopoulos | ##
|
550 | 53e1a046 | Leonidas Poulopoulos | # QName wrapper. This can be used to wrap a QName attribute value, in
|
551 | 53e1a046 | Leonidas Poulopoulos | # order to get proper namespace handling on output.
|
552 | 53e1a046 | Leonidas Poulopoulos | #
|
553 | 53e1a046 | Leonidas Poulopoulos | # @param text A string containing the QName value, in the form {uri}local,
|
554 | 53e1a046 | Leonidas Poulopoulos | # or, if the tag argument is given, the URI part of a QName.
|
555 | 53e1a046 | Leonidas Poulopoulos | # @param tag Optional tag. If given, the first argument is interpreted as
|
556 | 53e1a046 | Leonidas Poulopoulos | # an URI, and this argument is interpreted as a local name.
|
557 | 53e1a046 | Leonidas Poulopoulos | # @return An opaque object, representing the QName.
|
558 | 53e1a046 | Leonidas Poulopoulos | |
class QName:
    # Wrapper around a qualified name kept in self.text.
    #
    # Comparison and hashing are both based on self.text, so a QName
    # compares equal to another QName with the same text and to the
    # plain string itself.  Rich comparison methods are provided in
    # addition to __cmp__, because interpreters without __cmp__ support
    # would otherwise fall back to identity comparison.

    def __init__(self, text_or_uri, tag=None):
        # With a tag, the first argument is the namespace URI and the two
        # are combined as "{uri}tag"; otherwise the first argument is
        # stored unchanged.
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        # Must agree with __eq__: equal names hash like their text.
        return hash(self.text)

    def __eq__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text == other

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text < other

    def __le__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text <= other

    def __gt__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text > other

    def __ge__(self, other):
        if isinstance(other, QName):
            other = other.text
        return self.text >= other

    def __cmp__(self, other):
        # Three-way comparison kept for code that calls it directly;
        # written without the cmp() builtin so it works wherever the
        # ordinary comparison operators do.
        if isinstance(other, QName):
            other = other.text
        return (self.text > other) - (self.text < other)
572 | 53e1a046 | Leonidas Poulopoulos | |
573 | 53e1a046 | Leonidas Poulopoulos | ##
|
574 | 53e1a046 | Leonidas Poulopoulos | # ElementTree wrapper class. This class represents an entire element
|
575 | 53e1a046 | Leonidas Poulopoulos | # hierarchy, and adds some extra support for serialization to and from
|
576 | 53e1a046 | Leonidas Poulopoulos | # standard XML.
|
577 | 53e1a046 | Leonidas Poulopoulos | #
|
578 | 53e1a046 | Leonidas Poulopoulos | # @param element Optional root element.
|
579 | 53e1a046 | Leonidas Poulopoulos | # @keyparam file Optional file handle or name. If given, the
|
580 | 53e1a046 | Leonidas Poulopoulos | # tree is initialized with the contents of this XML file.
|
581 | 53e1a046 | Leonidas Poulopoulos | |
class ElementTree:

    def __init__(self, element=None, file=None):
        assert element is None or iselement(element)
        self._root = element # first node
        # When a file is given, its contents replace the root set above.
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        assert iselement(element)
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    #
    # @param source A file name or file object.
    # @param parser An optional parser instance.  If not given, the
    #     standard {@link XMLTreeBuilder} parser is used.
    # @return The document root element.
    # @defreturn Element

    def parse(self, source, parser=None):
        # An explicit parser is routed through default_parser_api; without
        # one, the module-level parser_api does the parsing.
        if parser:
            tree = default_parser_api.parse(source, parser)
        else:
            tree = parser_api.parse(source)
        self._root = tree
        return tree

    ##
    # Creates a tree iterator for the root element.  The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def getiterator(self, tag=None):
        assert self._root is not None
        return self._root.getiterator(tag)

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path):
        assert self._root is not None
        # A leading "/" is rewritten so the path is relative to the root.
        if path[:1] == "/":
            path = "." + path
        return self._root.find(path)

    ##
    # Finds the element text for the first toplevel element with given
    # tag.  Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the element
    #     is found, but has no text content, this method returns an
    #     empty string.
    # @defreturn string

    def findtext(self, path, default=None):
        assert self._root is not None
        if path[:1] == "/":
            path = "." + path
        return self._root.findtext(path, default)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path):
        assert self._root is not None
        if path[:1] == "/":
            path = "." + path
        return self._root.findall(path)

    ##
    # Writes the element tree to a file, as XML.
    #
    # @param file A file name, or a file object opened for writing.
    # @param encoding Optional output encoding (default is US-ASCII).

    def write(self, file, encoding="us-ascii"):
        assert self._root is not None
        # Anything without a write() method is treated as a file name.
        # Only a file opened here is closed again; a file object supplied
        # by the caller is left open for the caller to manage.
        opened_here = not hasattr(file, "write")
        if opened_here:
            file = open(file, "wb")
        try:
            if not encoding:
                encoding = "us-ascii"
            elif encoding != "utf-8" and encoding != "us-ascii":
                # The XML declaration is only emitted for other encodings.
                file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            self._write(file, self._root, encoding, {})
        finally:
            if opened_here:
                file.close()

    def _write(self, file, node, encoding, namespaces):
        # write XML to file
        #
        # namespaces maps namespace URI -> prefix for the declarations
        # that are in scope; entries added for this node are removed
        # again once the node has been written.
        tag = node.tag
        if tag is Comment:
            file.write("<!-- %s -->" % _escape_cdata(node.text, encoding))
        elif tag is ProcessingInstruction:
            file.write("<?%s?>" % _escape_cdata(node.text, encoding))
        else:
            # Private list copy, so the in-place sort below neither depends
            # on what kind of sequence items() returns nor reorders it.
            items = list(node.items())
            xmlns_items = [] # new namespaces in this scope
            try:
                if isinstance(tag, QName) or tag[:1] == "{":
                    tag, xmlns = fixtag(tag, namespaces)
                    if xmlns: xmlns_items.append(xmlns)
            except TypeError:
                _raise_serialization_error(tag)
            file.write("<" + _encode(tag, encoding))
            if items or xmlns_items:
                items.sort() # lexical order
                for k, v in items:
                    try:
                        if isinstance(k, QName) or k[:1] == "{":
                            k, xmlns = fixtag(k, namespaces)
                            if xmlns: xmlns_items.append(xmlns)
                    except TypeError:
                        _raise_serialization_error(k)
                    try:
                        if isinstance(v, QName):
                            v, xmlns = fixtag(v, namespaces)
                            if xmlns: xmlns_items.append(xmlns)
                    except TypeError:
                        _raise_serialization_error(v)
                    file.write(" %s=\"%s\"" % (_encode(k, encoding),
                                               _escape_attrib(v, encoding)))
                # Declarations go after the ordinary attributes.
                for k, v in xmlns_items:
                    file.write(" %s=\"%s\"" % (_encode(k, encoding),
                                               _escape_attrib(v, encoding)))
            if node.text or len(node):
                file.write(">")
                if node.text:
                    file.write(_escape_cdata(node.text, encoding))
                for n in node:
                    self._write(file, n, encoding, namespaces)
                file.write("</" + _encode(tag, encoding) + ">")
            else:
                file.write(" />")
            # Each xmlns item is (attribute name, uri); the uri is the key
            # in the namespaces mapping.
            for k, v in xmlns_items:
                del namespaces[v]
        if node.tail:
            file.write(_escape_cdata(node.tail, encoding))
752 | 53e1a046 | Leonidas Poulopoulos | |
753 | 53e1a046 | Leonidas Poulopoulos | # --------------------------------------------------------------------
|
754 | 53e1a046 | Leonidas Poulopoulos | # helpers
|
755 | 53e1a046 | Leonidas Poulopoulos | |
756 | 53e1a046 | Leonidas Poulopoulos | ##
|
757 | 53e1a046 | Leonidas Poulopoulos | # Checks if an object appears to be a valid element object.
|
758 | 53e1a046 | Leonidas Poulopoulos | #
|
759 | 53e1a046 | Leonidas Poulopoulos | # @param element An element instance.
|
760 | 53e1a046 | Leonidas Poulopoulos | # @return A true value if this is an element object.
|
761 | 53e1a046 | Leonidas Poulopoulos | # @defreturn flag
|
762 | 53e1a046 | Leonidas Poulopoulos | |
def iselement(element):
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    #
    # Instances of the element class always qualify; any other object
    # qualifies as soon as it exposes a "tag" attribute.
    if isinstance(element, _ElementInterface):
        return True
    return hasattr(element, "tag")
767 | 53e1a046 | Leonidas Poulopoulos | |
768 | 53e1a046 | Leonidas Poulopoulos | ##
|
769 | 53e1a046 | Leonidas Poulopoulos | # Writes an element tree or element structure to sys.stdout. This
|
770 | 53e1a046 | Leonidas Poulopoulos | # function should be used for debugging only.
|
771 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
772 | 53e1a046 | Leonidas Poulopoulos | # The exact output format is implementation dependent. In this
|
773 | 53e1a046 | Leonidas Poulopoulos | # version, it's written as an ordinary XML file.
|
774 | 53e1a046 | Leonidas Poulopoulos | #
|
775 | 53e1a046 | Leonidas Poulopoulos | # @param elem An element tree or an individual element.
|
776 | 53e1a046 | Leonidas Poulopoulos | |
def dump(elem):
    # debugging
    # A bare element is wrapped in a tree so it can be written out.
    if isinstance(elem, ElementTree):
        tree = elem
    else:
        tree = ElementTree(elem)
    out = sys.stdout
    tree.write(out)
    # Finish with a newline unless the root's tail already ends in one.
    tail = tree.getroot().tail
    if not (tail and tail[-1] == "\n"):
        out.write("\n")
785 | 53e1a046 | Leonidas Poulopoulos | |
786 | 53e1a046 | Leonidas Poulopoulos | def _encode(s, encoding): |
787 | 53e1a046 | Leonidas Poulopoulos | try:
|
788 | 53e1a046 | Leonidas Poulopoulos | return s.encode(encoding)
|
789 | 53e1a046 | Leonidas Poulopoulos | except AttributeError: |
790 | 53e1a046 | Leonidas Poulopoulos | return s # 1.5.2: assume the string uses the right encoding |
791 | 53e1a046 | Leonidas Poulopoulos | |
# Pattern matching runs of characters that _encode_entity() rewrites:
# the characters & < > " and everything above the ASCII range.
if sys.version[:3] == "1.5":
    _escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2
else:
    # The unicode literal is produced through eval() -- presumably so the
    # u"..." syntax never has to be compiled by an interpreter that lacks
    # it (TODO confirm).  The evaluated text is a fixed constant.
    _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
796 | 53e1a046 | Leonidas Poulopoulos | |
797 | 53e1a046 | Leonidas Poulopoulos | _escape_map = { |
798 | 53e1a046 | Leonidas Poulopoulos | "&": "&", |
799 | 53e1a046 | Leonidas Poulopoulos | "<": "<", |
800 | 53e1a046 | Leonidas Poulopoulos | ">": ">", |
801 | 53e1a046 | Leonidas Poulopoulos | '"': """, |
802 | 53e1a046 | Leonidas Poulopoulos | } |
803 | 53e1a046 | Leonidas Poulopoulos | |
# Namespace URI -> prefix table.  fixtag() consults it for URIs that have
# no prefix in the current scope, before generating an "nsN" prefix.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
}
811 | 53e1a046 | Leonidas Poulopoulos | |
812 | 53e1a046 | Leonidas Poulopoulos | def _raise_serialization_error(text): |
813 | 53e1a046 | Leonidas Poulopoulos | raise TypeError( |
814 | 53e1a046 | Leonidas Poulopoulos | "cannot serialize %r (type %s)" % (text, type(text).__name__) |
815 | 53e1a046 | Leonidas Poulopoulos | ) |
816 | 53e1a046 | Leonidas Poulopoulos | |
def _encode_entity(text, pattern=_escape):
    # map reserved and non-ascii characters to numerical entities
    def escape_entities(m, table=_escape_map):
        # Rewrite one matched run character by character: use the named
        # entity when the table has one, else a numeric reference.
        pieces = []
        for char in m.group():
            replacement = table.get(char)
            if replacement is None:
                replacement = "&#%d;" % ord(char)
            pieces.append(replacement)
        return "".join(pieces)
    try:
        escaped = pattern.sub(escape_entities, text)
        return _encode(escaped, "ascii")
    except TypeError:
        # Values the pattern cannot be applied to are reported uniformly.
        _raise_serialization_error(text)
832 | 53e1a046 | Leonidas Poulopoulos | |
833 | 53e1a046 | Leonidas Poulopoulos | #
|
834 | 53e1a046 | Leonidas Poulopoulos | # the following functions assume an ascii-compatible encoding
|
835 | 53e1a046 | Leonidas Poulopoulos | # (or "utf-16")
|
836 | 53e1a046 | Leonidas Poulopoulos | |
837 | 53e1a046 | Leonidas Poulopoulos | def _escape_cdata(text, encoding=None, replace=string.replace): |
838 | 53e1a046 | Leonidas Poulopoulos | # escape character data
|
839 | 53e1a046 | Leonidas Poulopoulos | try:
|
840 | 53e1a046 | Leonidas Poulopoulos | if encoding:
|
841 | 53e1a046 | Leonidas Poulopoulos | try:
|
842 | 53e1a046 | Leonidas Poulopoulos | text = _encode(text, encoding) |
843 | 53e1a046 | Leonidas Poulopoulos | except UnicodeError: |
844 | 53e1a046 | Leonidas Poulopoulos | return _encode_entity(text)
|
845 | 53e1a046 | Leonidas Poulopoulos | if "&" in text: |
846 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, "&", "&") |
847 | 53e1a046 | Leonidas Poulopoulos | if "<" in text: |
848 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, "<", "<") |
849 | 53e1a046 | Leonidas Poulopoulos | if ">" in text: |
850 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, ">", ">") |
851 | 53e1a046 | Leonidas Poulopoulos | return text
|
852 | 53e1a046 | Leonidas Poulopoulos | except (TypeError, AttributeError): |
853 | 53e1a046 | Leonidas Poulopoulos | _raise_serialization_error(text) |
854 | 53e1a046 | Leonidas Poulopoulos | |
855 | 53e1a046 | Leonidas Poulopoulos | def _escape_attrib(text, encoding=None, replace=string.replace): |
856 | 53e1a046 | Leonidas Poulopoulos | # escape attribute value
|
857 | 53e1a046 | Leonidas Poulopoulos | try:
|
858 | 53e1a046 | Leonidas Poulopoulos | if encoding:
|
859 | 53e1a046 | Leonidas Poulopoulos | try:
|
860 | 53e1a046 | Leonidas Poulopoulos | text = _encode(text, encoding) |
861 | 53e1a046 | Leonidas Poulopoulos | except UnicodeError: |
862 | 53e1a046 | Leonidas Poulopoulos | return _encode_entity(text)
|
863 | 53e1a046 | Leonidas Poulopoulos | if "&" in text: |
864 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, "&", "&") |
865 | 53e1a046 | Leonidas Poulopoulos | if "\"" in text: |
866 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, "\"", """) |
867 | 53e1a046 | Leonidas Poulopoulos | if "<" in text: |
868 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, "<", "<") |
869 | 53e1a046 | Leonidas Poulopoulos | if ">" in text: |
870 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, ">", ">") |
871 | 53e1a046 | Leonidas Poulopoulos | if "\n" in text: |
872 | 53e1a046 | Leonidas Poulopoulos | text = replace(text, "\n", " ") |
873 | 53e1a046 | Leonidas Poulopoulos | return text
|
874 | 53e1a046 | Leonidas Poulopoulos | except (TypeError, AttributeError): |
875 | 53e1a046 | Leonidas Poulopoulos | _raise_serialization_error(text) |
876 | 53e1a046 | Leonidas Poulopoulos | |
def fixtag(tag, namespaces):
    # given a decorated tag (of the form {uri}tag), return prefixed
    # tag and namespace declaration, if any
    if isinstance(tag, QName):
        tag = tag.text
    # Drop the leading "{" and split at the first "}".
    uri, local = tag[1:].split("}", 1)
    known = namespaces.get(uri)
    if known is not None:
        # Already declared in this scope: nothing new to declare.
        return "%s:%s" % (known, local), None
    prefix = _namespace_map.get(uri)
    if prefix is None:
        prefix = "ns%d" % len(namespaces)
    namespaces[uri] = prefix
    # The "xml" prefix is recorded like any other, but no declaration
    # is produced for it.
    declaration = None
    if prefix != "xml":
        declaration = ("xmlns:%s" % prefix, uri)
    return "%s:%s" % (prefix, local), declaration
896 | 53e1a046 | Leonidas Poulopoulos | |
897 | 53e1a046 | Leonidas Poulopoulos | ##
|
898 | 53e1a046 | Leonidas Poulopoulos | # Parses an XML document into an element tree.
|
899 | 53e1a046 | Leonidas Poulopoulos | #
|
900 | 53e1a046 | Leonidas Poulopoulos | # @param source A filename or file object containing XML data.
|
901 | 53e1a046 | Leonidas Poulopoulos | # @param parser An optional parser instance. If not given, the
|
902 | 53e1a046 | Leonidas Poulopoulos | # standard {@link XMLTreeBuilder} parser is used.
|
903 | 53e1a046 | Leonidas Poulopoulos | # @return An ElementTree instance
|
904 | 53e1a046 | Leonidas Poulopoulos | |
def parse(source, parser=None):
    # Without an explicit parser the module-level parser_api does the
    # work; an explicit parser is routed through default_parser_api.
    if not parser:
        root = parser_api.parse(source)
    else:
        root = default_parser_api.parse(source, parser)
    # Wrap the parse result in a tree object.
    return ElementTree(root)
911 | 53e1a046 | Leonidas Poulopoulos | |
912 | 53e1a046 | Leonidas Poulopoulos | ##
|
913 | 53e1a046 | Leonidas Poulopoulos | # Parses an XML document into an element tree incrementally, and reports
|
914 | 53e1a046 | Leonidas Poulopoulos | # what's going on to the user.
|
915 | 53e1a046 | Leonidas Poulopoulos | #
|
916 | 53e1a046 | Leonidas Poulopoulos | # @param source A filename or file object containing XML data.
|
917 | 53e1a046 | Leonidas Poulopoulos | # @param events A list of events to report back. If omitted, only "end"
|
918 | 53e1a046 | Leonidas Poulopoulos | # events are reported.
|
919 | 53e1a046 | Leonidas Poulopoulos | # @return A (event, elem) iterator.
|
920 | 53e1a046 | Leonidas Poulopoulos | |
def iterparse(source, events=None):
    """Parse an XML document incrementally.

    Both arguments are forwarded unchanged to parser_api.iterparse,
    and whatever that call returns is handed back to the caller.
    """
    event_stream = parser_api.iterparse(source, events)
    return event_stream
|
923 | 53e1a046 | Leonidas Poulopoulos | |
class _iterparse:
    """Incremental parser that hands out (event, value) pairs.

    Input is read from the source in 16384-byte chunks and fed to an
    XMLTreeBuilder whose expat callbacks are rewired to record the
    requested events in a buffer; next() drains that buffer.  Once the
    input is exhausted, the toplevel element returned by the builder's
    close() is published as the "root" attribute.

    If the source was given as a filename, the file is opened here and
    closed again when the input is exhausted or reading/parsing fails.
    A file object passed in by the caller is never closed.
    """

    def __init__(self, source, events):
        # source: a filename, or any object with a read() method.
        # events: sequence of event names to report ("start", "end",
        # "start-ns", "end-ns"); None means ["end"].  Names other than
        # these four are silently ignored.
        self._close_file = 0
        if not hasattr(source, "read"):
            source = open(source, "rb")
            # we opened the file, so we are responsible for closing it
            self._close_file = 1
        self._file = source
        self._events = []
        self._index = 0
        self.root = self._root = None
        self._parser = XMLTreeBuilder()
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        for event in events:
            if event == "start":
                try:
                    # prefer list-style attribute reporting, if the
                    # expat object supports it
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    try:
                        uri = _encode(uri, "ascii")
                    except UnicodeError:
                        pass # keep the uri as is
                    append((event, (prefix or "", uri)))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler

    def _close_source(self):
        # close the source, but only if it was opened by __init__;
        # clearing the flag makes repeated calls harmless
        if self._close_file:
            self._close_file = 0
            self._file.close()

    def next(self):
        # Returns the next buffered (event, value) pair, reading and
        # parsing more input whenever the buffer runs dry.
        while 1:
            try:
                item = self._events[self._index]
            except IndexError:
                if self._parser is None:
                    # input exhausted and buffer drained: publish root
                    self.root = self._root
                    try:
                        raise StopIteration
                    except NameError:
                        # no StopIteration on this Python; IndexError
                        # terminates __getitem__-based iteration
                        raise IndexError
                # load event buffer
                del self._events[:]
                self._index = 0
                finished = 1 # cleared only if there may be more input
                try:
                    data = self._file.read(16384)
                    if data:
                        self._parser.feed(data)
                        finished = 0
                    else:
                        self._root = self._parser.close()
                        self._parser = None
                finally:
                    # reached on end of input and on read/parse errors
                    if finished:
                        self._close_source()
            else:
                self._index = self._index + 1
                return item

    # use the iterator protocol where available, and fall back on the
    # sequence protocol otherwise
    try:
        iter
        def __iter__(self):
            return self
    except NameError:
        def __getitem__(self, index):
            return self.next()
1002 | 53e1a046 | Leonidas Poulopoulos | |
1003 | 53e1a046 | Leonidas Poulopoulos | ##
|
1004 | 53e1a046 | Leonidas Poulopoulos | # Parses an XML document from a string constant. This function can
|
1005 | 53e1a046 | Leonidas Poulopoulos | # be used to embed "XML literals" in Python code.
|
1006 | 53e1a046 | Leonidas Poulopoulos | #
|
1007 | 53e1a046 | Leonidas Poulopoulos | # @param text A string containing XML data.
|
1008 | 53e1a046 | Leonidas Poulopoulos | # @return An Element instance.
|
1009 | 53e1a046 | Leonidas Poulopoulos | # @defreturn Element
|
1010 | 53e1a046 | Leonidas Poulopoulos | |
def XML(text):
    """Parse an XML document from a string constant.

    text is a string containing XML data.  The string is handed to the
    fromstring method of parser_api, or of default_parser_api when
    parser_api is false, and that call's result is returned.
    """
    return (parser_api or default_parser_api).fromstring(text)
|
1014 | 53e1a046 | Leonidas Poulopoulos | |
1015 | 53e1a046 | Leonidas Poulopoulos | ##
|
1016 | 53e1a046 | Leonidas Poulopoulos | # Parses an XML document from a string constant, and also returns
|
1017 | 53e1a046 | Leonidas Poulopoulos | # a dictionary which maps from element id:s to elements.
|
1018 | 53e1a046 | Leonidas Poulopoulos | #
|
1019 | 53e1a046 | Leonidas Poulopoulos | # @param text A string containing XML data.
|
1020 | 53e1a046 | Leonidas Poulopoulos | # @return A tuple containing an Element instance and a dictionary.
|
1021 | 53e1a046 | Leonidas Poulopoulos | # @defreturn (Element, dictionary)
|
1022 | 53e1a046 | Leonidas Poulopoulos | |
def XMLID(text):
    """Parse an XML string and also collect elements by their "id".

    text is a string containing XML data.  Returns a (tree, ids) tuple:
    tree is the result of fromstring on the selected parser API, and
    ids maps each true "id" attribute value found while walking
    tree.getiterator() to its element.  If several elements share an
    id, the one visited last wins.
    """
    root = (parser_api or default_parser_api).fromstring(text)
    id_map = {}
    for node in root.getiterator():
        key = node.get("id")
        if key:
            id_map[key] = node
    return root, id_map
|
1032 | 53e1a046 | Leonidas Poulopoulos | |
1033 | 53e1a046 | Leonidas Poulopoulos | ##
|
1034 | 53e1a046 | Leonidas Poulopoulos | # Parses an XML document from a string constant. Same as {@link #XML}.
|
1035 | 53e1a046 | Leonidas Poulopoulos | #
|
1036 | 53e1a046 | Leonidas Poulopoulos | # @def fromstring(text)
|
1037 | 53e1a046 | Leonidas Poulopoulos | # @param text A string containing XML data.
|
1038 | 53e1a046 | Leonidas Poulopoulos | # @return An Element instance.
|
1039 | 53e1a046 | Leonidas Poulopoulos | # @defreturn Element
|
1040 | 53e1a046 | Leonidas Poulopoulos | |
# fromstring is bound to the very same function object as XML, so
# fromstring(text) and XML(text) are interchangeable.
fromstring = XML
1042 | 53e1a046 | Leonidas Poulopoulos | |
1043 | 53e1a046 | Leonidas Poulopoulos | ##
|
1044 | 53e1a046 | Leonidas Poulopoulos | # Generates a string representation of an XML element, including all
|
1045 | 53e1a046 | Leonidas Poulopoulos | # subelements.
|
1046 | 53e1a046 | Leonidas Poulopoulos | #
|
1047 | 53e1a046 | Leonidas Poulopoulos | # @param element An Element instance.
|
1048 | 53e1a046 | Leonidas Poulopoulos | # @return An encoded string containing the XML data.
|
1049 | 53e1a046 | Leonidas Poulopoulos | # @defreturn string
|
1050 | 53e1a046 | Leonidas Poulopoulos | |
def tostring(element, encoding=None):
    """Serialize an element, including all subelements, to a string.

    element is wrapped in an ElementTree and written, with the given
    encoding, to a minimal file-like object whose write method appends
    each chunk to a list.  The chunks are then joined into one string
    and returned.
    """
    class _collector:
        # bare attribute holder; gets a write() attribute below
        pass
    pieces = []
    sink = _collector()
    sink.write = pieces.append
    ElementTree(element).write(sink, encoding)
    return string.join(pieces, "")
1059 | 53e1a046 | Leonidas Poulopoulos | |
1060 | 53e1a046 | Leonidas Poulopoulos | ##
|
1061 | 53e1a046 | Leonidas Poulopoulos | # Generic element structure builder. This builder converts a sequence
|
1062 | 53e1a046 | Leonidas Poulopoulos | # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
|
1063 | 53e1a046 | Leonidas Poulopoulos | # #TreeBuilder.end} method calls to a well-formed element structure.
|
1064 | 53e1a046 | Leonidas Poulopoulos | # <p>
|
1065 | 53e1a046 | Leonidas Poulopoulos | # You can use this class to build an element structure using a custom XML
|
1066 | 53e1a046 | Leonidas Poulopoulos | # parser, or a parser for some other XML-like format.
|
1067 | 53e1a046 | Leonidas Poulopoulos | #
|
1068 | 53e1a046 | Leonidas Poulopoulos | # @param element_factory Optional element factory. This factory
|
1069 | 53e1a046 | Leonidas Poulopoulos | # is called to create new Element instances, as necessary.
|
1070 | 53e1a046 | Leonidas Poulopoulos | |
class TreeBuilder:
    """Builds an element structure from start/data/end method calls.

    element_factory, if given, is called as factory(tag, attrs) to
    create each new element; it defaults to _ElementInterface.  The
    created elements must provide an append method and "tag", "text"
    and "tail" attributes.
    """

    def __init__(self, element_factory=None):
        self._data = [] # data collector
        self._elem = [] # element stack
        self._last = None # last element
        self._tail = None # true if we're after an end tag
        if element_factory is None:
            element_factory = _ElementInterface
        self._factory = element_factory

    ##
    # Flushes the parser buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        # identity test: does not depend on how elements compare
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Moves collected character data to the text attribute (before
        # an end tag) or the tail attribute (after an end tag) of the
        # last element.  Data seen before any element is discarded.
        if self._data:
            if self._last is not None:
                # string method instead of string.join, which is not
                # available in Python 3
                text = "".join(self._data)
                if self._tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            # attach to the innermost open element
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        self._tail = 1
        return self._last
1147 | 53e1a046 | Leonidas Poulopoulos | |
1148 | 53e1a046 | Leonidas Poulopoulos | ##
|
1149 | 53e1a046 | Leonidas Poulopoulos | # Element structure builder for XML source data, based on the
|
1150 | 53e1a046 | Leonidas Poulopoulos | # <b>expat</b> parser.
|
1151 | 53e1a046 | Leonidas Poulopoulos | #
|
1152 | 53e1a046 | Leonidas Poulopoulos | # @keyparam target Target object. If omitted, the builder uses an
|
1153 | 53e1a046 | Leonidas Poulopoulos | # instance of the standard {@link #TreeBuilder} class.
|
1154 | 53e1a046 | Leonidas Poulopoulos | # @keyparam html Predefine HTML entities. This flag is not supported
|
1155 | 53e1a046 | Leonidas Poulopoulos | # by the current implementation.
|
1156 | 53e1a046 | Leonidas Poulopoulos | # @see #ElementTree
|
1157 | 53e1a046 | Leonidas Poulopoulos | # @see #TreeBuilder
|
1158 | 53e1a046 | Leonidas Poulopoulos | |
class XMLTreeBuilder:
    """Element structure builder for XML source data, based on expat.

    html is accepted but not used by this implementation.  target is
    the object that receives start(tag, attrib), data(text), end(tag)
    and close() calls; it defaults to a new TreeBuilder instance.

    Undefined entities are looked up in the "entity" dictionary
    attribute, which starts out empty.
    """

    def __init__(self, html=0, target=None):
        try:
            from xml.parsers import expat
        except ImportError:
            raise ImportError(
                "No module named expat; use SimpleXMLTreeBuilder instead"
                )
        # "}" separates namespace uri and local name in reported names;
        # _fixname adds the leading "{"
        self._parser = parser = expat.ParserCreate(None, "}")
        if target is None:
            target = TreeBuilder()
        self._target = target
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        # (an unused "encoding" local used to be computed here from
        # parser.returns_unicode; that attribute no longer exists on
        # Python 3.3 and later, where reading it raised AttributeError)
        self._doctype = None
        self.entity = {}

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return _encode(text, "ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # start handler for dictionary-style attributes
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = self._fixtext(value)
        return self._target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # start handler for attributes given as a flat
        # [name, value, name, value, ...] list
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
        return self._target.start(tag, attrib)

    def _data(self, text):
        return self._target.data(self._fixtext(text))

    def _end(self, tag):
        return self._target.end(self._fixname(tag))

    def _default(self, text):
        # Receives what no other handler takes: resolves entity
        # references via self.entity, and collects the pieces of a
        # doctype declaration for the doctype() hook.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self._target.data(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                raise expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            # string method instead of string.strip, which is not
            # available in Python 3
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1] # drop the surrounding quotes
                self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        pass

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        self._parser.Parse(data, 0)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        self._parser.Parse("", 1) # end of data
        tree = self._target.close()
        del self._target, self._parser # get rid of circular references
        return tree
|
1306 | 53e1a046 | Leonidas Poulopoulos | |
1307 | 53e1a046 | Leonidas Poulopoulos | |
1308 | 53e1a046 | Leonidas Poulopoulos | # --------------------------------------------------------------------
|
1309 | 53e1a046 | Leonidas Poulopoulos | # load platform specific extensions
|
1310 | 53e1a046 | Leonidas Poulopoulos | |
1311 | 53e1a046 | Leonidas Poulopoulos | if sys.platform == "cli": |
1312 | 53e1a046 | Leonidas Poulopoulos | try:
|
1313 | 53e1a046 | Leonidas Poulopoulos | import ElementIron |
1314 | 53e1a046 | Leonidas Poulopoulos | except ImportError: |
1315 | 53e1a046 | Leonidas Poulopoulos | pass # fall back on optional pyexpat emulation |
1316 | 53e1a046 | Leonidas Poulopoulos | else:
|
1317 | 53e1a046 | Leonidas Poulopoulos | parser_api = ElementIron.ParserAPI(TreeBuilder) |
1318 | 53e1a046 | Leonidas Poulopoulos | |
1319 | 53e1a046 | Leonidas Poulopoulos | elif sys.platform.startswith("java"): |
1320 | 53e1a046 | Leonidas Poulopoulos | try:
|
1321 | 53e1a046 | Leonidas Poulopoulos | import ElementJava |
1322 | 53e1a046 | Leonidas Poulopoulos | except ImportError: |
1323 | 53e1a046 | Leonidas Poulopoulos | pass
|
1324 | 53e1a046 | Leonidas Poulopoulos | else:
|
1325 | 53e1a046 | Leonidas Poulopoulos | parser_api = ElementJava.ParserAPI(TreeBuilder) |