Coverage for grm\plugin\xmltodict.py: 0%
234 statements
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-10 14:44 +0900
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-10 14:44 +0900
1#!/usr/bin/env python
2"Makes working with XML feel like you are working with JSON"
4try:
5 from defusedexpat import pyexpat as expat
6except ImportError:
7 from xml.parsers import expat
8from xml.sax.saxutils import XMLGenerator
9from xml.sax.xmlreader import AttributesImpl
10try: # pragma no cover
11 from cStringIO import StringIO
12except ImportError: # pragma no cover
13 try:
14 from StringIO import StringIO
15 except ImportError:
16 from io import StringIO
17try: # pragma no cover
18 from collections import OrderedDict
19except ImportError: # pragma no cover
20 try:
21 from ordereddict import OrderedDict
22 except ImportError:
23 OrderedDict = dict
# Package metadata.
__author__ = 'Martin Blech'
__version__ = '0.11.0'
__license__ = 'MIT'

# Text-type aliases: use the Python 2 builtins ``basestring`` / ``unicode``
# when they exist, otherwise fall back to ``str``.
try:  # pragma no cover
    basestring
except NameError:  # pragma no cover
    _basestring = str
else:  # pragma no cover
    _basestring = basestring

try:  # pragma no cover
    unicode
except NameError:  # pragma no cover
    _unicode = str
else:  # pragma no cover
    _unicode = unicode
class ParsingInterrupted(Exception):
    """Raised to stop parsing when a streaming item callback returns a
    false-ish value."""
class _DictSAXHandler(object):
    """Collect parser events into nested dictionaries.

    ``startElement`` saves the parent's partial state on a stack,
    ``characters`` buffers text chunks, and ``endElement`` folds the finished
    element into its parent through ``push_data``.  The final result is left
    in ``self.item``.

    Parameters:
        item_depth: Depth at which finished elements are handed to
            ``item_callback``.  Only elements deeper than ``item_depth`` are
            accumulated; with the default ``0`` the callback is never invoked
            and the whole document is accumulated.
        item_callback: Called as ``item_callback(path, item)`` for each
            element that ends at ``item_depth``.  A false-ish return value
            raises ``ParsingInterrupted``.
        xml_attribs: When false, element attributes are discarded.
        attr_prefix: String prepended to attribute names to form dict keys.
        cdata_key: Key under which text is stored when the element is
            represented as a dictionary.
        force_cdata: When true, text-only elements are also represented as a
            dictionary holding ``cdata_key``.
        cdata_separator: String used to join the buffered text chunks.
        postprocessor: Optional ``callable(path, key, value)`` returning a
            replacement ``(key, value)`` pair, or ``None`` to drop the entry.
        dict_constructor: Factory used for every dictionary that is built.
        strip_whitespace: When true, element text is stripped and text that
            is empty after stripping is treated as absent.
        namespace_separator: Separator between namespace and local name.
        namespaces: Optional mapping from a namespace to the short name that
            should replace it; a false-ish short name removes the namespace.
        force_list: ``True`` to wrap every new value in a list, a container
            of keys whose values must always be lists, or a
            ``callable(path, key, value)`` returning a boolean.
    """

    def __init__(self,
                 item_depth=0,
                 item_callback=lambda *args: True,
                 xml_attribs=True,
                 attr_prefix='@',
                 cdata_key='#text',
                 force_cdata=False,
                 cdata_separator='',
                 postprocessor=None,
                 dict_constructor=OrderedDict,
                 strip_whitespace=True,
                 namespace_separator=':',
                 namespaces=None,
                 force_list=None):
        self.path = []    # (name, attrs-or-None) pairs from the root down
        self.stack = []   # saved (item, data) state of enclosing elements
        self.data = []    # text chunks buffered for the current element
        self.item = None  # dictionary (or text) being built
        self.item_depth = item_depth
        self.xml_attribs = xml_attribs
        self.item_callback = item_callback
        self.attr_prefix = attr_prefix
        self.cdata_key = cdata_key
        self.force_cdata = force_cdata
        self.cdata_separator = cdata_separator
        self.postprocessor = postprocessor
        self.dict_constructor = dict_constructor
        self.strip_whitespace = strip_whitespace
        self.namespace_separator = namespace_separator
        self.namespaces = namespaces
        self.namespace_declarations = OrderedDict()
        self.force_list = force_list

    def _build_name(self, full_name):
        """Return ``full_name`` with its namespace replaced by its short form.

        The name is returned unchanged when no ``namespaces`` mapping is set
        or when it contains no separator.  Namespaces missing from the
        mapping are kept as they are; a false-ish short name yields the bare
        local name.
        """
        if not self.namespaces:
            return full_name
        separator = self.namespace_separator
        i = full_name.rfind(separator)
        if i == -1:
            return full_name
        # Skip the whole separator, not just one character, so separators
        # longer than one character do not leak into the local name.
        namespace, name = full_name[:i], full_name[i + len(separator):]
        short_namespace = self.namespaces.get(namespace, namespace)
        if not short_namespace:
            return name
        return separator.join((short_namespace, name))

    def _attrs_to_dict(self, attrs):
        """Return ``attrs`` as a dictionary.

        A ``dict`` is returned as is; any other value is treated as a flat
        ``[name, value, name, value, ...]`` sequence.
        """
        if isinstance(attrs, dict):
            return attrs
        return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))

    def startNamespaceDecl(self, prefix, uri):
        """Remember a namespace declaration until an element consumes it."""
        self.namespace_declarations[prefix or ''] = uri

    def startElement(self, full_name, attrs):
        """Open a new element and start collecting its attributes."""
        name = self._build_name(full_name)
        attrs = self._attrs_to_dict(attrs)
        # Pending declarations are attached only to an element that has at
        # least one attribute; otherwise they stay pending.
        if attrs and self.namespace_declarations:
            attrs['xmlns'] = self.namespace_declarations
            self.namespace_declarations = OrderedDict()
        self.path.append((name, attrs or None))
        if len(self.path) > self.item_depth:
            self.stack.append((self.item, self.data))
            if self.xml_attribs:
                attr_entries = []
                for key, value in attrs.items():
                    key = self.attr_prefix + self._build_name(key)
                    if self.postprocessor:
                        entry = self.postprocessor(self.path, key, value)
                    else:
                        entry = (key, value)
                    if entry:
                        attr_entries.append(entry)
                attrs = self.dict_constructor(attr_entries)
            else:
                attrs = None
            self.item = attrs or None
            self.data = []

    def endElement(self, full_name):
        """Close the current element and merge it into its parent.

        Raises:
            ParsingInterrupted: if the element ends at ``item_depth`` and
                ``item_callback`` returns a false-ish value.
        """
        name = self._build_name(full_name)
        if len(self.path) == self.item_depth:
            item = self.item
            if item is None:
                item = (None if not self.data
                        else self.cdata_separator.join(self.data))

            should_continue = self.item_callback(self.path, item)
            if not should_continue:
                raise ParsingInterrupted()
        if self.stack:
            data = (None if not self.data
                    else self.cdata_separator.join(self.data))
            item = self.item
            self.item, self.data = self.stack.pop()
            if self.strip_whitespace and data:
                data = data.strip() or None
            if data and self.force_cdata and item is None:
                item = self.dict_constructor()
            if item is not None:
                if data:
                    self.push_data(item, self.cdata_key, data)
                self.item = self.push_data(self.item, name, item)
            else:
                self.item = self.push_data(self.item, name, data)
        else:
            # Nothing was accumulated for this element: reset the state.
            self.item = None
            self.data = []
        self.path.pop()

    def characters(self, data):
        """Buffer a chunk of character data for the current element."""
        if not self.data:
            self.data = [data]
        else:
            self.data.append(data)

    def push_data(self, item, key, data):
        """Store ``data`` under ``key`` in ``item`` and return ``item``.

        The postprocessor, if any, runs first and may rename the key, replace
        the value, or drop the entry by returning ``None``.  A dictionary is
        created when ``item`` is ``None``.  A key that is already present is
        turned into a list holding all of its values.
        """
        if self.postprocessor is not None:
            result = self.postprocessor(self.path, key, data)
            if result is None:
                return item
            key, data = result
        if item is None:
            item = self.dict_constructor()
        try:
            value = item[key]
            if isinstance(value, list):
                value.append(data)
            else:
                item[key] = [value, data]
        except KeyError:
            if self._should_force_list(key, data):
                item[key] = [data]
            else:
                item[key] = data
        return item

    def _should_force_list(self, key, value):
        """Tell whether a first value stored under ``key`` must be a list."""
        if not self.force_list:
            return False
        # ``True`` forces a list everywhere; without this check the
        # membership test and the call below would both raise TypeError.
        if isinstance(self.force_list, bool):
            return self.force_list
        try:
            return key in self.force_list
        except TypeError:
            # Not a container: treat ``force_list`` as a callable.
            return self.force_list(self.path[:-1], key, value)
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', disable_entities=True, **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can either be a `string` or a file-like object.

    If `xml_attribs` is `True`, element attributes are put in the dictionary
    among regular child elements, using `@` as a prefix to avoid collisions.
    If set to `False`, they are just ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    If `item_depth` is `0`, the function returns a dictionary for the root
    element (default behavior). Otherwise, it calls `item_callback` every
    time an item at the specified depth is found and returns `None` in the
    end (streaming mode).

    The callback function receives two parameters: the `path` from the
    document root to the item (name-attribs pairs), and the `item` (dict).
    If the callback's return value is false-ish, parsing will be stopped
    with the :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print('path:%s item:%s' % (path, item))
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    The optional argument `postprocessor` is a function that takes `path`,
    `key` and `value` as positional arguments and returns a new
    `(key, value)` pair where both `key` and `value` may have changed.
    Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    You can pass an alternate version of `expat` (such as `defusedexpat`) by
    using the `expat` parameter. E.g:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])

    You can use the force_list argument to force lists to be created even
    when there is only a single child of a given level of hierarchy. The
    force_list argument is a tuple of keys. If the key for a given level
    of hierarchy is in the force_list argument, that level of hierarchy
    will have a list as a child (even if there is only one sub-element).
    This is applied after any user-supplied postprocessor has already run.

        For example, given this input:
        <servers>
          <server>
            <name>host1</name>
            <os>Linux</os>
            <interfaces>
              <interface>
                <name>em0</name>
                <ip_address>10.0.0.1</ip_address>
              </interface>
            </interfaces>
          </server>
        </servers>

        If called with force_list=('interface',), it will produce
        this dictionary:
        {'servers':
          {'server':
            {'name': 'host1',
             'os': 'Linux',
             'interfaces':
              {'interface':
                [ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } } }

    `force_list` can also be a callable that receives `path`, `key` and
    `value`. This is helpful in cases where the logic that decides whether
    a list should be forced is more complex.
    """
    # The handler always receives the caller's separator; the parser only
    # gets one when namespace processing was requested.
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)
    parser_separator = namespace_separator if process_namespaces else None
    parser = expat.ParserCreate(encoding, parser_separator)
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    callbacks = (
        ('StartNamespaceDeclHandler', handler.startNamespaceDecl),
        ('StartElementHandler', handler.startElement),
        ('EndElementHandler', handler.endElement),
        ('CharacterDataHandler', handler.characters),
    )
    for hook_name, callback in callbacks:
        setattr(parser, hook_name, callback)
    parser.buffer_text = True
    if disable_entities:
        try:
            # Attempt to disable DTD in Jython's expat parser (Xerces-J).
            feature = "http://apache.org/xml/features/disallow-doctype-decl"
            parser._reader.setFeature(feature, True)
        except AttributeError:
            # For CPython / expat parser.
            # Anything not handled ends up here and entities aren't expanded.
            parser.DefaultHandler = lambda x: None
            # Expects an integer return; zero means failure -> expat.ExpatError.
            parser.ExternalEntityRefHandler = lambda *x: 1
    if not hasattr(xml_input, 'read'):
        parser.Parse(xml_input, True)
    else:
        parser.ParseFile(xml_input)
    return handler.item
def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'):
    """Replace the namespace part of ``name`` with its mapped short prefix.

    ``name`` is split at the last ``ns_sep``; the part before it (minus a
    leading ``attr_prefix``, if present) is looked up in ``namespaces``.

    Returns:
        * ``name`` unchanged when ``namespaces`` is empty or ``name`` cannot
          be split on ``ns_sep``;
        * ``[attr_prefix]<short><ns_sep><local>`` when the namespace maps to
          a non-empty short prefix;
        * ``[attr_prefix]<local>`` when the namespace is unknown or maps to
          an empty value.

    ``attr_prefix`` is kept in every case so that callers can still
    recognise the key as an attribute.
    """
    if not namespaces:
        return name
    try:
        ns, local_name = name.rsplit(ns_sep, 1)
    except ValueError:
        return name
    # Remove only one leading attr_prefix.  ``str.strip`` would treat the
    # prefix as a set of characters and eat into the namespace itself.
    prefix = attr_prefix if ns.startswith(attr_prefix) else ''
    ns_res = namespaces.get(ns[len(prefix):])
    if ns_res:
        return '{0}{1}{2}{3}'.format(prefix, ns_res, ns_sep, local_name)
    return prefix + local_name
def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t',
          namespace_separator=':',
          namespaces=None,
          full_document=True):
    """Write the element(s) described by ``key`` / ``value`` to a handler.

    Parameters:
        key: Element name, optionally namespace-qualified.
        value: A dictionary (attributes, text and children), a scalar (used
            as the element text), ``None`` (empty element), or an iterable
            of those, which produces one element per entry.
        content_handler: Object receiving ``startElement``, ``characters``,
            ``ignorableWhitespace`` and ``endElement`` calls.
        attr_prefix: Prefix marking dictionary keys that are attributes.
        cdata_key: Dictionary key holding the element's character data.
        depth: Current nesting level; 0 is the document root.
        preprocessor: Optional ``callable(key, value)`` returning a
            replacement ``(key, value)`` pair, or ``None`` to skip the
            element entirely.
        pretty: Emit newlines and indentation as ignorable whitespace.
        newl: Line terminator used when ``pretty`` is true.
        indent: One level of indentation used when ``pretty`` is true.
        namespace_separator: Separator between namespace and local name.
        namespaces: Optional mapping from a namespace to its short prefix.
        full_document: When true, more than one root element is an error.

    Raises:
        ValueError: if ``full_document`` is true and ``value`` yields more
            than one element at depth 0.
    """
    key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
    if preprocessor is not None:
        result = preprocessor(key, value)
        if result is None:
            return
        key, value = result
    if (not hasattr(value, '__iter__')
            or isinstance(value, _basestring)
            or isinstance(value, dict)):
        value = [value]
    # Key carrying namespace declarations; built from attr_prefix so that a
    # custom prefix is honoured too.
    xmlns_key = attr_prefix + 'xmlns'
    for index, v in enumerate(value):
        if full_document and depth == 0 and index > 0:
            raise ValueError('document with multiple roots')
        if v is None:
            v = OrderedDict()
        elif not isinstance(v, dict):
            v = _unicode(v)
        if isinstance(v, _basestring):
            v = OrderedDict(((cdata_key, v),))
        cdata = None
        attrs = OrderedDict()
        children = []
        for ik, iv in v.items():
            if ik == cdata_key:
                # Non-string text (numbers, ...) is converted like scalar
                # element values are; None still means "no text", and
                # strings / bytes are passed through untouched.
                if (iv is not None
                        and not isinstance(iv, (_basestring, bytes))):
                    iv = _unicode(iv)
                cdata = iv
                continue
            if ik.startswith(attr_prefix):
                ik = _process_namespace(ik, namespaces, namespace_separator,
                                        attr_prefix)
                if ik == xmlns_key and isinstance(iv, dict):
                    # A mapping of prefix -> URI becomes xmlns[:prefix]
                    # attributes; the empty prefix is the default namespace.
                    for ns_prefix, ns_uri in iv.items():
                        attr = 'xmlns{0}'.format(
                            ':{0}'.format(ns_prefix) if ns_prefix else '')
                        attrs[attr] = _unicode(ns_uri)
                    continue
                if not isinstance(iv, _unicode):
                    iv = _unicode(iv)
                attrs[ik[len(attr_prefix):]] = iv
                continue
            children.append((ik, iv))
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attrs))
        if pretty and children:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in children:
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, depth+1, preprocessor,
                  pretty, newl, indent, namespaces=namespaces,
                  namespace_separator=namespace_separator)
        if cdata is not None:
            content_handler.characters(cdata)
        if pretty and children:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
            short_empty_elements=False,
            **kwargs):
    """Emit an XML document for the given `input_dict` (reverse of `parse`).

    When `output` (a file-like object) is given, the XML is written to it
    and nothing is returned; otherwise the document is returned as a string.

    Dictionary keys that start with `attr_prefix` (default=`'@'`) become
    XML attributes, and the key equal to `cdata_key` (default=`'#text'`)
    supplies the element's character data.

    Pass `pretty=True` to pretty-print. Lines then end with `newl`
    (default=`'\\n'`) and are indented with `indent` (default=`'\\t'`).

    With `full_document` true (the default) `input_dict` must contain
    exactly one root, otherwise `ValueError` is raised, and the document
    start/end events are emitted around the content.
    """
    if full_document and len(input_dict) != 1:
        raise ValueError('Document must have exactly one root.')
    must_return = output is None
    if must_return:
        output = StringIO()
    # The third positional argument is only passed when short empty elements
    # were requested.
    generator_args = (output, encoding)
    if short_empty_elements:
        generator_args += (True,)
    content_handler = XMLGenerator(*generator_args)
    if full_document:
        content_handler.startDocument()
    for key, value in input_dict.items():
        _emit(key, value, content_handler, full_document=full_document,
              **kwargs)
    if full_document:
        content_handler.endDocument()
    if not must_return:
        return None
    xml_text = output.getvalue()
    try:  # pragma no cover
        xml_text = xml_text.decode(encoding)
    except AttributeError:  # pragma no cover
        # The buffer value has no ``decode`` method: return it unchanged.
        pass
    return xml_text
if __name__ == '__main__':  # pragma: no cover
    # Command-line mode: read XML from stdin and write marshalled
    # (path, item) tuples to stdout.  The only argument is the item depth.
    import marshal
    import sys

    # Use the binary ``buffer`` streams when both are available, otherwise
    # fall back to the plain streams.
    try:
        stdin, stdout = sys.stdin.buffer, sys.stdout.buffer
    except AttributeError:
        stdin, stdout = sys.stdin, sys.stdout

    # Exactly one command-line argument is expected.
    (item_depth,) = sys.argv[1:]
    item_depth = int(item_depth)

    def handle_item(path, item):
        """Dump one (path, item) pair to stdout and keep parsing."""
        marshal.dump((path, item), stdout)
        return True

    try:
        root = parse(stdin,
                     item_depth=item_depth,
                     item_callback=handle_item,
                     dict_constructor=dict)
        # At depth 0 the callback never runs, so the complete document is
        # dumped once at the end.
        if not item_depth:
            handle_item([], root)
    except KeyboardInterrupt:
        pass