Coverage for jutil/xml.py : 92%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2from typing import Optional, Iterable, Dict, Any, List
3from xml import etree
4from xml.etree.ElementTree import Element, SubElement
def _xml_element_value(el: Element, is_int: bool = False) -> Any:
    """
    Gets XML Element value.

    :param el: Element
    :param is_int: If True return value is converted to int (if possible)
    :return: None if the element has no text or only whitespace; int if
        is_int is set and the text parses as an integer; otherwise the
        text with surrounding whitespace stripped
    """
    text = el.text
    if text is None:
        return None

    if is_int:
        try:
            return int(text)
        except (TypeError, ValueError):
            # Not an integer literal: deliberately fall back to the
            # string form below instead of failing the whole parse.
            pass

    stripped = str(text).strip()
    return stripped if stripped else None
def _xml_tag_filter(s: str, strip_namespaces: bool) -> str:
    """
    Returns tag name and optionally strips namespaces.

    Two namespace notations are recognised, in this order: the
    '{uri}name' form (everything up to and including the first '}' is
    dropped) and, only when no '}' is present, the 'prefix:name' form
    (everything up to and including the first ':' is dropped).

    :param s: Tag name
    :param strip_namespaces: Strip namespace prefix
    :return: str
    """
    if not strip_namespaces:
        return s
    for separator in ('}', ':'):
        _, found, local_name = s.partition(separator)
        if found:
            return local_name
    return s
def _xml_set_element_data_r(data: dict, el: Element,  # pylint: disable=too-many-arguments,too-many-locals
                            array_tags: Iterable[str], int_tags: Iterable[str],
                            strip_namespaces: bool, parse_attributes: bool,
                            value_key: str, attribute_prefix: str):
    """
    Converts element (recursively) and stores the result to data dict under the element's tag name.

    Simple elements (no children, no parsed attributes) are stored as a
    plain value. Complex elements are stored as a dict holding the text
    under value_key (only when there is text), attributes under
    attribute_prefix + name, and children under their tag names.

    A tag is stored as a list when it is listed in array_tags or when
    the same tag already exists in data; in the latter case the
    previously stored single value becomes the first list item.

    :param data: Dict to store the converted element to (modified in place)
    :param el: Element
    :param array_tags: Tags that are always stored as lists
    :param int_tags: Tags whose value is converted to int (ignored for array tags)
    :param strip_namespaces: Strip namespace prefix from tag and attribute names
    :param parse_attributes: If False element attributes are ignored
    :param value_key: Key of the text value in complex elements
    :param attribute_prefix: Key prefix of attribute values in complex elements
    """
    tag = _xml_tag_filter(el.tag, strip_namespaces)

    # complex type?
    attrib = el.attrib if parse_attributes else {}
    is_complex = bool(attrib) or len(el) > 0
    is_array = tag in data or tag in array_tags
    is_int = not is_array and tag in int_tags

    value = _xml_element_value(el, is_int=is_int)
    obj: Any
    if is_complex:
        obj = {}
        if value is not None:
            obj[value_key] = value
        # set attributes
        for a_key, a_val in attrib.items():
            obj[attribute_prefix + _xml_tag_filter(a_key, strip_namespaces)] = a_val
        # recurse children
        for child in el:
            _xml_set_element_data_r(obj, child, array_tags=array_tags, int_tags=int_tags,
                                    strip_namespaces=strip_namespaces, parse_attributes=parse_attributes,
                                    value_key=value_key, attribute_prefix=attribute_prefix)
    else:
        obj = value

    # store result
    if is_array:
        items = data.setdefault(tag, [])
        if not isinstance(items, list):
            # tag was first seen as a single value: promote it to a list
            items = [items]
            data[tag] = items
        items.append(obj)
    else:
        # A tag that is already in data is always handled by the array
        # branch above, so a plain assignment cannot overwrite anything.
        data[tag] = obj
def xml_to_dict(xml_bytes: bytes,  # pylint: disable=too-many-arguments,too-many-locals
                tags: Optional[Iterable[str]] = None, array_tags: Optional[Iterable[str]] = None,
                int_tags: Optional[Iterable[str]] = None,
                strip_namespaces: bool = True, parse_attributes: bool = True,
                value_key: str = '@', attribute_prefix: str = '@',
                document_tag: bool = False) -> Dict[str, Any]:
    """
    Parses XML string to dict. In case of simple elements (no children, no attributes) value is stored as is.
    For complex elements value is stored in key '@', attributes '@xxx' and children as sub-dicts.
    Optionally strips namespaces.

    For example:
        <Doc version="1.2">
            <A class="x">
                <B class="x2">hello</B>
            </A>
            <A class="y">
                <B class="y2">world</B>
            </A>
            <C>value node</C>
        </Doc>
    is returned as follows:
        {'@version': '1.2',
         'A': [{'@class': 'x', 'B': {'@': 'hello', '@class': 'x2'}},
               {'@class': 'y', 'B': {'@': 'world', '@class': 'y2'}}],
         'C': 'value node'}

    The input is parsed with the standard library ElementTree parser; see
    its documentation before using this on untrusted XML.

    Args:
        xml_bytes: XML file contents in bytes
        tags: list of tags to parse (pass empty to return all children of top-level tag).
            Matching elements are searched from the whole document, not only directly under the root.
        array_tags: list of tags that should be treated as arrays by default
        int_tags: list of tags that should be treated as ints
        strip_namespaces: if true namespaces will be stripped
        parse_attributes: Elements with attributes are stored as complex types with '@' identifying text value
            and @xxx identifying each attribute
        value_key: Key to store (complex) element value. Default is '@'
        attribute_prefix: Key prefix to store element attribute values. Default is '@'
        document_tag: Set True if Document root tag should be included as well. The root tag is used
            as the key exactly as the parser reports it (namespaces are not stripped from it).

    Returns: dict

    Raises:
        ValueError: if document_tag=True is combined with a non-empty tags list
    """
    if tags is None:
        tags = []
    if array_tags is None:
        array_tags = []
    if int_tags is None:
        int_tags = []

    root = etree.ElementTree.fromstring(xml_bytes)
    if tags:
        if document_tag:
            raise ValueError('xml_to_dict: document_tag=True does not make sense when using selective tag list '
                             'since selective tag list finds tags from the whole document, not only directly under root document tag')
        root_elements: List[Element] = []
        for tag in tags:
            root_elements.extend(root.iter(tag))
    else:
        root_elements = list(root)

    data: Dict[str, Any] = {}
    for el in root_elements:
        _xml_set_element_data_r(data, el, array_tags=array_tags, int_tags=int_tags,
                                strip_namespaces=strip_namespaces, parse_attributes=parse_attributes,
                                value_key=value_key, attribute_prefix=attribute_prefix)

    # set root attributes (done last, so they win over same-named child keys)
    if parse_attributes:
        for a_key, a_val in root.attrib.items():
            data[attribute_prefix + _xml_tag_filter(a_key, strip_namespaces)] = a_val

    if document_tag:
        return {root.tag: data}
    return data
def _xml_filter_tag_name(tag: str) -> str:
    """
    Removes bracketed index markers from a dict key to get the XML tag name.

    Index markers make otherwise identical dict keys unique, e.g. both
    'D[1]' and 'D[2]' produce tag 'D'. An empty marker ('D[]') is
    removed as well.

    :param tag: Dict key
    :return: Tag name without '[n]' / '[]' markers
    """
    return re.sub(r'\[\d*\]', '', tag)
def _xml_element_set_data_r(el: Element, data: dict, value_key: str, attribute_prefix: str):
    """
    Sets element text, attributes and children (recursively) from data.

    Keys of data are interpreted in this order:
    key equal to value_key sets the element text;
    key starting with attribute_prefix sets an attribute (prefix removed);
    list/tuple value adds one child element per item;
    any other value adds a single child element.
    All text and attribute values are converted with str().

    :param el: Element to modify
    :param data: Dict describing the element. A value without 'items'
        attribute (e.g. plain str or int) is used as the element text.
    :param value_key: Key of the element text value
    :param attribute_prefix: Key prefix of the attribute values
    """
    if not hasattr(data, 'items'):
        # scalar payload: store it under the caller's value key so it
        # ends up as element text whatever key naming is in use
        data = {value_key: data}

    prefix_len = len(attribute_prefix)
    for k, v in data.items():
        if k == value_key:
            el.text = str(v)
        elif k.startswith(attribute_prefix):
            el.set(k[prefix_len:], str(v))
        elif isinstance(v, (list, tuple)):
            for item in v:
                child = SubElement(el, _xml_filter_tag_name(k))
                _xml_element_set_data_r(child, item, value_key, attribute_prefix)
        elif isinstance(v, dict):
            child = SubElement(el, _xml_filter_tag_name(k))
            _xml_element_set_data_r(child, v, value_key, attribute_prefix)
        else:
            child = SubElement(el, _xml_filter_tag_name(k))
            child.text = str(v)
def dict_to_element(doc: dict, value_key: str = '@', attribute_prefix: str = '@') -> Element:
    """
    Generates XML Element from dict.
    Generates complex elements by assuming element attributes are prefixed with '@', and value is stored to plain '@'
    in case of complex element. Children are sub-dicts.

    Repeated sibling elements are given either as a list value or as
    separate keys made unique with an index suffix in brackets; the
    suffix is removed from the generated tag name.

    For example:
        {
            'Doc': {
                '@version': '1.2',
                'A': [{'@class': 'x', 'B': {'@': 'hello', '@class': 'x2'}},
                      {'@class': 'y', 'B': {'@': 'world', '@class': 'y2'}}],
                'C': 'value node',
                'D[1]': 'value node line 1',
                'D[2]': 'value node line 2',
            }
        }
    produces an element which serializes as follows:
        <?xml version="1.0" ?>
        <Doc version="1.2">
            <A class="x">
                <B class="x2">hello</B>
            </A>
            <A class="y">
                <B class="y2">world</B>
            </A>
            <C>value node</C>
            <D>value node line 1</D>
            <D>value node line 2</D>
        </Doc>

    Args:
        doc: dict. Must have single root key dict.
        value_key: Key to store (complex) element value. Default is '@'
        attribute_prefix: Key prefix to store element attribute values. Default is '@'

    Returns: xml.etree.ElementTree.Element

    Raises:
        ValueError: if doc does not have exactly one root key
    """
    if len(doc) != 1:
        raise ValueError('Invalid data dict for XML generation, document root must have single element')

    # exactly one item is guaranteed by the check above
    (tag, data), = doc.items()
    el = Element(_xml_filter_tag_name(tag))
    _xml_element_set_data_r(el, data, value_key, attribute_prefix)
    return el