Coverage for jutil/xml.py : 92%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2from typing import Optional, Iterable, Dict, Any, List
3from xml import etree
4from xml.etree.ElementTree import Element, SubElement
7def _xml_element_value(el: Element, is_int: bool = False) -> Any:
8 """
9 Gets XML Element value.
10 :param el: Element
11 :param is_int: If True return value is converted to int (if possible)
12 :return: value of the element (int/str)
13 """
14 # None
15 if el.text is None:
16 return None
17 # int
18 try:
19 if is_int:
20 return int(el.text)
21 except Exception: # nosec
22 pass
23 # default to str if not empty
24 s = str(el.text).strip()
25 return s if s else None
28def _xml_tag_filter(s: str, strip_namespaces: bool) -> str:
29 """
30 Returns tag name and optionally strips namespaces.
31 :param s: Tag name
32 :param strip_namespaces: Strip namespace prefix
33 :return: str
34 """
35 if strip_namespaces: 35 ↛ 43line 35 didn't jump to line 43, because the condition on line 35 was never false
36 ns_end = s.find("}")
37 if ns_end != -1:
38 s = s[ns_end + 1 :]
39 else:
40 ns_end = s.find(":")
41 if ns_end != -1: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true
42 s = s[ns_end + 1 :]
43 return s
def _xml_set_element_data_r(  # pylint: disable=too-many-arguments,too-many-locals
    data: dict,
    el: Element,
    array_tags: Iterable[str],
    int_tags: Iterable[str],
    strip_namespaces: bool,
    parse_attributes: bool,
    value_key: str,
    attribute_prefix: str,
):
    """
    Converts element (and recursively its children) to a value and stores it in data under the element tag.

    Elements without attributes and children are stored as plain values.
    Elements with (parsed) attributes or children are stored as dicts: text
    under value_key, attributes under attribute_prefix + name, children under
    their own tags.

    :param data: dict receiving the result (modified in place)
    :param el: Element to convert
    :param array_tags: Tags always stored as lists
    :param int_tags: Tags whose text is converted to int (if possible); ignored for list-valued tags
    :param strip_namespaces: Strip namespaces from tag and attribute names
    :param parse_attributes: If False element attributes are ignored
    :param value_key: Key of the text value in complex elements
    :param attribute_prefix: Key prefix of attributes in complex elements
    """
    name = _xml_tag_filter(el.tag, strip_namespaces)
    children = list(el)
    attributes = el.attrib if parse_attributes else {}

    # A tag that repeats under the same parent becomes a list even if not listed in array_tags.
    has_structure = bool(attributes) or bool(children)
    as_list = name in data or name in array_tags
    as_int = not as_list and name in int_tags

    text_value = _xml_element_value(el, is_int=as_int)
    if has_structure:
        node = {}
        if text_value is not None:
            node[value_key] = text_value
    else:
        node = text_value

    # Non-empty attributes imply has_structure, so node is a dict here.
    for attr_name, attr_value in attributes.items():
        node[attribute_prefix + _xml_tag_filter(attr_name, strip_namespaces)] = attr_value  # pytype: disable=unsupported-operands

    for child in children:
        _xml_set_element_data_r(
            node,
            child,
            array_tags,
            int_tags,
            strip_namespaces,
            parse_attributes,
            value_key,
            attribute_prefix,
        )

    if as_list:
        bucket = data.setdefault(name, [])
        if not isinstance(bucket, list):
            # The first occurrence was stored as a single value: promote it to a list.
            bucket = [bucket]
            data[name] = bucket
        bucket.append(node)
        return

    # Defensive check: as_list is already True whenever name is present in data.
    if name in data:
        raise Exception("XML parsing failed, tag {} collision".format(name))
    data[name] = node
def xml_to_dict(  # pylint: disable=too-many-arguments,too-many-locals
    xml_bytes: bytes,
    tags: Optional[Iterable[str]] = None,
    array_tags: Optional[Iterable[str]] = None,
    int_tags: Optional[Iterable[str]] = None,
    strip_namespaces: bool = True,
    parse_attributes: bool = True,
    value_key: str = "@",
    attribute_prefix: str = "@",
    document_tag: bool = False,
) -> Dict[str, Any]:
    """
    Parses XML to dict.

    Simple elements (no children, no attributes) are stored as plain values.
    Complex elements are stored as dicts with the text value in key '@',
    attributes in keys '@xxx' and children as sub-dicts.
    Namespaces are optionally stripped from tag and attribute names.

    For example:
        <Doc version="1.2">
            <A class="x">
                <B class="x2">hello</B>
            </A>
            <A class="y">
                <B class="y2">world</B>
            </A>
            <C>value node</C>
        </Doc>
    is returned as follows:
        {'@version': '1.2',
         'A': [{'@class': 'x', 'B': {'@': 'hello', '@class': 'x2'}},
               {'@class': 'y', 'B': {'@': 'world', '@class': 'y2'}}],
         'C': 'value node'}

    Args:
        xml_bytes: XML file contents in bytes
        tags: list of tags to parse; matching elements are searched from the whole document
            (pass empty to return all children of top-level tag)
        array_tags: list of tags that should be treated as arrays by default
        int_tags: list of tags that should be treated as ints
        strip_namespaces: if true namespaces will be stripped
        parse_attributes: Elements with attributes are stored as complex types with '@' identifying
            text value and @xxx identifying each attribute
        value_key: Key to store (complex) element value. Default is '@'
        attribute_prefix: Key prefix to store element attribute values. Default is '@'
        document_tag: Set True if Document root tag should be included as well. The root tag is
            used as the key exactly as parsed (namespaces are not stripped from it)

    Returns: dict

    Raises:
        Exception: if both a non-empty tags list and document_tag=True are given
    """
    tags = [] if tags is None else tags
    array_tags = [] if array_tags is None else array_tags
    int_tags = [] if int_tags is None else int_tags

    root = etree.ElementTree.fromstring(xml_bytes)

    root_elements: List[Element]
    if not tags:
        root_elements = list(root)
    else:
        if document_tag:
            raise Exception(
                "xml_to_dict: document_tag=True does not make sense when using selective tag list "
                "since selective tag list finds tags from the whole document, not only directly under root document tag"
            )
        root_elements = [match for tag in tags for match in root.iter(tag)]

    result: Dict[str, Any] = {}
    for element in root_elements:
        _xml_set_element_data_r(
            result,
            element,
            array_tags=array_tags,
            int_tags=int_tags,
            strip_namespaces=strip_namespaces,
            parse_attributes=parse_attributes,
            value_key=value_key,
            attribute_prefix=attribute_prefix,
        )

    # Root attributes are stored next to the root's children.
    if parse_attributes:
        result.update(
            {attribute_prefix + _xml_tag_filter(name, strip_namespaces): value for name, value in root.attrib.items()}
        )

    if document_tag:
        return {root.tag: result}
    return result
189def _xml_filter_tag_name(tag: str) -> str:
190 return re.sub(r"\[\d+\]", "", tag)
193def _xml_element_set_data_r(el: Element, data: dict, value_key: str, attribute_prefix: str):
194 # print('_xml_element_set_data_r({}): {}'.format(el.tag, data))
195 if not hasattr(data, "items"):
196 data = {"@": data}
197 for k, v in data.items():
198 if k == value_key:
199 el.text = str(v)
200 elif k.startswith(attribute_prefix):
201 el.set(k[1:], str(v))
202 elif isinstance(v, (list, tuple)):
203 for v2 in v:
204 el2 = SubElement(el, _xml_filter_tag_name(k))
205 assert isinstance(el2, Element)
206 _xml_element_set_data_r(el2, v2, value_key, attribute_prefix)
207 elif isinstance(v, dict):
208 el2 = SubElement(el, _xml_filter_tag_name(k))
209 assert isinstance(el2, Element)
210 _xml_element_set_data_r(el2, v, value_key, attribute_prefix)
211 else:
212 el2 = SubElement(el, _xml_filter_tag_name(k))
213 assert isinstance(el2, Element)
214 el2.text = str(v)
def dict_to_element(doc: dict, value_key: str = "@", attribute_prefix: str = "@") -> Element:
    """
    Generates XML Element from dict.
    Generates complex elements by assuming element attributes are prefixed with '@', and value is stored to plain '@'
    in case of complex element. Children are sub-dicts. Repeated sibling elements are given either as a list
    or as separate keys with a numeric index suffix (e.g. 'D[1]', 'D[2]'); the suffix is removed from the tag name.

    For example:
        {
            'Doc': {
                '@version': '1.2',
                'A': [{'@class': 'x', 'B': {'@': 'hello', '@class': 'x2'}},
                      {'@class': 'y', 'B': {'@': 'world', '@class': 'y2'}}],
                'C': 'value node',
                'D[1]': 'value node line 1',
                'D[2]': 'value node line 2',
            }
        }
    corresponds to the following XML:
        <Doc version="1.2">
            <A class="x">
                <B class="x2">hello</B>
            </A>
            <A class="y">
                <B class="y2">world</B>
            </A>
            <C>value node</C>
            <D>value node line 1</D>
            <D>value node line 2</D>
        </Doc>

    Args:
        doc: dict. Must have exactly one key (the document root tag) whose value is the root element data.
        value_key: Key to store (complex) element value. Default is '@'
        attribute_prefix: Key prefix to store element attribute values. Default is '@'

    Returns: xml.etree.ElementTree.Element

    Raises:
        Exception: if doc does not contain exactly one root key
    """
    if len(doc) != 1:
        raise Exception("Invalid data dict for XML generation, document root must have single element")

    # Exactly one item is guaranteed by the check above.
    tag, data = next(iter(doc.items()))
    el = Element(_xml_filter_tag_name(tag))
    _xml_element_set_data_r(el, data, value_key, attribute_prefix)
    return el