Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import re 

2from typing import Optional, Iterable, Dict, Any, List 

3from xml import etree 

4from xml.etree.ElementTree import Element, SubElement 

5 

6 

def _xml_element_value(el: Element, is_int: bool = False) -> Any:
    """
    Extracts the text content of an XML element.
    :param el: Element whose text is read
    :param is_int: If True, conversion of the text to int is attempted first
    :return: None when the element has no text (or only whitespace), an int when
        is_int is set and the text converts, otherwise the stripped text as str
    """
    text = el.text
    if text is None:
        return None

    if is_int:
        try:
            return int(text)
        except Exception:  # nosec
            # Not an integer after all: fall through and return the text as str.
            pass

    stripped = str(text).strip()
    return stripped or None

26 

27 

def _xml_tag_filter(s: str, strip_namespaces: bool) -> str:
    """
    Returns the tag name, optionally without its namespace part.
    A '{uri}' style namespace is removed if the name contains '}'; otherwise a
    'prefix:' style namespace is removed if the name contains ':'.
    :param s: Tag (or attribute) name
    :param strip_namespaces: If False the name is returned untouched
    :return: str
    """
    if not strip_namespaces:
        return s

    # '}' takes precedence over ':'; only the first occurrence is considered.
    for separator in ('}', ':'):
        _, found, local_name = s.partition(separator)
        if found:
            return local_name
    return s

44 

45 

def _xml_set_element_data_r(data: dict, el: Element,  # pylint: disable=too-many-arguments,too-many-locals
                            array_tags: Iterable[str], int_tags: Iterable[str],
                            strip_namespaces: bool, parse_attributes: bool,
                            value_key: str, attribute_prefix: str):
    """
    Converts one XML element (recursively) and stores the result in data under the element's tag.

    An element is "complex" when it has children or (with parse_attributes) attributes. Complex
    elements become dicts: text under value_key, attributes under attribute_prefix + name and
    children under their own tags. Other elements are stored as their plain value.

    The result is appended to a list when the tag is listed in array_tags or is already present
    in data (an existing non-list value is wrapped into a list first). int conversion is only
    attempted for non-array tags listed in int_tags.

    :param data: Dict receiving the converted element (modified in place)
    :param el: Element to convert
    :param array_tags: Tags always stored as lists
    :param int_tags: Tags whose text should be converted to int
    :param strip_namespaces: Strip namespaces from tag and attribute names
    :param parse_attributes: If False element attributes are ignored
    :param value_key: Key for the text value of a complex element
    :param attribute_prefix: Key prefix for attribute values
    """
    name = _xml_tag_filter(el.tag, strip_namespaces)
    attributes = el.attrib if parse_attributes else {}
    children = list(el)

    as_list = name in data or name in array_tags
    as_int = not as_list and name in int_tags
    text_value = _xml_element_value(el, is_int=as_int)

    if len(attributes) > 0 or len(children) > 0:
        # complex type: text, attributes and children all live in one dict
        node = {}
        if text_value is not None:
            node[value_key] = text_value
        for attr_name, attr_value in attributes.items():
            node[attribute_prefix + _xml_tag_filter(attr_name, strip_namespaces)] = attr_value
        for child in children:
            _xml_set_element_data_r(node, child, array_tags=array_tags, int_tags=int_tags,
                                    strip_namespaces=strip_namespaces, parse_attributes=parse_attributes,
                                    value_key=value_key, attribute_prefix=attribute_prefix)
    else:
        # simple type: the value is stored as is
        node = text_value

    if not as_list:
        if name in data:
            raise Exception('XML parsing failed, tag {} collision'.format(name))
        data[name] = node
        return

    # array: promote a previously stored single value to a list before appending
    bucket = data.setdefault(name, [])
    if not isinstance(bucket, list):
        bucket = [bucket]
        data[name] = bucket
    bucket.append(node)

88 

89 

def xml_to_dict(xml_bytes: bytes,  # pylint: disable=too-many-arguments,too-many-locals
                tags: Optional[Iterable[str]] = None, array_tags: Optional[Iterable[str]] = None,
                int_tags: Optional[Iterable[str]] = None,
                strip_namespaces: bool = True, parse_attributes: bool = True,
                value_key: str = '@', attribute_prefix: str = '@',
                document_tag: bool = False) -> Dict[str, Any]:
    """
    Parses XML bytes to dict. In case of simple elements (no children, no attributes) value is stored as is.
    For complex elements value is stored in key '@', attributes '@xxx' and children as sub-dicts.
    Optionally strips namespaces.

    For example:
        <Doc version="1.2">
            <A class="x">
                <B class="x2">hello</B>
            </A>
            <A class="y">
                <B class="y2">world</B>
            </A>
            <C>value node</C>
        </Doc>
    is returned as follows:
        {'@version': '1.2',
         'A': [{'@class': 'x', 'B': {'@': 'hello', '@class': 'x2'}},
               {'@class': 'y', 'B': {'@': 'world', '@class': 'y2'}}],
         'C': 'value node'}

    Args:
        xml_bytes: XML file contents in bytes
        tags: list of tags to parse (pass empty to return all children of top-level tag).
            Tags are searched from the whole document and are matched against the tag names
            as parsed, i.e. before any namespace stripping.
        array_tags: list of tags that should be treated as arrays by default
        int_tags: list of tags that should be treated as ints
        strip_namespaces: if true namespaces will be stripped
        parse_attributes: Elements with attributes are stored as complex types with '@' identifying text value and @xxx identifying each attribute
        value_key: Key to store (complex) element value. Default is '@'
        attribute_prefix: Key prefix to store element attribute values. Default is '@'
        document_tag: Set True if Document root tag should be included as well

    Returns: dict

    Raises:
        Exception: if document_tag=True is combined with a non-empty tags list
    """
    if tags is None:
        tags = []
    if array_tags is None:
        array_tags = []
    if int_tags is None:
        int_tags = []

    # NOTE(review): xml.etree is not hardened against maliciously constructed XML;
    # confirm that xml_bytes always comes from a trusted source.
    root = etree.ElementTree.fromstring(xml_bytes)
    if tags:
        if document_tag:
            raise Exception('xml_to_dict: document_tag=True does not make sense when using selective tag list '
                            'since selective tag list finds tags from the whole document, not only directly under root document tag')
        root_elements: List[Element] = []
        for tag in tags:
            root_elements.extend(root.iter(tag))
    else:
        root_elements = list(root)

    data: Dict[str, Any] = {}
    for el in root_elements:
        _xml_set_element_data_r(data, el, array_tags=array_tags, int_tags=int_tags,
                                strip_namespaces=strip_namespaces, parse_attributes=parse_attributes,
                                value_key=value_key, attribute_prefix=attribute_prefix)

    # set root attributes
    if parse_attributes:
        for a_key, a_val in root.attrib.items():
            data[attribute_prefix + _xml_tag_filter(a_key, strip_namespaces)] = a_val

    if not document_tag:
        return data
    # The root tag goes through the same namespace filter as every other tag, so that
    # strip_namespaces=True does not leave a '{uri}' prefix on the document key.
    return {_xml_tag_filter(root.tag, strip_namespaces): data}

160 

161 

def _xml_filter_tag_name(tag: str) -> str:
    """
    Removes bracketed index suffixes from a dict key to get the XML tag name.
    Both numbered ('D[1]') and empty ('D[]') indexes are removed; anything else
    in brackets (e.g. 'D[x]') is left untouched.
    :param tag: Dict key, e.g. 'D[2]'
    :return: Tag name without index, e.g. 'D'
    """
    return re.sub(r'\[\d*\]', '', tag)

164 

165 

def _xml_element_set_data_r(el: Element, data: dict, value_key: str, attribute_prefix: str):
    """
    Recursively fills an XML element from dict data.

    Each key of data is handled as follows:
      * value_key sets the element text
      * keys starting with attribute_prefix set an attribute (prefix removed from the name)
      * list/tuple values create one child element per item
      * dict values create one child element which is filled recursively
      * any other value creates a child element with str(value) as its text

    Data that is not dict-like (has no 'items') is treated as the text value of el.

    :param el: Element to fill (modified in place)
    :param data: Dict describing the element, or a plain value
    :param value_key: Key holding the element text
    :param attribute_prefix: Key prefix marking attribute values
    """
    if not hasattr(data, 'items'):
        # Plain value (e.g. a list item): wrap it under the configured value key
        # so it ends up as element text also when value_key is not '@'.
        data = {value_key: data}

    # Attribute name is the key without the whole prefix, whatever its length.
    prefix_len = len(attribute_prefix)
    for k, v in data.items():
        if k == value_key:
            el.text = str(v)
        elif k.startswith(attribute_prefix):
            el.set(k[prefix_len:], str(v))
        elif isinstance(v, (list, tuple)):
            for item in v:
                child = SubElement(el, _xml_filter_tag_name(k))
                _xml_element_set_data_r(child, item, value_key, attribute_prefix)
        elif isinstance(v, dict):
            child = SubElement(el, _xml_filter_tag_name(k))
            _xml_element_set_data_r(child, v, value_key, attribute_prefix)
        else:
            child = SubElement(el, _xml_filter_tag_name(k))
            child.text = str(v)

188 

189 

def dict_to_element(doc: dict, value_key: str = '@', attribute_prefix: str = '@') -> Element:
    """
    Generates XML Element from dict.
    Element attributes are expected in keys prefixed with '@' and the text value of a complex
    element in plain '@' key. Children are sub-dicts, repeated children are lists.
    A numeric index in brackets (e.g. 'D[1]') is removed from the key to form the tag name,
    which allows listing same-named sibling elements under distinct dict keys.

    For example:
        {
            'Doc': {
                '@version': '1.2',
                'A': [{'@class': 'x', 'B': {'@': 'hello', '@class': 'x2'}},
                      {'@class': 'y', 'B': {'@': 'world', '@class': 'y2'}}],
                'C': 'value node',
                'D[1]': 'value node line 1',
                'D[2]': 'value node line 2',
            }
        }
    corresponds to the following XML (indented here for readability):
        <Doc version="1.2">
            <A class="x">
                <B class="x2">hello</B>
            </A>
            <A class="y">
                <B class="y2">world</B>
            </A>
            <C>value node</C>
            <D>value node line 1</D>
            <D>value node line 2</D>
        </Doc>

    Args:
        doc: dict. Must have a single root key.
        value_key: Key to store (complex) element value. Default is '@'
        attribute_prefix: Key prefix to store element attribute values. Default is '@'

    Returns: xml.etree.ElementTree.Element

    Raises:
        Exception: if doc does not have exactly one root key
    """
    if len(doc) != 1:
        raise Exception('Invalid data dict for XML generation, document root must have single element')

    # Exactly one item exists at this point, so it can be unpacked directly.
    (root_tag, root_data), = doc.items()
    root = Element(_xml_filter_tag_name(root_tag))
    _xml_element_set_data_r(root, root_data, value_key, attribute_prefix)
    return root