Coverage for sbe2/xmlparser/types.py: 92%
261 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-29 14:27 +0200
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-29 14:27 +0200
1from ..schema import (
2 ValidValue,
3 Enum,
4 Type,
5 Composite,
6 Presence,
7 Set,
8 Choice,
9 FixedLengthElement,
10 Ref,
11 Message,
12 MessageSchema,
13 Group,
14 Data,
15 Field,
16 builtin
17)
19from lxml.etree import Element
20from .attributes import (
21 parse_name,
22 parse_description,
23 parse_since_version,
24 parse_deprecated,
25 parse_encoding_type,
26 parse_offset,
27 parse_type as parse_type_attr,
28 parse_presence,
29 parse_primitive_type,
30 parse_length,
31 parse_id,
32 parse_package,
33 parse_version,
34 parse_semantic_version,
35 parse_byte_order,
36 parse_header_type,
37 parse_alignment,
38 parse_semantic_type,
39 parse_block_length,
40 parse_dimension_type,
41 parse_value_ref
42)
43from .errors import SchemaParsingError
44from .ctx import ParsingContext
45from lxml.etree import XMLParser, parse, QName
46from lxml import ElementInclude
47from typing import Any
def parse_valid_value(val_val: Element, encoding_type: str) -> ValidValue:
    """
    Parses a validValue element from XML.

    Args:
        val_val (Element): The XML element representing a valid value.
        encoding_type (str): Name of the enum's encoding type. When it is
            'char' the element text is stored as ASCII bytes; for any other
            name the text is parsed as an integer.

    Returns:
        ValidValue: An instance of ValidValue with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'validValue', or if the element
            text is missing, not ASCII-encodable (char encoding) or not an
            integer (any other encoding).
    """
    if val_val.tag != "validValue":
        raise SchemaParsingError(f"Expected 'validValue' tag, got '{val_val.tag}'")
    name = parse_name(val_val)
    description = parse_description(val_val)

    text = val_val.text
    try:
        if text is None:
            # An empty element has no text; report it as a schema problem
            # instead of letting an AttributeError/TypeError escape.
            raise ValueError("validValue element has no text")
        # UnicodeEncodeError is a ValueError subclass, so one handler covers
        # both the char and the integer branch.
        value = text.encode('ascii') if encoding_type == 'char' else int(text)
    except ValueError as e:
        raise SchemaParsingError(
            f"Invalid value for validValue '{name}': {text}"
        ) from e

    since_version = parse_since_version(val_val)
    deprecated = parse_deprecated(val_val)

    return ValidValue(
        name=name,
        description=description,
        value=value,
        since_version=since_version,
        deprecated=deprecated,
    )
def parse_enum(node: Element) -> Enum:
    """
    Builds an Enum from an <enum> XML element.

    Every child of the element is parsed as a validValue. Names and values
    of the valid values must be unique within the enum. After construction
    each valid value gets its `enum` attribute pointed back at the new Enum.

    Args:
        node (Element): The XML element representing an enum.

    Returns:
        Enum: An instance of Enum with parsed attributes.

    Raises:
        SchemaParsingError: On a wrong tag or duplicate names/values.
    """
    tag = node.tag
    if tag != "enum":
        raise SchemaParsingError(f"Expected 'enum' tag, got '{tag}'")

    name = parse_name(node)
    description = parse_description(node)
    since_version = parse_since_version(node)
    deprecated = parse_deprecated(node)
    encoding_type = parse_encoding_type(node)
    offset = parse_offset(node)

    members = []
    for child in node:
        members.append(parse_valid_value(child, encoding_type))

    # names and values must be unique
    member_count = len(members)
    if len({member.name for member in members}) != member_count:
        raise SchemaParsingError(f"Duplicate valid value names found in enum '{name}'")
    if len({member.value for member in members}) != member_count:
        raise SchemaParsingError(f"Duplicate valid value values found in enum '{name}'")

    result = Enum(
        name=name,
        description=description,
        since_version=since_version,
        deprecated=deprecated,
        valid_values=members,
        encoding_type_name=encoding_type,
        offset=offset,
    )
    # Back-link every valid value to the enum that owns it.
    for member in members:
        member.enum = result
    return result
def parse_choice(node: Element) -> Choice:
    """
    Builds a Choice from a <choice> XML element.

    The element text is converted with `int()` and stored as the choice
    value.

    Args:
        node (Element): The XML element representing a choice.

    Returns:
        Choice: An instance of Choice with parsed attributes.

    Raises:
        SchemaParsingError: On a wrong tag or when the element text is
            missing or not an integer.
    """
    if node.tag != "choice":
        raise SchemaParsingError(f"Expected 'choice' tag, got '{node.tag}'")

    name = parse_name(node)
    description = parse_description(node)
    since_version = parse_since_version(node)
    deprecated = parse_deprecated(node)

    raw = node.text
    try:
        # TypeError covers a missing text (None), ValueError a non-numeric one.
        number = int(raw)
    except (ValueError, TypeError) as err:
        raise SchemaParsingError(
            f"Invalid value for choice '{name}': {raw}"
        ) from err

    return Choice(
        name=name,
        description=description,
        since_version=since_version,
        deprecated=deprecated,
        value=number,
    )
def parse_set(node: Element) -> Set:
    """
    Builds a Set from a <set> XML element.

    Every child of the element is parsed as a choice; choice names and
    choice values must be unique within the set.

    Args:
        node (Element): The XML element representing a set.

    Returns:
        Set: An instance of Set with parsed attributes.

    Raises:
        SchemaParsingError: On a wrong tag or duplicate choice names/values.
    """
    tag = node.tag
    if tag != "set":
        raise SchemaParsingError(f"Expected 'set' tag, got '{tag}'")

    name = parse_name(node)
    description = parse_description(node)
    since_version = parse_since_version(node)
    deprecated = parse_deprecated(node)
    encoding_type = parse_encoding_type(node)
    offset = parse_offset(node)

    options = []
    for child in node:
        options.append(parse_choice(child))

    # names and values must be unique
    option_count = len(options)
    if len({option.name for option in options}) != option_count:
        raise SchemaParsingError(f"Duplicate choice names found in set '{name}'")
    if len({option.value for option in options}) != option_count:
        raise SchemaParsingError(f"Duplicate choice value found in set '{name}'")

    return Set(
        name=name,
        description=description,
        since_version=since_version,
        deprecated=deprecated,
        offset=offset,
        encoding_type_name=encoding_type,
        choices=options,
    )
def parse_type(node: Element) -> Type:
    """
    Builds a Type from a <type> XML element.

    The raw element text is handed to Type unchanged as `value`.

    Args:
        node (Element): The XML element representing a type.

    Returns:
        Type: An instance of Type with parsed attributes.

    Raises:
        SchemaParsingError: If the element tag is not 'type'.
    """
    if node.tag != "type":
        raise SchemaParsingError(f"Expected 'type' tag, got '{node.tag}'")

    # Entries are evaluated top to bottom, i.e. attributes are parsed in
    # exactly this order.
    attrs = {
        "name": parse_name(node),
        "description": parse_description(node),
        "since_version": parse_since_version(node),
        "deprecated": parse_deprecated(node),
        "primitive_type": parse_primitive_type(node),
        "offset": parse_offset(node),
        "length": parse_length(node),
        "value_ref": parse_value_ref(node),
        "value": node.text,
        "presence": parse_presence(node),
    }
    return Type(**attrs)
def parse_ref(node: Element) -> Ref:
    """
    Builds a Ref from a <ref> XML element.

    Args:
        node (Element): The XML element representing a reference.

    Returns:
        Ref: An instance of Ref holding the name, description, referenced
            type name and offset.

    Raises:
        SchemaParsingError: If the element tag is not 'ref'.
    """
    if node.tag != "ref":
        raise SchemaParsingError(f"Expected 'ref' tag, got '{node.tag}'")

    # Entries are evaluated top to bottom, i.e. attributes are parsed in
    # exactly this order.
    attrs = {
        "name": parse_name(node),
        "description": parse_description(node),
        "type_name": parse_type_attr(node),
        "offset": parse_offset(node),
    }
    return Ref(**attrs)
def parse_composite_element(node: Element) -> FixedLengthElement:
    """
    Parses one child element of a composite.

    Dispatches on the element tag to the parser for enum, type, set, ref or
    (nested) composite elements.

    Args:
        node (Element): The XML element representing a composite element.

    Returns:
        FixedLengthElement: The object produced by the tag-specific parser.

    Raises:
        SchemaParsingError: If the tag is none of the supported ones.
    """
    parsers = {
        "enum": parse_enum,
        "type": parse_type,
        "set": parse_set,
        "ref": parse_ref,
        "composite": parse_composite,
    }
    parser = parsers.get(node.tag)
    if parser is None:
        raise SchemaParsingError(f"Unknown composite element type: {node.tag}")
    return parser(node)
def parse_composite(node: Element) -> Composite:
    """
    Builds a Composite from a <composite> XML element.

    Each child element is parsed with `parse_composite_element`, so
    composites may nest.

    Args:
        node (Element): The XML element representing a composite.

    Returns:
        Composite: An instance of Composite with parsed attributes.

    Raises:
        SchemaParsingError: If the element tag is not 'composite'.
    """
    tag = node.tag
    if tag != "composite":
        raise SchemaParsingError(f"Expected 'composite' tag, got '{tag}'")

    name = parse_name(node)
    description = parse_description(node)
    since_version = parse_since_version(node)
    deprecated = parse_deprecated(node)
    offset = parse_offset(node)

    members = []
    for child in node:
        members.append(parse_composite_element(child))

    return Composite(
        name=name,
        description=description,
        since_version=since_version,
        deprecated=deprecated,
        offset=offset,
        elements=members,
    )
def parse_message_schema(node: Element) -> MessageSchema:
    """
    Builds a MessageSchema from the <messageSchema> root element.

    Only the local part of the tag is compared, so the element may live in
    any XML namespace.

    Args:
        node (Element): The XML element representing a message schema.

    Returns:
        MessageSchema: An instance of MessageSchema with parsed attributes.

    Raises:
        SchemaParsingError: If the local tag name is not 'messageSchema'.
    """
    #TODO: check namespace?
    local_name = QName(node.tag).localname
    if local_name != "messageSchema":
        raise SchemaParsingError(f"Expected 'messageSchema' tag, got '{local_name}'")

    # Entries are evaluated top to bottom, i.e. attributes are parsed in
    # exactly this order.
    attrs = {
        "package": parse_package(node),
        "version": parse_version(node),
        "semantic_version": parse_semantic_version(node),
        "id": parse_id(node),
        "byte_order": parse_byte_order(node),
        "header_type_name": parse_header_type(node),
        "description": parse_description(node),
    }
    return MessageSchema(**attrs)
def get_package(node:Element) -> str | None:
    """
    Finds the package declared by an enclosing <messages> element.

    Walks up from the parent of `node` towards the root and returns the
    first non-empty package found on an ancestor tagged 'messages'.

    Args:
        node (Element): The element whose ancestors are searched.

    Returns:
        str | None: The package name, or None when no ancestor provides one.
    """
    parent = node.getparent()
    while parent is not None:
        if parent.tag == 'messages':
            found = parse_package(parent, required=False)
            if found:
                return found
        parent = parent.getparent()
    return None
def parse_message(node: Element, ctx: ParsingContext, default_package:str) -> Message:
    """
    Builds a Message from a <message> XML element.

    Only the local part of the tag is compared, so the element may live in
    any XML namespace. The message package is taken from an enclosing
    <messages> element when one declares it, otherwise `default_package`
    is used.

    Args:
        node (Element): The XML element representing a message.
        ctx (ParsingContext): The parsing context passed on to the child
            element parsers.
        default_package (str): Package used when no enclosing <messages>
            element supplies one.

    Returns:
        Message: An instance of Message with parsed attributes.

    Raises:
        SchemaParsingError: If the local tag name is not 'message'.
    """
    #TODO: check namespace?
    local_name = QName(node.tag).localname
    if local_name != "message":
        raise SchemaParsingError(f"Expected 'message' tag, got '{local_name}'")

    # Entries are evaluated top to bottom, i.e. attributes are parsed in
    # exactly this order.
    attrs = {
        "id": parse_id(node),
        "name": parse_name(node),
        "description": parse_description(node),
        "semantic_type": parse_semantic_type(node),
        "block_length": parse_block_length(node),
        "since_version": parse_since_version(node),
        "deprecated": parse_deprecated(node),
        "alignment": parse_alignment(node),
        "package": get_package(node) or default_package,
    }
    attrs["fields"], attrs["groups"], attrs["datas"] = parse_elements(node, ctx)
    return Message(**attrs)
def value_ref_to_valid_value(value_ref:str, ctx:ParsingContext) -> ValidValue:
    """
    Resolves an 'EnumName.valueName' reference to a ValidValue.

    Args:
        value_ref (str): Reference made of an enum type name and a valid
            value name separated by exactly one dot.
        ctx (ParsingContext): Parsing context whose `types` are searched.

    Returns:
        ValidValue: The referenced valid value.

    Raises:
        SchemaParsingError: For any failure (malformed reference, unknown
            type, non-enum type, unknown value); the original exception is
            attached as the cause.
    """
    try:
        enum_name, valid_value = value_ref.split('.')
        target = ctx.types[enum_name]
        if not isinstance(target, Enum):
            raise ValueError(f"'{enum_name}' type is not enum")
        hit = next(
            (candidate for candidate in target.valid_values if candidate.name == valid_value),
            None,
        )
        if hit is None:
            raise ValueError(f"Enum '{enum_name}' does not contain value '{valid_value}'")
        return hit
    except Exception as e:
        raise SchemaParsingError(f"Invalid value reference: '{value_ref}'") from e
def field_constant_value(value_ref: str, text: str, type_: FixedLengthElement, ctx: ParsingContext) -> Any:
    """
    Resolves the value of a constant field.

    The field's own `valueRef` / text take precedence; when they are empty
    and the field type is a `Type`, the type's `value_ref` / `const_val` are
    used instead. Exactly one of the two sources must end up being defined.

    Args:
        value_ref (str): The field's valueRef attribute (may be empty/None).
        text (str): The field element text (may be empty/None).
        type_ (FixedLengthElement): The resolved type of the field.
        ctx (ParsingContext): Parsing context used to resolve a valueRef.

    Returns:
        Any: The value of the referenced enum valid value, or the constant
            text parsed by `type_.parse`.

    Raises:
        SchemaParsingError: If both or neither of valueRef and constant value
            are defined, or if the valueRef cannot be resolved.
    """
    value_ref = value_ref or (type_.value_ref if isinstance(type_, Type) else None)
    # NOTE(review): parse_type hands the element text to Type as `value`;
    # confirm that Type exposes it under the name `const_val`.
    const_val = text or (type_.const_val if isinstance(type_, Type) else None)
    if bool(value_ref) == bool(const_val):
        # The field node is not available here, so the type is used to
        # identify the offending field in the message.
        type_name = getattr(type_, 'name', type_)
        raise SchemaParsingError(
            "Exactly one of 'valueRef' attribute or constant value needs to be "
            f"defined for the constant field of type '{type_name}'"
        )
    if value_ref:
        return value_ref_to_valid_value(value_ref, ctx).value
    return type_.parse(const_val)
def parse_field(node: Element, ctx: ParsingContext) -> Field:
    """
    Builds a Field from a <field> XML element.

    The field type is looked up in `ctx.types` by the name given in the
    type attribute. A constant value is resolved only when the presence is
    `Presence.CONSTANT`; otherwise it is None.

    Args:
        node (Element): The XML element representing a field.
        ctx (ParsingContext): Parsing context providing the known types.

    Returns:
        Field: An instance of Field with parsed attributes.

    Raises:
        SchemaParsingError: If the element tag is not 'field'.
    """
    tag = node.tag
    if tag != "field":
        raise SchemaParsingError(f"Expected 'field' tag, got '{tag}'")

    id_ = parse_id(node)
    name = parse_name(node)
    description = parse_description(node)
    field_type = ctx.types[parse_type_attr(node)]
    presence = parse_presence(node)
    offset = parse_offset(node)
    since_version = parse_since_version(node)
    deprecated = parse_deprecated(node)
    alignment = parse_alignment(node)
    value_ref = parse_value_ref(node)
    raw_text = node.text

    constant = None
    if presence is Presence.CONSTANT:
        constant = field_constant_value(value_ref, raw_text, field_type, ctx)

    return Field(
        id=id_,
        name=name,
        description=description,
        type=field_type,
        presence=presence,
        offset=offset,
        since_version=since_version,
        deprecated=deprecated,
        alignment=alignment,
        value_ref=value_ref,
        constant_value=constant,
    )
def parse_group(node: Element, ctx: ParsingContext) -> Group:
    """
    Builds a Group from a <group> XML element.

    Child elements are parsed with `parse_elements`, so groups may contain
    fields, nested groups and data elements.

    Args:
        node (Element): The XML element representing a group.
        ctx (ParsingContext): Parsing context passed on to the attribute and
            child element parsers.

    Returns:
        Group: An instance of Group with parsed attributes.

    Raises:
        SchemaParsingError: If the element tag is not 'group'.
    """
    if node.tag != "group":
        raise SchemaParsingError(f"Expected 'group' tag, got '{node.tag}'")

    # Entries are evaluated top to bottom, i.e. attributes are parsed in
    # exactly this order, before the child elements.
    attrs = {
        "id": parse_id(node),
        "name": parse_name(node),
        "description": parse_description(node),
        "block_length": parse_block_length(node),
        "since_version": parse_since_version(node),
        "deprecated": parse_deprecated(node),
        "dimension_type": parse_dimension_type(node, ctx),
    }
    attrs["fields"], attrs["groups"], attrs["datas"] = parse_elements(node, ctx)
    return Group(**attrs)
def parse_data(node: Element, ctx: ParsingContext) -> Data:
    """
    Builds a Data from a <data> XML element.

    The data type is looked up in `ctx.types` by the name given in the type
    attribute, after all attributes have been parsed.

    Args:
        node (Element): The XML element representing a data element.
        ctx (ParsingContext): Parsing context providing the known types.

    Returns:
        Data: An instance of Data with parsed attributes.

    Raises:
        SchemaParsingError: If the element tag is not 'data'.
    """
    tag = node.tag
    if tag != "data":
        raise SchemaParsingError(f"Expected 'data' tag, got '{tag}'")

    name = parse_name(node)
    description = parse_description(node)
    id_ = parse_id(node)
    type_name = parse_type_attr(node)
    semantic_type = parse_semantic_type(node)
    since_version = parse_since_version(node)
    deprecated = parse_deprecated(node)

    resolved_type = ctx.types[type_name]
    return Data(
        name=name,
        id=id_,
        type_=resolved_type,
        description=description,
        semantic_type=semantic_type,
        since_version=since_version,
        deprecated=deprecated,
    )
def parse_elements(node: Element, ctx: ParsingContext) -> tuple[list[Field], list[Group], list[Data]]:
    """
    Parses the children of a message or group element.

    Children must appear in the order fields, then groups, then data
    elements; a child that breaks this order or carries an unknown tag is
    rejected.

    Args:
        node (Element): The XML element containing child elements to parse.
        ctx (ParsingContext): The parsing context containing type definitions.

    Returns:
        tuple[list[Field], list[Group], list[Data]]: The parsed fields,
            groups and data elements, each in document order.

    Raises:
        SchemaParsingError: On out-of-order or unknown child elements.
    """
    fields, groups, datas = [], [], []

    for child in node:
        tag = child.tag
        if tag == "field":
            if datas or groups:
                raise SchemaParsingError(
                    "Field cannot be defined after data or group elements."
                )
            fields.append(parse_field(child, ctx))
        elif tag == "group":
            if datas:
                raise SchemaParsingError(
                    "Group cannot be defined after data elements."
                )
            groups.append(parse_group(child, ctx))
        elif tag == "data":
            datas.append(parse_data(child, ctx))
        else:
            raise SchemaParsingError(f"Unknown element type: {tag}")

    return fields, groups, datas
568def parse_type_node(node:Element) -> FixedLengthElement:
569 match node.tag:
570 case 'type':
571 return parse_type(node)
572 case 'enum':
573 return parse_enum(node)
574 case 'set':
575 return parse_set(node)
576 case 'composite':
577 return parse_composite(node)
def parse_schema_fd(fd) -> MessageSchema:
    """
    Parses an SBE schema from a file-like object.

    Steps, in order: parse the XML (comments removed), process XInclude
    directives, build the MessageSchema from the root element, register
    every child of every <types> element, bind the registered types, resolve
    the header composite and finally parse all 'sbe:message' elements.

    Args:
        fd (file-like object): File descriptor containing the XML data.

    Returns:
        MessageSchema: An instance of MessageSchema with parsed attributes.

    Raises:
        SchemaParsingError: If the schema cannot be parsed.
    """
    xml_parser = XMLParser(remove_comments=True)
    root = parse(fd, parser=xml_parser).getroot()
    ElementInclude.include(root)
    schema = parse_message_schema(root)

    # The context works on the schema's own type registry.
    ctx = ParsingContext(types=schema.types)

    # First pass: register every definition found under any <types> element.
    for types_node in root.iter('types'):
        for definition in types_node:
            ctx.types.add(parse_type_node(definition))

    # Second pass: let each definition bind against the complete registry
    # (presumably resolving references to other types by name).
    for registered in ctx.types:
        registered.lazy_bind(ctx.types)

    schema.header_type = schema.types.get_composite(schema.header_type_name)

    # The 'sbe' prefix is resolved through the root element's namespace map.
    for message_node in root.iterfind('.//sbe:message', namespaces=root.nsmap):
        schema.messages.add(parse_message(message_node, ctx, schema.package))

    return schema
def parse_schema(path=None, fd=None, text=None) -> MessageSchema:
    """
    Parses an SBE schema from an XML file, file-like object or string.

    Args:
        path (str, optional): Path to the XML file containing the schema.
        fd (file-like object, optional): File-like object containing the XML data.
        text (str | bytes, optional): The XML data itself. Bytes are accepted
            in addition to str.

    Returns:
        MessageSchema: An instance of MessageSchema with parsed attributes.

    Raises:
        ValueError: If not exactly one of 'path', 'fd' and 'text' is given.
        SchemaParsingError: If the schema cannot be parsed.
    """
    provided = sum(arg is not None for arg in (path, fd, text))
    if provided != 1:
        raise ValueError("Exactly one of 'path', 'fd', or 'text' must be provided")

    if path is not None:
        # Binary mode: decoding is left to the XML parser.
        with open(path, 'rb') as file:
            return parse_schema_fd(file)
    if fd is not None:
        return parse_schema_fd(fd)

    from io import BytesIO, StringIO
    # StringIO rejects bytes, so raw bytes get a binary buffer instead.
    if isinstance(text, (bytes, bytearray)):
        buffer = BytesIO(text)
    else:
        buffer = StringIO(text)
    return parse_schema_fd(buffer)