Coverage for sbe2/xmlparser/types.py: 92%

261 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-29 14:27 +0200

1from ..schema import ( 

2 ValidValue, 

3 Enum, 

4 Type, 

5 Composite, 

6 Presence, 

7 Set, 

8 Choice, 

9 FixedLengthElement, 

10 Ref, 

11 Message, 

12 MessageSchema, 

13 Group, 

14 Data, 

15 Field, 

16 builtin 

17) 

18 

19from lxml.etree import Element 

20from .attributes import ( 

21 parse_name, 

22 parse_description, 

23 parse_since_version, 

24 parse_deprecated, 

25 parse_encoding_type, 

26 parse_offset, 

27 parse_type as parse_type_attr, 

28 parse_presence, 

29 parse_primitive_type, 

30 parse_length, 

31 parse_id, 

32 parse_package, 

33 parse_version, 

34 parse_semantic_version, 

35 parse_byte_order, 

36 parse_header_type, 

37 parse_alignment, 

38 parse_semantic_type, 

39 parse_block_length, 

40 parse_dimension_type, 

41 parse_value_ref 

42) 

43from .errors import SchemaParsingError 

44from .ctx import ParsingContext 

45from lxml.etree import XMLParser, parse, QName 

46from lxml import ElementInclude 

47from typing import Any 

48 

49 

def parse_valid_value(val_val: Element, encoding_type: str) -> ValidValue:
    """
    Parses a validValue element from XML.

    Args:
        val_val (Element): The XML element representing a valid value.
        encoding_type (str): Encoding type name of the owning enum. When it is
            exactly 'char' the element text is kept as ASCII-encoded bytes;
            for any other name the text is converted with int().

    Returns:
        ValidValue: An instance of ValidValue with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'validValue', or if the element
            text is missing or cannot be converted to the expected value.
    """
    if val_val.tag != "validValue":
        raise SchemaParsingError(f"Expected 'validValue' tag, got '{val_val.tag}'")
    name = parse_name(val_val)
    description = parse_description(val_val)

    raw = val_val.text
    try:
        value = raw.encode('ascii') if encoding_type == 'char' else int(raw)
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError/TypeError: element has no text (raw is None).
        # ValueError: non-numeric text, or non-ASCII text for a char enum
        # (UnicodeEncodeError is a ValueError subclass).
        raise SchemaParsingError(
            f"Invalid value for validValue '{name}': {raw}"
        ) from e

    since_version = parse_since_version(val_val)
    deprecated = parse_deprecated(val_val)

    return ValidValue(
        name=name,
        description=description,
        value=value,
        since_version=since_version,
        deprecated=deprecated,
    )

77 

78 

def parse_enum(node: Element) -> Enum:
    """
    Builds an Enum from an <enum> XML element.

    Every child of the element is parsed as a validValue. Names and values of
    the parsed valid values must each be unique. After the Enum is created,
    each valid value gets its `enum` attribute pointed back at it.

    Args:
        node (Element): The XML element representing an enum.

    Returns:
        Enum: An instance of Enum with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'enum' or if valid value names
            or values are duplicated.
    """
    if node.tag != "enum":
        raise SchemaParsingError(f"Expected 'enum' tag, got '{node.tag}'")

    name = parse_name(node)
    attrs = dict(
        name=name,
        description=parse_description(node),
        since_version=parse_since_version(node),
        deprecated=parse_deprecated(node),
        encoding_type_name=parse_encoding_type(node),
        offset=parse_offset(node),
    )

    members = []
    for child in node:
        members.append(parse_valid_value(child, attrs["encoding_type_name"]))

    # names and values must be unique
    distinct_names = {member.name for member in members}
    if len(distinct_names) != len(members):
        raise SchemaParsingError(f"Duplicate valid value names found in enum '{name}'")
    distinct_values = {member.value for member in members}
    if len(distinct_values) != len(members):
        raise SchemaParsingError(f"Duplicate valid value values found in enum '{name}'")

    result = Enum(valid_values=members, **attrs)
    # Back-link every valid value to the enum that owns it.
    for member in members:
        member.enum = result
    return result

119 

120 

def parse_choice(node: Element) -> Choice:
    """
    Builds a Choice from a <choice> XML element.

    The element text is converted with int() and stored as the choice value.

    Args:
        node (Element): The XML element representing a choice.

    Returns:
        Choice: An instance of Choice with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'choice' or the element text is
            not a valid integer.
    """
    if node.tag != "choice":
        raise SchemaParsingError(f"Expected 'choice' tag, got '{node.tag}'")

    name = parse_name(node)
    description = parse_description(node)
    since_version = parse_since_version(node)
    deprecated = parse_deprecated(node)

    raw = node.text
    try:
        parsed = int(raw)
    except (ValueError, TypeError) as e:
        message = f"Invalid value for choice '{name}': {raw}"
        raise SchemaParsingError(message) from e

    return Choice(
        name=name,
        description=description,
        since_version=since_version,
        deprecated=deprecated,
        value=parsed,
    )

152 

153 

def parse_set(node: Element) -> Set:
    """
    Builds a Set from a <set> XML element.

    Every child of the element is parsed as a choice. Choice names and choice
    values must each be unique within the set.

    Args:
        node (Element): The XML element representing a set.

    Returns:
        Set: An instance of Set with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'set' or if choice names or
            values are duplicated.
    """
    if node.tag != "set":
        raise SchemaParsingError(f"Expected 'set' tag, got '{node.tag}'")

    name = parse_name(node)
    attrs = dict(
        name=name,
        description=parse_description(node),
        since_version=parse_since_version(node),
        deprecated=parse_deprecated(node),
        encoding_type_name=parse_encoding_type(node),
        offset=parse_offset(node),
    )

    parsed_choices = []
    for child in node:
        parsed_choices.append(parse_choice(child))
    total = len(parsed_choices)

    # names and values must be unique
    if total != len({item.name for item in parsed_choices}):
        raise SchemaParsingError(f"Duplicate choice names found in set '{name}'")
    if total != len({item.value for item in parsed_choices}):
        raise SchemaParsingError(f"Duplicate choice value found in set '{name}'")

    return Set(choices=parsed_choices, **attrs)

188 

189 

def parse_type(node: Element) -> Type:
    """
    Builds a Type from a <type> XML element.

    The element text is passed through unchanged as the type's `value`.

    Args:
        node (Element): The XML element representing a type.

    Returns:
        Type: An instance of Type with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'type'.
    """
    if node.tag != "type":
        raise SchemaParsingError(f"Expected 'type' tag, got '{node.tag}'")

    # Keyword arguments are evaluated top to bottom, which keeps the
    # attribute parsing order stable.
    return Type(
        name=parse_name(node),
        description=parse_description(node),
        since_version=parse_since_version(node),
        deprecated=parse_deprecated(node),
        primitive_type=parse_primitive_type(node),
        offset=parse_offset(node),
        length=parse_length(node),
        value_ref=parse_value_ref(node),
        value=node.text,
        presence=parse_presence(node),
    )

227 

228 

def parse_ref(node: Element) -> Ref:
    """
    Builds a Ref from a <ref> XML element.

    Args:
        node (Element): The XML element representing a reference.

    Returns:
        Ref: An instance of Ref with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'ref'.
    """
    if node.tag != "ref":
        raise SchemaParsingError(f"Expected 'ref' tag, got '{node.tag}'")

    ref_name = parse_name(node)
    ref_description = parse_description(node)
    referenced_type = parse_type_attr(node)
    ref_offset = parse_offset(node)

    return Ref(
        name=ref_name,
        description=ref_description,
        type_name=referenced_type,
        offset=ref_offset,
    )

250 

251 

def parse_composite_element(node: Element) -> FixedLengthElement:
    """
    Parses one child of a composite, dispatching on the element tag.

    Supported tags are 'enum', 'type', 'set', 'ref' and 'composite'.

    Args:
        node (Element): The XML element representing a composite element.

    Returns:
        FixedLengthElement: The element produced by the parser matching the tag.

    Raises:
        SchemaParsingError: If the tag is not one of the supported tags.
    """
    tag = node.tag
    if tag == "enum":
        return parse_enum(node)
    if tag == "type":
        return parse_type(node)
    if tag == "set":
        return parse_set(node)
    if tag == "ref":
        return parse_ref(node)
    if tag == "composite":
        return parse_composite(node)

    raise SchemaParsingError(f"Unknown composite element type: {node.tag}")

275 

276 

def parse_composite(node: Element) -> Composite:
    """
    Builds a Composite from a <composite> XML element.

    Every child of the element is parsed with parse_composite_element.

    Args:
        node (Element): The XML element representing a composite.

    Returns:
        Composite: An instance of Composite with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'composite'.
    """
    if node.tag != "composite":
        raise SchemaParsingError(f"Expected 'composite' tag, got '{node.tag}'")

    attrs = dict(
        name=parse_name(node),
        description=parse_description(node),
        since_version=parse_since_version(node),
        deprecated=parse_deprecated(node),
        offset=parse_offset(node),
    )

    members = []
    for child in node:
        members.append(parse_composite_element(child))

    return Composite(elements=members, **attrs)

306 

307 

def parse_message_schema(node: Element) -> MessageSchema:
    """
    Builds a MessageSchema from a messageSchema XML element.

    Only the local part of the tag is checked; the namespace is ignored.

    Args:
        node (Element): The XML element representing a message schema.

    Returns:
        MessageSchema: An instance of MessageSchema with parsed attributes.

    Raises:
        SchemaParsingError: If the local tag name is not 'messageSchema'.
    """
    # TODO: check namespace?
    local_name = QName(node.tag).localname
    if local_name != "messageSchema":
        raise SchemaParsingError(f"Expected 'messageSchema' tag, got '{local_name}'")

    # Keyword arguments are evaluated top to bottom, which keeps the
    # attribute parsing order stable.
    return MessageSchema(
        package=parse_package(node),
        version=parse_version(node),
        semantic_version=parse_semantic_version(node),
        id=parse_id(node),
        byte_order=parse_byte_order(node),
        header_type_name=parse_header_type(node),
        description=parse_description(node),
    )

338 

def get_package(node: Element) -> str | None:
    """
    Finds the package declared by an enclosing <messages> element.

    Walks up the ancestors of `node`. The first ancestor tagged 'messages'
    that yields a truthy package wins; 'messages' ancestors without one are
    skipped.

    Args:
        node (Element): The element whose ancestors are searched.

    Returns:
        str | None: The package name, or None when no ancestor provides one.
    """
    ancestor = node
    while (ancestor := ancestor.getparent()) is not None:
        if ancestor.tag != 'messages':
            continue
        found = parse_package(ancestor, required=False)
        if found:
            return found
    return None

352 

353 

354 

def parse_message(node: Element, ctx: ParsingContext, default_package: str) -> Message:
    """
    Builds a Message from a message XML element.

    Only the local part of the tag is checked; the namespace is ignored.

    Args:
        node (Element): The XML element representing a message.
        ctx (ParsingContext): The parsing context, forwarded to parse_elements.
        default_package (str): Package used when get_package finds none for
            this message.

    Returns:
        Message: An instance of Message with parsed attributes.

    Raises:
        SchemaParsingError: If the local tag name is not 'message'.
    """
    # TODO: check namespace?
    local_name = QName(node.tag).localname
    if local_name != "message":
        raise SchemaParsingError(f"Expected 'message' tag, got '{local_name}'")

    attrs = dict(
        id=parse_id(node),
        name=parse_name(node),
        description=parse_description(node),
        semantic_type=parse_semantic_type(node),
        block_length=parse_block_length(node),
        since_version=parse_since_version(node),
        deprecated=parse_deprecated(node),
        alignment=parse_alignment(node),
        package=get_package(node) or default_package,
    )

    child_fields, child_groups, child_datas = parse_elements(node, ctx)

    return Message(
        fields=child_fields,
        groups=child_groups,
        datas=child_datas,
        **attrs,
    )

396 

def value_ref_to_valid_value(value_ref: str, ctx: ParsingContext) -> ValidValue:
    """
    Resolves an 'EnumName.ValueName' reference to the matching ValidValue.

    Args:
        value_ref (str): Reference made of an enum name and a valid value
            name separated by a single dot.
        ctx (ParsingContext): The parsing context whose `types` holds the enum.

    Returns:
        ValidValue: The valid value of the referenced enum with that name.

    Raises:
        SchemaParsingError: On any failure to resolve the reference; the
            underlying exception is attached as the cause.
    """
    try:
        enum_name, member_name = value_ref.split('.')
        candidate = ctx.types[enum_name]
        if not isinstance(candidate, Enum):
            raise ValueError(f"'{enum_name}' type is not enum")
        hit = next(
            (member for member in candidate.valid_values if member.name == member_name),
            None,
        )
        if hit is None:
            raise ValueError(f"Enum '{enum_name}' does not contain value '{member_name}'")
        return hit
    except Exception as e:
        raise SchemaParsingError(f"Invalid value reference: '{value_ref}'") from e

409 

def field_constant_value(value_ref: str | None, text: str | None, type_: FixedLengthElement, ctx: ParsingContext) -> Any:
    """
    Resolves the constant value of a field.

    The value comes either from a value reference (the field's own, or the
    one carried by its Type) or from constant text (the field's own, or the
    one carried by its Type). Exactly one of the two must be available.

    Args:
        value_ref (str | None): The field's valueRef attribute, if any.
        text (str | None): The field's element text, if any.
        type_ (FixedLengthElement): The field's type; used for fallbacks and
            to parse constant text.
        ctx (ParsingContext): The parsing context used to resolve references.

    Returns:
        Any: The referenced valid value's `value`, or the result of
        `type_.parse()` on the constant text.

    Raises:
        SchemaParsingError: If both or neither of the value reference and the
            constant value are defined, or if the reference cannot be resolved.
    """
    is_plain_type = isinstance(type_, Type)
    value_ref = value_ref or (type_.value_ref if is_plain_type else None)
    # NOTE(review): parse_type() constructs Type with `value=`; confirm that
    # Type actually exposes a `const_val` attribute.
    const_val = text or (type_.const_val if is_plain_type else None)
    if bool(value_ref) == bool(const_val):
        # The field element is not available here, so the message identifies
        # the field by its type instead of referencing an undefined name.
        type_name = getattr(type_, "name", type_)
        raise SchemaParsingError(
            "Exactly one of 'valueRef' attribute or constant value needs to be "
            f"defined for a constant field of type '{type_name}'"
        )
    if value_ref:
        return value_ref_to_valid_value(value_ref, ctx).value
    return type_.parse(const_val)

419 

420 

def parse_field(node: Element, ctx: ParsingContext) -> Field:
    """
    Builds a Field from a <field> XML element.

    The field's type is looked up in `ctx.types` by the name given in the
    type attribute. A constant value is resolved only when the presence is
    Presence.CONSTANT; otherwise it is None.

    Args:
        node (Element): The XML element representing a field.
        ctx (ParsingContext): The parsing context containing type definitions.

    Returns:
        Field: An instance of Field with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'field'.
    """
    if node.tag != "field":
        raise SchemaParsingError(f"Expected 'field' tag, got '{node.tag}'")

    attrs = dict(
        id=parse_id(node),
        name=parse_name(node),
        description=parse_description(node),
        type=ctx.types[parse_type_attr(node)],
        presence=parse_presence(node),
        offset=parse_offset(node),
        since_version=parse_since_version(node),
        deprecated=parse_deprecated(node),
        alignment=parse_alignment(node),
        value_ref=parse_value_ref(node),
    )

    if attrs["presence"] is Presence.CONSTANT:
        constant = field_constant_value(
            attrs["value_ref"], node.text, attrs["type"], ctx
        )
    else:
        constant = None

    return Field(constant_value=constant, **attrs)

460 

461 

def parse_group(node: Element, ctx: ParsingContext) -> Group:
    """
    Builds a Group from a <group> XML element.

    Child elements are parsed with parse_elements into fields, nested groups
    and data elements.

    Args:
        node (Element): The XML element representing a group.
        ctx (ParsingContext): The parsing context containing type definitions.

    Returns:
        Group: An instance of Group with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'group'.
    """
    if node.tag != "group":
        raise SchemaParsingError(f"Expected 'group' tag, got '{node.tag}'")

    attrs = dict(
        id=parse_id(node),
        name=parse_name(node),
        description=parse_description(node),
        block_length=parse_block_length(node),
        since_version=parse_since_version(node),
        deprecated=parse_deprecated(node),
        dimension_type=parse_dimension_type(node, ctx),
    )

    child_fields, child_groups, child_datas = parse_elements(node, ctx)

    return Group(
        fields=child_fields,
        groups=child_groups,
        datas=child_datas,
        **attrs,
    )

497 

498 

def parse_data(node: Element, ctx: ParsingContext) -> Data:
    """
    Builds a Data from a <data> XML element.

    The type is looked up in `ctx.types` by the name given in the type
    attribute, after all attributes have been parsed.

    Args:
        node (Element): The XML element representing a data element.
        ctx (ParsingContext): The parsing context containing type definitions.

    Returns:
        Data: An instance of Data with parsed attributes.

    Raises:
        SchemaParsingError: If the tag is not 'data'.
    """
    if node.tag != "data":
        raise SchemaParsingError(f"Expected 'data' tag, got '{node.tag}'")

    data_name = parse_name(node)
    data_description = parse_description(node)
    data_id = parse_id(node)
    type_name = parse_type_attr(node)
    data_semantic_type = parse_semantic_type(node)
    data_since_version = parse_since_version(node)
    data_deprecated = parse_deprecated(node)

    resolved_type = ctx.types[type_name]

    return Data(
        name=data_name,
        id=data_id,
        type_=resolved_type,
        description=data_description,
        semantic_type=data_semantic_type,
        since_version=data_since_version,
        deprecated=data_deprecated,
    )

529 

530 

def parse_elements(node: Element, ctx: ParsingContext) -> tuple[list[Field], list[Group], list[Data]]:
    """
    Parses the children of a message or group element.

    Children must appear in the order fields, then groups, then data
    elements; any other tag is rejected.

    Args:
        node (Element): The XML element containing child elements to parse.
        ctx (ParsingContext): The parsing context containing type definitions.

    Returns:
        tuple[list[Field], list[Group], list[Data]]: The parsed fields,
        groups, and data elements, each in document order.

    Raises:
        SchemaParsingError: If a field follows a group or data element, a
            group follows a data element, or a child has an unknown tag.
    """
    fields: list[Field] = []
    groups: list[Group] = []
    datas: list[Data] = []

    for child in node:
        tag = child.tag
        if tag == "field":
            if groups or datas:
                raise SchemaParsingError(
                    "Field cannot be defined after data or group elements."
                )
            fields.append(parse_field(child, ctx))
        elif tag == "group":
            if datas:
                raise SchemaParsingError(
                    "Group cannot be defined after data elements."
                )
            groups.append(parse_group(child, ctx))
        elif tag == "data":
            datas.append(parse_data(child, ctx))
        else:
            raise SchemaParsingError(f"Unknown element type: {child.tag}")

    return fields, groups, datas

566 

567 

568def parse_type_node(node:Element) -> FixedLengthElement: 

569 match node.tag: 

570 case 'type': 

571 return parse_type(node) 

572 case 'enum': 

573 return parse_enum(node) 

574 case 'set': 

575 return parse_set(node) 

576 case 'composite': 

577 return parse_composite(node) 

578 

def parse_schema_fd(fd) -> MessageSchema:
    """
    Parses an SBE schema from a file-like object.

    Steps, in order: parse the XML with comments removed, process includes,
    build the MessageSchema from the root, register every child of every
    'types' element, call lazy_bind on each registered type, resolve the
    header composite, then parse and register every 'sbe:message' element.

    Args:
        fd (file-like object): File-like object containing the XML data.

    Returns:
        MessageSchema: The schema populated with types and messages.

    Raises:
        SchemaParsingError: If a schema element cannot be parsed.
    """
    tree = parse(fd, parser=XMLParser(remove_comments=True))
    root = tree.getroot()
    ElementInclude.include(root)
    schema = parse_message_schema(root)

    # The context shares the schema's type collection, so types added through
    # the context are visible on the schema as well.
    ctx = ParsingContext(types=schema.types)

    for types_section in root.iter('types'):
        for type_node in types_section:
            ctx.types.add(parse_type_node(type_node))

    # Bind only after every type is registered.
    for definition in ctx.types:
        definition.lazy_bind(ctx.types)

    schema.header_type = schema.types.get_composite(schema.header_type_name)

    message_nodes = root.iterfind('.//sbe:message', namespaces=root.nsmap)
    for message_node in message_nodes:
        schema.messages.add(parse_message(message_node, ctx, schema.package))

    return schema

611 

def parse_schema(path=None, fd=None, text=None) -> MessageSchema:
    """
    Parses an SBE schema from an XML file, file-like object, or string.

    Exactly one of the three arguments must be given.

    Args:
        path (str, optional): Path to the XML file containing the schema.
        fd (file-like object, optional): File-like object containing the XML data.
        text (str | bytes, optional): The XML data itself, as text or as
            encoded bytes.

    Returns:
        MessageSchema: An instance of MessageSchema with parsed attributes.

    Raises:
        ValueError: If none or more than one of the arguments is provided.
        SchemaParsingError: If the schema cannot be parsed.
    """
    provided = [arg for arg in (path, fd, text) if arg is not None]
    if len(provided) != 1:
        raise ValueError("Exactly one of 'path', 'fd', or 'text' must be provided")

    if path is not None:
        # Binary mode, like the original: the XML parser receives raw bytes.
        with open(path, 'rb') as file:
            return parse_schema_fd(file)
    if fd is not None:
        return parse_schema_fd(fd)

    from io import BytesIO, StringIO

    # Encoded input goes through a bytes buffer; StringIO would reject it.
    if isinstance(text, (bytes, bytearray)):
        return parse_schema_fd(BytesIO(text))
    return parse_schema_fd(StringIO(text))