# Source code for axiom.parsers
"""Parser classes, for parsing metadata specifications."""
from bs4 import BeautifulSoup
import pandas as pd
import xml.etree.ElementTree as et
import lxml.etree as le
class Parser:
    """Common base class that fixes the interface shared by all parsers."""

    def parse(self, filepath):
        """Parse the file at ``filepath``.

        This base implementation does no parsing; it always raises so that
        subclasses must supply their own ``parse``.

        Args:
            filepath (str) : Path to the file.

        Returns:
            dict : Dictionary of variables with metadata (as produced by
                subclasses; this base method never returns).

        Raises:
            NotImplementedError: Always, because the base class provides no
                parsing logic.
        """
        raise NotImplementedError
class CFSpecificationsParser(Parser):
    """Parser for the CFConventions standard name table."""

    @staticmethod
    def _text_or_none(element):
        """Return the text of ``element``, or None if it is missing or empty."""
        if element is None:
            return None
        return element.text or None

    def parse(self, filepath):
        """Parse the CF standard names table.

        Args:
            filepath (str): Path to the file.

        Returns:
            dict : Dictionary keyed by each entry's ``id`` attribute. Every
                value is a dictionary with the keys ``description``,
                ``units``, ``grib`` and ``amip``; a value is None when the
                corresponding child element is empty or absent.
        """
        # Read through a context manager so the file handle is always closed.
        with open(filepath, 'r') as handle:
            xml = BeautifulSoup(handle.read(), features='html.parser')

        parsed = dict()
        for entry in xml.standard_name_table.find_all('entry'):
            parsed[entry['id']] = dict(
                description=self._text_or_none(entry.description),
                units=self._text_or_none(entry.canonical_units),
                grib=self._text_or_none(entry.grib),
                amip=self._text_or_none(entry.amip),
            )
        return parsed
class CordexCSVParser(Parser):
    """Turn a Cordex CSV file (with a header row) into a variable lookup."""

    def parse(self, filepath):
        """Read a CSV file and index its rows by the ``variable`` column.

        Args:
            filepath (str): Path to the file.

        Returns:
            dict : One entry per value of the ``variable`` column; each value
                is the full row as a column-name -> cell dictionary. If the
                same variable appears on several rows, the last row wins.
        """
        frame = pd.read_csv(filepath)
        rows = frame.to_dict('records')
        return {row['variable']: row for row in rows}
class XMLSchemaParser(Parser):
    """Loads XSD documents as lxml schema objects."""

    def parse(self, filepath):
        """Build a schema object from an xsd document.

        Args:
            filepath (str) : Path to the xsd file.

        Returns:
            lxml.etree.XMLSchema : Schema constructed from the file.
        """
        schema = le.XMLSchema(file=filepath)
        return schema
class XMLParser(Parser):
    """Parser for XML files."""

    def parse(self, filepath, schema=None):
        """Parse an XML file.

        Args:
            filepath (str) : Path to the xml file.
            schema (lxml.etree.XMLSchema) : Optional schema, handed to the
                lxml parser. Defaults to None (no schema).

        Returns:
            lxml.etree._Element : Root element of the parsed document.
        """
        _parser = le.XMLParser(schema=schema)
        # Read the bytes inside a context manager so the file handle is
        # closed promptly instead of being left to the garbage collector.
        with open(filepath, 'rb') as handle:
            raw = handle.read()
        return le.fromstring(raw, parser=_parser)
class ParserFactory():
    """Maps short parser names to ready-to-use parser instances."""

    def get_parser(self, name):
        """Create the parser registered under ``name``.

        Args:
            name (str): Name of the parser. One of ``xml``, ``xsd``,
                ``schema``, ``cordex`` or ``cf``.

        Returns:
            Parser: New instance of the Parser subclass registered for the
                requested name.

        Raises:
            KeyError: If ``name`` is not one of the registered names.
        """
        # ``xsd`` and ``schema`` are two names for the same schema parser.
        registry = {
            'xml': XMLParser,
            'xsd': XMLSchemaParser,
            'schema': XMLSchemaParser,
            'cordex': CordexCSVParser,
            'cf': CFSpecificationsParser,
        }
        parser_cls = registry[name]
        return parser_cls()