"""Source code for oceanum.datamesh.datasource."""

import asyncio
import datetime
import re

import geopandas
import pandas
import xarray
from dateutil.parser import parse
from shapely.geometry import shape

from .query import Query


class DatasourceException(Exception):
    """Error raised when a datasource cannot be found, accessed or read."""
def parse_period(self, period=None):
    """Convert an ISO 8601 duration string into a ``datetime.timedelta``.

    Args:
        self: Placeholder kept for backward compatibility with the original
            two-argument signature. When ``period`` is omitted and this
            argument is a string, it is used as the period, so the function
            can also be called as ``parse_period("P1D")``.
        period (str, optional): Duration such as ``"P1DT12H"`` or
            ``"PT30M"``. Defaults to None.

    Returns:
        datetime.timedelta or None: The parsed duration, or None when no
        period (None or an empty string) is given.

    Raises:
        ValueError: If the string is not a supported ISO 8601 duration.

    Note:
        ``timedelta`` has no calendar units, so years and months are
        approximated as 365 and 30 days respectively.
    """
    # Support the single-argument call form: parse_period("P1D").
    if period is None and isinstance(self, str):
        period = self
    if not period:
        return None

    match = re.fullmatch(
        r"P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?T?(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?",
        period,
    )
    if match is None:
        raise ValueError("invalid ISO 8601 duration string")

    # Components that are absent from the string count as zero.
    years, months, days, hours, minutes, seconds = match.groups(default="0")
    return datetime.timedelta(
        days=int(days) + 365 * int(years) + 30 * int(months),
        hours=int(hours),
        minutes=int(minutes),
        seconds=float(seconds),
    )
class Datasource(object):
    """Datasource class

    Holds the metadata of a single datasource (identity, geometry, time
    range, schema, tags, ...) and loads its data through the connector that
    created it.
    """

    @classmethod
    def _init(cls, connector, id, asynchronous=False):
        """Build a Datasource from the metadata returned by a connector.

        Args:
            connector: Object providing ``_metadata_request(id)``. It is
                stored on the new instance and used later by :meth:`load`.
            id (string): ID of the datasource to look up.
            asynchronous (bool, optional): Currently unused. Defaults to False.

        Returns:
            Datasource: Instance populated from the metadata response.

        Raises:
            DatasourceException: If the metadata request does not return
                status 200 (not found, not authorized, or any other error).
        """
        meta = connector._metadata_request(id)
        if meta.status_code == 404:
            raise DatasourceException(f"Datasource {id} not found")
        if meta.status_code == 401:
            raise DatasourceException(f"Datasource {id} not Authorized")
        if meta.status_code != 200:
            raise DatasourceException(meta.text)
        meta_dict = meta.json()
        # The response is unpacked as a geometry plus a flat properties
        # mapping whose keys are passed as constructor keyword arguments.
        ds = cls(id, **{"geometry": meta_dict["geometry"], **meta_dict["properties"]})
        ds._connector = connector
        return ds

    def __init__(
        self,
        datasource_id,
        geometry=None,
        name=None,
        description=None,
        tstart=None,
        tend=None,
        parchive=None,
        schema=None,
        coordinates=None,
        tags=None,
        links=None,
        info=None,
        details=None,
        last_modified=None,
        **extra_kwargs,
    ):
        """Constructor for Datasource class

        Args:
            datasource_id (string): Unique datasource ID
            geometry (dict, optional): Datasource geometry as valid geojson dictionary or None. Defaults to None.
            name (string, optional): Datasource human readable name. Defaults to None.
            description (string, optional): Datasource description. Defaults to None.
            tstart (string, optional): Earliest time in datasource. Must be a valid ISO8601 datetime string. Defaults to "1970-01-01T00:00:00Z".
            tend (string, optional): Latest time in datasource. Must be a valid ISO8601 datetime string or None. Defaults to None.
            parchive (string, optional): Datasource rolling archive period. Must be a valid ISO8601 interval string or None. Defaults to None.
            schema (dict, optional): Datasource schema. Defaults to an empty dict.
            coordinates (dict, optional): Coordinates key. Defaults to an empty dict.
            tags (list, optional): List of keyword tags. Defaults to an empty list.
            links (list, optional): List of additional external URL links. Defaults to an empty list.
            info (dict, optional): Dictionary of additional information. Defaults to an empty dict.
            details (string, optional): URL link to additional details. Defaults to None.
            last_modified (string, optional): Latest time datasource metadata was modified. Must be a valid ISO8601 datetime string or None. Defaults to the current UTC time.
            **extra_kwargs: Any additional keyword arguments are accepted and ignored.
        """
        self.id = datasource_id
        self._name = name
        self._description = description
        self._tstart = tstart
        self._tend = tend
        self._parchive = parchive
        # Fresh containers per instance: mutable defaults in the signature
        # would be shared between every Datasource created without them.
        self._schema = {} if schema is None else schema
        self._coordinates = {} if coordinates is None else coordinates
        self._tags = [] if tags is None else tags
        self._links = [] if links is None else links
        self._info = {} if info is None else info
        self._details = details
        self._last_modified = last_modified or datetime.datetime.utcnow()
        self._connector = None
        # geometry is documented as optional, so only convert it when given.
        self._geometry = shape(geometry) if geometry is not None else None

    def __str__(self):
        return f"""
        {self._name} [{self.id}]
            Extent: {self.bounds}
            Timerange: {self.tstart} to {self.tend}
            {len(self.attributes)} attributes
            {len(self.variables)} {"properties" if "g" in self._coordinates else "variables"}
            Container: {str(self.container)}
        """

    def __repr__(self):
        return f"<{type(self).__name__} {self.id}>"

    @property
    def name(self):
        """str: Human readable name of datasource"""
        return self._name or "Datasource with ID " + self.id

    @property
    def description(self):
        """str: Datasource description"""
        return self._description

    @property
    def parchive(self):
        """str: Rolling archive period as given to the constructor, or None"""
        return self._parchive

    @property
    def tstart(self):
        """:obj:`datetime` Earliest time in datasource

        When no explicit start time is set, this is the current UTC time
        minus the rolling archive period, or 1970-01-01T00:00:00Z when no
        archive period is set either.
        """
        if self._tstart is not None:
            return parse(self._tstart)
        if not self._parchive:
            return datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        # parse_period's first parameter is a placeholder; the period is its
        # second argument.
        return datetime.datetime.utcnow() - parse_period(None, self._parchive)

    @property
    def tend(self):
        """:obj:`datetime` Latest time in datasource

        Falls back to the current UTC time when no end time is set.
        """
        if self._tend is None:
            return datetime.datetime.utcnow()
        return parse(self._tend) if self._tend else None

    @property
    def container(self):
        """str: Container type for datasource

        Is one of:
            - :obj:`xarray.Dataset`
            - :obj:`pandas.DataFrame`
            - :obj:`geopandas.GeoDataFrame`
        """
        if "g" in self._coordinates:
            return geopandas.GeoDataFrame
        if "x" in self._coordinates and "y" in self._coordinates:
            return xarray.Dataset
        return pandas.DataFrame

    @property
    def geometry(self):
        """:obj:`shapely.geometry.Geometry`: Geometry of datasource extent or location, or None"""
        return self._geometry

    @property
    def bounds(self):
        """list[float]: Bounding box of datasource geographical extent, or None without a geometry"""
        if self._geometry is None:
            return None
        return self._geometry.bounds

    @property
    def variables(self):
        """Datasource variables (or properties)"""
        return self._schema.get("data_vars", {})

    @property
    def attributes(self):
        """Datasource global attributes"""
        return self._schema.get("attrs", {})

    def load(self):
        """Load the datasource into an in memory container or open zarr dataset

        For datasources which load into DataFrames or GeoDataFrames, this
        returns an in memory instance of the DataFrame.
        For datasources which load into an xarray Dataset, an open zarr
        backed dataset is returned.
        """
        container = self.container
        if container == xarray.Dataset:
            mapper = self._connector._zarr_proxy(self.id)
            return xarray.open_zarr(mapper, consolidated=True)
        # Both dataframe flavours are fetched as parquet; the connector's
        # response is read back through its ``name`` attribute
        # (presumably a temporary file path - confirm against the connector).
        tmpfile = self._connector._data_request(self.id, "application/parquet")
        if container == geopandas.GeoDataFrame:
            return geopandas.read_parquet(tmpfile.name)
        return pandas.read_parquet(tmpfile.name)