Module redvox.common.data_window_configuration

This module provides type-safe data window configuration.

Expand source code
"""
This module provides type-safe data window configuration.
"""

from dataclasses import dataclass
from dataclasses_json import dataclass_json
from typing import Optional, List, MutableMapping

import pprint
import toml


# Default values (all in seconds) for the optional timing fields of DataWindowConfig
DEFAULT_DROP_TIME_S: float = 0.2  # minimum seconds between packets to be considered a gap
DEFAULT_START_PADDING_S: float = 120.0  # seconds to add before the start time when searching
DEFAULT_END_PADDING_S: float = 120.0  # seconds to add after the end time when searching


@dataclass_json()
@dataclass
class DataWindowConfig:
    """
    Type-safe container for the settings used to build a data window.  Instances can be
    created directly or loaded from a TOML file with from_path().

    Properties:
        input_directory: string, directory that contains the files to read data from.  REQUIRED
        structured_layout: bool, if True, the input_directory contains specially named and organized
                            directories of data.  Default True
        station_ids: optional list of strings, list of station ids to filter on.
                        If empty or None, get any ids found in the input directory.  Default None
        extensions: optional list of strings, representing file extensions to filter on.
                        If None, gets as much data as it can in the input directory.  Default None
        api_versions: optional list of strings, representing api versions to filter on.
                        If None, get as much data as it can in the input directory.  Default None
        start_year: optional int representing the year of the data window start time.  Default None
        start_month: optional int representing the month of the data window start time.  Default None
        start_day: optional int representing the day of the data window start time.  Default None
        start_hour: optional int representing the hour of the data window start time.  Default None
        start_minute: optional int representing the minute of the data window start time.  Default None
        start_second: optional int representing the second of the data window start time.  Default None
        end_year: optional int representing the year of the data window end time.  Default None
        end_month: optional int representing the month of the data window end time.  Default None
        end_day: optional int representing the day of the data window end time.  Default None
        end_hour: optional int representing the hour of the data window end time.  Default None
        end_minute: optional int representing the minute of the data window end time.  Default None
        end_second: optional int representing the second of the data window end time.  Default None
        start_padding_seconds: float representing the amount of seconds to include before the start datetime
                                when filtering data.  Default DEFAULT_START_PADDING_S
        end_padding_seconds: float representing the amount of seconds to include after the end datetime
                                when filtering data.  Default DEFAULT_END_PADDING_S
        drop_time_seconds: float representing the minimum amount of seconds between data packets that would indicate
                            a gap.  Default DEFAULT_DROP_TIME_S
        apply_correction: bool, if True, update the timestamps in the data based on best station offset.  Default True
        edge_points_mode: str, one of NAN, COPY, or INTERPOLATE.  Determines behavior when creating points on the edge
                            of the data window.  Default COPY
        use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best
                                offset.  Default True
        debug: bool, if True, output additional information when processing data window.  Default False
    """

    input_directory: str
    structured_layout: bool = True
    station_ids: Optional[List[str]] = None
    extensions: Optional[List[str]] = None
    api_versions: Optional[List[str]] = None
    start_year: Optional[int] = None
    start_month: Optional[int] = None
    start_day: Optional[int] = None
    start_hour: Optional[int] = None
    start_minute: Optional[int] = None
    start_second: Optional[int] = None
    end_year: Optional[int] = None
    end_month: Optional[int] = None
    end_day: Optional[int] = None
    end_hour: Optional[int] = None
    end_minute: Optional[int] = None
    end_second: Optional[int] = None
    start_padding_seconds: float = DEFAULT_START_PADDING_S
    end_padding_seconds: float = DEFAULT_END_PADDING_S
    drop_time_seconds: float = DEFAULT_DROP_TIME_S
    apply_correction: bool = True
    edge_points_mode: str = "COPY"
    use_model_correction: bool = True
    debug: bool = False

    @staticmethod
    def from_path(config_path: str) -> "DataWindowConfig":
        """
        Load a DataWindowConfig from a TOML file.

        :param config_path: path to the TOML file to read
        :return: the DataWindowConfig built from the parsed file contents
        :raises Exception: any error raised while opening, parsing or converting the file is
                            re-raised unchanged after the offending path is printed
        """
        try:
            # TOML documents are UTF-8 by specification, so do not depend on the platform's locale encoding
            with open(config_path, "r", encoding="utf-8") as config_in:
                config_dict: MutableMapping = toml.load(config_in)
                # noinspection Mypy
                return DataWindowConfig.from_dict(config_dict)
        except Exception:
            print(f"Error loading configuration at: {config_path}")
            # bare raise re-raises the active exception as-is
            raise

    def pretty(self) -> str:
        """
        :return: the configuration's dictionary form (from to_dict()) formatted by pprint.pformat
        """
        # noinspection Mypy
        return pprint.pformat(self.to_dict())

Classes

class DataWindowConfig (input_directory: str, structured_layout: bool = True, station_ids: Optional[List[str]] = None, extensions: Optional[List[str]] = None, api_versions: Optional[List[str]] = None, start_year: Optional[int] = None, start_month: Optional[int] = None, start_day: Optional[int] = None, start_hour: Optional[int] = None, start_minute: Optional[int] = None, start_second: Optional[int] = None, end_year: Optional[int] = None, end_month: Optional[int] = None, end_day: Optional[int] = None, end_hour: Optional[int] = None, end_minute: Optional[int] = None, end_second: Optional[int] = None, start_padding_seconds: float = 120.0, end_padding_seconds: float = 120.0, drop_time_seconds: float = 0.2, apply_correction: bool = True, edge_points_mode: str = 'COPY', use_model_correction: bool = True, debug: bool = False)

Properties

input_directory: string, directory that contains the files to read data from. REQUIRED structured_layout: bool, if True, the input_directory contains specially named and organized directories of data. Default True station_ids: optional list of strings, list of station ids to filter on. If empty or None, get any ids found in the input directory. Default None extensions: optional list of strings, representing file extensions to filter on. If None, gets as much data as it can in the input directory. Default None api_versions: optional list of ApiVersions, representing api versions to filter on. If None, get as much data as it can in the input directory. Default None start_year: optional int representing the year of the data window start time. Default None start_month: optional int representing the month of the data window start time. Default None start_day: optional int representing the day of the data window start time. Default None start_hour: optional int representing the hour of the data window start time. Default None start_minute: optional int representing the minute of the data window start time. Default None start_second: optional int representing the second of the data window start time. Default None end_year: optional int representing the year of the data window end time. Default None end_month: optional int representing the month of the data window end time. Default None end_day: optional int representing the day of the data window end time. Default None end_hour: optional int representing the hour of the data window end time. Default None end_minute: optional int representing the minute of the data window end time. Default None end_second: optional int representing the second of the data window end time. Default None start_padding_seconds: float representing the amount of seconds to include before the start datetime when filtering data. 
Default DEFAULT_START_PADDING_S end_padding_seconds: float representing the amount of seconds to include after the end datetime when filtering data. Default DEFAULT_END_PADDING_S drop_time_seconds: float representing the minimum amount of seconds between data packets that would indicate a gap. Default DEFAULT_DROP_TIME_S apply_correction: bool, if True, update the timestamps in the data based on best station offset. Default True edge_points_mode: str, one of NAN, COPY, or INTERPOLATE. Determines behavior when creating points on the edge of the data window. default COPY use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best offset. Default True debug: bool, if True, output additional information when processing data window. Default False

Expand source code
@dataclass_json()
@dataclass
class DataWindowConfig:
    """
    Type-safe container for the settings used to build a data window.  Instances can be
    created directly or loaded from a TOML file with from_path().

    Properties:
        input_directory: string, directory that contains the files to read data from.  REQUIRED
        structured_layout: bool, if True, the input_directory contains specially named and organized
                            directories of data.  Default True
        station_ids: optional list of strings, list of station ids to filter on.
                        If empty or None, get any ids found in the input directory.  Default None
        extensions: optional list of strings, representing file extensions to filter on.
                        If None, gets as much data as it can in the input directory.  Default None
        api_versions: optional list of strings, representing api versions to filter on.
                        If None, get as much data as it can in the input directory.  Default None
        start_year: optional int representing the year of the data window start time.  Default None
        start_month: optional int representing the month of the data window start time.  Default None
        start_day: optional int representing the day of the data window start time.  Default None
        start_hour: optional int representing the hour of the data window start time.  Default None
        start_minute: optional int representing the minute of the data window start time.  Default None
        start_second: optional int representing the second of the data window start time.  Default None
        end_year: optional int representing the year of the data window end time.  Default None
        end_month: optional int representing the month of the data window end time.  Default None
        end_day: optional int representing the day of the data window end time.  Default None
        end_hour: optional int representing the hour of the data window end time.  Default None
        end_minute: optional int representing the minute of the data window end time.  Default None
        end_second: optional int representing the second of the data window end time.  Default None
        start_padding_seconds: float representing the amount of seconds to include before the start datetime
                                when filtering data.  Default DEFAULT_START_PADDING_S
        end_padding_seconds: float representing the amount of seconds to include after the end datetime
                                when filtering data.  Default DEFAULT_END_PADDING_S
        drop_time_seconds: float representing the minimum amount of seconds between data packets that would indicate
                            a gap.  Default DEFAULT_DROP_TIME_S
        apply_correction: bool, if True, update the timestamps in the data based on best station offset.  Default True
        edge_points_mode: str, one of NAN, COPY, or INTERPOLATE.  Determines behavior when creating points on the edge
                            of the data window.  Default COPY
        use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best
                                offset.  Default True
        debug: bool, if True, output additional information when processing data window.  Default False
    """

    input_directory: str
    structured_layout: bool = True
    station_ids: Optional[List[str]] = None
    extensions: Optional[List[str]] = None
    api_versions: Optional[List[str]] = None
    start_year: Optional[int] = None
    start_month: Optional[int] = None
    start_day: Optional[int] = None
    start_hour: Optional[int] = None
    start_minute: Optional[int] = None
    start_second: Optional[int] = None
    end_year: Optional[int] = None
    end_month: Optional[int] = None
    end_day: Optional[int] = None
    end_hour: Optional[int] = None
    end_minute: Optional[int] = None
    end_second: Optional[int] = None
    start_padding_seconds: float = DEFAULT_START_PADDING_S
    end_padding_seconds: float = DEFAULT_END_PADDING_S
    drop_time_seconds: float = DEFAULT_DROP_TIME_S
    apply_correction: bool = True
    edge_points_mode: str = "COPY"
    use_model_correction: bool = True
    debug: bool = False

    @staticmethod
    def from_path(config_path: str) -> "DataWindowConfig":
        """
        Load a DataWindowConfig from a TOML file.

        :param config_path: path to the TOML file to read
        :return: the DataWindowConfig built from the parsed file contents
        :raises Exception: any error raised while opening, parsing or converting the file is
                            re-raised unchanged after the offending path is printed
        """
        try:
            # TOML documents are UTF-8 by specification, so do not depend on the platform's locale encoding
            with open(config_path, "r", encoding="utf-8") as config_in:
                config_dict: MutableMapping = toml.load(config_in)
                # noinspection Mypy
                return DataWindowConfig.from_dict(config_dict)
        except Exception:
            print(f"Error loading configuration at: {config_path}")
            # bare raise re-raises the active exception as-is
            raise

    def pretty(self) -> str:
        """
        :return: the configuration's dictionary form (from to_dict()) formatted by pprint.pformat
        """
        # noinspection Mypy
        return pprint.pformat(self.to_dict())

Class variables

var api_versions : Optional[List[str]]
var apply_correction : bool
var debug : bool
var drop_time_seconds : float
var edge_points_mode : str
var end_day : Optional[int]
var end_hour : Optional[int]
var end_minute : Optional[int]
var end_month : Optional[int]
var end_padding_seconds : float
var end_second : Optional[int]
var end_year : Optional[int]
var extensions : Optional[List[str]]
var input_directory : str
var start_day : Optional[int]
var start_hour : Optional[int]
var start_minute : Optional[int]
var start_month : Optional[int]
var start_padding_seconds : float
var start_second : Optional[int]
var start_year : Optional[int]
var station_ids : Optional[List[str]]
var structured_layout : bool
var use_model_correction : bool

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, NoneType], *, infer_missing=False) ‑> ~A
Expand source code
@classmethod
def from_dict(cls: Type[A],
              kvs: Json,
              *,
              infer_missing=False) -> A:
    """
    Build an instance of ``cls`` from ``kvs`` by delegating to ``_decode_dataclass``.

    :param kvs: the JSON-compatible value to decode
    :param infer_missing: forwarded unchanged to ``_decode_dataclass``
    :return: whatever ``_decode_dataclass`` produces for ``cls``
    """
    decoded = _decode_dataclass(cls, kvs, infer_missing)
    return decoded
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
Expand source code
@classmethod
def from_json(cls: Type[A],
              s: JsonData,
              *,
              parse_float=None,
              parse_int=None,
              parse_constant=None,
              infer_missing=False,
              **kw) -> A:
    """
    Parse the JSON document ``s`` with ``json.loads`` and decode the result via ``cls.from_dict``.

    :param s: the JSON text (or bytes) to parse
    :param parse_float: forwarded to ``json.loads``
    :param parse_int: forwarded to ``json.loads``
    :param parse_constant: forwarded to ``json.loads``
    :param infer_missing: forwarded to ``cls.from_dict``
    :param kw: any further keyword arguments, forwarded to ``json.loads``
    :return: the result of ``cls.from_dict`` on the parsed document
    """
    # the explicit parser hooks, passed to json.loads together with the caller's extra keywords
    parse_hooks = {
        "parse_float": parse_float,
        "parse_int": parse_int,
        "parse_constant": parse_constant,
    }
    parsed = json.loads(s, **parse_hooks, **kw)
    return cls.from_dict(parsed, infer_missing=infer_missing)
def from_path(config_path: str) ‑> DataWindowConfig
Expand source code
@staticmethod
def from_path(config_path: str) -> "DataWindowConfig":
    """
    Load a DataWindowConfig from a TOML file.

    :param config_path: path to the TOML file to read
    :return: the DataWindowConfig built from the parsed file contents
    :raises Exception: any error raised while opening, parsing or converting the file is
                        re-raised unchanged after the offending path is printed
    """
    try:
        # TOML documents are UTF-8 by specification, so do not depend on the platform's locale encoding
        with open(config_path, "r", encoding="utf-8") as config_in:
            config_dict: MutableMapping = toml.load(config_in)
            # noinspection Mypy
            return DataWindowConfig.from_dict(config_dict)
    except Exception:
        print(f"Error loading configuration at: {config_path}")
        # bare raise re-raises the active exception as-is
        raise
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]
Expand source code
@classmethod
def schema(cls: Type[A],
           *,
           infer_missing: bool = False,
           only=None,
           exclude=(),
           many: bool = False,
           context=None,
           load_only=(),
           dump_only=(),
           partial: bool = False,
           unknown=None) -> SchemaType:
    """
    Build the schema class for ``cls`` with ``build_schema`` and return an instance of it.

    When ``unknown`` is left as None, the value is taken from the lower-cased name of the
    class's undefined-parameter action, if ``_undefined_parameter_action_safe`` returns one.
    All other keyword arguments are handed to the schema constructor unchanged.

    :return: an instance of the schema class built for ``cls``
    """
    schema_cls = build_schema(cls, DataClassJsonMixin, infer_missing, partial)

    # only consult the class-level action when the caller did not choose `unknown` explicitly
    class_action = None if unknown is not None else _undefined_parameter_action_safe(cls)
    if class_action is not None:
        # We can just make use of the same-named mm keywords
        unknown = class_action.name.lower()

    schema_options = dict(
        only=only,
        exclude=exclude,
        many=many,
        context=context,
        load_only=load_only,
        dump_only=dump_only,
        partial=partial,
        unknown=unknown,
    )
    return schema_cls(**schema_options)

Methods

def pretty(self) ‑> str
Expand source code
def pretty(self) -> str:
    """
    :return: the object's dictionary form (from to_dict()) formatted by pprint.pformat
    """
    # noinspection Mypy
    as_dict = self.to_dict()
    return pprint.pformat(as_dict)
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, NoneType]]
Expand source code
def to_dict(self, encode_json=False) -> Dict[str, Json]:
    """
    Convert this object to a dictionary by delegating to ``_asdict``.

    :param encode_json: forwarded unchanged to ``_asdict``
    :return: the dictionary produced by ``_asdict``
    """
    as_dict = _asdict(self, encode_json=encode_json)
    return as_dict
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, NoneType] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) ‑> str
Expand source code
def to_json(self,
            *,
            skipkeys: bool = False,
            ensure_ascii: bool = True,
            check_circular: bool = True,
            allow_nan: bool = True,
            indent: Optional[Union[int, str]] = None,
            separators: Tuple[str, str] = None,
            default: Callable = None,
            sort_keys: bool = False,
            **kw) -> str:
    """
    Serialize this object to a JSON string.

    The object is first converted with ``to_dict(encode_json=False)`` and the result is passed
    to ``json.dumps`` using ``_ExtendedEncoder`` as the encoder class.  Every keyword argument,
    including the extras in ``kw``, is forwarded to ``json.dumps`` unchanged.

    :return: the JSON text produced by ``json.dumps``
    """
    payload = self.to_dict(encode_json=False)
    dump_options = dict(
        cls=_ExtendedEncoder,
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
    )
    return json.dumps(payload, **dump_options, **kw)