Source code for akerbp.mlpet.dataloader

from typing import Any, Dict, List

import lasio
import pandas as pd
from cognite.client import CogniteClient
from pandas.core.frame import DataFrame


class DataLoader:
    """
    A helper class that performs the data loading part of processing MLPet
    data.

    This is an **internal** class only. It is **strictly** to be used as a
    super of the Dataset class.

    Every ``load_from_*`` method stores the loaded dataframe on the instance
    as ``df_original`` (via :meth:`save_df_to_cls`) and also returns it.
    """

    def save_df_to_cls(self, df: DataFrame) -> DataFrame:
        """
        Simple wrapper function to save a df to the class instance

        Args:
            df (DataFrame): Dataframe to be saved to class instance

        Returns:
            DataFrame: Returns the passed dataframe (the same object, not a
                copy).
        """
        self.df_original = df
        return df

    def load_from_cdf(
        self, client: CogniteClient, metadata: Dict[str, str], save_as: str = ""
    ) -> DataFrame:
        """
        Retrieves data from CDF for the provided metadata config

        Each sequence matching ``metadata`` is retrieved as a dataframe and
        tagged with a ``well_name`` column taken from the sequence metadata
        key ``wellbore`` (falling back to ``wellbore_name``). The per-sequence
        dataframes are then concatenated.

        Args:
            client (CogniteClient): The CDF client object to retrieve data from
            metadata (dict): The metadata config to pass to the CDF client
            save_as (str): If wanting to save the retrieved data, a filepath
                can be passed to this arg and the data will be pickled at the
                provided filepath.

        Returns:
            DataFrame: Returns the data retrieved from CDF.

        Raises:
            ValueError: If no sequences match the provided metadata.
            KeyError: If a sequence's metadata has neither a ``wellbore`` nor
                a ``wellbore_name`` key.
        """
        # Save client instance to class instance
        self.cdf_client = client

        heads = client.sequences.list(metadata=metadata, limit=None)
        data = []
        for head in heads:
            training_data = client.sequences.data.retrieve_dataframe(
                id=head.id, start=None, end=None
            )
            # Keep the try body limited to the lookup that can actually fail.
            try:
                well_name = head.metadata["wellbore"]
            except KeyError:
                well_name = head.metadata["wellbore_name"]
            training_data["well_name"] = well_name
            data.append(training_data)

        if not data:
            # pd.concat would raise an opaque "No objects to concatenate"
            # ValueError here; raise the same exception type with the cause.
            raise ValueError(
                f"No sequences were found in CDF for the provided metadata: {metadata}"
            )

        df = pd.concat(data)
        if save_as:
            df.to_pickle(save_as)
        return self.save_df_to_cls(df)

    def load_from_las(self, filepaths: List[str], **kwargs: Any) -> DataFrame:
        """
        Loads data from las file(s)

        Each file is read with ``lasio``, its index is reset into a regular
        column and a ``well_name`` column is added from the ``WELL`` item of
        the file's ``Well`` header section. The per-file dataframes are then
        concatenated.

        Args:
            filepaths (list of strings): paths to las files
            **kwargs: Accepted for signature parity with the other loaders.
                NOTE: these are currently ignored and are not forwarded to
                ``lasio.read``.

        Returns:
            DataFrame: Returns the data loaded from the provided las files.

        Raises:
            ValueError: If ``filepaths`` is empty.
        """
        dfs = []
        for path in filepaths:
            las = lasio.read(path)
            well = las.header["Well"].WELL.value
            df = las.df().reset_index()
            df["well_name"] = well
            dfs.append(df)

        if not dfs:
            # pd.concat would raise an opaque "No objects to concatenate"
            # ValueError here; raise the same exception type with the cause.
            raise ValueError(
                "No las files were provided: 'filepaths' must contain at least one path"
            )

        return self.save_df_to_cls(pd.concat(dfs))

    def load_from_csv(self, filepath: str, **kwargs: Any) -> DataFrame:
        """
        Loads data from csv files

        Args:
            filepath (string): path to csv file
            **kwargs: Forwarded unchanged to ``pandas.read_csv``.

        Returns:
            DataFrame: Returns the data loaded from the provided csv file.
        """
        return self.save_df_to_cls(pd.read_csv(filepath, **kwargs))

    def load_from_pickle(self, filepath: str, **kwargs: Any) -> DataFrame:
        """
        Loads data from pickle files

        Args:
            filepath (string): path to pickle file
            **kwargs: Forwarded unchanged to ``pandas.read_pickle``.

        Returns:
            DataFrame: Returns the data loaded from the provided pickle file.
        """
        # SECURITY NOTE: unpickling can execute arbitrary code. Only load
        # pickle files that come from a trusted source.
        return self.save_df_to_cls(pd.read_pickle(filepath, **kwargs))

    def load_from_dict(self, data_dict: Dict[str, Any], **kwargs: Any) -> DataFrame:
        """
        Loads data from a dictionary

        Args:
            data_dict (dict): dictionary with data
            **kwargs: Forwarded unchanged to ``pandas.DataFrame.from_dict``.

        Returns:
            DataFrame: Returns the data loaded from the provided dictionary.
        """
        return self.save_df_to_cls(pd.DataFrame.from_dict(data_dict, **kwargs))