pychemstation.control.chromatogram

Module for HPLC chromatogram data loading and manipulating

  1"""Module for HPLC chromatogram data loading and manipulating"""
  2
  3import os
  4import logging
  5import time
  6
  7import numpy as np
  8
  9from ..analysis import AbstractSpectrum
 10from ..utils.chemstation import CHFile
 11
# Chemstation data path (default Windows location of the data folder)
DATA_DIR = r"C:\Chem32\1\Data"


# standard filenames for spectral data: detector channel letter -> two-digit
# code (the drafted report reader builds names like REPORT<code>.csv from it)
CHANNELS = {"A": "01", "B": "02", "C": "03", "D": "04"}

# presumably the name of the acquisition parameters file inside an
# experiment folder -- not referenced in this module, confirm against callers
ACQUISITION_PARAMETERS = "acq.txt"

# format used in acquisition parameters; load_spectrum also uses TIME_FORMAT
# to parse the timestamp suffix of the data folder name
TIME_FORMAT = "%Y-%m-%d-%H-%M-%S"
# NOTE(review): not referenced in this module -- presumably used for sequence
# folder names; confirm against callers
SEQUENCE_TIME_FORMAT = "%Y-%m-%d %H"
 24
 25
 26class AgilentHPLCChromatogram(AbstractSpectrum):
 27    """Class for HPLC spectrum (chromatogram) loading and handling."""
 28
 29    AXIS_MAPPING = {"x": "min", "y": "mAu"}
 30
 31    INTERNAL_PROPERTIES = {
 32        "baseline",
 33        "parameters",
 34        "data_path",
 35    }
 36
 37    # set of properties to be saved
 38    PUBLIC_PROPERTIES = {
 39        "x",
 40        "y",
 41        "peaks",
 42        "timestamp",
 43    }
 44
 45    def __init__(self, path=None, autosaving=False):
 46
 47        if path is not None:
 48            os.makedirs(path, exist_ok=True)
 49            self.path = path
 50        else:
 51            self.path = os.path.join(".", "hplc_data")
 52            os.makedirs(self.path, exist_ok=True)
 53
 54        self.logger = logging.getLogger("AgilentHPLCChromatogram")
 55
 56        super().__init__(path=path, autosaving=autosaving)
 57
 58    def load_spectrum(self, data_path, channel="A"):
 59        """Loads the spectra from the given folder.
 60
 61        Args:
 62            data_path (str): Path where HPLC data has been saved.
 63        """
 64
 65        # to avoid dropping parameters when called in parent class
 66        if self.x is not None:
 67            if self.autosaving:
 68                self.save_data(filename=f"{data_path}_{channel}")
 69                self._dump()
 70
 71        # get raw data
 72        x, y = self.extract_rawdata(data_path, channel)
 73
 74        # get timestamp
 75        tstr = data_path.split(".")[0].split("_")[-1]
 76        timestamp = time.mktime(time.strptime(tstr, TIME_FORMAT))
 77
 78        # loading all data
 79        super().load_spectrum(x, y, timestamp)
 80
 81    ### PUBLIC METHODS TO LOAD RAW DATA ###
 82
 83    def extract_rawdata(self, experiment_dir: str, channel: str):
 84        """
 85        Reads raw data from Chemstation .CH files.
 86
 87        Args:
 88            experiment_dir: .D directory with the .CH files
 89
 90        Returns:
 91            np.array(times), np.array(values)   Raw chromatogram data
 92        """
 93        filename = os.path.join(experiment_dir, f"DAD1{channel}")
 94        npz_file = filename + ".npz"
 95
 96        if os.path.exists(npz_file):
 97            # already processed
 98            data = np.load(npz_file)
 99            return data["times"], data["values"]
100        else:
101            self.logger.debug("NPZ file not found. First time loading data.")
102            ch_file = filename + ".ch"
103            data = CHFile(ch_file)
104            np.savez_compressed(npz_file, times=data.times, values=data.values)
105            return np.array(data.times), np.array(data.values)
106
107    def extract_peakarea(self, experiment_dir: str):
108        """
109        Reads processed data from Chemstation report files.
110
111        Args:
112            experiment_dir: .D directory with the report files
113        """
114        # filename = os.path.join(experiment_dir, f"REPORT{CHANNELS[channel]}.csv")
115        # TODO parse file properly
116        # data = np.genfromtxt(filename, delimiter=',')
117        # return data
118        pass
119
120    def default_processing(self):
121        """
122        Processes the chromatogram in place.
123        """
124        # trim first 5 min and last 3 min of run
125        self.trim(5, 25)
126        # parameters found to work best for chromatogram data
127        self.correct_baseline(lmbd=1e5, p=0.0001, n_iter=10)
128        # get all peaks in processed chromatogram
129        self.find_peaks()
DATA_DIR = 'C:\\Chem32\\1\\Data'
CHANNELS = {'A': '01', 'B': '02', 'C': '03', 'D': '04'}
ACQUISITION_PARAMETERS = 'acq.txt'
TIME_FORMAT = '%Y-%m-%d-%H-%M-%S'
SEQUENCE_TIME_FORMAT = '%Y-%m-%d %H'
class AgilentHPLCChromatogram(pychemstation.analysis.base_spectrum.AbstractSpectrum):
 27class AgilentHPLCChromatogram(AbstractSpectrum):
 28    """Class for HPLC spectrum (chromatogram) loading and handling."""
 29
 30    AXIS_MAPPING = {"x": "min", "y": "mAu"}
 31
 32    INTERNAL_PROPERTIES = {
 33        "baseline",
 34        "parameters",
 35        "data_path",
 36    }
 37
 38    # set of properties to be saved
 39    PUBLIC_PROPERTIES = {
 40        "x",
 41        "y",
 42        "peaks",
 43        "timestamp",
 44    }
 45
 46    def __init__(self, path=None, autosaving=False):
 47
 48        if path is not None:
 49            os.makedirs(path, exist_ok=True)
 50            self.path = path
 51        else:
 52            self.path = os.path.join(".", "hplc_data")
 53            os.makedirs(self.path, exist_ok=True)
 54
 55        self.logger = logging.getLogger("AgilentHPLCChromatogram")
 56
 57        super().__init__(path=path, autosaving=autosaving)
 58
 59    def load_spectrum(self, data_path, channel="A"):
 60        """Loads the spectra from the given folder.
 61
 62        Args:
 63            data_path (str): Path where HPLC data has been saved.
 64        """
 65
 66        # to avoid dropping parameters when called in parent class
 67        if self.x is not None:
 68            if self.autosaving:
 69                self.save_data(filename=f"{data_path}_{channel}")
 70                self._dump()
 71
 72        # get raw data
 73        x, y = self.extract_rawdata(data_path, channel)
 74
 75        # get timestamp
 76        tstr = data_path.split(".")[0].split("_")[-1]
 77        timestamp = time.mktime(time.strptime(tstr, TIME_FORMAT))
 78
 79        # loading all data
 80        super().load_spectrum(x, y, timestamp)
 81
 82    ### PUBLIC METHODS TO LOAD RAW DATA ###
 83
 84    def extract_rawdata(self, experiment_dir: str, channel: str):
 85        """
 86        Reads raw data from Chemstation .CH files.
 87
 88        Args:
 89            experiment_dir: .D directory with the .CH files
 90
 91        Returns:
 92            np.array(times), np.array(values)   Raw chromatogram data
 93        """
 94        filename = os.path.join(experiment_dir, f"DAD1{channel}")
 95        npz_file = filename + ".npz"
 96
 97        if os.path.exists(npz_file):
 98            # already processed
 99            data = np.load(npz_file)
100            return data["times"], data["values"]
101        else:
102            self.logger.debug("NPZ file not found. First time loading data.")
103            ch_file = filename + ".ch"
104            data = CHFile(ch_file)
105            np.savez_compressed(npz_file, times=data.times, values=data.values)
106            return np.array(data.times), np.array(data.values)
107
108    def extract_peakarea(self, experiment_dir: str):
109        """
110        Reads processed data from Chemstation report files.
111
112        Args:
113            experiment_dir: .D directory with the report files
114        """
115        # filename = os.path.join(experiment_dir, f"REPORT{CHANNELS[channel]}.csv")
116        # TODO parse file properly
117        # data = np.genfromtxt(filename, delimiter=',')
118        # return data
119        pass
120
121    def default_processing(self):
122        """
123        Processes the chromatogram in place.
124        """
125        # trim first 5 min and last 3 min of run
126        self.trim(5, 25)
127        # parameters found to work best for chromatogram data
128        self.correct_baseline(lmbd=1e5, p=0.0001, n_iter=10)
129        # get all peaks in processed chromatogram
130        self.find_peaks()

Class for HPLC spectrum (chromatogram) loading and handling.

AgilentHPLCChromatogram(path=None, autosaving=False)
46    def __init__(self, path=None, autosaving=False):
47
48        if path is not None:
49            os.makedirs(path, exist_ok=True)
50            self.path = path
51        else:
52            self.path = os.path.join(".", "hplc_data")
53            os.makedirs(self.path, exist_ok=True)
54
55        self.logger = logging.getLogger("AgilentHPLCChromatogram")
56
57        super().__init__(path=path, autosaving=autosaving)

Default constructor, loads properties into instance namespace.

Can be redefined in descendant classes.

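Args: path (Union[str, bool], optional): Valid path to save data to. If omitted, uses ".//spectrum". If False, no folder is created. autosaving (bool, optional): If True (default), the current spectrum is saved when a new one is loaded; otherwise it is dropped. Note: the defaults stated here differ from the constructor shown above, which declares autosaving=False and creates "./hplc_data" when path is omitted.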

AXIS_MAPPING = {'x': 'min', 'y': 'mAu'}
INTERNAL_PROPERTIES = {'baseline', 'parameters', 'data_path'}
PUBLIC_PROPERTIES = {'y', 'timestamp', 'peaks', 'x'}
logger
def load_spectrum(self, data_path, channel='A'):
59    def load_spectrum(self, data_path, channel="A"):
60        """Loads the spectra from the given folder.
61
62        Args:
63            data_path (str): Path where HPLC data has been saved.
64        """
65
66        # to avoid dropping parameters when called in parent class
67        if self.x is not None:
68            if self.autosaving:
69                self.save_data(filename=f"{data_path}_{channel}")
70                self._dump()
71
72        # get raw data
73        x, y = self.extract_rawdata(data_path, channel)
74
75        # get timestamp
76        tstr = data_path.split(".")[0].split("_")[-1]
77        timestamp = time.mktime(time.strptime(tstr, TIME_FORMAT))
78
79        # loading all data
80        super().load_spectrum(x, y, timestamp)

Loads the spectra from the given folder.

Args: data_path (str): Path where HPLC data has been saved.

def extract_rawdata(self, experiment_dir: str, channel: str):
 84    def extract_rawdata(self, experiment_dir: str, channel: str):
 85        """
 86        Reads raw data from Chemstation .CH files.
 87
 88        Args:
 89            experiment_dir: .D directory with the .CH files
 90
 91        Returns:
 92            np.array(times), np.array(values)   Raw chromatogram data
 93        """
 94        filename = os.path.join(experiment_dir, f"DAD1{channel}")
 95        npz_file = filename + ".npz"
 96
 97        if os.path.exists(npz_file):
 98            # already processed
 99            data = np.load(npz_file)
100            return data["times"], data["values"]
101        else:
102            self.logger.debug("NPZ file not found. First time loading data.")
103            ch_file = filename + ".ch"
104            data = CHFile(ch_file)
105            np.savez_compressed(npz_file, times=data.times, values=data.values)
106            return np.array(data.times), np.array(data.values)

Reads raw data from Chemstation .CH files.

Args: experiment_dir: .D directory with the .CH files

Returns: np.array(times), np.array(values) Raw chromatogram data

def extract_peakarea(self, experiment_dir: str):
108    def extract_peakarea(self, experiment_dir: str):
109        """
110        Reads processed data from Chemstation report files.
111
112        Args:
113            experiment_dir: .D directory with the report files
114        """
115        # filename = os.path.join(experiment_dir, f"REPORT{CHANNELS[channel]}.csv")
116        # TODO parse file properly
117        # data = np.genfromtxt(filename, delimiter=',')
118        # return data
119        pass

Reads processed data from Chemstation report files.

Args: experiment_dir: .D directory with the report files

def default_processing(self):
121    def default_processing(self):
122        """
123        Processes the chromatogram in place.
124        """
125        # trim first 5 min and last 3 min of run
126        self.trim(5, 25)
127        # parameters found to work best for chromatogram data
128        self.correct_baseline(lmbd=1e5, p=0.0001, n_iter=10)
129        # get all peaks in processed chromatogram
130        self.find_peaks()

Processes the chromatogram in place.