pychemstation.analysis.base_spectrum
import pickle
import os
import logging

from abc import ABC, abstractmethod

import numpy as np
import matplotlib.pyplot as plt

from scipy import (
    sparse,
    signal,
    interpolate,
    integrate,
)

from .utils import interpolate_to_index, find_nearest_value_index


class AbstractSpectrum(ABC):
    """General class for handling spectroscopic data.

    Contains methods for data manipulation (load/save) and basic processing
    features, such as baseline correction, smoothing, peak picking and
    integration.

    All data processing happens in place!
    """

    # for plotting
    AXIS_MAPPING = {
        "x": "x_data",
        "y": "y_data",
    }

    # list of properties to be saved
    PUBLIC_PROPERTIES = {
        "x",
        "y",
        "peaks",
        "timestamp",
    }

    # list of internal properties to be dumped during new data loading
    INTERNAL_PROPERTIES = {
        "baseline",
    }

    def __init__(self, path=None, autosaving=True):
        """Default constructor, loads properties into instance namespace.

        Can be redefined in ancestor classes.

        Args:
            path (Union[str, bool], optional): Valid path to save data to.
                If omitted, uses "./spectrum". If False, no folder is created.
            autosaving (bool, optional): If True (default), saves the current
                spectrum when a new one is loaded. Drops it otherwise.
        """

        self.autosaving = autosaving

        # loading public properties
        for prop in self.PUBLIC_PROPERTIES:
            setattr(self, prop, None)

        # loading internal properties
        for prop in self.INTERNAL_PROPERTIES:
            setattr(self, prop, None)

        # creating data path
        if path is None:
            self.path = os.path.join(".", "spectrum")
            os.makedirs(self.path, exist_ok=True)
        else:
            try:
                os.makedirs(path, exist_ok=True)
                self.path = path
            except TypeError:  # type(path) -> bool
                self.path = "."

        # creating logger
        if not hasattr(self, "logger"):
            self.logger = logging.getLogger(self.__class__.__name__)

    def _dump(self):
        """Dummy method to dump all spectral data. Used before loading new data."""

        self.__init__(path=self.path, autosaving=self.autosaving)

    @abstractmethod
    def load_spectrum(self, x, y, timestamp):
        """Loads the spectral data.

        This method must be redefined in ancestor classes.

        Args:
            x (np.array): An array with data to be plotted as the "x" axis.
            y (np.array): An array with data to be plotted as the "y" axis.
            timestamp (float): Timestamp of the corresponding spectrum.
        """

        # explicit check instead of a bare assert, so the validation
        # survives running python with optimizations (-O)
        if x.shape != y.shape:
            raise ValueError("X and Y data must have the same dimension.")

        if self.x is not None:
            if self.autosaving:
                self.save_data()
            self._dump()

        self.x = x
        self.y = y
        self.timestamp = timestamp

    def save_data(self, filename=None, verbose=False):
        """Saves the data to the given path using the python pickle module.

        Args:
            filename (str, optional): Filename for the current spectrum. If
                omitted, the current timestamp is used.
            verbose (bool, optional): If True, saves all processed (internal)
                data as well. Default: False.
        """
        if filename is None:
            filename = f"{self.timestamp}.pickle"
        else:
            # file extension as recommended by the python 3 documentation
            filename += ".pickle"

        path = os.path.join(self.path, filename)

        data = {
            prop: self.__dict__[prop]
            for prop in self.PUBLIC_PROPERTIES
            if self.__dict__[prop] is not None
        }

        if verbose:
            data.update(
                {
                    prop: self.__dict__[prop]
                    for prop in self.INTERNAL_PROPERTIES
                    if self.__dict__[prop] is not None
                }
            )

        with open(path, "wb") as f:
            pickle.dump(data, f)

        self.logger.info("Saved in %s", path)

    def load_data(self, path):
        """Loads the data from a saved pickle file.

        Data is loaded in place, so instance attributes are overwritten.

        Args:
            path (str): Valid path to a pickle file.
        """

        if self.x is not None:
            self._dump()

        # TODO add exception handling
        with open(path, "rb") as f:
            data = pickle.load(f)

        self.__dict__.update(data)

    def trim(self, xmin, xmax, in_place=True):
        """Trims the spectrum data within a specific X region.

        Args:
            xmin (float): Minimum position on the X axis to start from.
            xmax (float): Maximum position on the X axis to end at.
            in_place (bool): If True, trims in place; otherwise returns the
                trimmed data as a copy.

        Returns:
            (bool): True if trimmed in place.
            (Tuple[np.array, np.array]): Trimmed copy of the original arrays
                as a tuple with X and Y points respectively.
        """

        # creating the mask to map arrays
        above_ind = self.x > xmin
        below_ind = self.x < xmax
        full_mask = np.logical_and(above_ind, below_ind)

        # mapping arrays if they are supplied
        if in_place:
            self.y = self.y[full_mask]
            self.x = self.x[full_mask]
            if self.baseline is not None and self.baseline.shape == full_mask.shape:
                self.baseline = self.baseline[full_mask]
            return True
        else:
            return (self.x.copy()[full_mask], self.y.copy()[full_mask])

    def show_spectrum(
        self,
        filename=None,
        title=None,
        label=None,
    ):
        """Plots the spectral data using the matplotlib.pyplot module.

        Args:
            filename (str, optional): Filename for the current plot. If
                omitted, the figure is not saved.
            title (str, optional): Title for the spectrum plot. If omitted,
                no title is set.
            label (str, optional): Label for the spectrum plot. If omitted,
                the spectrum timestamp is used.
        """
        if label is None:
            label = f"{self.timestamp}"

        fig, ax = plt.subplots(figsize=(12, 8))

        ax.plot(
            self.x,
            self.y,
            color="xkcd:navy blue",
            label=label,
        )

        ax.set_xlabel(self.AXIS_MAPPING["x"])
        ax.set_ylabel(self.AXIS_MAPPING["y"])

        if title is not None:
            ax.set_title(title)

        # plotting peaks if found; using the axes object rather than the
        # implicit pyplot state keeps the scatter on the right figure
        if self.peaks is not None:
            ax.scatter(
                self.peaks[:, 1],
                self.peaks[:, 2],
                label="found peaks",
                color="xkcd:tangerine",
            )

        ax.legend()

        if filename is None:
            fig.show()
        else:
            path = os.path.join(self.path, "images")
            os.makedirs(path, exist_ok=True)
            fig.savefig(os.path.join(path, f"{filename}.png"), dpi=150)

    def find_peaks(self, threshold=1, min_width=0.1, min_dist=None, area=None):
        """Finds all peaks above the threshold with at least min_width width.

        Args:
            threshold (float, optional): Relative peak height with respect to
                the highest peak.
            min_width (float, optional): Minimum peak width.
            min_dist (int, optional): Minimum distance between peaks.
            area (Tuple[int, int], optional): Area to search peaks in,
                supplied as a (min, max) tuple of X values.

        Returns:
            (np.array): An array of peak ids as rounded peak_x coordinate
                values. If searching within a specified area, the full peak
                information matrix is returned, see below for details.

        Also updates the self.peaks attribute (if "area" is omitted) as:
            (np.array): An (n_peaks x 5) array with peak data as columns:
                peak_id (float): Rounded peak_x coordinate value.
                peak_x (float): X coordinate of the peak.
                peak_y (float): Y coordinate of the peak.
                peak_left_x (float): X coordinate of the left peak border.
                peak_right_x (float): X coordinate of the right peak border.

        Peak data is accessed by indexing, e.g.:
            self.peaks[n] gives all data for the n-th peak;
            self.peaks[:, 2] gives the Y coordinates of all found peaks.
        """

        # only dumping if area is omitted
        if self.peaks is not None and not area:
            self.peaks = None

        # trimming; keeping the matching X array so that peak indices map
        # back to correct coordinates when an area is supplied
        if area is not None:
            spec_x, spec_y = self.trim(area[0], area[1], False)
        else:
            spec_x = self.x.copy()
            spec_y = self.y.copy()

        threshold *= self.y.max() - self.y.min()
        peaks, _ = signal.find_peaks(
            spec_y, height=threshold, width=min_width, distance=min_dist
        )

        # obtaining width for full peak height
        # TODO deal with intersecting peaks!
        # TODO deal with incorrect peak width
        pw = signal.peak_widths(spec_y, peaks, rel_height=0.95)

        # converting all to column vectors by adding an extra dimension along
        # the 2nd axis; check the documentation on np.newaxis for details
        peak_xs = spec_x[peaks][:, np.newaxis]
        peak_ys = spec_y[peaks][:, np.newaxis]
        peaks_ids = np.around(peak_xs)
        peaks_left_ids = interpolate_to_index(spec_x, pw[2])[:, np.newaxis]
        peaks_right_ids = interpolate_to_index(spec_x, pw[3])[:, np.newaxis]

        if area is None:
            # updating only if area is not specified
            self.peaks = np.hstack(
                (
                    peaks_ids,
                    peak_xs,
                    peak_ys,
                    peaks_left_ids,
                    peaks_right_ids,
                )
            )
            return peaks_ids

        return np.hstack(
            (
                peaks_ids,
                peak_xs,
                peak_ys,
                peaks_left_ids,
                peaks_right_ids,
            )
        )

    def correct_baseline(self, lmbd=1e3, p=0.01, n_iter=10):
        """Generates and subtracts the baseline for the given spectrum.

        Based on Eilers, P.; Boelens, H. (2005): Baseline Correction with
        Asymmetric Least Squares Smoothing.

        Default values were chosen arbitrarily, based on processing Raman
        spectra.

        Args:
            lmbd (float): Parameter defining the smoothness of the baseline;
                the larger lmbd is, the smoother the baseline will be.
                Recommended values between 1e2 and 1e5.
            p (float): An asymmetric least squares parameter to compute the
                weights of the residuals. Recommended values between 0.1 and
                0.001.
            n_iter (int, optional): Number of iterations to perform the fit,
                recommended values between 5 and 10.
        """

        # generating the baseline first
        L = len(self.y)
        D = sparse.csc_matrix(np.diff(np.eye(L), 2))
        w = np.ones(L)
        for _ in range(n_iter):
            W = sparse.spdiags(w, 0, L, L)
            Z = W + lmbd * D.dot(D.transpose())
            z = sparse.linalg.spsolve(Z, w * self.y)
            w = p * (self.y > z) + (1 - p) * (self.y < z)

        # updating attribute for future use
        self.baseline = z

        # subtracting the baseline
        # TODO update peak coordinates if peaks were present
        self.y -= z
        self.logger.info("Baseline corrected")

    def integrate_area(self, area, rule="trapz"):
        """Integrates the spectrum within the given area.

        Args:
            area (Tuple[float, float]): Tuple with the left and right X-axis
                borders of the desired area.
            rule (str): Method for integration: "trapz" - trapezoidal rule
                (default), "simps" - Simpson's rule.

        Returns:
            float: Definite integral within the given area as approximated by
                the given method.
        """

        # closest value in experimental data and its index in data array
        _, left_idx = find_nearest_value_index(self.x, area[0])
        _, right_idx = find_nearest_value_index(self.x, area[1])

        if rule == "trapz":
            return integrate.trapz(
                self.y[left_idx : right_idx + 1], self.x[left_idx : right_idx + 1]
            )
        elif rule == "simps":
            return integrate.simps(
                self.y[left_idx : right_idx + 1], self.x[left_idx : right_idx + 1]
            )
        else:
            raise ValueError(
                'Only trapezoidal "trapz" or Simpson\'s "simps" '
                "rules are supported!"
            )

    def integrate_peak(self, peak, rule="trapz"):
        """Calculates the area of a given peak.

        Args:
            peak (float): (Rounded) peak X coordinate. If the precise peak
                position was not found, the closest one is picked.
            rule (str): Method for integration: "trapz" - trapezoidal rule
                (default), "simps" - Simpson's rule.

        Returns:
            float: Definite integral within the peak borders as approximated
                by the given method.
        """

        if self.peaks is None:
            self.find_peaks()

        true_peak, idx = find_nearest_value_index(self.peaks[:, 0], peak)
        _, _, _, left, right = self.peaks[idx]

        self.logger.debug(
            "Integrating peak found at %s, borders %.02f-%.02f", true_peak, left, right
        )

        return self.integrate_area((left, right), rule=rule)

    def smooth_spectrum(self, window_length=15, polyorder=7, in_place=True):
        """Smooths the spectrum using a Savitzky-Golay filter.

        For details see scipy.signal.savgol_filter.

        Default values for the window length and polynomial order were chosen
        arbitrarily, based on Raman spectra.

        Args:
            window_length (int): The length of the filter window (i.e. the
                number of coefficients). window_length must be a positive odd
                integer.
            polyorder (int): The order of the polynomial used to fit the
                samples. polyorder must be less than window_length.
            in_place (bool, optional): If True (default), smooths in place and
                returns True; otherwise returns the smoothed data.
        """

        if in_place:
            self.y = signal.savgol_filter(
                self.y, window_length=window_length, polyorder=polyorder
            )
            return True

        return signal.savgol_filter(
            self.y,
            window_length=window_length,
            polyorder=polyorder,
        )

    def default_processing(self):
        """Dummy method to return spectral data.

        Normally redefined in ancestor classes to include basic processing for
        a specific spectrum type.

        Returns:
            Tuple[np.array, np.array, float]: Spectral data as X and Y
                coordinates and a timestamp.
        """

        return self.x, self.y, self.timestamp

    @classmethod
    def from_data(cls, data):
        """Class method to instantiate the class from a saved data file.

        Args:
            data (str): Path to a spectral data file (as pickle).

        Returns:
            New instance with all data inside.
        """

        if not data.endswith(".pickle"):
            raise AttributeError("Only .pickle files are supported")

        path = os.path.abspath(os.path.dirname(data))

        spec = cls(path)
        spec.load_data(data)

        return spec

    def copy(self):
        """Dummy method to return a new instance with the same data as the
        current one.

        Returns:
            New object of the same class with the same data.
        """

        # creating new instance
        spec = self.__class__(self.path, self.autosaving)

        # loading attributes
        for prop in self.PUBLIC_PROPERTIES.union(self.INTERNAL_PROPERTIES):
            setattr(spec, prop, getattr(self, prop))

        return spec
class AbstractSpectrum(ABC)
General class for handling spectroscopic data
Contains methods for data manipulation (load/save) and basic processing features, such as baseline correction, smoothing, peak picking and integration.
All data processing happens in place!
def __init__(self, path=None, autosaving=True)
Default constructor, loads properties into instance namespace.
Can be redefined in ancestor classes.
Args:
    path (Union[str, bool], optional): Valid path to save data to. If omitted, uses "./spectrum". If False, no folder is created.
    autosaving (bool, optional): If True (default), saves the current spectrum when a new one is loaded. Drops it otherwise.
@abstractmethod
def load_spectrum(self, x, y, timestamp)
Loads the spectral data.
This method must be redefined in ancestor classes.
Args:
    x (np.array): An array with data to be plotted as the "x" axis.
    y (np.array): An array with data to be plotted as the "y" axis.
    timestamp (float): Timestamp of the corresponding spectrum.
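Because load_spectrum is abstract, the class must be subclassed before use. A minimal sketch, assuming only what the source above shows; the RamanSpectrum name and the synthetic data are illustrative, not part of the package:

import time

import numpy as np

from pychemstation.analysis.base_spectrum import AbstractSpectrum


class RamanSpectrum(AbstractSpectrum):
    """Hypothetical concrete spectrum type for demonstration."""

    AXIS_MAPPING = {"x": "wavenumber, cm-1", "y": "counts"}

    def load_spectrum(self, x, y, timestamp):
        # the base implementation validates shapes, autosaves the
        # previous spectrum (if any) and stores x, y and timestamp
        super().load_spectrum(x, y, timestamp)


# synthetic spectrum: two Gaussian peaks on a gentle slope
x = np.linspace(400.0, 1800.0, 2000)
y = (
    np.exp(-((x - 800.0) ** 2) / 50.0)
    + 0.5 * np.exp(-((x - 1200.0) ** 2) / 80.0)
    + 1e-4 * x
)

spec = RamanSpectrum()  # data will be autosaved under ./spectrum
spec.load_spectrum(x, y, time.time())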
def save_data(self, filename=None, verbose=False)
Saves the data to the given path using the python pickle module.
Args:
    filename (str, optional): Filename for the current spectrum. If omitted, the current timestamp is used.
    verbose (bool, optional): If True, saves all processed (internal) data as well. Default: False.
def load_data(self, path)
Loads the data from a saved pickle file.
Data is loaded in place, so instance attributes are overwritten.
Args:
    path (str): Valid path to a pickle file.
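A save/load round trip, continuing from the RamanSpectrum sketch above (the filename is illustrative):

# saves the public properties (x, y, peaks, timestamp) as a pickle;
# ".pickle" is appended to an explicit filename automatically
spec.save_data(filename="calibration_run")  # -> ./spectrum/calibration_run.pickle

# loading overwrites the attributes of the target instance in place
fresh = RamanSpectrum()
fresh.load_data("./spectrum/calibration_run.pickle")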
def trim(self, xmin, xmax, in_place=True)
Trims the spectrum data within a specific X region.
Args:
    xmin (float): Minimum position on the X axis to start from.
    xmax (float): Maximum position on the X axis to end at.
    in_place (bool): If True, trims in place; otherwise returns the trimmed data as a copy.
Returns:
    (bool): True if trimmed in place.
    (Tuple[np.array, np.array]): Trimmed copy of the original arrays as a tuple with X and Y points respectively.
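Usage, continuing from the sketch above (the X limits are arbitrary):

spec.trim(600, 1500)  # in place, returns True

# with in_place=False the stored data stays untouched
x_cut, y_cut = spec.trim(700, 900, in_place=False)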
def show_spectrum(self, filename=None, title=None, label=None)
Plots the spectral data using the matplotlib.pyplot module.
Args:
    filename (str, optional): Filename for the current plot. If omitted, the figure is not saved.
    title (str, optional): Title for the spectrum plot. If omitted, no title is set.
    label (str, optional): Label for the spectrum plot. If omitted, the spectrum timestamp is used.
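Usage, continuing from the sketch above (filename and title are illustrative):

spec.show_spectrum(title="Synthetic Raman spectrum")  # display only

# with a filename the figure is written to <path>/images/<filename>.png
spec.show_spectrum(filename="run_01", title="Synthetic Raman spectrum")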
def find_peaks(self, threshold=1, min_width=0.1, min_dist=None, area=None)
Finds all peaks above the threshold with at least min_width width.
Args:
    threshold (float, optional): Relative peak height with respect to the highest peak.
    min_width (float, optional): Minimum peak width.
    min_dist (int, optional): Minimum distance between peaks.
    area (Tuple[int, int], optional): Area to search peaks in, supplied as a (min, max) tuple of X values.
Returns:
    (np.array): An array of peak ids as rounded peak_x coordinate values. If searching within a specified area, the full peak information matrix is returned, see below for details.
Also updates the self.peaks attribute (if "area" is omitted) as:
    (np.array): An (n_peaks x 5) array with peak data as columns:
        peak_id (float): Rounded peak_x coordinate value.
        peak_x (float): X coordinate of the peak.
        peak_y (float): Y coordinate of the peak.
        peak_left_x (float): X coordinate of the left peak border.
        peak_right_x (float): X coordinate of the right peak border.
Peak data is accessed by indexing, e.g.:
    self.peaks[n] gives all data for the n-th peak;
    self.peaks[:, 2] gives the Y coordinates of all found peaks.
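Usage, continuing from the sketch above; threshold and width values are illustrative (both synthetic Gaussians sit well above a 0.1 relative threshold):

peak_ids = spec.find_peaks(threshold=0.1, min_width=2)

# the full matrix is kept on the instance:
print(spec.peaks[0])     # id, x, y, left border, right border of the first peak
print(spec.peaks[:, 2])  # Y coordinates of all found peaks

# searching inside an area returns the matrix directly
# and leaves self.peaks untouched
area_peaks = spec.find_peaks(threshold=0.1, area=(700, 900))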
def correct_baseline(self, lmbd=1e3, p=0.01, n_iter=10)
Generates and subtracts the baseline for the given spectrum.
Based on Eilers, P.; Boelens, H. (2005): Baseline Correction with Asymmetric Least Squares Smoothing.
Default values were chosen arbitrarily, based on processing Raman spectra.
Args:
    lmbd (float): Parameter defining the smoothness of the baseline; the larger lmbd is, the smoother the baseline will be. Recommended values between 1e2 and 1e5.
    p (float): An asymmetric least squares parameter to compute the weights of the residuals. Recommended values between 0.1 and 0.001.
    n_iter (int, optional): Number of iterations to perform the fit, recommended values between 5 and 10.
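Usage, continuing from the sketch above; the parameters are illustrative starting points inside the recommended ranges:

# fits an asymmetric-least-squares baseline, stores it in
# self.baseline and subtracts it from self.y in place
spec.correct_baseline(lmbd=1e4, p=0.001)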
def integrate_area(self, area, rule="trapz")
Integrates the spectrum within the given area.
Args:
    area (Tuple[float, float]): Tuple with the left and right X-axis borders of the desired area.
    rule (str): Method for integration: "trapz" - trapezoidal rule (default), "simps" - Simpson's rule.

Returns:
    float: Definite integral within the given area as approximated by the given method.
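Usage, continuing from the sketch above (the borders are arbitrary):

area_trapz = spec.integrate_area((750, 850))                # trapezoidal rule
area_simps = spec.integrate_area((750, 850), rule="simps")  # Simpson's rule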
def integrate_peak(self, peak, rule="trapz")
Calculates the area of a given peak.
Args:
    peak (float): (Rounded) peak X coordinate. If the precise peak position was not found, the closest one is picked.
    rule (str): Method for integration: "trapz" - trapezoidal rule (default), "simps" - Simpson's rule.

Returns:
    float: Definite integral within the peak borders as approximated by the given method.
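Usage, continuing from the sketch above; peak ids are rounded X coordinates, so the Gaussian near x = 800 can be addressed by that value:

# runs find_peaks() first if no peaks are stored yet
peak_area = spec.integrate_peak(800)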
def smooth_spectrum(self, window_length=15, polyorder=7, in_place=True)
Smooths the spectrum using a Savitzky-Golay filter.
For details see scipy.signal.savgol_filter.
Default values for the window length and polynomial order were chosen arbitrarily, based on Raman spectra.
Args:
    window_length (int): The length of the filter window (i.e. the number of coefficients). window_length must be a positive odd integer.
    polyorder (int): The order of the polynomial used to fit the samples. polyorder must be less than window_length.
    in_place (bool, optional): If True (default), smooths in place and returns True; otherwise returns the smoothed data.
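Usage, continuing from the sketch above (window and order are illustrative and must satisfy the constraints listed):

spec.smooth_spectrum()  # in place with the defaults, returns True

# as a copy, leaving self.y untouched
y_smooth = spec.smooth_spectrum(window_length=21, polyorder=3, in_place=False)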
def default_processing(self)
Dummy method to return spectral data.
Normally redefined in ancestor classes to include basic processing for a specific spectrum type.
Returns:
    Tuple[np.array, np.array, float]: Spectral data as X and Y coordinates and a timestamp.
@classmethod
def from_data(cls, data)
Class method to instantiate the class from a saved data file.
Args:
    data (str): Path to a spectral data file (as pickle).
Returns:
    New instance with all data inside.
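Usage, continuing from the save_data example above; the directory of the file becomes the new instance's data path:

spec2 = RamanSpectrum.from_data("./spectrum/calibration_run.pickle")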
def copy(self)
Dummy method to return a new instance with the same data as the current one.
Returns:
    New object of the same class with the same data.