Coverage for C:\Users\babdulkadirola\OneDrive - Delft University of Technology\Desktop\nmrlineshapeanalyser\src\core.py: 95%
232 statements
« prev ^ index » next coverage.py v7.6.8, created at 2024-12-04 16:48 +0100
« prev ^ index » next coverage.py v7.6.8, created at 2024-12-04 16:48 +0100
1import nmrglue as ng
2import numpy as np
3from scipy.optimize import curve_fit
4import matplotlib.pyplot as plt
5import matplotlib as mpl
6from typing import List, Tuple, Dict, Optional, Union
7import warnings
8import pandas as pd
class NMRProcessor:
    """
    A comprehensive class for processing and analyzing NMR data.

    It combines data loading (Bruker processed data via nmrglue), region
    selection, normalization, multi-peak Pseudo-Voigt fitting, reporting
    and visualization.

    Fit parameters are always laid out as a flat sequence with five values
    per peak, in the order ``x0, amp, width, eta, offset``.
    """

    def __init__(self) -> None:
        """Initialize the NMR processor with default plot style."""
        self.data = None          # spectrum returned by ng.bruker.read_pdata
        self.number = None        # digits of the nucleus label (e.g. "13")
        self.nucleus = None       # letters of the nucleus label (e.g. "C")
        self.uc = None            # nmrglue unit-conversion object
        self.ppm = None           # ppm scale matching ``self.data``
        self.ppm_limits = None    # ppm limits reported by ``self.uc``
        self.fixed_params = None  # per-peak tuples; item 0 is a fixed x0 or None
        self.carrier_freq = None  # udic 'obs' value; used to convert ppm -> Hz
        self.set_plot_style()

    @staticmethod
    def set_plot_style() -> None:
        """Set up the matplotlib plotting style (global rcParams)."""
        # ``plt.rcParams`` and ``mpl.rcParams`` are the same object; a single
        # update keeps the style definition in one place.
        mpl.rcParams.update({
            'font.family': "sans-serif",
            'font.sans-serif': ['Arial'],
            'font.size': 14,
            'axes.linewidth': 2,
            'xtick.major.size': 8,
            'ytick.major.size': 8,
            'xtick.major.width': 1,
            'ytick.major.width': 1,
            'xtick.direction': 'out',
            'ytick.direction': 'out',
            'xtick.major.top': False,
            'ytick.major.right': False,
            'xtick.minor.size': 5,
            'ytick.minor.size': 5,
            'xtick.minor.width': 1,
            'ytick.minor.width': 1,
            'xtick.top': True,
            'ytick.right': True,
        })

    def load_data(self, filepath: str) -> None:
        """
        Load and process Bruker NMR data from the specified filepath.

        Populates ``data``, ``carrier_freq``, ``number``, ``nucleus``,
        ``uc``, ``ppm`` and ``ppm_limits``.

        Args:
            filepath (str): Path to the Bruker data directory
        """
        # Read the Bruker data
        dic, self.data = ng.bruker.read_pdata(filepath)

        # Set the spectral parameters
        udic = ng.bruker.guess_udic(dic, self.data)
        nuclei = udic[0]['label']
        self.carrier_freq = udic[0]['obs']

        # Split the label into its digit and letter parts
        self.number = ''.join(filter(str.isdigit, nuclei))
        self.nucleus = ''.join(filter(str.isalpha, nuclei))

        # Create converter and get scales
        self.uc = ng.fileiobase.uc_from_udic(udic, dim=0)
        self.ppm = self.uc.ppm_scale()
        self.ppm_limits = self.uc.ppm_limits()

    def select_region(self, ppm_start: float, ppm_end: float) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select a specific region of the NMR spectrum for analysis.

        The two limits may be given in either order; when ``ppm_start`` is
        larger than ``ppm_end`` they are swapped for the selection instead
        of producing an empty region.

        Args:
            ppm_start (float): Starting chemical shift value
            ppm_end (float): Ending chemical shift value

        Returns:
            Tuple containing x (ppm) and y (real intensity) data for the
            selected region, in the order they appear in the spectrum

        Raises:
            ValueError: If no data is loaded, or if the requested region
                lies outside the ppm range of the data
        """
        if self.data is None:
            raise ValueError("No data loaded. Call load_data first.")

        ppm_min = np.min(self.ppm)
        ppm_max = np.max(self.ppm)
        if ppm_start > ppm_max or ppm_end < ppm_min:
            raise ValueError(f"Selected region ({ppm_start}, {ppm_end}) is outside "
                             f"data range ({ppm_min}, {ppm_max})")

        # Order-insensitive limits: a (high, low) pair would otherwise
        # yield an all-False mask.
        low, high = min(ppm_start, ppm_end), max(ppm_start, ppm_end)
        region_mask = (self.ppm >= low) & (self.ppm <= high)

        return self.ppm[region_mask], self.data.real[region_mask]

    def normalize_data(self, x_data: np.ndarray, y_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Normalize the data for processing.

        The minimum of ``y_data`` is shifted to 0 and the maximum scaled to
        1. Constant input is returned as all zeros. The inputs are not
        modified.

        Args:
            x_data (np.ndarray): X-axis data (returned unchanged)
            y_data (np.ndarray): Y-axis data

        Returns:
            Tuple containing x and normalized y data
        """
        # Work in float to avoid integer division issues; the subtraction
        # below allocates a new array, so the caller's data stays untouched.
        y_float = np.asarray(y_data, dtype=float)
        y_normalized = y_float - np.min(y_float)
        y_amp = np.max(y_normalized)

        # Handle the case where all values are the same (y_amp would be 0)
        if y_amp != 0:
            y_normalized /= y_amp

        return x_data, y_normalized

    @staticmethod
    def pseudo_voigt(x: np.ndarray, x0: float, amp: float, width: float, eta: float) -> np.ndarray:
        """
        Calculate the Pseudo-Voigt function.

        The result is ``eta * L + (1 - eta) * G`` where ``L`` and ``G`` are
        a Lorentzian and a Gaussian that share the same centre, height
        ``amp`` and FWHM ``width``.

        Args:
            x (np.ndarray): X-axis values
            x0 (float): Peak center
            amp (float): Peak amplitude
            width (float): Peak width (FWHM)
            eta (float): Mixing parameter (0 for Gaussian, 1 for Lorentzian)

        Returns:
            np.ndarray: Calculated Pseudo-Voigt values
        """
        sigma = width / (2 * np.sqrt(2 * np.log(2)))  # FWHM -> Gaussian sigma
        gamma = width / 2                             # FWHM -> Lorentzian HWHM
        lorentzian = amp * (gamma**2 / ((x - x0)**2 + gamma**2))
        gaussian = amp * np.exp(-0.5 * ((x - x0) / sigma)**2)
        return eta * lorentzian + (1 - eta) * gaussian

    def pseudo_voigt_multiple(self, x: np.ndarray, *params) -> np.ndarray:
        """
        Calculate multiple Pseudo-Voigt peaks.

        ``self.fixed_params`` decides how ``params`` is consumed: a peak
        whose first entry is not ``None`` uses that value as ``x0`` and
        takes four values (amp, width, eta, offset) from ``params``;
        otherwise five values (x0, amp, width, eta, offset) are taken.

        Args:
            x (np.ndarray): X-axis values
            *params: Variable number of peak parameters

        Returns:
            np.ndarray: Sum of all Pseudo-Voigt peaks, each including its
            own constant offset
        """
        param_idx = 0
        # Float accumulator: ``zeros_like(x)`` alone would inherit an integer
        # dtype from ``x`` and make the in-place addition below fail.
        y = np.zeros_like(x, dtype=float)

        for fixed in self.fixed_params:
            if fixed[0] is not None:
                x0 = fixed[0]
                amp, width, eta, offset = params[param_idx:param_idx + 4]
                param_idx += 4
            else:
                x0, amp, width, eta, offset = params[param_idx:param_idx + 5]
                param_idx += 5

            y += self.pseudo_voigt(x, x0, amp, width, eta) + offset

        return y

    def fit_peaks(self, x_data: np.ndarray, y_data: np.ndarray,
                  initial_params: List[float], fixed_x0: Optional[List[bool]] = None) -> Tuple[np.ndarray, List[Dict], np.ndarray]:
        """
        Fit multiple Pseudo-Voigt peaks to the data.

        Overwrites ``self.fixed_params`` with the layout used for this fit.

        Args:
            x_data (np.ndarray): X-axis data
            y_data (np.ndarray): Y-axis data
            initial_params (List[float]): Initial peak parameters, five per
                peak in the order x0, amp, width, eta, offset
            fixed_x0 (Optional[List[bool]]): Which x0 positions to fix
                (defaults to none fixed)

        Returns:
            Tuple containing optimized parameters (five per peak, fixed x0
            values re-inserted), peak metrics, and fitted data

        Raises:
            ValueError: If the number of initial parameters is not a
                multiple of 5
        """
        # Input validation
        if len(initial_params) % 5 != 0:
            raise ValueError("Number of initial parameters must be divisible by 5")

        n_peaks = len(initial_params) // 5
        if fixed_x0 is None:
            fixed_x0 = [False] * n_peaks

        # Setup for fitting
        self.fixed_params = []
        fit_params = []
        lower_bounds = []
        upper_bounds = []

        # Bounds per parameter: amp >= 0, width >= 1, 0 <= eta <= 1, offset
        # free; a free x0 may move at most half the initial width.
        # NOTE(review): an initial width below 1 lies outside these bounds.
        for i in range(n_peaks):
            x0, amp, width, eta, offset = initial_params[5 * i:5 * (i + 1)]

            if fixed_x0[i]:
                self.fixed_params.append((x0, None, None, None, None))
                fit_params.extend([amp, width, eta, offset])
                lower_bounds.extend([0, 1, 0, -np.inf])
                upper_bounds.extend([np.inf, np.inf, 1, np.inf])
            else:
                self.fixed_params.append((None, None, None, None, None))
                fit_params.extend([x0, amp, width, eta, offset])
                lower_bounds.extend([x0 - width / 2, 0, 1, 0, -np.inf])
                upper_bounds.extend([x0 + width / 2, np.inf, np.inf, 1, np.inf])

        # Perform the fit
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            popt, pcov = curve_fit(self.pseudo_voigt_multiple, x_data, y_data,
                                   p0=fit_params, bounds=(lower_bounds, upper_bounds),
                                   maxfev=10000, method='trf')

        # Process results
        full_popt = self._process_fit_results(popt, initial_params, fixed_x0)
        peak_metrics = self.calculate_peak_metrics(full_popt, pcov, fixed_x0)
        fitted_data = self.pseudo_voigt_multiple(x_data, *popt)

        return full_popt, peak_metrics, fitted_data

    def _process_fit_results(self, popt: np.ndarray, initial_params: List[float],
                             fixed_x0: List[bool]) -> np.ndarray:
        """
        Expand the fitted parameters back to five values per peak.

        For peaks with a fixed x0 the value is taken from
        ``initial_params``; all other values come from ``popt``.
        """
        full_popt = []
        param_idx = 0
        n_peaks = len(initial_params) // 5

        for i in range(n_peaks):
            if fixed_x0[i]:
                x0 = initial_params[5 * i]
                amp, width, eta, offset = popt[param_idx:param_idx + 4]
                param_idx += 4
            else:
                x0, amp, width, eta, offset = popt[param_idx:param_idx + 5]
                param_idx += 5
            full_popt.extend([x0, amp, width, eta, offset])

        return np.array(full_popt)

    def calculate_peak_metrics(self, popt: np.ndarray, pcov: np.ndarray,
                               fixed_x0: List[bool]) -> List[Dict]:
        """
        Calculate metrics for each fitted peak.

        Args:
            popt (np.ndarray): Optimized parameters, five per peak
            pcov (np.ndarray): Covariance matrix of the parameters that
                were actually fitted (fixed x0 values have no entry)
            fixed_x0 (List[bool]): Which x0 positions were fixed

        Returns:
            List[Dict]: One dict per peak mapping 'x0', 'amplitude',
            'width', 'eta', 'offset', 'gaussian_area', 'lorentzian_area'
            and 'total_area' to ``(value, error)`` tuples
        """
        n_peaks = len(popt) // 5
        peak_results = []
        # One-sigma errors from the covariance diagonal
        errors = np.sqrt(np.diag(pcov)) if pcov.size else np.zeros_like(popt)
        error_idx = 0

        fwhm_per_sigma = 2 * np.sqrt(2 * np.log(2))
        root_two_pi = np.sqrt(2 * np.pi)

        for i in range(n_peaks):
            # Extract parameters for current peak
            x0, amp, width, eta, offset = popt[5 * i:5 * (i + 1)]

            # A fixed x0 was not fitted, so it has no entry in ``errors``
            if fixed_x0[i]:
                x0_err = 0
                amp_err, width_err, eta_err, offset_err = errors[error_idx:error_idx + 4]
                error_idx += 4
            else:
                x0_err, amp_err, width_err, eta_err, offset_err = errors[error_idx:error_idx + 5]
                error_idx += 5

            # Analytical areas of the two components
            sigma = width / fwhm_per_sigma
            gamma = width / 2

            gauss_area = (1 - eta) * amp * sigma * root_two_pi
            lorentz_area = eta * amp * np.pi * gamma
            total_area = gauss_area + lorentz_area

            # First-order error propagation over amp, eta and width
            gauss_area_err = np.sqrt(
                ((1 - eta) * sigma * root_two_pi * amp_err) ** 2 +
                (amp * sigma * root_two_pi * eta_err) ** 2 +
                ((1 - eta) * amp * root_two_pi * (width_err / fwhm_per_sigma)) ** 2
            )

            lorentz_area_err = np.sqrt(
                (eta * np.pi * gamma * amp_err) ** 2 +
                (amp * np.pi * gamma * eta_err) ** 2 +
                (eta * amp * np.pi * (width_err / 2)) ** 2
            )

            total_area_err = np.sqrt(gauss_area_err ** 2 + lorentz_area_err ** 2)

            # Store results
            peak_results.append({
                'x0': (x0, x0_err),
                'amplitude': (amp, amp_err),
                'width': (width, width_err),
                'eta': (eta, eta_err),
                'offset': (offset, offset_err),
                'gaussian_area': (gauss_area, gauss_area_err),
                'lorentzian_area': (lorentz_area, lorentz_area_err),
                'total_area': (total_area, total_area_err)
            })

        return peak_results

    def plot_results(self, x_data: np.ndarray, y_data: np.ndarray,
                     fitted_data: np.ndarray, peak_metrics: List[Dict],
                     popt: np.ndarray) -> "Tuple[plt.Figure, plt.Axes, List[np.ndarray]]":
        """
        Plot the fitting results with components.

        Also prints the detailed fit report to stdout.

        Args:
            x_data (np.ndarray): X-axis data
            y_data (np.ndarray): Y-axis data
            fitted_data (np.ndarray): Fitted curve data
            peak_metrics (List[Dict]): Peak metrics
            popt (np.ndarray): Optimized parameters, five per peak

        Returns:
            Tuple containing the figure, its single axes, and the list of
            per-peak component curves (without their offsets)
        """
        fig, ax1 = plt.subplots(1, 1, figsize=(12, 10))

        # Plot normalized data
        ax1.plot(x_data, y_data, 'ok', ms=1, label='Data')
        ax1.plot(x_data, fitted_data, '-r', lw=2, label='Fit')
        residuals = y_data - fitted_data
        # Residuals are drawn shifted down by 0.05
        ax1.plot(x_data, residuals - 0.05, '-g', lw=2, label='Residuals', alpha=0.5)

        # Plot components
        n_peaks = len(popt) // 5
        components = []

        for i in range(n_peaks):
            x0, amp, width, eta, offset = popt[5 * i:5 * (i + 1)]
            component = self.pseudo_voigt(x_data, x0, amp, width, eta)
            components.append(component)

            ax1.fill(x_data, component, alpha=0.5, label=f'Component {i+1}')
            ax1.plot(x0, self.pseudo_voigt(np.array([x0]), x0, amp, width, eta),
                     'ob', label='Peak Position' if i == 0 else None)

        ax1.invert_xaxis()
        ax1.legend(ncol=2, fontsize=10)
        ax1.set_title('Normalized Scale')
        ax1.set_xlabel(f'$^{{{self.number}}} \\ {self.nucleus}$ chemical shift (ppm)')
        ax1.hlines(0, x_data[0], x_data[-1], colors='blue', linestyles='dashed', alpha=0.5)

        plt.tight_layout()

        self._print_detailed_results(peak_metrics)

        return fig, ax1, components

    def _format_peak_report(self, index: int, metrics: Dict) -> List[str]:
        """
        Build the report lines (without trailing newlines) for one peak.

        The width-in-Hz line is only included when ``self.carrier_freq`` is
        set, so results can be reported for data that was not loaded through
        ``load_data``.
        """
        def pair(key: str, digits: int) -> str:
            value, error = metrics[key]
            return f"{value:.{digits}f} ± {error:.{digits}f}"

        lines = [
            f"Peak {index} (Position: {pair('x0', 2)}):",
            f"Amplitude: {pair('amplitude', 3)}",
            f"Width: {pair('width', 2)} in ppm",
        ]
        if self.carrier_freq is not None:
            width, width_err = metrics['width']
            lines.append(f"Width: {width*self.carrier_freq:.2f} ± "
                         f"{width_err*self.carrier_freq:.2f} in Hz")
        lines.extend([
            f"Eta: {pair('eta', 2)}",
            f"Offset: {pair('offset', 3)}",
            f"Gaussian Area: {pair('gaussian_area', 2)}",
            f"Lorentzian Area: {pair('lorentzian_area', 2)}",
            f"Total Area: {pair('total_area', 2)}",
        ])
        return lines

    @staticmethod
    def _compute_percentages(area_of_peaks: List[Tuple[float, float]]) -> List[Tuple[float, float]]:
        """
        Return ``(percentage, error)`` of the summed area for each peak.

        The error uses the absolute form of the propagation formula, which
        equals ``p * sqrt((dA/A)**2 + (dT/T)**2)`` for non-zero areas but
        stays defined when a single peak has zero area. A zero total area
        gives NaN for every peak.
        """
        total = sum(area for area, _ in area_of_peaks)
        total_err = np.sqrt(sum(err ** 2 for _, err in area_of_peaks))

        results = []
        for area, area_err in area_of_peaks:
            if total == 0:
                results.append((float('nan'), float('nan')))
                continue
            percentage = (area / total) * 100
            percentage_err = 100 * np.sqrt((area_err / total) ** 2 +
                                           (area * total_err / total ** 2) ** 2)
            results.append((percentage, percentage_err))
        return results

    def _print_detailed_results(self, peak_metrics: List[Dict]) -> None:
        """Print detailed fitting results and statistics."""
        print("\nPeak Fitting Results:")
        print("===================")

        area_of_peaks = []
        for i, metrics in enumerate(peak_metrics, 1):
            print("\n" + "\n".join(self._format_peak_report(i, metrics)))
            print("-" * 50)
            area_of_peaks.append(metrics['total_area'])

        self._calculate_and_print_percentages(area_of_peaks)

    def _calculate_and_print_percentages(self, area_of_peaks: List[Tuple[float, float]]) -> None:
        """Calculate and print percentage contributions of each peak."""
        overall_percentage = self._compute_percentages(area_of_peaks)
        for i, (percentage, percentage_err) in enumerate(overall_percentage, 1):
            print(f'Peak {i} Percentage is {percentage:.2f}% ± {percentage_err:.2f}%')

        overall_percentage_sum = sum(p[0] for p in overall_percentage)
        overall_percentage_sum_err = np.sqrt(sum(p[1]**2 for p in overall_percentage))
        print(f'Overall Percentage is {overall_percentage_sum:.2f}% ± {overall_percentage_sum_err:.2f}%')

    def save_results(self, filepath: str, x_data: np.ndarray, y_data: np.ndarray,
                     fitted_data: np.ndarray, peak_metrics: List[Dict],
                     popt: np.ndarray, components: List[np.ndarray]) -> None:
        """
        Save all results to files.

        Writes ``peak_data.csv``, ``pseudoVoigtPeak_metrics.txt`` and
        ``pseudoVoigtPeakFit.png``, each name appended directly to
        ``filepath``.

        Args:
            filepath (str): Base path (prefix) for saving files
            Other parameters as in plot_results
        """
        self._save_peak_data(filepath, x_data, y_data, fitted_data, components)
        self._save_metrics(filepath, peak_metrics)
        self._save_plot(filepath, x_data, y_data, fitted_data, peak_metrics,
                        popt)

    def _save_peak_data(self, filepath: str, x_data: np.ndarray, y_data: np.ndarray,
                        fitted_data: np.ndarray, components: List[np.ndarray]) -> None:
        """Save data, fit and per-peak components to ``<filepath>peak_data.csv``."""
        df = pd.DataFrame({'x_data': x_data, 'y_data': y_data, 'y_fit': fitted_data})

        for i, component in enumerate(components, 1):
            df[f'component_{i}'] = component

        df.to_csv(filepath + 'peak_data.csv', index=False)

    def _save_metrics(self, filepath: str, peak_metrics: List[Dict]) -> None:
        """
        Save peak metrics and percentages to
        ``<filepath>pseudoVoigtPeak_metrics.txt``.

        The file is written as UTF-8 because the report contains the
        non-ASCII character '±'.
        """
        with open(filepath + 'pseudoVoigtPeak_metrics.txt', 'w', encoding='utf-8') as file:
            area_of_peaks = []
            for i, metrics in enumerate(peak_metrics, 1):
                file.write("\n" + "\n".join(self._format_peak_report(i, metrics)) + "\n")
                file.write("\n" + "-" * 50 + "\n")
                area_of_peaks.append(metrics['total_area'])

            # Write percentages
            percentages = self._compute_percentages(area_of_peaks)
            for i, (percentage, percentage_err) in enumerate(percentages, 1):
                file.write(f'Peak {i} Percentage is {percentage:.2f}% ± {percentage_err:.2f}%\n')

            overall_percentage = sum(p[0] for p in percentages)
            file.write(f'Overall Percentage is {overall_percentage:.2f}%\n')

    def _save_plot(self, filepath: str, x_data: np.ndarray, y_data: np.ndarray,
                   fitted_data: np.ndarray, peak_metrics: List[Dict],
                   popt: np.ndarray) -> None:
        """Render the fit plot and save it to ``<filepath>pseudoVoigtPeakFit.png``."""
        fig, _, _ = self.plot_results(x_data, y_data, fitted_data, peak_metrics,
                                      popt)
        try:
            fig.savefig(filepath + 'pseudoVoigtPeakFit.png', bbox_inches='tight')
        finally:
            # Always release the figure, even if saving fails
            plt.close(fig)