Coverage for C:\Users\babdulkadirola\OneDrive - Delft University of Technology\Desktop\nmrlineshapeanalyser\src\core.py: 95%

232 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-12-04 16:48 +0100

1import nmrglue as ng 

2import numpy as np 

3from scipy.optimize import curve_fit 

4import matplotlib.pyplot as plt 

5import matplotlib as mpl 

6from typing import List, Tuple, Dict, Optional, Union 

7import warnings 

8import pandas as pd 

9 

class NMRProcessor:
    """
    Load, slice, normalize, fit, plot and export 1D NMR spectra.

    Typical workflow: ``load_data`` -> ``select_region`` -> ``normalize_data``
    -> ``fit_peaks`` -> ``plot_results`` / ``save_results``.

    Peak parameters are always handled as flat sequences with five values per
    peak, in the order ``x0, amp, width, eta, offset``.
    """

    def __init__(self):
        """Initialize empty processor state and apply the default plot style."""
        self.data = None
        self.number = None
        self.nucleus = None
        self.uc = None
        self.ppm = None
        self.ppm_limits = None
        # One tuple per peak; element 0 is the fixed x0 or None when x0 is fitted.
        self.fixed_params = None
        self.carrier_freq = None
        self.set_plot_style()

    @staticmethod
    def set_plot_style() -> None:
        """Set up the matplotlib plotting style (modifies global rcParams)."""
        mpl.rcParams['font.family'] = "sans-serif"
        plt.rcParams['font.sans-serif'] = ['Arial']
        plt.rcParams['font.size'] = 14
        plt.rcParams['axes.linewidth'] = 2
        mpl.rcParams['xtick.major.size'] = mpl.rcParams['ytick.major.size'] = 8
        mpl.rcParams['xtick.major.width'] = mpl.rcParams['ytick.major.width'] = 1
        mpl.rcParams['xtick.direction'] = mpl.rcParams['ytick.direction'] = 'out'
        mpl.rcParams['xtick.major.top'] = mpl.rcParams['ytick.major.right'] = False
        mpl.rcParams['xtick.minor.size'] = mpl.rcParams['ytick.minor.size'] = 5
        mpl.rcParams['xtick.minor.width'] = mpl.rcParams['ytick.minor.width'] = 1
        mpl.rcParams['xtick.top'] = mpl.rcParams['ytick.right'] = True

    def load_data(self, filepath: str) -> None:
        """
        Load and process Bruker NMR data from the specified filepath.

        Populates ``data``, ``carrier_freq``, ``number``, ``nucleus``, ``uc``,
        ``ppm`` and ``ppm_limits`` on the instance.

        Args:
            filepath (str): Path to the Bruker processed-data directory
        """
        # Read the Bruker data
        dic, self.data = ng.bruker.read_pdata(filepath)

        # Set the spectral parameters
        udic = ng.bruker.guess_udic(dic, self.data)
        nuclei = udic[0]['label']

        # Later multiplied with widths in ppm to report widths "in Hz".
        self.carrier_freq = udic[0]['obs']

        # Split the label into its digits and its letters (e.g. "13C" -> "13", "C")
        self.number = ''.join(filter(str.isdigit, nuclei))
        self.nucleus = ''.join(filter(str.isalpha, nuclei))

        # Create converter and get scales
        self.uc = ng.fileiobase.uc_from_udic(udic, dim=0)
        self.ppm = self.uc.ppm_scale()
        self.ppm_limits = self.uc.ppm_limits()

    def select_region(self, ppm_start: float, ppm_end: float) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select a specific region of the NMR spectrum for analysis.

        Points with ``ppm_start <= ppm <= ppm_end`` are kept, so ``ppm_start``
        is the lower bound; passing the bounds in descending order yields an
        empty selection.

        Args:
            ppm_start (float): Lower chemical shift bound (inclusive)
            ppm_end (float): Upper chemical shift bound (inclusive)

        Returns:
            Tuple containing x (ppm) and y (real part) data for the selected region

        Raises:
            ValueError: If no data is loaded, or the region lies entirely
                outside the ppm range of the data.
        """
        if self.data is None:
            raise ValueError("No data loaded. Call load_data first.")

        if ppm_start > np.max(self.ppm) or ppm_end < np.min(self.ppm):
            raise ValueError(f"Selected region ({ppm_start}, {ppm_end}) is outside "
                             f"data range ({np.min(self.ppm)}, {np.max(self.ppm)})")

        region_mask = (self.ppm >= ppm_start) & (self.ppm <= ppm_end)
        x_region = self.ppm[region_mask]
        y_real = self.data.real
        y_region = y_real[region_mask]

        return x_region, y_region

    def normalize_data(self, x_data: np.ndarray, y_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Normalize the data for processing.

        The y values are shifted so their minimum is 0 and then scaled so their
        maximum is 1. Constant input is only shifted (result is all zeros).
        The input array is not modified; x is returned unchanged.

        Args:
            x_data (np.ndarray): X-axis data
            y_data (np.ndarray): Y-axis data

        Returns:
            Tuple containing x and normalized y data
        """
        # astype(float) copies, and avoids integer division issues
        y_data = y_data.astype(float)
        y_ground = np.min(y_data)
        y_normalized = y_data - y_ground
        y_amp = np.max(y_normalized)

        # Handle the case where all values are the same (y_amp would be 0)
        if y_amp != 0:
            y_normalized /= y_amp

        return x_data, y_normalized

    @staticmethod
    def pseudo_voigt(x: np.ndarray, x0: float, amp: float, width: float, eta: float) -> np.ndarray:
        """
        Calculate the Pseudo-Voigt function.

        The result is ``eta * L + (1 - eta) * G`` where L and G are a
        Lorentzian and a Gaussian that share center, height and FWHM.

        Args:
            x (np.ndarray): X-axis values
            x0 (float): Peak center
            amp (float): Peak amplitude (height at x0)
            width (float): Peak width (FWHM)
            eta (float): Mixing parameter (0 for Gaussian, 1 for Lorentzian)

        Returns:
            np.ndarray: Calculated Pseudo-Voigt values
        """
        sigma = width / (2 * np.sqrt(2 * np.log(2)))  # Gaussian std. dev. from FWHM
        gamma = width / 2                             # Lorentzian half width
        lorentzian = amp * (gamma**2 / ((x - x0)**2 + gamma**2))
        gaussian = amp * np.exp(-0.5 * ((x - x0) / sigma)**2)
        return eta * lorentzian + (1 - eta) * gaussian

    def pseudo_voigt_multiple(self, x: np.ndarray, *params) -> np.ndarray:
        """
        Calculate the sum of multiple Pseudo-Voigt peaks.

        The layout of ``params`` is driven by ``self.fixed_params`` (set by
        ``fit_peaks``): a peak with a fixed center consumes four values
        ``(amp, width, eta, offset)``, any other peak consumes five
        ``(x0, amp, width, eta, offset)``. Each peak's offset is added to
        the total.

        Args:
            x (np.ndarray): X-axis values
            *params: Flat sequence of peak parameters as described above

        Returns:
            np.ndarray: Sum of all Pseudo-Voigt peaks (float array)
        """
        # Force a float accumulator: with an integer x, zeros_like(x) would be
        # integer-typed and the in-place float addition below would fail.
        x = np.asarray(x, dtype=float)
        y = np.zeros_like(x)
        param_idx = 0

        for fixed in self.fixed_params:
            if fixed[0] is not None:
                x0 = fixed[0]
                amp, width, eta, offset = params[param_idx:param_idx + 4]
                param_idx += 4
            else:
                x0, amp, width, eta, offset = params[param_idx:param_idx + 5]
                param_idx += 5

            y += self.pseudo_voigt(x, x0, amp, width, eta) + offset

        return y

    def fit_peaks(self, x_data: np.ndarray, y_data: np.ndarray,
                  initial_params: List[float], fixed_x0: Optional[List[bool]] = None) -> Tuple[np.ndarray, List[Dict], np.ndarray]:
        """
        Fit multiple Pseudo-Voigt peaks to the data.

        Bounds used for the fit: amp >= 0, width >= 1, 0 <= eta <= 1, offset
        unbounded, and a free x0 is kept within ``initial x0 +/- width / 2``.

        Args:
            x_data (np.ndarray): X-axis data
            y_data (np.ndarray): Y-axis data
            initial_params (List[float]): Initial peak parameters, five per
                peak in the order x0, amp, width, eta, offset
            fixed_x0 (Optional[List[bool]]): Which x0 positions to fix
                (default: none)

        Returns:
            Tuple containing optimized parameters (five per peak, fixed x0
            included), peak metrics, and fitted data

        Raises:
            ValueError: If the number of initial parameters is not divisible
                by 5, or ``fixed_x0`` has fewer entries than there are peaks.
        """
        # Input validation
        if len(initial_params) % 5 != 0:
            raise ValueError("Number of initial parameters must be divisible by 5")

        n_peaks = len(initial_params) // 5

        if fixed_x0 is None:
            fixed_x0 = [False] * n_peaks
        elif len(fixed_x0) < n_peaks:
            raise ValueError(f"fixed_x0 has {len(fixed_x0)} entries but "
                             f"{n_peaks} peaks were specified")

        # Setup for fitting
        self.fixed_params = []
        fit_params = []
        lower_bounds = []
        upper_bounds = []

        # Process each peak's parameters
        # NOTE(review): the lower bound on width is hard-coded to 1 (x units);
        # an initial width below 1 presumably makes p0 infeasible for
        # curve_fit -- confirm this floor is intended.
        for i in range(n_peaks):
            x0, amp, width, eta, offset = initial_params[5*i:5*(i+1)]

            if fixed_x0[i]:
                self.fixed_params.append((x0, None, None, None, None))
                fit_params.extend([amp, width, eta, offset])
                lower_bounds.extend([0, 1, 0, -np.inf])
                upper_bounds.extend([np.inf, np.inf, 1, np.inf])
            else:
                self.fixed_params.append((None, None, None, None, None))
                fit_params.extend([x0, amp, width, eta, offset])
                lower_bounds.extend([x0 - width/2, 0, 1, 0, -np.inf])
                upper_bounds.extend([x0 + width/2, np.inf, np.inf, 1, np.inf])

        # Perform the fit (RuntimeWarnings raised during optimization are silenced)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            popt, pcov = curve_fit(self.pseudo_voigt_multiple, x_data, y_data,
                                   p0=fit_params, bounds=(lower_bounds, upper_bounds),
                                   maxfev=10000, method='trf')

        # Process results
        full_popt = self._process_fit_results(popt, initial_params, fixed_x0)
        peak_metrics = self.calculate_peak_metrics(full_popt, pcov, fixed_x0)
        fitted_data = self.pseudo_voigt_multiple(x_data, *popt)

        return full_popt, peak_metrics, fitted_data

    def _process_fit_results(self, popt: np.ndarray, initial_params: List[float],
                             fixed_x0: List[bool]) -> np.ndarray:
        """
        Expand the fitted parameters back to five values per peak.

        For peaks whose x0 was fixed, the x0 from ``initial_params`` is
        re-inserted in front of the four fitted values.
        """
        full_popt = []
        param_idx = 0
        n_peaks = len(initial_params) // 5

        for i in range(n_peaks):
            if fixed_x0[i]:
                x0 = initial_params[5*i]
                amp, width, eta, offset = popt[param_idx:param_idx + 4]
                param_idx += 4
            else:
                x0, amp, width, eta, offset = popt[param_idx:param_idx + 5]
                param_idx += 5
            full_popt.extend([x0, amp, width, eta, offset])

        return np.array(full_popt)

    def calculate_peak_metrics(self, popt: np.ndarray, pcov: np.ndarray,
                               fixed_x0: List[bool]) -> List[Dict]:
        """
        Calculate metrics for each fitted peak.

        Args:
            popt (np.ndarray): Optimized parameters, five per peak (fixed x0
                values included)
            pcov (np.ndarray): Covariance matrix of the *fitted* parameters
                only (fixed x0 values have no row/column)
            fixed_x0 (List[bool]): Which x0 positions were fixed

        Returns:
            List[Dict]: One dict per peak. Every value is a
            ``(value, error)`` tuple, under the keys 'x0', 'amplitude',
            'width', 'eta', 'offset', 'gaussian_area', 'lorentzian_area'
            and 'total_area'.
        """
        n_peaks = len(popt) // 5
        peak_results = []
        # Standard errors are the square roots of the covariance diagonal.
        errors = np.sqrt(np.diag(pcov)) if pcov.size else np.zeros_like(popt)
        error_idx = 0

        for i in range(n_peaks):
            # Extract parameters for current peak
            x0, amp, width, eta, offset = popt[5*i:5*(i+1)]

            # A fixed x0 was not fitted, so it has no entry in `errors`
            if fixed_x0[i]:
                x0_err = 0
                amp_err, width_err, eta_err, offset_err = errors[error_idx:error_idx + 4]
                error_idx += 4
            else:
                x0_err, amp_err, width_err, eta_err, offset_err = errors[error_idx:error_idx + 5]
                error_idx += 5

            # Calculate areas of the Gaussian and Lorentzian parts
            sigma = width / (2 * np.sqrt(2 * np.log(2)))
            gamma = width / 2

            gauss_area = (1 - eta) * amp * sigma * np.sqrt(2 * np.pi)
            lorentz_area = eta * amp * np.pi * gamma
            total_area = gauss_area + lorentz_area

            # Error propagation: partial derivatives w.r.t. amp, eta and width,
            # combined in quadrature (parameter covariances are not included)
            gauss_area_err = np.sqrt(
                ((1 - eta) * sigma * np.sqrt(2 * np.pi) * amp_err) ** 2 +
                (amp * sigma * np.sqrt(2 * np.pi) * eta_err) ** 2 +
                ((1 - eta) * amp * np.sqrt(2 * np.pi) * (width_err / (2 * np.sqrt(2 * np.log(2))))) ** 2
            )

            lorentz_area_err = np.sqrt(
                (eta * np.pi * gamma * amp_err) ** 2 +
                (amp * np.pi * gamma * eta_err) ** 2 +
                (eta * amp * np.pi * (width_err / 2)) ** 2
            )

            total_area_err = np.sqrt(gauss_area_err ** 2 + lorentz_area_err ** 2)

            # Store results
            peak_results.append({
                'x0': (x0, x0_err),
                'amplitude': (amp, amp_err),
                'width': (width, width_err),
                'eta': (eta, eta_err),
                'offset': (offset, offset_err),
                'gaussian_area': (gauss_area, gauss_area_err),
                'lorentzian_area': (lorentz_area, lorentz_area_err),
                'total_area': (total_area, total_area_err)
            })

        return peak_results

    def plot_results(self, x_data: np.ndarray, y_data: np.ndarray,
                     fitted_data: np.ndarray, peak_metrics: List[Dict],
                     popt: np.ndarray) -> "Tuple[plt.Figure, plt.Axes, List[np.ndarray]]":
        """
        Plot the fitting results with components.

        Also prints the detailed fitting results to stdout.

        Args:
            x_data (np.ndarray): X-axis data
            y_data (np.ndarray): Y-axis data
            fitted_data (np.ndarray): Fitted curve data
            peak_metrics (List[Dict]): Peak metrics
            popt (np.ndarray): Optimized parameters, five per peak

        Returns:
            Tuple containing the figure, its single axes, and the list of
            per-peak component curves (without offsets)
        """
        fig, ax1 = plt.subplots(1, 1, figsize=(12, 10))

        # Plot normalized data
        ax1.plot(x_data, y_data, 'ok', ms=1, label='Data')
        ax1.plot(x_data, fitted_data, '-r', lw=2, label='Fit')
        residuals = y_data - fitted_data
        # Residuals are drawn shifted down by 0.05
        ax1.plot(x_data, residuals-0.05, '-g', lw=2, label='Residuals', alpha=0.5)

        # Plot components
        n_peaks = len(popt) // 5
        components = []

        for i in range(n_peaks):
            x0, amp, width, eta, offset = popt[5*i:5*(i+1)]
            component = self.pseudo_voigt(x_data, x0, amp, width, eta)
            components.append(component)

            ax1.fill(x_data, component, alpha=0.5, label=f'Component {i+1}')
            # Mark the peak maximum; only the first marker gets a legend entry
            ax1.plot(x0, self.pseudo_voigt(np.array([x0]), x0, amp, width, eta),
                     'ob', label='Peak Position' if i == 0 else None)

        ax1.invert_xaxis()
        ax1.legend(ncol=2, fontsize=10)
        ax1.set_title('Normalized Scale')
        ax1.set_xlabel(f'$^{{{self.number}}} \\ {self.nucleus}$ chemical shift (ppm)')
        ax1.hlines(0, x_data[0], x_data[-1], colors='blue', linestyles='dashed', alpha=0.5)

        plt.tight_layout()

        self._print_detailed_results(peak_metrics)

        return fig, ax1, components

    def _format_peak_metrics(self, index: int, metrics: Dict) -> List[str]:
        """
        Build the report lines for one peak (shared by printing and saving).

        The width "in Hz" line is the ppm width multiplied by
        ``self.carrier_freq``; it is omitted when no carrier frequency is
        known (e.g. the data was not loaded through ``load_data``).
        """
        lines = [
            f"\nPeak {index} (Position: {metrics['x0'][0]:.2f} ± {metrics['x0'][1]:.2f}):",
            f"Amplitude: {metrics['amplitude'][0]:.3f} ± {metrics['amplitude'][1]:.3f}",
            f"Width: {metrics['width'][0]:.2f} ± {metrics['width'][1]:.2f} in ppm",
        ]
        if self.carrier_freq is not None:
            lines.append(f"Width: {metrics['width'][0]*self.carrier_freq:.2f} ± {metrics['width'][1]*self.carrier_freq:.2f} in Hz")
        lines.extend([
            f"Eta: {metrics['eta'][0]:.2f} ± {metrics['eta'][1]:.2f}",
            f"Offset: {metrics['offset'][0]:.3f} ± {metrics['offset'][1]:.3f}",
            f"Gaussian Area: {metrics['gaussian_area'][0]:.2f} ± {metrics['gaussian_area'][1]:.2f}",
            f"Lorentzian Area: {metrics['lorentzian_area'][0]:.2f} ± {metrics['lorentzian_area'][1]:.2f}",
            f"Total Area: {metrics['total_area'][0]:.2f} ± {metrics['total_area'][1]:.2f}",
        ])
        return lines

    @staticmethod
    def _percentage_contributions(area_of_peaks: List[Tuple[float, float]]) -> List[Tuple[float, float]]:
        """
        Return ``(percentage, error)`` of the summed area for every peak.

        The error combines the relative errors of the peak area and of the
        summed area in quadrature. It is evaluated in absolute form, so a peak
        with zero area does not cause a division by zero. If the summed area
        is zero, every percentage and error is NaN.
        """
        total_area_sum = sum(area[0] for area in area_of_peaks)
        total_area_sum_err = np.sqrt(sum(area[1]**2 for area in area_of_peaks))

        if total_area_sum == 0:
            return [(float('nan'), float('nan')) for _ in area_of_peaks]

        contributions = []
        for area, area_err in area_of_peaks:
            percentage = (area / total_area_sum) * 100
            # Same as percentage * sqrt((area_err/area)^2 + (sum_err/sum)^2),
            # with the first term rewritten so `area` is never a divisor.
            percentage_err = np.sqrt((100 * area_err / total_area_sum) ** 2 +
                                     (percentage * total_area_sum_err / total_area_sum) ** 2)
            contributions.append((percentage, percentage_err))
        return contributions

    def _print_detailed_results(self, peak_metrics: List[Dict]) -> None:
        """Print detailed fitting results and percentage statistics to stdout."""
        print("\nPeak Fitting Results:")
        print("===================")

        area_of_peaks = []
        for i, metrics in enumerate(peak_metrics, 1):
            for line in self._format_peak_metrics(i, metrics):
                print(line)
            print("-" * 50)
            area_of_peaks.append(metrics['total_area'])

        self._calculate_and_print_percentages(area_of_peaks)

    def _calculate_and_print_percentages(self, area_of_peaks: List[Tuple[float, float]]) -> None:
        """
        Calculate and print percentage contributions of each peak.

        Args:
            area_of_peaks: ``(area, area_error)`` tuple for every peak
        """
        overall_percentage = self._percentage_contributions(area_of_peaks)
        for i, (percentage, percentage_err) in enumerate(overall_percentage, 1):
            print(f'Peak {i} Percentage is {percentage:.2f}% ± {percentage_err:.2f}%')

        overall_percentage_sum = sum(p[0] for p in overall_percentage)
        overall_percentage_sum_err = np.sqrt(sum(p[1]**2 for p in overall_percentage))
        print(f'Overall Percentage is {overall_percentage_sum:.2f}% ± {overall_percentage_sum_err:.2f}%')

    def save_results(self, filepath: str, x_data: np.ndarray, y_data: np.ndarray,
                     fitted_data: np.ndarray, peak_metrics: List[Dict],
                     popt: np.ndarray, components: List[np.ndarray]) -> None:
        """
        Save all results (CSV data, metrics text file, PNG plot) to files.

        Args:
            filepath (str): Base path for saving files; it is used as a plain
                string prefix, so include a trailing separator to target a
                directory
            components (List[np.ndarray]): Per-peak curves as returned by
                ``plot_results``
            Other parameters as in plot_results
        """
        self._save_peak_data(filepath, x_data, y_data, fitted_data, components)
        self._save_metrics(filepath, peak_metrics)
        self._save_plot(filepath, x_data, y_data, fitted_data, peak_metrics,
                        popt)

    def _save_peak_data(self, filepath: str, x_data: np.ndarray, y_data: np.ndarray,
                        fitted_data: np.ndarray, components: List[np.ndarray]) -> None:
        """Save data, fit and one column per component to ``<filepath>peak_data.csv``."""
        df = pd.DataFrame({'x_data': x_data, 'y_data': y_data, 'y_fit': fitted_data})

        for i, component in enumerate(components):
            df[f'component_{i+1}'] = component

        df.to_csv(filepath + 'peak_data.csv', index=False)

    def _save_metrics(self, filepath: str, peak_metrics: List[Dict]) -> None:
        """Save peak metrics and percentages to ``<filepath>pseudoVoigtPeak_metrics.txt``."""
        # Explicit UTF-8: the report contains "±", which the platform default
        # encoding is not guaranteed to be able to represent.
        with open(filepath + 'pseudoVoigtPeak_metrics.txt', 'w', encoding='utf-8') as file:
            area_of_peaks = []
            for i, metrics in enumerate(peak_metrics, 1):
                for line in self._format_peak_metrics(i, metrics):
                    file.write(line + "\n")
                file.write("\n" + "-" * 50 + "\n")
                area_of_peaks.append(metrics['total_area'])

            # Write percentages
            contributions = self._percentage_contributions(area_of_peaks)
            for i, (percentage, percentage_err) in enumerate(contributions, 1):
                file.write(f'Peak {i} Percentage is {percentage:.2f}% ± {percentage_err:.2f}%\n')

            overall_percentage = sum(p[0] for p in contributions)
            file.write(f'Overall Percentage is {overall_percentage:.2f}%\n')

    def _save_plot(self, filepath: str, x_data: np.ndarray, y_data: np.ndarray,
                   fitted_data: np.ndarray, peak_metrics: List[Dict],
                   popt: np.ndarray) -> None:
        """Render the plot and save it to ``<filepath>pseudoVoigtPeakFit.png``."""
        fig, _, _ = self.plot_results(x_data, y_data, fitted_data, peak_metrics,
                                      popt)
        fig.savefig(filepath + 'pseudoVoigtPeakFit.png', bbox_inches='tight')
        # Close the figure so repeated saves do not accumulate open figures
        plt.close(fig)