#!/usr/bin/env python
# encoding: utf-8
"""A set of tools for computing bucketing for 1D and 2D NMR spectra
First version by DELSUC Marc-André on 2015-09-06.
extended in 2017
This plugin implements the bucketing routines developed in the work
**Automatic differential analysis of NMR experiments in complex samples**
Laure Margueritte, Petar Markov, Lionel Chiron, Jean-Philippe Starck, Catherine Vonthron-Sénécheau, Mélanie Bourjot, and Marc-André Delsuc
*Magn. Reson. Chem.*, (2018) **80** (5), 1387. http://doi.org/10.1002/mrc.4683
It implements 1D and 2D bucketing
each bucket has a constant programmable size in ppm,
for each bucket, the following properties are computed:
center, normalized area, max, min, standard deviation, bucket_size
The results are printed in csv format either on screen or into a file
"""
from __future__ import print_function
import numpy as np
import scipy.stats as stats
import unittest
from spike import NPKError
from spike.NPKData import NPKData_plugin, NPKData
from spike.util.signal_tools import findnoiselevel
#---------------------------------------------------------------------------
def bucket1d(data, zoom=(0.5, 9.5), bsize=0.04, pp=False, sk=False, thresh=10, file=None):
"""
This tool permits to realize a bucket integration from the current 1D data-set.
You will have to give (all spectral values are in ppm)
- zoom (low,high), : the starting and ending ppm of the integration zone in the spectrum
- bsize: the size of the bucket
- pp: if True, the number of peaks in the bucket is also added
- peaks are detected if intensity is larger that thresh*noise
- sk: if True, skewness and kurtosis computed for each bucket
- file: the filename to which the result is written
For a better bucket integration, you should be careful that :
- the bucket size is not too small, size is better than number !
- the baseline correction has been carefully done
- the spectral window is correctly determined to encompass the meaningfull spectral zone.
"""
data.check1D()
start, end = zoom
if (bsize <= 0): NPKError( "Negative bucket size not allowed")
if (start-bsize/2 < data.axis1.itop(data.size1)): NPKError( "Starting point outside spectrum")
if (end+bsize/2 > data.axis1.itop(0)): NPKError( "Ending point outside spectrum")
if ((end-start)/bsize < 10): NPKError( "Integration zone too small or Bucket too large")
ppm_per_point = (data.axis1.specwidth/data.axis1.frequency/data.size1)
if (bsize < 2*ppm_per_point): NPKError( "Bucket size smaller than digital resolution !")
dcopy = data.copy() # work now on a real version of the data
dcopy.real(axis=1)
if pp:
noise = findnoiselevel( dcopy.get_buffer() )
dcopy.pp(thresh*noise)
peaklist = dcopy.peaks.pos
s = "# %i buckets with a mean size of %.2f data points" % \
( int(round((end-start+bsize)/bsize)), bsize/ppm_per_point)
print(s, file=file)
if file is not None: # wants the prompt on the terminal
print(s)
bklist = "center, bucket, max, min, std"
if pp:
bklist += ", peaks_nb"
if sk:
bklist += ", skewness, kurtosis"
bklist += ', bucket_size'
print(bklist, file=file)
there = max(start,end) # end of the bucket region
here = min(start,end) # running center of the bucket - initialized to begining
here2 = (here-bsize/2) # running beginning of the bucket
while (here2 < there):
ih = int(round(dcopy.axis1.ptoi(here2))) # int of running beginning of the bucket
next = (here2+bsize) # running en of bucket
inext = int(round(dcopy.axis1.ptoi(next))) # int of running en of bucket
if ih<0 or inext<0:
break
lbuf = dcopy.buffer[inext:ih]
integ = lbuf.sum()
try:
maxv = lbuf.max()
minv = lbuf.min()
except ValueError:
maxv = np.NaN # sum and std returns nan - max returns an error ???
minv = np.NaN # sum and std returns nan - min returns an error ???
stdv = lbuf.std()
bkvlist = "%.3f, %.1f, %.1f, %.1f, %.1f"%(here, integ/((ih-inext)*bsize), maxv, minv, stdv) #, (ih-inext) )
if pp:
pk = np.where((peaklist>=inext)&(peaklist<ih))
bkvlist = "%s, %d"%(bkvlist, len(pk[0]))
if sk:
skew = stats.skew(lbuf)
kurt = stats.kurtosis(lbuf)
bkvlist = "%s, %.3f, %.3f"%(bkvlist, skew, kurt)
print("%s, %d"%(bkvlist, (ih-inext) ), file=file)
here2 = next
here = (here+bsize)
return data
#---------------------------------------------------------------------------
def bucket2d(data, zoom=((0.5, 9.5),(0.5, 9.5)), bsize=(0.1, 0.1), pp=False, sk=False, thresh=10, file=None):
    """
    This tool realizes a bucket integration of the current 2D data-set
    and prints the result in csv format, one line per rectangular bucket.

    You will have to give the following values: (all spectral values are in ppm)
    - zoom (F1limits, F2limits) : the starting and ending ppm of the integration zone in the spectrum
            on each axis, the two limits may be given in any order
    - bsize (F1,F2): the sizes of the bucket
    - pp: if True, the number of peaks in the bucket is also added
        - peaks are detected if intensity is larger than thresh*noise
    - sk: if True, skewness and kurtosis are computed for each bucket
    - file: an open, writable file-like object to which the result is printed
            (it is handed to print(file=...)); with None the result goes to the screen.
            When file is given, the summary line is also echoed on the screen.

    The columns are: centerF1, centerF2, bucket (sum divided by the bucket area), max, min, std,
    then peaks_nb (if pp), skewness and kurtosis (if sk), and the bucket sizes in points in F1 and F2.

    The computation is done on a real-valued copy, data itself is returned.

    raises NPKError if a bucket size is not positive or smaller than twice
    the digital resolution, if the zone extends outside the spectrum,
    or if the zone holds less than 4 buckets along one axis.

    For a better bucket integration, you should be careful that :
    - the bucket size is not too small, size is better than number !
    - the baseline correction has been carefully done
    - the spectral window is correctly determined to encompass the meaningful spectral zone.
    """
    data.check2D()
    start1, end1 = zoom[0]
    start2, end2 = zoom[1]
    bsize1, bsize2 = bsize
    # the zones may be given in any order, work on sorted limits everywhere
    low1, high1 = min(start1, end1), max(start1, end1)
    low2, high2 = min(start2, end2), max(start2, end2)
    if bsize1 <= 0 or bsize2 <= 0:
        raise NPKError("Negative bucket size not allowed")
    if low1 - bsize1/2 < data.axis1.itop(data.size1):
        raise NPKError("Starting point outside spectrum")
    if low2 - bsize2/2 < data.axis2.itop(data.size2):
        raise NPKError("Starting point outside spectrum")
    if high1 + bsize1/2 > data.axis1.itop(0):
        raise NPKError("Ending point outside spectrum")
    if high2 + bsize2/2 > data.axis2.itop(0):
        raise NPKError("Ending point outside spectrum")
    if (high1 - low1)/bsize1 < 4:
        raise NPKError("Integration zone too small or Bucket too large")
    if (high2 - low2)/bsize2 < 4:
        raise NPKError("Integration zone too small or Bucket too large")
    ppm_per_point1 = data.axis1.specwidth/data.axis1.frequency/data.size1
    ppm_per_point2 = data.axis2.specwidth/data.axis2.frequency/data.size2
    if bsize1 < 2*ppm_per_point1:
        raise NPKError("Bucket size smaller than digital resolution !")
    if bsize2 < 2*ppm_per_point2:
        raise NPKError("Bucket size smaller than digital resolution !")

    dcopy = data.copy()     # work now on a real version of the data
    dcopy.real(axis=2)
    dcopy.real(axis=1)
    if pp:
        noise = findnoiselevel(dcopy.get_buffer())
        dcopy.pp(thresh*noise)
        peaklist = dcopy.peaks

    summary = "# %i rectangular buckets with a mean size of %.2f x %.2f data points" % (
        int(round((high1 - low1 + bsize1)/bsize1)*round((high2 - low2 + bsize2)/bsize2)),
        bsize1/ppm_per_point1, bsize2/ppm_per_point2)
    print(summary, file=file)
    if file is not None:    # wants the prompt on the terminal
        print(summary)

    columns = ["centerF1", "centerF2", "bucket", "max", "min", "std"]
    if pp:
        columns.append("peaks_nb")
    if sk:
        columns.extend(["skewness", "kurtosis"])
    columns.extend(["bucket_size_F1", "bucket_size_F2"])
    print(", ".join(columns), file=file)

    # the F2 buckets are the same for every F1 row: compute their limits only once
    f2_buckets = []             # list of (center in ppm, end index, beginning index)
    center2 = low2
    left2 = low2 - bsize2/2
    while left2 < high2:
        i_left2 = int(round(dcopy.axis2.ptoi(left2)))
        right2 = left2 + bsize2
        i_right2 = int(round(dcopy.axis2.ptoi(right2)))
        if i_left2 < 0 or i_right2 < 0:
            break
        f2_buckets.append((center2, i_right2, i_left2))
        left2 = right2
        center2 = center2 + bsize2

    center1 = low1              # running center of the bucket in F1
    left1 = low1 - bsize1/2     # running beginning of the bucket in F1, in ppm
    while left1 < high1:
        i_left1 = int(round(dcopy.axis1.ptoi(left1)))
        right1 = left1 + bsize1
        i_right1 = int(round(dcopy.axis1.ptoi(right1)))
        if i_left1 < 0 or i_right1 < 0:
            break
        npoints1 = i_left1 - i_right1
        if pp:
            # peaks located in this F1 row, whatever their F2 position
            row_peaks = [pk for pk in peaklist if i_right1 <= pk.posF1 < i_left1]
        for center2, i_right2, i_left2 in f2_buckets:
            npoints2 = i_left2 - i_right2
            # slices go from i_right to i_left: the end of the bucket in ppm
            # is expected at the lower index
            lbuf = dcopy.buffer[i_right1:i_left1, i_right2:i_left2]
            integ = lbuf.sum()
            area = (npoints1*bsize1) * (npoints2*bsize2)
            try:
                maxv = lbuf.max()
                minv = lbuf.min()
            except ValueError:
                # empty bucket: max()/min() raise; np.nan is used
                # because the np.NaN alias does not exist anymore in NumPy 2
                maxv = minv = np.nan
            stdv = lbuf.std()
            fields = ["%.3f" % center1,
                      "%.3f" % center2,
                      "%.1f" % (integ/area),
                      "%.1f" % maxv,
                      "%.1f" % minv,
                      "%.1f" % stdv]
            if pp:
                inside = [pk for pk in row_peaks if i_right2 <= pk.posF2 < i_left2]
                fields.append("%d" % len(inside))
            if sk:
                flat = lbuf.ravel()
                fields.append("%.3f" % stats.skew(flat))
                fields.append("%.3f" % stats.kurtosis(flat))
            fields.append("%d" % npoints1)
            fields.append("%d" % npoints2)
            print(", ".join(fields), file=file)
        left1 = right1
        center1 = center1 + bsize1
    return data
class BucketingTests(unittest.TestCase):
    """
    Test scaffold for the bucketing plugin.

    The only check, _test_log, does not start with "test", so the default
    unittest loader does not pick it up.
    """

    def setUp(self):
        """Switch messages on for every test."""
        # any value above 0 makes announce() print
        self.verbose = 1

    def announce(self):
        """Print the short description of the running test when verbose is above 0."""
        if not (self.verbose > 0):
            return
        print(self.shortDescription())

    def _test_log(self):
        """testing log"""
        from math import log
        self.announce()
        self.assertAlmostEqual(0.0, log(1.0))
# Register the two bucketing routines through NPKData_plugin,
# under the names "bucket1d" and "bucket2d".
# NOTE(review): presumably this makes them callable as methods of NPKData
# objects (d.bucket1d(...)) - confirm in spike.NPKData.
NPKData_plugin("bucket1d", bucket1d)
NPKData_plugin("bucket2d", bucket2d)