#!/usr/bin/env python
# encoding: utf-8
"""
This module contains several utilities for baseline correction of spectra
Created by Marc-Andre on 2015-03-26.
Modification by Lionel 2015-07-10
"""
from __future__ import print_function, division
from scipy.optimize import minimize
import numpy as np
from numpy import pi
import unittest
import multiprocessing as mp
from scipy import interpolate
def poly(x, coeff):
    """
    computes the polynomial over x with coeff, using Horner's scheme

    x     : scalar or array of abscissae, it is not modified
    coeff : sequence of coefficients, lowest order first, i.e.
            coeff[0] + coeff[1]*x + coeff[2]*x**2 + ...
    returns an array with the shape of x, in floating point
            (all zeros when coeff is empty)
    """
    remaining = list(coeff)     # private copy, consumed from the highest order down
    x = np.asanyarray(x)
    # The accumulator must be floating point: with an integer x, an integer
    # accumulator cannot take the in-place addition of float coefficients.
    if np.issubdtype(x.dtype, np.inexact):
        acc_type = x.dtype
    else:
        acc_type = np.float64
    y = np.zeros_like(x, dtype=acc_type)
    if not remaining:           # empty polynomial is identically zero
        return y
    y += remaining.pop()        # a
    while remaining:
        y *= x                  # ax + b
        y += remaining.pop()
    return y
def fitpolyL1(x, y, degree=2, power=1, method="Powell"):
    """
    fit a polynome to a function y over x, returns coefficients

    The quantity minimized is sum(|y - poly(x, c)|**power), so power=1
    is the L1 norm.
    degree : degree of the polynome, degree+1 coefficients are fitted
    power  : exponent applied to the absolute residuals
    method : passed as is to scipy.optimize.minimize
    The coefficients are returned in the order poly() consumes them.
    """
    def cost(c, xx, yy):
        "sum of the absolute residuals raised to power"
        residual = np.abs(yy - poly(xx, c))
        return np.sum(np.power(residual, power))

    start = [0] * (degree + 1)      # minimization starts from the null polynome
    fit = minimize(cost, start, args=(x, y), method=method)
    return fit.x
def bcL1(y, degree=2, power=1, method="Powell"):
    """
    compute a baseline on y using fitpolyL1

    The polynome is fitted against the point index (0, 1, ... y.size-1
    as floats) and evaluated on that same axis, so the result has one
    value per point of y.
    degree, power, method : passed to fitpolyL1
    """
    index = np.arange(1.0 * y.size)
    return poly(index, fitpolyL1(index, y, degree=degree, power=power, method=method))
def bcL1_paral(args):
    '''
    compute a baseline on y using fitpolyL1 for the parallel case.

    args : a sequence (y, degree, power, method), packed in a single
           argument so that the function can be handed to Pool.map / Pool.imap
    returns the same thing as bcL1(y, degree, power, method)
    '''
    y, degree, power, method = args
    # delegate to bcL1 rather than duplicating its body
    return bcL1(y, degree=degree, power=power, method=method)
def baseline0(y, degree=2, power=1, method="Powell",
              chunksize=2000, nbcores=None, ratiocov=0.7):
    """
    compute a piece-wise baseline on y using fitpolyL1

    degree : is the degree of the underlying polynome
    power : norm for the approximation
    method : minimization algorithm, used for every chunk
    chunksize : defines the size of the pieces.
    nbcores : number of cores used for parallelization of the calculations.
              If None (or 0) the chunks are processed serially.
    ratiocov : covering ratio of the chunks, each chunk is extended by
               ratiocov*(chunk length) points on both sides. The extension
               is capped to one chunk length.

    y - baseline0(y) produces a baseline corrected spectrum

    Each chunk is fitted independently. Over the first 2*cov points of a
    chunk, the new estimate is faded in linearly over what the previous
    chunk left there; beyond that the new estimate is used as is.
    """
    nchunk = y.size // chunksize
    if nchunk < 2:
        # too short to be cut in pieces : one single fit
        return bcL1(y, degree=degree, power=power, method=method)

    lsize = y.size // nchunk
    # covering part ; more than lsize would make the slice starts negative
    cov = min(int(lsize * ratiocov), lsize)
    corr = np.linspace(0.0, 1.0, 2 * cov)   # simple weighting coefficient for merging chunks
    corrm1 = 1.0 - corr
    # always float, so that an integer y does not truncate the estimates
    bl = np.zeros_like(y, dtype=float)

    # first chunk has nothing on its left to be merged with
    bl[0:lsize + cov] = bcL1(y[0:lsize + cov], degree=degree, power=power, method=method)

    # (start, stop) of all the following chunks ; the last one runs to the
    # very end of y, last point included
    spans = [(i * lsize - cov, (i + 1) * lsize + cov) for i in range(1, nchunk - 1)]
    spans.append(((nchunk - 1) * lsize - cov, y.size))

    def merge(estimates):
        "write the chunk estimates into bl, in order, fading each one in"
        for (start, stop), tbl in zip(spans, estimates):
            fade = slice(start, start + 2 * cov)
            bl[fade] = bl[fade] * corrm1 + tbl[:2 * cov] * corr    # correction from 0 to 2*cov
            bl[start + 2 * cov:stop] = tbl[2 * cov:]                # rest of the baseline is the estimate

    if nbcores:     # Parallelization
        pool = mp.Pool(nbcores)
        try:
            jobs = [(y[start:stop], degree, power, method) for start, stop in spans]
            # imap yields the results in the order of the jobs
            merge(pool.imap(bcL1_paral, jobs))
        finally:
            # release the workers even if a fit failed
            pool.terminate()
            pool.join()
    else:
        merge(bcL1(y[start:stop], degree=degree, power=power, method=method)
              for start, stop in spans)
    return bl
def baseline1(y, degree=2, chunksize=2000):
    """
    compute a piece-wise baseline on y using fitpolyL1

    degree is the degree of the underlying polynome
    chunksize defines the size of the pieces
    a cosine roll-off is used to smooth out chunks junctions

    y - baseline1(y) produces a baseline corrected spectrum

    Each chunk is extended by one tenth of its length on both sides.
    Over the first 2*recov points of a chunk, the new estimate is faded in
    with a sin**2 weight over what the previous chunk left there.
    """
    nchunk = y.size // chunksize
    if nchunk < 2:
        # too short to be cut in pieces : one single fit
        return bcL1(y, degree=degree)

    lsize = y.size // nchunk
    recov = lsize // 10     # recovering parts
    corr = np.sin(np.linspace(0, np.pi / 2, 2 * recov))**2     # cosine roll-off
    corrm1 = 1.0 - corr
    # always float, so that an integer y does not truncate the estimates
    bl = np.zeros_like(y, dtype=float)

    # first chunk has nothing on its left to be merged with
    bl[0:lsize + recov] = bcL1(y[0:lsize + recov], degree=degree)

    for i in range(1, nchunk):
        start = i * lsize - recov
        if i < nchunk - 1:
            stop = (i + 1) * lsize + recov
        else:
            stop = y.size       # last chunk runs to the very end, last point included
        tbl = bcL1(y[start:stop], degree=degree)
        fade = slice(start, start + 2 * recov)
        bl[fade] = bl[fade] * corrm1 + tbl[:2 * recov] * corr
        bl[start + 2 * recov:stop] = tbl[2 * recov:]
    return bl
def correctbaseline(y, iterations=1, nbchunks=100, firstpower=0.3,
                    secondpower=7, degree=1, chunkratio=1.0,
                    interv_ignore=None, method="Powell",
                    nbcores=None,
                    debug=False, choiceBL=0, ratiocov=0.7):
    '''
    Find baseline by using low norm value and then high norm value to attract the baseline on the small values.
    Parameters :
    iterations : number of iterations for convergence toward the small values.
    nbchunks : number of chunks on which is done the minimization. Typically, each chunk must be larger than the peaks.
    firstpower : norm used for the first iterate
    secondpower : norm used for attracting the curve toward the lowest values.
    degree : degree of the polynome used for approaching each signal chunk.
    chunkratio : ratio for changing the chunksize inside main loop
    interv_ignore : (start, stop) indices of an interval to ignore in the spectrum (eg : avoids issues with water peak).
        The interval is replaced by a straight line on a private copy, the y given by the caller is left untouched.
    method : Algorithm used for minimization on each chunk
    nbcores : number of cores used for minimizing in parallel on many chunks (if not None)
    debug : if debug is set to True, the dictionary bls is returned along with the baseline
    choiceBL : 0 to use baseline0, 1 to use baseline1.
        baseline1 only takes degree and chunksize, so with choiceBL=1 the
        powers, method, nbcores and ratiocov have no effect.
    ratiocov : covering ratio of the chunks. High recovering ratios seem to give better results. By default ratiocov = 0.7

    returns the baseline, or (baseline, bls) if debug is True.
    bls is {'bl': [...], 'blmin': [...]} with one entry per iteration.
    raises ValueError if choiceBL is neither 0 nor 1
    '''
    if choiceBL == 0:
        def baseline(data, power, chunksize, method):
            return baseline0(data, degree=degree, power=power, chunksize=chunksize,
                             nbcores=nbcores, method=method, ratiocov=ratiocov)
    elif choiceBL == 1:
        def baseline(data, power, chunksize, method):
            # baseline1 would raise TypeError on the other keywords
            return baseline1(data, degree=degree, chunksize=chunksize)
    else:
        raise ValueError("error with choiceBL")

    if interv_ignore:
        start, stop = interv_ignore[0], interv_ignore[1]
        delta = stop - start
        y = np.array(y, dtype=float)    # work on a copy, do not modify the caller's data
        y[start:stop] = y[start] + np.arange(delta) / float(delta) * (y[stop] - y[start])   # linear interpolation on the interval.

    # size of each chunk in the baseline ; never 0, which would divide by zero further down
    chunksize = max(1, y.size // nbchunks)
    loopsize = max(1, int(chunksize * chunkratio))

    # First iterate
    # NOTE(review): this pass always uses "Powell", whatever `method` is - confirm it is intended
    bl = baseline(y, firstpower, chunksize, "Powell")
    bls = {'bl': [], 'blmin': []}       # Initialisation of bls for debugging.
    for _ in range(iterations):
        blmin = np.minimum(bl, y)       # point by point, the lowest of the estimate and the data
        bl = baseline(blmin, secondpower, loopsize, method)
        bls['bl'].append(bl)            # saving the estimate
        bls['blmin'].append(blmin)      # saving the fusion between bl and the part of the curve under the estimate
    if debug:
        return bl, bls      # return the resulting baseline with the iterations
    return bl               # return the resulting baseline
class BC_Tests(unittest.TestCase):
    "unit tests for the baseline correction utilities"

    @staticmethod
    def _noisy_sine(N=100000, seed=0):
        """
        builds the test signal : sin(x/2) for x in 0..10 over N points,
        plus gaussian noise of standard deviation 0.2.
        The noise comes from a private generator with a fixed seed, so that
        a failure can be reproduced.
        """
        x = np.linspace(0, 10, N)
        noise = np.random.RandomState(seed).randn(N)
        return np.sin(x / 2) + 0.2 * noise

    def test_poly(self):
        "tests the poly function"
        p = poly(np.arange(10.0), (.1, .2, .3, .4))
        self.assertEqual(p[6], 98.5)
        self.assertAlmostEqual(sum(p), 905.5)

    def test_baseline0(self):
        "after correction by baseline0, only the noise should remain"
        y = self._noisy_sine()
        b = baseline0(y, chunksize=y.size // 20)
        self.assertLess(np.std(y - b), 0.21)

    def test_baseline1(self):
        "after correction by baseline1, only the noise should remain"
        y = self._noisy_sine()
        b = baseline1(y, chunksize=y.size // 20)
        self.assertLess(np.std(y - b), 0.21)

    # the leading underscore keeps unittest from collecting this test
    # NOTE(review): presumably disabled on purpose - confirm before renaming
    def _test_correctbaseline(self):
        "after correction by correctbaseline, little more than the noise should remain"
        y = self._noisy_sine()
        b = correctbaseline(y, iterations=10, nbchunks=20)
        self.assertLess(np.std(y - b), 0.25)
# run the unit tests when the file is executed as a script
if __name__ == '__main__':
    unittest.main()