# Source code for spike.plugins.pg_sane

#!/usr/bin/env python 
# encoding: utf-8

"""plugin for PG_Sane algorithm,  used for NUS processing

It takes a NUS acquired transient, and fills it by estimating the missing values.

associated publications

- Lionel Chiron, Afef Cherni, Christian Rolando, Emilie Chouzenoux, Marc-André Delsuc
  Fast Analysis of Non Uniform Sampled DataSets in 2D-FT-ICR-MS. - in progress

- Bray, F., Bouclon, J., Chiron, L., Witt, M., Delsuc, M.-A., & Rolando, C. (2017).
  Nonuniform Sampling Acquisition of Two-Dimensional Fourier Transform Ion Cyclotron Resonance Mass Spectrometry for Increased Mass Resolution of Tandem Mass Spectrometry Precursor Ions.
  Anal. Chem., acs.analchem.7b01850. http://doi.org/10.1021/acs.analchem.7b01850

- Chiron, L., van Agthoven, M. A., Kieffer, B., Rolando, C., & Delsuc, M.-A. (2014).
  Efficient denoising algorithms for large experimental datasets and their applications in Fourier transform ion cyclotron resonance mass spectrometry.
  PNAS , 111(4), 1385–1390. http://doi.org/10.1073/pnas.1306700111

"""

from __future__ import print_function

import unittest
import numpy as np
from numpy.fft import fft, ifft, rfft, irfft

from spike.NPKData import NPKData_plugin,  as_cpx, as_float, _base_fft,\
            _base_ifft, _base_rfft, _base_irfft
from spike.Algo.sane import sane
from spike.util.signal_tools import filtering

import sys
# Python-3 compatibility shim: provide xrange as an alias of range
# (on Python 2, the builtin xrange is already available)
if sys.version_info[0] >= 3:
    xrange = range


def noise(data, iterations=10, tresh=3.0):
    """Simple noise evaluation.

    Iteratively discards points lying more than tresh*sigma above the mean,
    then returns the standard deviation of what remains.
    """
    kept = data.copy()
    for _ in range(iterations):
        mask = (kept - kept.mean()) < tresh * kept.std()
        kept = kept[mask]
    return kept.std()
def HT(x, thresh):
    """
    returns the Hard Thresholding of x,
    i.e. all points such as x_i <= thresh are set to 0.0
    """
    keep = abs(x) > thresh
    return np.where(keep, x, 0.0)
def HTproj(x, k):
    r"""
    returns the Hard Thresholding of x, on the ball of radius \ell_0 = k
    i.e. the k largest values are kept, all others are set to 0
    """
    magnitude = abs(x)
    split = len(magnitude) - k
    # argpartition puts the k largest entries (by magnitude) after index `split`
    order = np.argpartition(magnitude, split)
    top = order[split:]
    result = np.zeros_like(x)
    result[top] = x[top]
    return result
def pg_sane(npkd, HTmode='projection', axis=1, iterations=10, HTratio=None, rank=20, Lthresh=2.0,
            sampling=None, size=None, final='SANE'):
    """
    Papoulis-Gershberg algorithm - stabilized with SANE

    This function takes a FID with a partial (NUS) sampling, and fills the holes with
    estimated values.
    The FID can then be processed normally, as if it were completely acquired.

    HTmode (threshold or projection) determines the HT step of the PG algorithm
    iterations : the number of iterations used for the program

        pg_sane converges quite rapidly, and usually 5 to 20 iterations are sufficient.
        when the SNR is high, or for large data-sets (>8k) more iterations might be needed

    HTratio: number of lines left by HT/projection - usually a few % of the whole spectrum
        default is 0.01 ( 1% )
    rank : a rough estimate of the number of lines present in the FT spectrum of the FID
        - not stringent - if in doubt, use a slightly larger value.
    Lthresh : the multiplier for HT/threshold, usually 2.0
        lower recovers more weak signals but requires more iterations
        higher requires less iterations but looks only at larger peaks.
    sampling : if provided, the sampling index list - npkd is then supposed to be
        zerofilled (missing values set to zero)
        if None, npkd.axis1.sampled should be True, and sampling will be fetched via
        npkd.axis1.get_sampling()
    final: the final step after iterations,
        default is 'SANE': better (noise-less) data-sets, the best reconstruction quality
        in simulations
        'PG' to reapply PG step - produces the cleanest/more compressible data-sets
        'Reinject': the closest to acquired data - to use if there is very little noise.
    size: if different from None, pg_sane will also extrapolate to size
    """
    def PG(fid):
        "local version of the PG algorithm - one FT / HT / inverse-FT cycle"
        b = directFT(fid)           # directFT/idirectFT bound below, fft or rfft depending on dtype
        if HTmode == 'threshold':
            bf = HT(b, Lthresh*noise(b))
        else:
            bf = HTproj(b, Ndata)
        return idirectFT(bf)
    if npkd.dim == 1:
        if sampling is None:
            if not npkd.axis1.sampled:
                raise Exception("this function works only on NUS datasets")
            fiditer = npkd.copy().zf().get_buffer()     # initialize: zerofill to full size
            lsampling = npkd.axis1.get_sampling()
        else:
            fiditer = npkd.get_buffer()                 # initialize: assumed already zerofilled
            lsampling = sampling
        GoodOnes = fiditer[lsampling]                   # the actually-measured points
        RATIO = float(len(GoodOnes))/len(fiditer)
        # evaluate mean power of the (virtual) fully-sampled FID
        power = np.linalg.norm(fiditer)/np.sqrt(RATIO)
        if size is not None and size > len(fiditer):    # extend if extrapolation is required
            fidzf = np.zeros(size, dtype=fiditer.dtype)
            fidzf[:len(fiditer)] = fiditer[:]
            fiditer = fidzf
        if HTratio is None:
            Ndata = len(fiditer)//100                   # 1% for HT projection
        else:
            Ndata = int(HTratio * len(fiditer))
        cpx = (fiditer.dtype == 'complex128')
        # assign FT pair matching the data type
        if cpx:
            directFT = fft
            idirectFT = ifft
        else:
            directFT = rfft
            # irfft needs the output length to invert rfft unambiguously (see irfft doc)
            idirectFT = lambda sp: irfft(sp, len(fiditer))
        for i in range(iterations):                     # main iteration loop
            fiditer[lsampling] = GoodOnes               # Reinject
            fiditer = sane(fiditer, rank)               # Apply Sane
            if i == 0:
                fiditer *= power/(np.linalg.norm(fiditer))  # normalize first SANE step
            fiditer[lsampling] = GoodOnes               # Reinject
            fiditer = PG(fiditer)                       # Apply PG
            if i == 0:
                fidnorm = np.linalg.norm(fiditer)
                if fidnorm > 0.0:                       # happens in some pathological cases
                    fiditer *= power/(fidnorm)          # normalize first PG step
        if final == 'SANE':
            fiditer = sane(fiditer, rank)
        elif final == 'PG':
            pass                                        # nothing special to do
        elif final == 'Reinject':
            fiditer[lsampling] = GoodOnes
        else:
            raise Exception('wrong mode for "final", choose "SANE", "PG", or "Reinject"')
        # we're done
        npkd.set_buffer(fiditer)
    elif npkd.dim == 2:
        todo = npkd.test_axis(axis)
        # FIX: HTmode and HTratio are now forwarded to the 1D recursive calls -
        # previously they were silently dropped and the defaults were always used in 2D.
        if todo == 2:
            for i in xrange(npkd.size1):
                r = npkd.row(i).pg_sane(HTmode=HTmode, HTratio=HTratio, rank=rank,
                        iterations=iterations, Lthresh=Lthresh, sampling=sampling,
                        final=final, size=size)
                npkd.set_row(i, r)
        elif todo == 1:
            for i in xrange(npkd.size2):
                r = npkd.col(i).pg_sane(HTmode=HTmode, HTratio=HTratio, rank=rank,
                        iterations=iterations, Lthresh=Lthresh, sampling=sampling,
                        final=final, size=size)
                npkd.set_col(i, r)
    elif npkd.dim == 3:
        raise Exception("not implemented yet")
    npkd.axes(axis).sampling = None     # data is now fully populated - clear the NUS flag
    return npkd
class sane_pgTests(unittest.TestCase):
    # Unit tests for the pg_sane plugin.
    # NOTE(review): both tests rely on project resources (Tests data files,
    # Display.testplot, util.signal_tools) and so can only run inside the spike package.
    def test_NUS_sampling(self):
        ''' NUS example removing the sampling noise '''
        from ..Tests import filename, directory
        import spike.Display.testplot as testplot
        plt = testplot.plot()
        import spike.util.signal_tools as u
        from numpy.fft import fft
        from spike.Tests import filename
        from spike.NPKData import NPKData
        # build a synthetic noisy signal, then subsample it with the sampling list file
        samplingfile = filename("Sampling_file.list")
        e = NPKData(dim = 1)
        e.axis1.load_sampling(samplingfile)
        size = 20000
        signal = u.SIGNAL_NOISE(lenfid=size, nbpeaks=10, amplitude=100, noise=50, shift = 7000)
        signal.fid
        echant = signal.fid[e.axis1.sampling]      # keep only the NUS-sampled points
        # print "echant.size ",echant.size
        f = NPKData(buffer = echant)
        f.axis1.load_sampling(samplingfile)
        h = f.copy()
        h.pg_sane()                                # reconstruct the full FID
        # compare reconstruction to the noise-free reference fid0
        pgdb = u.SNR_dB(signal.fid0,h.get_buffer())
        print("PG_SANE reconstruction is %.1fdB should be greater than 19dB"%(pgdb))
        f.zf().fft().modulus().display(label = "NUS : FFT with sampling noise")
        h.fft().modulus().display(label = "NUS : processed with PG_SANE", new_fig = False, show = True)
        self.assertTrue(pgdb>19.0)
    def test_NUS_sampling2(self):
        ''' NUS larger example removing the sampling noise '''
        from spike.NPKData import NPKData
        import spike.Display.testplot as testplot
        plt = testplot.plot()
        import time
        # First tools
        def build(tp):
            """
            build a synthetic transient signal containing 10 frequencies
            with intensities from 1 to 10
            return the time axis and the signal
            """
            # set random signal lines
            np.random.seed(123)
            freq = 3E5*np.random.rand(10)           # 10 random lines
            amp = range(1,11)
            # build transient
            fid = np.zeros(N, dtype=complex)
            for a,nu in zip(amp, freq):
                fid += a*np.exp(-tp/tau)*np.exp(2j*np.pi*nu*tp)
            return fid
        # compute spectrum
        def FT(v):
            " Fourier transform, with a simple cosine-bell apodisation"
            vapod = v*np.cos(np.linspace(0,np.pi/2, len(v)))
            return np.fft.fftshift(np.fft.fft(vapod))
        # generate sampling list
        def gene_sampling(ratio):
            np.random.seed(1234)
            perm = np.random.permutation(N-1)       # generate a permutation
            sampling = sorted(perm[:int(round(N*ratio))-2])
            sampling.insert(0,0)                    # insure the first point is set
            sampling.append(N-1)                    # insure the last point is set
            return sampling
        def noise(data):
            " estimate noise in the spectrum by iteratively removing all signals above 3 sigma "
            b = data.copy()
            for i in range(10):
                b = b[ b-b.mean()<3*b.std() ]
            return b.std()
        # Then generate data
        N = 64000               # Size of the complete sampling
        SR = 1000000.0          # Spectral Width
        tau = .1                # ion life-time - make it short enough to reduce FT artifacts
        dt = 1/SR               # Nyquist sampling, the example are presented in complex
        tp = np.arange(0,N)*dt  # time axis
        fq = np.linspace(0,SR,N)    # frequency axis
        fid0 = build(tp)        # noise-free data
        NOISE = 5               # noise level, same unit as the amp in the first cells.
                                # here, noise level is half of the largest amplitude.
        np.random.seed(12345)
        # noisy data
        nfid = fid0 + NOISE*np.random.randn(len(fid0)) + 1j*NOISE*np.random.randn(len(fid0))
        # generate sampling
        RATIO = 1./8
        sampling = gene_sampling(RATIO)
        # prepare
        f = NPKData(buffer = nfid[sampling])
        f.axis1.sampling = sampling
        # do it
        t0 = time.time()
        g = f.copy().pg_sane( iterations=20, rank=15)
        elaps = time.time() - t0
        # SNR of the reconstruction relative to the noise-free reference, in dB
        SNR = -20*np.log10(np.linalg.norm(g.get_buffer()-fid0) / np.linalg.norm(fid0) )
        print ("test_NUS_sampling2: elaps %.2f sec SNR: %.1f dB should be larger than 30dB"%(elaps, SNR))
        self.assertTrue(SNR>30.0)
        ax1 = plt.subplot(211)
        f.copy().apod_sin().zf(2).fft().display(title='spectrum original data with sampling noise', figure=ax1)
        ax2 = plt.subplot(212)
        g.copy().apod_sin().zf(2).fft().display(title='spectrum after pg_sane cleaning', figure=ax2)
# register pg_sane() as a method on NPKData objects (plugin mechanism)
NPKData_plugin("pg_sane", pg_sane)