Source code for spike.Algo.urQRd

"""
urQRd.py
#########
Algorithm for denoising time series, named urQRd (standing for "uncoiled random QR denoising")
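The main function is
urQRd(data, k, orda=None, iterations=1)
data : the series to be denoised
k : the rank of the analysis
orda : the order of the analysis (optional)
iterations : the number of times the operation is repeated (optional)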

Copyright (c) 2013 IGBMC. All rights reserved.

Marc-Andr\'e Delsuc <madelsuc@unistra.fr>
Lionel Chiron <lionel.chiron@gmail.com>

This software is a computer program whose purpose is to compute urQRd denoising.

This software is governed by the CeCILL  license under French law and
abiding by the rules of distribution of free software.  You can  use, 
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info". 

As a counterpart to the access to the source code and  rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty  and the software's author,  the holder of the
economic rights,  and the successive licensors  have only  limited
liability. 

In this respect, the user's attention is drawn to the risks associated
with loading,  using,  modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean  that it is complicated to manipulate,  and  that  also
therefore means  that it is reserved for developers  and  experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or 
data to be ensured and,  more generally, to use and operate it in the 
same conditions as regards security. 

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.

Created by Lionel Chiron and Marc-Andr\'e on 2013-10-13.

version 2.0 
28/oct/2013

"""
import numpy as np
import numpy.linalg as linalg
from scipy.linalg import norm
from numpy.fft import fft, ifft

def urQRd(data, k, orda=None, iterations=1):
    """
    urQRd algorithm. Name stands for uncoiled random QR denoising.

    From a data series, return a denoised series.

    data : the series to be denoised - a (normally complex) numpy buffer
    k : the rank of the analysis
    orda : the order of the analysis
        internally, a Hankel matrix (M,N) is constructed, with M = orda and N = len(data)-orda+1
        if None (default), orda = len(data)//2
    iterations : the number of times the operation should be repeated

    values must be such that
        2*orda <= len(data)
        k < orda
    and then
        N = len(data)-orda+1
        Omega is (N x k)

    Returns the denoised series. It is real when data is real and complex otherwise.
    A buffer which is zero everywhere (as judged by np.allclose) is returned untouched.

    Raises ValueError when orda or k violates the constraints above.
    """
    if np.allclose(data, 0.0):
        # nothing to denoise in an empty buffer
        return data
    if not orda:
        orda = data.size // 2
    if 2 * orda > data.size:
        raise ValueError('order is too large')
    if k >= orda:
        raise ValueError('rank is too large')
    N = len(data) - orda + 1
    dd = data
    for _ in range(iterations):
        # a fresh random (N x k) matrix is drawn for every pass
        Omega = np.random.normal(size=(int(N), int(k)))
        Q, QstarH = urQRdCore(dd, orda, Omega)    # H is approximated by Q Q* H
        dd = Fast_Hankel2dt(Q, QstarH)
    denoised = dd
    if np.isrealobj(data):
        # The reconstruction is always complex, so give a real series back for
        # any real input dtype (not only float64).
        # This remains a kludge: a complex data-set is expected - use the
        # analytic signal if your data are real.
        denoised = np.real(denoised)
    return denoised
def urQRdCore(data, orda, Omega):
    '''
    Core step of the urQRd algorithm.

    data : the series being denoised, used as generator of the Hankel matrix H
    orda : accepted for interface symmetry with urQRd; not used in this function
    Omega : the random matrix by which H is multiplied

    Returns the pair (Q, QstarH), where Q is the orthonormal factor of the
    QR decomposition of H.Omega and QstarH is the product Q* H.
    H itself is approximated by Q.QstarH.
    '''
    # sample the range of H; H is never built, only fast Hankel products are used
    sketch = FastHankel_prod_mat_mat(data, Omega)
    # only the Q factor of the QR decomposition is needed
    Q = linalg.qr(sketch)[0]
    # Q* H is obtained as the conjugate transpose of (Hankel of conj(data)) . Q
    QstarH = FastHankel_prod_mat_mat(data.conj(), Q).conj().T
    return Q, QstarH
def vec_mean(M, L):
    '''
    Vector for calculating the mean from the sum on the antidiagonal.
    data = vec_sum*vec_mean

    M : number of rows of the (M x N) matrix
    L : number of antidiagonals, L = M+N-1

    Returns a float array of length L whose entry i is 1/(number of elements
    on antidiagonal i), i.e. 1/min(i+1, L-i, M, N).
    Any 1 <= M <= L is handled, including square and tall matrices (L < 2*M).

    Raises ValueError if M and L do not describe a valid matrix.
    '''
    N = L - M + 1
    if M < 1 or N < 1:
        raise ValueError('vec_mean requires 1 <= M <= L')
    pos = np.arange(L)
    # an antidiagonal grows from either corner, and is capped by the short side
    counts = np.minimum(np.minimum(pos + 1, L - pos), min(M, N))
    return 1.0 / counts
def FastHankel_prod_mat_mat(gene_vect, matrix):
    '''
    Fast Hankel structured matrix matrix product.

    gene_vect : generator vector of the Hankel matrix H (H[i,j] = gene_vect[i+j]), of length L
    matrix : the (N x K) matrix to be multiplied by H

    Returns the (M x K) complex product H.matrix, with M = L-N+1.
    H is never built: each column is computed as an FFT based convolution,
    in the same way as FastHankel_prod_mat_vec does for a single vector.

    Raises ValueError if matrix has more rows than gene_vect has points.
    '''
    N, K = matrix.shape
    L = len(gene_vect)
    M = L - N + 1
    if M < 1:
        raise ValueError('matrix has more rows than the generator vector has points')
    data = np.zeros(shape=(M, K), dtype=complex)
    # the FFT of the generator is the same for every column : compute it once
    gene_fft = fft(gene_vect)
    zero_pad = np.zeros(M - 1)
    for k in range(K):
        # reversed column, completed with zeros to length L
        padded = np.concatenate((zero_pad, matrix[::-1, k]))
        # circular convolution through FFT, then shift and keep the M valid points
        data[:, k] = np.roll(ifft(gene_fft * fft(padded)), +1)[:M]
    return data
#this is the slow stage
def FastHankel_prod_mat_vec(gene_vect, prod_vect):
    """
    Product of the Hankel matrix generated by gene_vect with the vector prod_vect.

    gene_vect : generator of the Hankel matrix, of length L
    prod_vect : the vector to be multiplied, of length N

    Returns the first M = L-N+1 points of the shifted circular convolution,
    which is the matrix-vector product. The Hankel matrix is never built.
    """
    n_rows = len(gene_vect) - len(prod_vect) + 1
    # reversed vector, left-padded with zeros up to the length of the generator
    padded = np.concatenate((np.zeros(n_rows - 1), prod_vect[::-1]))
    # multiplying the FFTs performs the circular convolution
    circ_conv = ifft(fft(gene_vect) * fft(padded))
    # a one point shift aligns the result; only the first n_rows points are kept
    return np.roll(circ_conv, 1)[:n_rows]
def Fast_Hankel2dt(Q, QH):
    '''
    Goes back to a data series from Q and QstarH.

    Q : (M x K) matrix
    QH : (K x N) matrix

    For each k, column k of Q is combined with row k of QH through
    FastHankel_prod_mat_vec (used here as a fast Toeplitz product), and the
    results are accumulated into a vector of length L = M+N-1.
    That sum is then multiplied by vec_mean(M, L) to go from the sum on the
    antidiagonals to the mean, and returned as a complex series of length L.
    '''
    n_rows, _ = Q.shape
    rank, n_cols = QH.shape
    length = n_rows + n_cols - 1
    antidiag_sum = np.zeros((length,), dtype=complex)
    for idx in range(rank):
        margin = np.zeros(n_cols - 1)
        # column of Q framed by zeros : generator vector for the Toeplitz matrix
        generator = np.concatenate((margin, Q[:, idx], margin))
        antidiag_sum += FastHankel_prod_mat_vec(generator, QH[idx, :][::-1])
    # from the sum on the antidiagonal to the mean
    return antidiag_sum * vec_mean(n_rows, length)
def test_urQRd(lendata=10000, rank=100, orda=4000, noise=200.0, iterations=1, noisetype="additive"):
    """
    ============== example of use of urQRd on a synthetic data-set ===============

    Builds a clean series made of 8 damped complex exponentials, degrades it,
    denoises it with urQRd, prints the SNR before and after, and plots the
    clean, noisy and denoised series with the modulus of their FFT.

    lendata : number of points of the synthetic series
    rank : rank given to urQRd (its k argument)
    orda : order given to urQRd
    noise : noise level
        "additive" : scale of the complex gaussian noise which is added
        "multiplicative" : amplitudes are perturbed with scale noise/2, frequencies with scale noise/200
        not used by the other noise types
    iterations : number of urQRd iterations
    noisetype : one of "additive", "multiplicative", "sampling", "missing points"

    Raises ValueError if noisetype is not one of the above.
    Nothing is returned; a matplotlib window is shown.
    """
    import time
    from numpy import pi
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator

    def plot_param(fig, fignumb):
        "adds subplot fignumb to fig, with 4 as MaxNLocator setting on both axes, and returns it"
        ax = fig.add_subplot(fignumb)
        ax.xaxis.set_major_locator(MaxNLocator(4))
        ax.yaxis.set_major_locator(MaxNLocator(4))
        return ax

    def mfft(v):
        "utility that returns the modulus of the fft of v"
        return np.abs(np.fft.fft(v))

    def SNR(noisy, target):
        "computes and return SNR value, in dB"
        return 10*np.log10(np.sum(np.abs(target)**2)/np.sum(np.abs(noisy - target)**2))

    if noisetype not in ("additive", "multiplicative", "sampling", "missing points"):
        raise ValueError("unknown noise type")

    ################################################
    # Data built for tests
    ################################################
    nbpeaks = 8     # number of simulated signals
    LB = 1.11       # linewidth
    Freq = [(i+1+np.sqrt(10))*pi*500.0j for i in range(nbpeaks)]    # frequencies
    Amp = [(i+1)*20 for i in range(nbpeaks)]                        # amplitudes

    # the time axis and the clean signal are the same for every noise type
    x = np.arange(lendata*1.0)/lendata      # time series
    data0 = np.zeros(lendata, dtype=complex)
    for i in range(nbpeaks):
        data0 += Amp[i] * np.exp(Freq[i]*x) * np.exp(-LB*x)

    if noisetype == "additive":
        # additive complex noise
        data = data0 + noise*(np.random.randn(x.size)+1j*np.random.randn(x.size))
    elif noisetype == "multiplicative":
        # amplitude and frequency of each peak fluctuate from point to point
        data = np.zeros(lendata, dtype=complex)
        Anoise = noise/2
        Fnoise = noise/200
        for i in range(nbpeaks):
            nAmp = Amp[i] + Anoise*np.random.randn(x.size)
            nFreq = Freq[i] + Fnoise*np.random.randn(x.size)
            data += nAmp * np.exp(nFreq*x) * np.exp(-LB*x)
    elif noisetype == "sampling":
        xn = x + 0.5*np.random.randn(x.size)/lendata    # time series with noisy jitter
        data = np.zeros(lendata, dtype=complex)
        for i in range(nbpeaks):
            data += Amp[i] * np.exp(Freq[i]*xn) * np.exp(-LB*xn)
    else:   # "missing points"
        # each point is randomly kept (1) or zeroed (0)
        miss = np.random.randint(2, size=len(x))
        data = data0*miss

    iSNR = SNR(data, data0)
    print("Initial Noisy Data SNR: %.2f dB - noise type : %s"%(iSNR, noisetype))

    fdata = mfft(data0)         # FFT of noiseless signal
    fdatanoise = mfft(data)     # FFT of noisy signal

    print("\n".join([
        "",
        "=== Running urQR algo ===",
        "lendata : {0}",
        "orda : {1}",
        "rank : {2}",
        "",
    ]).format(lendata, orda, rank))
    t0 = time.time()
    datarqrd = urQRd(data, k=rank, orda=orda, iterations=iterations)    # denoise signal with urQRd
    trQRd = time.time()-t0
    fdatarqrd = mfft(datarqrd)  # FFT of urQRd denoised signal

    print("=== Result ===")
    fSNR = SNR(datarqrd, data0)
    print("Denoised SNR: %.2f dB - processing gain : %.2f dB"%(fSNR, fSNR-iSNR))
    print("processing time for urQRd : %.2f sec"%trQRd)

    ################################################################# Plotting
    # left column : the series, right column : their spectra
    panels = [
        (321, data0.real, 'b', "clean signal", 'data series'),
        (323, data.real, 'k', "noisy signal", None),
        (325, datarqrd.real, 'r', 'urQRd filtered signal', None),
        (322, fdata, 'b', "clean spectrum", 'FFT spectrum'),
        (324, fdatanoise, 'k', "noisy spectrum", None),
        (326, fdatarqrd, 'r', 'urQRd filtered spectrum', None),
    ]
    fig = plt.figure()
    for position, series, color, label, title in panels:
        ax = plot_param(fig, position)
        ax.plot(series, color, label=label)
        ax.legend()
        if title is not None:
            ax.set_title(title)
    fig.suptitle("Noise type : "+noisetype)
    plt.show()
# When executed as a script, run the synthetic data demonstration with its default parameters.
if __name__ == '__main__': test_urQRd()