# Source code for tisane

#!/usr/bin/env python 
# encoding: utf-8

"""
Tisane : Parallelized Sane
"""

from __future__ import print_function
from spike import NPKError
from spike.NPKData import NPKData_plugin
import os
import numpy as np
from numpy.fft import fft, ifft
from time import time
from spike.Algo.sane import sane, OPTK
import multiprocessing as mp
import logging
#from plot_bokeh import BOKEH_PLOT
from spike.util.signal_tools import findnoiselevel

########################################################################

#bplt = BOKEH_PLOT()

def local_sane(xarg, debug=0):
    '''
    Denoise one spectrum window with Sane (worker function for the pool).
    The window is brought back with ifft, processed by sane, and returned with fft.
    ###
    xarg : tuple (buf, interv, rank, iterations, above_noise, thresh_level)
        buf : numpy array holding the spectrum window
        interv : bounds of the window, handed back untouched to the caller
        rank : rank for Sane; if falsy, the rank is estimated with OPTK
        iterations : number of Sane iterations
        above_noise, thresh_level : forwarded to OPTK when the rank is estimated
    debug : diagnostics are printed when debug > 1
    ###
    returns (denoised window, interv)
    '''
    window, bounds, k, n_iter, noise_factor, level = xarg
    verbose = debug > 1
    if verbose:
        print("rank is {0} ".format(k))
    if not k:
        # No rank supplied: let OPTK estimate one from the window itself.
        if verbose:
            print('######## Using optimisation !!!!')
        estimator = OPTK(window, orda = window.size//2, sane_optk=True,
                         above_noise=noise_factor, thresh_level=level, debug=False)
        k = estimator.find_best_rank()
        if verbose:
            print('optimal rank found is {0} '.format(k))
    time_signal = ifft(window)
    cleaned = sane(time_signal, k = k, iterations = n_iter)
    return fft(cleaned), bounds


def find_thresh(spec, above_noise, debug=False):
    '''
    Compute a single threshold for the whole dataset.
    ###
    spec : numpy array, spectrum used for the estimation
    above_noise : multiplicative factor applied to the estimated level
    debug : if True, prints the estimated level
    ###
    returns above_noise * (|findnoiselevel(spec, nbseg=10)| + |mean of spec|),
    where the mean skips the first len(spec) % 20 points.
    '''
    base_noise = np.abs(findnoiselevel(spec, nbseg = 10))
    segment_count = 20
    # Skip the leading points that do not fit in a whole number of segments.
    leftover = len(spec) % segment_count
    offset = np.abs(spec[leftover:].mean())
    level = base_noise + offset
    if debug:
        print("noiselevel found is ", level) 
    return above_noise*level
 

def tisane(npkd, rank = None, nbinterv = 400, above_noise=3, threshold=False, slice_bounds = None,  nbcores = 1, mixing=False, iterations=1, debug=1):
    """
    Apply Sane piecewise on overlapping windows of the spectrum, using multiprocessing.

    The buffer is cut into `nbinterv` intervals of width delta = size//nbinterv.
    Each interval is widened by subd = delta//2 on both sides (clipped to
    [0, delta*nbinterv]), denoised by local_sane in a worker pool, and added into
    a zeroed copy of the dataset. The accumulated buffer is then divided by 2 and
    written back into `npkd` with set_buffer. Nothing is returned.
    ###
    npkd : dataset object; copy(), modulus(), get_buffer(), set_buffer() and extract() are used
    rank : rank for Sane, if no rank is given, local_sane estimates it with OPTK
    nbinterv : number of intervals
    above_noise : factor applied to the noise level (threshold computation and OPTK)
    threshold : if True, compute one threshold on the whole dataset with find_thresh
    slice_bounds : if given, npkd.extract(slice_bounds[0], slice_bounds[1]) is applied first
    nbcores : number of processes in the pool
    mixing : if True, the interval of largest summed modulus is prepended to every
             window before denoising and removed afterwards
    iterations : number of times sane is repeated inside each window
    debug : diagnostics are printed when debug > 1
    """
    print('############ Entering in Tisane  !!!!!')
    verbose = debug > 1

    if slice_bounds:
        npkd.extract(slice_bounds[0], slice_bounds[1])
    delta = npkd.get_buffer().size//nbinterv        # width of one interval
    subd = delta//2                                 # overlap added on each side of an interval
    upper = delta*nbinterv                          # points at or beyond this index are never processed

    # Modulus spectrum, used to locate the heaviest interval (mixing mode) and for the threshold.
    spec = npkd.copy().modulus().get_buffer()[:upper]
    if verbose : print("spec.size ", spec.size)
    segments = np.array_split(spec, nbinterv)       # nbinterv pieces of width delta
    lspec = segments[10:-10]                        # ignore the 10 outermost intervals on each side
    if not lspec:
        # With nbinterv <= 20 nothing is left after trimming; fall back to all
        # intervals instead of failing on max() of an empty list.
        lspec = segments
    ll = [l.sum() for l in lspec]                   # integral of the modulus on each interval
    heaviest = ll.index(max(ll))
    maxinterv = lspec[heaviest]                     # interval with maximum energy

    if verbose :
        print("maxinterv.size ", maxinterv.size)
        for probe in (100, 150):
            # These probes only exist when enough intervals were requested.
            if probe < len(lspec):
                print("lspec[{0}].size ".format(probe), lspec[probe].size)
        print("ll.index(max(ll) ", heaviest)
        print("lspec length is ", len(lspec))

    if threshold:
        thresh_level = find_thresh(spec, above_noise)
        print("#########  Threshold found is ", thresh_level)
    else:
        thresh_level = None

    def iterarg():
        '''
        Yield one argument tuple per window for local_sane:
        (buf, interv, rank, iterations, above_noise, thresh_level)
        '''
        full = npkd.get_buffer()                            # npkd is not modified until all windows are done
        for i in range(nbinterv):
            limd = max(0, i*delta-subd)                     # lower bound of the window
            limu = min((i+1)*delta+subd, upper)             # upper bound of the window
            buf = full[limd:limu]
            if mixing:
                buf = np.concatenate((maxinterv, buf))      # prepend the heaviest interval
            yield (buf, [limd, limu], rank, iterations, above_noise, thresh_level)

    pool = mp.Pool(nbcores)
    try:
        res = pool.imap(local_sane, iterarg())              # results come back in window order
        npkdz = npkd.copy()
        npkdz.set_buffer(np.zeros(npkdz.get_buffer().size, dtype=complex))   # accumulator, starts at zero
        valpercent = 0
        percent_step = 10
        for i, result in enumerate(res):                    # add every denoised window to the accumulator
            if verbose : print(i)
            percent = int(i/float(nbinterv)*100)
            if percent % percent_step == 0 and percent > valpercent:
                logging.info("Done : {0}%".format(percent))
                valpercent += percent_step
            denbuf, interv = result
            if mixing:
                denbuf = denbuf[delta:]                     # drop the prepended interval (its width is delta)
            buf = npkdz.get_buffer()
            buf[interv[0]:interv[1]] += denbuf
            npkdz.set_buffer(buf)
        # NOTE(review): the division by 2 assumes every sample lies in exactly two
        # windows. Samples in [0, delta-subd) and in the matching span at the top
        # (and one sample per interval when delta is odd) lie in a single window and
        # therefore end up halved. Left unchanged here to keep the numerical output.
        npkd.set_buffer(npkdz.get_buffer()/2)               # replace spectrum by the denoised result
    finally:
        # Always release the worker processes, even if a window failed.
        pool.close()
        pool.terminate()
        pool.join()

# Hand tisane to the plugin hook imported from spike.NPKData under the name "tisane".
# NOTE(review): presumably this makes it callable as a method on NPKData objects -- confirm against NPKData_plugin.
NPKData_plugin("tisane", tisane)