Source code for envtoolkit.ts


""" Functions/classes relative to time-series """

import datetime 
import numpy as np
import pylab as plt
import scipy.stats as stats


class Lanczos(object):
    """ Class for Lanczos filtering.

    Inspired from NCL's `filwgts_lanczos
    <http://www.ncl.ucar.edu/Document/Functions/Built-in/filwgts_lanczos.shtml>`_
    and `wgt_runave
    <http://www.ncl.ucar.edu/Document/Functions/Built-in/wgt_runave.shtml>`_
    functions.

    :param str filt_type: The type of filter ("lp"=Low Pass,
     "hp"=High Pass, "bp"=Band Pass)
    :param int nwts: Number of weights (must be an odd number)
    :param float pca: First cut-off period
    :param float pcb: Second cut-off period (only for band-pass filters)
    :param float delta_t: Time-step
    """

    def __init__(self, filt_type, nwts, pca, pcb=None, delta_t=1):
        """ Initialisation of the filter """

        self.filt_type = filt_type
        self.nwts = nwts
        self.pca = pca
        self.pcb = pcb
        self.delta_t = delta_t

        if self.nwts % 2 == 0:
            raise IOError('Number of weigths must be odd')

        # Because w(n)=w(-n)=0, we would have only nwts-2
        # effective weights. So we add two weights so as to get rid of that
        nwts = self.nwts + 2
        weights = np.zeros([nwts])
        nbwgt2 = nwts // 2

        if self.filt_type == 'lp':
            # Low-pass: central weight is 2*fc; side weights are the
            # truncated sinc response tapered by the Lanczos sigma factor
            cutoff = float(self.pca)
            cutoff = self.delta_t / cutoff
            weights[nbwgt2] = 2 * cutoff
            k = np.arange(1., nbwgt2)
            sigma = np.sin(np.pi * k / nbwgt2) * nbwgt2 / (np.pi * k)
            firstfactor = np.sin(2. * np.pi * cutoff * k) / (np.pi * k)
            # weights are symmetric: fill both halves from the same factors
            weights[nbwgt2-1:0:-1] = firstfactor * sigma
            weights[nbwgt2+1:-1] = firstfactor * sigma

        elif self.filt_type == 'hp':
            # High-pass: complement of the low-pass response
            cutoff = float(self.pca)
            cutoff = self.delta_t / cutoff
            weights[nbwgt2] = 1 - 2 * cutoff
            k = np.arange(1., nbwgt2)
            sigma = np.sin(np.pi * k / nbwgt2) * nbwgt2 / (np.pi * k)
            firstfactor = np.sin(2. * np.pi * cutoff * k) / (np.pi * k)
            weights[nbwgt2-1:0:-1] = -firstfactor * sigma
            weights[nbwgt2+1:-1] = -firstfactor * sigma

        elif self.filt_type == 'bp':
            # Band-pass: difference of two low-pass responses.
            # cutoff1 is the lowest frequency (longest period),
            # cutoff2 the highest frequency (shortest period)
            cutoff1 = np.max([float(self.pca), float(self.pcb)])
            cutoff1 = self.delta_t / cutoff1
            cutoff2 = np.min([float(self.pca), float(self.pcb)])
            cutoff2 = self.delta_t / cutoff2
            weights[nbwgt2] = 2 * cutoff2 - 2 * cutoff1
            k = np.arange(1., nbwgt2)
            sigma = np.sin(np.pi * k / nbwgt2) * nbwgt2 / (np.pi * k)
            firstfactor = (np.sin(2. * np.pi * cutoff1 * k) / (np.pi * k)) \
                - (np.sin(2. * np.pi * cutoff2 * k) / (np.pi * k))
            weights[nbwgt2-1:0:-1] = -firstfactor * sigma
            weights[nbwgt2+1:-1] = -firstfactor * sigma

        else:
            raise IOError('Unknowm filter %s must be "lp", "hp" or "bp"'
                          % filt_type)

        self.wgt = weights

    def wgt_runave(self, data):
        """ Compute the running mean of a ND input array using the
        filter weights.

        :param numpy.array data: Array to filter out
         (time must be the first dimension)
        """

        wgt = self.wgt
        output = np.zeros(data.shape)
        nwt = len(wgt)
        # BUGFIX: use floor division; "nwt/2" is a float under Python 3
        # and crashes the index arithmetic below
        nwgt2 = nwt // 2
        indw = nwgt2

        if data.ndim > 1:
            # replicate the 1-D weights along the space dimensions so the
            # element-wise product below broadcasts correctly
            shapein = np.array(data.shape)
            shapein = shapein[::-1]
            shapein[-1] = 1
            wgt = np.tile(wgt, shapein)
            wgt = np.transpose(wgt)

        # weighted running average over every full window
        while indw + nwgt2 + 1 <= data.shape[0]:
            index = np.arange(indw - nwgt2, indw + nwgt2 + 1)
            output[indw] = np.sum(wgt * data[index], axis=0)
            indw = indw + 1

        # edge points never covered by a full window are flagged as NaN.
        # NOTE(review): this also masks any genuinely zero-valued output
        output[output == 0] = np.nan
        return output
def compute_monthly_clim(data, yyyymm):
    """ Compute a monthly seasonal cycle from a monthly dataset.

    This script is largely inspired from the NCL `clmMonTLL
    <https://www.ncl.ucar.edu/Document/Functions/Contributed/clmMonTLL.shtml>`_
    function

    :param numpy.ndarray data: dataset whose seasonal cycle
     to extract (time must be the first dimension)
    :param int yyyymm: the date vector (format YYYYMM, with YYYY=year,
     MM=month). For instance obtained with the
     :py:func:`envtoolkit.ts.make_yymm` function.

    :return: a numpy array that contains the monthly seasonal cycle.
     Same dimensions as `data`, except for the first dimension
     which is 12
    :rtype: numpy.array
    """

    # mask NaN values (NaN != NaN)
    data = np.ma.array(data, mask=data != data)

    # BUGFIX: np.int was removed from recent numpy, and "/" is true
    # division under Python 3 -- use plain int and floor division so
    # the YYYYMM split stays integral
    yyyymm = np.asarray(yyyymm).astype(int)
    year = yyyymm // 100
    month = yyyymm - 100 * year
    monthvec = np.unique(month)

    # output: one time slice per distinct calendar month
    clim = np.empty(tuple(data[:len(monthvec)].shape))
    for indmonth in range(0, len(monthvec)):
        index = np.nonzero(month == monthvec[indmonth])[0]
        clim[indmonth] = np.mean(data[index], axis=0)
    return clim
def compute_monthly_anom(data, yyyymm, clim):
    """ Computes anomalies relative to a monthly climatology.

    This script is largely inspired from NCL's `calcMonAnomTLL
    <https://www.ncl.ucar.edu/Document/Functions/Contributed/calcMonAnomTLL.shtml>`_
    function

    :param numpy.array data: data from which to extract monthly anom
     (time must be first dimension).
    :param int yyyymm: the date vector (format YYYYMM, with
     YYYY=year, MM=month)
    :param numpy.array clim: the monthly climatology, computed with the
     :func:`envtoolkit.ts.compute_monthly_clim` function.

    :return: a numpy array that contains the monthly anomalies.
     Same dimensions as `data`
    :rtype: numpy.array
    """

    # mask NaN values (NaN != NaN)
    data = np.ma.array(data, mask=data != data)
    anoms = np.ma.zeros(data.shape)

    # BUGFIX: floor division + plain int (Python 3 / modern numpy)
    yyyymm = np.asarray(yyyymm).astype(int)
    year = yyyymm // 100
    month = yyyymm - 100 * year
    monthvec = np.unique(month)

    # BUGFIX: np.tile requires integer repetition counts; a float array
    # raises TypeError on recent numpy
    reshape_array = np.ones(data.ndim, dtype=int)
    for indmonth in range(0, len(monthvec)):
        ind_ok = np.nonzero(month == monthvec[indmonth])[0]
        reshape_array[0] = len(ind_ok)
        # subtract the month's climatology from every matching time step
        anoms[ind_ok] = data[ind_ok] - np.tile(clim[indmonth], reshape_array)
    return anoms
def day_of_year(yymmdd):
    """ Returns the day of year of a date vector in format YYYYMMDD

    :param int yymmdd: Input date (format YYYYMMDD, with YYYY=year,
     MM=month, DD=day)

    :return: a numpy.array containing the day of year
     (1st of January=1, etc)
    :rtype: numpy.array
    """

    yymmdd = np.asarray(yymmdd)

    # BUGFIX: floor division -- true division ("/") under Python 3 gives
    # floats, which datetime.datetime rejects
    year = yymmdd // 10000
    month = (yymmdd - 10000 * year) // 100
    day = yymmdd - 10000 * year - 100 * month

    # Computation of the 'day_of_year' vector; np.int is removed from
    # recent numpy, so build an int array directly
    dayofyear = np.zeros(len(year), dtype=int)
    for indtime in range(0, len(year)):
        dateint = datetime.datetime(year[indtime], month[indtime],
                                    day[indtime])
        # %j is the 1-based ordinal day of the year
        dayofyear[indtime] = int(dateint.strftime('%j'))
    return dayofyear
def compute_daily_clim(data, date, smooth=True, nharm=2):
    """ Computes a daily seasonal cycle from a daily dataset.

    The user has the possibility to smooth the seasonal cycle, for
    instance by keeping the first two harmonics (annual and semi-annual
    cycles). This script is largely inspired from NCL's `calcDayAnomTLL
    <https://www.ncl.ucar.edu/Document/Functions/Contributed/calcMonAnomTLL.shtml>`_
    and `smthClmDayTLL
    <https://www.ncl.ucar.edu/Document/Functions/Contributed/smthClmDayTLL.shtml>`_
    functions

    :param numpy.ndarray data: dataset whose seasonal cycle to extract
     (time must be the first dimension)
    :param int date: the date vector (format YYYYMMDD, with YYYY=year,
     MM=month, DD=day). Obtained by using the
     :py:func:`envtoolkit.ts.make_yymmdd` function.
    :param bool smooth: defines whether the seasonal cycle should be
     smoothed by using Fast Fourier Transform
    :param int nharm: number of harmonics to retain if `smooth` is True.
     Use 2 to keep annual and semi-annual harmonics

    :return: a numpy array that contains the daily seasonal cycle.
     Same dimensions as `data`, except for the first dimension
     which is 366.
    :rtype: numpy.array
    """

    # mask NaN values (NaN != NaN)
    data = np.ma.array(data, mask=data != data)

    dayofyear = day_of_year(date)

    # one slot per calendar day, leap day included
    clim = np.empty(tuple(data[:366].shape))

    # average each of days 1..365 over all available years
    for indday in range(365):
        sel = np.nonzero(dayofyear == indday + 1)[0]
        clim[indday] = np.mean(data[sel], axis=0)

    # day 366 is filled as the mean of day 1 and day 365
    clim[-1] = 0.5 * (clim[0] + clim[364])

    # optional smoothing via truncated real FFT
    if smooth:
        clim = smooth_data_fft(clim, nharm=nharm)

    return clim
def smooth_data_fft(data, nharm):
    """ Smooth an input data array by using a FFT filter.

    Inspired from NCL's `smthClmDayTLL
    <https://www.ncl.ucar.edu/Document/Functions/Contributed/smthClmDayTLL.shtml>`_
    function.

    The FFT coefficients are extracted, and the signal is reconstructed
    by using only the first `nharm` harmonics. Is used in the
    :py:mod:`envtoolkit.ts.compute_daily_clim` function to smooth a
    daily seasonal cycle.

    :param numpy.ndarray data: dataset to smooth
     (time must be the first dimension)
    :param int nharm: number of harmonics to retain
    """

    # forward real FFT along the time axis
    coeffs = np.fft.rfft(data, axis=0)
    # halve the last retained harmonic, drop everything above it
    coeffs[nharm] *= 0.5
    coeffs[nharm + 1:] = 0
    # rebuild the signal from the truncated spectrum
    return np.fft.irfft(coeffs, axis=0)
def compute_daily_anom(data, date, clim):
    """ Computes daily anomalies relative to a daily climatology.

    Inspired from NCL's `calcDayAnomTLL
    <https://www.ncl.ucar.edu/Document/Functions/Contributed/calcMonAnomTLL.shtml>`_
    function

    :param numpy.array data: data from which to extract daily anom
     (time must be the first dimension)
    :param int date: the date vector (format YYYYMMDD, with YYYY=year,
     MM=month, DD=day).
    :param numpy.array clim: the daily climatology, computed with the
     :py:func:`envtoolkit.ts.compute_daily_clim` function

    :return: a numpy array that contains the daily anomalies.
     Same dimensions as `data`
    :rtype: numpy.array
    """

    # the non-time dimensions of data and clim must agree
    if np.any(data[0].shape != clim[0].shape):
        raise ValueError('Leftmost dimensions of data and dataclim ' +
                         ' must be the same. Data = %s and Dataclim = %s'
                         % (data[0].shape, clim[0].shape))

    # mask NaN values (NaN != NaN)
    data = np.ma.array(data, mask=data != data)

    dayofyear = day_of_year(date)

    anoms = np.ma.zeros(data.shape)

    # BUGFIX: np.tile requires integer repetition counts; a float array
    # raises TypeError on recent numpy
    reshape_array = np.ones(data.ndim, dtype=int)
    for indday in range(0, 366):
        ind_ok = np.nonzero(dayofyear == indday + 1)[0]
        reshape_array[0] = len(ind_ok)
        # subtract the day's climatology from every matching time step
        anoms[ind_ok] = data[ind_ok] - np.tile(clim[indday], reshape_array)
    return anoms
def make_yymmdd(date):
    """ Converts a list/array of dates into YYYYMMDD integers.

    | YYYY=year
    | MM=month
    | DD=day

    :param list date: A list/array of :py:class:`datetime.datetime`
     objects

    :return: A numpy array containing the dates in format YYYYMMDD
    :rtype: numpy.array
    """

    # pull the calendar components out of each datetime object
    years = np.array([elt.year for elt in date])
    months = np.array([elt.month for elt in date])
    days = np.array([elt.day for elt in date])
    # pack them into a single YYYYMMDD integer
    return 10000 * years + 100 * months + days
def make_yymm(date):
    """ Converts a list/array of date objects into YYYYMM integers.

    | YYYY = year
    | MM = month

    :param list date: A list/array of :py:mod:`datetime.datetime`

    :return: A numpy array containing the dates in format YYYYMM
    :rtype: numpy.array
    """

    # extract year and month from each datetime object, then pack
    # them into a single YYYYMM integer
    years = np.array([elt.year for elt in date])
    months = np.array([elt.month for elt in date])
    return 100 * years + months
def make_monthly_means(data, yymmdd):
    """ Computes monthly means from daily values

    :param numpy.array data: Dataset (time must be first dimension)
    :param numpy.array yymmdd: Dates in format YYYYMMDD

    :return: a tuple, containing the monthly output dates
     (format YYYYMM), and the monthly data output.
    :rtype: tuple
    """

    yymmdd = np.array(yymmdd)
    dataout = []

    # BUGFIX: floor division -- "/" is true division under Python 3 and
    # would turn YYYYMMDD into floats instead of YYYYMM integers
    yyyymm = yymmdd // 100

    # average all days that fall in the same YYYYMM month
    for yyyint in np.unique(yyyymm):
        iok = np.nonzero(yyyymm == yyyint)[0]
        dataout.append(np.mean(data[iok], axis=0))
    dataout = np.array(dataout)

    return np.unique(yyyymm), dataout
def doy_to_date(year, doy):
    """ Converts from day of year into date

    :param int year: Year
    :param int doy: Day of year
    """

    # offset the 1st of January by (doy - 1) days
    first_jan = datetime.datetime(year, 1, 1)
    return first_jan + datetime.timedelta(doy - 1)
def remove_mean(xdata):
    r""" Remove the mean from the dataset.

    .. math:: output = x - \overline{x}

    Made on the first dimension (usually time).

    :param numpy.array xdata: Input data

    :return: A numpy array with the anomalies of the input array
    :rtype: numpy.array
    """

    # mask NaN values (NaN != NaN), then subtract the time-mean
    masked = np.ma.masked_where(xdata != xdata, xdata)
    return masked - np.mean(masked, axis=0)
def standardize(xdata, ddof=0):
    r""" Standardizes the xdata array

    .. math:: output = \frac{x-\overline{x}}{\sigma_x}

    Made on the first dimension (usually time).

    :param numpy.array xdata: Input data
    :param int ddof: Number of degrees of freedom to remove for the
     calculation of :math:`\sigma` (see the :py:func:`numpy.std`
     documentation).

    :return: A numpy array with the standardized values of the
     input array
    :rtype: numpy.array
    """

    # mask NaN values (NaN != NaN)
    masked = np.ma.masked_where(xdata != xdata, xdata)
    # standard deviation along the first (time) dimension
    sigma = np.std(masked, axis=0, ddof=ddof)
    # anomalies divided by the standard deviation
    return remove_mean(masked) / sigma
def xcorr_1d(xdata, ydata, maxlag=None, use_covariance=False, ddof=1):
    ''' Computes the cross-correlation/cross-covariance of two
    one-dimensional arrays. xdata leads at positive lags.

    Inspired from the :py:func:`pyplot.xcorr` function

    :param numpy.array xdata: x-array (leads at positive lags)
    :param numpy.array ydata: y-array (leads at negative lags)
    :param int maxlag: Number of maximum lag to return. If None,
     maxlag=len(xdata)-1
    :param bool use_covariance: Whether covariance (True) or
     correlation (False) should be returned
    :param int ddof: Number of degrees of freedom to remove if
     covariance is computed. If 1, then covariance is divided by (N-1).

    :return: A tuple with the lags and the cross-correlation array
    :rtype: tuple
    '''

    ntime = len(xdata)

    # mask NaN values and remove the mean of each series
    xdata = np.ma.masked_where(xdata != xdata, xdata)
    ydata = np.ma.masked_where(ydata != ydata, ydata)
    xdata = (xdata - xdata.mean())
    ydata = (ydata - ydata.mean())

    # full cross-product series: length 2*ntime-1, zero lag at
    # index ntime-1
    output = np.correlate(xdata, ydata, mode=2)

    if use_covariance:
        output = output / (len(xdata) - ddof)
    else:
        # normalise by the product of the two standard deviations
        output = output / np.sqrt(np.dot(xdata, xdata) *
                                  np.dot(ydata, ydata))

    # clamp maxlag into [1, ntime-1]
    if maxlag is None:
        maxlag = ntime - 1
    if maxlag > ntime - 1:
        maxlag = ntime - 1
    if maxlag < 1:
        maxlag = 1

    lags = np.arange(-maxlag, maxlag + 1)

    # BUGFIX: the former negative-step slice
    # output[ntime+maxlag-1 : ntime-1-maxlag-1 : -1]
    # collapsed when maxlag == ntime-1 (the default), because the stop
    # index wrapped around to -1. Select the symmetric window with a
    # plain slice and reverse it instead.
    output = output[ntime - 1 - maxlag: ntime + maxlag][::-1]

    return lags, output
def corr_ND(xdata, ydata, use_covariance=False):
    """ Computes the correlation/covariance between the xdata and ydata
    arrays at 0-lag.

    Calculation is performed on first dimension (usually time). Loop is
    optimized by using list comprehensions.

    :param numpy.array xdata: x-array (time must be the first dim.)
    :param numpy.array ydata: y-array (time must be the first dim.)
    :param bool use_covariance: True if covariance should be computed
     instead of correlation

    :return: The cross-correlation or cross-covariance array, with
     dimensions (xdata.shape[1:], ydata.shape[1:]).
    :rtype: numpy.array
    """

    if xdata.shape[0] != ydata.shape[0]:
        message = 'The first dimension of xdata array(%d) is different from the first dimension of the ydata array (%d). This program will be stopped.'
        # BUGFIX: the %d placeholders were never filled in
        raise ValueError(message % (xdata.shape[0], ydata.shape[0]))

    ntime = xdata.shape[0]

    # removing the time-mean over each point
    xdata = remove_mean(xdata)
    ydata = remove_mean(ydata)

    # choosing the proper function depending on the
    # use_covariance option: np.ma.cov or np.ma.corrcoef
    if use_covariance:
        corrfunc = np.ma.cov
    else:
        corrfunc = np.ma.corrcoef

    # masking of the data where fill value
    xdata = np.ma.masked_where(xdata != xdata, xdata)
    ydata = np.ma.masked_where(ydata != ydata, ydata)

    # If xdata or ydata is 1d (nt), we convert it into
    # array of dimension (nt, 1)
    if xdata.ndim == 1:
        xdata = np.atleast_2d(xdata).T
    if ydata.ndim == 1:
        ydata = np.atleast_2d(ydata).T

    # the output combines the space dimensions of both inputs
    outshape = list(xdata.shape[1:]) + list(ydata.shape[1:])

    # number of space points in each input array
    ndimx = np.prod(xdata.shape[1:])
    ndimy = np.prod(ydata.shape[1:])

    # flatten the space dimensions if there is more than one
    if xdata.ndim > 2:
        xdata = np.reshape(xdata, (ntime, ndimx))
    if ydata.ndim > 2:
        ydata = np.reshape(ydata, (ntime, ndimy))

    # transposition so that time is the last dimension
    xdata = xdata.T
    ydata = ydata.T

    # pairwise correlation/covariance over every (x point, y point)
    # couple; [0][1] picks the cross term of the 2x2 matrix
    correlation = [corrfunc(xtemp, ytemp)[0][1]
                   for xtemp in xdata for ytemp in ydata]

    # conversion of corr. into array and reshaping
    correlation = np.array(correlation, dtype=float)
    correlation = np.reshape(correlation, outshape)

    return correlation
def xcorr_ND(xdata, ydata, maxlag=None, use_covariance=False):
    """ Computes the cross-correlation/cross-covariance between the
    xdata and ydata arrays.

    Calculation is performed on first dimension (usually time).
    xdata leads for positive lags.

    :param numpy.array xdata: x-array (time must be the first dim.)
    :param numpy.array ydata: y-array (time must be the first dim.)
    :param int maxlag: number of lags to consider. If None,
     maxlag=ntime-1
    :param bool use_covariance: True if covariance should be computed
     instead of correlation

    :return: A tuple with the lag array and the cross-correlation or
     cross-covariance array. Cross-correlation has dimensions
     (xdata.shape[1:], ydata.shape[1:], 2*maxlag+1).
    :rtype: tuple
    """

    if xdata.shape[0] != ydata.shape[0]:
        message = 'The first dimension of xdata array(%d) is different from the first dimension of the ydata array (%d). This program will be stopped.'
        # BUGFIX: the %d placeholders were never filled in
        raise ValueError(message % (xdata.shape[0], ydata.shape[0]))

    ntime = xdata.shape[0]

    # clamp maxlag into [1, ntime-1]
    if maxlag is None:
        maxlag = ntime - 1
    if maxlag > ntime - 1:
        maxlag = ntime - 1
    if maxlag < 1:
        maxlag = 1

    # the correlation function used is the 1D xcorr function
    corrfunc = xcorr_1d
    dict_xcorr = {"use_covariance": use_covariance, "maxlag": maxlag}

    # If xdata or ydata is 1d (nt), we convert it into
    # array of dimension (nt, 1)
    if xdata.ndim == 1:
        xdata = np.atleast_2d(xdata).T
    if ydata.ndim == 1:
        ydata = np.atleast_2d(ydata).T

    # number of space points in each input array
    ndimx = np.prod(xdata.shape[1:])
    ndimy = np.prod(ydata.shape[1:])

    # output: space dims of both inputs, plus one lag dimension
    tempshape = list(xdata.shape[1:]) + list(ydata.shape[1:])
    outshape = tempshape + [2 * maxlag + 1]

    # flatten the space dimensions if there is more than one
    if xdata.ndim > 2:
        xdata = np.reshape(xdata, (ntime, ndimx))
    if ydata.ndim > 2:
        ydata = np.reshape(ydata, (ntime, ndimy))

    # transposition so that time is the last dimension
    xdata = xdata.T
    ydata = ydata.T

    # cross-correlation for every (x point, y point) couple
    output = [corrfunc(xtemp, ytemp, **dict_xcorr)
              for xtemp in xdata for ytemp in ydata]

    # every call returns the same lag vector; keep the first one
    lag = output[0][0]

    # extraction of the cross-correlation data
    output = [var[1] for var in output]

    # conversion into a numpy array and reshaping
    output = np.array(output)
    output = np.reshape(output, outshape)

    return lag, output