"""
Correlation functions for multi-channel cross-correlation of seismic data.
Various routines, used mostly for testing, including links to a compiled
routine using FFTW, a Numpy fft routine that uses bottleneck for
normalisation, and a compiled time-domain routine. These have varying levels
of efficiency, both in overall speed and in memory usage. The time-domain
routine is the most memory efficient but the slowest (although it is fastest
for small cases of less than a few hundred correlations); the Numpy routine
is fast but memory inefficient because it must store large double-precision
arrays for normalisation; the compiled FFTW routine is the fastest and is
more memory efficient than the Numpy routine.
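
A minimal usage sketch (the final call assumes the optional bottleneck
dependency is installed)::

    >>> import numpy as np
    >>> from eqcorrscan.utils.correlate import get_array_xcorr
    >>> xcorr = get_array_xcorr('numpy')
    >>> templates = np.random.randn(2, 50).astype(np.float32)
    >>> stream = np.random.randn(600).astype(np.float32)
    >>> ccc, used_chans = xcorr(templates, stream, pads=[0, 0])  # doctest: +SKIP
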
:copyright:
EQcorrscan developers.
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import contextlib
import ctypes
from multiprocessing import Pool as ProcessPool, cpu_count
from multiprocessing.pool import ThreadPool
import numpy as np
from future.utils import native_str
from scipy.fftpack.helper import next_fast_len
from eqcorrscan.utils.libnames import _load_cdll
XCOR_FUNCS = {} # cache of functions for doing cross correlations
# methods added to each xcorr func registered
# these implement the stream interface
XCORR_STREAM_METHODS = ('multithread', 'multiprocess', 'concurrent',
'stream_xcorr')
# these implement the array interface
XCOR_ARRAY_METHODS = ('array_xcorr',)
# ------------------ Context manager for switching out default
class _Context:
""" class for permanently or temporarily changing items in a dict """
def __init__(self, cache, value_to_switch):
"""
:type cache: dict
:param cache: A dict to store values in
:type value_to_switch: str
:param value_to_switch:
The key in cache to switch based on different contexts
"""
self.cache = cache
self.value_to_switch = value_to_switch
self.previous_value = None
def __call__(self, new_value, *args, **kwargs):
""" # TODO change docs if this ever becomes general use
Set a new value for the default xcorr function.
This function can be called directly to permanently change the
default normxcorr function or it may be used as a context manager
to only modify it temporarily.
        :param new_value:
            Name of, or callable implementing, the new default xcorr function.
        :return: This _Context instance so it can be used as a context manager.
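
        For example (a sketch, using names registered in this module)::

            >>> set_xcorr('time_domain')  # doctest: +SKIP
            >>> with set_xcorr('numpy'):  # doctest: +SKIP
            ...     pass  # 'numpy' is the default inside this block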
"""
self.previous_value = self.cache.get(self.value_to_switch)
self.cache[self.value_to_switch] = get_array_xcorr(new_value)
return self
def __enter__(self):
pass
def __exit__(self, exc_type, exc_val, exc_tb):
self.revert()
def __repr__(self):
""" this hides the fact _Context instance are returned after calls """
return None
def revert(self):
""" revert the default xcorr function to previous value """
new_value = self.previous_value
self(new_value)
set_xcorr = _Context(XCOR_FUNCS, 'default')
# ---------------------- generic concurrency functions
@contextlib.contextmanager
def _pool_boy(Pool, **kwargs):
"""
A context manager for handling the setup and cleanup of a pool object.
    :param Pool: Any class (not instance) that implements the multiprocessing
        Pool interface
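
    For example (a sketch)::

        >>> from multiprocessing.pool import ThreadPool
        >>> with _pool_boy(ThreadPool, cores=2) as pool:  # doctest: +SKIP
        ...     result = pool.apply_async(sum, ([1, 2, 3],))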
"""
pool = Pool(kwargs.get('cores', cpu_count()))
yield pool
pool.close()
pool.join()
def _pool_normxcorr(templates, stream, pool, func, *args, **kwargs):
chans = [[] for _i in range(len(templates))]
array_dict_tuple = _get_array_dicts(templates, stream)
stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple
# get parameter iterator
params = ((template_dict[sid], stream_dict[sid], pad_dict[sid])
for sid in seed_ids)
# get cc results and used chans into their own lists
results = [pool.apply_async(func, param) for param in params]
xcorrs, tr_chans = zip(*(res.get() for res in results))
cccsums = np.sum(xcorrs, axis=0)
    no_chans = np.sum(np.array(tr_chans).astype(int), axis=0)
for seed_id, tr_chan in zip(seed_ids, tr_chans):
for chan, state in zip(chans, tr_chan):
if state:
chan.append((seed_id.split('.')[1],
seed_id.split('.')[-1].split('_')[0]))
return cccsums, no_chans, chans
def _general_multithread(func):
""" return the general multithreading function using func """
def multithread(templates, stream, *args, **kwargs):
with _pool_boy(ThreadPool, **kwargs) as pool:
return _pool_normxcorr(templates, stream, pool=pool, func=func)
return multithread
def _general_multiprocess(func):
def multiproc(templates, stream, *args, **kwargs):
with _pool_boy(ProcessPool, **kwargs) as pool:
return _pool_normxcorr(templates, stream, pool=pool, func=func)
return multiproc
def _general_serial(func):
def stream_xcorr(templates, stream, *args, **kwargs):
no_chans = np.zeros(len(templates))
chans = [[] for _ in range(len(templates))]
array_dict_tuple = _get_array_dicts(templates, stream)
stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple
cccsums = np.zeros([len(templates),
len(stream[0]) - len(templates[0][0]) + 1])
for seed_id in seed_ids:
tr_cc, tr_chans = func(template_dict[seed_id],
stream_dict[seed_id],
pad_dict[seed_id])
cccsums = np.sum([cccsums, tr_cc], axis=0)
            no_chans += tr_chans.astype(int)
for chan, state in zip(chans, tr_chans):
if state:
chan.append((seed_id.split('.')[1],
seed_id.split('.')[-1].split('_')[0]))
return cccsums, no_chans, chans
return stream_xcorr
def register_array_xcorr(name, func=None, is_default=False):
"""
Decorator for registering correlation functions.
    Each registered function must have the same interface as
    :func:`numpy_normxcorr`, which is
    ``f(templates, stream, pads, *args, **kwargs)``; any number of specific
    kwargs can be used.
    register_array_xcorr can be used as a decorator (with or without
    arguments) or as a callable.
:param name: The name of the function for quick access, or the callable
that will be wrapped when used as a decorator.
:type name: str, callable
:param func: The function to register
:type func: callable, optional
:param is_default: True if this function should be marked as default
normxcorr
:type is_default: bool
:return: callable
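
    For example, registering a custom backend (a sketch)::

        >>> @register_array_xcorr('my_xcorr')  # doctest: +SKIP
        ... def my_xcorr(templates, stream, pads, *args, **kwargs):
        ...     pass  # should return (cross-correlations, used channels)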
"""
valid_methods = set(list(XCOR_ARRAY_METHODS) + list(XCORR_STREAM_METHODS))
cache = {}
def register(register_str):
"""
Register a function as an implementation.
:param register_str: The registration designation
:type register_str: str
"""
if register_str not in valid_methods:
            msg = 'register_str must be in %s' % valid_methods
raise ValueError(msg)
def _register(func):
cache[register_str] = func
setattr(cache['func'], register_str, func)
return func
return _register
def wrapper(func, func_name=None):
# register the functions in the XCOR
        fname = (func_name or name.__name__) if callable(name) else str(name)
XCOR_FUNCS[fname] = func
if is_default: # set function as default
XCOR_FUNCS['default'] = func
# attach some attrs, this is a bit of a hack to avoid pickle problems
func.register = register
cache['func'] = func
func.multithread = _general_multithread(func)
func.multiprocess = _general_multiprocess(func)
func.concurrent = _general_multithread(func)
func.stream_xcorr = _general_serial(func)
func.array_xcorr = func
func.registered = True
return func
# used as a decorator
if callable(name):
return wrapper(name)
# used as a normal function (called and passed a function)
if callable(func):
return wrapper(func, func_name=name)
# called, then used as a decorator
return wrapper
# ------------------ array_xcorr fetching functions
def _get_registered_func(name_or_func):
""" get a xcorr function from a str or callable. """
# get the function or register callable
if callable(name_or_func):
func = register_array_xcorr(name_or_func)
else:
func = XCOR_FUNCS[name_or_func or 'default']
assert callable(func), 'func is not callable'
# ensure func has the added methods
if not hasattr(func, 'registered'):
func = register_array_xcorr(func)
return func
def get_array_xcorr(name_or_func=None):
"""
    Get a normalized cross-correlation function that takes arrays as inputs.
See :func:`eqcorrscan.utils.correlate.array_normxcorr` for expected
function signature.
:param name_or_func: Either a name of a registered xcorr function or a
callable that implements the standard array_normxcorr signature.
:type name_or_func: str or callable
    :return: callable with array_normxcorr interface

    See also :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
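
    For example (a sketch; the final call assumes bottleneck is installed)::

        >>> import numpy as np
        >>> xcorr = get_array_xcorr('numpy')
        >>> templates = np.random.randn(2, 50).astype(np.float32)
        >>> stream = np.random.randn(600).astype(np.float32)
        >>> ccc, used = xcorr(templates, stream, pads=[0, 0])  # doctest: +SKIP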
"""
    func = _get_registered_func(name_or_func)
return func.array_xcorr
# ----------------------- registered array_xcorr functions
@register_array_xcorr('numpy')
def numpy_normxcorr(templates, stream, pads, *args, **kwargs):
"""
Compute the normalized cross-correlation using numpy and bottleneck.
:param templates: 2D Array of templates
:type templates: np.ndarray
:param stream: 1D array of continuous data
:type stream: np.ndarray
:param pads: List of ints of pad lengths in the same order as templates
:type pads: list
:return: np.ndarray of cross-correlations
:return: np.ndarray channels used
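
    A minimal sketch (assumes bottleneck is installed)::

        >>> import numpy as np
        >>> templates = np.random.randn(2, 50).astype(np.float32)
        >>> stream = np.random.randn(600).astype(np.float32)
        >>> ccc, used = numpy_normxcorr(templates, stream, pads=[0, 0])  # doctest: +SKIP
        >>> ccc.shape  # doctest: +SKIP
        (2, 551)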
"""
import bottleneck
from scipy.signal.signaltools import _centered
# Generate a template mask
used_chans = ~np.isnan(templates).any(axis=1)
# Currently have to use float64 as bottleneck runs into issues with other
# types: https://github.com/kwgoodman/bottleneck/issues/164
stream = stream.astype(np.float64)
templates = templates.astype(np.float64)
template_length = templates.shape[1]
stream_length = len(stream)
fftshape = next_fast_len(template_length + stream_length - 1)
# Set up normalizers
stream_mean_array = bottleneck.move_mean(
stream, template_length)[template_length - 1:]
stream_std_array = bottleneck.move_std(
stream, template_length)[template_length - 1:]
    # because stream_std_array is in the denominator of res, nan all 0s
stream_std_array[stream_std_array == 0] = np.nan
# Normalize and flip the templates
norm = ((templates - templates.mean(axis=-1, keepdims=True)) / (
templates.std(axis=-1, keepdims=True) * template_length))
norm_sum = norm.sum(axis=-1, keepdims=True)
stream_fft = np.fft.rfft(stream, fftshape)
template_fft = np.fft.rfft(np.flip(norm, axis=-1), fftshape, axis=-1)
res = np.fft.irfft(template_fft * stream_fft,
fftshape)[:, 0:template_length + stream_length - 1]
res = ((_centered(res, stream_length - template_length + 1)) -
norm_sum * stream_mean_array) / stream_std_array
res[np.isnan(res)] = 0.0
# res[np.isinf(res)] = 0.0
    for i, pad in enumerate(pads):
res[i] = np.append(res[i], np.zeros(pad))[pad:]
return res.astype(np.float32), used_chans
@register_array_xcorr('time_domain')
def time_multi_normxcorr(templates, stream, pads, *args, **kwargs):
"""
Compute cross-correlations in the time-domain using C routine.
:param templates: 2D Array of templates
:type templates: np.ndarray
:param stream: 1D array of continuous data
:type stream: np.ndarray
:param pads: List of ints of pad lengths in the same order as templates
:type pads: list
:return: np.ndarray of cross-correlations
:return: np.ndarray channels used
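
    A minimal sketch (assumes the compiled libutils library is available)::

        >>> import numpy as np
        >>> templates = np.random.randn(2, 50).astype(np.float32)
        >>> stream = np.random.randn(600).astype(np.float32)
        >>> ccc, used = time_multi_normxcorr(
        ...     templates, stream, pads=[0, 0])  # doctest: +SKIP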
"""
used_chans = ~np.isnan(templates).any(axis=1)
utilslib = _load_cdll('libutils')
utilslib.multi_normxcorr_time.argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int, ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS'))]
utilslib.multi_normxcorr_time.restype = ctypes.c_int
# Need to de-mean everything
templates_means = templates.mean(axis=1).astype(np.float32)[:, np.newaxis]
stream_mean = stream.mean().astype(np.float32)
templates = templates.astype(np.float32) - templates_means
stream = stream.astype(np.float32) - stream_mean
template_len = templates.shape[1]
n_templates = templates.shape[0]
image_len = stream.shape[0]
ccc = np.ascontiguousarray(
np.empty((image_len - template_len + 1) * n_templates), np.float32)
t_array = np.ascontiguousarray(templates.flatten(), np.float32)
utilslib.multi_normxcorr_time(
t_array, template_len, n_templates,
np.ascontiguousarray(stream, np.float32), image_len, ccc)
ccc[np.isnan(ccc)] = 0.0
ccc = ccc.reshape((n_templates, image_len - template_len + 1))
for i in range(len(pads)):
ccc[i] = np.append(ccc[i], np.zeros(pads[i]))[pads[i]:]
templates += templates_means
stream += stream_mean
return ccc, used_chans
@register_array_xcorr('fftw', is_default=True)
def fftw_normxcorr(templates, stream, pads, threaded=True, *args, **kwargs):
"""
Normalised cross-correlation using the fftw library.
    Internally this function uses double-precision numbers, which are required
    for seismic data. Cross-correlations are computed as the inverse fft of
    the dot product of the ffts of the stream and the reversed, normalised
    templates. The cross-correlation is then normalised using the running
    mean and standard deviation (without the N-1 correction) of the stream
    and the sums of the normalised templates.
    This Python function wraps the C library written by C. Chamberlain for
    this purpose.
:param templates: 2D Array of templates
:type templates: np.ndarray
:param stream: 1D array of continuous data
:type stream: np.ndarray
:param pads: List of ints of pad lengths in the same order as templates
:type pads: list
:param threaded:
Whether to use the threaded routine or not - note openMP and python
multiprocessing don't seem to play nice for this.
:type threaded: bool
:return: np.ndarray of cross-correlations
:return: np.ndarray channels used
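
    A minimal sketch (assumes the compiled libutils library is available)::

        >>> import numpy as np
        >>> templates = np.random.randn(2, 50).astype(np.float32)
        >>> stream = np.random.randn(600).astype(np.float32)
        >>> ccc, used = fftw_normxcorr(
        ...     templates, stream, pads=[0, 0], threaded=False)  # doctest: +SKIP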
"""
utilslib = _load_cdll('libutils')
argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int, ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int]
restype = ctypes.c_int
if threaded:
func = utilslib.normxcorr_fftw_threaded
else:
func = utilslib.normxcorr_fftw
func.argtypes = argtypes
func.restype = restype
# Generate a template mask
used_chans = ~np.isnan(templates).any(axis=1)
template_length = templates.shape[1]
stream_length = len(stream)
n_templates = templates.shape[0]
fftshape = next_fast_len(template_length + stream_length - 1)
    # Normalize and flip the templates
norm = ((templates - templates.mean(axis=-1, keepdims=True)) / (
templates.std(axis=-1, keepdims=True) * template_length))
norm = np.nan_to_num(norm)
ccc = np.empty((n_templates, stream_length - template_length + 1),
np.float32).flatten(order='C')
ret = func(
np.ascontiguousarray(norm.flatten(order='C'), np.float32),
template_length, n_templates,
np.ascontiguousarray(stream, np.float32), stream_length,
np.ascontiguousarray(ccc, np.float32), fftshape)
if ret != 0:
print(ret)
raise MemoryError()
ccc = ccc.reshape((n_templates, stream_length - template_length + 1))
for i in range(n_templates):
if not used_chans[i]:
ccc[i] = np.zeros(stream_length - template_length + 1)
ccc[np.isnan(ccc)] = 0.0
if np.any(np.abs(ccc) > 1.01):
print('Normalisation error in C code')
print(ccc.max())
print(ccc.min())
raise MemoryError()
ccc[ccc > 1.0] = 1.0
ccc[ccc < -1.0] = -1.0
for i in range(len(pads)):
ccc[i] = np.append(ccc[i], np.zeros(pads[i]))[pads[i]:]
return ccc, used_chans
# fftw_normxcorr cannot be run using standard multiprocess/stream functions
# register all of them to point to this function
@fftw_normxcorr.register('multiprocess')
@fftw_normxcorr.register('stream_xcorr')
@fftw_normxcorr.register('multithread')
@fftw_normxcorr.register('concurrent')
def _fftw_stream_xcorr(templates, stream, *args, **kwargs):
"""
Apply fftw normxcorr routine concurrently.
:type templates: list
:param templates:
A list of templates, where each one should be an obspy.Stream object
containing multiple traces of seismic data and the relevant header
information.
:type stream: obspy.core.stream.Stream
:param stream:
A single Stream object to be correlated with the templates.
:returns:
New list of :class:`numpy.ndarray` objects. These will contain
the correlation sums for each template for this day of data.
:rtype: list
:returns:
list of ints as number of channels used for each cross-correlation.
:rtype: list
:returns:
list of list of tuples of station, channel for all cross-correlations.
:rtype: list
"""
chans = [[] for _i in range(len(templates))]
array_dict_tuple = _get_array_dicts(templates, stream)
stream_dict, template_dict, pad_dict, seed_ids = array_dict_tuple
assert set(seed_ids)
xcorrs, tr_chans = fftw_multi_normxcorr(
template_array=template_dict, stream_array=stream_dict,
pad_array=pad_dict, seed_ids=seed_ids)
cccsums = np.sum(xcorrs, axis=0)
    no_chans = np.sum(np.array(tr_chans).astype(int), axis=0)
for seed_id, tr_chan in zip(seed_ids, tr_chans):
for chan, state in zip(chans, tr_chan):
if state:
chan.append((seed_id.split('.')[1],
seed_id.split('.')[-1].split('_')[0]))
return cccsums, no_chans, chans
def fftw_multi_normxcorr(template_array, stream_array, pad_array, seed_ids):
"""
    Use a C loop rather than a Python loop - in some cases this will be
    faster.
    :type template_array: dict
    :param template_array: Dict of 2D template arrays keyed by seed id
    :type stream_array: dict
    :param stream_array: Dict of 1D continuous-data arrays keyed by seed id
    :type pad_array: dict
    :param pad_array: Dict of lists of pad lengths keyed by seed id
    :type seed_ids: list
    :param seed_ids: List of the seed-id keys shared by the above dicts
    :rtype: np.ndarray, list
    :return: 3D Array of cross-correlations and list of used channels.
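
    A minimal sketch (assumes the compiled libutils library is available;
    the seed id is hypothetical)::

        >>> import numpy as np
        >>> sid = 'NZ.FOZ.10.HHZ_0'
        >>> t_arr = {sid: np.random.randn(2, 50).astype(np.float32)}
        >>> s_arr = {sid: np.random.randn(600).astype(np.float32)}
        >>> pads = {sid: [0, 0]}
        >>> cccs, used = fftw_multi_normxcorr(
        ...     t_arr, s_arr, pads, [sid])  # doctest: +SKIP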
"""
utilslib = _load_cdll('libutils')
utilslib.multi_normxcorr_fftw.argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int, ctypes.c_int, ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, ndim=1,
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int]
utilslib.multi_normxcorr_fftw.restype = ctypes.c_int
'''
Arguments are:
templates (stacked [ch_1-t_1, ch_1-t_2, ..., ch_2-t_1, ch_2-t_2, ...])
number of templates
template length
number of channels
image (stacked [ch_1, ch_2, ..., ch_n])
image length
cross-correlations (stacked as per image)
fft-length
'''
used_chans = []
template_len = template_array[seed_ids[0]].shape[1]
for seed_id in seed_ids:
used_chans.append(~np.isnan(template_array[seed_id]).any(axis=1))
template_array[seed_id] = (
(template_array[seed_id] -
template_array[seed_id].mean(axis=-1, keepdims=True)) / (
template_array[seed_id].std(axis=-1, keepdims=True) *
template_len))
template_array[seed_id] = np.nan_to_num(template_array[seed_id])
n_channels = len(seed_ids)
n_templates = template_array[seed_ids[0]].shape[0]
image_len = stream_array[seed_ids[0]].shape[0]
fft_len = next_fast_len(template_len + image_len - 1)
template_list = [template_array[x] for x in seed_ids]
stream_list = [stream_array[x] for x in seed_ids]
template_array = np.array(template_list).flatten(order='C')
stream_array = np.array(stream_list).flatten(order='C')
cccs = np.empty((n_channels, n_templates, image_len - template_len + 1),
np.float32).flatten(order='C')
ret = utilslib.multi_normxcorr_fftw(
template_array, n_templates, template_len, n_channels, stream_array,
image_len, cccs, fft_len)
if ret != 0:
raise MemoryError()
cccs = cccs.reshape((n_channels, n_templates,
image_len - template_len + 1))
for j in range(n_channels):
for i in range(n_templates):
if not used_chans[j][i]:
cccs[j][i] = np.zeros(image_len - template_len + 1)
cccs[np.isnan(cccs)] = 0.0
if np.any(np.abs(cccs) > 1.01):
print('Normalisation error in C code')
print(cccs.max())
print(cccs.min())
raise MemoryError()
cccs[cccs > 1.0] = 1.0
cccs[cccs < -1.0] = -1.0
for j, seed_id in enumerate(seed_ids):
for i in range(len(pad_array[seed_id])):
cccs[j][i] = np.append(
cccs[j][i],
np.zeros(pad_array[seed_id][i]))[pad_array[seed_id][i]:]
cccs = cccs.reshape(n_channels, n_templates, image_len - template_len + 1)
return cccs, used_chans
# ------------------------------- stream_xcorr functions
def get_stream_xcorr(name_or_func=None, concurrency=None):
"""
Return a function for performing normalized cross correlation on lists of
streams.
:param name_or_func: Either a name of a registered function or a callable
that implements the standard array_normxcorr signature.
:param concurrency:
Optional concurrency strategy, options are:
multithread - use a threadpool for concurrency
multiprocess - use a process pool for concurrency
            concurrent - use a customized concurrency strategy for the
                function, if not defined threading will be used
:return: A callable with the interface of stream_normxcorr
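
    For example (a sketch; ``templates`` is a list of obspy Stream objects
    and ``stream`` a single obspy Stream, both assumed to exist)::

        >>> func = get_stream_xcorr('fftw', concurrency='multithread')  # doctest: +SKIP
        >>> cccsums, no_chans, chans = func(templates, stream)  # doctest: +SKIP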
"""
    func = _get_registered_func(name_or_func)
concur = concurrency or 'stream_xcorr'
if not hasattr(func, concur):
msg = '%s does not support concurrency %s' % (func.__name__, concur)
raise ValueError(msg)
return getattr(func, concur)
# --------------------------- stream prep functions
def _get_array_dicts(templates, stream, copy_streams=True):
""" prepare templates and stream, return dicts """
# Do some reshaping
# init empty structures for data storage
template_dict = {}
stream_dict = {}
pad_dict = {}
t_starts = []
stream.sort(['network', 'station', 'location', 'channel'])
for template in templates:
template.sort(['network', 'station', 'location', 'channel'])
t_starts.append(min([tr.stats.starttime for tr in template]))
# get seed ids, make sure these are collected on sorted streams
seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])]
# pull common channels out of streams and templates and put in dicts
for i, seed_id in enumerate(seed_ids):
temps_with_seed = [template[i].data for template in templates]
t_ar = np.array(temps_with_seed).astype(np.float32)
template_dict.update({seed_id: t_ar})
stream_dict.update(
{seed_id: stream.select(
id=seed_id.split('_')[0])[0].data.astype(np.float32)})
pad_list = [
int(round(template[i].stats.sampling_rate *
(template[i].stats.starttime - t_starts[j])))
for j, template in zip(range(len(templates)), templates)]
pad_dict.update({seed_id: pad_list})
return stream_dict, template_dict, pad_dict, seed_ids
if __name__ == '__main__':
import doctest
doctest.testmod()