"""Functions to find peaks in data above a certain threshold.
:copyright:
EQcorrscan developers.
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
import ctypes
import random
import logging
import numpy as np
from multiprocessing import Pool, cpu_count
from scipy import ndimage
from future.utils import native_str
from eqcorrscan.utils.correlate import pool_boy
from eqcorrscan.utils.libnames import _load_cdll
from eqcorrscan.utils.clustering import dist_mat_km
Logger = logging.getLogger(__name__)
def is_prime(number):
"""
Function to test primality of a number. Function lifted from online
resource:
http://www.codeproject.com/Articles/691200/Primality-test-algorithms-Prime-test-The-fastest-w
This function is distributed under a separate licence:
This article, along with any associated source code and files, is \
licensed under The Code Project Open License (CPOL)
:type number: int
:param number: Integer to test for primality
:returns: bool
>>> is_prime(4)
False
>>> is_prime(3)
True
"""
    if number <= 1:
        return False
    if number <= 3:
        # 2 and 3 are prime; returning early also avoids
        # random.randint(2, number - 1) raising ValueError when
        # number - 1 < 2.
        return True
    # Repeat the Fermat test a few times with random bases
    for _ in range(3):
        # Draw a random base in the range [2, number - 1]
        random_number = random.randint(2, number - 1)
        # Fermat's little theorem: if number is prime then
        # base ** (number - 1) == 1 (mod number)
        if pow(random_number, number - 1, number) != 1:
            return False
    return True
def find_peaks_compiled(arr, thresh, trig_int, full_peaks=False):
"""
Determine peaks in an array of data above a certain threshold.
:type arr: numpy.ndarray
:param arr: 1-D numpy array is required
:type thresh: float
    :param thresh:
        The threshold below which values are considered noise; peaks will
        only be sought above this value.
    :type trig_int: int
    :param trig_int:
        The minimum difference in samples between triggers; if multiple
        peaks occur within this window only the highest will be kept.
:type full_peaks: bool
:param full_peaks:
If True, will decluster within data-sections above the threshold,
rather than just taking the peak within that section. This will take
more time. This defaults to False for match_filter.
    :return: peaks: List of tuples of peak values and locations.
:rtype: list
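
    A hedged usage sketch (not executed as a doctest; it requires the
    compiled C library, and the exact scalar formatting depends on the
    installed numpy version):

    >>> import numpy as np
    >>> arr = np.random.randn(100)
    >>> arr[40] = 20
    >>> arr[60] = 100
    >>> find_peaks_compiled(arr, thresh=10, trig_int=3)  # doctest: +SKIP
    [(20.0, 40), (100.0, 60)]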
"""
if not np.any(np.abs(arr) > thresh):
# Fast fail
return []
if not full_peaks:
peak_vals, peak_indices = _find_peaks_c(array=arr, threshold=thresh)
else:
peak_vals = arr
peak_indices = np.arange(arr.shape[0])
if len(peak_vals) > 0:
peaks = decluster(
peaks=np.array(peak_vals), index=np.array(peak_indices),
trig_int=trig_int + 1, threshold=thresh)
peaks = sorted(peaks, key=lambda peak: peak[1], reverse=False)
return peaks
else:
return []
def find_peaks2_short(arr, thresh, trig_int, full_peaks=False):
"""
Determine peaks in an array of data above a certain threshold.
Uses a mask to remove data below threshold and finds peaks in what is left.
:type arr: numpy.ndarray
:param arr: 1-D numpy array is required
:type thresh: float
    :param thresh:
        The threshold below which values are considered noise; peaks will
        only be sought above this value.
    :type trig_int: int
    :param trig_int:
        The minimum difference in samples between triggers; if multiple
        peaks occur within this window only the highest will be kept.
:type full_peaks: bool
:param full_peaks:
        If True, will decluster within data-sections above the threshold,
        rather than just taking the peak within that section. This will take
        more time. This defaults to False for match_filter.
    :return: peaks: List of tuples of peak values and locations.
:rtype: list
>>> import numpy as np
>>> arr = np.random.randn(100)
>>> threshold = 10
>>> arr[40] = 20
>>> arr[60] = 100
>>> find_peaks2_short(arr, threshold, 3)
[(20.0, 40), (100.0, 60)]
"""
# Set everything below the threshold to zero
image = np.copy(arr)
Logger.debug("Threshold: {0}\tMax: {1}".format(thresh, max(image)))
image[np.abs(image) < thresh] = 0
if len(image[np.abs(image) > thresh]) == 0:
Logger.debug("No values over threshold {0}".format(thresh))
return []
if np.all(np.abs(arr) > thresh):
full_peaks = True
Logger.debug('Found {0} samples above the threshold'.format(
len(image[image > thresh])))
initial_peaks = []
# Find the peaks
labeled_image, number_of_objects = ndimage.label(image)
peak_slices = ndimage.find_objects(labeled_image)
for peak_slice in peak_slices:
window = arr[peak_slice[0].start: peak_slice[0].stop]
if peak_slice[0].stop - peak_slice[0].start >= trig_int and full_peaks:
window_peaks, window_peak_indexes = ([], [])
for i in np.arange(peak_slice[0].start, peak_slice[0].stop):
if i == peak_slice[0].start:
prev_value = 0
else:
prev_value = arr[i - 1]
if i == peak_slice[0].stop - 1:
next_value = 0
else:
next_value = arr[i + 1]
# Check for consistent sign - either both greater or
# both smaller.
if (next_value - arr[i]) * (prev_value - arr[i]) > 0:
window_peaks.append(arr[i])
window_peak_indexes.append(i)
peaks = decluster(
peaks=np.array(window_peaks), trig_int=trig_int + 1,
index=np.array(window_peak_indexes))
else:
peaks = [(window[np.argmax(abs(window))],
int(peak_slice[0].start + np.argmax(abs(window))))]
initial_peaks.extend(peaks)
    if initial_peaks:
        peaks = decluster(peaks=np.array(list(zip(*initial_peaks))[0]),
                          index=np.array(list(zip(*initial_peaks))[1]),
                          trig_int=trig_int + 1)
        peaks = sorted(peaks, key=lambda peak: peak[1], reverse=False)
        return peaks
    Logger.info('No peaks for you!')
    return []
def multi_find_peaks(arr, thresh, trig_int, parallel=True, full_peaks=False,
cores=None, internal_func=find_peaks_compiled):
"""
Wrapper for find-peaks for multiple arrays.
:type arr: numpy.ndarray
:param arr: 2-D numpy array is required
:type thresh: list
    :param thresh:
        The threshold below which values are considered noise; peaks will
        only be sought above this value. One threshold per array.
:type trig_int: int
    :param trig_int:
        The minimum difference in samples between triggers; if multiple
        peaks occur within this window only the highest will be kept.
:type parallel: bool
:param parallel:
        Whether to compute in parallel or not - will use multiprocessing if
        not using the compiled internal_func.
:type full_peaks: bool
:param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short`
:type cores: int
:param cores:
Maximum number of processes to spin up for parallel peak-finding
:type internal_func: callable
:param internal_func:
Function to use for peak finding - defaults to the compiled version.
    :returns:
        List of lists of tuples of (peak, index) in the same order as the
        input arrays.
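
    A hedged usage sketch (not executed as a doctest; requires the compiled
    C library):

    >>> import numpy as np
    >>> arr = np.random.randn(2, 100)
    >>> arr[0, 40] = 20.
    >>> arr[1, 60] = 100.
    >>> multi_find_peaks(arr, thresh=[10., 10.], trig_int=3)  # doctest: +SKIP
    [[(20.0, 40)], [(100.0, 60)]]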
"""
peaks = []
if not parallel:
for sub_arr, arr_thresh in zip(arr, thresh):
peaks.append(internal_func(
arr=sub_arr, thresh=arr_thresh, trig_int=trig_int,
full_peaks=full_peaks))
else:
if cores is None:
cores = min(arr.shape[0], cpu_count())
if internal_func.__name__ != 'find_peaks_compiled':
with pool_boy(Pool=Pool, traces=arr.shape[0], cores=cores) as pool:
params = ((sub_arr, arr_thresh, trig_int, full_peaks)
for sub_arr, arr_thresh in zip(arr, thresh))
results = [
pool.apply_async(internal_func, param) for param in params]
peaks = [res.get() for res in results]
else:
peaks = _multi_find_peaks_compiled(
arr, thresh, trig_int, full_peaks=full_peaks, cores=cores)
return peaks
def _multi_find_peaks_compiled(arrays, thresholds, trig_int, full_peaks,
cores):
"""
Determine peaks in an array or arrays of data above a certain threshold.
:type arrays: numpy.ndarray
:param arrays: 2-D numpy array is required
:type thresholds: list
    :param thresholds:
        Minimum absolute values for peaks; one threshold per array.
    :type trig_int: int
    :param trig_int:
        The minimum difference in samples between triggers; if multiple
        peaks occur within this window only the highest will be kept.
:type full_peaks: bool
:param full_peaks:
If True, will decluster within data-sections above the threshold,
rather than just taking the peak within that section. This will take
more time. This defaults to False for match_filter.
:type cores: int
    :param cores: Number of threads to parallelise across.
    :return: peaks: List of lists of tuples of peak values and locations.
:rtype: list
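
    A hedged usage sketch (not executed as a doctest; requires the compiled
    C library):

    >>> import numpy as np
    >>> arrays = np.random.randn(2, 100)
    >>> arrays[0, 40] = 20.
    >>> arrays[1, 60] = 100.
    >>> _multi_find_peaks_compiled(
    ...     arrays, thresholds=[10., 10.], trig_int=3, full_peaks=False,
    ...     cores=1)  # doctest: +SKIP
    [[(20.0, 40)], [(100.0, 60)]]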
"""
if not full_peaks:
peak_vals, peak_indices = _multi_find_peaks_c(
arrays=arrays, thresholds=thresholds, threads=cores)
# Remove empty arrays
peak_mapper = {}
map_index = 0
_peak_vals = []
_peak_indices = []
_thresholds = []
for i in range(arrays.shape[0]):
if len(peak_vals[i]) > 0:
peak_mapper.update({i: map_index})
_peak_vals.append(peak_vals[i])
_peak_indices.append(peak_indices[i])
_thresholds.append(thresholds[i])
map_index += 1
peak_vals = _peak_vals
peak_indices = _peak_indices
thresholds = _thresholds
else:
peak_vals = arrays
peak_indices = [np.arange(arr.shape[0]) for arr in arrays]
peak_mapper = {i: i for i in range(len(peak_indices))}
if len(peak_indices) > 0:
peaks = _multi_decluster(
peaks=peak_vals, indices=peak_indices, trig_int=trig_int,
thresholds=thresholds, cores=cores)
peaks = [sorted(_peaks, key=lambda peak: peak[1], reverse=False)
for _peaks in peaks]
out_peaks = []
for i in range(arrays.shape[0]):
        if i in peak_mapper:
out_peaks.append(peaks[peak_mapper[i]])
else:
out_peaks.append([])
return out_peaks
def _multi_decluster(peaks, indices, trig_int, thresholds, cores):
"""
Decluster peaks based on an enforced minimum separation.
Only works when peaks and indices are all the same shape.
:type peaks: list
:param peaks: list of arrays of peak values
:type indices: list
:param indices: list of arrays of locations of peaks
:type trig_int: int
:param trig_int: Minimum trigger interval in samples
:type thresholds: list
:param thresholds: list of float of threshold values
:return: list of lists of tuples of (value, sample)
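
    A hedged usage sketch (not executed as a doctest; kept peaks come back
    sorted by decreasing absolute value):

    >>> import numpy as np
    >>> peaks = [np.array([10., 8.]), np.array([12., 2.])]
    >>> indices = [np.array([100, 102]), np.array([50, 500])]
    >>> _multi_decluster(peaks=peaks, indices=indices, trig_int=10,
    ...                  thresholds=[0., 0.], cores=1)  # doctest: +SKIP
    [[(10.0, 100)], [(12.0, 50), (2.0, 500)]]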
"""
utilslib = _load_cdll('libutils')
lengths = np.array([peak.shape[0] for peak in peaks], dtype=int)
trig_int = int(trig_int)
n = np.int32(len(peaks))
cores = min(cores, n)
total_length = lengths.sum()
max_indexes = [_indices.max() for _indices in indices]
max_index = max(max_indexes)
    # Choose the C variant based on the largest value that must fit in the
    # C integer type; checking each value in turn would let the last one
    # checked overwrite the decision made for the others.
    largest = int(max(trig_int, lengths.max(), max_index))
    if largest == ctypes.c_long(largest).value:
        long_type = ctypes.c_long
        func = utilslib.multi_decluster
    elif largest == ctypes.c_longlong(largest).value:
        long_type = ctypes.c_longlong
        func = utilslib.multi_decluster_ll
    else:
        # Note, could use numpy.gcd to try and find greatest common
        # divisor and make numbers smaller
        raise OverflowError("Maximum index larger than internal long long")
func.argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, shape=(total_length,),
flags=native_str('C_CONTIGUOUS')),
np.ctypeslib.ndpointer(dtype=long_type, shape=(total_length,),
flags=native_str('C_CONTIGUOUS')),
np.ctypeslib.ndpointer(dtype=long_type, shape=(n,),
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, shape=(n,),
flags=native_str('C_CONTIGUOUS')),
long_type,
np.ctypeslib.ndpointer(dtype=np.uint32, shape=(total_length,),
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int]
func.restype = ctypes.c_int
    peaks_sorted = np.empty(total_length, dtype=np.float32)
    # Use an integer type for the indices: float32 cannot exactly represent
    # large sample indices
    indices_sorted = np.empty_like(peaks_sorted, dtype=long_type)
    # TODO: When doing full decluster from match-filter, all lengths will be
    #  the same - would be more efficient to use numpy sort on 2D matrix
start_ind = 0
end_ind = 0
for _peaks, _indices, length in zip(peaks, indices, lengths):
end_ind += length
sorted_indices = np.abs(_peaks).argsort()
peaks_sorted[start_ind: end_ind] = _peaks[sorted_indices[::-1]]
indices_sorted[start_ind: end_ind] = _indices[sorted_indices[::-1]]
start_ind += length
peaks_sorted = np.ascontiguousarray(peaks_sorted, dtype=np.float32)
indices_sorted = np.ascontiguousarray(
indices_sorted, dtype=long_type)
lengths = np.ascontiguousarray(lengths, dtype=long_type)
thresholds = np.ascontiguousarray(thresholds, dtype=np.float32)
out = np.zeros(total_length, dtype=np.uint32)
ret = func(
peaks_sorted, indices_sorted, lengths, np.int32(n), thresholds,
long_type(trig_int + 1), out, np.int32(cores))
if ret != 0:
raise MemoryError("Issue with c-routine, returned %i" % ret)
peaks_out = []
slice_start = 0
for length in lengths:
slice_end = slice_start + length
out_mask = out[slice_start: slice_end].astype(bool)
declustered_peaks = peaks_sorted[slice_start: slice_end][out_mask]
declustered_indices = indices_sorted[slice_start: slice_end][out_mask]
peaks_out.append(list(zip(declustered_peaks, declustered_indices)))
slice_start = slice_end
return peaks_out
def decluster_distance_time(peaks, index, trig_int, catalog,
hypocentral_separation, threshold=0):
"""
Decluster based on time between peaks, and distance between events.
    Peaks, index and catalog must all be sorted the same way, i.e. peak[i]
    corresponds to index[i] and catalog[i]. Peaks that are within the time
    threshold of one another, but correspond to events separated by more than
    the hypocentral_separation threshold, will not be removed.
:type peaks: np.array
:param peaks: array of peak values
:type index: np.ndarray
:param index: locations of peaks
:type trig_int: int
:param trig_int: Minimum trigger interval in samples
:type catalog: obspy.core.event.Catalog
:param catalog:
Catalog of events with origins to use to measure inter-event distances
:type hypocentral_separation: float
:param hypocentral_separation:
Maximum inter-event distance to decluster over in km
:type threshold: float
:param threshold: Minimum absolute peak value to retain it
:return: list of tuples of (value, sample)
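
    A hedged usage sketch (not executed as a doctest): ``catalog`` is assumed
    to be a pre-built obspy.core.event.Catalog with origins, sorted such that
    catalog[i] matches peaks[i] and index[i]:

    >>> peaks_out = decluster_distance_time(
    ...     peaks=peaks, index=index, trig_int=100, catalog=catalog,
    ...     hypocentral_separation=30.0)  # doctest: +SKIP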
"""
utilslib = _load_cdll('libutils')
length = peaks.shape[0]
trig_int = int(trig_int)
    # Choose the C variant based on the largest value that must fit in the
    # C integer type
    largest = int(max(index.max(), trig_int))
    if largest == ctypes.c_long(largest).value:
        long_type = ctypes.c_long
        func = utilslib.decluster_dist_time
    elif largest == ctypes.c_longlong(largest).value:
        long_type = ctypes.c_longlong
        func = utilslib.decluster_dist_time_ll
    else:
        raise OverflowError("Maximum index larger than internal long long")
func.argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, shape=(length,),
flags=native_str('C_CONTIGUOUS')),
np.ctypeslib.ndpointer(dtype=long_type, shape=(length,),
flags=native_str('C_CONTIGUOUS')),
np.ctypeslib.ndpointer(dtype=np.float32, shape=(length * length,),
flags=native_str('C_CONTIGUOUS')),
long_type, ctypes.c_float, long_type, ctypes.c_float,
np.ctypeslib.ndpointer(dtype=np.uint32, shape=(length,),
flags=native_str('C_CONTIGUOUS'))]
func.restype = ctypes.c_int
sorted_inds = np.abs(peaks).argsort()
# Sort everything in the same way.
arr = peaks[sorted_inds[::-1]]
inds = index[sorted_inds[::-1]]
sorted_events = [catalog[i] for i in sorted_inds[::-1]]
distance_matrix = dist_mat_km(catalog=sorted_events)
arr = np.ascontiguousarray(arr, dtype=np.float32)
inds = np.ascontiguousarray(inds, dtype=long_type)
distance_matrix = np.ascontiguousarray(
distance_matrix.flatten(order="C"), dtype=np.float32)
out = np.zeros(len(arr), dtype=np.uint32)
ret = func(
arr, inds, distance_matrix, long_type(length), np.float32(threshold),
long_type(trig_int), hypocentral_separation, out)
if ret != 0:
raise MemoryError("Issue with c-routine, returned %i" % ret)
peaks_out = list(zip(arr[out.astype(bool)], inds[out.astype(bool)]))
return peaks_out
def decluster(peaks, index, trig_int, threshold=0):
"""
Decluster peaks based on an enforced minimum separation.
:type peaks: np.array
:param peaks: array of peak values
:type index: np.ndarray
:param index: locations of peaks
:type trig_int: int
:param trig_int: Minimum trigger interval in samples
:type threshold: float
:param threshold: Minimum absolute peak value to retain it.
:return: list of tuples of (value, sample)
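
    A hedged usage sketch (not executed as a doctest; kept peaks come back
    sorted by decreasing absolute value):

    >>> import numpy as np
    >>> peaks = np.array([10., 8., 12.])
    >>> index = np.array([100, 102, 300])
    >>> decluster(peaks=peaks, index=index, trig_int=10)  # doctest: +SKIP
    [(12.0, 300), (10.0, 100)]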
"""
utilslib = _load_cdll('libutils')
length = peaks.shape[0]
trig_int = int(trig_int)
    # Choose the C variant based on the largest value that must fit in the
    # C integer type
    largest = int(max(index.max(), trig_int))
    if largest == ctypes.c_long(largest).value:
        long_type = ctypes.c_long
        func = utilslib.decluster
    elif largest == ctypes.c_longlong(largest).value:
        long_type = ctypes.c_longlong
        func = utilslib.decluster_ll
    else:
        raise OverflowError("Maximum index larger than internal long long")
func.argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, shape=(length,),
flags=native_str('C_CONTIGUOUS')),
np.ctypeslib.ndpointer(dtype=long_type, shape=(length,),
flags=native_str('C_CONTIGUOUS')),
long_type, ctypes.c_float, long_type,
np.ctypeslib.ndpointer(dtype=np.uint32, shape=(length,),
flags=native_str('C_CONTIGUOUS'))]
func.restype = ctypes.c_int
sorted_inds = np.abs(peaks).argsort()
arr = peaks[sorted_inds[::-1]]
inds = index[sorted_inds[::-1]]
arr = np.ascontiguousarray(arr, dtype=np.float32)
inds = np.ascontiguousarray(inds, dtype=long_type)
out = np.zeros(len(arr), dtype=np.uint32)
ret = func(
arr, inds, long_type(length), np.float32(threshold),
long_type(trig_int), out)
if ret != 0:
raise MemoryError("Issue with c-routine, returned %i" % ret)
peaks_out = list(zip(arr[out.astype(bool)], inds[out.astype(bool)]))
return peaks_out
def _find_peaks_c(array, threshold):
"""
Use a C func to find peaks in the array.
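
    A hedged usage sketch (not executed as a doctest):

    >>> vals, locs = _find_peaks_c(
    ...     array=np.random.randn(1000).astype(np.float32),
    ...     threshold=5.0)  # doctest: +SKIP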
"""
utilslib = _load_cdll('libutils')
length = array.shape[0]
utilslib.find_peaks.argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, shape=(length, ),
flags=native_str('C_CONTIGUOUS')),
ctypes.c_long, ctypes.c_float,
np.ctypeslib.ndpointer(dtype=np.uint32, shape=(length, ),
flags=native_str('C_CONTIGUOUS'))]
utilslib.find_peaks.restype = ctypes.c_int
arr = np.ascontiguousarray(array, np.float32)
out = np.ascontiguousarray(np.zeros((length, ), dtype=np.uint32))
ret = utilslib.find_peaks(arr, ctypes.c_long(length), threshold, out)
if ret != 0:
raise MemoryError("Internal error")
peaks_locations = np.nonzero(out)
return array[peaks_locations], peaks_locations[0]
def _multi_find_peaks_c(arrays, thresholds, threads):
"""
Wrapper for multi-find peaks C-func
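
    A hedged usage sketch (not executed as a doctest):

    >>> vals, locs = _multi_find_peaks_c(
    ...     arrays=np.random.randn(4, 1000).astype(np.float32),
    ...     thresholds=np.array([5.0] * 4, dtype=np.float32),
    ...     threads=2)  # doctest: +SKIP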
"""
utilslib = _load_cdll('libutils')
length = arrays.shape[1]
n = np.int32(arrays.shape[0])
thresholds = np.ascontiguousarray(thresholds, np.float32)
    # Flattening copies the data, so the caller's arrays are not modified
    arr = np.ascontiguousarray(arrays.flatten(), np.float32)
utilslib.multi_find_peaks.argtypes = [
np.ctypeslib.ndpointer(dtype=np.float32, shape=(n * length,),
flags=native_str('C_CONTIGUOUS')),
ctypes.c_long, ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.float32, shape=(n, ),
flags=native_str('C_CONTIGUOUS')),
ctypes.c_int,
np.ctypeslib.ndpointer(dtype=np.uint32, shape=(n * length, ),
flags=native_str('C_CONTIGUOUS'))]
utilslib.multi_find_peaks.restype = ctypes.c_int
out = np.ascontiguousarray(np.zeros((n * length, ), dtype=np.uint32))
ret = utilslib.multi_find_peaks(
arr, ctypes.c_long(length), n, thresholds, threads, out)
if ret != 0:
raise MemoryError("Internal error")
peaks = []
peak_locations = []
out = out.reshape(n, length)
for i in range(n):
peak_locs = np.nonzero(out[i])
peaks.append(arrays[i][peak_locs])
peak_locations.append(peak_locs[0])
return peaks, peak_locations
def coin_trig(peaks, stachans, samp_rate, moveout, min_trig, trig_int):
"""
Find network coincidence triggers within peaks of detection statistics.
Useful for finding network detections from sets of detections on individual
stations.
:type peaks: list
:param peaks: List of lists of tuples of (peak, index) for each \
station-channel. Index should be in samples.
:type stachans: list
:param stachans: List of tuples of (station, channel) in the order of \
peaks.
:type samp_rate: float
:param samp_rate: Sampling rate in Hz
:type moveout: float
:param moveout: Allowable network moveout in seconds.
:type min_trig: int
:param min_trig: Minimum station-channels required to declare a trigger.
:type trig_int: float
:param trig_int:
Minimum allowable time between network triggers in seconds.
:return:
List of tuples of (peak, index), for the earliest detected station.
:rtype: list
>>> peaks = [[(0.5, 100), (0.3, 800)], [(0.4, 120), (0.7, 850)]]
>>> triggers = coin_trig(peaks, [('a', 'Z'), ('b', 'Z')], 10, 3, 2, 1)
>>> print(triggers)
[(0.45, 100)]
"""
triggers = []
for stachan, _peaks in zip(stachans, peaks):
for peak in _peaks:
trigger = (peak[1], peak[0], '.'.join(stachan))
triggers.append(trigger)
coincidence_triggers = []
for i, master in enumerate(triggers):
slaves = triggers[i + 1:]
coincidence = 1
trig_time = master[0]
trig_val = master[1]
for slave in slaves:
if abs(slave[0] - master[0]) <= (moveout * samp_rate) and \
slave[2] != master[2]:
coincidence += 1
if slave[0] < master[0]:
trig_time = slave[0]
trig_val += slave[1]
if coincidence >= min_trig:
coincidence_triggers.append((trig_val / coincidence,
trig_time))
# Sort by trigger-value, largest to smallest - remove duplicate detections
if coincidence_triggers:
coincidence_triggers.sort(key=lambda tup: tup[0], reverse=True)
output = [coincidence_triggers[0]]
for coincidence_trigger in coincidence_triggers[1:]:
add = True
for peak in output:
# If the event occurs within the trig_int time then do not add
# it, and break out of the inner loop.
if abs(coincidence_trigger[1] - peak[1]) < (trig_int *
samp_rate):
add = False
break
if add:
output.append((coincidence_trigger[0],
coincidence_trigger[1]))
output.sort(key=lambda tup: tup[1])
return output
else:
return []
if __name__ == "__main__":
import doctest
doctest.testmod()