#!/usr/bin/python
"""
Functions for network matched-filter detection of seismic data.
Designed to cross-correlate templates generated by template_gen function
with data and output the detections.
:copyright:
EQcorrscan developers.
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import ast
import contextlib
import copy
import getpass
import glob
import os
import re
import shutil
import tarfile
import tempfile
import time
import warnings
from collections import Counter
from multiprocessing import cpu_count
from os.path import join
import numpy as np
from obspy import Trace, Catalog, UTCDateTime, Stream, read, read_events
from obspy.core.event import Comment, WaveformStreamID
from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
from eqcorrscan.core import template_gen
from eqcorrscan.core.lag_calc import lag_calc
from eqcorrscan.utils.catalog_utils import _get_origin
from eqcorrscan.utils.correlate import get_array_xcorr, get_stream_xcorr
from eqcorrscan.utils.debug_log import debug_print
from eqcorrscan.utils.findpeaks import find_peaks2_short, decluster
from eqcorrscan.utils.plotting import cumulative_detections
from eqcorrscan.utils.pre_processing import dayproc, shortproc
@contextlib.contextmanager
def temporary_directory():
""" make a temporary directory, yeild its name, cleanup on exit """
dir_name = tempfile.mkdtemp()
yield dir_name
if os.path.exists(dir_name):
shutil.rmtree(dir_name)
def _spike_test(stream, percent=0.99, multiplier=1e6):
"""
Check for very large spikes in data and raise an error if found.
:param stream: Stream to look for spikes in.
:type stream: :class:`obspy.core.stream.Stream`
:param percent: Percentage as a decimal to calculate range for.
:type percent: float
:param multiplier: Multiplier of range to define a spike.
:type multiplier: float
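
    .. rubric:: Example

    A minimal sketch using a synthetic, deterministic spike (the values here
    are illustrative only):

    >>> import numpy as np
    >>> from obspy import Stream, Trace
    >>> tr = Trace(data=np.ones(1000))
    >>> tr.data[500] = 1e9  # single sample far outside the data range
    >>> _spike_test(Stream(tr))  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    MatchFilterError: Spikes above ...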
"""
for tr in stream:
if (tr.data > 2 * np.max(np.sort(
np.abs(tr.data))[0:int(percent * len(tr.data))]
) * multiplier).sum() > 0:
msg = ('Spikes above ' + str(multiplier) +
' of the range of ' + str(percent) +
' of the data present, check. \n ' +
'This would otherwise likely result in an issue during ' +
'FFT prior to cross-correlation.\n' +
'If you think this spike is real please report ' +
'this as a bug.')
raise MatchFilterError(msg)
class MatchFilterError(Exception):
"""
Default error for match-filter errors.
"""
def __init__(self, value):
"""
Raise error.
.. rubric:: Example
>>> MatchFilterError('This raises an error')
This raises an error
"""
self.value = value
def __repr__(self):
"""
Print the value of the error.
.. rubric:: Example
>>> print(MatchFilterError('Error').__repr__())
Error
"""
return self.value
def __str__(self):
"""
Print the error in a pretty way.
.. rubric:: Example
>>> print(MatchFilterError('Error'))
Error
"""
return self.value
class Party(object):
"""
Container for multiple Family objects.
"""
    def __init__(self, families=None):
"""Instantiate the Party object."""
self.families = []
if isinstance(families, Family):
families = [families]
if families:
self.families.extend(families)
def __repr__(self):
"""
Print short info about the Party.
:return: str
.. rubric:: Example
>>> print(Party())
Party of 0 Families.
"""
print_str = ('Party of %s Families.' % len(self.families))
return print_str
def __iadd__(self, other):
"""
Method for in-place addition '+='. Uses the Party.__add__() method.
        :type other: `Party`
        :param other: Another Party to merge with this one.
:return: Works in place on self.
.. rubric:: Example
>>> party_a = Party(families=[Family(template=Template(name='a'),
... detections=[])])
>>> party_b = Party(families=[Family(template=Template(name='b'),
... detections=[])])
>>> party_a += party_b
>>> print(party_a)
Party of 2 Families.
"""
return self.__add__(other)
def __add__(self, other):
"""
Method for addition '+'.
        :type other: `Party` or `Family`
        :param other: Another Party or Family to merge with this Party.
        :return: This Party, extended in place.
.. Note:: Works in place on party, will alter this original party.
.. rubric:: Example
Addition of two parties together:
>>> party_a = Party(families=[Family(template=Template(name='a'),
... detections=[])])
>>> party_b = Party(families=[Family(template=Template(name='b'),
... detections=[])])
>>> party_c = party_a + party_b
>>> print(party_c)
Party of 2 Families.
Addition of a family to a party:
>>> party_a = Party(families=[Family(template=Template(name='a'),
... detections=[])])
>>> family_b = Family(template=Template(name='b'), detections=[])
>>> party_c = party_a + family_b
>>> print(party_c)
Party of 2 Families.
Addition of a party with some families using the same templates:
>>> party_a = Party(families=[Family(template=Template(name='a'),
... detections=[])])
>>> party_b = Party(families=[Family(template=Template(name='a'),
... detections=[])])
>>> party_c = party_a + party_b
>>> print(party_c)
Party of 1 Families.
        Addition of objects that are not a Family or Party is not allowed:
>>> party_a = Party(families=[Family(template=Template(name='a'))])
>>> misc = 1.0
>>> party_c = party_a + misc # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
NotImplementedError: Ambiguous add, only allowed Party or Family \
additions
"""
if isinstance(other, Family):
families = [other]
elif isinstance(other, Party):
families = other.families
else:
raise NotImplementedError(
'Ambiguous add, only allowed Party or Family additions.')
for oth_fam in families:
added = False
for fam in self.families:
if fam.template == oth_fam.template:
fam += oth_fam
added = True
if not added:
self.families.append(oth_fam)
return self
def __eq__(self, other):
"""
Equality testing, rich comparison '=='.
:param other: Another object.
:return: bool
.. rubric:: Example
Compare equal Parties:
>>> party_a = Party(families=[Family(template=Template(name='a'))])
>>> party_b = Party(families=[Family(template=Template(name='a'))])
>>> party_a == party_b
True
Compare Parties with different templates:
>>> party_a = Party(families=[Family(template=Template(name='a'))])
>>> party_b = Party(families=[Family(template=Template(name='b'))])
>>> party_a == party_b
False
Compare a Party with a Family:
>>> party = Party(families=[Family(template=Template(name='a'))])
>>> family = Family(template=Template(name='a'))
>>> party == family
False
"""
if not isinstance(other, Party):
return False
for family, oth_fam in zip(self.sort().families,
other.sort().families):
if family != oth_fam:
return False
else:
return True
def __ne__(self, other):
"""
Rich comparison operator '!='.
:param other: other object
:return: bool
.. rubric:: Example
Compare two equal Parties:
>>> party_a = Party(families=[Family(template=Template(name='a'))])
>>> party_b = Party(families=[Family(template=Template(name='a'))])
>>> party_a != party_b
False
"""
return not self.__eq__(other)
def __getitem__(self, index):
"""
Get families from the Party. Can accept either an index or slice.
:param index: Family number or name, or slice.
:return: Party (if a slice is given) or a single Family
.. rubric:: Example
Extract a single family:
>>> party = Party(families=[Family(template=Template(name='a')),
... Family(template=Template(name='b')),
... Family(template=Template(name='c'))])
>>> party[1]
Family of 0 detections from template b
Extract a list of families by giving a slice:
>>> party = Party(families=[Family(template=Template(name='a')),
... Family(template=Template(name='b')),
... Family(template=Template(name='c'))])
>>> party[1:]
Party of 2 Families.
Extract a single family by template name:
>>> party = Party(families=[Family(template=Template(name='a')),
... Family(template=Template(name='b')),
... Family(template=Template(name='c'))])
>>> party['b']
Family of 0 detections from template b
"""
if len(self.families) == 0:
return None
if isinstance(index, slice):
return self.__class__(families=self.families.__getitem__(index))
elif isinstance(index, int):
return self.families.__getitem__(index)
else:
_index = [i for i, family in enumerate(self.families)
if family.template.name == index]
try:
return self.families.__getitem__(_index[0])
except IndexError:
warnings.warn('Family: %s not in party' % index)
return []
def __len__(self):
"""
Get total number of detections in Party.
:return: length, int
.. rubric:: Example
>>> party = Party(families=[Family(template=Template(name='a')),
... Family(template=Template(name='b'))])
>>> len(party)
0
"""
length = 0
for family in self.families:
length += len(family)
return length
    def sort(self):
"""
Sort the families by template name.
.. rubric:: Example
>>> party = Party(families=[Family(template=Template(name='b')),
... Family(template=Template(name='a'))])
>>> party[0]
Family of 0 detections from template b
>>> party.sort()[0]
Family of 0 detections from template a
"""
self.families.sort(key=lambda x: x.template.name)
return self
def plot(self, plot_grouped=False):
"""
Plot the cumulative detections in time.
:type plot_grouped: bool
:param plot_grouped:
Whether to plot all families together (plot_grouped=True), or each
as a separate line.
        .. rubric:: Example
>>> Party().read().plot(plot_grouped=True) # doctest: +SKIP
.. plot::
from eqcorrscan.core.match_filter import Party
Party().read().plot(plot_grouped=True)
"""
all_dets = []
for fam in self.families:
all_dets.extend(fam.detections)
cumulative_detections(detections=all_dets, plot_grouped=plot_grouped)
    def rethreshold(self, new_threshold, new_threshold_type='MAD'):
"""
Remove detections from the Party that are below a new threshold.
Note, threshold can only be set higher.
.. Warning::
Works in place on Party.
:type new_threshold: float
:param new_threshold: New threshold level
:type new_threshold_type: str
:param new_threshold_type: Either 'MAD', 'absolute' or 'av_chan_corr'
.. rubric:: Example
Using the MAD threshold on detections made using the MAD threshold:
>>> party = Party().read()
>>> len(party)
4
>>> party = party.rethreshold(10.0)
>>> len(party)
4
>>> # Note that all detections are self detections
Using the absolute thresholding method on the same Party:
>>> party = Party().read().rethreshold(6.0, 'absolute')
>>> len(party)
1
Using the av_chan_corr method on the same Party:
>>> party = Party().read().rethreshold(0.9, 'av_chan_corr')
>>> len(party)
4
"""
for family in self.families:
            # Iterate over a copy so that detections can be removed safely
            for d in list(family.detections):
                if new_threshold_type == 'MAD' and d.threshold_type == 'MAD':
                    # d.threshold / d.threshold_input recovers the MAD of the
                    # original cccsum, which is then scaled by the new input
                    new_thresh = (d.threshold /
                                  d.threshold_input) * new_threshold
elif new_threshold_type == 'MAD' and d.threshold_type != 'MAD':
raise MatchFilterError(
'Cannot recalculate MAD level, '
'use another threshold type')
elif new_threshold_type == 'absolute':
new_thresh = new_threshold
elif new_threshold_type == 'av_chan_corr':
new_thresh = new_threshold * d.no_chans
else:
raise MatchFilterError(
'new_threshold_type %s is not recognised' %
str(new_threshold_type))
if d.detect_val < new_thresh:
family.detections.remove(d)
family.catalog = Catalog([d.event for d in family])
return self
    def decluster(self, trig_int, timing='detect', metric='avg_cor'):
"""
De-cluster a Party of detections by enforcing a detection separation.
De-clustering occurs between events detected by different (or the same)
templates. If multiple detections occur within trig_int then the
preferred detection will be determined by the metric argument. This
        can be either the average single-station correlation coefficient,
        calculated as Detection.detect_val / Detection.no_chans, or the raw
        cross-channel correlation sum, which is simply Detection.detect_val.
:type trig_int: float
:param trig_int: Minimum detection separation in seconds.
:type metric: str
:param metric: What metric to sort peaks by. Either 'avg_cor' which
takes the single station average correlation or 'cor_sum' which
takes the total correlation sum across all channels.
:type timing: str
:param timing:
Either 'detect' or 'origin' to decluster based on either the
detection time or the origin time.
.. Warning::
Works in place on object, if you need to keep the original safe
then run this on a copy of the object!
.. rubric:: Example
>>> party = Party().read()
>>> len(party)
4
>>> declustered = party.decluster(20)
>>> len(party)
3
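
        To decluster on origin times instead of detection times (requires
        each detection to carry an event with an origin; shown as a sketch):

        >>> party = Party().read().decluster(
        ...     20, timing='origin') # doctest: +SKIP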
"""
all_detections = []
for fam in self.families:
all_detections.extend(fam.detections)
if timing == 'detect':
if metric == 'avg_cor':
detect_info = [(d.detect_time, d.detect_val / d.no_chans)
for d in all_detections]
elif metric == 'cor_sum':
detect_info = [(d.detect_time, d.detect_val)
for d in all_detections]
else:
raise MatchFilterError('metric is not cor_sum or avg_cor')
elif timing == 'origin':
if metric == 'avg_cor':
detect_info = [(_get_origin(d.event).time,
d.detect_val / d.no_chans)
for d in all_detections]
elif metric == 'cor_sum':
detect_info = [(_get_origin(d.event).time, d.detect_val)
for d in all_detections]
else:
raise MatchFilterError('metric is not cor_sum or avg_cor')
else:
raise MatchFilterError('timing is not detect or origin')
min_det = sorted([d[0] for d in detect_info])[0]
detect_info = [(d[1], _total_microsec(d[0].datetime, min_det.datetime))
for d in detect_info]
peaks_out, inds_out = decluster(
peaks=detect_info, trig_int=trig_int * 10 ** 6, return_ind=True)
# Trig_int must be converted from seconds to micro-seconds
declustered_detections = [all_detections[ind] for ind in inds_out]
# Convert this list into families
template_names = list(set([d.template_name
for d in declustered_detections]))
new_families = []
for template_name in template_names:
template = [fam.template for fam in self.families
if fam.template.name == template_name][0]
new_families.append(Family(
template=template,
detections=[d for d in declustered_detections
if d.template_name == template_name],
catalog=Catalog([d.event for d in declustered_detections
if d.template_name == template_name])))
self.families = new_families
return self
    def copy(self):
"""
Returns a copy of the Party.
:return: Copy of party
.. rubric:: Example
>>> party = Party(families=[Family(template=Template(name='a'))])
>>> party_b = party.copy()
>>> party == party_b
True
"""
return copy.deepcopy(self)
    def write(self, filename, format='tar'):
"""
Write Family out, select output format.
:type format: str
:param format:
One of either 'tar', 'csv', or any obspy supported
catalog output. See note below on formats
:type filename: str
:param filename: Path to write file to.
.. NOTE::
csv format will write out detection objects, all other
outputs will write the catalog. These cannot be rebuilt into
a Family object. The only format that can be read back into
Family objects is the 'tar' type.
.. NOTE::
We recommend writing to the 'tar' format, which will write out
all the template information (wavefiles as miniseed and metadata)
alongside the detections and store these in a tar archive. This
is readable by other programs and maintains all information
required for further study.
.. rubric:: Example
>>> party = Party().read()
>>> party.write('test_tar_write', format='tar')
Writing family 0
Writing family 1
Writing family 2
Writing family 3
Party of 4 Families.
>>> party.write('test_csv_write.csv', format='csv')
Party of 4 Families.
>>> party.write('test_quakeml.ml', format='quakeml')
Party of 4 Families.
"""
if format.lower() == 'csv':
if os.path.isfile(filename):
raise MatchFilterError(
'Will not overwrite existing file: %s' % filename)
for family in self.families:
for detection in family.detections:
detection.write(fname=filename, append=True)
elif format.lower() == 'tar':
if os.path.exists(filename):
raise IOError('Will not overwrite existing file: %s'
% filename)
with temporary_directory() as temp_dir:
Tribe([f.template for f in self.families]).write(
filename=temp_dir, compress=False)
all_cat = Catalog()
for family in self.families:
all_cat += family.catalog
if not len(all_cat) == 0:
all_cat.write(join(temp_dir, 'catalog.xml'),
format='QUAKEML')
for i, family in enumerate(self.families):
print('Writing family %i' % i)
name = family.template.name + '_detections.csv'
name_to_write = join(temp_dir, name)
_write_family(family=family, filename=name_to_write)
with tarfile.open(filename + '.tgz', "w:gz") as tar:
tar.add(temp_dir, arcname=os.path.basename(filename))
else:
warnings.warn('Writing only the catalog component, metadata '
'will not be preserved')
self.get_catalog().write(filename=filename, format=format)
return self
    def read(self, filename=None):
"""
Read a Party from a file.
:type filename: str
:param filename: File to read from
.. rubric:: Example
>>> Party().read()
Party of 4 Families.
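
        Reading a Party written with :func:`Party.write` in 'tar' format (the
        filename assumes the write example has been run first):

        >>> party = Party().read('test_tar_write.tgz') # doctest: +SKIP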
"""
if filename is None:
# If there is no filename given, then read the example.
filename = os.path.join(os.path.dirname(__file__),
'..', 'tests', 'test_data',
'test_party.tgz')
# First work out how many templates there are and get them.
tribe = Tribe()
with tarfile.open(filename, "r:*") as arc:
temp_dir = tempfile.mkdtemp()
arc.extractall(path=temp_dir, members=_safemembers(arc))
party_dir = glob.glob(temp_dir + os.sep + '*')[0]
tribe._read_from_folder(dirname=party_dir)
# Read in families here!
if os.path.isfile(party_dir + os.sep + 'catalog.xml'):
all_cat = read_events(party_dir + os.sep + 'catalog.xml')
else:
all_cat = Catalog()
for family_file in glob.glob(join(party_dir, '*_detections.csv')):
template = [t for t in tribe if _templates_match(t, family_file)]
if len(template) == 0:
raise MatchFilterError(
'Missing template for detection file: ' + family_file)
family = Family(template=template[0])
family.detections.extend(_read_family(family_file, all_cat))
family.catalog = Catalog([d.event for d in family.detections])
self.families.append(family)
shutil.rmtree(temp_dir)
return self
    def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4,
horizontal_chans=['E', 'N', '1', '2'], vertical_chans=['Z'],
cores=1, interpolate=False, plot=False, parallel=True,
overlap='calculate', debug=0):
"""
Compute picks based on cross-correlation alignment.
:type stream: obspy.core.stream.Stream
:param stream:
All the data needed to cut from - can be a gappy Stream.
:type pre_processed: bool
:param pre_processed:
Whether the stream has been pre-processed or not to match the
templates. See note below.
:type shift_len: float
:param shift_len:
Shift length allowed for the pick in seconds, will be plus/minus
this amount - default=0.2
:type min_cc: float
:param min_cc:
Minimum cross-correlation value to be considered a pick,
default=0.4.
:type horizontal_chans: list
:param horizontal_chans:
List of channel endings for horizontal-channels, on which S-picks
will be made.
:type vertical_chans: list
:param vertical_chans:
List of channel endings for vertical-channels, on which P-picks
will be made.
:type cores: int
:param cores:
Number of cores to use in parallel processing, defaults to one.
:type interpolate: bool
:param interpolate:
Interpolate the correlation function to achieve sub-sample
precision.
:type plot: bool
:param plot:
To generate a plot for every detection or not, defaults to False
:type parallel: bool
:param parallel: Turn parallel processing on or off.
:type overlap: float
:param overlap:
Either None, "calculate" or a float of number of seconds to
overlap detection streams by. This is to counter the effects of
            the delay-and-stack in calculating cross-correlation sums. Setting
overlap = "calculate" will work out the appropriate overlap based
on the maximum lags within templates.
:type debug: int
:param debug: Debug output level, 0-5 with 5 being the most output.
:returns:
Catalog of events with picks. No origin information is included.
These events can then be written out via
:func:`obspy.core.event.Catalog.write`, or to Nordic Sfiles using
:func:`eqcorrscan.utils.sfile_util.eventtosfile` and located
externally.
:rtype: obspy.core.event.Catalog
.. Note::
Note on pre-processing: You can provide a pre-processed stream,
which may be beneficial for detections over large time periods
(the stream can have gaps, which reduces memory usage). However,
in this case the processing steps are not checked, so you must
            ensure that all the templates in the Party have the same sampling
rate and filtering as the stream.
            If pre-processing has not been done then the data will be processed
according to the parameters in the templates, in this case
templates will be grouped by processing parameters and run with
            similarly processed data. In this case the templates do not all
            have to share the same processing parameters.
.. Note::
Picks are corrected for the template pre-pick time.
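
        .. rubric:: Example

        A hedged usage sketch: `st` stands in for your continuous data, which
        must span the detections in the Party:

        >>> party = Party().read()
        >>> catalog = party.lag_calc(
        ...     stream=st, pre_processed=False, shift_len=0.2,
        ...     min_cc=0.4) # doctest: +SKIP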
"""
catalog = Catalog()
template_groups = [[]]
detection_groups = [[]]
for master in self.families:
master_chans = [(tr.stats.station,
tr.stats.channel) for tr in master.template.st]
if len(master_chans) > len(set(master_chans)):
warnings.warn(master.template.name +
' has duplicate channels, will not use this '
'template for lag-calc as this is not coded')
continue
for group in template_groups:
if master.template in group:
break
else:
new_group = [master.template.copy()]
new_det_group = copy.deepcopy(master.detections)
for slave in self.families:
if master.template.same_processing(slave.template) and \
master.template != slave.template:
slave_chans = [
(tr.stats.station,
tr.stats.channel) for tr in slave.template.st]
if len(slave_chans) > len(set(slave_chans)):
continue
else:
new_group.append(slave.template.copy())
new_det_group.extend(
copy.deepcopy(slave.detections))
template_groups.append(new_group)
detection_groups.append(new_det_group)
        # template_groups starts with an empty first list: filter out empty
        # groups rather than removing from the lists while iterating them
        keep = [i for i, grp in enumerate(template_groups) if len(grp) > 0]
        template_groups = [template_groups[i] for i in keep]
        detection_groups = [detection_groups[i] for i in keep]
# Process the data for each group and time-chunk
for group, det_group in zip(template_groups, detection_groups):
lap = 0.0
for template in group:
starts = [t.stats.starttime for t in
template.st.sort(['starttime'])]
if starts[-1] - starts[0] > lap:
lap = starts[-1] - starts[0]
if overlap is None:
lap = 0.0
elif isinstance(overlap, float):
lap = overlap
if not pre_processed:
processed_streams = _group_process(
template_group=group, cores=cores, parallel=parallel,
stream=stream.copy(), debug=debug, daylong=False,
ignore_length=False, overlap=lap)
processed_stream = Stream()
for p in processed_streams:
processed_stream += p
processed_stream.merge(method=1)
print(processed_stream)
else:
processed_stream = stream
temp_cat = lag_calc(
detections=det_group, detect_data=processed_stream,
template_names=[t.name for t in group],
templates=[t.st for t in group], shift_len=shift_len,
min_cc=min_cc, horizontal_chans=horizontal_chans,
vertical_chans=vertical_chans, cores=cores,
interpolate=interpolate, plot=plot, parallel=parallel,
debug=debug)
for event in temp_cat:
det = [d for d in det_group
if str(d.id) == str(event.resource_id)][0]
pre_pick = [t for t in group
if t.name == det.template_name][0].prepick
for pick in event.picks:
pick.time += pre_pick
catalog += temp_cat
return catalog
    def get_catalog(self):
"""
Get an obspy catalog object from the party.
:returns: :class:`obspy.core.event.Catalog`
.. rubric:: Example
>>> party = Party().read()
>>> cat = party.get_catalog()
>>> print(len(cat))
4
"""
catalog = Catalog()
for fam in self.families:
catalog += fam.catalog
return catalog
    def min_chans(self, min_chans):
"""
        Remove detections made on min_chans or fewer channels.
:type min_chans: int
:param min_chans: Minimum number of channels to allow a detection.
:return: Party
.. Note:: Works in place on Party.
.. rubric:: Example
>>> party = Party().read()
>>> print(len(party))
4
>>> party = party.min_chans(5)
>>> print(len(party))
1
"""
declustered = Party()
for family in self.families:
fam = Family(family.template)
for d in family.detections:
if d.no_chans > min_chans:
fam.detections.append(d)
declustered.families.append(fam)
self.families = declustered.families
return self
class Family(object):
"""
Container for Detection objects from a single template.
:type template: eqcorrscan.core.match_filter.Template
:param template: The template used to detect the family
:type detections: list
:param detections: list of Detection objects
:type catalog: obspy.core.event.Catalog
:param catalog:
Catalog of detections, with information for the individual detections.
"""
    def __init__(self, template, detections=None, catalog=None):
"""Instantiation of Family object."""
self.template = template
self.detections = []
self.catalog = Catalog()
if isinstance(detections, Detection):
detections = [detections]
if isinstance(catalog, Event):
catalog = Catalog(catalog)
if detections:
self.detections.extend(detections)
if catalog:
self.catalog.extend(catalog)
def __repr__(self):
"""
Print method on Family.
:return: str
.. rubric:: Example
>>> family = Family(template=Template(name='a'))
>>> print(family)
Family of 0 detections from template a
"""
print_str = ('Family of %s detections from template %s' %
(len(self.detections), self.template.name))
return print_str
def __add__(self, other):
"""
Extend method. Used for '+'
.. rubric:: Example
>>> family_a = Family(template=Template(name='a'))
>>> family_b = Family(template=Template(name='a'))
>>> family_c = family_a + family_b
>>> print(family_c)
Family of 0 detections from template a
Can only extend family with the family of detections from the same
template:
>>> family_a = Family(template=Template(name='a'))
>>> family_b = Family(template=Template(name='b'))
>>> family_c = family_a + family_b # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
NotImplementedError: Templates do not match
Can extend by adding a detection from the same template.
>>> family_a = Family(template=Template(name='a'))
>>> detection = Detection(
... template_name='a', detect_time=UTCDateTime(), no_chans=5,
... detect_val=2.5, threshold=1.23, typeofdet='corr',
... threshold_type='MAD', threshold_input=8.0)
>>> family = family_a + detection
>>> print(family)
Family of 1 detections from template a
Will not work if detections are made using a different Template.
>>> family_a = Family(template=Template(name='a'))
>>> detection = Detection(
... template_name='b', detect_time=UTCDateTime(), no_chans=5,
... detect_val=2.5, threshold=1.23, typeofdet='corr',
... threshold_type='MAD', threshold_input=8.0)
>>> family = family_a + detection # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
NotImplementedError: Templates do not match
        Cannot extend a family with a list or another object.
>>> family_a = Family(template=Template(name='a'))
>>> family = family_a + ['albert'] # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
        NotImplementedError: Can only extend with a Detection or Family object.
"""
if isinstance(other, Family):
if other.template == self.template:
self.detections.extend(other.detections)
try:
self.catalog += other.catalog
except TypeError:
pass
else:
raise NotImplementedError('Templates do not match')
elif isinstance(other, Detection) and other.template_name \
== self.template.name:
self.detections.append(other)
try:
self.catalog += other.event
except TypeError:
pass
elif isinstance(other, Detection):
            raise NotImplementedError('Templates do not match')
else:
raise NotImplementedError('Can only extend with a Detection or '
'Family object.')
return self
def __iadd__(self, other):
"""
Rich method '+='
.. rubric:: Example
>>> family_a = Family(template=Template(name='a'))
>>> family_b = Family(template=Template(name='a'))
>>> family_a += family_b
>>> print(family_a)
Family of 0 detections from template a
"""
return self.__add__(other)
def __eq__(self, other):
"""
Check equality, rich comparison operator '=='
.. rubric:: Example
>>> family_a = Family(template=Template(name='a'), detections=[])
>>> family_b = Family(template=Template(name='a'), detections=[])
>>> family_a == family_b
True
>>> family_c = Family(template=Template(name='b'))
>>> family_c == family_a
False
Test if families are equal without the same detections
>>> family_a = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family_b = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family_a == family_b
False
>>> family_c = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family_a == family_c
False
"""
if not self.template == other.template:
return False
if len(self.detections) != len(other.detections):
return False
if len(self.detections) != 0 and len(other.detections) != 0:
for det, other_det in zip(self.sort().detections,
other.sort().detections):
if det != other_det:
return False
# currently not checking for catalog...
if len(self.catalog) != len(other.catalog):
return False
return True
def __ne__(self, other):
"""
Rich comparison operator '!='
.. rubric:: Example
>>> family_a = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family_b = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family_a != family_b
True
"""
return not self.__eq__(other)
def __getitem__(self, index):
"""
Retrieve a detection or series of detections from the Family.
.. rubric:: Example
>>> family = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> isinstance(family[0], Detection)
True
>>> len(family[0:])
2
"""
return self.detections.__getitem__(index)
def __len__(self):
"""Number of detections in Family.
.. rubric:: Example
>>> family = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> print(len(family))
2
"""
return len(self.detections)
def _uniq(self):
"""
Get list of unique detections.
Works in place.
.. rubric:: Example
>>> family = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> len(family)
3
>>> len(family._uniq())
2
"""
        _detections = []
        for d in self.detections:
            # Keep only the first occurrence of each detection
            if d not in _detections:
                _detections.append(d)
self.detections = _detections
self.catalog = get_catalog(self.detections)
return self
    def sort(self):
"""Sort by detection time.
.. rubric:: Example
>>> family = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0) + 200,
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family[0].detect_time
UTCDateTime(1970, 1, 1, 0, 3, 20)
>>> family.sort()[0].detect_time
UTCDateTime(1970, 1, 1, 0, 0)
"""
self.detections = sorted(self.detections, key=lambda d: d.detect_time)
return self
    def copy(self):
"""
Returns a copy of the family.
:return: Copy of family
.. rubric:: Example
>>> family = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family == family.copy()
True
"""
return copy.deepcopy(self)
    def append(self, other):
"""
Add another family or detection to the family.
.. rubric:: Example
Append a family to a family
>>> family_a = Family(template=Template(name='a'))
>>> family_b = Family(template=Template(name='a'))
>>> family_a.append(family_b)
Family of 0 detections from template a
Append a detection to the family
>>> family_a = Family(template=Template(name='a'))
>>> detection = Detection(
... template_name='a', detect_time=UTCDateTime(), no_chans=5,
... detect_val=2.5, threshold=1.23, typeofdet='corr',
... threshold_type='MAD', threshold_input=8.0)
>>> family_a.append(detection)
Family of 1 detections from template a
"""
return self.__add__(other)
    def plot(self, plot_grouped=False):
"""
Plot the cumulative number of detections in time.
.. rubric:: Example
>>> family = Family(
... template=Template(name='a'), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0) + 200,
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family.plot(plot_grouped=True) # doctest: +SKIP
.. plot::
from eqcorrscan.core.match_filter import Family, Template
from eqcorrscan.core.match_filter import Detection
from obspy import UTCDateTime
family = Family(
template=Template(name='a'), detections=[
Detection(template_name='a', detect_time=UTCDateTime(0) + 200,
no_chans=8, detect_val=4.2, threshold=1.2,
typeofdet='corr', threshold_type='MAD',
threshold_input=8.0),
Detection(template_name='a', detect_time=UTCDateTime(0),
no_chans=8, detect_val=4.5, threshold=1.2,
typeofdet='corr', threshold_type='MAD',
threshold_input=8.0),
Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
no_chans=8, detect_val=4.5, threshold=1.2,
typeofdet='corr', threshold_type='MAD',
threshold_input=8.0)])
family.plot(plot_grouped=True)
"""
cumulative_detections(
detections=self.detections, plot_grouped=plot_grouped)
    def write(self, filename, format='tar'):
"""
Write Family out, select output format.
:type format: str
:param format:
One of either 'tar', 'csv', or any obspy supported
catalog output.
:type filename: str
:param filename: Path to write file to.
.. Note:: csv format will write out detection objects, all other
outputs will write the catalog. These cannot be rebuilt into
a Family object. The only format that can be read back into
Family objects is the 'tar' type.
.. Note:: csv format will append detections to filename, all others
will overwrite any existing files.
.. rubric:: Example
>>> family = Family(
... template=Template(name='a', st=read()), detections=[
... Detection(template_name='a', detect_time=UTCDateTime(0) + 200,
... no_chans=8, detect_val=4.2, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0),
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0),
... Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
... no_chans=8, detect_val=4.5, threshold=1.2,
... typeofdet='corr', threshold_type='MAD',
... threshold_input=8.0)])
>>> family.write('test_family')
Writing family 0
"""
Party(families=[self]).write(filename=filename, format=format)
return
    def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4,
horizontal_chans=['E', 'N', '1', '2'], vertical_chans=['Z'],
cores=1, interpolate=False, plot=False, parallel=True,
debug=0):
"""
Compute picks based on cross-correlation alignment.
:type stream: obspy.core.stream.Stream
:param stream:
All the data needed to cut from - can be a gappy Stream.
:type pre_processed: bool
:param pre_processed:
Whether the stream has been pre-processed or not to match the
templates. See note below.
:type shift_len: float
:param shift_len:
Shift length allowed for the pick in seconds, will be
plus/minus this amount - default=0.2
:type min_cc: float
:param min_cc:
Minimum cross-correlation value to be considered a pick,
default=0.4.
:type horizontal_chans: list
:param horizontal_chans:
List of channel endings for horizontal-channels, on which
S-picks will be made.
:type vertical_chans: list
:param vertical_chans:
List of channel endings for vertical-channels, on which P-picks
will be made.
:type cores: int
:param cores:
Number of cores to use in parallel processing, defaults to one.
:type interpolate: bool
:param interpolate:
Interpolate the correlation function to achieve sub-sample
precision.
:type plot: bool
:param plot:
To generate a plot for every detection or not, defaults to False
:type parallel: bool
:param parallel: Turn parallel processing on or off.
:type debug: int
:param debug: Debug output level, 0-5 with 5 being the most output.
:returns:
Catalog of events with picks. No origin information is included.
These events can then be written out via
:func:`obspy.core.event.Catalog.write`, or to Nordic Sfiles using
:func:`eqcorrscan.utils.sfile_util.eventtosfile` and located
externally.
:rtype: obspy.core.event.Catalog
.. Note::
Note on pre-processing: You can provide a pre-processed stream,
which may be beneficial for detections over large time periods
(the stream can have gaps, which reduces memory usage). However,
in this case the processing steps are not checked, so you must
            ensure that all the templates in the Party have the same sampling
rate and filtering as the stream.
            If pre-processing has not been done then the data will be processed
according to the parameters in the templates, in this case
templates will be grouped by processing parameters and run with
            similarly processed data. In this case the templates do not all
            have to share the same processing parameters.
.. Note::
Picks are corrected for the template pre-pick time.
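
        .. rubric:: Example

        A hedged sketch: this wraps :func:`Party.lag_calc` for a single
        Family, with `st` standing in for your continuous data:

        >>> family = Party().read()[0]
        >>> catalog = family.lag_calc(
        ...     stream=st, pre_processed=False) # doctest: +SKIP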
"""
return Party(families=[self]).lag_calc(
stream=stream, pre_processed=pre_processed, shift_len=shift_len,
min_cc=min_cc, horizontal_chans=horizontal_chans,
vertical_chans=vertical_chans, cores=cores,
interpolate=interpolate, plot=plot, parallel=parallel,
debug=debug)
class Template(object):
"""
Template object holder. Contains waveform data and metadata parameters
used to generate the template.
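
    .. rubric:: Example

    Template names may only contain lower-case letters, digits and
    underscores; invalid names are rejected at construction:

    >>> Template(name='Bob') # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ValueError: Invalid name: 'Bob' - Must satisfy the regex '^[a-z_0-9]+$'.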
"""
    def __init__(self, name=None, st=None, lowcut=None, highcut=None,
samp_rate=None, filt_order=None, process_length=None,
prepick=None, event=None):
name_regex = re.compile(r"^[a-z_0-9]+$")
if name is not None and not re.match(name_regex, name):
raise ValueError("Invalid name: '%s' - Must satisfy the regex "
"'%s'." % (name, name_regex.pattern))
if name is None:
temp_name = "unnamed"
else:
temp_name = name
self.name = name
self.st = st
self.lowcut = lowcut
self.highcut = highcut
self.samp_rate = samp_rate
if st and samp_rate is not None:
for tr in st:
if not tr.stats.sampling_rate == self.samp_rate:
raise MatchFilterError(
'Sampling rates do not match in data.')
self.filt_order = filt_order
self.process_length = process_length
self.prepick = prepick
if event is not None:
if "eqcorrscan_template_" + temp_name not in \
[c.text for c in event.comments]:
event.comments.append(Comment(
text="eqcorrscan_template_" + temp_name,
creation_info=CreationInfo(agency='eqcorrscan',
author=getpass.getuser())))
self.event = event
def __repr__(self):
"""
Print the template.
.. rubric:: Example
>>> print(Template())
Template()
>>> template = Template(
... name='a', st=read(), lowcut=2.0, highcut=8.0, samp_rate=100,
... filt_order=4, process_length=3600, prepick=0.5)
>>> print(template) # doctest: +NORMALIZE_WHITESPACE
Template a:
3 channels;
lowcut: 2.0 Hz;
highcut: 8.0 Hz;
sampling rate 100 Hz;
filter order: 4;
process length: 3600 s
"""
if self.name is None:
return 'Template()'
if self.st is None:
st_len = 0
else:
st_len = len(self.st)
print_str = ('Template %s: \n\t %s channels;\n\t lowcut: %s Hz;'
'\n\t highcut: %s Hz;\n\t sampling rate %s Hz;'
'\n\t filter order: %s; \n\t process length: %s s'
% (self.name, st_len, self.lowcut, self.highcut,
self.samp_rate, self.filt_order, self.process_length))
return print_str
def __eq__(self, other):
"""
Check for Template equality, rich comparison operator '=='
.. rubric:: Example
>>> template_a = Template(
... name='a', st=read(), lowcut=2.0, highcut=8.0, samp_rate=100,
... filt_order=4, process_length=3600, prepick=0.5)
>>> template_b = template_a.copy()
>>> template_a == template_b
True
This will check all parameters of template including the data in the
stream.
>>> template_b.st[0].data = template_b.st[0].data[0:-1]
>>> template_a == template_b
False
>>> template_b = template_a.copy()
>>> template_b.st[0].stats.station = 'MIS'
>>> template_a == template_b
False
>>> template_b = template_a.copy()
>>> template_b.st = template_b.st[0]
>>> template_a == template_b
False
>>> template_b = template_a.copy()
>>> template_b.lowcut = 5.0
>>> template_a == template_b
False
This will also test if the events in the templates are the same,
ignoring resource ID's as obspy will alter these so that they do not
match.
>>> party = Party().read()
>>> template_a = party.families[0].template
>>> template_b = template_a.copy()
>>> template_b.event.origins[0].time = \
template_a.event.origins[0].time + 20
>>> template_a == template_b
False
"""
for key in self.__dict__.keys():
if key == 'st':
self_is_stream = isinstance(self.st, Stream)
other_is_stream = isinstance(other.st, Stream)
if self_is_stream and other_is_stream:
for tr, oth_tr in zip(self.st.sort(),
other.st.sort()):
if not np.array_equal(tr.data, oth_tr.data):
return False
for trkey in ['network', 'station', 'channel',
'location', 'starttime', 'endtime',
'sampling_rate', 'delta', 'npts',
'calib']:
if tr.stats[trkey] != oth_tr.stats[trkey]:
return False
elif self_is_stream and not other_is_stream:
return False
elif not self_is_stream and other_is_stream:
return False
elif key == 'event':
self_is_event = isinstance(self.event, Event)
other_is_event = isinstance(other.event, Event)
if self_is_event and other_is_event:
if not _test_event_similarity(
self.event, other.event, verbose=False):
return False
elif self_is_event and not other_is_event:
return False
elif not self_is_event and other_is_event:
return False
elif not self.__dict__[key] == other.__dict__[key]:
return False
return True
def __ne__(self, other):
"""
Rich comparison operator '!='.
.. rubric:: Example
>>> template_a = Template(
... name='a', st=read(), lowcut=2.0, highcut=8.0, samp_rate=100,
... filt_order=4, process_length=3600, prepick=0.5)
>>> template_b = template_a.copy()
>>> template_a != template_b
False
"""
return not self.__eq__(other)
    def copy(self):
"""
Returns a copy of the template.
:return: Copy of template
.. rubric:: Example
>>> template_a = Template(
... name='a', st=read(), lowcut=2.0, highcut=8.0, samp_rate=100,
... filt_order=4, process_length=3600, prepick=0.5)
>>> template_b = template_a.copy()
>>> template_a == template_b
True
"""
return copy.deepcopy(self)
    def same_processing(self, other):
"""
        Check if the templates are processed the same.
.. rubric:: Example
>>> template_a = Template(
... name='a', st=read(), lowcut=2.0, highcut=8.0, samp_rate=100,
... filt_order=4, process_length=3600, prepick=0.5)
>>> template_b = template_a.copy()
>>> template_a.same_processing(template_b)
True
>>> template_b.lowcut = 5.0
>>> template_a.same_processing(template_b)
False
"""
for key in self.__dict__.keys():
if key in ['name', 'st', 'prepick', 'event', 'template_info']:
continue
if not self.__dict__[key] == other.__dict__[key]:
return False
return True
    def write(self, filename, format='tar'):
"""
Write template.
:type filename: str
:param filename:
Filename to write to, if it already exists it will be opened and
appended to, otherwise it will be created.
:type format: str
:param format:
Format to write to, either 'tar' (to retain metadata), or any obspy
supported waveform format to just extract the waveform.
.. rubric:: Example
>>> template_a = Template(
... name='a', st=read(), lowcut=2.0, highcut=8.0, samp_rate=100,
... filt_order=4, process_length=3600, prepick=0.5)
>>> template_a.write('test_template') # doctest: +NORMALIZE_WHITESPACE
Template a:
3 channels;
lowcut: 2.0 Hz;
highcut: 8.0 Hz;
sampling rate 100 Hz;
filter order: 4;
process length: 3600 s
>>> template_a.write('test_waveform.ms',
... format='MSEED') # doctest: +NORMALIZE_WHITESPACE
Template a:
3 channels;
lowcut: 2.0 Hz;
highcut: 8.0 Hz;
sampling rate 100 Hz;
filter order: 4;
process length: 3600 s
"""
if format == 'tar':
Tribe(templates=[self]).write(filename=filename)
else:
self.st.write(filename, format=format)
return self
    def read(self, filename):
"""
Read template from tar format with metadata.
:type filename: str
:param filename: Filename to read template from.
.. rubric:: Example
>>> template_a = Template(
... name='a', st=read(), lowcut=2.0, highcut=8.0, samp_rate=100,
... filt_order=4, process_length=3600, prepick=0.5)
>>> template_a.write(
... 'test_template_read') # doctest: +NORMALIZE_WHITESPACE
Template a:
3 channels;
lowcut: 2.0 Hz;
highcut: 8.0 Hz;
sampling rate 100 Hz;
filter order: 4;
process length: 3600 s
>>> template_b = Template().read('test_template_read.tgz')
>>> template_a == template_b
True
"""
tribe = Tribe()
tribe.read(filename=filename)
if len(tribe) > 1:
raise IOError('Multiple templates in file')
for key in self.__dict__.keys():
self.__dict__[key] = tribe[0].__dict__[key]
return self
    def detect(self, stream, threshold, threshold_type, trig_int, plotvar,
pre_processed=False, daylong=False, parallel_process=True,
xcorr_func=None, concurrency=None, cores=None,
ignore_length=False, overlap="calculate", debug=0):
"""
Detect using a single template within a continuous stream.
:type stream: `obspy.core.stream.Stream`
:param stream: Continuous data to detect within using the Template.
:type threshold: float
:param threshold:
Threshold level, if using `threshold_type='MAD'` then this will be
the multiple of the median absolute deviation.
:type threshold_type: str
:param threshold_type:
The type of threshold to be used, can be MAD, absolute or
av_chan_corr. See Note on thresholding below.
:type trig_int: float
:param trig_int:
Minimum gap between detections in seconds. If multiple detections
occur within trig_int of one-another, the one with the highest
cross-correlation sum will be selected.
:type plotvar: bool
:param plotvar:
Turn plotting on or off, see warning about plotting below
:type pre_processed: bool
:param pre_processed:
Set to True if `stream` has already undergone processing, in this
case eqcorrscan will only check that the sampling rate is correct.
Defaults to False, which will use the
:mod:`eqcorrscan.utils.pre_processing` routines to resample and
filter the continuous data.
:type daylong: bool
:param daylong:
Set to True to use the
:func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
            performs additional checks and is more efficient for day-long data
over other methods.
        :type parallel_process: bool
        :param parallel_process: Whether to process the data in parallel.
:type xcorr_func: str or callable
:param xcorr_func:
A str of a registered xcorr function or a callable for implementing
a custom xcorr function. For more details see
:func:`eqcorrscan.utils.correlate.register_array_xcorr`.
:type concurrency: str
:param concurrency:
The type of concurrency to apply to the xcorr function. Options are
'multithread', 'multiprocess', 'concurrent'. For more details see
:func:`eqcorrscan.utils.correlate.get_stream_xcorr`.
:type cores: int
:param cores: Number of workers for processing and detection.
:type ignore_length: bool
:param ignore_length:
            If using daylong=True, then dayproc will check that the data
are there for at least 80% of the day, if you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type overlap: float
:param overlap:
Either None, "calculate" or a float of number of seconds to
overlap detection streams by. This is to counter the effects of
            the delay-and-stack in calculating cross-correlation sums. Setting
overlap = "calculate" will work out the appropriate overlap based
on the maximum lags within templates.
:type debug: int
:param debug:
Debug level from 0-5 where five is more output, for debug levels
4 and 5, detections will not be computed in parallel.
:returns: Family of detections.
.. warning::
Plotting within the match-filter routine uses the Agg backend
with interactive plotting turned off. This is because the function
is designed to work in bulk. If you wish to turn interactive
plotting on you must import matplotlib in your script first, when
you then import match_filter you will get the warning that this
call to matplotlib has no effect, which will mean that
match_filter has not changed the plotting behaviour.
.. note::
**Data overlap:**
Internally this routine shifts and trims the data according to
the offsets in the template (e.g. if trace 2 starts 2 seconds
after trace 1 in the template then the continuous data will be
shifted by 2 seconds to align peak correlations prior to summing).
Because of this, detections at the start and end of continuous data
streams **may be missed**. The maximum time-period that might be
missing detections is the maximum offset in the template.
To work around this, if you are conducting matched-filter
detections through long-duration continuous data, we suggest using
some overlap (a few seconds, on the order of the maximum offset
            in the templates) in the continuous data. You will then need to
post-process the detections (which should be done anyway to remove
duplicates).
.. note::
**Thresholding:**
**MAD** threshold is calculated as the:
.. math::
threshold {\\times} (median(abs(cccsum)))
where :math:`cccsum` is the cross-correlation sum for a
given template.
**absolute** threshold is a true absolute threshold based on the
cccsum value.
**av_chan_corr** is based on the mean values of single-channel
cross-correlations assuming all data are present as required
for the template, e.g:
.. math::
av\_chan\_corr\_thresh=threshold \\times (cccsum /
len(template))
where :math:`template` is a single template from the input and the
length is the number of channels within this template.
.. Note::
See tutorials for example.
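
        .. rubric:: Example

        A hedged sketch, assuming a template archive written earlier and a
        continuous Stream `st` covering the period of interest:

        >>> template = Template().read('test_template.tgz') # doctest: +SKIP
        >>> family = template.detect(
        ...     stream=st, threshold=8.0, threshold_type='MAD',
        ...     trig_int=6.0, plotvar=False) # doctest: +SKIP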
"""
party = _group_detect(
templates=[self], stream=stream.copy(), threshold=threshold,
threshold_type=threshold_type, trig_int=trig_int,
plotvar=plotvar, pre_processed=pre_processed, daylong=daylong,
parallel_process=parallel_process, xcorr_func=xcorr_func,
concurrency=concurrency, cores=cores, ignore_length=ignore_length,
overlap=overlap, debug=debug)
return party[0]
    def construct(self, method, name, lowcut, highcut, samp_rate, filt_order,
prepick, **kwargs):
"""
Construct a template using a given method.
:param method:
Method to make the template,
see :mod:`eqcorrscan.core.template_gen` for possible methods.
:type method: str
:type name: str
:param name: Name for the template
:type lowcut: float
:param lowcut:
Low cut (Hz), if set to None will not apply a lowcut
:type highcut: float
:param highcut:
High cut (Hz), if set to None will not apply a highcut.
:type samp_rate: float
:param samp_rate:
New sampling rate in Hz.
:type filt_order: int
:param filt_order:
Filter level (number of corners).
:type prepick: float
:param prepick: Pre-pick time in seconds
.. Note::
methods `from_meta_file`, `from_seishub`, `from_client` and
`multi_template_gen` are not accommodated in this function and must
be called from Tribe.construct as these generate multiple
templates.
.. Note::
Calls functions from `eqcorrscan.core.template_gen`, see these
functions for details on what further arguments are required.
.. rubric:: Example
>>> sac_files = glob.glob(
... 'eqcorrscan/tests/test_data/SAC/2014p611252/*')
>>> template = Template().construct(
... method='from_sac', name='test', lowcut=2.0, highcut=8.0,
... samp_rate=20.0, filt_order=4, prepick=0.1, swin='all',
... length=2.0, sac_files=sac_files)
>>> print(template) # doctest: +NORMALIZE_WHITESPACE
Template test:
12 channels;
lowcut: 2.0 Hz;
highcut: 8.0 Hz;
sampling rate 20.0 Hz;
filter order: 4;
process length: 300.0 s
This will raise an error if the method is unsupported:
>>> template = Template().construct(
... method='from_meta_file', name='test', lowcut=2.0, highcut=8.0,
... samp_rate=20.0, filt_order=4, prepick=0.1, swin='all',
... length=2.0) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
NotImplementedError: Method is not supported, use \
Tribe.construct instead.
"""
if method in ['from_meta_file', 'from_seishub', 'from_client',
'multi_template_gen']:
raise NotImplementedError('Method is not supported, '
'use Tribe.construct instead.')
func = getattr(template_gen, method)
st, event, process_length = func(
lowcut=lowcut, highcut=highcut, filt_order=filt_order,
samp_rate=samp_rate, prepick=prepick, return_event=True, **kwargs)
self.name = name
for tr in st:
if not np.any(tr.data.astype(np.float16)):
warnings.warn('Data are zero in float16, missing data,'
' will not use: %s' % tr.id)
st.remove(tr)
self.st = st
self.lowcut = lowcut
self.highcut = highcut
self.filt_order = filt_order
self.samp_rate = samp_rate
self.process_length = process_length
self.prepick = prepick
self.event = event
return self
class Tribe(object):
"""Holder for multiple templates."""
    def __init__(self, templates=None):
self.templates = []
if isinstance(templates, Template):
templates = [templates]
if templates:
self.templates.extend(templates)
def __repr__(self):
"""
Print information about the tribe.
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='a')])
>>> print(tribe)
Tribe of 1 templates
"""
return 'Tribe of %i templates' % self.__len__()
def __add__(self, other):
"""
Add two Tribes or a Tribe and a Template together. '+'
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='a')])
>>> tribe + Tribe(templates=[Template(name='b')])
Tribe of 2 templates
>>> tribe + Template(name='c')
Tribe of 3 templates
"""
if isinstance(other, Tribe):
self.templates += other.templates
elif isinstance(other, Template):
self.templates.append(other)
else:
raise TypeError('Must be either Template or Tribe')
return self
def __iadd__(self, other):
"""
Add in place: '+='
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='a')])
>>> tribe += Tribe(templates=[Template(name='b')])
>>> print(tribe)
Tribe of 2 templates
>>> tribe += Template(name='c')
>>> print(tribe)
Tribe of 3 templates
"""
return self.__add__(other)
def __eq__(self, other):
"""
Test for equality. Rich comparison operator '=='
.. rubric:: Example
>>> tribe_a = Tribe(templates=[Template(name='a')])
>>> tribe_b = Tribe(templates=[Template(name='b')])
>>> tribe_a == tribe_b
False
>>> tribe_a == tribe_a
True
"""
if self.sort().templates != other.sort().templates:
return False
return True
def __ne__(self, other):
"""
Test for inequality. Rich comparison operator '!='
.. rubric:: Example
>>> tribe_a = Tribe(templates=[Template(name='a')])
>>> tribe_b = Tribe(templates=[Template(name='b')])
>>> tribe_a != tribe_b
True
>>> tribe_a != tribe_a
False
"""
return not self.__eq__(other)
def __len__(self):
"""
Number of Templates in Tribe. len(tribe)
.. rubric:: Example
>>> tribe_a = Tribe(templates=[Template(name='a')])
>>> len(tribe_a)
1
"""
return len(self.templates)
def __iter__(self):
"""
Iterator for the Tribe.
"""
return list(self.templates).__iter__()
def __getitem__(self, index):
"""
Support slicing to get Templates from Tribe.
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='a'), Template(name='b'),
... Template(name='c')])
>>> tribe[1] # doctest: +NORMALIZE_WHITESPACE
Template b:
0 channels;
lowcut: None Hz;
highcut: None Hz;
sampling rate None Hz;
filter order: None;
process length: None s
>>> tribe[0:2]
Tribe of 2 templates
"""
if isinstance(index, slice):
return self.__class__(templates=self.templates.__getitem__(index))
elif isinstance(index, int):
return self.templates.__getitem__(index)
else:
_index = [i for i, t in enumerate(self.templates)
if t.name == index]
try:
return self.templates.__getitem__(_index[0])
except IndexError:
warnings.warn('Template: %s not in tribe' % index)
return []
    def sort(self):
"""
Sort the tribe, sorts by template name.
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='c'), Template(name='b'),
... Template(name='a')])
>>> tribe.sort()
Tribe of 3 templates
>>> tribe[0] # doctest: +NORMALIZE_WHITESPACE
Template a:
0 channels;
lowcut: None Hz;
highcut: None Hz;
sampling rate None Hz;
filter order: None;
process length: None s
"""
self.templates = sorted(self.templates, key=lambda x: x.name)
return self
    def remove(self, template):
"""
Remove a template from the tribe.
:type template: :class:`eqcorrscan.core.match_filter.Template`
:param template: Template to remove from tribe
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='c'), Template(name='b'),
... Template(name='a')])
>>> tribe.remove(tribe.templates[0])
Tribe of 2 templates
"""
self.templates.remove(template)
return self
    def copy(self):
"""
Copy the Tribe.
.. rubric:: Example
>>> tribe_a = Tribe(templates=[Template(name='a')])
>>> tribe_b = tribe_a.copy()
>>> tribe_a == tribe_b
True
"""
return copy.deepcopy(self)
    def write(self, filename, compress=True):
"""
Write the tribe to a file using tar archive formatting.
:type filename: str
:param filename:
Filename to write to, if it exists it will be appended to.
:type compress: bool
:param compress:
Whether to compress the tar archive or not, if False then will
just be files in a folder.
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='c', st=read())])
>>> tribe.write('test_tribe')
Tribe of 1 templates
"""
if not os.path.isdir(filename):
os.makedirs(filename)
self._par_write(filename)
tribe_cat = Catalog()
for t in self.templates:
if t.event is not None:
tribe_cat.append(t.event)
if len(tribe_cat) > 0:
tribe_cat.write(filename + os.sep + 'tribe_cat.xml',
format='QUAKEML')
for template in self.templates:
template.st.write(filename + '/' + template.name + '.ms',
format='MSEED')
if compress:
with tarfile.open(filename + '.tgz', "w:gz") as tar:
tar.add(filename, arcname=os.path.basename(filename))
shutil.rmtree(filename)
return self
def _par_write(self, dirname):
"""
Internal write function to write a formatted parameter file.
:type dirname: str
:param dirname: Directory to write the parameter file to.
"""
filename = dirname + '/' + 'template_parameters.csv'
with open(filename, 'w') as parfile:
for template in self.templates:
for key in template.__dict__.keys():
if key not in ['st', 'event']:
parfile.write(key + ': ' +
str(template.__dict__[key]) + ', ')
parfile.write('\n')
return self
    def read(self, filename):
"""
Read a tribe of templates from a tar formatted file.
:type filename: str
:param filename: File to read templates from.
.. rubric:: Example
>>> tribe = Tribe(templates=[Template(name='c', st=read())])
>>> tribe.write('test_tribe')
Tribe of 1 templates
>>> tribe_back = Tribe().read('test_tribe.tgz')
>>> tribe_back == tribe
True
"""
with tarfile.open(filename, "r:*") as arc:
temp_dir = tempfile.mkdtemp()
arc.extractall(path=temp_dir, members=_safemembers(arc))
tribe_dir = glob.glob(temp_dir + os.sep + '*')[0]
self._read_from_folder(dirname=tribe_dir)
shutil.rmtree(temp_dir)
return self
def _read_from_folder(self, dirname):
"""
Internal folder reader.
:type dirname: str
:param dirname: Folder to read from.
"""
templates = _par_read(dirname=dirname, compressed=False)
t_files = glob.glob(dirname + os.sep + '*.ms')
if os.path.isfile(dirname + os.sep + 'tribe_cat.xml'):
tribe_cat = read_events(dirname + os.sep + 'tribe_cat.xml')
else:
tribe_cat = Catalog()
        for template in list(templates):  # copy the list so removal is safe
for event in tribe_cat:
for comment in event.comments:
if comment.text == 'eqcorrscan_template_' + template.name:
template.event = event
t_file = [t for t in t_files
if t.split(os.sep)[-1] == template.name + '.ms']
if len(t_file) == 0:
print('No waveform for template: ' + template.name)
templates.remove(template)
continue
elif len(t_file) > 1:
print('Multiple waveforms found, using: ' + t_file[0])
template.st = read(t_file[0])
self.templates.extend(templates)
return
    def cluster(self, method, **kwargs):
"""
Cluster the tribe, returns multiple tribes each of which could be
stacked.
:type method: str
:param method:
            Method of clustering, see :mod:`eqcorrscan.utils.clustering`
:return: List of tribes.
.. rubric:: Example
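        An illustrative sketch, not run as a doctest; assumes a tribe whose
        templates have located events attached. The keyword arguments
        (`d_thresh` and `show` here) are simply forwarded to the chosen
        clustering function:
        >>> tribe = read_tribe('tribe.tgz')  # doctest: +SKIP
        >>> tribes = tribe.cluster(method='space_cluster', d_thresh=5.0,
        ...                        show=False)  # doctest: +SKIP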
"""
from eqcorrscan.utils import clustering
tribes = []
func = getattr(clustering, method)
if method in ['space_cluster', 'space_time_cluster']:
cat = Catalog([t.event for t in self.templates])
groups = func(cat, **kwargs)
for group in groups:
new_tribe = Tribe()
for event in group:
new_tribe.templates.extend([t for t in self.templates
if t.event == event])
tribes.append(new_tribe)
return tribes
    def detect(self, stream, threshold, threshold_type, trig_int, plotvar,
daylong=False, parallel_process=True, xcorr_func=None,
concurrency=None, cores=None, ignore_length=False,
group_size=None, overlap="calculate", debug=0):
"""
Detect using a Tribe of templates within a continuous stream.
:type stream: `obspy.core.stream.Stream`
:param stream: Continuous data to detect within using the Template.
:type threshold: float
:param threshold:
Threshold level, if using `threshold_type='MAD'` then this will be
the multiple of the median absolute deviation.
:type threshold_type: str
:param threshold_type:
The type of threshold to be used, can be MAD, absolute or
av_chan_corr. See Note on thresholding below.
:type trig_int: float
:param trig_int:
Minimum gap between detections in seconds. If multiple detections
occur within trig_int of one-another, the one with the highest
cross-correlation sum will be selected.
:type plotvar: bool
:param plotvar:
Turn plotting on or off, see warning about plotting below
:type daylong: bool
:param daylong:
Set to True to use the
:func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
            performs additional checks and is more efficient than other
            methods for day-long data.
:type parallel_process: bool
        :param parallel_process: Whether to process the data in parallel.
:type xcorr_func: str or callable
:param xcorr_func:
A str of a registered xcorr function or a callable for implementing
a custom xcorr function. For more information see:
:func:`eqcorrscan.utils.correlate.register_array_xcorr`
:type concurrency: str
:param concurrency:
The type of concurrency to apply to the xcorr function. Options are
'multithread', 'multiprocess', 'concurrent'. For more details see
:func:`eqcorrscan.utils.correlate.get_stream_xcorr`
:type cores: int
        :param cores: Number of workers for processing and detection.
:type ignore_length: bool
:param ignore_length:
            If using daylong=True, then dayproc will try to check that the data
are there for at least 80% of the day, if you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type group_size: int
:param group_size:
Maximum number of templates to run at once, use to reduce memory
consumption, if unset will use all templates.
:type overlap: float
:param overlap:
Either None, "calculate" or a float of number of seconds to
overlap detection streams by. This is to counter the effects of
            the delay-and-stack in calculating cross-correlation sums. Setting
overlap = "calculate" will work out the appropriate overlap based
on the maximum lags within templates.
:type debug: int
:param debug:
Debug level from 0-5 where five is more output, for debug levels
4 and 5, detections will not be computed in parallel.
:return:
:class:`eqcorrscan.core.match_filter.Party` of Families of
detections.
.. Note::
`stream` must not be pre-processed.
.. warning::
Picks included in the output Party.get_catalog() will not be
corrected for pre-picks in the template.
.. warning::
Plotting within the match-filter routine uses the Agg backend
with interactive plotting turned off. This is because the function
            is designed to work in bulk. If you wish to turn interactive
            plotting on you must import matplotlib in your script first;
            when you then import match_filter you will get a warning that
            the call to matplotlib has no effect, meaning that match_filter
            has not changed the plotting behaviour.
.. note::
**Data overlap:**
Internally this routine shifts and trims the data according to the
offsets in the template (e.g. if trace 2 starts 2 seconds after
trace 1 in the template then the continuous data will be shifted
by 2 seconds to align peak correlations prior to summing).
Because of this, detections at the start and end of continuous
data streams **may be missed**. The maximum time-period that
might be missing detections is the maximum offset in the template.
To work around this, if you are conducting matched-filter
detections through long-duration continuous data, we suggest
using some overlap (a few seconds, on the order of the maximum
offset in the templates) in the continuous data. You will then
need to post-process the detections (which should be done anyway
to remove duplicates). See below note for how `overlap` argument
affects data internally if `stream` is longer than the processing
length.
.. Note::
            If `stream` is longer than the processing length, this routine will
ensure that data overlap between loops, which will lead to no
missed detections at data start-stop points (see above note).
This will result in end-time not being strictly
honoured, so detections may occur after the end-time set. This is
because data must be run in the correct process-length.
.. note::
**Thresholding:**
**MAD** threshold is calculated as the:
.. math::
threshold {\\times} (median(abs(cccsum)))
where :math:`cccsum` is the cross-correlation sum for a given
template.
**absolute** threshold is a true absolute threshold based on the
cccsum value.
**av_chan_corr** is based on the mean values of single-channel
cross-correlations assuming all data are present as required for
the template, e.g:
.. math::
av\_chan\_corr\_thresh=threshold \\times (cccsum /
len(template))
where :math:`template` is a single template from the input and the
length is the number of channels within this template.
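        .. rubric:: Example
        An illustrative sketch, not run as a doctest; assumes a populated
        tribe and an unprocessed continuous stream covering at least one
        process-length (the file name is hypothetical):
        >>> st = read('continuous_data.ms')  # doctest: +SKIP
        >>> party = tribe.detect(
        ...     stream=st, threshold=8.0, threshold_type='MAD',
        ...     trig_int=6.0, plotvar=False)  # doctest: +SKIP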
"""
party = Party()
template_groups = [[]]
for master in self.templates:
for group in template_groups:
if master in group:
break
else:
new_group = [master]
for slave in self.templates:
if master.same_processing(slave) and master != slave:
new_group.append(slave)
template_groups.append(new_group)
# template_groups will contain an empty first list
        template_groups = [group for group in template_groups
                           if len(group) > 0]
# now we can compute the detections for each group
for group in template_groups:
group_party = _group_detect(
templates=group, stream=stream.copy(), threshold=threshold,
threshold_type=threshold_type, trig_int=trig_int,
plotvar=plotvar, group_size=group_size, pre_processed=False,
daylong=daylong, parallel_process=parallel_process,
xcorr_func=xcorr_func, concurrency=concurrency, cores=cores,
ignore_length=ignore_length, overlap=overlap, debug=debug)
party += group_party
for family in party:
if family is not None:
family.detections = family._uniq().detections
family.catalog = family._uniq().catalog
return party
    def client_detect(self, client, starttime, endtime, threshold,
threshold_type, trig_int, plotvar, daylong=False,
parallel_process=True, xcorr_func=None,
concurrency=None, cores=None, ignore_length=False,
group_size=None, debug=0, return_stream=False):
"""
Detect using a Tribe of templates within a continuous stream.
:type client: `obspy.clients.*.Client`
:param client: Any obspy client with a dataselect service.
:type starttime: :class:`obspy.core.UTCDateTime`
:param starttime: Start-time for detections.
:type endtime: :class:`obspy.core.UTCDateTime`
:param endtime: End-time for detections
:type threshold: float
:param threshold:
Threshold level, if using `threshold_type='MAD'` then this will be
the multiple of the median absolute deviation.
:type threshold_type: str
:param threshold_type:
The type of threshold to be used, can be MAD, absolute or
av_chan_corr. See Note on thresholding below.
:type trig_int: float
:param trig_int:
Minimum gap between detections in seconds. If multiple detections
occur within trig_int of one-another, the one with the highest
cross-correlation sum will be selected.
:type plotvar: bool
:param plotvar:
Turn plotting on or off, see warning about plotting below
:type daylong: bool
:param daylong:
Set to True to use the
:func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
            performs additional checks and is more efficient than other
            methods for day-long data.
:type parallel_process: bool
        :param parallel_process: Whether to process the data in parallel.
:type xcorr_func: str or callable
:param xcorr_func:
A str of a registered xcorr function or a callable for implementing
a custom xcorr function. For more information see:
:func:`eqcorrscan.utils.correlate.register_array_xcorr`
:type concurrency: str
:param concurrency:
The type of concurrency to apply to the xcorr function. Options are
'multithread', 'multiprocess', 'concurrent'. For more details see
:func:`eqcorrscan.utils.correlate.get_stream_xcorr`
:type cores: int
:param cores: Number of workers for processing and detection.
:type ignore_length: bool
:param ignore_length:
            If using daylong=True, then dayproc will try to check that the data
are there for at least 80% of the day, if you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type group_size: int
:param group_size:
Maximum number of templates to run at once, use to reduce memory
consumption, if unset will use all templates.
:type debug: int
:param debug:
Debug level from 0-5 where five is more output, for debug levels
4 and 5, detections will not be computed in parallel.
:type return_stream: bool
:param return_stream:
Whether to also output the stream downloaded, useful if you plan
to use the stream for something else, e.g. lag_calc.
:return:
:class:`eqcorrscan.core.match_filter.Party` of Families of
detections.
.. warning::
Picks included in the output Party.get_catalog() will not be
corrected for pre-picks in the template.
.. Note::
Ensures that data overlap between loops, which will lead to no
missed detections at data start-stop points (see note for
:meth:`eqcorrscan.core.match_filter.Tribe.detect` method).
This will result in end-time not being strictly
honoured, so detections may occur after the end-time set. This is
because data must be run in the correct process-length.
.. warning::
Plotting within the match-filter routine uses the Agg backend
with interactive plotting turned off. This is because the function
            is designed to work in bulk. If you wish to turn interactive
            plotting on you must import matplotlib in your script first;
            when you then import match_filter you will get a warning that
            the call to matplotlib has no effect, meaning that match_filter
            has not changed the plotting behaviour.
.. note::
**Thresholding:**
**MAD** threshold is calculated as the:
.. math::
threshold {\\times} (median(abs(cccsum)))
where :math:`cccsum` is the cross-correlation sum for a given
template.
**absolute** threshold is a true absolute threshold based on the
cccsum value.
**av_chan_corr** is based on the mean values of single-channel
cross-correlations assuming all data are present as required for
the template, e.g:
.. math::
av\_chan\_corr\_thresh=threshold \\times (cccsum /
len(template))
where :math:`template` is a single template from the input and the
length is the number of channels within this template.
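        .. rubric:: Example
        An illustrative sketch, not run as a doctest; assumes a populated
        tribe and access to an FDSN web-service ('GEONET' here):
        >>> from obspy.clients.fdsn import Client
        >>> party = tribe.client_detect(
        ...     client=Client('GEONET'), starttime=UTCDateTime(2016, 1, 1),
        ...     endtime=UTCDateTime(2016, 1, 2), threshold=8.0,
        ...     threshold_type='MAD', trig_int=6.0,
        ...     plotvar=False)  # doctest: +SKIP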
"""
party = Party()
        buff = 300  # buffer: downloaded data are often not the correct length
data_length = max([t.process_length for t in self.templates])
pad = 0
for template in self.templates:
max_delay = (template.st.sort(['starttime'])[-1].stats.starttime -
template.st.sort(['starttime'])[0].stats.starttime)
if max_delay > pad:
pad = max_delay
download_groups = int(endtime - starttime) / data_length
template_channel_ids = []
for template in self.templates:
for tr in template.st:
if tr.stats.network not in [None, '']:
chan_id = (tr.stats.network,)
else:
chan_id = ('*',)
if tr.stats.station not in [None, '']:
chan_id += (tr.stats.station,)
else:
chan_id += ('*',)
if tr.stats.location not in [None, '']:
chan_id += (tr.stats.location,)
else:
chan_id += ('*',)
if tr.stats.channel not in [None, '']:
if len(tr.stats.channel) == 2:
chan_id += (tr.stats.channel[0] + '?' +
tr.stats.channel[-1],)
else:
chan_id += (tr.stats.channel,)
else:
chan_id += ('*',)
template_channel_ids.append(chan_id)
template_channel_ids = list(set(template_channel_ids))
if return_stream:
stream = Stream()
if int(download_groups) < download_groups:
download_groups = int(download_groups) + 1
else:
download_groups = int(download_groups)
for i in range(download_groups):
bulk_info = []
for chan_id in template_channel_ids:
bulk_info.append((
chan_id[0], chan_id[1], chan_id[2], chan_id[3],
starttime + (i * data_length) - (pad + buff),
starttime + ((i + 1) * data_length) + (pad + buff)))
try:
st = client.get_waveforms_bulk(bulk_info)
st.merge(fill_value='interpolate')
st.trim(starttime=starttime + (i * data_length) - pad,
endtime=starttime + ((i + 1) * data_length) + pad)
party += self.detect(
stream=st, threshold=threshold,
threshold_type=threshold_type, trig_int=trig_int,
plotvar=plotvar, daylong=daylong,
parallel_process=parallel_process, xcorr_func=xcorr_func,
concurrency=concurrency, cores=cores,
ignore_length=ignore_length, group_size=group_size,
overlap=None, debug=debug)
if return_stream:
stream += st
except Exception as e:
print('Error, routine incomplete, returning incomplete Party')
print('Error: %s' % str(e))
if return_stream:
return party, stream
else:
return party
for family in party:
if family is not None:
family.detections = family._uniq().detections
family.catalog = family._uniq().catalog
if return_stream:
return party, stream
else:
return party
    def construct(self, method, lowcut, highcut, samp_rate, filt_order,
prepick, **kwargs):
"""
Generate a Tribe of Templates. See :mod:`eqcorrscan.core.template_gen`
for available methods.
:param method: Method of Tribe generation.
:param kwargs: Arguments for the given method.
:type lowcut: float
:param lowcut:
Low cut (Hz), if set to None will not apply a lowcut
:type highcut: float
:param highcut:
High cut (Hz), if set to None will not apply a highcut.
:type samp_rate: float
:param samp_rate:
New sampling rate in Hz.
:type filt_order: int
:param filt_order:
Filter level (number of corners).
:type prepick: float
:param prepick: Pre-pick time in seconds
.. Note::
Methods: `from_contbase`, `from_sfile` and `from_sac` are not
supported by Tribe.construct and must use Template.construct.
.. Note::
The Method `multi_template_gen` is not supported because the
processing parameters for the stream are not known. Use
`from_meta_file` instead.
.. Note:: Templates will be named according to their start-time.
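        .. rubric:: Example
        An illustrative sketch, not run as a doctest; assumes an obspy
        Catalog of picked events in `catalog`, with the remaining keyword
        arguments forwarded to
        :func:`eqcorrscan.core.template_gen.from_client`:
        >>> tribe = Tribe().construct(
        ...     method='from_client', client_id='GEONET', catalog=catalog,
        ...     lowcut=2.0, highcut=8.0, samp_rate=20.0, filt_order=4,
        ...     prepick=0.1, swin='all', length=2.0)  # doctest: +SKIP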
"""
if method in ['from_contbase', 'from_sfile', 'from_sac']:
raise NotImplementedError('Tribe.construct does not support '
'single-event methods, use '
'Template.construct instead.')
func = getattr(template_gen, method)
templates, catalog, process_lengths = func(
lowcut=lowcut, highcut=highcut, filt_order=filt_order,
samp_rate=samp_rate, prepick=prepick, return_event=True, **kwargs)
for template, event, process_len in zip(templates, catalog,
process_lengths):
t = Template()
            for tr in list(template):  # iterate a copy so removal is safe
if not np.any(tr.data.astype(np.float16)):
warnings.warn('Data are zero in float16, missing data,'
' will not use: %s' % tr.id)
template.remove(tr)
if len(template) == 0:
print('Empty Template')
continue
t.st = template
            t.name = template.sort(['starttime'])[0].stats.starttime.strftime(
                '%Y_%m_%dt%H_%M_%S')
t.lowcut = lowcut
t.highcut = highcut
t.filt_order = filt_order
t.samp_rate = samp_rate
t.process_length = process_len
t.prepick = prepick
event.comments.append(Comment(
text="eqcorrscan_template_" + t.name,
creation_info=CreationInfo(agency='eqcorrscan',
author=getpass.getuser())))
t.event = event
self.templates.append(t)
return self
class Detection(object):
"""
Single detection from detection routines in eqcorrscan.
Information required for a full detection based on cross-channel \
correlation sums.
:type template_name: str
:param template_name: The name of the template for which this \
detection was made.
:type detect_time: obspy.core.utcdatetime.UTCDateTime
:param detect_time: Time of detection as an obspy UTCDateTime object
:type no_chans: int
    :param no_chans: The number of channels over which the cross-channel \
        correlation sum was calculated.
:type detect_val: float
:param detect_val: The raw value of the cross-channel correlation sum \
for this detection.
:type threshold: float
:param threshold: The value of the threshold used for this detection, \
will be the raw threshold value related to the cccsum.
:type typeofdet: str
:param typeofdet: Type of detection, STA, corr, bright
:type threshold_type: str
:param threshold_type: Type of threshold used for detection
:type threshold_input: float
:param threshold_input:
Threshold set for detection, relates to `threshold` according to the
`threshold_type`.
:type chans: list
:param chans: List of stations for the detection
:type event: obspy.core.event.event.Event
:param event:
        Obspy Event object for this detection; note that this is lost when
        writing :class:`Detection` objects to csv files using
        :func:`eqcorrscan.core.match_filter.Detection.write`
:type id: str
:param id: Identification for detection (should be unique).
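    .. rubric:: Example
    A minimal sketch with made-up values:
    >>> det = Detection(
    ...     template_name='test_template',
    ...     detect_time=UTCDateTime(2017, 1, 1), no_chans=5,
    ...     detect_val=3.5, threshold=2.0, typeofdet='corr',
    ...     threshold_type='MAD', threshold_input=8.0)
    >>> print(det)
    Detection on template: test_template at: 2017-01-01T00:00:00.000000Z with 5 channels: None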
"""
    def __init__(self, template_name, detect_time, no_chans, detect_val,
threshold, typeofdet, threshold_type, threshold_input,
chans=None, event=None, id=None):
"""Main class of Detection."""
self.template_name = template_name
self.detect_time = detect_time
self.no_chans = no_chans
self.chans = chans
self.detect_val = detect_val
self.threshold = threshold
self.typeofdet = typeofdet
self.threshold_type = threshold_type
self.threshold_input = threshold_input
self.event = event
if id is not None:
self.id = id
else:
self.id = (''.join(template_name.split(' ')) + '_' +
detect_time.strftime('%Y%m%d_%H%M%S%f'))
if event is not None:
event.resource_id = self.id
def __repr__(self):
"""Simple print."""
print_str = ' '.join(['template name =', self.template_name, '\n',
'detection id =', self.id, '\n',
'detection time =', str(self.detect_time), '\n',
'number of channels =', str(self.no_chans), '\n',
'channels =', str(self.chans), '\n',
'detection value =', str(self.detect_val), '\n',
'threshold =', str(self.threshold), '\n',
'threshold type =', self.threshold_type, '\n',
'input threshold =', str(self.threshold_input),
'\n detection type =', str(self.typeofdet)])
return "Detection(" + print_str + ")"
def __str__(self):
"""Full print."""
print_str = ' '.join(['Detection on template:', self.template_name,
'at:', str(self.detect_time),
'with', str(self.no_chans), 'channels:',
str(self.chans)])
return print_str
def __eq__(self, other):
for key in self.__dict__.keys():
self_is_event = isinstance(self.event, Event)
other_is_event = isinstance(other.event, Event)
if key == 'event':
if self_is_event and other_is_event:
if not _test_event_similarity(
self.event, other.event, verbose=False):
return False
elif self_is_event and not other_is_event:
return False
elif not self_is_event and other_is_event:
return False
elif self.__dict__[key] != other.__dict__[key]:
return False
return True
def __lt__(self, other):
if self.detect_time < other.detect_time:
return True
else:
return False
def __le__(self, other):
if self.detect_time <= other.detect_time:
return True
else:
return False
def __gt__(self, other):
return not self.__le__(other)
def __ge__(self, other):
return not self.__lt__(other)
def __hash__(self):
"""
Cannot hash Detection objects, they may change.
:return: 0
"""
return 0
def __ne__(self, other):
return not self.__eq__(other)
    def copy(self):
"""
Returns a copy of the detection.
:return: Copy of detection
"""
return copy.deepcopy(self)
    def write(self, fname, append=True):
"""
Write detection to csv formatted file.
Will append if append==True and file exists
:type fname: str
:param fname: Full path to file to open and write to.
:type append: bool
        :param append: Set to True to append to an existing file; if True \
            and the file does not exist a new file will be created. If \
            False any existing file will be overwritten.
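        .. rubric:: Example
        Illustrative usage, not run as a doctest (the path is
        hypothetical); the resulting csv can be read back with
        :func:`eqcorrscan.core.match_filter.read_detections`:
        >>> det.write('detections.csv', append=True)  # doctest: +SKIP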
"""
if append and os.path.isfile(fname):
f = open(fname, 'a')
else:
f = open(fname, 'w')
header = '; '.join(['Template name', 'Detection time (UTC)',
'Number of channels', 'Channel list',
'Detection value', 'Threshold',
'Threshold type', 'Input threshold',
'Detection type'])
f.write(header + '\n') # Write a header for the file
print_str = '; '.join([self.template_name, str(self.detect_time),
str(self.no_chans), str(self.chans),
str(self.detect_val), str(self.threshold),
self.threshold_type, str(self.threshold_input),
self.typeofdet])
f.write(print_str + '\n')
f.close()
def _total_microsec(t1, t2):
"""
Calculate difference between two datetime stamps in microseconds.
:type t1: :class: `datetime.datetime`
:type t2: :class: `datetime.datetime`
:return: int
.. rubric:: Example
>>> print(_total_microsec(UTCDateTime(2013, 1, 1).datetime,
... UTCDateTime(2014, 1, 1).datetime))
-31536000000000
"""
td = t1 - t2
return (td.seconds + td.days * 24 * 3600) * 10 ** 6 + td.microseconds
def _templates_match(t, family_file):
"""
    Return True if a template matches a family file path.
    :type t: Template
:type family_file: str
:return: bool
"""
return t.name == family_file.split(os.sep)[-1].split('_detections.csv')[0]
def _test_event_similarity(event_1, event_2, verbose=False):
"""
Check the similarity of the components of obspy events, discounting
resource IDs, which are not maintained in nordic files.
:type event_1: obspy.core.event.Event
:param event_1: First event
:type event_2: obspy.core.event.Event
:param event_2: Comparison event
:type verbose: bool
:param verbose: If true and fails will output why it fails.
:return: bool
"""
if not isinstance(event_1, Event) or not isinstance(event_2, Event):
raise NotImplementedError('Cannot compare things that are not Events')
# Check origins
if len(event_1.origins) != len(event_2.origins):
return False
for ori_1, ori_2 in zip(event_1.origins, event_2.origins):
for key in ori_1.keys():
if key not in ["resource_id", "comments", "arrivals",
"method_id", "origin_uncertainty", "depth_type",
"quality", "creation_info", "evaluation_mode",
"depth_errors", "time_errors"]:
if ori_1[key] != ori_2[key]:
if verbose:
print('%s is not the same as %s for key %s' %
(ori_1[key], ori_2[key], key))
return False
elif key == "arrivals":
if len(ori_1[key]) != len(ori_2[key]):
print('%i is not the same as %i for key %s' %
(len(ori_1[key]), len(ori_2[key]), key))
return False
for arr_1, arr_2 in zip(ori_1[key], ori_2[key]):
for arr_key in arr_1.keys():
if arr_key not in ["resource_id", "pick_id",
"distance"]:
if arr_1[arr_key] != arr_2[arr_key]:
if verbose:
print('%s does not match %s for key %s' %
(arr_1[arr_key], arr_2[arr_key],
arr_key))
return False
if arr_1["distance"] and round(
arr_1["distance"]) != round(arr_2["distance"]):
if verbose:
print('%s does not match %s for key %s' %
(arr_1[arr_key], arr_2[arr_key],
arr_key))
return False
# Check picks
if len(event_1.picks) != len(event_2.picks):
if verbose:
print('Number of picks is not equal')
return False
for pick_1, pick_2 in zip(event_1.picks, event_2.picks):
# Assuming same ordering of picks...
for key in pick_1.keys():
if key not in ["resource_id", "waveform_id"]:
if pick_1[key] != pick_2[key]:
# Cope with a None set not being equal to a set, but still
# None quantity
if pick_1[key] is None and pick_2[key] is not None:
try:
if not any(pick_2[key].__dict__.values()):
continue
except AttributeError:
if verbose:
print('%s is not the same as %s for key %s' %
(pick_1[key], pick_2[key], key))
return False
if pick_2[key] is None and pick_1[key] is not None:
try:
if not any(pick_1[key].__dict__.values()):
continue
except AttributeError:
if verbose:
print('%s is not the same as %s for key %s' %
(pick_1[key], pick_2[key], key))
return False
if verbose:
print('%s is not the same as %s for key %s' %
(pick_1[key], pick_2[key], key))
return False
elif key == "waveform_id":
if pick_1[key].station_code != pick_2[key].station_code:
if verbose:
print('Station codes do not match')
return False
if pick_1[key].channel_code[0] != pick_2[key].channel_code[0]:
if verbose:
print('Channel codes do not match')
return False
if pick_1[key].channel_code[-1] != \
pick_2[key].channel_code[-1]:
if verbose:
print('Channel codes do not match')
return False
# Check amplitudes
if not len(event_1.amplitudes) == len(event_2.amplitudes):
if verbose:
print('Not the same number of amplitudes')
return False
for amp_1, amp_2 in zip(event_1.amplitudes, event_2.amplitudes):
# Assuming same ordering of amplitudes
for key in amp_1.keys():
if key not in ["resource_id", "pick_id", "waveform_id", "snr"]:
if not amp_1[key] == amp_2[key]:
if verbose:
print('%s is not the same as %s for key %s' %
(amp_1[key], amp_2[key], key))
return False
elif key == "waveform_id":
if pick_1[key].station_code != pick_2[key].station_code:
if verbose:
print('Station codes do not match')
return False
if pick_1[key].channel_code[0] != pick_2[key].channel_code[0]:
if verbose:
print('Channel codes do not match')
return False
if pick_1[key].channel_code[-1] != \
pick_2[key].channel_code[-1]:
if verbose:
print('Channel codes do not match')
return False
return True
def _group_detect(templates, stream, threshold, threshold_type, trig_int,
plotvar, group_size=None, pre_processed=False, daylong=False,
parallel_process=True, xcorr_func=None, concurrency=None,
cores=None, ignore_length=False, overlap="calculate",
debug=0):
"""
Pre-process and compute detections for a group of templates.
Will process the stream object, so if running in a loop, you will want
to copy the stream before passing it to this function.
:type templates: list
:param templates: List of :class:`eqcorrscan.core.match_filter.Template`
:type stream: `obspy.core.stream.Stream`
:param stream: Continuous data to detect within using the Template.
:type threshold: float
:param threshold:
Threshold level, if using `threshold_type='MAD'` then this will be
the multiple of the median absolute deviation.
:type threshold_type: str
:param threshold_type:
The type of threshold to be used, can be MAD, absolute or
av_chan_corr. See Note on thresholding below.
:type trig_int: float
:param trig_int:
Minimum gap between detections in seconds. If multiple detections
occur within trig_int of one-another, the one with the highest
cross-correlation sum will be selected.
:type plotvar: bool
:param plotvar:
Turn plotting on or off, see warning about plotting below.
:type group_size: int
:param group_size:
Maximum number of templates to run at once, use to reduce memory
consumption, if unset will use all templates.
:type pre_processed: bool
:param pre_processed:
Set to True if `stream` has already undergone processing, in this
case eqcorrscan will only check that the sampling rate is correct.
Defaults to False, which will use the
:mod:`eqcorrscan.utils.pre_processing` routines to resample and
filter the continuous data.
:type daylong: bool
:param daylong:
Set to True to use the
:func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
        performs additional checks and is more efficient than other
        methods for day-long data.
:type parallel_process: bool
    :param parallel_process: Whether to process the data in parallel.
:type xcorr_func: str or callable
:param xcorr_func:
A str of a registered xcorr function or a callable for implementing
a custom xcorr function. For more details see:
:func:`eqcorrscan.utils.correlate.register_array_xcorr`
:type concurrency: str
:param concurrency:
The type of concurrency to apply to the xcorr function. Options are
'multithread', 'multiprocess', 'concurrent'. For more details see
:func:`eqcorrscan.utils.correlate.get_stream_xcorr`
:type cores: int
:param cores: Number of workers for processing and correlation.
:type ignore_length: bool
:param ignore_length:
        If using daylong=True, then dayproc will try to check that the data
are there for at least 80% of the day, if you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type overlap: float
:param overlap:
Either None, "calculate" or a float of number of seconds to
overlap detection streams by. This is to counter the effects of
        the delay-and-stack in calculating cross-correlation sums. Setting
overlap = "calculate" will work out the appropriate overlap based
on the maximum lags within templates.
:type debug: int
:param debug:
Debug level from 0-5 where five is more output, for debug levels
4 and 5, detections will not be computed in parallel.
:return:
:class:`eqcorrscan.core.match_filter.Party` of families of detections.
"""
master = templates[0]
# Check that they are all processed the same.
lap = 0.0
for template in templates:
starts = [t.stats.starttime for t in template.st.sort(['starttime'])]
if starts[-1] - starts[0] > lap:
lap = starts[-1] - starts[0]
if not template.same_processing(master):
raise MatchFilterError('Templates must be processed the same.')
if overlap is None:
overlap = 0.0
elif not isinstance(overlap, float) and str(overlap) == str("calculate"):
overlap = lap
elif not isinstance(overlap, float):
raise NotImplementedError(
"%s is not a recognised overlap type" % str(overlap))
if not pre_processed:
st = _group_process(
template_group=templates, parallel=parallel_process, debug=debug,
cores=cores, stream=stream, daylong=daylong,
ignore_length=ignore_length, overlap=overlap)
else:
warnings.warn('Not performing any processing on the '
'continuous data.')
st = [stream]
detections = []
party = Party()
if group_size is not None:
n_groups = int(len(templates) / group_size)
if n_groups * group_size < len(templates):
n_groups += 1
else:
n_groups = 1
for st_chunk in st:
debug_print(
'Computing detections between %s and %s' %
(st_chunk[0].stats.starttime, st_chunk[0].stats.endtime), 0, debug)
st_chunk.trim(starttime=st_chunk[0].stats.starttime,
endtime=st_chunk[0].stats.endtime)
for tr in st_chunk:
if len(tr) > len(st_chunk[0]):
tr.data = tr.data[0:len(st_chunk[0])]
for i in range(n_groups):
if group_size is not None:
end_group = (i + 1) * group_size
start_group = i * group_size
                if i == n_groups - 1:  # final group takes any remainder
end_group = len(templates)
else:
end_group = len(templates)
start_group = 0
template_group = [t for t in templates[start_group: end_group]]
detections += match_filter(
template_names=[t.name for t in template_group],
template_list=[t.st for t in template_group], st=st_chunk,
xcorr_func=xcorr_func, concurrency=concurrency,
threshold=threshold, threshold_type=threshold_type,
trig_int=trig_int, plotvar=plotvar, debug=debug, cores=cores)
for template in template_group:
family = Family(template=template, detections=[])
for detection in detections:
if detection.template_name == template.name:
family.append(detection)
party += family
return party
def _group_process(template_group, parallel, debug, cores, stream, daylong,
ignore_length, overlap):
"""
Process data into chunks based on template processing length.
Templates in template_group must all have the same processing parameters.
:type template_group: list
:param template_group: List of Templates.
:type parallel: bool
:param parallel: Whether to use parallel processing or not
:type debug: int
:param debug: Debug level from 0-5
:type cores: int
:param cores: Number of cores to use, can be False to use all available.
:type stream: :class:`obspy.core.stream.Stream`
:param stream: Stream to process, will be left intact.
:type daylong: bool
:param daylong: Whether to enforce day-length files or not.
:type ignore_length: bool
:param ignore_length:
        If using daylong=True, then dayproc will try to check that the data
are there for at least 80% of the day, if you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type overlap: float
:param overlap: Number of seconds to overlap chunks by.
:return: list of processed streams.
"""
master = template_group[0]
processed_streams = []
kwargs = {
'filt_order': master.filt_order,
'highcut': master.highcut, 'lowcut': master.lowcut,
'samp_rate': master.samp_rate, 'debug': debug,
'parallel': parallel, 'num_cores': cores}
if daylong:
if not master.process_length == 86400:
warnings.warn(
'Processing day-long data, but template was cut from %i s long'
' data, will reduce correlations' % master.process_length)
func = dayproc
kwargs.update({'ignore_length': ignore_length})
# Check that data all start on the same day, otherwise strange
# things will happen...
starttimes = [tr.stats.starttime.date for tr in stream]
if not len(list(set(starttimes))) == 1:
warnings.warn('Data start on different days, setting to last day')
starttime = UTCDateTime(
stream.sort(['starttime'])[-1].stats.starttime.date)
else:
starttime = stream.sort(['starttime'])[0].stats.starttime
else:
# We want to use shortproc to allow overlaps
func = shortproc
starttime = stream.sort(['starttime'])[0].stats.starttime
endtime = stream.sort(['endtime'])[-1].stats.endtime
n_chunks = int((endtime - starttime + 1) / (master.process_length -
overlap))
if n_chunks == 0:
print('Data must be process_length or longer, not computing')
for i in range(n_chunks):
kwargs.update(
{'starttime': starttime + (i * (master.process_length - overlap))})
if not daylong:
kwargs.update(
{'endtime': kwargs['starttime'] + master.process_length})
chunk_stream = stream.slice(starttime=kwargs['starttime'],
endtime=kwargs['endtime']).copy()
else:
chunk_stream = stream.copy()
processed_streams.append(func(st=chunk_stream, **kwargs))
return processed_streams
def _par_read(dirname, compressed=True):
"""
    Internal function to read a formatted parameter file.
:type dirname: str
:param dirname: Directory to read the parameter file from.
:type compressed: bool
:param compressed: Whether the directory is compressed or not.
"""
templates = []
if compressed:
arc = tarfile.open(dirname, "r:*")
members = arc.getmembers()
_parfile = [member for member in members
if member.name.split(os.sep)[-1] ==
'template_parameters.csv']
if len(_parfile) == 0:
arc.close()
raise MatchFilterError(
'No template parameter file in archive')
        parfile = arc.extractfile(_parfile[0])
else:
parfile = open(dirname + '/' + 'template_parameters.csv', 'r')
for line in parfile:
t_in = Template()
for key_pair in line.rstrip().split(','):
if key_pair.split(':')[0].strip() == 'name':
t_in.__dict__[key_pair.split(':')[0].strip()] = \
key_pair.split(':')[-1].strip()
elif key_pair.split(':')[0].strip() == 'filt_order':
try:
t_in.__dict__[key_pair.split(':')[0].strip()] = \
int(key_pair.split(':')[-1])
except ValueError:
pass
else:
try:
t_in.__dict__[key_pair.split(':')[0].strip()] = \
float(key_pair.split(':')[-1])
except ValueError:
pass
templates.append(t_in)
parfile.close()
if compressed:
arc.close()
return templates
def _resolved(x):
return os.path.realpath(os.path.abspath(x))
def _badpath(path, base):
"""
joinpath will ignore base if path is absolute.
"""
return not _resolved(os.path.join(base, path)).startswith(base)
def _badlink(info, base):
"""
Links are interpreted relative to the directory containing the link
"""
tip = _resolved(os.path.join(base, os.path.dirname(info.name)))
return _badpath(info.linkname, base=tip)
def _safemembers(members):
"""Check members of a tar archive for safety.
Ensure that they do not contain paths or links outside of where we
need them - this would only happen if the archive wasn't made by
eqcorrscan.
:type members: :class:`tarfile.TarFile`
:param members: an open tarfile.
"""
base = _resolved(".")
for finfo in members:
if _badpath(finfo.name, base):
print(finfo.name, "is blocked (illegal path)")
        elif finfo.issym() and _badlink(finfo, base):
            print(finfo.name, "is blocked: Symlink to", finfo.linkname)
        elif finfo.islnk() and _badlink(finfo, base):
            print(finfo.name, "is blocked: Hard link to", finfo.linkname)
else:
yield finfo
def _write_family(family, filename):
"""
Write a family to a csv file.
:type family: :class:`eqcorrscan.core.match_filter.Family`
:param family: Family to write to file
:type filename: str
:param filename: File to write to.
"""
with open(filename, 'w') as f:
for detection in family.detections:
det_str = ''
for key in detection.__dict__.keys():
if key == 'event' and detection.__dict__[key] is not None:
value = str(detection.event.resource_id)
elif key in ['threshold', 'detect_val', 'threshold_input']:
value = format(detection.__dict__[key], '.32f').rstrip('0')
else:
value = str(detection.__dict__[key])
det_str += key + ': ' + value + '; '
f.write(det_str + '\n')
return
def _read_family(fname, all_cat):
"""
Internal function to read csv family files.
    :type fname: str
    :param fname: Filename
    :type all_cat: obspy.core.event.Catalog
    :param all_cat: Catalog of events to associate with detections.
    :return: list of Detection
"""
detections = []
with open(fname, 'r') as f:
for line in f:
det_dict = {}
for key_pair in line.rstrip().split(';'):
key = key_pair.split(': ')[0].strip()
value = key_pair.split(': ')[-1].strip()
if key == 'event':
if len(all_cat) == 0:
continue
el = [e for e in all_cat
if str(e.resource_id).split('/')[-1] == value][0]
det_dict.update({'event': el})
elif key == 'detect_time':
det_dict.update(
{'detect_time': UTCDateTime(value)})
elif key == 'chans':
det_dict.update({'chans': ast.literal_eval(value)})
elif key in ['template_name', 'typeofdet', 'id',
'threshold_type']:
det_dict.update({key: value})
elif key == 'no_chans':
det_dict.update({key: int(value)})
elif len(key) == 0:
continue
else:
det_dict.update({key: float(value)})
detections.append(Detection(**det_dict))
return detections
def read_tribe(fname):
"""
Read a Tribe of templates from a tar archive.
:param fname: Filename to read from
:return: :class:`eqcorrscan.core.match_filter.Tribe`
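    .. rubric:: Example
    Illustrative usage, not run as a doctest; assumes an archive
    previously written by Tribe.write:
    >>> tribe = read_tribe('test_tribe.tgz')  # doctest: +SKIP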
"""
tribe = Tribe()
tribe.read(filename=fname)
return tribe
def read_party(fname=None):
"""
Read detections and metadata from a tar archive.
:type fname: str
:param fname:
Filename to read from, if this contains a single Family, then will
return a party of length = 1
:return: :class:`eqcorrscan.core.match_filter.Party`
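    .. rubric:: Example
    Illustrative usage, not run as a doctest; assumes an archive
    previously written by Party.write (the file name is hypothetical):
    >>> party = read_party('party.tgz')  # doctest: +SKIP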
"""
party = Party()
party.read(filename=fname)
return party
def read_detections(fname):
"""
Read detections from a file to a list of Detection objects.
:type fname: str
:param fname: File to read from, must be a file written to by \
Detection.write.
:returns: list of :class:`eqcorrscan.core.match_filter.Detection`
:rtype: list
.. note::
:class:`eqcorrscan.core.match_filter.Detection`'s returned do not
contain Detection.event
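    .. rubric:: Example
    Illustrative usage, not run as a doctest; assumes a csv file
    previously written by Detection.write (the file name is hypothetical):
    >>> detections = read_detections('detections.csv')  # doctest: +SKIP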
"""
f = open(fname, 'r')
detections = []
for index, line in enumerate(f):
if index == 0:
continue # Skip header
if line.rstrip().split('; ')[0] == 'Template name':
continue # Skip any repeated headers
detection = line.rstrip().split('; ')
detection[1] = UTCDateTime(detection[1])
detection[2] = int(float(detection[2]))
detection[3] = ast.literal_eval(detection[3])
detection[4] = float(detection[4])
detection[5] = float(detection[5])
if len(detection) < 9:
detection.extend(['Unset', float('NaN')])
else:
detection[7] = float(detection[7])
detections.append(Detection(
template_name=detection[0], detect_time=detection[1],
no_chans=detection[2], detect_val=detection[4],
threshold=detection[5], threshold_type=detection[6],
threshold_input=detection[7], typeofdet=detection[8],
chans=detection[3]))
f.close()
return detections
def read_template(fname):
"""
Read a Template object from a tar archive.
:type fname: str
:param fname: Filename to read from
:return: :class:`eqcorrscan.core.match_filter.Template`
"""
template = Template()
template.read(filename=fname)
return template
def write_catalog(detections, fname, format="QUAKEML"):
"""Write events contained within detections to a catalog file.
:type detections: list
:param detections: list of eqcorrscan.core.match_filter.Detection
:type fname: str
:param fname: Name of the file to write to
:type format: str
:param format: File format to use, see obspy.core.event.Catalog.write \
for supported formats.
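    .. rubric:: Example
    Illustrative usage, not run as a doctest; assumes a list of
    detections with events attached (the file name is hypothetical):
    >>> write_catalog(detections, 'detections.xml',
    ...               format='QUAKEML')  # doctest: +SKIP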
"""
catalog = get_catalog(detections)
catalog.write(filename=fname, format=format)
def get_catalog(detections):
"""
Generate an :class:`obspy.core.event.Catalog` from list of \
:class:`Detection`'s.
:type detections: list
:param detections: list of :class:`eqcorrscan.core.match_filter.Detection`
:returns: Catalog of detected events.
:rtype: :class:`obspy.core.event.Catalog`
.. warning::
Will only work if the detections have an event associated with them.
This will not be the case if detections have been written to csv
format using :func:`eqcorrscan.core.match_filter.Detection.write`
and read back in.
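    .. rubric:: Example
    Illustrative usage, not run as a doctest; assumes a list of
    detections with events attached:
    >>> catalog = get_catalog(detections)  # doctest: +SKIP
    >>> catalog.write('catalog.xml', format='QUAKEML')  # doctest: +SKIP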
"""
catalog = Catalog()
for detection in detections:
if detection.event:
catalog.append(detection.event)
return catalog
def normxcorr2(template, image):
"""
Thin wrapper to eqcorrscan.utils.correlate functions.
:type template: numpy.ndarray
:param template: Template array
:type image: numpy.ndarray
:param image:
Image to scan the template through. The order of these
matters, if you put the template after the image you will get a
reversed correlation matrix
:return:
New :class:`numpy.ndarray` of the correlation values for the
correlation of the image with the template.
:rtype: numpy.ndarray
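    .. rubric:: Example
    A minimal sketch on synthetic data; the output has one row and
    length len(image) - len(template) + 1:
    >>> import numpy as np
    >>> rng = np.random.RandomState(42)
    >>> template, image = rng.randn(20), rng.randn(100)
    >>> normxcorr2(template, image).shape
    (1, 81)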
"""
array_xcorr = get_array_xcorr()
# Check that we have been passed numpy arrays
if type(template) != np.ndarray or type(image) != np.ndarray:
print('You have not provided numpy arrays, I will not convert them')
return 'NaN'
if len(template) > len(image):
ccc = array_xcorr(
templates=np.array([image]).astype(np.float32),
stream=template.astype(np.float32), pads=[0],
threaded=False)[0][0]
else:
ccc = array_xcorr(
templates=np.array([template]).astype(np.float32),
stream=image.astype(np.float32), pads=[0], threaded=False)[0][0]
ccc = ccc.reshape((1, len(ccc)))
return ccc
def match_filter(template_names, template_list, st, threshold,
threshold_type, trig_int, plotvar, plotdir='.',
xcorr_func=None, concurrency=None, cores=None,
debug=0, plot_format='png', output_cat=False,
output_event=True, extract_detections=False,
arg_check=True):
"""
Main matched-filter detection function.
Over-arching code to run the correlations of given templates with a \
day of seismic data and output the detections based on a given threshold.
For a functional example see the tutorials.
:type template_names: list
:param template_names:
List of template names in the same order as template_list
:type template_list: list
:param template_list:
A list of templates of which each template is a
:class:`obspy.core.stream.Stream` of obspy traces containing seismic
data and header information.
:type st: :class:`obspy.core.stream.Stream`
:param st:
A Stream object containing all the data available and
required for the correlations with templates given. For efficiency
this should contain no excess traces which are not in one or more of
the templates. This will now remove excess traces internally, but
will copy the stream and work on the copy, leaving your input stream
untouched.
:type threshold: float
:param threshold: A threshold value set based on the threshold_type
:type threshold_type: str
:param threshold_type:
The type of threshold to be used, can be MAD, absolute or av_chan_corr.
See Note on thresholding below.
:type trig_int: float
:param trig_int: Minimum gap between detections in seconds.
:type plotvar: bool
:param plotvar: Turn plotting on or off
:type plotdir: str
:param plotdir:
Path to plotting folder, plots will be output here, defaults to run
location.
:type xcorr_func: str or callable
:param xcorr_func:
A str of a registered xcorr function or a callable for implementing
a custom xcorr function. For more information see:
:func:`eqcorrscan.utils.correlate.register_array_xcorr`
:type concurrency: str
:param concurrency:
The type of concurrency to apply to the xcorr function. Options are
'multithread', 'multiprocess', 'concurrent'. For more details see
:func:`eqcorrscan.utils.correlate.get_stream_xcorr`
:type cores: int
:param cores: Number of cores to use
:type debug: int
:param debug:
Debug output level, the bigger the number, the more the output.
:type plot_format: str
:param plot_format: Specify format of output plots if saved
:type output_cat: bool
:param output_cat:
Specifies if matched_filter will output an obspy.Catalog class
containing events for each detection. Default is False, in which case
matched_filter will output a list of detection classes, as normal.
:type output_event: bool
:param output_event:
Whether to include events in the Detection objects, defaults to True,
but for large cases you may want to turn this off as Event objects
can be quite memory intensive.
:type extract_detections: bool
:param extract_detections:
Specifies whether or not to return a list of streams, one stream per
detection.
:type arg_check: bool
:param arg_check:
Check arguments, defaults to True, but if running in bulk, and you are
certain of your arguments, then set to False.
    If neither `output_cat` nor `extract_detections` is set to `True`,
then only the list of :class:`eqcorrscan.core.match_filter.Detection`'s
will be output:
:return:
:class:`eqcorrscan.core.match_filter.Detection` detections for each
detection made.
:rtype: list
    If `output_cat` is set to `True`, then the
:class:`obspy.core.event.Catalog` will also be output:
:return: Catalog containing events for each detection, see above.
:rtype: :class:`obspy.core.event.Catalog`
    If `extract_detections` is set to `True`, then the list of
:class:`obspy.core.stream.Stream`'s will also be output.
:return:
list of :class:`obspy.core.stream.Stream`'s for each detection, see
above.
:rtype: list
.. warning::
Plotting within the match-filter routine uses the Agg backend
with interactive plotting turned off. This is because the function
        is designed to work in bulk. If you wish to turn interactive
        plotting on you must import matplotlib in your script first;
        when you then import match_filter you will get a warning that
        the call to matplotlib has no effect, meaning that match_filter
        has not changed the plotting behaviour.
.. note::
**Data overlap:**
Internally this routine shifts and trims the data according to the
offsets in the template (e.g. if trace 2 starts 2 seconds after trace 1
in the template then the continuous data will be shifted by 2 seconds
to align peak correlations prior to summing). Because of this,
detections at the start and end of continuous data streams
**may be missed**. The maximum time-period that might be missing
detections is the maximum offset in the template.
To work around this, if you are conducting matched-filter detections
through long-duration continuous data, we suggest using some overlap
(a few seconds, on the order of the maximum offset in the templates)
        in the continuous data. You will then need to post-process the
detections (which should be done anyway to remove duplicates).
.. note::
**Thresholding:**
**MAD** threshold is calculated as the:
.. math::
threshold {\\times} (median(abs(cccsum)))
where :math:`cccsum` is the cross-correlation sum for a given template.
**absolute** threshold is a true absolute threshold based on the
cccsum value.
**av_chan_corr** is based on the mean values of single-channel
cross-correlations assuming all data are present as required for the
template, e.g:
.. math::
av\_chan\_corr\_thresh=threshold \\times (cccsum / len(template))
where :math:`template` is a single template from the input and the
length is the number of channels within this template.
.. note::
The output_cat flag will create an :class:`obspy.core.event.Catalog`
containing one event for each
:class:`eqcorrscan.core.match_filter.Detection`'s generated by
match_filter. Each event will contain a number of comments dealing
with correlation values and channels used for the detection. Each
channel used for the detection will have a corresponding
:class:`obspy.core.event.Pick` which will contain time and
waveform information. **HOWEVER**, the user should note that, at
present, the pick times do not account for the
prepick times inherent in each template. For example, if a template
trace starts 0.1 seconds before the actual arrival of that phase,
then the pick time generated by match_filter for that phase will be
0.1 seconds early.
.. Note:: xcorr_func can be used as follows:
.. rubric:: Example
>>> import obspy
>>> import numpy as np
>>> from eqcorrscan.core.match_filter import match_filter
>>> from eqcorrscan.utils.correlate import time_multi_normxcorr
>>> # define a custom xcorr function
>>> def custom_normxcorr(templates, stream, pads, *args, **kwargs):
... # Just to keep example short call other xcorr function
... # in practice you would define your own function here
... print('calling custom xcorr function')
... return time_multi_normxcorr(templates, stream, pads)
>>> # generate some toy templates and stream
>>> random = np.random.RandomState(42)
>>> template = obspy.read()
>>> stream = obspy.read()
>>> for num, tr in enumerate(stream): # iter stream and embed templates
... data = tr.data
... tr.data = random.randn(6000) * 5
... tr.data[100: 100 + len(data)] = data
    >>> # call match_filter and ensure the custom function is used
>>> detections = match_filter(
... template_names=['1'], template_list=[template], st=stream,
... threshold=.5, threshold_type='absolute', trig_int=1, plotvar=False,
... xcorr_func=custom_normxcorr) # doctest:+ELLIPSIS
calling custom xcorr function...
"""
_spike_test(st)
import matplotlib
matplotlib.use('Agg')
from eqcorrscan.utils.plotting import _match_filter_plot
if arg_check:
# Check the arguments to be nice - if arguments wrong type the parallel
# output for the error won't be useful
if not type(template_names) == list:
raise MatchFilterError('template_names must be of type: list')
if not type(template_list) == list:
raise MatchFilterError('templates must be of type: list')
if not len(template_list) == len(template_names):
raise MatchFilterError('Not the same number of templates as names')
for template in template_list:
if not type(template) == Stream:
msg = 'template in template_list must be of type: ' + \
'obspy.core.stream.Stream'
raise MatchFilterError(msg)
if not type(st) == Stream:
msg = 'st must be of type: obspy.core.stream.Stream'
raise MatchFilterError(msg)
if str(threshold_type) not in [str('MAD'), str('absolute'),
str('av_chan_corr')]:
msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
raise MatchFilterError(msg)
for tr in st:
if not tr.stats.sampling_rate == st[0].stats.sampling_rate:
raise MatchFilterError('Sampling rates are not equal')
for template in template_list:
for tr in template:
if not tr.stats.sampling_rate == st[0].stats.sampling_rate:
raise MatchFilterError(
'Template sampling rate does not '
'match continuous data')
# Copy the stream here because we will muck about with it
stream = st.copy()
templates = copy.deepcopy(template_list)
_template_names = copy.deepcopy(template_names)
# Debug option to confirm that the channel names match those in the
# templates
if debug >= 2:
template_stachan = []
data_stachan = []
for template in templates:
for tr in template:
if isinstance(tr.data, np.ma.core.MaskedArray):
raise MatchFilterError('Template contains masked array,'
' split first')
template_stachan.append(tr.stats.station + '.' +
tr.stats.channel)
for tr in stream:
data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
template_stachan = list(set(template_stachan))
data_stachan = list(set(data_stachan))
debug_print('I have template info for these stations:\n' +
str(template_stachan) +
'\nI have daylong data for these stations:\n' +
str(data_stachan), 3, debug)
# Perform a check that the continuous data are all the same length
min_start_time = min([tr.stats.starttime for tr in stream])
max_end_time = max([tr.stats.endtime for tr in stream])
longest_trace_length = stream[0].stats.sampling_rate * (max_end_time -
min_start_time)
longest_trace_length += 1
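# The extra sample is because npts includes both the first and the last
# sample: a trace of duration d seconds has d * samp_rate + 1 samples.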
for tr in stream:
if not tr.stats.npts == longest_trace_length:
msg = 'Data are not equal length, padding short traces'
warnings.warn(msg)
start_pad = np.zeros(int(tr.stats.sampling_rate *
(tr.stats.starttime - min_start_time)))
end_pad = np.zeros(int(tr.stats.sampling_rate *
(max_end_time - tr.stats.endtime)))
tr.data = np.concatenate([start_pad, tr.data, end_pad])
# Perform check that all template lengths are internally consistent
for i, temp in enumerate(template_list):
if len(set([tr.stats.npts for tr in temp])) > 1:
msg = ('Template %s contains traces of differing length, this is '
'not currently supported' % _template_names[i])
raise MatchFilterError(msg)
outtic = time.clock()
debug_print('Ensuring all template channels have matches in'
' continuous data', 2, debug)
template_stachan = {}
# Work out what station-channel pairs are in the templates, including
# duplicate station-channel pairs. We will use this information to fill
# all templates with the same station-channel pairs as required by
# _template_loop.
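# Illustrative example (hypothetical values): template_stachan might end
# up as {('NZ', 'FOZ', '10', 'HHZ'): 2}, meaning at least one template
# holds two NZ.FOZ.10.HHZ traces.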
for template in templates:
stachans_in_template = []
for tr in template:
stachans_in_template.append((tr.stats.network, tr.stats.station,
tr.stats.location, tr.stats.channel))
stachans_in_template = dict(Counter(stachans_in_template))
for stachan, number_in_template in stachans_in_template.items():
if stachan not in template_stachan or \
number_in_template > template_stachan[stachan]:
template_stachan[stachan] = number_in_template
# Remove un-matched channels from templates.
_template_stachan = copy.deepcopy(template_stachan)
for stachan in template_stachan.keys():
if not stream.select(network=stachan[0], station=stachan[1],
location=stachan[2], channel=stachan[3]):
# Remove stachan from the dictionary of template stachans
_template_stachan.pop(stachan)
# Remove template traces rather than adding NaN data
for template in templates:
if template.select(network=stachan[0], station=stachan[1],
location=stachan[2], channel=stachan[3]):
for tr in template.select(network=stachan[0],
station=stachan[1],
location=stachan[2],
channel=stachan[3]):
template.remove(tr)
print('Removing template channel %s.%s.%s.%s due to'
' no matches in continuous data' %
(stachan[0], stachan[1], stachan[2], stachan[3]))
template_stachan = _template_stachan
# Remove un-needed channels from continuous data.
for tr in stream:
if not (tr.stats.network, tr.stats.station,
tr.stats.location, tr.stats.channel) in \
template_stachan.keys():
print('Removing channel in continuous data for %s.%s.%s.%s:'
' no match in template' %
(tr.stats.network, tr.stats.station, tr.stats.location,
tr.stats.channel))
stream.remove(tr)
# Check for duplicate channels
stachans = [(tr.stats.network, tr.stats.station,
tr.stats.location, tr.stats.channel) for tr in stream]
c_stachans = Counter(stachans)
for key in c_stachans.keys():
if c_stachans[key] > 1:
msg = ('Multiple channels for %s.%s.%s.%s, likely a data issue'
% (key[0], key[1], key[2], key[3]))
raise MatchFilterError(msg)
# Pad out templates to have all channels
_templates = []
used_template_names = []
for template, template_name in zip(templates, _template_names):
if len(template) == 0:
msg = ('No channels matching in continuous data for ' +
'template: ' + template_name)
warnings.warn(msg)
continue
for stachan in template_stachan.keys():
number_of_channels = len(template.select(
network=stachan[0], station=stachan[1], location=stachan[2],
channel=stachan[3]))
if number_of_channels < template_stachan[stachan]:
missed_channels = template_stachan[stachan] - \
number_of_channels
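# Fill the gap with NaN traces so that every template carries the
# full channel set; downstream correlation functions are expected
# to treat all-NaN traces as missing channels rather than real data.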
nulltrace = Trace()
nulltrace.stats.update(
{'network': stachan[0], 'station': stachan[1],
'location': stachan[2], 'channel': stachan[3],
'sampling_rate': template[0].stats.sampling_rate,
'starttime': template[0].stats.starttime})
nulltrace.data = np.array([np.NaN] * len(template[0].data),
dtype=np.float32)
for dummy in range(missed_channels):
template += nulltrace
template.sort()
_templates.append(template)
used_template_names.append(template_name)
# Quick check that this has all worked
if len(template) != max([len(t) for t in templates]):
raise MatchFilterError('Internal error forcing same template '
'lengths, report this error.')
templates = _templates
_template_names = used_template_names
debug_print('Starting the correlation run for these data', 2, debug)
for template in templates:
debug_print(str(template), 3, debug)
debug_print(str(stream), 3, debug)
multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency)
cccsums, no_chans, chans = multichannel_normxcorr(
templates=templates, stream=stream, cores=cores)
if len(cccsums[0]) == 0:
raise MatchFilterError('Correlation has not run, zero length cccsum')
outtoc = time.clock()
debug_print(' '.join(['Looping over templates and streams took:',
str(outtoc - outtic), 's']), 0, debug)
debug_print('The shape of the returned cccsums is: %s\n'
'This is from %i templates\nCorrelated with %i channels of '
'data' % (cccsums.shape, len(templates), len(stream)), 2,
debug)
detections = []
if output_cat:
det_cat = Catalog()
for i, cccsum in enumerate(cccsums):
template = templates[i]
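# Convert the user-supplied threshold into a raw cccsum threshold:
# 'MAD' scales by the median absolute cccsum, 'absolute' is used
# as-is, and 'av_chan_corr' scales the desired average per-channel
# correlation by the number of channels used for this template.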
if str(threshold_type) == str('MAD'):
rawthresh = threshold * np.median(np.abs(cccsum))
elif str(threshold_type) == str('absolute'):
rawthresh = threshold
elif str(threshold_type) == str('av_chan_corr'):
rawthresh = threshold * no_chans[i]
debug_print("Threshold is set at: %f\nMax of data is %f\nMean of "
"data is %f" % (rawthresh, max(cccsum), np.mean(cccsum)),
0, debug)
if np.abs(np.mean(cccsum)) > 0.05:
warnings.warn('Mean is not zero! Check this!')
# Set up a trace object for the cccsum as this is easier to plot and
# maintains timing
if plotvar:
_match_filter_plot(stream=stream, cccsum=cccsum,
template_names=_template_names,
rawthresh=rawthresh, plotdir=plotdir,
plot_format=plot_format, i=i)
if debug >= 4:
np.save(_template_names[i] +
stream[0].stats.starttime.datetime.strftime('%Y%j'),
cccsum)
debug_print(
' '.join(['Saved the cccsum to:', _template_names[i],
stream[0].stats.starttime.datetime.strftime('%Y%j')]),
4, debug)
tic = time.clock()
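# find_peaks2_short returns peaks as a list of (cccsum value, sample)
# tuples above the threshold, keeping only the largest peak within each
# trig_int window (trig_int is converted from seconds to samples below).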
if max(cccsum) > rawthresh:
peaks = find_peaks2_short(
arr=cccsum, thresh=rawthresh,
trig_int=trig_int * stream[0].stats.sampling_rate,
debug=debug,
starttime=stream[0].stats.starttime,
samp_rate=stream[0].stats.sampling_rate)
else:
debug_print('No peaks found above threshold', 0, debug)
peaks = False
toc = time.clock()
debug_print('Finding peaks took: %f s' % (toc - tic), 0, debug)
if peaks:
for peak in peaks:
detecttime = stream[0].stats.starttime + \
peak[1] / stream[0].stats.sampling_rate
# Detect time must be valid QuakeML uri within resource_id.
# This will write a formatted string which is still
# readable by UTCDateTime
if not output_event and not output_cat:
det_ev = None
else:
det_time = str(detecttime.strftime('%Y%m%dT%H%M%S.%f'))
ev = Event(resource_id=ResourceIdentifier(
id=_template_names[i] + '_' + det_time,
prefix='smi:local'))
ev.creation_info = CreationInfo(
author='EQcorrscan', creation_time=UTCDateTime())
ev.comments.append(
Comment(text='threshold=' + str(rawthresh)))
ev.comments.append(
Comment(text='detect_val=' + str(peak[0])))
ev.comments.append(Comment(
text='channels used: ' +
' '.join([str(pair) for pair in chans[i]])))
min_template_tm = min(
[tr.stats.starttime for tr in template])
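# Each pick time is offset from the detection time by that
# channel's delay (moveout) relative to the earliest trace
# in the template.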
for tr in template:
if (tr.stats.station, tr.stats.channel) \
not in chans[i]:
continue
else:
pick_tm = detecttime + (tr.stats.starttime -
min_template_tm)
wv_id = WaveformStreamID(
network_code=tr.stats.network,
station_code=tr.stats.station,
channel_code=tr.stats.channel)
ev.picks.append(
Pick(time=pick_tm, waveform_id=wv_id))
if not output_event:
det_ev = None
else:
det_ev = ev
detections.append(Detection(
template_name=_template_names[i], detect_time=detecttime,
no_chans=no_chans[i], detect_val=peak[0],
threshold=rawthresh, typeofdet='corr', chans=chans[i],
event=det_ev, threshold_type=threshold_type,
threshold_input=threshold))
if output_cat:
det_cat.append(ev)
if extract_detections:
detection_streams = extract_from_stream(stream, detections)
del stream, templates
if output_cat and not extract_detections:
return detections, det_cat
elif not extract_detections:
return detections
elif extract_detections and not output_cat:
return detections, detection_streams
else:
return detections, det_cat, detection_streams
if __name__ == "__main__":
import doctest
doctest.testmod()
# List files to be removed after doctest
cleanup = ['test_tar_write.tgz', 'test_csv_write.csv', 'test_quakeml.ml',
'test_family.tgz', 'test_template.tgz', 'test_waveform.ms',
'test_template_read.tgz', 'test_tribe.tgz']
for f in cleanup:
if os.path.isfile(f):
os.remove(f)
elif os.path.isdir(f):
shutil.rmtree(f)