"""
Helper functions for reading data from archives for the EQcorrscan package.
.. note:: These functions are tools to aid simplification of general scripts, \
they do not cover all use cases, however if you have a use case you want \
to see here, then let the authors know, or implement it yourself and \
contribute it back to the project.
:copyright:
EQcorrscan developers.
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import warnings
import glob
from obspy import read, UTCDateTime, Stream
from obspy.clients.fdsn.header import FDSNException
from obspy.clients.seishub import Client as SeishubClient
from obspy.clients.fdsn import Client as FDSNClient
def read_data(archive, arc_type, day, stachans, length=86400):
    """
    Function to read the appropriate data from an archive for a day.

    :type archive: str
    :param archive:
        The archive source - if arc_type is seishub, this should be a url,
        if the arc_type is FDSN then this can be either a url or a known obspy
        client. If arc_type is day_vols, then this is the path to the top
        directory.
    :type arc_type: str
    :param arc_type: The type of archive, can be: seishub, FDSN, day_vols
    :type day: datetime.date
    :param day: Date to retrieve data for
    :type stachans: list
    :param stachans: List of tuples of Stations and channels to try and get,
        will not fail if stations are not available, but will warn.
    :type length: float
    :param length: Data length to extract in seconds, defaults to 1 day.

    :returns: Stream of data
    :rtype: obspy.core.stream.Stream

    :raises NotImplementedError: if arc_type is not one of seishub, FDSN
        or day_vols.

    .. note:: A note on arc_types, if arc_type is day_vols, then this will \
        look for directories labelled in the IRIS DMC conventions of \
        Yyyyy/Rjjj.01/... where yyyy is the year and jjj is the julian day. \
        Data within these files directories should be stored as day-long, \
        single-channel files. This is not implemented in the fasted way \
        possible to allow for a more general situation. If you require more \
        speed you will need to re-write this.

    .. rubric:: Example

    >>> from obspy import UTCDateTime
    >>> t1 = UTCDateTime(2012, 3, 26)
    >>> stachans = [('JCNB', 'SP1')]
    >>> st = read_data('NCEDC', 'FDSN', t1, stachans)
    >>> print(st)
    1 Trace(s) in Stream:
    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
950000Z | 20.0 Hz, 1728000 samples

    .. rubric:: Example, missing data

    >>> t1 = UTCDateTime(2012, 3, 26)
    >>> stachans = [('JCNB', 'SP1'), ('GCSZ', 'HHZ')]
    >>> st = read_data('NCEDC', 'FDSN', t1, stachans)
    >>> print(st)
    1 Trace(s) in Stream:
    BP.JCNB.40.SP1 | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.\
950000Z | 20.0 Hz, 1728000 samples

    .. rubric:: Example, local day-volumes

    >>> t1 = UTCDateTime(2012, 3, 26)
    >>> stachans = [('WHYM', 'SHZ'), ('EORO', 'SHZ')]
    >>> st = read_data('eqcorrscan/tests/test_data/day_vols', 'day_vols',
    ...                t1, stachans)
    >>> print(st)
    2 Trace(s) in Stream:
    AF.WHYM..SHZ | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.000000Z \
| 1.0 Hz, 86400 samples
    AF.EORO..SHZ | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.000000Z \
| 1.0 Hz, 86400 samples
    """
    st = []
    available_stations = _check_available_data(archive, arc_type, day)
    for station in stachans:
        if len(station[1]) == 2:
            # Cope with two char channel naming in seisan: expand the
            # two-character code to a wildcarded three-character code.
            station_map = (station[0], station[1][0] + '*' + station[1][1])
            available_stations_map = [(sta[0], sta[1][0] + '*' + sta[1][-1])
                                      for sta in available_stations]
        else:
            station_map = station
            available_stations_map = available_stations
        if station_map not in available_stations_map:
            # Requested station/channel not in the archive for this day:
            # warn and carry on rather than failing the whole request.
            msg = ' '.join([station[0], station_map[1], 'is not available for',
                            day.strftime('%Y/%m/%d')])
            warnings.warn(msg)
            continue
        if arc_type.lower() == 'seishub':
            client = SeishubClient(archive)
            st += client.get_waveforms(
                network='*', station=station_map[0], location='*',
                channel=station_map[1], starttime=UTCDateTime(day),
                endtime=UTCDateTime(day) + length)
        elif arc_type.lower() == 'fdsn':
            # Normalised to .lower() for consistency with the other branches.
            client = FDSNClient(archive)
            try:
                st += client.get_waveforms(
                    network='*', station=station_map[0], location='*',
                    channel=station_map[1], starttime=UTCDateTime(day),
                    endtime=UTCDateTime(day) + length)
            except FDSNException:
                warnings.warn('No data on server despite station being ' +
                              'available...')
                continue
        elif arc_type.lower() == 'day_vols':
            wavfiles = _get_station_file(os.path.join(
                archive, day.strftime('Y%Y' + os.sep + 'R%j.01')),
                station_map[0], station_map[1])
            for wavfile in wavfiles:
                if wavfile is None:
                    # _get_station_file may include None for files that did
                    # not match the station/channel - skip them.
                    continue
                st += read(wavfile, starttime=day, endtime=day + length)
        else:
            # Previously an unknown arc_type silently returned an empty
            # Stream; fail loudly instead.
            raise NotImplementedError(
                'Archive type %s is not supported' % arc_type)
    st = Stream(st)
    return st
def _get_station_file(path_name, station, channel, debug=0):
    """
    Helper function to find the files containing the given station/channel.

    :type path_name: str
    :param path_name: Path to files to check.
    :type station: str
    :param station: Station name to check for.
    :type channel: str
    :param channel: Channel name to check for.
    :type debug: int
    :param debug: Debug level, passed through to _check_data.

    :returns: list of unique filenames containing the station/channel.
    :rtype: list
    """
    wavfiles = glob.glob(path_name + os.sep + '*')
    out_files = {_check_data(wavfile, station, channel, debug=debug)
                 for wavfile in wavfiles}
    # _check_data returns None for files that do not contain the requested
    # station/channel - remove it so callers can safely read every entry.
    out_files.discard(None)
    return list(out_files)
def _check_data(wavfile, station, channel, debug=0):
    """
    Check a single waveform file for a station/channel pair.

    Reads only the headers of ``wavfile`` and reports whether any trace
    matches both the requested station and channel.

    :type wavfile: str
    :param wavfile: Wavefile path name to look in.
    :type station: str
    :param station: Station name to check for.
    :type channel: str
    :param channel: Channel name to check for.
    :type debug: int
    :param debug: Debug level, if > 1, will output what it is working on.

    :returns: the filename if a matching trace is found, otherwise None.
    """
    if debug > 1:
        print('Checking ' + wavfile)
    # headonly avoids reading waveform data - we only need the metadata.
    for trace in read(wavfile, headonly=True):
        stats = trace.stats
        if stats.station == station and stats.channel == channel:
            return wavfile
    return None
def _check_available_data(archive, arc_type, day):
    """
    Function to check what stations are available in the archive for a given
    day.

    :type archive: str
    :param archive: The archive source
    :type arc_type: str
    :param arc_type: The type of archive, can be: seishub, FDSN, day_vols
    :type day: datetime.date
    :param day: Date to retrieve data for

    :returns: list of tuples of (station, channel) as available.

    .. note:: Currently the seishub options are untested.
    """
    available_stations = []
    if arc_type.lower() == 'day_vols':
        # IRIS DMC style directory layout: Yyyyy/Rjjj.01/
        wavefiles = glob.glob(os.path.join(archive, day.strftime('Y%Y'),
                                           day.strftime('R%j.01'), '*'))
        for wavefile in wavefiles:
            # headonly: only the metadata is needed here.
            header = read(wavefile, headonly=True)
            available_stations.append((header[0].stats.station,
                                       header[0].stats.channel))
    elif arc_type.lower() == 'seishub':
        client = SeishubClient(archive)
        st = client.get_previews(starttime=UTCDateTime(day),
                                 endtime=UTCDateTime(day) + 86400)
        for tr in st:
            available_stations.append((tr.stats.station, tr.stats.channel))
    elif arc_type.lower() == 'fdsn':
        client = FDSNClient(archive)
        inventory = client.get_stations(starttime=UTCDateTime(day),
                                        endtime=UTCDateTime(day) + 86400,
                                        level='channel')
        for network in inventory:
            for station in network:
                for channel in station:
                    available_stations.append((station.code,
                                               channel.code))
    return available_stations
if __name__ == '__main__':
    # Run the docstring examples above as doctests when this module is
    # executed directly (some examples require network access to NCEDC).
    import doctest
    doctest.testmod()