-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpds_trig_mseed_reference.py
70 lines (56 loc) · 2.91 KB
/
pds_trig_mseed_reference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
"""
The mseed_referencer class acquires a list of mseed files for event waveforms.
These waveforms are referenced from a mounted path to the SCEDC Open Data Set (that the user needs to set up).
The waveforms are for events that can be queried by year, month, and magnitude within the class.
"""
import pandas as pd
import boto3
class mseed_referencer:
    """Build paths to miniSEED event waveforms in a mounted SCEDC Open Data Set.

    Events are selected from the yearly SCEDC catalog index by year, month
    and minimum magnitude; each selected event is mapped to the path of its
    ``.ms`` waveform file underneath ``scedc_path``.

    Attributes:
        year: Catalog year to query (also selects which catalog CSV is read).
        month: Calendar month (compared against the parsed event date).
        mag: Minimum magnitude; events with ``MAG >= mag`` are kept.
        scedc_path: Root of the mounted Open Data Set, without trailing slash.
        s3res: boto3 S3 resource created at construction time. It is not used
            by the methods of this class itself.
    """

    # Directory, relative to ``scedc_path``, that holds the event waveforms.
    PARENT_DIR = 'event_waveforms'

    def __init__(self, year, month, mag, scedc_path):
        super().__init__()
        self.year = year
        self.month = month
        self.mag = mag
        self.scedc_path = scedc_path
        self.s3res = boto3.resource('s3')

    def get_prefix(self, event_time) -> str:
        """Return the Open Data Set prefix for an event.

        Args:
            event_time: Event origin time as a pandas ``Timestamp`` (anything
                exposing ``year`` and ``dayofyear`` works).

        Returns:
            ``'<year>/<year>_<day-of-year, zero padded to 3 digits>/'``.
        """
        return '{}/{}_{:03d}/'.format(
            event_time.year, event_time.year, event_time.dayofyear)

    def get_events(self) -> list:
        """List waveform file paths for the events matching this query.

        Reads ``<scedc_path>/earthquake_catalogs/index/csv/year=<year>/
        <year>_catalog_index.csv`` and keeps the events that occurred in
        ``self.year``/``self.month`` with ``MAG >= self.mag``.

        Returns:
            A list of strings of the form
            ``<scedc_path>/event_waveforms/<PREFIX><MS_FILENAME>``; empty when
            no event matches.

        Raises:
            KeyError: If the catalog has neither an ``ORIGIN_DATETIME`` nor a
                ``YYYY/MM/DD`` column (or lacks another required column such
                as ``MAG``).
        """
        catalog_file = '{}_catalog_index.csv'.format(self.year)
        catalog_path = '{}/earthquake_catalogs/index/csv/year={}/{}'.format(
            self.scedc_path, self.year, catalog_file)
        catalog = pd.read_csv(catalog_path)

        if 'ORIGIN_DATETIME' in catalog.columns:
            # Timestamps containing ":60" are dropped before parsing so that
            # pd.to_datetime only sees well-formed values.
            malformed = catalog['ORIGIN_DATETIME'].str.contains(":60", na=False)
            # .copy() so the column assignment below targets a real frame and
            # not a view of the unfiltered catalog.
            catalog = catalog[~malformed].copy()
            catalog['eventdate'] = pd.to_datetime(catalog['ORIGIN_DATETIME'])
        elif 'YYYY/MM/DD' in catalog.columns:
            catalog['eventdate'] = pd.to_datetime(catalog['YYYY/MM/DD'])
        else:
            raise KeyError(
                "catalog {!r} has neither an 'ORIGIN_DATETIME' nor a "
                "'YYYY/MM/DD' column".format(catalog_path))

        event_dates = catalog['eventdate']
        matches = (
            (event_dates.dt.year == self.year)
            & (event_dates.dt.month == self.month)
            & (catalog['MAG'] >= self.mag)
        )
        events = catalog[matches].copy()

        if 'PREFIX' not in events.columns:
            events['PREFIX'] = events['eventdate'].apply(self.get_prefix)

        if 'MS_FILENAME' not in events.columns:
            events['MS_FILENAME'] = events['EVID'].astype(str) + '.ms'
        else:
            # Some catalogs (e.g. 1992) have empty MS_FILENAME cells. Fill
            # only those from the event id and keep the names the catalog
            # does provide.
            missing = events['MS_FILENAME'].isna()
            if missing.any():
                fallback = (events['EVID'].astype(str) + '.ms').astype(object)
                events['MS_FILENAME'] = (
                    events['MS_FILENAME'].astype(object).where(~missing, fallback))

        events = events.dropna(subset=['PREFIX', 'MS_FILENAME'])
        if events.empty:
            return []

        waveform_root = '{}/{}/'.format(self.scedc_path, self.PARENT_DIR)
        return (waveform_root + events['PREFIX'] + events['MS_FILENAME']).tolist()