Source code for seisbench.data.dummy

import numpy as np
import pandas as pd
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

import seisbench
import seisbench.util

from .base import WaveformBenchmarkDataset


[docs] class DummyDataset(WaveformBenchmarkDataset): """ A dummy dataset visualizing the implementation of custom datasets """ def __init__(self, **kwargs): citation = ( "Münchmeyer, Jannes; Bindi, Dino; Sippl, Christian; Leser, Ulf; Tilmann, Frederik (2019): " "Magnitude scales, attenuation models and feature matrices for the IPOC catalog. " "V. 1.0. GFZ Data Services. https://doi.org/10.5880/GFZ.2.4.2019.004" ) super().__init__(citation=citation, repository_lookup=True, **kwargs) def _download_dataset(self, writer, trace_length=60, **kwargs): sampling_rate = 20 writer.metadata_dict = { "time": "trace_start_time", "latitude": "source_latitude_deg", "longitude": "source_longitude_deg", "depth": "source_depth_km", "cls": "source_event_category", "MA": "source_magnitude", "ML": "source_magnitude2", "std_MA": "source_magnitude_uncertainty", "std_ML": "source_magnitude_uncertainty2", } writer.data_format = { "dimension_order": "CW", "component_order": "ZNE", "sampling_rate": sampling_rate, "measurement": "velocity", "unit": "counts", "instrument_response": "not restituted", } path = self.path path.mkdir(parents=True, exist_ok=True) seisbench.util.download_ftp( "datapub.gfz-potsdam.de", "download/10.5880.GFZ.2.4.2019.004/IPOC_catalog_magnitudes.csv", path / "raw_catalog.csv", progress_bar=False, ) metadata = pd.read_csv(path / "raw_catalog.csv") metadata = metadata.iloc[:100].copy() def to_tracename(x): for c in "/:.": x = x.replace(c, "_") return x client = Client("GFZ") inv = client.get_stations( net="CX", sta="PB01", starttime=UTCDateTime.strptime( "2007/01/01 00:00:00.00", "%Y/%m/%d %H:%M:%S.%f" ), ) metadata["trace_name"] = metadata["time"].apply(to_tracename) metadata["station_network_code"] = "CX" metadata["station_code"] = "PB01" metadata["station_type"] = "BH" metadata["station_latitude_deg"] = inv[0][0].latitude metadata["station_longitude_deg"] = inv[0][0].longitude metadata["station_elevation_m"] = inv[0][0].elevation metadata["source_magnitude_type"] = "MA" metadata["source_magnitude_type2"] = "ML" splits = 60 * ["train"] + 10 * ["dev"] + 30 * ["test"] metadata["split"] = splits writer.set_total(len(metadata)) for _, row in metadata.iterrows(): time = row["time"] waveform = np.zeros((3, sampling_rate * trace_length)) time = UTCDateTime.strptime(time, "%Y/%m/%d %H:%M:%S.%f") stream = client.get_waveforms( "CX", "PB01", "*", "BH?", time, time + trace_length ) for cid, component in enumerate("ZNE"): ctrace = stream.select(channel=f"??{component}")[0] waveform[cid] = ctrace.data[: sampling_rate * trace_length].astype( float ) writer.add_trace(row, waveform)
[docs] class ChunkedDummyDataset(WaveformBenchmarkDataset): """ A chunked dummy dataset visualizing the implementation of custom datasets with chunking """ def __init__(self, **kwargs): citation = ( "Münchmeyer, Jannes; Bindi, Dino; Sippl, Christian; Leser, Ulf; Tilmann, Frederik (2019): " "Magnitude scales, attenuation models and feature matrices for the IPOC catalog. " "V. 1.0. GFZ Data Services. https://doi.org/10.5880/GFZ.2.4.2019.004" ) # Write chunks to file chunks_path = self.path / "chunks" if not chunks_path.is_file(): self.path.mkdir(exist_ok=True, parents=True) with open(chunks_path, "w") as f: f.write("0\n1\n") super().__init__(citation=citation, repository_lookup=True, **kwargs) def _download_dataset(self, writer, chunk, trace_length=60, **kwargs): sampling_rate = 20 writer.metadata_dict = { "time": "trace_start_time", "latitude": "source_latitude_deg", "longitude": "source_longitude_deg", "depth": "source_depth_km", "cls": "source_event_category", "MA": "source_magnitude", "ML": "source_magnitude2", "std_MA": "source_magnitude_uncertainty", "std_ML": "source_magnitude_uncertainty2", } writer.data_format = { "dimension_order": "CW", "component_order": "ZNE", "sampling_rate": sampling_rate, "measurement": "velocity", "unit": "counts", "instrument_response": "not restituted", } path = self.path path.mkdir(parents=True, exist_ok=True) seisbench.util.download_ftp( "datapub.gfz-potsdam.de", "download/10.5880.GFZ.2.4.2019.004/IPOC_catalog_magnitudes.csv", path / "raw_catalog.csv", progress_bar=False, ) metadata = pd.read_csv(path / "raw_catalog.csv") if chunk == "0": metadata = metadata.iloc[:100].copy() elif chunk == "1": metadata = metadata.iloc[100:200].copy() else: raise ValueError(f'Unknown chunk "{chunk}"') def to_tracename(x): for c in "/:.": x = x.replace(c, "_") return x client = Client("GFZ") inv = client.get_stations( net="CX", sta="PB01", starttime=UTCDateTime.strptime( "2007/01/01 00:00:00.00", "%Y/%m/%d %H:%M:%S.%f" ), ) metadata["trace_name"] = metadata["time"].apply(to_tracename) metadata["station_network_code"] = "CX" metadata["station_code"] = "PB01" metadata["station_type"] = "BH" metadata["station_latitude_deg"] = inv[0][0].latitude metadata["station_longitude_deg"] = inv[0][0].longitude metadata["station_elevation_m"] = inv[0][0].elevation metadata["source_magnitude_type"] = "MA" metadata["source_magnitude_type2"] = "ML" splits = 60 * ["train"] + 10 * ["dev"] + 30 * ["test"] metadata["split"] = splits writer.set_total(len(metadata)) for _, row in metadata.iterrows(): time = row["time"] waveform = np.zeros((3, sampling_rate * trace_length)) time = UTCDateTime.strptime(time, "%Y/%m/%d %H:%M:%S.%f") stream = client.get_waveforms( "CX", "PB01", "*", "BH?", time, time + trace_length ) for cid, component in enumerate("ZNE"): ctrace = stream.select(channel=f"??{component}")[0] waveform[cid] = ctrace.data[: sampling_rate * trace_length].astype( float ) writer.add_trace(row, waveform)