from datetime import datetime
import numpy as np
import pandas as pd
import requests
import xarray as xr
from tonik import Storage
You can define any directory structure within a storage group. Here we use the hierarchy common for seismometers: network, site, sensor, and channel. Data can be stored at any of these levels.
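The hierarchy itself is not fixed; a minimal sketch with a made-up group name and only two levels:

other = Storage('weather', rootdir='/tmp')
st = other.get_substore('iceland', 'reykjavik')  # any depth of nesting works

We will start by generating fake spectrogram data and then store it under a channel.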
# Two days of 10-minute timestamps
dates = pd.date_range("2024-01-02", freq='10min', periods=288)
# Fake data: the absolute value of a random walk
data = np.abs(np.cumsum(np.random.normal(0, 8., len(dates))))
# Repeat the series across 10 frequency bins to mimic a spectrogram
data = np.tile(data, (10, 1))
freqs = np.arange(10)
xrd = xr.Dataset({'spectrogram': xr.DataArray(data, coords=[freqs, dates],
                                              dims=['frequency', 'datetime'])})
fig = xrd['spectrogram'].plot()
Now we will store the data under two different sites of the same experiment using the same sensor and channel names for both.
g = Storage('experiment', rootdir='/tmp')
st1 = g.get_substore('NET1', 'MDR1', '00', 'HHZ')
st2 = g.get_substore('NET1', 'MDR2', '00', 'HHZ')
st1.save(xrd, mode='w')
st2.save(xrd, mode='w')
g
Group: experiment
/tmp/experiment
/tmp/experiment/NET1
/tmp/experiment/NET1/MDR1
/tmp/experiment/NET1/MDR1/00
/tmp/experiment/NET1/MDR1/00/HHZ
/tmp/experiment/NET1/MDR2
/tmp/experiment/NET1/MDR2/00
/tmp/experiment/NET1/MDR2/00/HHZ
Next we want to retrieve the data we just saved. Before doing so, we have to set the timespan over which to retrieve it.
g.starttime = datetime(2024, 1, 2, 0, 0, 0)
g.endtime = datetime(2024, 1, 4, 0, 0, 0)
st = g.get_substore('NET1', 'MDR2', '00', 'HHZ')
st('spectrogram').plot()
<matplotlib.collections.QuadMesh at 0x7f813303e920>
# Now start the API server by running the following command in a terminal
# tonik_api --rootdir /tmp
Let's first have a look at what is available.
url = f"http://localhost:8003/inventory?group={g.name}"
requests.get(url).json()
{'experiment': [{'NET1': [{'MDR1': [{'00': [{'HHZ': ['spectrogram']}]}]}, {'MDR2': [{'00': [{'HHZ': ['spectrogram']}]}]}]}]}
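The nesting mirrors the directory tree on disk. If you prefer flat paths, you could walk the nested dictionaries; a minimal sketch (walk_inventory is a hypothetical helper, not part of tonik):

def walk_inventory(node, path=()):
    # Dicts map a directory name to a list of children;
    # leaves are plain strings, i.e. the feature names.
    if isinstance(node, dict):
        for key, children in node.items():
            for child in children:
                yield from walk_inventory(child, path + (key,))
    else:
        yield path + (node,)

for p in walk_inventory(requests.get(url).json()):
    print('/'.join(p))
experiment/NET1/MDR1/00/HHZ/spectrogram
experiment/NET1/MDR2/00/HHZ/spectrogram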
Now let's request the data we just stored.
url = "http://localhost:8003/feature?"
url += f"group={g.name}&subdir=NET1&subdir=MDR2&subdir=00&subdir=HHZ&name=spectrogram"
url += f"&starttime={g.starttime.isoformat()}&endtime={g.endtime.isoformat()}"
spec = pd.read_csv(url)
# The API returns timezone-aware timestamps, which xarray does not support yet
spec['dates'] = pd.to_datetime(spec['dates'], format='ISO8601').dt.tz_localize(None)
spec.set_index(['dates', 'freqs'], inplace=True)
spec = spec.to_xarray()
spec.transpose('freqs', 'dates')['feature'].plot()
<matplotlib.collections.QuadMesh at 0x7f8132ff3f10>
Note that we repeated the subdir parameter multiple times. It is important that you pass the subdirs in the order of your directory structure.
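Rather than concatenating the query string by hand, you can also let requests build it: passing a list as a parameter value encodes one subdir per element, in list order. A sketch, equivalent to the request above:

params = {
    'group': g.name,
    'subdir': ['NET1', 'MDR2', '00', 'HHZ'],  # must match the directory order
    'name': 'spectrogram',
    'starttime': g.starttime.isoformat(),
    'endtime': g.endtime.isoformat(),
}
r = requests.get("http://localhost:8003/feature", params=params)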
Using pandas instead of xarray
First we'll create some fake data again. Note that we name the feature in the DataFrame and also that we name the index datetime. The save function uses this information when creating the files on disk. If the DataFrame contains multiple features, they will be stored in separate files (see the sketch further below).
dates = pd.date_range("2024-01-02", freq='10min', periods=288)
data = np.abs(np.cumsum(np.random.normal(0, 8., len(dates))))
df = pd.DataFrame({'rsam': data}, index=dates)  # the column name becomes the feature name
df.index.name = 'datetime'  # save uses the index name when writing to disk
df.plot()
<Axes: xlabel='datetime'>
For illustration, this time we store the data directly under the site rather than under a channel.
st3 = g.get_substore('NET1', 'MDR3')
st3.save(df.to_xarray())
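Had the DataFrame contained more than one column, save would write each feature to its own file. A sketch (the second feature name, dsar, is made up for illustration):

df2 = pd.DataFrame({'rsam': data, 'dsar': data / 2.}, index=dates)
df2.index.name = 'datetime'
st3.save(df2.to_xarray())  # one file per feature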
url = "http://localhost:8003/feature?"
url += f"group={g.name}&subdir=NET1&subdir=MDR3&name=rsam"
url += "&starttime=2024-01-02T00:00:00&endtime=2024-01-04T00:00:00"
rsam = pd.read_csv(url, parse_dates=True, index_col=0, date_format='ISO8601')
rsam.plot()
<Axes: xlabel='dates'>
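Finally, the same data can also be read back directly through the storage object, just as we did for the spectrogram; g still has the timespan we set earlier:

st3('rsam').plot()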