diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml new file mode 100644 index 00000000..5766905a --- /dev/null +++ b/.github/workflows/draft-pdf.yml @@ -0,0 +1,28 @@ +# +# Build draft PDF that will be submitted to JOSS +# +name: Paper Draft + +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper/paper.md + - name: Upload + uses: actions/upload-artifact@v1 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. Note, this should be the same directory as the input + # paper.md + path: paper/paper.pdf diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 00000000..ce92c80d --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,35 @@ +@article{Obara2005, + title={A densely distributed high-sensitivity seismograph network in Japan: Hi-net by National Research Institute for Earth Science and Disaster Prevention}, + author={Obara, Kazushige and Kasahara, Keiji and Hori, Sadaki and Okada, Yoshimitsu}, + journal={Review of Scientific Instruments}, + volume={76}, + number={2}, + pages={021301}, + year={2005}, + month=feb, + DOI={10.1063/1.1854197} +} + +@article{Okada2014, + title={Recent progress of seismic observation networks in Japan —Hi-net, F-net, K-NET and KiK-net—}, + author={Okada, Yoshimitsu and Kasahara, Keiji and Hori, Sadaki and Obara, Kazushige and Sekiguchi, Shoji and Fujiwara, Hiroyuki and Yamamoto, Akira}, + journal={Earth, Planets and Space}, + volume={56}, + number={8}, + pages={xv–xxviii}, + year={2014}, + month=jun, + DOI={10.1186/BF03353076}, +} + +@article{ObsPy2015, + title={ObsPy: a bridge for seismology into the scientific Python ecosystem}, + author={Krischer, Lion and Megies, Tobias and Barsch, Robert and 
Beyreuther, Moritz and Lecocq, Thomas and Caudron, Corentin and Wassermann, Joachim}, + journal={Computational Science & Discovery}, + volume={8}, + number={1}, + pages={014003}, + year={2015}, + month=may, + DOI={10.1088/1749-4699/8/1/014003}, +} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 00000000..966b539a --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,145 @@ +--- +title: 'HinetPy: A Python package for accessing and processing NIED Hi-net seismic data' +tags: + - Python + - geophysics + - seismology + +authors: + - name: Dongdong Tian + orcid: 0000-0001-7967-1197 + affiliation: 1 +affiliations: + - name: School of Geophysics and Geomatics, China University of Geosciences, China + index: 1 +date: 2 April 2024 +bibliography: paper.bib +--- + +# Summary + +HinetPy is a Python package designed for researchers working with seismic data from the +National Research Institute for Earth Science and Disaster Resilience (NIED) Hi-net +(High-sensitivity seismograph network) in Japan. The seismic network comprises approximately +800 stations with high-quality seismic data. However, accessing and processing the data +can be challenging due to the limited functionality of the web UI and backend data server. +Furthermore, the seismic data is stored in a non-standard format, which adds an extra +layer of complexity. HinetPy solves these challenges by offering a user-friendly interface +for accessing seismic data from NIED Hi-net and converting it into commonly used data +formats. This streamlines the workflow for seismologists, allowing them to more effectively +utilize this valuable dataset. + +# Statement of need + +The National Research Institute for Earth Science and Disaster Resilience (NIED) operates +and maintains NIED Hi-net, a nationwide high-sensitivity seismograph network in Japan. 
+Since its establishment in October 2000, NIED Hi-net has grown to include around 800 +seismic stations equipped with three-component short-period seismometers [@Obara2005; @Okada2014]. +The NIED Hi-net website (https://www.hinet.bosai.go.jp/) provides access to high-quality +seismic data from 2004 onwards, including data from other seismic networks such as F-net, +S-net, V-net, and more. NIED Hi-net is well known for its valuable contributions to seismological research. + +## Challenges in accessing NIED Hi-net data + +To access Hi-net data, users need to register a Hi-net account. This registration process +is necessary to ensure data security and adherence to Hi-net's data usage policies. +Once registered, users can authenticate their accounts within HinetPy to access and +download the data. The NIED Hi-net data is freely accessible after user registration. +However, accessing Hi-net seismic data can still be challenging. Although the +seismological community has switched to standard web services, which allow users to request +waveform data using tools like ObsPy [@ObsPy2015], NIED Hi-net has unfortunately not upgraded its server to +support these web services. Users have to log in to the NIED Hi-net website and request data +manually. What’s more challenging are the limitations on data size and length in a +single request: the number of channels \* record length must be no larger than 12000 +minutes and record length must be no larger than 60 minutes. For NIED Hi-net, which +contains 800 seismic stations and 2400 channels (3 channels per station), the record +length must be no larger than 5 minutes. Thus, for a typical teleseismic event, we may +require 30 minutes of data, which means we need to divide the time range into 6 +subranges and post 6 requests separately. We also need to note that the NIED Hi-net website +doesn’t allow posting multiple data requests at the same time. 
Thus, we need to post +the request, wait for data preparation (which may take a few minutes), and then post +another request. After downloading all the files, we then need to combine them +into a single file. + +## Challenges in processing NIED Hi-net data + +NIED Hi-net data is stored in a non-standard format that differs from standard seismological data formats such as miniSEED, +StationXML, and QuakeML, making it difficult to exchange data with other seismograph +networks and researchers. As a result, researchers face barriers in accessing and utilizing +the high-quality data provided by Hi-net. Although the seismological community has long adopted standard +data formats such as miniSEED for waveforms, StationXML for station metadata, and QuakeML +for earthquake catalogs, NIED Hi-net still uses its own proprietary WIN32 format, +which is the format used by its own WIN32 system. This format presents obstacles for data +exchange and collaboration within the seismology community, hindering the broader +utilization of Hi-net data. In the WIN32 format, continuous waveform data is divided into +multiple one-minute segments. In addition, a companion text file, called the “channels table”, is +provided for instrumental metadata. NIED Hi-net also provides a series of commands in +their win32tools package to process WIN32 data and convert WIN32 data to the SAC format, +but there are no tools to convert the channels table to a more commonly used format +(e.g., SAC polezero files). + + +# HinetPy for easy data accessing and processing + +HinetPy is a Python package developed to address the challenges of accessing and +processing NIED Hi-net data. The package provides a simple and intuitive interface for +accessing Hi-net data, allowing researchers to easily download waveform data and station +metadata. HinetPy also includes tools for processing seismic data, mainly converting the +seismic data from win32 format to SAC format and building SAC polezero files from the +channels table. 
+ +Here is an example showing how to access and process NIED Hi-net waveform data. + +```python +from HinetPy import Client, win32 + +# You need a Hi-net account to access the data +client = Client("username", "password") + +# Let's try to request 20-minute data of the Hi-net network (with an internal +# network code of '0101') starting at 2010-01-01T00:00 (JST, GMT+0900) +data, ctable = client.get_continuous_waveform("0101", "201001010000", 20) + +# The request and download process usually takes a few minutes +# waiting for data request ... +# waiting for data download ... + +# Now you can see the data and corresponding channel table in your working directory +# waveform data (in win32 format) : 0101_201001010000_20.cnt +# channel table (plaintext file) : 0101_20100101.ch +# Let's convert data from win32 format to SAC format +win32.extract_sac(data, ctable) + +# Let's extract instrument response as PZ files from the channel table file +win32.extract_sacpz(ctable) + +# Now you can see several SAC and SAC_PZ files in your working directory +# N.NGUH.E.SAC N.NGUH.U.SAC N.NNMH.N.SAC +# N.NGUH.N.SAC N.NNMH.E.SAC N.NNMH.U.SAC +# ... +# N.NGUH.E.SAC_PZ N.NGUH.U.SAC_PZ N.NNMH.N.SAC_PZ +# N.NGUH.N.SAC_PZ N.NNMH.E.SAC_PZ N.NNMH.U.SAC_PZ +# ... +``` + +The package itself is platform-independent but it requires the win32tools to be compiled, +so using HinetPy should be easy on Linux and macOS but Windows is not tested. It is +available from PyPI and can be installed using Python’s package management tool `pip`. + +# Conclusions + +HinetPy provides a valuable tool for researchers working with Hi-net data, enabling them +to more easily access and process this high-quality dataset. By addressing the challenges +posed by the proprietary WIN32 format, HinetPy helps to facilitate data exchange and +collaboration within the seismology community, ultimately advancing our understanding of +seismic events and Earth's structure. 
+ +# Acknowledgments + +The HinetPy package was initially developed in 2013, when the author was a graduate +student at the University of Science and Technology of China. The package doesn’t contain +any NIED Hi-net data, not even a small sample. Please also note that redistributing any NIED +Hi-net data is prohibited and users should renew their account and report any +publications that use NIED Hi-net data annually. + +# References