From b19b9ddd522a7c37be406b9ee6cc45cb1c25bde3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enoc=20Mart=C3=ADnez?= <enoc.martinez@upc.edu> Date: Wed, 3 May 2023 12:50:30 +0200 Subject: [PATCH] Adding multidimensional NetCDF file generation --- mooda/waterframe/__init__.py | 2 +- mooda/waterframe/output/__init__.py | 1 + mooda/waterframe/output/to_multidim_nc.py | 99 +++++++++++++++++++++++ 3 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 mooda/waterframe/output/to_multidim_nc.py diff --git a/mooda/waterframe/__init__.py b/mooda/waterframe/__init__.py index e239fcf..383dd73 100644 --- a/mooda/waterframe/__init__.py +++ b/mooda/waterframe/__init__.py @@ -9,7 +9,7 @@ class WaterFrame: min, max, copy, use_only, rename, corr, max_diff, time_intervals, resample, slice_time, info_metadata, info_vocabulary, drop, reduce_memory, pres2depth, psal2asal, asal_temp2dens) - from .output import to_nc, to_pkl, to_json, to_es, metadata_to_html, to_csv + from .output import to_nc, to_pkl, to_json, to_es, metadata_to_html, to_csv, to_multidim_nc from .plot import plot_timeseries, plot_timebar, plot_hist, plot from .qc import qc_flat_test, qc_range_test, qc_spike_test, qc_replace, qc_syntax_test from .iplot import ( diff --git a/mooda/waterframe/output/__init__.py b/mooda/waterframe/output/__init__.py index 1e1f751..8c40dce 100644 --- a/mooda/waterframe/output/__init__.py +++ b/mooda/waterframe/output/__init__.py @@ -5,3 +5,4 @@ from .to_es import to_es from .metadata_to_html import metadata_to_html from .to_csv import to_csv +from .to_multidim_nc import to_multidim_nc diff --git a/mooda/waterframe/output/to_multidim_nc.py b/mooda/waterframe/output/to_multidim_nc.py new file mode 100644 index 0000000..c5f9a33 --- /dev/null +++ b/mooda/waterframe/output/to_multidim_nc.py @@ -0,0 +1,99 @@ +""" Function to be imported in a WaterFrame. It saves the WaterFrame into a multidimensional NetCDF file. 
from xarray import Dataset
import netCDF4 as nc
import pandas as pd
import numpy as np


def _join_list_attribute(value, join_attr):
    """Flatten a list-valued attribute into one string; return anything else unchanged."""
    if isinstance(value, list):
        return join_attr.join(str(item) for item in value)
    return value


def _set_attributes(target, attributes, join_attr):
    """Copy every key/value pair of ``attributes`` onto a NetCDF variable or dataset."""
    for key, value in attributes.items():
        target.setncattr(key, _join_list_attribute(value, join_attr))


def _qc_fill_value(fill_value):
    """Return a fill value that an unsigned-byte (``u1``) variable can hold."""
    is_integer = isinstance(fill_value, (int, np.integer)) and not isinstance(fill_value, bool)
    if is_integer and 0 <= fill_value <= 255:
        return int(fill_value)
    # 255 is the netCDF default fill value for unsigned bytes (NC_FILL_UBYTE).
    return 255


def _scatter_on_grid(values, positions, shape, fill, dtype):
    """Place one flat column on the dense dimension grid; cells without data keep ``fill``."""
    grid = np.full(shape, fill, dtype=dtype)
    present = ~pd.isna(values)  # NaN/None entries are left as fill instead of being cast
    cells = tuple(axis[present] for axis in positions)
    grid[cells] = values[present].astype(dtype)
    return grid


def to_multidim_nc(self, path, dimensions: list, time_key="TIME", compression=True, fill_value=-99999,
                   join_attr="; "):
    """
    It saves the WaterFrame into a multidimensional NetCDF using NetCDF4 library

    Every entry of ``dimensions`` becomes a NetCDF dimension plus a coordinate variable holding the
    sorted unique values of that column. Every remaining column is written as a variable defined over
    all the dimensions: each row is placed on the grid cell given by its dimension values and grid cells
    without a row are set to the fill value. Columns whose name ends in "_QC" are stored as unsigned
    bytes, the rest as floats. ``self.vocabulary`` entries are written as variable attributes (columns
    without an entry get none) and ``self.metadata`` as global attributes.

    Parameters
    ----------
        path: str
            Path of the NetCDF file to write.
        dimensions: list
            List of the variable names to be stored as dimensions. The list itself is not modified.
        time_key: str
            Name of the time column. When it is one of the dimensions it is moved to the last position
            and stored as seconds since 1970-01-01.
        compression: bool
            If True, use zlib compression
        fill_value: int or float
            Fill value. Quality control variables use it only if it fits in an unsigned byte,
            otherwise they use 255.
        join_attr: str
            Separator used to join list-valued attributes into a single string.

    Returns
    -------
        path: str
            Path where the file is placed.

    Raises
    ------
        ValueError
            If no dimension is given, if a dimension column has missing values or if two rows share
            the same combination of dimension values.
        KeyError
            If a dimension is not a column (or index level) of the WaterFrame data.
    """
    # NOTE(review): the previous docstring promised a metadata['id'] fallback when path is None, but
    # no such fallback was ever implemented; path is passed to netCDF4 as given.

    # Work on a copy so the caller's list is untouched; time (when present) goes last.
    dims = list(dimensions)
    if time_key in dims:
        dims.remove(time_key)
        dims.append(time_key)
    if not dims:
        raise ValueError("At least one dimension is required")

    df = self.data.reset_index()  # index levels become regular columns

    missing = [dim for dim in dims if dim not in df.columns]
    if missing:
        raise KeyError("Dimensions not found in the WaterFrame data: {}".format(missing))
    if df.duplicated(subset=dims).any():
        raise ValueError("Several rows share the same values for dimensions {}".format(dims))

    # For every dimension keep its sorted unique values and, for each row, the position of the
    # row's value inside them. Together the positions address one grid cell per row.
    coordinates = {}
    positions = []
    for dim in dims:
        column = df[dim]
        if dim == time_key:
            column = pd.to_datetime(column)
        codes, uniques = pd.factorize(column, sort=True)
        if (codes < 0).any():  # factorize flags missing values with -1
            raise ValueError("Dimension '{}' contains missing values".format(dim))
        coordinates[dim] = uniques
        positions.append(codes)
    positions = tuple(positions)
    shape = tuple(len(coordinates[dim]) for dim in dims)

    data_columns = [col for col in df.columns if col not in dims]
    qc_fill = _qc_fill_value(fill_value)
    nc_dims = tuple(dims)

    with nc.Dataset(path, "w", format="NETCDF4") as ncfile:
        for dim in dims:
            if dim == time_key:
                # convert timestamps to float
                # NOTE(review): the units used here are not written as an attribute; confirm they
                # agree with whatever the vocabulary declares for the time variable.
                times = coordinates[dim].to_pydatetime()
                values = nc.date2num(times, "seconds since 1970-01-01", calendar="standard")
            else:
                values = coordinates[dim].to_numpy()

            ncfile.createDimension(dim, len(values))  # fixed-length dimension
            if len(values) and isinstance(values[0], str):  # Some dimension may be a string (e.g. sensor_id)
                var = ncfile.createVariable(dim, str, (dim,), fill_value=fill_value, zlib=compression)
            else:
                var = ncfile.createVariable(dim, 'f8', (dim,), fill_value=fill_value, zlib=compression)

            var[:] = values  # assign dimension values
            _set_attributes(var, self.vocabulary.get(dim, {}), join_attr)

        for varname in data_columns:
            column = df[varname].to_numpy()
            if varname.endswith("_QC"):
                # Store Quality Control as unsigned bytes
                grid = _scatter_on_grid(column, positions, shape, qc_fill, np.uint8)
                var = ncfile.createVariable(varname, "u1", nc_dims, fill_value=qc_fill, zlib=compression)
            else:
                grid = _scatter_on_grid(column, positions, shape, fill_value, np.float64)
                var = ncfile.createVariable(varname, 'float', nc_dims, fill_value=fill_value, zlib=compression)
            var[:] = grid
            _set_attributes(var, self.vocabulary.get(varname, {}), join_attr)

        # Set global attributes
        _set_attributes(ncfile, self.metadata, join_attr)
    return path