From e25ca3905148385b156b4f6a821ce3cc8e32045b Mon Sep 17 00:00:00 2001
From: sharkinsspatial
Date: Thu, 20 Feb 2025 09:57:31 -0500
Subject: [PATCH] Handle xarray zarr backend decoding logic.

---
Review notes (this section is not part of the commit message):

* hdf.py: `h5obj.dtype.kind not in "S"` is a substring test; for a
  one-character kind code it behaves like `h5obj.dtype.kind != "S"`.
* hdf.py: the new `FillValueCoder.encode` step is skipped when the dtype
  kind is "S" or when `h5obj.dtype.fields` is not None, so byte-string and
  compound fill values are returned as read from the attribute.
* conftest.py: the two touched fixtures open `h5py.File(filepath, "w")` and
  return the path with no explicit close in the lines shown here; a
  `with h5py.File(...)` block would be worth considering (TODO confirm
  against the full file).

 virtualizarr/readers/hdf/hdf.py             |  8 ++++-
 virtualizarr/tests/test_readers/conftest.py | 40 +++++++++++++--------
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/virtualizarr/readers/hdf/hdf.py b/virtualizarr/readers/hdf/hdf.py
index 4a155913..9ce9c377 100644
--- a/virtualizarr/readers/hdf/hdf.py
+++ b/virtualizarr/readers/hdf/hdf.py
@@ -14,6 +14,7 @@
 
 import numpy as np
 import xarray as xr
+from xarray.backends.zarr import FillValueCoder
 
 from virtualizarr.manifests import (
     ChunkEntry,
@@ -248,11 +249,16 @@ def _extract_cf_fill_value(
         fillvalue = None
         for n, v in h5obj.attrs.items():
             if n == "_FillValue":
-                # Extract scalar _FillValue
                 if isinstance(v, np.ndarray) and v.size == 1:
                     fillvalue = v.item()
                 else:
                     fillvalue = v
+                if (
+                    fillvalue is not None
+                    and h5obj.dtype.kind not in "S"
+                    and h5obj.dtype.fields is None
+                ):
+                    fillvalue = FillValueCoder.encode(fillvalue, h5obj.dtype)
         return fillvalue
 
     @staticmethod
diff --git a/virtualizarr/tests/test_readers/conftest.py b/virtualizarr/tests/test_readers/conftest.py
index 52626e45..658de4a9 100644
--- a/virtualizarr/tests/test_readers/conftest.py
+++ b/virtualizarr/tests/test_readers/conftest.py
@@ -354,16 +354,30 @@ def scalar_fill_value_hdf5_file(tmpdir):
     return filepath
 
 
+compound_dtype = np.dtype(
+    [
+        ("id", "i4"),  # 4-byte integer
+        ("temperature", "f4"),  # 4-byte float
+    ]
+)
+
+compound_data = np.array(
+    [
+        (1, 98.6),
+        (2, 101.3),
+    ],
+    dtype=compound_dtype,
+)
+
+compound_fill = (-9999, -9999.0)
+
 fill_values = [
-    -9999,  # Integer fill value
-    -9999.0,  # Floating-point fill value
-    np.nan,  # NaN as fill value
-    True,
-    False,  # Boolean fill values
-    "N/A",  # String fill value
-    b"Unknown",  # Bytes fill value
-    1 + 2j,  # Complex number fill value
-    (b"Unknown", -1),  # Structured type (Tuple)
+    {"fill_value": -9999, "data": np.random.randint(0, 10, size=(5))},
+    {"fill_value": -9999.0, "data": np.random.random(5)},
+    {"fill_value": np.nan, "data": np.random.random(5)},
+    {"fill_value": True, "data": np.random.choice([True, False], size=(5))},
+    {"fill_value": "N/A", "data": np.array(["one", "two"], dtype="S")},
+    {"fill_value": compound_fill, "data": compound_data},
 ]
 
 
@@ -371,10 +385,8 @@ def scalar_fill_value_hdf5_file(tmpdir):
 def cf_fill_value_hdf5_file(tmpdir, request):
     filepath = f"{tmpdir}/cf_fill_value.nc"
     f = h5py.File(filepath, "w")
-    data = np.random.randint(0, 10, size=(5))
-    dset = f.create_dataset(name="data", data=data, chunks=True)
-    if request.param is not None:
-        dset.attrs["_FillValue"] = request.param
+    dset = f.create_dataset(name="data", data=request.param["data"], chunks=True)
+    dset.attrs["_FillValue"] = request.param["fill_value"]
     return filepath
 
 
@@ -382,7 +394,7 @@ def cf_fill_value_hdf5_file(tmpdir, request):
 def cf_array_fill_value_hdf5_file(tmpdir):
     filepath = f"{tmpdir}/cf_array_fill_value.nc"
     f = h5py.File(filepath, "w")
-    data = np.random.randint(0, 10, size=(5))
+    data = np.random.random(5)
     dset = f.create_dataset(name="data", data=data, chunks=True)
     dset.attrs["_FillValue"] = np.array([np.nan])
     return filepath