From ff2a0a231f915d09190bf4fd0bdf274fa9c65c3f Mon Sep 17 00:00:00 2001
From: sharkinsspatial
Date: Thu, 27 Feb 2025 14:51:50 -0500
Subject: [PATCH] Include _FillValue encoding integration tests for
 HDFVirtualBackend.

---
 virtualizarr/readers/hdf/hdf.py                        |  8 ++------
 virtualizarr/tests/test_readers/conftest.py            | 14 ++++++++------
 .../tests/test_readers/test_hdf/test_hdf.py            |  6 ++++++
 .../test_readers/test_hdf/test_hdf_integration.py      | 12 +++++++++---
 4 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/virtualizarr/readers/hdf/hdf.py b/virtualizarr/readers/hdf/hdf.py
index daece4ea..08e35c6b 100644
--- a/virtualizarr/readers/hdf/hdf.py
+++ b/virtualizarr/readers/hdf/hdf.py
@@ -252,12 +252,7 @@ def _extract_cf_fill_value(
                 fillvalue = v.item()
             else:
                 fillvalue = v
-            if (
-                fillvalue is not None
-                and h5obj.dtype.kind not in "S"
-                and h5obj.dtype.fields is None
-            ):
-                fillvalue = FillValueCoder.encode(fillvalue, h5obj.dtype)
+            fillvalue = FillValueCoder.encode(fillvalue, h5obj.dtype)
         return fillvalue
 
     @staticmethod
@@ -334,6 +329,7 @@ def _dataset_to_variable(
         cfcodec = cfcodec_from_dataset(dataset)
         attrs = HDFVirtualBackend._extract_attrs(dataset)
         cf_fill_value = HDFVirtualBackend._extract_cf_fill_value(dataset)
+        attrs.pop("_FillValue", None)
 
         if cfcodec:
             codecs.insert(0, cfcodec["codec"])
diff --git a/virtualizarr/tests/test_readers/conftest.py b/virtualizarr/tests/test_readers/conftest.py
index bb146e60..4884db4a 100644
--- a/virtualizarr/tests/test_readers/conftest.py
+++ b/virtualizarr/tests/test_readers/conftest.py
@@ -375,9 +375,9 @@ def scalar_fill_value_hdf5_file(tmpdir):
     {"fill_value": -9999, "data": np.random.randint(0, 10, size=(5))},
     {"fill_value": -9999.0, "data": np.random.random(5)},
     {"fill_value": np.nan, "data": np.random.random(5)},
-    {"fill_value": True, "data": np.random.choice([True, False], size=(5))},
-    {"fill_value": "NA".encode("ascii"), "data": np.array(["one"], dtype="S")},
-    # {"fill_value": compound_fill, "data": compound_data},
+    {"fill_value": False, "data": np.array([True, False, False, True, True])},
+    {"fill_value": "NaN", "data": np.array(["three"], dtype="S10")},
+    {"fill_value": compound_fill, "data": compound_data},
 ]
 
 
@@ -386,9 +386,11 @@ def cf_fill_value_hdf5_file(tmpdir, request):
     filepath = f"{tmpdir}/cf_fill_value.nc"
     f = h5py.File(filepath, "w")
     dset = f.create_dataset(name="data", data=request.param["data"], chunks=True)
-    wat = f.create_dataset(name="wat", data=request.param["data"], chunks=True)
-    wat.make_scale()
-    dset.dims[0].attach_scale(wat)
+    dim_scale = f.create_dataset(
+        name="dim_scale", data=request.param["data"], chunks=True
+    )
+    dim_scale.make_scale()
+    dset.dims[0].attach_scale(dim_scale)
     dset.attrs["_FillValue"] = request.param["fill_value"]
     return filepath
 
diff --git a/virtualizarr/tests/test_readers/test_hdf/test_hdf.py b/virtualizarr/tests/test_readers/test_hdf/test_hdf.py
index 00f1a8fe..d5146e88 100644
--- a/virtualizarr/tests/test_readers/test_hdf/test_hdf.py
+++ b/virtualizarr/tests/test_readers/test_hdf/test_hdf.py
@@ -123,6 +123,12 @@ def test_scalar_fill_value(self, scalar_fill_value_hdf5_file):
     def test_cf_fill_value(self, cf_fill_value_hdf5_file):
         f = h5py.File(cf_fill_value_hdf5_file)
         ds = f["data"]
+        if ds.dtype.kind in "S":
+            pytest.xfail("Investigate fixed-length binary encoding in Zarr v3")
+        if ds.dtype.names:
+            pytest.xfail(
+                "To fix, structured dtype fill value encoding for Zarr backend"
+            )
         var = HDFVirtualBackend._dataset_to_variable(
             cf_fill_value_hdf5_file, ds, group=""
         )
diff --git a/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py b/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py
index b02e49ab..9de95aa8 100644
--- a/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py
+++ b/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py
@@ -72,8 +72,14 @@ def test_non_coord_dim_roundtrip(self, tmpdir, non_coord_dim):
 
     @requires_icechunk
     def test_cf_fill_value_roundtrip(self, tmpdir, cf_fill_value_hdf5_file):
-        # ds = xr.open_dataset(cf_fill_value_hdf5_file)
+        ds = xr.open_dataset(cf_fill_value_hdf5_file)
+        if ds["data"].dtype.names:
+            pytest.xfail(
+                "To fix, structured dtype fill value encoding for Zarr backend"
+            )
         vds = virtualizarr.open_virtual_dataset(
-            cf_fill_value_hdf5_file, backend=HDFVirtualBackend
+            cf_fill_value_hdf5_file,
+            backend=HDFVirtualBackend,
         )
-        roundtrip_as_in_memory_icechunk(vds, tmpdir, decode_times=False)
+        roundtrip = roundtrip_as_in_memory_icechunk(vds, tmpdir, decode_times=False)
+        xrt.assert_equal(ds, roundtrip)