diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py
index f38d5c2c..19de6345 100644
--- a/virtualizarr/tests/__init__.py
+++ b/virtualizarr/tests/__init__.py
@@ -1,8 +1,10 @@
 import importlib
 import itertools
 
+import fsspec
 import numpy as np
 import pytest
+import xarray as xr
 from packaging.version import Version
 
 from virtualizarr.manifests import ChunkManifest, ManifestArray
@@ -105,3 +107,15 @@ def offset_from_chunk_key(ind: tuple[int, ...]) -> int:
 
 def length_from_chunk_key(ind: tuple[int, ...]) -> int:
     return sum(ind) + 5
+
+
+def open_dataset_kerchunk(
+    filename_or_obj: str, *, storage_options=None, **kwargs
+) -> xr.Dataset:
+    """Equivalent to ``xr.open_dataset(..., engine="kerchunk")`` but without depending on
+    the kerchunk library
+    """
+    m = fsspec.filesystem(
+        "reference", fo=filename_or_obj, **(storage_options or {})
+    ).get_mapper()
+    return xr.open_dataset(m, engine="zarr", consolidated=False, **kwargs)
diff --git a/virtualizarr/tests/test_backend.py b/virtualizarr/tests/test_backend.py
index e4157b73..41bf9c2d 100644
--- a/virtualizarr/tests/test_backend.py
+++ b/virtualizarr/tests/test_backend.py
@@ -15,6 +15,7 @@
 from virtualizarr.readers.hdf import HDFVirtualBackend
 from virtualizarr.tests import (
     has_astropy,
+    open_dataset_kerchunk,
     parametrize_over_hdf_backends,
     requires_hdf5plugin,
     requires_imagecodecs,
@@ -321,7 +322,7 @@ def test_virtualizarr_vs_local_nisar(self, hdf_backend):
         )
         tmpref = "/tmp/cmip6.json"
         vds.virtualize.to_kerchunk(tmpref, format="json")
-        dsV = xr.open_dataset(tmpref, engine="kerchunk")
+        dsV = open_dataset_kerchunk(tmpref)
 
         # xrt.assert_identical(dsXR, dsV) #Attribute order changes
         xrt.assert_equal(dsXR, dsV)
diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py
index 95be3de8..aac5f17b 100644
--- a/virtualizarr/tests/test_integration.py
+++ b/virtualizarr/tests/test_integration.py
@@ -8,7 +8,13 @@
 
 from virtualizarr import open_virtual_dataset
 from virtualizarr.manifests import ChunkManifest, ManifestArray
-from virtualizarr.tests import parametrize_over_hdf_backends, requires_kerchunk
+from virtualizarr.tests import (
+    has_kerchunk,
+    open_dataset_kerchunk,
+    parametrize_over_hdf_backends,
+    requires_kerchunk,
+    requires_zarr_python,
+)
 from virtualizarr.translators.kerchunk import (
     dataset_from_kerchunk_refs,
 )
@@ -84,8 +90,10 @@ def test_numpy_arrays_to_inlined_kerchunk_refs(
     assert refs["refs"]["time/0"] == expected["refs"]["time/0"]
 
 
-@requires_kerchunk
-@pytest.mark.parametrize("format", ["dict", "json", "parquet"])
+@requires_zarr_python
+@pytest.mark.parametrize(
+    "format", ["dict", "json", "parquet"] if has_kerchunk else ["dict", "json"]
+)
 class TestKerchunkRoundtrip:
     @parametrize_over_hdf_backends
     def test_kerchunk_roundtrip_no_concat(self, tmpdir, format, hdf_backend):
@@ -103,14 +111,14 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format, hdf_backend):
             ds_refs = vds.virtualize.to_kerchunk(format=format)
 
             # use fsspec to read the dataset from the kerchunk references dict
-            roundtrip = xr.open_dataset(ds_refs, engine="kerchunk", decode_times=False)
+            roundtrip = open_dataset_kerchunk(ds_refs, decode_times=False)
         else:
             # write those references to disk as kerchunk references format
             vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)
 
             # use fsspec to read the dataset from disk via the kerchunk references
-            roundtrip = xr.open_dataset(
-                f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
+            roundtrip = open_dataset_kerchunk(
f"{tmpdir}/refs.{format}", decode_times=False ) # assert all_close to original dataset @@ -164,16 +172,14 @@ def test_kerchunk_roundtrip_concat( ds_refs = vds.virtualize.to_kerchunk(format=format) # use fsspec to read the dataset from the kerchunk references dict - roundtrip = xr.open_dataset( - ds_refs, engine="kerchunk", decode_times=decode_times - ) + roundtrip = open_dataset_kerchunk(ds_refs, decode_times=decode_times) else: # write those references to disk as kerchunk references format vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format) # use fsspec to read the dataset from disk via the kerchunk references - roundtrip = xr.open_dataset( - f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=decode_times + roundtrip = open_dataset_kerchunk( + f"{tmpdir}/refs.{format}", decode_times=decode_times ) if decode_times is False: @@ -214,14 +220,14 @@ def test_non_dimension_coordinates(self, tmpdir, format, hdf_backend): ds_refs = vds.virtualize.to_kerchunk(format=format) # use fsspec to read the dataset from the kerchunk references dict - roundtrip = xr.open_dataset(ds_refs, engine="kerchunk", decode_times=False) + roundtrip = open_dataset_kerchunk(ds_refs, decode_times=False) else: # write those references to disk as kerchunk references format vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format) # use fsspec to read the dataset from disk via the kerchunk references - roundtrip = xr.open_dataset( - f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False + roundtrip = open_dataset_kerchunk( + f"{tmpdir}/refs.{format}", decode_times=False ) # assert equal to original dataset @@ -265,13 +271,13 @@ def test_datetime64_dtype_fill_value(self, tmpdir, format): ds_refs = ds.virtualize.to_kerchunk(format=format) # use fsspec to read the dataset from the kerchunk references dict - roundtrip = xr.open_dataset(ds_refs, engine="kerchunk") + roundtrip = open_dataset_kerchunk(ds_refs) else: # write those references to disk as kerchunk references format ds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format) # use fsspec to read the dataset from disk via the kerchunk references - roundtrip = xr.open_dataset(f"{tmpdir}/refs.{format}", engine="kerchunk") + roundtrip = open_dataset_kerchunk(f"{tmpdir}/refs.{format}") assert roundtrip.a.attrs == ds.a.attrs diff --git a/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py b/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py index bebc4560..e43ae1f4 100644 --- a/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py +++ b/virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py @@ -4,16 +4,21 @@ import virtualizarr from virtualizarr.readers.hdf import HDFVirtualBackend -from virtualizarr.tests import requires_kerchunk +from virtualizarr.tests import ( + open_dataset_kerchunk, + requires_hdf5plugin, + requires_imagecodecs, +) -@requires_kerchunk +@requires_hdf5plugin +@requires_imagecodecs class TestIntegration: @pytest.mark.xfail( reason="0 time start is being interpreted as fillvalue see issues/280" ) def test_filters_h5netcdf_roundtrip( - self, tmpdir, filter_encoded_roundtrip_hdf5_file, backend=HDFVirtualBackend + self, tmpdir, filter_encoded_roundtrip_hdf5_file ): ds = xr.open_dataset(filter_encoded_roundtrip_hdf5_file, decode_times=True) vds = virtualizarr.open_virtual_dataset( @@ -24,7 +29,7 @@ def test_filters_h5netcdf_roundtrip( ) kerchunk_file = f"{tmpdir}/kerchunk.json" vds.virtualize.to_kerchunk(kerchunk_file, format="json") - roundtrip = 
xr.open_dataset(kerchunk_file, engine="kerchunk", decode_times=True) + roundtrip = open_dataset_kerchunk(kerchunk_file, decode_times=True) xrt.assert_allclose(ds, roundtrip) @pytest.mark.xfail( @@ -37,8 +42,8 @@ def test_filters_netcdf4_roundtrip( ds = xr.open_dataset(filepath) vds = virtualizarr.open_virtual_dataset(filepath, backend=HDFVirtualBackend) kerchunk_file = f"{tmpdir}/kerchunk.json" - vds.virtualize.to_kerchunk(kerchunk_file, format="json") - roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk") + vds.virtualize.to_kerchunk(kerchunk_file, format="dict") + roundtrip = open_dataset_kerchunk(kerchunk_file) xrt.assert_equal(ds, roundtrip) def test_filter_and_cf_roundtrip(self, tmpdir, filter_and_cf_roundtrip_hdf5_file): @@ -48,5 +53,5 @@ def test_filter_and_cf_roundtrip(self, tmpdir, filter_and_cf_roundtrip_hdf5_file ) kerchunk_file = f"{tmpdir}/filter_cf_kerchunk.json" vds.virtualize.to_kerchunk(kerchunk_file, format="json") - roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk") + roundtrip = open_dataset_kerchunk(kerchunk_file) xrt.assert_allclose(ds, roundtrip)
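
A minimal usage sketch of the new `open_dataset_kerchunk` helper: it routes kerchunk references through fsspec's "reference" filesystem and xarray's `zarr` engine, so the kerchunk package itself is never imported. The snippet assumes `fsspec` and `zarr` are installed, and `refs.json` is a hypothetical kerchunk JSON reference file on disk:

```python
from virtualizarr.tests import open_dataset_kerchunk

# Open a kerchunk JSON reference file from disk; behaves like
# xr.open_dataset("refs.json", engine="kerchunk") without importing kerchunk.
ds = open_dataset_kerchunk("refs.json", decode_times=False)

# An in-memory references dict works too, since fsspec's "reference"
# filesystem accepts either a path or a dict via its ``fo`` argument:
# refs = vds.virtualize.to_kerchunk(format="dict")
# ds = open_dataset_kerchunk(refs, decode_times=False)
```

Passing `consolidated=False` inside the helper tells the zarr backend not to probe for consolidated metadata, which kerchunk reference stores do not provide.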