From 9973b6e3dba4164997749d426a06c66d7a57b87c Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sat, 26 Nov 2022 21:22:42 -0500 Subject: [PATCH] implement Zarr v3 spec support (#6475) * make zarr backend compatible with v3 spec * add tests for Zarr v3 stores * add tests for Zarr v3 stores when the store is not a StoreV3 class In this case where create_zarr_target returns a string, we must specify zarr_version=3 when opening/writing a store to make sure a version 3 store will be created rather than the default of a version 2 store. * update import path to match Zarr v2.12 and v2.13 experimental API remove path='xarray' default for zarr v3 path=None should work as of Zarr v2.13 * flake8 fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disallow consolidated metadata for zarr v3 * whats new a + remove more consolidated metadata for v3 * activate upstream dev test for zarr v3 * better typing * untype zarr_version in open_dataset * update whats new * [test-upstream] * update comment * fix whats new * update whats new Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Joseph Hamman --- .github/workflows/upstream-dev-ci.yaml | 1 + doc/whats-new.rst | 3 +- xarray/backends/api.py | 11 ++ xarray/backends/zarr.py | 35 ++++ xarray/core/dataset.py | 10 + xarray/tests/test_backends.py | 260 ++++++++++++++++++------- 6 files changed, 253 insertions(+), 67 deletions(-) diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 9748eda7dcf..68bd0c15067 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -80,6 +80,7 @@ jobs: if: success() id: status run: | + export ZARR_V3_EXPERIMENTAL_API=1 python -m pytest --timeout=60 -rf \ --report-log output-${{ matrix.python-version }}-log.jsonl - name: Generate and publish the report diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 04a5e7b4b1e..b8a2f47bcf8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,7 +14,6 @@ What's New np.random.seed(123456) - .. _whats-new.2022.11.1: v2022.11.1 (unreleased) @@ -23,6 +22,8 @@ v2022.11.1 (unreleased) New Features ~~~~~~~~~~~~ +- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`). + By `Gregory Lee `_ and `Joe Hamman `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a186bdf56d6..02cf425386b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1504,6 +1504,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> backends.ZarrStore: ... @@ -1525,6 +1526,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> Delayed: ... @@ -1543,6 +1545,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> backends.ZarrStore | Delayed: """This function creates an appropriate datastore for writing a dataset to a zarr ztore @@ -1609,6 +1612,13 @@ def to_zarr( f"``region`` with to_zarr(), got {append_dim} in both" ) + if zarr_version is None: + # default to 2 if store doesn't specify it's version (e.g. a path) + zarr_version = int(getattr(store, "_store_version", 2)) + + if consolidated is None and zarr_version > 2: + consolidated = False + if mode == "r+": already_consolidated = consolidated consolidate_on_close = False @@ -1627,6 +1637,7 @@ def to_zarr( write_region=region, safe_chunks=safe_chunks, stacklevel=4, # for Dataset.to_zarr() + zarr_version=zarr_version, ) if mode in ["a", "r+"]: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 6c547f82252..cca2d89678f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -353,6 +353,7 @@ def open_group( write_region=None, safe_chunks=True, stacklevel=2, + zarr_version=None, ): import zarr @@ -360,12 +361,29 @@ def open_group( if isinstance(store, os.PathLike): store = os.fspath(store) + if zarr_version is None: + # default to 2 if store doesn't specify it's version (e.g. a path) + zarr_version = getattr(store, "_store_version", 2) + open_kwargs = dict( mode=mode, synchronizer=synchronizer, path=group, ) open_kwargs["storage_options"] = storage_options + if zarr_version > 2: + open_kwargs["zarr_version"] = zarr_version + + if consolidated or consolidate_on_close: + raise ValueError( + "consolidated metadata has not been implemented for zarr " + f"version {zarr_version} yet. Set consolidated=False for " + f"zarr version {zarr_version}. See also " + "https://github.com/zarr-developers/zarr-specs/issues/136" + ) + + if consolidated is None: + consolidated = False if chunk_store: open_kwargs["chunk_store"] = chunk_store @@ -440,6 +458,11 @@ def open_store_variable(self, name, zarr_array): zarr_array, DIMENSION_KEY, try_nczarr ) attributes = dict(attributes) + + # TODO: this should not be needed once + # https://github.com/zarr-developers/zarr-python/issues/1269 is resolved. + attributes.pop("filters", None) + encoding = { "chunks": zarr_array.chunks, "preferred_chunks": dict(zip(dimensions, zarr_array.chunks)), @@ -668,6 +691,7 @@ def open_zarr( storage_options=None, decode_timedelta=None, use_cftime=None, + zarr_version=None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -725,6 +749,9 @@ def open_zarr( capability. Only works for stores that have already been consolidated. By default (`consolidate=None`), attempts to read consolidated metadata, falling back to read non-consolidated metadata if that fails. + + When the experimental ``zarr_version=3``, ``consolidated`` must be + either be ``None`` or ``False``. chunk_store : MutableMapping, optional A separate Zarr store only for chunk data. storage_options : dict, optional @@ -745,6 +772,10 @@ def open_zarr( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + zarr_version : int or None, optional + The desired zarr spec version to target (currently 2 or 3). The default + of None will attempt to determine the zarr version from ``store`` when + possible, otherwise defaulting to 2. Returns ------- @@ -782,6 +813,7 @@ def open_zarr( "chunk_store": chunk_store, "storage_options": storage_options, "stacklevel": 4, + "zarr_version": zarr_version, } ds = open_dataset( @@ -798,6 +830,7 @@ def open_zarr( backend_kwargs=backend_kwargs, decode_timedelta=decode_timedelta, use_cftime=use_cftime, + zarr_version=zarr_version, ) return ds @@ -842,6 +875,7 @@ def open_dataset( chunk_store=None, storage_options=None, stacklevel=3, + zarr_version=None, ): filename_or_obj = _normalize_path(filename_or_obj) @@ -855,6 +889,7 @@ def open_dataset( chunk_store=chunk_store, storage_options=storage_options, stacklevel=stacklevel + 1, + zarr_version=zarr_version, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6c432545669..7cf45e81460 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1930,6 +1930,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> ZarrStore: ... @@ -1967,6 +1968,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> ZarrStore | Delayed: """Write dataset contents to a zarr group. @@ -2017,6 +2019,9 @@ def to_zarr( metadata; if False, do not. The default (`consolidated=None`) means write consolidated metadata and attempt to read consolidated metadata for existing stores (falling back to non-consolidated). + + When the experimental ``zarr_version=3``, ``consolidated`` must be + either be ``None`` or ``False``. append_dim : hashable, optional If set, the dimension along which the data will be appended. All other dimensions on overridden variables must remain the same size. @@ -2048,6 +2053,10 @@ def to_zarr( storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). + zarr_version : int or None, optional + The desired zarr spec version to target (currently 2 or 3). The + default of None will attempt to determine the zarr version from + ``store`` when possible, otherwise defaulting to 2. Returns ------- @@ -2092,6 +2101,7 @@ def to_zarr( append_dim=append_dim, region=region, safe_chunks=safe_chunks, + zarr_version=zarr_version, ) def __repr__(self) -> str: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 64030b3f595..70987a3dd38 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -102,6 +102,24 @@ except ImportError: pass +have_zarr_kvstore = False +try: + from zarr.storage import KVStore + + have_zarr_kvstore = True +except ImportError: + KVStore = None + +have_zarr_v3 = False +try: + # as of Zarr v2.13 these imports require environment variable + # ZARR_V3_EXPERIMENTAL_API=1 + from zarr import DirectoryStoreV3, KVStoreV3 + + have_zarr_v3 = True +except ImportError: + KVStoreV3 = None + ON_WINDOWS = sys.platform == "win32" default_value = object() dask_array_type = array_type("dask") @@ -1735,6 +1753,8 @@ def test_write_inconsistent_chunks(self) -> None: class ZarrBase(CFEncodedBase): DIMENSION_KEY = "_ARRAY_DIMENSIONS" + zarr_version = 2 + version_kwargs: dict[str, Any] = {} def create_zarr_target(self): raise NotImplementedError @@ -1742,14 +1762,18 @@ def create_zarr_target(self): @contextlib.contextmanager def create_store(self): with self.create_zarr_target() as store_target: - yield backends.ZarrStore.open_group(store_target, mode="w") + yield backends.ZarrStore.open_group( + store_target, mode="w", **self.version_kwargs + ) def save(self, dataset, store_target, **kwargs): - return dataset.to_zarr(store=store_target, **kwargs) + return dataset.to_zarr(store=store_target, **kwargs, **self.version_kwargs) @contextlib.contextmanager def open(self, store_target, **kwargs): - with xr.open_dataset(store_target, engine="zarr", **kwargs) as ds: + with xr.open_dataset( + store_target, engine="zarr", **kwargs, **self.version_kwargs + ) as ds: yield ds @contextlib.contextmanager @@ -1767,24 +1791,30 @@ def roundtrip( @pytest.mark.parametrize("consolidated", [False, True, None]) def test_roundtrip_consolidated(self, consolidated) -> None: + if consolidated and self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") expected = create_test_data() with self.roundtrip( expected, - save_kwargs={"consolidated": True}, - open_kwargs={"backend_kwargs": {"consolidated": True}}, + save_kwargs={"consolidated": consolidated}, + open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, ) as actual: self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) def test_read_non_consolidated_warning(self) -> None: + + if self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") + expected = create_test_data() with self.create_zarr_target() as store: - expected.to_zarr(store, consolidated=False) + expected.to_zarr(store, consolidated=False, **self.version_kwargs) with pytest.warns( RuntimeWarning, match="Failed to open Zarr store with consolidated", ): - with xr.open_zarr(store) as ds: + with xr.open_zarr(store, **self.version_kwargs) as ds: assert_identical(ds, expected) def test_non_existent_store(self) -> None: @@ -2076,10 +2106,14 @@ def test_write_persistence_modes(self, group) -> None: # check append mode for append write ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w", group=group) - ds_to_append.to_zarr(store_target, append_dim="time", group=group) + ds.to_zarr(store_target, mode="w", group=group, **self.version_kwargs) + ds_to_append.to_zarr( + store_target, append_dim="time", group=group, **self.version_kwargs + ) original = xr.concat([ds, ds_to_append], dim="time") - actual = xr.open_dataset(store_target, group=group, engine="zarr") + actual = xr.open_dataset( + store_target, group=group, engine="zarr", **self.version_kwargs + ) assert_identical(original, actual) def test_compressor_encoding(self) -> None: @@ -2119,8 +2153,8 @@ def test_append_with_mode_rplus_success(self) -> None: original = Dataset({"foo": ("x", [1])}) modified = Dataset({"foo": ("x", [2])}) with self.create_zarr_target() as store: - original.to_zarr(store) - modified.to_zarr(store, mode="r+") + original.to_zarr(store, **self.version_kwargs) + modified.to_zarr(store, mode="r+", **self.version_kwargs) with self.open(store) as actual: assert_identical(actual, modified) @@ -2128,61 +2162,69 @@ def test_append_with_mode_rplus_fails(self) -> None: original = Dataset({"foo": ("x", [1])}) modified = Dataset({"bar": ("x", [2])}) with self.create_zarr_target() as store: - original.to_zarr(store) + original.to_zarr(store, **self.version_kwargs) with pytest.raises( ValueError, match="dataset contains non-pre-existing variables" ): - modified.to_zarr(store, mode="r+") + modified.to_zarr(store, mode="r+", **self.version_kwargs) def test_append_with_invalid_dim_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises( ValueError, match="does not match any existing dataset dimensions" ): - ds_to_append.to_zarr(store_target, append_dim="notvalid") + ds_to_append.to_zarr( + store_target, append_dim="notvalid", **self.version_kwargs + ) def test_append_with_no_dims_raises(self) -> None: with self.create_zarr_target() as store_target: - Dataset({"foo": ("x", [1])}).to_zarr(store_target, mode="w") + Dataset({"foo": ("x", [1])}).to_zarr( + store_target, mode="w", **self.version_kwargs + ) with pytest.raises(ValueError, match="different dimension names"): - Dataset({"foo": ("y", [2])}).to_zarr(store_target, mode="a") + Dataset({"foo": ("y", [2])}).to_zarr( + store_target, mode="a", **self.version_kwargs + ) def test_append_with_append_dim_not_set_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="different dimension sizes"): - ds_to_append.to_zarr(store_target, mode="a") + ds_to_append.to_zarr(store_target, mode="a", **self.version_kwargs) def test_append_with_mode_not_a_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="cannot set append_dim unless"): - ds_to_append.to_zarr(store_target, mode="w", append_dim="time") + ds_to_append.to_zarr( + store_target, mode="w", append_dim="time", **self.version_kwargs + ) def test_append_with_existing_encoding_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="but encoding was provided"): ds_to_append.to_zarr( store_target, append_dim="time", encoding={"da": {"compressor": None}}, + **self.version_kwargs, ) @pytest.mark.parametrize("dtype", ["U", "S"]) def test_append_string_length_mismatch_raises(self, dtype) -> None: ds, ds_to_append = create_append_string_length_mismatch_test_data(dtype) with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="Mismatched dtypes for variable"): ds_to_append.to_zarr( - store_target, - append_dim="time", + store_target, append_dim="time", **self.version_kwargs ) def test_check_encoding_is_consistent_after_append(self) -> None: @@ -2195,13 +2237,17 @@ def test_check_encoding_is_consistent_after_append(self) -> None: compressor = zarr.Blosc() encoding = {"da": {"compressor": compressor}} - ds.to_zarr(store_target, mode="w", encoding=encoding) - ds_to_append.to_zarr(store_target, append_dim="time") - actual_ds = xr.open_dataset(store_target, engine="zarr") + ds.to_zarr(store_target, mode="w", encoding=encoding, **self.version_kwargs) + ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs) + actual_ds = xr.open_dataset( + store_target, engine="zarr", **self.version_kwargs + ) actual_encoding = actual_ds["da"].encoding["compressor"] assert actual_encoding.get_config() == compressor.get_config() assert_identical( - xr.open_dataset(store_target, engine="zarr").compute(), + xr.open_dataset( + store_target, engine="zarr", **self.version_kwargs + ).compute(), xr.concat([ds, ds_to_append], dim="time"), ) @@ -2211,11 +2257,16 @@ def test_append_with_new_variable(self) -> None: # check append mode for new variable with self.create_zarr_target() as store_target: - xr.concat([ds, ds_to_append], dim="time").to_zarr(store_target, mode="w") - ds_with_new_var.to_zarr(store_target, mode="a") + xr.concat([ds, ds_to_append], dim="time").to_zarr( + store_target, mode="w", **self.version_kwargs + ) + ds_with_new_var.to_zarr(store_target, mode="a", **self.version_kwargs) combined = xr.concat([ds, ds_to_append], dim="time") combined["new_var"] = ds_with_new_var["new_var"] - assert_identical(combined, xr.open_dataset(store_target, engine="zarr")) + assert_identical( + combined, + xr.open_dataset(store_target, engine="zarr", **self.version_kwargs), + ) @requires_dask def test_to_zarr_compute_false_roundtrip(self) -> None: @@ -2291,12 +2342,14 @@ def test_no_warning_from_open_emptydim_with_chunks(self) -> None: with self.roundtrip(ds, open_kwargs=dict(chunks={"a": 1})) as ds_reload: assert_identical(ds, ds_reload) - @pytest.mark.parametrize("consolidated", [False, True]) + @pytest.mark.parametrize("consolidated", [False, True, None]) @pytest.mark.parametrize("compute", [False, True]) @pytest.mark.parametrize("use_dask", [False, True]) def test_write_region(self, consolidated, compute, use_dask) -> None: if (use_dask or not compute) and not has_dask: pytest.skip("requires dask") + if consolidated and self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") zeros = Dataset({"u": (("x",), np.zeros(10))}) nonzeros = Dataset({"u": (("x",), np.arange(1, 11))}) @@ -2311,16 +2364,24 @@ def test_write_region(self, consolidated, compute, use_dask) -> None: consolidated=consolidated, compute=compute, encoding={"u": dict(chunks=2)}, + **self.version_kwargs, ) if compute: - with xr.open_zarr(store, consolidated=consolidated) as actual: + with xr.open_zarr( + store, consolidated=consolidated, **self.version_kwargs + ) as actual: assert_identical(actual, zeros) for i in range(0, 10, 2): region = {"x": slice(i, i + 2)} nonzeros.isel(region).to_zarr( - store, region=region, consolidated=consolidated + store, + region=region, + consolidated=consolidated, + **self.version_kwargs, ) - with xr.open_zarr(store, consolidated=consolidated) as actual: + with xr.open_zarr( + store, consolidated=consolidated, **self.version_kwargs + ) as actual: assert_identical(actual, nonzeros) @pytest.mark.parametrize("mode", [None, "r+", "a"]) @@ -2328,10 +2389,12 @@ def test_write_region_mode(self, mode) -> None: zeros = Dataset({"u": (("x",), np.zeros(10))}) nonzeros = Dataset({"u": (("x",), np.arange(1, 11))}) with self.create_zarr_target() as store: - zeros.to_zarr(store) + zeros.to_zarr(store, **self.version_kwargs) for region in [{"x": slice(5)}, {"x": slice(5, 10)}]: - nonzeros.isel(region).to_zarr(store, region=region, mode=mode) - with xr.open_zarr(store) as actual: + nonzeros.isel(region).to_zarr( + store, region=region, mode=mode, **self.version_kwargs + ) + with xr.open_zarr(store, **self.version_kwargs) as actual: assert_identical(actual, nonzeros) @requires_dask @@ -2355,8 +2418,8 @@ def test_write_preexisting_override_metadata(self) -> None: ) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) - both_modified.to_zarr(store, mode="a") + original.to_zarr(store, compute=False, **self.version_kwargs) + both_modified.to_zarr(store, mode="a", **self.version_kwargs) with self.open(store) as actual: # NOTE: this arguably incorrect -- we should probably be # overriding the variable metadata, too. See the TODO note in @@ -2364,15 +2427,17 @@ def test_write_preexisting_override_metadata(self) -> None: assert_identical(actual, global_modified) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) - both_modified.to_zarr(store, mode="r+") + original.to_zarr(store, compute=False, **self.version_kwargs) + both_modified.to_zarr(store, mode="r+", **self.version_kwargs) with self.open(store) as actual: assert_identical(actual, only_new_data) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) + original.to_zarr(store, compute=False, **self.version_kwargs) # with region, the default mode becomes r+ - both_modified.to_zarr(store, region={"x": slice(None)}) + both_modified.to_zarr( + store, region={"x": slice(None)}, **self.version_kwargs + ) with self.open(store) as actual: assert_identical(actual, only_new_data) @@ -2383,7 +2448,7 @@ def test_write_region_errors(self) -> None: @contextlib.contextmanager def setup_and_verify_store(expected=data): with self.create_zarr_target() as store: - data.to_zarr(store) + data.to_zarr(store, **self.version_kwargs) yield store with self.open(store) as actual: assert_identical(actual, expected) @@ -2391,7 +2456,7 @@ def setup_and_verify_store(expected=data): # verify the base case works expected = Dataset({"u": (("x",), np.array([10, 11, 2, 3, 4]))}) with setup_and_verify_store(expected) as store: - data2.to_zarr(store, region={"x": slice(2)}) + data2.to_zarr(store, region={"x": slice(2)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( @@ -2400,46 +2465,57 @@ def setup_and_verify_store(expected=data): "cannot set region unless mode='a', mode='r+' or mode=None" ), ): - data.to_zarr(store, region={"x": slice(None)}, mode="w") + data.to_zarr( + store, region={"x": slice(None)}, mode="w", **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises(TypeError, match=r"must be a dict"): - data.to_zarr(store, region=slice(None)) # type: ignore[call-overload] + data.to_zarr(store, region=slice(None), **self.version_kwargs) # type: ignore[call-overload] with setup_and_verify_store() as store: with pytest.raises(TypeError, match=r"must be slice objects"): - data2.to_zarr(store, region={"x": [0, 1]}) # type: ignore[dict-item] + data2.to_zarr(store, region={"x": [0, 1]}, **self.version_kwargs) # type: ignore[dict-item] with setup_and_verify_store() as store: with pytest.raises(ValueError, match=r"step on all slices"): - data2.to_zarr(store, region={"x": slice(None, None, 2)}) + data2.to_zarr( + store, region={"x": slice(None, None, 2)}, **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"all keys in ``region`` are not in Dataset dimensions", ): - data.to_zarr(store, region={"y": slice(None)}) + data.to_zarr(store, region={"y": slice(None)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"all variables in the dataset to write must have at least one dimension in common", ): - data2.assign(v=2).to_zarr(store, region={"x": slice(2)}) + data2.assign(v=2).to_zarr( + store, region={"x": slice(2)}, **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"cannot list the same dimension in both" ): - data.to_zarr(store, region={"x": slice(None)}, append_dim="x") + data.to_zarr( + store, + region={"x": slice(None)}, + append_dim="x", + **self.version_kwargs, + ) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"variable 'u' already exists with different dimension sizes", ): - data2.to_zarr(store, region={"x": slice(3)}) + data2.to_zarr(store, region={"x": slice(3)}, **self.version_kwargs) @requires_dask def test_encoding_chunksizes(self) -> None: @@ -2481,10 +2557,10 @@ def test_chunk_encoding_with_larger_dask_chunks(self) -> None: def test_open_zarr_use_cftime(self) -> None: ds = create_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target) - ds_a = xr.open_zarr(store_target) + ds.to_zarr(store_target, **self.version_kwargs) + ds_a = xr.open_zarr(store_target, **self.version_kwargs) assert_identical(ds, ds_a) - ds_b = xr.open_zarr(store_target, use_cftime=True) + ds_b = xr.open_zarr(store_target, use_cftime=True, **self.version_kwargs) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) def test_write_read_select_write(self) -> None: @@ -2493,13 +2569,13 @@ def test_write_read_select_write(self) -> None: # NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug. with self.create_zarr_target() as initial_store: - ds.to_zarr(initial_store, mode="w") - ds1 = xr.open_zarr(initial_store) + ds.to_zarr(initial_store, mode="w", **self.version_kwargs) + ds1 = xr.open_zarr(initial_store, **self.version_kwargs) # Combination of where+squeeze triggers error on write. ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3") with self.create_zarr_target() as final_store: - ds_sel.to_zarr(final_store, mode="w") + ds_sel.to_zarr(final_store, mode="w", **self.version_kwargs) @pytest.mark.parametrize("obj", [Dataset(), DataArray(name="foo")]) def test_attributes(self, obj) -> None: @@ -2508,21 +2584,24 @@ def test_attributes(self, obj) -> None: obj.attrs["good"] = {"key": "value"} ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target) - assert_identical(ds, xr.open_zarr(store_target)) + ds.to_zarr(store_target, **self.version_kwargs) + assert_identical(ds, xr.open_zarr(store_target, **self.version_kwargs)) obj.attrs["bad"] = DataArray() ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: with pytest.raises(TypeError, match=r"Invalid attribute in Dataset.attrs."): - ds.to_zarr(store_target) + ds.to_zarr(store_target, **self.version_kwargs) @requires_zarr class TestZarrDictStore(ZarrBase): @contextlib.contextmanager def create_zarr_target(self): - yield {} + if have_zarr_kvstore: + yield KVStore({}) + else: + yield {} @requires_zarr @@ -2532,6 +2611,55 @@ def create_zarr_target(self): with create_tmp_file(suffix=".zarr") as tmp: yield tmp + @contextlib.contextmanager + def create_store(self): + with self.create_zarr_target() as store_target: + group = backends.ZarrStore.open_group(store_target, mode="w") + # older Zarr versions do not have the _store_version attribute + if have_zarr_v3: + # verify that a v2 store was created + assert group.zarr_group.store._store_version == 2 + yield group + + +class ZarrBaseV3(ZarrBase): + zarr_version = 3 + + def test_roundtrip_coordinates_with_space(self): + original = Dataset(coords={"x": 0, "y z": 1}) + with pytest.warns(SerializationWarning): + # v3 stores do not allow spaces in the key name + with pytest.raises(ValueError): + with self.roundtrip(original): + pass + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrKVStoreV3(ZarrBaseV3): + @contextlib.contextmanager + def create_zarr_target(self): + yield KVStoreV3({}) + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrDirectoryStoreV3(ZarrBaseV3): + @contextlib.contextmanager + def create_zarr_target(self): + with create_tmp_file(suffix=".zr3") as tmp: + yield DirectoryStoreV3(tmp) + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrDirectoryStoreV3FromPath(TestZarrDirectoryStoreV3): + # Must specify zarr_version=3 to get a v3 store because create_zarr_target + # is a string path. + version_kwargs = {"zarr_version": 3} + + @contextlib.contextmanager + def create_zarr_target(self): + with create_tmp_file(suffix=".zr3") as tmp: + yield tmp + @requires_zarr @requires_fsspec