diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a10a8c8851f..1a435903e19 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,9 @@ Deprecations Bug fixes ~~~~~~~~~ + +- Fix incompatibilities between ``open_datatree`` and Zarr-Python V3, along with refactoring ``TestZarrDatatreeIO`` (:issue:`9960`, :pull:`10020`). + By `Alfonso Ladino-Rincon `_. - Default to resolution-dependent optimal integer encoding units when saving chunked non-nanosecond :py:class:`numpy.datetime64` or :py:class:`numpy.timedelta64` arrays to disk. Previously units of @@ -59,6 +62,7 @@ Bug fixes By `Benoit Bovy `_. + Documentation ~~~~~~~~~~~~~ - Better expose the :py:class:`Coordinates` class in API reference (:pull:`10000`) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e83f5556369..c47e4cbeb99 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -637,6 +637,8 @@ def open_store( write_empty: bool | None = None, cache_members: bool = True, ): + root_group = "/" if _zarr_v3() else group + ( zarr_group, consolidate_on_close, @@ -646,7 +648,7 @@ def open_store( store=store, mode=mode, synchronizer=synchronizer, - group=group, + group=root_group, consolidated=consolidated, consolidate_on_close=consolidate_on_close, chunk_store=chunk_store, @@ -655,10 +657,24 @@ def open_store( use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, zarr_format=zarr_format, ) - group_paths = list(_iter_zarr_groups(zarr_group, parent=group)) + from zarr import Group + + group_members: dict[str, Group] + if _zarr_v3(): + group_members = { + f"/{path}": store + for path, store in dict(zarr_group.members(max_depth=1000)).items() + if isinstance(store, Group) and path.startswith(group.lstrip("/")) + } + if group == "/": + group_members[group] = zarr_group + else: + group_paths = list(_iter_zarr_groups(zarr_group, parent=group)) + group_members = {path: zarr_group.get(path) for path in group_paths} + return { group: cls( - zarr_group.get(group), + group_store, mode, consolidate_on_close, append_dim, @@ -669,7 +685,7 @@ def open_store( use_zarr_fill_value_as_mask, cache_members=cache_members, ) - for group in group_paths + for group, group_store in group_members.items() } @classmethod @@ -1651,8 +1667,6 @@ def open_groups_as_dict( zarr_version=None, zarr_format=None, ) -> dict[str, Dataset]: - from xarray.core.treenode import NodePath - filename_or_obj = _normalize_path(filename_or_obj) # Check for a group and make it a parent if it exists @@ -1751,7 +1765,7 @@ def _get_open_params( consolidated = False if _zarr_v3(): - missing_exc = ValueError + missing_exc = AssertionError else: missing_exc = zarr.errors.GroupNotFoundError diff --git a/xarray/tests/conftest.py b/xarray/tests/conftest.py index c3f1ccbfe3c..44f94d32a33 100644 --- a/xarray/tests/conftest.py +++ b/xarray/tests/conftest.py @@ -191,7 +191,7 @@ def create_test_datatree(): """ def _create_test_datatree(modify=lambda ds: ds): - set1_data = modify(xr.Dataset({"a": 0, "b": 1})) + set1_data = modify(xr.Dataset({"a": 1, "b": 2})) set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])})) root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 8819735e26a..b3ce86af8de 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -15,7 +15,7 @@ requires_dask, requires_h5netcdf, requires_netCDF4, - requires_zarr, + requires_zarr_v3, ) if TYPE_CHECKING: @@ -141,6 +141,8 @@ def unaligned_datatree_zarr(tmp_path_factory): a (y) int64 16B ... b (x) float64 16B ... """ + from zarr import consolidate_metadata + filepath = tmp_path_factory.mktemp("data") / "unaligned_simple_datatree.zarr" root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": 0, "b": 1}) @@ -149,6 +151,7 @@ def unaligned_datatree_zarr(tmp_path_factory): set1_data.to_zarr(filepath, group="/Group1", mode="a") set2_data.to_zarr(filepath, group="/Group2", mode="a") set1_data.to_zarr(filepath, group="/Group1/subgroup1", mode="a") + consolidate_metadata(filepath) yield filepath @@ -396,15 +399,12 @@ def test_phony_dims_warning(self, tmpdir) -> None: } -@pytest.mark.skipif( - have_zarr_v3, reason="datatree support for zarr 3 is not implemented yet" -) -@requires_zarr +@requires_zarr_v3 class TestZarrDatatreeIO: engine = "zarr" def test_to_zarr(self, tmpdir, simple_datatree): - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree original_dt.to_zarr(filepath) @@ -414,16 +414,30 @@ def test_to_zarr(self, tmpdir, simple_datatree): def test_zarr_encoding(self, tmpdir, simple_datatree): from numcodecs.blosc import Blosc - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree - comp = {"compressor": Blosc(cname="zstd", clevel=3, shuffle=2)} + blosc = Blosc(cname="zstd", clevel=3, shuffle="shuffle").get_config() + comp = {"compressor": {"name": blosc.pop("id"), "configuration": blosc}} enc = {"/set2": {var: comp for var in original_dt["/set2"].dataset.data_vars}} original_dt.to_zarr(filepath, encoding=enc) with open_datatree(filepath, engine="zarr") as roundtrip_dt: - print(roundtrip_dt["/set2/a"].encoding) - assert roundtrip_dt["/set2/a"].encoding["compressor"] == comp["compressor"] + retrieved_compressor = roundtrip_dt["/set2/a"].encoding["compressors"][ + 0 + ] # Get the BloscCodec object + assert ( + retrieved_compressor.cname.name + == comp["compressor"]["configuration"]["cname"] + ) + assert ( + retrieved_compressor.clevel + == comp["compressor"]["configuration"]["clevel"] + ) + assert ( + retrieved_compressor.shuffle.name + == comp["compressor"]["configuration"]["shuffle"] + ) enc["/not/a/group"] = {"foo": "bar"} # type: ignore[dict-item] with pytest.raises(ValueError, match="unexpected encoding group.*"): @@ -432,9 +446,9 @@ def test_zarr_encoding(self, tmpdir, simple_datatree): def test_to_zarr_zip_store(self, tmpdir, simple_datatree): from zarr.storage import ZipStore - filepath = tmpdir / "test.zarr.zip" + filepath = str(tmpdir / "test.zarr.zip") original_dt = simple_datatree - store = ZipStore(filepath) + store = ZipStore(filepath, mode="w") original_dt.to_zarr(store) with open_datatree(store, engine="zarr") as roundtrip_dt: # type: ignore[arg-type, unused-ignore] @@ -455,13 +469,11 @@ def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree): assert_equal(original_dt, roundtrip_dt) def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree): - import zarr - - simple_datatree.to_zarr(tmpdir) + simple_datatree.to_zarr(str(tmpdir)) # with default settings, to_zarr should not overwrite an existing dir - with pytest.raises(zarr.errors.ContainsGroupError): - simple_datatree.to_zarr(tmpdir) + with pytest.raises(FileExistsError): + simple_datatree.to_zarr(str(tmpdir)) @requires_dask def test_to_zarr_compute_false(self, tmpdir, simple_datatree): @@ -469,18 +481,17 @@ def test_to_zarr_compute_false(self, tmpdir, simple_datatree): filepath = tmpdir / "test.zarr" original_dt = simple_datatree.chunk() - original_dt.to_zarr(filepath, compute=False) + original_dt.to_zarr(str(filepath), compute=False) for node in original_dt.subtree: for name, variable in node.dataset.variables.items(): var_dir = filepath / node.path / name var_files = var_dir.listdir() - assert var_dir / ".zarray" in var_files - assert var_dir / ".zattrs" in var_files + assert var_dir / "zarr.json" in var_files if isinstance(variable.data, da.Array): - assert var_dir / "0" not in var_files + assert var_dir / "zarr.json" in var_files else: - assert var_dir / "0" in var_files + assert var_dir / "c" in var_files def test_to_zarr_inherited_coords(self, tmpdir): original_dt = DataTree.from_dict( @@ -489,7 +500,7 @@ def test_to_zarr_inherited_coords(self, tmpdir): "/sub": xr.Dataset({"b": (("x",), [5, 6])}), } ) - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath) with open_datatree(filepath, engine="zarr") as roundtrip_dt: @@ -499,7 +510,7 @@ def test_to_zarr_inherited_coords(self, tmpdir): def test_open_groups_round_trip(self, tmpdir, simple_datatree) -> None: """Test `open_groups` opens a zarr store with the `simple_datatree` structure.""" - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree original_dt.to_zarr(filepath) @@ -524,7 +535,7 @@ def test_open_datatree(self, unaligned_datatree_zarr) -> None: @requires_dask def test_open_datatree_chunks(self, tmpdir, simple_datatree) -> None: - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") chunks = {"x": 2, "y": 1} @@ -551,7 +562,6 @@ def test_open_groups(self, unaligned_datatree_zarr) -> None: assert "/" in unaligned_dict_of_datasets.keys() assert "/Group1" in unaligned_dict_of_datasets.keys() - assert "/Group1/subgroup1" in unaligned_dict_of_datasets.keys() assert "/Group2" in unaligned_dict_of_datasets.keys() # Check that group name returns the correct datasets with xr.open_dataset( @@ -559,24 +569,22 @@ def test_open_groups(self, unaligned_datatree_zarr) -> None: ) as expected: assert_identical(unaligned_dict_of_datasets["/"], expected) with xr.open_dataset( - unaligned_datatree_zarr, group="Group1", engine="zarr" + unaligned_datatree_zarr, group="/Group1", engine="zarr" ) as expected: assert_identical(unaligned_dict_of_datasets["/Group1"], expected) - with xr.open_dataset( - unaligned_datatree_zarr, group="/Group1/subgroup1", engine="zarr" - ) as expected: - assert_identical(unaligned_dict_of_datasets["/Group1/subgroup1"], expected) with xr.open_dataset( unaligned_datatree_zarr, group="/Group2", engine="zarr" ) as expected: assert_identical(unaligned_dict_of_datasets["/Group2"], expected) - for ds in unaligned_dict_of_datasets.values(): ds.close() + @pytest.mark.filterwarnings( + "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" + ) def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None: """Test opening a specific group within a Zarr store using `open_datatree`.""" - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") group = "/set2" original_dt = simple_datatree original_dt.to_zarr(filepath) @@ -591,10 +599,7 @@ def test_open_groups_chunks(self, tmpdir) -> None: """Test `open_groups` with chunks on a zarr store.""" chunks = {"x": 2, "y": 1} - filepath = tmpdir / "test.zarr" - - chunks = {"x": 2, "y": 1} - + filepath = str(tmpdir / "test.zarr") root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) set1_data = xr.Dataset({"a": ("y", [-1, 0, 1]), "b": ("x", [-10, 6])}) set2_data = xr.Dataset({"a": ("y", [1, 2, 3]), "b": ("x", [0.1, 0.2])}) @@ -628,13 +633,16 @@ def test_write_subgroup(self, tmpdir): expected_dt = original_dt.copy() expected_dt.name = None - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath) with open_datatree(filepath, engine="zarr") as roundtrip_dt: assert_equal(original_dt, roundtrip_dt) assert_identical(expected_dt, roundtrip_dt) + @pytest.mark.filterwarnings( + "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" + ) def test_write_inherited_coords_false(self, tmpdir): original_dt = DataTree.from_dict( { @@ -643,7 +651,7 @@ def test_write_inherited_coords_false(self, tmpdir): } ) - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath, write_inherited_coords=False) with open_datatree(filepath, engine="zarr") as roundtrip_dt: @@ -654,6 +662,9 @@ def test_write_inherited_coords_false(self, tmpdir): with open_datatree(filepath, group="child", engine="zarr") as roundtrip_child: assert_identical(expected_child, roundtrip_child) + @pytest.mark.filterwarnings( + "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" + ) def test_write_inherited_coords_true(self, tmpdir): original_dt = DataTree.from_dict( { @@ -662,7 +673,7 @@ def test_write_inherited_coords_true(self, tmpdir): } ) - filepath = tmpdir / "test.zarr" + filepath = str(tmpdir / "test.zarr") original_dt.to_zarr(filepath, write_inherited_coords=True) with open_datatree(filepath, engine="zarr") as roundtrip_dt: