Skip to content

Commit

Permalink
implement Zarr v3 spec support (#6475)
Browse files Browse the repository at this point in the history
* make zarr backend compatible with v3 spec

* add tests for Zarr v3 stores

* add tests for Zarr v3 stores when the store is not a StoreV3 class

In this case where create_zarr_target returns a string, we must
specify zarr_version=3 when opening/writing a store to make sure
a version 3 store will be created rather than the default of a
version 2 store.

* update import path to match Zarr v2.12 and v2.13 experimental API

remove path='xarray' default for zarr v3

path=None should work as of Zarr v2.13

* flake8 fixes

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* disallow consolidated metadata for zarr v3

* whats new a + remove more consolidated metadata for v3

* activate upstream dev test for zarr v3

* better typing

* untype zarr_version in open_dataset

* update whats new

* [test-upstream]

* update comment

* fix whats new

* update whats new

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Joseph Hamman <joe@earthmover.io>
  • Loading branch information
3 people authored Nov 27, 2022
1 parent 4e9535a commit 9973b6e
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 67 deletions.
1 change: 1 addition & 0 deletions .github/workflows/upstream-dev-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ jobs:
if: success()
id: status
run: |
export ZARR_V3_EXPERIMENTAL_API=1
python -m pytest --timeout=60 -rf \
--report-log output-${{ matrix.python-version }}-log.jsonl
- name: Generate and publish the report
Expand Down
3 changes: 2 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ What's New
np.random.seed(123456)
.. _whats-new.2022.11.1:

v2022.11.1 (unreleased)
Expand All @@ -23,6 +22,8 @@ v2022.11.1 (unreleased)
New Features
~~~~~~~~~~~~

- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`).
By `Gregory Lee <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
11 changes: 11 additions & 0 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1504,6 +1504,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> backends.ZarrStore:
...

Expand All @@ -1525,6 +1526,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> Delayed:
...

Expand All @@ -1543,6 +1545,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> backends.ZarrStore | Delayed:
"""This function creates an appropriate datastore for writing a dataset to
a zarr store
Expand Down Expand Up @@ -1609,6 +1612,13 @@ def to_zarr(
f"``region`` with to_zarr(), got {append_dim} in both"
)

if zarr_version is None:
# default to 2 if store doesn't specify its version (e.g. a path)
zarr_version = int(getattr(store, "_store_version", 2))

if consolidated is None and zarr_version > 2:
consolidated = False

if mode == "r+":
already_consolidated = consolidated
consolidate_on_close = False
Expand All @@ -1627,6 +1637,7 @@ def to_zarr(
write_region=region,
safe_chunks=safe_chunks,
stacklevel=4, # for Dataset.to_zarr()
zarr_version=zarr_version,
)

if mode in ["a", "r+"]:
Expand Down
35 changes: 35 additions & 0 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,19 +353,37 @@ def open_group(
write_region=None,
safe_chunks=True,
stacklevel=2,
zarr_version=None,
):
import zarr

# zarr doesn't support pathlib.Path objects yet. zarr-python#601
if isinstance(store, os.PathLike):
store = os.fspath(store)

if zarr_version is None:
# default to 2 if store doesn't specify its version (e.g. a path)
zarr_version = getattr(store, "_store_version", 2)

open_kwargs = dict(
mode=mode,
synchronizer=synchronizer,
path=group,
)
open_kwargs["storage_options"] = storage_options
if zarr_version > 2:
open_kwargs["zarr_version"] = zarr_version

if consolidated or consolidate_on_close:
raise ValueError(
"consolidated metadata has not been implemented for zarr "
f"version {zarr_version} yet. Set consolidated=False for "
f"zarr version {zarr_version}. See also "
"https://github.com/zarr-developers/zarr-specs/issues/136"
)

if consolidated is None:
consolidated = False

if chunk_store:
open_kwargs["chunk_store"] = chunk_store
Expand Down Expand Up @@ -440,6 +458,11 @@ def open_store_variable(self, name, zarr_array):
zarr_array, DIMENSION_KEY, try_nczarr
)
attributes = dict(attributes)

# TODO: this should not be needed once
# https://github.com/zarr-developers/zarr-python/issues/1269 is resolved.
attributes.pop("filters", None)

encoding = {
"chunks": zarr_array.chunks,
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks)),
Expand Down Expand Up @@ -668,6 +691,7 @@ def open_zarr(
storage_options=None,
decode_timedelta=None,
use_cftime=None,
zarr_version=None,
**kwargs,
):
"""Load and decode a dataset from a Zarr store.
Expand Down Expand Up @@ -725,6 +749,9 @@ def open_zarr(
capability. Only works for stores that have already been consolidated.
By default (`consolidated=None`), attempts to read consolidated metadata,
falling back to read non-consolidated metadata if that fails.
When using the experimental ``zarr_version=3``, ``consolidated`` must be
either ``None`` or ``False``.
chunk_store : MutableMapping, optional
A separate Zarr store only for chunk data.
storage_options : dict, optional
Expand All @@ -745,6 +772,10 @@ def open_zarr(
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error.
zarr_version : int or None, optional
The desired zarr spec version to target (currently 2 or 3). The default
of None will attempt to determine the zarr version from ``store`` when
possible, otherwise defaulting to 2.
Returns
-------
Expand Down Expand Up @@ -782,6 +813,7 @@ def open_zarr(
"chunk_store": chunk_store,
"storage_options": storage_options,
"stacklevel": 4,
"zarr_version": zarr_version,
}

ds = open_dataset(
Expand All @@ -798,6 +830,7 @@ def open_zarr(
backend_kwargs=backend_kwargs,
decode_timedelta=decode_timedelta,
use_cftime=use_cftime,
zarr_version=zarr_version,
)
return ds

Expand Down Expand Up @@ -842,6 +875,7 @@ def open_dataset(
chunk_store=None,
storage_options=None,
stacklevel=3,
zarr_version=None,
):

filename_or_obj = _normalize_path(filename_or_obj)
Expand All @@ -855,6 +889,7 @@ def open_dataset(
chunk_store=chunk_store,
storage_options=storage_options,
stacklevel=stacklevel + 1,
zarr_version=zarr_version,
)

store_entrypoint = StoreBackendEntrypoint()
Expand Down
10 changes: 10 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,6 +1930,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> ZarrStore:
...

Expand Down Expand Up @@ -1967,6 +1968,7 @@ def to_zarr(
region: Mapping[str, slice] | None = None,
safe_chunks: bool = True,
storage_options: dict[str, str] | None = None,
zarr_version: int | None = None,
) -> ZarrStore | Delayed:
"""Write dataset contents to a zarr group.
Expand Down Expand Up @@ -2017,6 +2019,9 @@ def to_zarr(
metadata; if False, do not. The default (`consolidated=None`) means
write consolidated metadata and attempt to read consolidated
metadata for existing stores (falling back to non-consolidated).
When using the experimental ``zarr_version=3``, ``consolidated`` must be
either ``None`` or ``False``.
append_dim : hashable, optional
If set, the dimension along which the data will be appended. All
other dimensions on overridden variables must remain the same size.
Expand Down Expand Up @@ -2048,6 +2053,10 @@ def to_zarr(
storage_options : dict, optional
Any additional parameters for the storage backend (ignored for local
paths).
zarr_version : int or None, optional
The desired zarr spec version to target (currently 2 or 3). The
default of None will attempt to determine the zarr version from
``store`` when possible, otherwise defaulting to 2.
Returns
-------
Expand Down Expand Up @@ -2092,6 +2101,7 @@ def to_zarr(
append_dim=append_dim,
region=region,
safe_chunks=safe_chunks,
zarr_version=zarr_version,
)

def __repr__(self) -> str:
Expand Down
Loading

0 comments on commit 9973b6e

Please sign in to comment.