diff --git a/changes/2862.bugfix.rst b/changes/2862.bugfix.rst new file mode 100644 index 000000000..bbe6f0746 --- /dev/null +++ b/changes/2862.bugfix.rst @@ -0,0 +1 @@ +Fix a bug that prevented the number of initialized chunks being counted properly. \ No newline at end of file diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst index 4268004f7..d5a0a7cce 100644 --- a/docs/user-guide/groups.rst +++ b/docs/user-guide/groups.rst @@ -140,7 +140,7 @@ property. E.g.:: No. bytes : 8000000 (7.6M) No. bytes stored : 1614 Storage ratio : 4956.6 - Chunks Initialized : 0 + Chunks Initialized : 10 >>> baz.info Type : Array Zarr format : 3 diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9c2f8a726..27320ea58 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -114,6 +114,7 @@ get_pipeline_class, ) from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path +from zarr.storage._utils import _relativize_path if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -3727,7 +3728,12 @@ async def chunks_initialized( store_contents = [ x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path) ] - return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents) + store_contents_relative = [ + _relativize_path(key, array.store_path.path) for key in store_contents + ] + return tuple( + chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative + ) def _build_parents( diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index eda4342f4..b6569a794 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -74,11 +74,60 @@ def _join_paths(paths: Iterable[str]) -> str: """ Filter out instances of '' and join the remaining strings with '/'. 
- Because the root node of a zarr hierarchy is represented by an empty string, + Parameters + ---------- + paths : Iterable[str] + + Returns + ------- + str + + Examples + -------- + >>> _join_paths(["", "a", "b"]) + 'a/b' + >>> _join_paths(["a", "b", "c"]) + 'a/b/c' """ return "/".join(filter(lambda v: v != "", paths)) +def _relativize_path(path: str, prefix: str) -> str: + """ + Make a "/"-delimited path relative to some prefix. If the prefix is '', then the path is + returned as-is. Otherwise, the prefix is removed from the path as well as the separator + string "/". + + If ``prefix`` is not the empty string and ``path`` does not start with ``prefix`` + followed by a "/" character, then an error is raised. + + Parameters + ---------- + path : str + The path to make relative to the prefix. + prefix : str + The prefix to make relative to. + + Returns + ------- + str + + Examples + -------- + >>> _relativize_path("a/b", "") + 'a/b' + >>> _relativize_path("a/b/c", "a/b") + 'c' + """ + if prefix == "": + return path + else: + _prefix = prefix + "/" + if not path.startswith(_prefix): + raise ValueError(f"The first component of {path} does not start with {prefix}.") + return path.removeprefix(f"{prefix}/") + + def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]: """ Normalize the input paths according to the normalization scheme used for zarr node paths. diff --git a/tests/test_array.py b/tests/test_array.py index efcf8a6bf..75c31f081 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -386,12 +386,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]] assert observed == expected -async def test_chunks_initialized() -> None: +@pytest.mark.parametrize("path", ["", "foo"]) +async def test_chunks_initialized(path: str) -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks. 
""" store = MemoryStore() - arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4") + arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4") chunks_accumulated = tuple( accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys()))