Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix/unbreak chunks initialized #2862

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2862.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug that prevented the number of initialized chunks being counted properly.
2 changes: 1 addition & 1 deletion docs/user-guide/groups.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ property. E.g.::
No. bytes : 8000000 (7.6M)
No. bytes stored : 1614
Storage ratio : 4956.6
Chunks Initialized : 0
Chunks Initialized : 10
>>> baz.info
Type : Array
Zarr format : 3
Expand Down
8 changes: 7 additions & 1 deletion src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
get_pipeline_class,
)
from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path
from zarr.storage._utils import _relativize_path

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
Expand Down Expand Up @@ -3727,7 +3728,12 @@ async def chunks_initialized(
store_contents = [
x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path)
]
return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents)
store_contents_relative = [
_relativize_path(key, array.store_path.path) for key in store_contents
]
return tuple(
chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative
)


def _build_parents(
Expand Down
51 changes: 50 additions & 1 deletion src/zarr/storage/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,60 @@
"""
Filter out instances of '' and join the remaining strings with '/'.

Because the root node of a zarr hierarchy is represented by an empty string,
Parameters
----------
paths : Iterable[str]

Returns
-------
str

Examples
--------
>>> _join_paths(["", "a", "b"])
'a/b'
>>> _join_paths(["a", "b", "c"])
'a/b/c'
"""
return "/".join(filter(lambda v: v != "", paths))


def _relativize_path(path: str, prefix: str) -> str:
"""
Make a "\"-delimited path relative to some prefix. If the prefix is '', then the path is
returned as-is. Otherwise, the prefix is removed from the path as well as the separator
string "\".

If ``prefix`` is not the empty string and``path`` does not start with ``prefix``
followed by a "/" character, then an error is raised.

Parameters
----------
path : str
The path to make relative to the prefix.
prefix : str
The prefix to make relative to.

Returns
-------
str

Examples
--------
>>> _relativize_paths("", "a/b")
'a/b'
>>> _relativize_paths("a/b", "a/b/c")
'c'
"""
if prefix == "":
return path
else:
_prefix = prefix + "/"
if not path.startswith(_prefix):
raise ValueError(f"The first component of {path} does not start with {prefix}.")

Check warning on line 127 in src/zarr/storage/_utils.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/storage/_utils.py#L127

Added line #L127 was not covered by tests
return path.removeprefix(f"{prefix}/")


def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]:
"""
Normalize the input paths according to the normalization scheme used for zarr node paths.
Expand Down
5 changes: 3 additions & 2 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,12 +386,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]
assert observed == expected


async def test_chunks_initialized() -> None:
@pytest.mark.parametrize("path", ["", "foo"])
async def test_chunks_initialized(path: str) -> None:
"""
Test that chunks_initialized accurately returns the keys of stored chunks.
"""
store = MemoryStore()
arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4")
arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4")

chunks_accumulated = tuple(
accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys()))
Expand Down