Skip to content

Commit

Permalink
Fix iterating over sharding index (#2392)
Browse files Browse the repository at this point in the history
* test_sharding_with_empty_inner_chunk

* tests for failing read with sharding

* replace morton order by np unravel index

* format

* Revert "replace morton order by np unravel index"

This reverts commit adc3240.

* skip morton indices that are out of bounds

* improve test_sharding_with_chunks_per_shard

* format

---------

Co-authored-by: Norman Rzepka <code@normanrz.com>
  • Loading branch information
brokkoli71 and normanrz authored Nov 29, 2024
1 parent 206d145 commit cdd6a74
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/zarr/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,8 +1346,15 @@ def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords:


def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]:
    """Yield every coordinate inside ``chunk_shape`` exactly once, in Morton order.

    Successive Morton codes ``0, 1, 2, ...`` are decoded with ``decode_morton``.
    A decoded coordinate is emitted only if each component is strictly smaller
    than the matching extent of ``chunk_shape`` and it has not been emitted
    before. Iteration stops once ``product(chunk_shape)`` coordinates have been
    produced, so nothing is yielded when that product is zero.

    Parameters
    ----------
    chunk_shape
        Extent of the grid along each dimension.

    Yields
    ------
    ChunkCoords
        In-bounds coordinates, in order of increasing Morton code.
    """
    # Loop-invariant: compute the number of coordinates to emit only once.
    total = product(chunk_shape)
    # Set membership keeps the duplicate check O(1) per candidate.
    # NOTE(review): assumes decode_morton returns a hashable tuple, as its
    # ``ChunkCoords`` return annotation suggests -- confirm.
    seen: set[ChunkCoords] = set()
    i = 0
    while len(seen) < total:
        m = decode_morton(i, chunk_shape)
        i += 1
        if m in seen:
            continue
        # Decoded coordinates may fall outside the grid; skip those.
        if all(x < y for x, y in zip(m, chunk_shape, strict=False)):
            seen.add(m)
            yield m


def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]:
Expand Down
20 changes: 20 additions & 0 deletions tests/test_codecs/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,26 @@ def test_morton() -> None:
]


@pytest.mark.parametrize(
    "shape",
    [
        [2, 2, 2],
        [5, 2],
        [2, 5],
        [2, 9, 2],
        [3, 2, 12],
        [2, 5, 1],
        [4, 3, 6, 2, 7],
        [3, 2, 1, 6, 4, 5, 2],
    ],
)
def test_morton2(shape: list[int]) -> None:
    """Check that ``morton_order_iter`` enumerates the grid ``shape`` exactly.

    The iterator must produce one coordinate per grid cell, with no repeats and
    with every component inside the bounds given by ``shape``.
    """
    order = list(morton_order_iter(shape))
    # Completeness: without this, an iterator that yields too few coordinates
    # (or none at all) would satisfy the per-element checks below.
    assert len(order) == int(np.prod(shape))
    for i, x in enumerate(order):
        assert x not in order[:i]  # no duplicates
        assert len(x) == len(shape)  # one component per dimension
        assert all(0 <= x[j] < shape[j] for j in range(len(shape)))  # all indices are within bounds


@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
def test_write_partial_chunks(store: Store) -> None:
data = np.arange(0, 256, dtype="uint16").reshape((16, 16))
Expand Down
29 changes: 29 additions & 0 deletions tests/test_codecs/test_sharding.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,32 @@ async def test_sharding_with_empty_inner_chunk(
print("read data")
data_read = await a.getitem(...)
assert np.array_equal(data_read, data)


@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
@pytest.mark.parametrize(
    "index_location",
    [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end],
)
@pytest.mark.parametrize("chunks_per_shard", [(5, 2), (2, 5), (5, 5)])
async def test_sharding_with_chunks_per_shard(
    store: Store, index_location: ShardingCodecIndexLocation, chunks_per_shard: tuple[int, ...]
) -> None:
    """Round-trip one shard through ``ShardingCodec`` for several inner-chunk grids.

    The array consists of a single outer chunk (``chunk_shape=shape``) that the
    sharding codec splits into ``chunks_per_shard`` inner chunks of shape
    ``(2, 1)``. Everything written must be read back unchanged.
    """
    chunk_shape = (2, 1)
    # strict=True: a rank mismatch between the two tuples should fail loudly.
    shape = [x * y for x, y in zip(chunks_per_shard, chunk_shape, strict=True)]
    fill_value = 42
    # Every element gets a distinct value, none equal to the fill value, so a
    # misplaced or missing inner chunk changes the data read back. Constant
    # data would compare equal even if inner chunks were swapped.
    size = int(np.prod(shape))
    data = np.arange(fill_value + 1, fill_value + 1 + size, dtype="int32").reshape(shape)

    path = f"test_sharding_with_chunks_per_shard_{index_location}"
    spath = StorePath(store, path)
    a = Array.create(
        spath,
        shape=shape,
        chunk_shape=shape,
        dtype="int32",
        fill_value=fill_value,
        codecs=[ShardingCodec(chunk_shape=chunk_shape, index_location=index_location)],
    )
    a[...] = data
    data_read = a[...]
    assert np.array_equal(data_read, data)

0 comments on commit cdd6a74

Please sign in to comment.