Skip to content

Commit

Permalink
(fix): structured arrays for v2 (#2681)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Martin Durant <martindurant@users.noreply.github.com>
  • Loading branch information
ilan-gold and martindurant authored Jan 21, 2025
1 parent e9772ac commit a260ae9
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 9 deletions.
2 changes: 2 additions & 0 deletions docs/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Unreleased
Bug fixes
~~~~~~~~~

* Backwards compatibility for Zarr format 2 structured arrays (:issue:`2134`)

Features
~~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions docs/user-guide/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ This is the current default configuration::
'level': 0}},
'v2_default_filters': {'bytes': [{'id': 'vlen-bytes'}],
'numeric': None,
'raw': None,
'string': [{'id': 'vlen-utf8'}]},
'v3_default_compressors': {'bytes': [{'configuration': {'checksum': False,
'level': 0},
Expand Down
4 changes: 3 additions & 1 deletion src/zarr/core/buffer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,9 @@ def all_equal(self, other: Any, equal_nan: bool = True) -> bool:
# every single time we have to write data?
_data, other = np.broadcast_arrays(self._data, other)
return np.array_equal(
self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "USTO" else False
self._data,
other,
equal_nan=equal_nan if self._data.dtype.kind not in "USTOV" else False,
)

def fill(self, value: Any) -> None:
Expand Down
1 change: 1 addition & 0 deletions src/zarr/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def reset(self) -> None:
"numeric": None,
"string": [{"id": "vlen-utf8"}],
"bytes": [{"id": "vlen-bytes"}],
"raw": None,
},
"v3_default_filters": {"numeric": [], "string": [], "bytes": []},
"v3_default_serializer": {
Expand Down
15 changes: 13 additions & 2 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,14 @@ def to_dict(self) -> dict[str, JSON]:
zarray_dict["fill_value"] = fill_value

_ = zarray_dict.pop("dtype")
zarray_dict["dtype"] = self.dtype.str
dtype_json: JSON
# In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string
dtype_descr = self.dtype.descr
if self.dtype.kind == "V" and dtype_descr[0][0] != "" and len(dtype_descr) != 0:
dtype_json = tuple(self.dtype.descr)
else:
dtype_json = self.dtype.str
zarray_dict["dtype"] = dtype_json

return zarray_dict

Expand All @@ -220,6 +227,8 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:


def _descr_to_tuples(descr: Any) -> Any:
    """Recursively turn a list-of-lists dtype description into a list of tuples.

    NumPy only accepts structured dtype fields spelled as tuples, but a
    description that has been through JSON comes back as nested lists.
    Anything that is not a list is returned unchanged.
    """
    if not isinstance(descr, list):
        return descr

    fields = []
    for field in descr:
        if isinstance(field, (list, tuple)) and len(field) >= 2:
            name, base, *rest = field
            # A field name may itself be a (title, name) pair.
            if isinstance(name, list):
                name = tuple(name)
            # ``base`` may be a nested structured description; ``rest`` holds
            # the optional sub-array shape, which NumPy documents as a tuple.
            extras = tuple(tuple(r) if isinstance(r, list) else r for r in rest)
            fields.append((name, _descr_to_tuples(base), *extras))
        else:
            # Preserve the previous behaviour for anything unexpected and let
            # ``np.dtype`` raise a meaningful error for it.
            fields.append(tuple(field))
    return fields


def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]:
    """Build a NumPy dtype from a dtype-like value.

    Parameters
    ----------
    data
        Anything ``np.dtype`` accepts. A list is treated as a structured
        dtype description whose fields may be given as lists rather than
        tuples (presumably because they were decoded from JSON metadata);
        such fields are converted to tuples at every nesting level.

    Returns
    -------
    np.dtype
        The dtype described by ``data``.

    Raises
    ------
    TypeError, ValueError
        Propagated from ``np.dtype`` when ``data`` does not describe a dtype.
    """
    if isinstance(data, list):  # this is a valid _VoidDTypeLike check
        data = _descr_to_tuples(data)
    return np.dtype(data)


Expand Down Expand Up @@ -376,8 +385,10 @@ def _default_filters(
dtype_key = "numeric"
elif dtype.kind in "U":
dtype_key = "string"
elif dtype.kind in "OSV":
elif dtype.kind in "OS":
dtype_key = "bytes"
elif dtype.kind == "V":
dtype_key = "raw"
else:
raise ValueError(f"Unsupported dtype kind {dtype.kind}")

Expand Down
1 change: 1 addition & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def test_config_defaults_set() -> None:
"numeric": None,
"string": [{"id": "vlen-utf8"}],
"bytes": [{"id": "vlen-bytes"}],
"raw": None,
},
"v3_default_filters": {"numeric": [], "string": [], "bytes": []},
"v3_default_serializer": {
Expand Down
41 changes: 35 additions & 6 deletions tests/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,15 @@ def test_codec_pipeline() -> None:
np.testing.assert_array_equal(result, expected)


@pytest.mark.parametrize("dtype", ["|S", "|V"])
async def test_v2_encode_decode(dtype):
@pytest.mark.parametrize(
("dtype", "expected_dtype", "fill_value", "fill_value_encoding"),
[
("|S", "|S0", b"X", "WA=="),
("|V", "|V0", b"X", "WA=="),
("|V10", "|V10", b"X", "WAAAAAAAAAAAAA=="),
],
)
async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_encoding) -> None:
with config.set(
{
"array.v2_default_filters.bytes": [{"id": "vlen-bytes"}],
Expand All @@ -95,7 +102,7 @@ async def test_v2_encode_decode(dtype):
store = zarr.storage.MemoryStore()
g = zarr.group(store=store, zarr_format=2)
g.create_array(
name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=b"X", compressor=None
name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=fill_value, compressor=None
)

result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
Expand All @@ -105,9 +112,9 @@ async def test_v2_encode_decode(dtype):
expected = {
"chunks": [3],
"compressor": None,
"dtype": f"{dtype}0",
"fill_value": "WA==",
"filters": [{"id": "vlen-bytes"}],
"dtype": expected_dtype,
"fill_value": fill_value_encoding,
"filters": [{"id": "vlen-bytes"}] if dtype == "|S" else None,
"order": "C",
"shape": [3],
"zarr_format": 2,
Expand Down Expand Up @@ -284,3 +291,25 @@ def test_default_filters_and_compressor(dtype_expected: Any) -> None:
assert arr.metadata.compressor.codec_id == expected_compressor
if expected_filter is not None:
assert arr.metadata.filters[0].codec_id == expected_filter


@pytest.mark.parametrize("fill_value", [None, (b"", 0, 0.0)], ids=["no_fill", "fill"])
def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None:
    """Write a structured array to a Zarr format 2 store and read it back.

    When a fill value is supplied, the freshly created (still unwritten)
    array must read back as that fill value in every element.
    """
    field_spec = [("foo", "S3"), ("bar", "i4"), ("baz", "f8")]
    records = np.array(
        [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)],
        dtype=field_spec,
    )
    store_path = tmp_path / "data.zarr"

    created = zarr.create(
        shape=(3,),
        store=store_path,
        chunks=(2,),
        fill_value=fill_value,
        zarr_format=2,
        dtype=records.dtype,
    )

    if fill_value is not None:
        # Nothing has been written yet, so every element is the fill value.
        expected_fill = np.array([fill_value] * records.shape[0], dtype=records.dtype)
        assert (expected_fill == created[:]).all()

    created[...] = records

    # Re-open from the store so the dtype is parsed back from stored metadata.
    reopened = zarr.open_array(store=store_path)
    assert (records == reopened[:]).all()

0 comments on commit a260ae9

Please sign in to comment.