Skip to content

Commit

Permalink
(fix): object dtypes in rust
Browse files Browse the repository at this point in the history
  • Loading branch information
ilan-gold committed Feb 2, 2025
1 parent a51e810 commit 19e90e3
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 8 deletions.
8 changes: 4 additions & 4 deletions python/zarrs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, N
},
}
# TODO: get the endianness added to V2Codec API
# TODO: how to handle this with strings, which don't need this but zarrs
# complains about its absence if it's not there
yield BytesCodec().to_dict()
else:
yield codec.to_dict()
Expand Down Expand Up @@ -220,10 +222,8 @@ def _raise_error_on_batch_info_error(
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
],
):
if any(
info.dtype in ["object"] or info.dtype.kind in {"V", "S"}
for (_, info, _, _) in batch_info
):
# https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293
if any(info.dtype.kind in {"V", "S"} for (_, info, _, _) in batch_info):
raise UnsupportedDataTypeError()
if any(info.fill_value is None for (_, info, _, _) in batch_info):
raise FillValueNoneError()
18 changes: 16 additions & 2 deletions src/chunk_item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::num::NonZeroU64;
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pymethods,
types::{PyAnyMethods as _, PyBytes, PyBytesMethods, PySlice, PySliceMethods as _},
types::{PyAnyMethods, PyBytes, PyBytesMethods, PyInt, PySlice, PySliceMethods as _},
Bound, PyAny, PyErr, PyResult,
};
use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
Expand Down Expand Up @@ -40,14 +40,28 @@ impl Basic {
let path: String = byte_interface.getattr("path")?.extract()?;

let chunk_shape = chunk_spec.getattr("shape")?.extract()?;
let dtype: String = chunk_spec
let mut dtype: String = chunk_spec
.getattr("dtype")?
.call_method0("__str__")?
.extract()?;
let fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?;
let fill_value_bytes: Vec<u8>;
if let Ok(fill_value_downcast) = fill_value.downcast::<PyBytes>() {
fill_value_bytes = fill_value_downcast.as_bytes().to_vec();
} else if let Ok(fill_value_downcast) = fill_value.downcast::<PyInt>() {
let fill_value_usize: usize = fill_value_downcast.extract()?;
if fill_value_usize == (0 as usize) && dtype == "object" {
// https://github.com/LDeakin/zarrs/pull/140
fill_value_bytes = "".as_bytes().to_vec();
// zarrs doesn't understand `object`, which is the output of `np.dtype("|O").__str__()`,
// but maps it to "string" internally: https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L288
dtype = String::from("string");
} else {
return Err(PyErr::new::<PyValueError, _>(format!(
"Cannot understand non-zero integer {:?} fill value for dtype {:?}",
fill_value_usize, dtype
)));
}
} else if fill_value.hasattr("tobytes")? {
fill_value_bytes = fill_value.call_method0("tobytes")?.extract()?;
} else {
Expand Down
4 changes: 2 additions & 2 deletions tests/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ def test_v2_encode_decode_with_data(dtype_value, tmp_path):


@pytest.mark.parametrize("dtype", [str, "str"])
async def test_create_dtype_str(dtype: Any) -> None:
arr = zarr.create(shape=3, dtype=dtype, zarr_format=2)
async def test_create_dtype_str(dtype: Any, tmp_path: Path) -> None:
arr = zarr.create(store=tmp_path, shape=3, dtype=dtype, zarr_format=2)
assert arr.dtype.kind == "O"
assert arr.metadata.to_dict()["dtype"] == "|O"
assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),)
Expand Down

0 comments on commit 19e90e3

Please sign in to comment.