Skip to content

Commit

Permalink
Parse 0 fill value as "" for str dtype
Browse files Browse the repository at this point in the history
  • Loading branch information
moradology committed Feb 4, 2025
1 parent 87557e3 commit fa2f8f5
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 7 deletions.
6 changes: 4 additions & 2 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
_warn_order_kwarg,
concurrent_map,
parse_dtype,
parse_fill_value,
parse_order,
parse_shapelike,
product,
Expand Down Expand Up @@ -3901,6 +3902,7 @@ async def init_array(

from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation

fill_value_parsed = parse_fill_value(fill_value, dtype, zarr_format)
dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
shape_parsed = parse_shapelike(shape)
chunk_key_encoding_parsed = _parse_chunk_key_encoding(
Expand Down Expand Up @@ -3947,7 +3949,7 @@ async def init_array(
dtype=dtype_parsed,
chunks=chunk_shape_parsed,
dimension_separator=chunk_key_encoding_parsed.separator,
fill_value=fill_value,
fill_value=fill_value_parsed,
order=order_parsed,
filters=filters_parsed,
compressor=compressor_parsed,
Expand Down Expand Up @@ -3985,7 +3987,7 @@ async def init_array(
meta = AsyncArray._create_metadata_v3(
shape=shape_parsed,
dtype=dtype_parsed,
fill_value=fill_value,
fill_value=fill_value_parsed,
chunk_shape=chunks_out,
chunk_key_encoding=chunk_key_encoding_parsed,
codecs=codecs_out,
Expand Down
4 changes: 1 addition & 3 deletions src/zarr/core/array_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from zarr.core.common import (
MemoryOrder,
parse_bool,
parse_fill_value,
parse_order,
parse_shapelike,
)
Expand Down Expand Up @@ -102,11 +101,10 @@ def __init__(
) -> None:
shape_parsed = parse_shapelike(shape)
dtype_parsed = np.dtype(dtype)
fill_value_parsed = parse_fill_value(fill_value)

object.__setattr__(self, "shape", shape_parsed)
object.__setattr__(self, "dtype", dtype_parsed)
object.__setattr__(self, "fill_value", fill_value_parsed)
object.__setattr__(self, "fill_value", fill_value)
object.__setattr__(self, "config", config)
object.__setattr__(self, "prototype", prototype)

Expand Down
6 changes: 4 additions & 2 deletions src/zarr/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,11 @@ def parse_shapelike(data: int | Iterable[int]) -> tuple[int, ...]:
return data_tuple


def parse_fill_value(fill_value: Any, dtype: Any, zarr_format: "ZarrFormat") -> Any:
    """Normalize a user-supplied fill value for the given dtype and format.

    The only normalization performed is for Zarr format 2 arrays whose dtype
    is requested as ``str`` (either the builtin type or the string
    ``"str"``): a fill value that compares equal to ``0`` is replaced by the
    empty string. Every other input is returned unchanged.

    Parameters
    ----------
    fill_value : Any
        The fill value as supplied by the caller.
    dtype : Any
        The dtype as supplied by the caller, before any dtype parsing.
    zarr_format : ZarrFormat
        The Zarr format version of the array being created.

    Returns
    -------
    Any
        ``""`` for the format-2 / ``str`` dtype / ``0`` combination,
        otherwise ``fill_value`` itself.
    """
    wants_v2_string = zarr_format == 2 and (dtype is str or dtype == "str")
    # The equality test is evaluated only for v2 string arrays, so fill values
    # of other dtypes are never compared against 0 here.
    if wants_v2_string and fill_value == 0:
        return ""
    # todo: real validation
    return fill_value


def parse_order(data: Any) -> Literal["C", "F"]:
Expand Down
15 changes: 15 additions & 0 deletions tests/test_metadata/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,18 @@ def test_zstd_checksum() -> None:
arr.metadata.to_buffer_dict(default_buffer_prototype())[".zarray"].to_bytes()
)
assert "checksum" not in metadata["compressor"]


def test_0_fill_str_type() -> None:
    """A ``0`` fill value on a format-2 ``str`` array reads back as empty strings."""
    array = zarr.create_array(
        store=zarr.storage.MemoryStore(),
        dtype=str,
        shape=(5,),
        chunks=(2,),
        fill_value=0,
        zarr_format=2,
        overwrite=True,
    )

    # Ensure the array initializes correctly with the fill value
    np.testing.assert_array_equal(array[:], ["", "", "", "", ""])

0 comments on commit fa2f8f5

Please sign in to comment.