diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4c444a81fa..b4e07f728d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -58,6 +58,7 @@ _warn_order_kwarg, concurrent_map, parse_dtype, + parse_fill_value, parse_order, parse_shapelike, product, @@ -3901,6 +3902,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation + fill_value_parsed = parse_fill_value(fill_value, dtype, zarr_format) dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( @@ -3947,7 +3949,7 @@ async def init_array( dtype=dtype_parsed, chunks=chunk_shape_parsed, dimension_separator=chunk_key_encoding_parsed.separator, - fill_value=fill_value, + fill_value=fill_value_parsed, order=order_parsed, filters=filters_parsed, compressor=compressor_parsed, @@ -3985,7 +3987,7 @@ async def init_array( meta = AsyncArray._create_metadata_v3( shape=shape_parsed, dtype=dtype_parsed, - fill_value=fill_value, + fill_value=fill_value_parsed, chunk_shape=chunks_out, chunk_key_encoding=chunk_key_encoding_parsed, codecs=codecs_out, diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index b1a6a3cad0..51543214f5 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -8,7 +8,6 @@ from zarr.core.common import ( MemoryOrder, parse_bool, - parse_fill_value, parse_order, parse_shapelike, ) @@ -102,11 +101,10 @@ def __init__( ) -> None: shape_parsed = parse_shapelike(shape) dtype_parsed = np.dtype(dtype) - fill_value_parsed = parse_fill_value(fill_value) object.__setattr__(self, "shape", shape_parsed) object.__setattr__(self, "dtype", dtype_parsed) - object.__setattr__(self, "fill_value", fill_value_parsed) + object.__setattr__(self, "fill_value", fill_value) object.__setattr__(self, "config", config) object.__setattr__(self, "prototype", prototype) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py 
def parse_fill_value(fill_value: Any, dtype: Any, zarr_format: ZarrFormat) -> Any:
    """
    Normalize a user-supplied fill value before it is written to array metadata.

    The only normalization currently performed targets Zarr format 2 arrays
    whose dtype is requested as ``str`` (the builtin type) or ``"str"``: a
    fill value that compares equal to ``0`` is replaced by the empty string.
    All other inputs are returned unchanged.

    Parameters
    ----------
    fill_value : Any
        The fill value as supplied by the caller.
    dtype : Any
        The *unparsed* dtype as supplied by the caller. It is compared against
        ``str`` / ``"str"`` directly, so it must be passed before any dtype
        normalization has been applied.
    zarr_format : ZarrFormat
        The Zarr format version of the array being created.

    Returns
    -------
    Any
        ``""`` when the format is 2, the dtype is ``str`` / ``"str"`` and
        ``fill_value`` equals ``0``; otherwise ``fill_value`` itself.
    """
    if zarr_format != 2 or not (dtype is str or dtype == "str"):
        return fill_value

    try:
        is_zero = bool(fill_value == 0)
    except (TypeError, ValueError):
        # Array-like fill values compare element-wise and have no single truth
        # value; they are not "the scalar 0", so pass them through untouched
        # rather than failing here with an unrelated error.
        is_zero = False

    # TODO: real validation of fill_value against dtype
    return "" if is_zero else fill_value