From f8bc3153162e85ad41f62cdb6c693239868b85c6 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 14 Feb 2025 18:49:24 +0100 Subject: [PATCH] create_array creates explicit groups (#2795) * refactor create_array tests and add failing test for implicit -> explicit groups * clean up array config parsing, and modify init_array to create parent groups and return an asyncarray instead of metadata * typecheck tests * remove comment * release notes * add type: ignore statement * fix unbound local error in test * remove type:ignore * Add property test * fix test * Update tests/test_array.py --------- Co-authored-by: Deepak Cherian --- changes/2795.bugfix.rst | 1 + src/zarr/api/asynchronous.py | 6 +- src/zarr/api/synchronous.py | 10 +- src/zarr/core/array.py | 60 ++- src/zarr/core/array_spec.py | 11 +- tests/test_array.py | 735 +++++++++++++++++------------------ tests/test_properties.py | 19 + 7 files changed, 429 insertions(+), 413 deletions(-) create mode 100644 changes/2795.bugfix.rst diff --git a/changes/2795.bugfix.rst b/changes/2795.bugfix.rst new file mode 100644 index 0000000000..0ee6619c16 --- /dev/null +++ b/changes/2795.bugfix.rst @@ -0,0 +1 @@ +Alters the behavior of ``create_array`` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object. \ No newline at end of file diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 0584f19c3f..3a3d03bb71 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -10,7 +10,7 @@ from typing_extensions import deprecated from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata -from zarr.core.array_spec import ArrayConfig, ArrayConfigLike +from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams from zarr.core.buffer import NDArrayLike from zarr.core.common import ( JSON, @@ -856,7 +856,7 @@ async def create( codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, **kwargs: Any, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array. @@ -1018,7 +1018,7 @@ async def create( mode = "a" store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) - config_dict: ArrayConfigLike = {} + config_dict: ArrayConfigParams = {} if write_empty_chunks is not None: if config is not None: diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index fe68981cb9..e1f92633cd 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -25,7 +25,7 @@ SerializerLike, ShardsLike, ) - from zarr.core.array_spec import ArrayConfig, ArrayConfigLike + from zarr.core.array_spec import ArrayConfigLike from zarr.core.buffer import NDArrayLike from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike from zarr.core.common import ( @@ -625,7 +625,7 @@ def create( codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, **kwargs: Any, ) -> Array: """Create an array. @@ -695,7 +695,7 @@ def create( storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. - config : ArrayConfig or ArrayConfigLike, optional + config : ArrayConfigLike, optional Runtime configuration of the array. If provided, will override the default values from `zarr.config.array`. @@ -761,7 +761,7 @@ def create_array( dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> Array: """Create an array. @@ -853,7 +853,7 @@ def create_array( Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. - config : ArrayConfig or ArrayConfigLike, optional + config : ArrayConfigLike, optional Runtime configuration for the array. Returns diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4de24bab41..9c2f8a7260 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -221,7 +221,7 @@ class AsyncArray(Generic[T_ArrayMetadata]): The metadata of the array. store_path : StorePath The path to the Zarr store. - config : ArrayConfig, optional + config : ArrayConfigLike, optional The runtime configuration of the array, by default None. Attributes @@ -246,7 +246,7 @@ def __init__( self: AsyncArray[ArrayV2Metadata], metadata: ArrayV2Metadata | ArrayV2MetadataDict, store_path: StorePath, - config: ArrayConfig | None = None, + config: ArrayConfigLike | None = None, ) -> None: ... @overload @@ -254,14 +254,14 @@ def __init__( self: AsyncArray[ArrayV3Metadata], metadata: ArrayV3Metadata | ArrayV3MetadataDict, store_path: StorePath, - config: ArrayConfig | None = None, + config: ArrayConfigLike | None = None, ) -> None: ... def __init__( self, metadata: ArrayMetadata | ArrayMetadataDict, store_path: StorePath, - config: ArrayConfig | None = None, + config: ArrayConfigLike | None = None, ) -> None: if isinstance(metadata, dict): zarr_format = metadata["zarr_format"] @@ -275,12 +275,11 @@ def __init__( raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3") metadata_parsed = parse_array_metadata(metadata) - - config = ArrayConfig.from_dict({}) if config is None else config + config_parsed = parse_array_config(config) object.__setattr__(self, "metadata", metadata_parsed) object.__setattr__(self, "store_path", store_path) - object.__setattr__(self, "_config", config) + object.__setattr__(self, "_config", config_parsed) object.__setattr__(self, "codec_pipeline", create_codec_pipeline(metadata=metadata_parsed)) # this overload defines the function signature when zarr_format is 2 @@ -304,7 +303,7 @@ async def create( # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV2Metadata]: ... # this overload defines the function signature when zarr_format is 3 @@ -333,7 +332,7 @@ async def create( # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV3Metadata]: ... @overload @@ -361,7 +360,7 @@ async def create( # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV3Metadata]: ... @overload @@ -395,7 +394,7 @@ async def create( # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: ... @classmethod @@ -430,7 +429,7 @@ async def create( # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Method to create a new asynchronous array instance. @@ -508,7 +507,7 @@ async def create( Whether to raise an error if the store already exists (default is False). data : npt.ArrayLike, optional The data to be inserted into the array (default is None). - config : ArrayConfig or ArrayConfigLike, optional + config : ArrayConfigLike, optional Runtime configuration for the array. Returns @@ -571,7 +570,7 @@ async def _create( # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Method to create a new asynchronous array instance. See :func:`AsyncArray.create` for more details. @@ -1745,7 +1744,7 @@ def create( compressor: dict[str, JSON] | None = None, # runtime overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> Array: """Creates a new Array instance from an initialized store. @@ -1874,7 +1873,7 @@ def _create( compressor: dict[str, JSON] | None = None, # runtime overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> Array: """Creates a new Array instance from an initialized store. See :func:`Array.create` for more details. @@ -3814,7 +3813,8 @@ async def init_array( chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, overwrite: bool = False, -) -> ArrayV3Metadata | ArrayV2Metadata: + config: ArrayConfigLike | None, +) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: """Create and persist an array metadata document. Parameters @@ -3893,11 +3893,13 @@ async def init_array( Zarr format 3 only. Zarr format 2 arrays should not use this parameter. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. + config : ArrayConfigLike or None, optional + Configuration for this array. Returns ------- - ArrayV3Metadata | ArrayV2Metadata - The array metadata document. + AsyncArray + The AsyncArray. """ if zarr_format is None: @@ -3997,14 +3999,9 @@ async def init_array( attributes=attributes, ) - # save the metadata to disk - # TODO: make this easier -- it should be a simple function call that takes a {key: buffer} - coros = ( - (store_path / key).set(value) - for key, value in meta.to_buffer_dict(default_buffer_prototype()).items() - ) - await gather(*coros) - return meta + arr = AsyncArray(metadata=meta, store_path=store_path, config=config) + await arr._save_metadata(meta, ensure_parents=True) + return arr async def create_array( @@ -4027,7 +4024,7 @@ async def create_array( dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, write_data: bool = True, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array. @@ -4117,7 +4114,7 @@ async def create_array( Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. - config : ArrayConfig or ArrayConfigLike, optional + config : ArrayConfigLike, optional Runtime configuration for the array. write_data : bool If a pre-existing array-like object was provided to this function via the ``data`` parameter @@ -4143,13 +4140,12 @@ async def create_array( """ mode: Literal["a"] = "a" - config_parsed = parse_array_config(config) store_path = await make_store_path(store, path=name, mode=mode, storage_options=storage_options) data_parsed, shape_parsed, dtype_parsed = _parse_data_params( data=data, shape=shape, dtype=dtype ) - meta = await init_array( + result = await init_array( store_path=store_path, shape=shape_parsed, dtype=dtype_parsed, @@ -4165,9 +4161,9 @@ async def create_array( chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, overwrite=overwrite, + config=config, ) - result = AsyncArray(metadata=meta, store_path=store_path, config=config_parsed) if write_data is True and data_parsed is not None: await result._set_selection( BasicIndexer(..., shape=result.shape, chunk_grid=result.metadata.chunk_grid), diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index b1a6a3cad0..59d3cc6b40 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -21,7 +21,7 @@ from zarr.core.common import ChunkCoords -class ArrayConfigLike(TypedDict): +class ArrayConfigParams(TypedDict): """ A TypedDict model of the attributes of an ArrayConfig class, but with no required fields. This allows for partial construction of an ArrayConfig, with the assumption that the unset @@ -56,13 +56,13 @@ def __init__(self, order: MemoryOrder, write_empty_chunks: bool) -> None: object.__setattr__(self, "write_empty_chunks", write_empty_chunks_parsed) @classmethod - def from_dict(cls, data: ArrayConfigLike) -> Self: + def from_dict(cls, data: ArrayConfigParams) -> Self: """ Create an ArrayConfig from a dict. The keys of that dict are a subset of the attributes of the ArrayConfig class. Any keys missing from that dict will be set to the the values in the ``array`` namespace of ``zarr.config``. """ - kwargs_out: ArrayConfigLike = {} + kwargs_out: ArrayConfigParams = {} for f in fields(ArrayConfig): field_name = cast(Literal["order", "write_empty_chunks"], f.name) if field_name not in data: @@ -72,7 +72,10 @@ def from_dict(cls, data: ArrayConfigLike) -> Self: return cls(**kwargs_out) -def parse_array_config(data: ArrayConfig | ArrayConfigLike | None) -> ArrayConfig: +ArrayConfigLike = ArrayConfig | ArrayConfigParams + + +def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig: """ Convert various types of data to an ArrayConfig. """ diff --git a/tests/test_array.py b/tests/test_array.py index 4838129561..b81f966e20 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -829,66 +829,6 @@ def test_append_bad_shape(store: MemoryStore, zarr_format: ZarrFormat) -> None: z.append(b) -@pytest.mark.parametrize("order", ["C", "F", None]) -@pytest.mark.parametrize("store", ["memory"], indirect=True) -def test_array_create_metadata_order_v2( - order: MemoryOrder | None, zarr_format: int, store: MemoryStore -) -> None: - """ - Test that the ``order`` attribute in zarr v2 array metadata is set correctly via the ``order`` - keyword argument to ``Array.create``. When ``order`` is ``None``, the value of the - ``array.order`` config is used. - """ - arr = zarr.create_array(store=store, shape=(2, 2), order=order, zarr_format=2, dtype="i4") - - expected = order or zarr.config.get("array.order") - assert arr.metadata.zarr_format == 2 # guard for mypy - assert arr.metadata.order == expected - - -@pytest.mark.parametrize("order_config", ["C", "F", None]) -@pytest.mark.parametrize("store", ["memory"], indirect=True) -def test_array_create_order( - order_config: MemoryOrder | None, - zarr_format: ZarrFormat, - store: MemoryStore, -) -> None: - """ - Test that the arrays generated by array indexing have a memory order defined by the config order - value - """ - config: ArrayConfigLike = {} - if order_config is None: - config = {} - expected = zarr.config.get("array.order") - else: - config = {"order": order_config} - expected = order_config - - arr = zarr.create_array( - store=store, shape=(2, 2), zarr_format=zarr_format, dtype="i4", config=config - ) - - vals = np.asarray(arr) - if expected == "C": - assert vals.flags.c_contiguous - elif expected == "F": - assert vals.flags.f_contiguous - else: - raise AssertionError - - -@pytest.mark.parametrize("write_empty_chunks", [True, False]) -def test_write_empty_chunks_config(write_empty_chunks: bool) -> None: - """ - Test that the value of write_empty_chunks is sensitive to the global config when not set - explicitly - """ - with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}): - arr = zarr.create_array({}, shape=(2, 2), dtype="i4") - assert arr._async_array._config.write_empty_chunks == write_empty_chunks - - @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("write_empty_chunks", [True, False]) @pytest.mark.parametrize("fill_value", [0, 5]) @@ -992,339 +932,396 @@ def test_auto_partition_auto_shards( assert auto_shards == expected_shards -def test_chunks_and_shards() -> None: - store = StorePath(MemoryStore()) - shape = (100, 100) - chunks = (5, 5) - shards = (10, 10) - - arr_v3 = zarr.create_array(store=store / "v3", shape=shape, chunks=chunks, dtype="i4") - assert arr_v3.chunks == chunks - assert arr_v3.shards is None - - arr_v3_sharding = zarr.create_array( - store=store / "v3_sharding", - shape=shape, - chunks=chunks, - shards=shards, - dtype="i4", - ) - assert arr_v3_sharding.chunks == chunks - assert arr_v3_sharding.shards == shards - - arr_v2 = zarr.create_array( - store=store / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4" - ) - assert arr_v2.chunks == chunks - assert arr_v2.shards is None - - -def test_create_array_default_fill_values() -> None: - a = zarr.create_array(MemoryStore(), shape=(5,), chunks=(5,), dtype=" None: - """ - Test that the default ``filters`` and ``compressors`` are removed when ``create_array`` is invoked. - """ +class TestCreateArray: + @staticmethod + def test_chunks_and_shards(store: Store) -> None: + spath = StorePath(store) + shape = (100, 100) + chunks = (5, 5) + shards = (10, 10) + + arr_v3 = zarr.create_array(store=spath / "v3", shape=shape, chunks=chunks, dtype="i4") + assert arr_v3.chunks == chunks + assert arr_v3.shards is None + + arr_v3_sharding = zarr.create_array( + store=spath / "v3_sharding", + shape=shape, + chunks=chunks, + shards=shards, + dtype="i4", + ) + assert arr_v3_sharding.chunks == chunks + assert arr_v3_sharding.shards == shards - # v2 - arr = await create_array( - store=store, - dtype=dtype, - shape=(10,), - zarr_format=2, - compressors=empty_value, - filters=empty_value, + arr_v2 = zarr.create_array( + store=spath / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4" + ) + assert arr_v2.chunks == chunks + assert arr_v2.shards is None + + @staticmethod + @pytest.mark.parametrize( + ("dtype", "fill_value_expected"), [(" None: + a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) + assert a.fill_value == fill_value_expected + + @staticmethod + @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + @pytest.mark.parametrize("empty_value", [None, ()]) + async def test_no_filters_compressors(store: MemoryStore, dtype: str, empty_value: Any) -> None: + """ + Test that the default ``filters`` and ``compressors`` are removed when ``create_array`` is invoked. + """ + + # v2 + arr = await create_array( + store=store, + dtype=dtype, + shape=(10,), + zarr_format=2, + compressors=empty_value, + filters=empty_value, + ) + # Test metadata explicitly + assert arr.metadata.zarr_format == 2 # guard for mypy + # The v2 metadata stores None and () separately + assert arr.metadata.filters == empty_value + # The v2 metadata does not allow tuple for compressor, therefore it is turned into None + assert arr.metadata.compressor is None + + assert arr.filters == () + assert arr.compressors == () + + # v3 + arr = await create_array( + store=store, + dtype=dtype, + shape=(10,), + compressors=empty_value, + filters=empty_value, + ) + assert arr.metadata.zarr_format == 3 # guard for mypy + if dtype == "str": + assert arr.metadata.codecs == (VLenUTF8Codec(),) + assert arr.serializer == VLenUTF8Codec() + else: + assert arr.metadata.codecs == (BytesCodec(),) + assert arr.serializer == BytesCodec() + + @staticmethod + @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + @pytest.mark.parametrize( + "compressors", + [ + "auto", + None, + (), + (ZstdCodec(level=3),), + (ZstdCodec(level=3), GzipCodec(level=0)), + ZstdCodec(level=3), + {"name": "zstd", "configuration": {"level": 3}}, + ({"name": "zstd", "configuration": {"level": 3}},), + ], ) - assert arr.metadata.zarr_format == 3 # guard for mypy - if dtype == "str": - assert arr.metadata.codecs == (VLenUTF8Codec(),) - assert arr.serializer == VLenUTF8Codec() - else: - assert arr.metadata.codecs == (BytesCodec(),) - assert arr.serializer == BytesCodec() - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) -@pytest.mark.parametrize( - "compressors", - [ - "auto", - None, - (), - (ZstdCodec(level=3),), - (ZstdCodec(level=3), GzipCodec(level=0)), - ZstdCodec(level=3), - {"name": "zstd", "configuration": {"level": 3}}, - ({"name": "zstd", "configuration": {"level": 3}},), - ], -) -@pytest.mark.parametrize( - "filters", - [ - "auto", - None, - (), - ( - TransposeCodec( - order=[ - 0, - ] + @pytest.mark.parametrize( + "filters", + [ + "auto", + None, + (), + ( + TransposeCodec( + order=[ + 0, + ] + ), ), - ), - ( - TransposeCodec( - order=[ - 0, - ] + ( + TransposeCodec( + order=[ + 0, + ] + ), + TransposeCodec( + order=[ + 0, + ] + ), ), TransposeCodec( order=[ 0, ] ), - ), - TransposeCodec( - order=[ - 0, - ] - ), - {"name": "transpose", "configuration": {"order": [0]}}, - ({"name": "transpose", "configuration": {"order": [0]}},), - ], -) -@pytest.mark.parametrize(("chunks", "shards"), [((6,), None), ((3,), (6,))]) -async def test_create_array_v3_chunk_encoding( - store: MemoryStore, - compressors: CompressorsLike, - filters: FiltersLike, - dtype: str, - chunks: tuple[int, ...], - shards: tuple[int, ...] | None, -) -> None: - """ - Test various possibilities for the compressors and filters parameter to create_array - """ - arr = await create_array( - store=store, - dtype=dtype, - shape=(12,), - chunks=chunks, - shards=shards, - zarr_format=3, - filters=filters, - compressors=compressors, - ) - filters_expected, _, compressors_expected = _parse_chunk_encoding_v3( - filters=filters, compressors=compressors, serializer="auto", dtype=np.dtype(dtype) - ) - assert arr.filters == filters_expected - assert arr.compressors == compressors_expected - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) -@pytest.mark.parametrize( - "compressors", - [ - "auto", - None, - numcodecs.Zstd(level=3), - (), - (numcodecs.Zstd(level=3),), - ], -) -@pytest.mark.parametrize( - "filters", ["auto", None, numcodecs.GZip(level=1), (numcodecs.GZip(level=1),)] -) -async def test_create_array_v2_chunk_encoding( - store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str -) -> None: - arr = await create_array( - store=store, - dtype=dtype, - shape=(10,), - zarr_format=2, - compressors=compressors, - filters=filters, - ) - filters_expected, compressor_expected = _parse_chunk_encoding_v2( - filters=filters, compressor=compressors, dtype=np.dtype(dtype) - ) - assert arr.metadata.zarr_format == 2 # guard for mypy - assert arr.metadata.compressor == compressor_expected - assert arr.metadata.filters == filters_expected - - # Normalize for property getters - compressor_expected = () if compressor_expected is None else (compressor_expected,) - filters_expected = () if filters_expected is None else filters_expected - - assert arr.compressors == compressor_expected - assert arr.filters == filters_expected - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) -async def test_create_array_v3_default_filters_compressors(store: MemoryStore, dtype: str) -> None: - """ - Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with - ``zarr_format`` = 3 and ``filters`` and ``compressors`` are not specified. - """ - arr = await create_array( - store=store, - dtype=dtype, - shape=(10,), - zarr_format=3, - ) - expected_filters, expected_serializer, expected_compressors = _get_default_chunk_encoding_v3( - np_dtype=np.dtype(dtype) - ) - assert arr.filters == expected_filters - assert arr.serializer == expected_serializer - assert arr.compressors == expected_compressors - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) -async def test_create_array_v2_default_filters_compressors(store: MemoryStore, dtype: str) -> None: - """ - Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with - ``zarr_format`` = 2 and ``filters`` and ``compressors`` are not specified. - """ - arr = await create_array( - store=store, - dtype=dtype, - shape=(10,), - zarr_format=2, + {"name": "transpose", "configuration": {"order": [0]}}, + ({"name": "transpose", "configuration": {"order": [0]}},), + ], ) - expected_filters, expected_compressors = _get_default_chunk_encoding_v2( - np_dtype=np.dtype(dtype) + @pytest.mark.parametrize(("chunks", "shards"), [((6,), None), ((3,), (6,))]) + async def test_v3_chunk_encoding( + store: MemoryStore, + compressors: CompressorsLike, + filters: FiltersLike, + dtype: str, + chunks: tuple[int, ...], + shards: tuple[int, ...] | None, + ) -> None: + """ + Test various possibilities for the compressors and filters parameter to create_array + """ + arr = await create_array( + store=store, + dtype=dtype, + shape=(12,), + chunks=chunks, + shards=shards, + zarr_format=3, + filters=filters, + compressors=compressors, + ) + filters_expected, _, compressors_expected = _parse_chunk_encoding_v3( + filters=filters, compressors=compressors, serializer="auto", dtype=np.dtype(dtype) + ) + assert arr.filters == filters_expected + assert arr.compressors == compressors_expected + + @staticmethod + @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + @pytest.mark.parametrize( + "compressors", + [ + "auto", + None, + numcodecs.Zstd(level=3), + (), + (numcodecs.Zstd(level=3),), + ], ) - assert arr.metadata.zarr_format == 2 # guard for mypy - assert arr.metadata.filters == expected_filters - assert arr.metadata.compressor == expected_compressors - - # Normalize for property getters - expected_filters = () if expected_filters is None else expected_filters - expected_compressors = () if expected_compressors is None else (expected_compressors,) - assert arr.filters == expected_filters - assert arr.compressors == expected_compressors - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -async def test_create_array_v2_no_shards(store: MemoryStore) -> None: - """ - Test that creating a Zarr v2 array with ``shard_shape`` set to a non-None value raises an error. - """ - msg = re.escape( - "Zarr format 2 arrays can only be created with `shard_shape` set to `None`. Got `shard_shape=(5,)` instead." + @pytest.mark.parametrize( + "filters", ["auto", None, numcodecs.GZip(level=1), (numcodecs.GZip(level=1),)] ) - with pytest.raises(ValueError, match=msg): - _ = await create_array( + async def test_v2_chunk_encoding( + store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str + ) -> None: + arr = await create_array( store=store, - dtype="uint8", + dtype=dtype, shape=(10,), - shards=(5,), zarr_format=2, + compressors=compressors, + filters=filters, ) - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("impl", ["sync", "async"]) -async def test_create_array_data(impl: Literal["sync", "async"], store: Store) -> None: - """ - Test that we can invoke ``create_array`` with a ``data`` parameter. - """ - data = np.arange(10) - name = "foo" - arr: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array - if impl == "sync": - arr = sync_api.create_array(store, name=name, data=data) - stored = arr[:] - elif impl == "async": - arr = await create_array(store, name=name, data=data, zarr_format=3) - stored = await arr._get_selection( - BasicIndexer(..., shape=arr.shape, chunk_grid=arr.metadata.chunk_grid), - prototype=default_buffer_prototype(), + filters_expected, compressor_expected = _parse_chunk_encoding_v2( + filters=filters, compressor=compressors, dtype=np.dtype(dtype) ) - else: - raise ValueError(f"Invalid impl: {impl}") + assert arr.metadata.zarr_format == 2 # guard for mypy + assert arr.metadata.compressor == compressor_expected + assert arr.metadata.filters == filters_expected - assert np.array_equal(stored, data) + # Normalize for property getters + compressor_expected = () if compressor_expected is None else (compressor_expected,) + filters_expected = () if filters_expected is None else filters_expected + assert arr.compressors == compressor_expected + assert arr.filters == filters_expected -@pytest.mark.parametrize("store", ["memory"], indirect=True) -async def test_create_array_data_invalid_params(store: Store) -> None: - """ - Test that failing to specify data AND shape / dtype results in a ValueError - """ - with pytest.raises(ValueError, match="shape was not specified"): - await create_array(store, data=None, shape=None, dtype=None) - - # we catch shape=None first, so specifying a dtype should raise the same exception as before - with pytest.raises(ValueError, match="shape was not specified"): - await create_array(store, data=None, shape=None, dtype="uint8") - - with pytest.raises(ValueError, match="dtype was not specified"): - await create_array(store, data=None, shape=(10, 10)) - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -async def test_create_array_data_ignored_params(store: Store) -> None: - """ - Test that specify data AND shape AND dtype results in a warning - """ - data = np.arange(10) - with pytest.raises( - ValueError, match="The data parameter was used, but the shape parameter was also used." - ): - await create_array(store, data=data, shape=data.shape, dtype=None, overwrite=True) + @staticmethod + @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + async def test_default_filters_compressors( + store: MemoryStore, dtype: str, zarr_format: ZarrFormat + ) -> None: + """ + Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with ``filters`` and ``compressors`` unspecified. + """ + arr = await create_array( + store=store, + dtype=dtype, + shape=(10,), + zarr_format=zarr_format, + ) + if zarr_format == 3: + expected_filters, expected_serializer, expected_compressors = ( + _get_default_chunk_encoding_v3(np_dtype=np.dtype(dtype)) + ) - # we catch shape first, so specifying a dtype should raise the same warning as before - with pytest.raises( - ValueError, match="The data parameter was used, but the shape parameter was also used." - ): - await create_array(store, data=data, shape=data.shape, dtype=data.dtype, overwrite=True) + elif zarr_format == 2: + default_filters, default_compressors = _get_default_chunk_encoding_v2( + np_dtype=np.dtype(dtype) + ) + if default_filters is None: + expected_filters = () + else: + expected_filters = default_filters + if default_compressors is None: + expected_compressors = () + else: + expected_compressors = (default_compressors,) + expected_serializer = None + else: + raise ValueError(f"Invalid zarr_format: {zarr_format}") + + assert arr.filters == expected_filters + assert arr.serializer == expected_serializer + assert arr.compressors == expected_compressors + + @staticmethod + async def test_v2_no_shards(store: Store) -> None: + """ + Test that creating a Zarr v2 array with ``shard_shape`` set to a non-None value raises an error. + """ + msg = re.escape( + "Zarr format 2 arrays can only be created with `shard_shape` set to `None`. Got `shard_shape=(5,)` instead." + ) + with pytest.raises(ValueError, match=msg): + _ = await create_array( + store=store, + dtype="uint8", + shape=(10,), + shards=(5,), + zarr_format=2, + ) - with pytest.raises( - ValueError, match="The data parameter was used, but the dtype parameter was also used." - ): - await create_array(store, data=data, shape=None, dtype=data.dtype, overwrite=True) + @staticmethod + @pytest.mark.parametrize("impl", ["sync", "async"]) + async def test_with_data(impl: Literal["sync", "async"], store: Store) -> None: + """ + Test that we can invoke ``create_array`` with a ``data`` parameter. + """ + data = np.arange(10) + name = "foo" + arr: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array + if impl == "sync": + arr = sync_api.create_array(store, name=name, data=data) + stored = arr[:] + elif impl == "async": + arr = await create_array(store, name=name, data=data, zarr_format=3) + stored = await arr._get_selection( + BasicIndexer(..., shape=arr.shape, chunk_grid=arr.metadata.chunk_grid), + prototype=default_buffer_prototype(), + ) + else: + raise ValueError(f"Invalid impl: {impl}") + + assert np.array_equal(stored, data) + + @staticmethod + async def test_with_data_invalid_params(store: Store) -> None: + """ + Test that failing to specify data AND shape / dtype results in a ValueError + """ + with pytest.raises(ValueError, match="shape was not specified"): + await create_array(store, data=None, shape=None, dtype=None) + + # we catch shape=None first, so specifying a dtype should raise the same exception as before + with pytest.raises(ValueError, match="shape was not specified"): + await create_array(store, data=None, shape=None, dtype="uint8") + + with pytest.raises(ValueError, match="dtype was not specified"): + await create_array(store, data=None, shape=(10, 10)) + + @staticmethod + async def test_data_ignored_params(store: Store) -> None: + """ + Test that specifying data AND shape AND dtype results in a ValueError + """ + data = np.arange(10) + with pytest.raises( + ValueError, match="The data parameter was used, but the shape parameter was also used." + ): + await create_array(store, data=data, shape=data.shape, dtype=None, overwrite=True) + + # we catch shape first, so specifying a dtype should raise the same warning as before + with pytest.raises( + ValueError, match="The data parameter was used, but the shape parameter was also used." + ): + await create_array(store, data=data, shape=data.shape, dtype=data.dtype, overwrite=True) + + with pytest.raises( + ValueError, match="The data parameter was used, but the dtype parameter was also used." + ): + await create_array(store, data=data, shape=None, dtype=data.dtype, overwrite=True) + + @staticmethod + @pytest.mark.parametrize("order_config", ["C", "F", None]) + def test_order( + order_config: MemoryOrder | None, + zarr_format: ZarrFormat, + store: MemoryStore, + ) -> None: + """ + Test that the arrays generated by array indexing have a memory order defined by the config order + value, and that for zarr v2 arrays, the ``order`` field in the array metadata is set correctly. + """ + config: ArrayConfigLike = {} + if order_config is None: + config = {} + expected = zarr.config.get("array.order") + else: + config = {"order": order_config} + expected = order_config + if zarr_format == 2: + arr = zarr.create_array( + store=store, + shape=(2, 2), + zarr_format=zarr_format, + dtype="i4", + order=expected, + config=config, + ) + # guard for type checking + assert arr.metadata.zarr_format == 2 + assert arr.metadata.order == expected + else: + arr = zarr.create_array( + store=store, shape=(2, 2), zarr_format=zarr_format, dtype="i4", config=config + ) + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError + + @staticmethod + @pytest.mark.parametrize("write_empty_chunks", [True, False]) + async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store) -> None: + """ + Test that the value of write_empty_chunks is sensitive to the global config when not set + explicitly + """ + with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}): + arr = await create_array(store, shape=(2, 2), dtype="i4") + assert arr._config.write_empty_chunks == write_empty_chunks + + @staticmethod + @pytest.mark.parametrize("path", [None, "", "/", "/foo", "foo", "foo/bar"]) + async def test_name(store: Store, zarr_format: ZarrFormat, path: str | None) -> None: + arr = await create_array( + store, shape=(2, 2), dtype="i4", name=path, zarr_format=zarr_format + ) + if path is None: + expected_path = "" + elif path.startswith("/"): + expected_path = path.lstrip("/") + else: + expected_path = path + assert arr.path == expected_path + assert arr.name == "/" + expected_path + + # test that implicit groups were created + path_parts = expected_path.split("/") + if len(path_parts) > 1: + *parents, _ = ["", *accumulate(path_parts, lambda x, y: "/".join([x, y]))] # noqa: FLY002 + for parent_path in parents: + # this will raise if these groups were not created + _ = await zarr.api.asynchronous.open_group( + store=store, path=parent_path, mode="r", zarr_format=zarr_format + ) async def test_scalar_array() -> None: diff --git a/tests/test_properties.py b/tests/test_properties.py index bf98f9d162..71fbeeb839 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -12,6 +12,7 @@ from zarr.abc.store import Store from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata +from zarr.core.sync import sync from zarr.testing.strategies import ( array_metadata, arrays, @@ -30,6 +31,24 @@ def test_roundtrip(data: st.DataObject, zarr_format: int) -> None: assert_array_equal(nparray, zarray[:]) +@given(array=arrays()) +def test_array_creates_implicit_groups(array): + path = array.path + ancestry = path.split("/")[:-1] + for i in range(len(ancestry)): + parent = "/".join(ancestry[: i + 1]) + if array.metadata.zarr_format == 2: + assert ( + sync(array.store.get(f"{parent}/.zgroup", prototype=default_buffer_prototype())) + is not None + ) + elif array.metadata.zarr_format == 3: + assert ( + sync(array.store.get(f"{parent}/zarr.json", prototype=default_buffer_prototype())) + is not None + ) + + @given(data=st.data()) def test_basic_indexing(data: st.DataObject) -> None: zarray = data.draw(arrays())