diff --git a/changes/2718.bugfix.rst b/changes/2718.bugfix.rst new file mode 100644 index 0000000000..48ddf8b5a8 --- /dev/null +++ b/changes/2718.bugfix.rst @@ -0,0 +1,3 @@ +0-dimensional arrays now return a scalar when indexed. Therefore, the return type of ``__getitem__`` changed +to ``NDArrayLikeOrScalar``. This change makes the behavior of 0-dimensional arrays consistent with +``numpy`` scalars. \ No newline at end of file diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 3a3d03bb71..8c032a7805 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -33,6 +33,7 @@ from collections.abc import Iterable from zarr.abc.codec import Codec + from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_key_encodings import ChunkKeyEncoding from zarr.storage import StoreLike @@ -232,7 +233,7 @@ async def load( path: str | None = None, zarr_format: ZarrFormat | None = None, zarr_version: ZarrFormat | None = None, -) -> NDArrayLike | dict[str, NDArrayLike]: +) -> NDArrayLikeOrScalar | dict[str, NDArrayLikeOrScalar]: """Load data from an array or group into memory. Parameters diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index e1f92633cd..b9f8ac0ae7 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -26,7 +26,7 @@ ShardsLike, ) from zarr.core.array_spec import ArrayConfigLike - from zarr.core.buffer import NDArrayLike + from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike from zarr.core.common import ( JSON, @@ -119,7 +119,7 @@ def load( path: str | None = None, zarr_format: ZarrFormat | None = None, zarr_version: ZarrFormat | None = None, -) -> NDArrayLike | dict[str, NDArrayLike]: +) -> NDArrayLikeOrScalar | dict[str, NDArrayLikeOrScalar]: """Load data from an array or group into memory. 
Parameters diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9c2f8a7260..0e9d74ba8a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -35,6 +35,7 @@ from zarr.core.buffer import ( BufferPrototype, NDArrayLike, + NDArrayLikeOrScalar, NDBuffer, default_buffer_prototype, ) @@ -1256,7 +1257,7 @@ async def _get_selection( prototype: BufferPrototype, out: NDBuffer | None = None, fields: Fields | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: # check fields are sensible out_dtype = check_fields(fields, self.dtype) @@ -1298,6 +1299,8 @@ async def _get_selection( out_buffer, drop_axes=indexer.drop_axes, ) + if isinstance(indexer, BasicIndexer) and indexer.shape == (): + return out_buffer.as_scalar() return out_buffer.as_ndarray_like() async def getitem( @@ -1305,7 +1308,7 @@ async def getitem( selection: BasicSelection, *, prototype: BufferPrototype | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: """ Asynchronous function that retrieves a subset of the array's data based on the provided selection. @@ -1318,7 +1321,7 @@ async def getitem( Returns ------- - NDArrayLike + NDArrayLikeOrScalar The retrieved subset of the array's data. Examples @@ -2270,14 +2273,15 @@ def __array__( msg = "`copy=False` is not supported. This method always creates a copy." raise ValueError(msg) - arr_np = self[...] + arr = self[...] + arr_np: NDArrayLike = np.array(arr, dtype=dtype) if dtype is not None: arr_np = arr_np.astype(dtype) return arr_np - def __getitem__(self, selection: Selection) -> NDArrayLike: + def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: """Retrieve data for an item or region of the array. Parameters @@ -2288,8 +2292,8 @@ def __getitem__(self, selection: Selection) -> NDArrayLike: Returns ------- - NDArrayLike - An array-like containing the data for the requested region. + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested region. 
Examples -------- @@ -2535,7 +2539,7 @@ def get_basic_selection( out: NDBuffer | None = None, prototype: BufferPrototype | None = None, fields: Fields | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: """Retrieve data for an item or region of the array. Parameters @@ -2553,8 +2557,8 @@ def get_basic_selection( Returns ------- - NDArrayLike - An array-like containing the data for the requested region. + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested region. Examples -------- @@ -2755,7 +2759,7 @@ def get_orthogonal_selection( out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: """Retrieve data by making a selection for each dimension of the array. For example, if an array has 2 dimensions, allows selecting specific rows and/or columns. The selection for each dimension can be either an integer (indexing a @@ -2777,8 +2781,8 @@ def get_orthogonal_selection( Returns ------- - NDArrayLike - An array-like containing the data for the requested selection. + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested selection. Examples -------- @@ -2991,7 +2995,7 @@ def get_mask_selection( out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: """Retrieve a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True values indicate a selected item. @@ -3011,8 +3015,8 @@ def get_mask_selection( Returns ------- - NDArrayLike - An array-like containing the data for the requested selection. + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested selection. 
Examples -------- @@ -3153,7 +3157,7 @@ def get_coordinate_selection( out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. @@ -3171,8 +3175,8 @@ def get_coordinate_selection( Returns ------- - NDArrayLike - An array-like containing the data for the requested coordinate selection. + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested coordinate selection. Examples -------- @@ -3341,7 +3345,7 @@ def get_block_selection( out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, - ) -> NDArrayLike: + ) -> NDArrayLikeOrScalar: """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. @@ -3359,8 +3363,8 @@ def get_block_selection( Returns ------- - NDArrayLike - An array-like containing the data for the requested block selection. + NDArrayLikeOrScalar + An array-like or scalar containing the data for the requested block selection. 
Examples -------- diff --git a/src/zarr/core/buffer/__init__.py b/src/zarr/core/buffer/__init__.py index ccb41e291c..ebec61a372 100644 --- a/src/zarr/core/buffer/__init__.py +++ b/src/zarr/core/buffer/__init__.py @@ -3,6 +3,7 @@ Buffer, BufferPrototype, NDArrayLike, + NDArrayLikeOrScalar, NDBuffer, default_buffer_prototype, ) @@ -13,6 +14,7 @@ "Buffer", "BufferPrototype", "NDArrayLike", + "NDArrayLikeOrScalar", "NDBuffer", "default_buffer_prototype", "numpy_buffer_prototype", diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index ccab103e0f..70d408d32b 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -105,6 +105,10 @@ def __eq__(self, other: object) -> Self: # type: ignore[explicit-override, over """ +ScalarType = int | float | complex | bytes | str | bool | np.generic +NDArrayLikeOrScalar = ScalarType | NDArrayLike + + def check_item_key_is_1d_contiguous(key: Any) -> None: """Raises error if `key` isn't a 1d contiguous slice""" if not isinstance(key, slice): @@ -419,6 +423,27 @@ def as_numpy_array(self) -> npt.NDArray[Any]: """ ...
+    def as_scalar(self) -> ScalarType:
+        """Return the single element held by this buffer as a scalar.
+
+        The element is read by indexing a 0-d view of the NumPy array with
+        ``()``, so the result keeps the buffer's exact dtype: datetime64 and
+        timedelta64 values retain their unit, and structured values are
+        returned as ``np.void``.
+
+        Raises
+        ------
+        ValueError
+            If the buffer does not contain exactly one element.
+        """
+        if self._data.size != 1:
+            raise ValueError("Buffer does not contain a single scalar value")
+        # Do not round-trip through ``.item()`` and ``dtype.type(...)``: that
+        # drops the unit of timedelta64 values and cannot rebuild structured
+        # scalars. ``reshape(())`` keeps any size-1 buffer shape accepted.
+        scalar: ScalarType = self.as_numpy_array().reshape(())[()]
+        return scalar
+
 @property def dtype(self) -> np.dtype[Any]: return self._data.dtype diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index c197f6f397..998fe156a1 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -29,7 +29,7 @@ if TYPE_CHECKING: from zarr.core.array import Array - from zarr.core.buffer import NDArrayLike + from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import ChunkCoords @@ -937,7 +937,7 @@ class OIndex: array: Array # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool - def __getitem__(self, selection: OrthogonalSelection | Array) -> NDArrayLike: + def __getitem__(self, selection: OrthogonalSelection | Array) -> NDArrayLikeOrScalar: from zarr.core.array import Array # if input is a Zarr array, we materialize it now. @@ -1046,7 +1046,7 @@ def __iter__(self) -> Iterator[ChunkProjection]: class BlockIndex: array: Array - def __getitem__(self, selection: BasicSelection) -> NDArrayLike: + def __getitem__(self, selection: BasicSelection) -> NDArrayLikeOrScalar: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) @@ -1236,7 +1236,9 @@ class VIndex: array: Array # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool - def __getitem__(self, selection: CoordinateSelection | MaskSelection | Array) -> NDArrayLike: + def __getitem__( + self, selection: CoordinateSelection | MaskSelection | Array + ) -> NDArrayLikeOrScalar: from zarr.core.array import Array # if input is a Zarr array, we materialize it now. 
diff --git a/tests/test_api.py b/tests/test_api.py index e9db33f6c5..341245404c 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -23,6 +23,7 @@ save_array, save_group, ) +from zarr.core.buffer import NDArrayLike from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.errors import MetadataValidationError from zarr.storage import MemoryStore @@ -236,7 +237,9 @@ def test_open_with_mode_r(tmp_path: pathlib.Path) -> None: z2 = zarr.open(store=tmp_path, mode="r") assert isinstance(z2, Array) assert z2.fill_value == 1 - assert (z2[:] == 1).all() + result = z2[:] + assert isinstance(result, NDArrayLike) + assert (result == 1).all() with pytest.raises(ValueError): z2[:] = 3 @@ -248,7 +251,9 @@ def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None: zarr.ones(store=tmp_path, shape=(3, 3)) z2 = zarr.open(store=tmp_path, mode="r+") assert isinstance(z2, Array) - assert (z2[:] == 1).all() + result = z2[:] + assert isinstance(result, NDArrayLike) + assert (result == 1).all() z2[:] = 3 @@ -264,7 +269,9 @@ async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: arr[...] = 1 z2 = zarr.open(store=tmp_path, mode="a") assert isinstance(z2, Array) - assert (z2[:] == 1).all() + result = z2[:] + assert isinstance(result, NDArrayLike) + assert (result == 1).all() z2[:] = 3 @@ -276,7 +283,9 @@ def test_open_with_mode_w(tmp_path: pathlib.Path) -> None: arr[...] 
= 3 z2 = zarr.open(store=tmp_path, mode="w", shape=(3, 3)) assert isinstance(z2, Array) - assert not (z2[:] == 3).all() + result = z2[:] + assert isinstance(result, NDArrayLike) + assert not (result == 3).all() z2[:] = 3 @@ -1120,7 +1129,9 @@ def test_open_array_with_mode_r_plus(store: Store) -> None: zarr.ones(store=store, shape=(3, 3)) z2 = zarr.open_array(store=store, mode="r+") assert isinstance(z2, Array) - assert (z2[:] == 1).all() + result = z2[:] + assert isinstance(result, NDArrayLike) + assert (result == 1).all() z2[:] = 3 diff --git a/tests/test_array.py b/tests/test_array.py index b81f966e20..48f436e211 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -37,7 +37,7 @@ chunks_initialized, create_array, ) -from zarr.core.buffer import default_buffer_prototype +from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.buffer.cpu import NDBuffer from zarr.core.chunk_grids import _auto_partition from zarr.core.common import JSON, MemoryOrder, ZarrFormat @@ -654,35 +654,43 @@ def test_resize_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None: ) a = np.arange(105, dtype="i4") z[:] = a + result = z[:] + assert isinstance(result, NDArrayLike) assert (105,) == z.shape - assert (105,) == z[:].shape + assert (105,) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10,) == z.chunks - np.testing.assert_array_equal(a, z[:]) + np.testing.assert_array_equal(a, result) z.resize(205) + result = z[:] + assert isinstance(result, NDArrayLike) assert (205,) == z.shape - assert (205,) == z[:].shape + assert (205,) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10,) == z.chunks np.testing.assert_array_equal(a, z[:105]) np.testing.assert_array_equal(np.zeros(100, dtype="i4"), z[105:]) z.resize(55) + result = z[:] + assert isinstance(result, 
NDArrayLike) assert (55,) == z.shape - assert (55,) == z[:].shape + assert (55,) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10,) == z.chunks - np.testing.assert_array_equal(a[:55], z[:]) + np.testing.assert_array_equal(a[:55], result) # via shape setter new_shape = (105,) z.shape = new_shape + result = z[:] + assert isinstance(result, NDArrayLike) assert new_shape == z.shape - assert new_shape == z[:].shape + assert new_shape == result.shape @pytest.mark.parametrize("store", ["memory"], indirect=True) @@ -697,44 +705,54 @@ def test_resize_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None: ) a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z[:] = a + result = z[:] + assert isinstance(result, NDArrayLike) assert (105, 105) == z.shape - assert (105, 105) == z[:].shape + assert (105, 105) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks - np.testing.assert_array_equal(a, z[:]) + np.testing.assert_array_equal(a, result) z.resize((205, 205)) + result = z[:] + assert isinstance(result, NDArrayLike) assert (205, 205) == z.shape - assert (205, 205) == z[:].shape + assert (205, 205) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a, z[:105, :105]) np.testing.assert_array_equal(np.zeros((100, 205), dtype="i4"), z[105:, :]) np.testing.assert_array_equal(np.zeros((205, 100), dtype="i4"), z[:, 105:]) z.resize((55, 55)) + result = z[:] + assert isinstance(result, NDArrayLike) assert (55, 55) == z.shape - assert (55, 55) == z[:].shape + assert (55, 55) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks - 
np.testing.assert_array_equal(a[:55, :55], z[:]) + np.testing.assert_array_equal(a[:55, :55], result) z.resize((55, 1)) + result = z[:] + assert isinstance(result, NDArrayLike) assert (55, 1) == z.shape - assert (55, 1) == z[:].shape + assert (55, 1) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks - np.testing.assert_array_equal(a[:55, :1], z[:]) + np.testing.assert_array_equal(a[:55, :1], result) z.resize((1, 55)) + result = z[:] + assert isinstance(result, NDArrayLike) assert (1, 55) == z.shape - assert (1, 55) == z[:].shape + assert (1, 55) == result.shape assert np.dtype("i4") == z.dtype - assert np.dtype("i4") == z[:].dtype + assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a[:1, :10], z[:, :10]) np.testing.assert_array_equal(np.zeros((1, 55 - 10), dtype="i4"), z[:, 10:55]) @@ -742,8 +760,10 @@ def test_resize_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None: # via shape setter new_shape = (105, 105) z.shape = new_shape + result = z[:] + assert isinstance(result, NDArrayLike) assert new_shape == z.shape - assert new_shape == z[:].shape + assert new_shape == result.shape @pytest.mark.parametrize("store", ["memory"], indirect=True) @@ -1324,11 +1344,14 @@ async def test_name(store: Store, zarr_format: ZarrFormat, path: str | None) -> ) -async def test_scalar_array() -> None: - arr = zarr.array(1.5) - assert arr[...] == 1.5 - assert arr[()] == 1.5 +@pytest.mark.parametrize("value", [1, 1.4, "a", b"a", np.array(1)]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_scalar_array(value: Any, zarr_format: ZarrFormat) -> None: + arr = zarr.array(value, zarr_format=zarr_format) + assert arr[...] 
== value assert arr.shape == () + assert arr.ndim == 0 + assert isinstance(arr[()], NDArrayLikeOrScalar) async def test_orthogonal_set_total_slice() -> None: @@ -1440,4 +1463,6 @@ async def test_sharding_coordinate_selection() -> None: shards=(2, 4, 4), ) arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4)) - assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() # type: ignore[index] + result = arr[1, [0, 1]] # type: ignore[index] + assert isinstance(result, NDArrayLike) + assert (result == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() diff --git a/tests/test_buffer.py b/tests/test_buffer.py index baef0b8109..33ac0266eb 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -30,6 +30,8 @@ cp = None +import zarr.api.asynchronous + if TYPE_CHECKING: import types @@ -64,7 +66,7 @@ async def test_async_array_prototype() -> None: got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=my_prototype) # ignoring a mypy error here that TestNDArrayLike doesn't meet the NDArrayLike protocol # The test passes, so it clearly does. - assert isinstance(got, TestNDArrayLike) # type: ignore[unreachable] + assert isinstance(got, TestNDArrayLike) assert np.array_equal(expect, got) # type: ignore[unreachable] @@ -117,7 +119,7 @@ async def test_codecs_use_of_prototype() -> None: got = await a.getitem(selection=(slice(0, 10), slice(0, 10)), prototype=my_prototype) # ignoring a mypy error here that TestNDArrayLike doesn't meet the NDArrayLike protocol # The test passes, so it clearly does. 
- assert isinstance(got, TestNDArrayLike) # type: ignore[unreachable] + assert isinstance(got, TestNDArrayLike) assert np.array_equal(expect, got) # type: ignore[unreachable] @@ -151,3 +153,5 @@ def test_numpy_buffer_prototype() -> None: ndbuffer = cpu.buffer_prototype.nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64")) assert isinstance(buffer.as_array_like(), np.ndarray) assert isinstance(ndbuffer.as_ndarray_like(), np.ndarray) + with pytest.raises(ValueError, match="Buffer does not contain a single scalar value"): + ndbuffer.as_scalar() diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index e36a332440..b8122b4ac2 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -23,7 +23,7 @@ if TYPE_CHECKING: from zarr.abc.store import Store - from zarr.core.buffer.core import NDArrayLike + from zarr.core.buffer import NDArrayLike from zarr.core.common import MemoryOrder diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index 2ba57d7a39..403fd80e81 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -16,7 +16,7 @@ ShardingCodecIndexLocation, TransposeCodec, ) -from zarr.core.buffer import default_buffer_prototype +from zarr.core.buffer import NDArrayLike, default_buffer_prototype from zarr.storage import StorePath from ..conftest import ArrayRequest @@ -66,6 +66,7 @@ def test_sharding( assert np.all(arr[empty_region] == arr.metadata.fill_value) read_data = arr[write_region] + assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data) @@ -130,6 +131,7 @@ def test_sharding_partial( assert np.all(read_data == 0) read_data = a[10:, 10:, 10:] + assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data) @@ -275,6 +277,7 @@ def test_nested_sharding( a[:, :, :] = data read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : 
data.shape[2]] + assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data) @@ -322,6 +325,7 @@ def test_nested_sharding_create_array( a[:, :, :] = data read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] + assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data)