Skip to content

Commit

Permalink
fixes doctests
Browse files Browse the repository at this point in the history
  • Loading branch information
normanrz committed Jan 5, 2025
1 parent 3bf61c5 commit 635a35f
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 30 deletions.
38 changes: 22 additions & 16 deletions docs/user-guide/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ argument accepted by all array creation functions. For example::
>>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000)
>>> z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors)
>>> z[:] = data
>>> z.metadata.codecs
[BytesCodec(endian=<Endian.little: 'little'>), BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0)]
>>> z.compressors
(BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)

The array above will use Blosc as the primary compressor, using the Zstandard
algorithm (compression level 3) internally within Blosc, and with the
Expand All @@ -188,7 +188,8 @@ which can be used to print useful diagnostics, e.g.::
Order : C
Read-only : False
Store type : LocalStore
Codecs : [{'endian': <Endian.little: 'little'>}, {'typesize': 4, 'cname': <BloscCname.zstd: 'zstd'>, 'clevel': 3, 'shuffle': <BloscShuffle.bitshuffle: 'bitshuffle'>, 'blocksize': 0}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
No. bytes : 400000000 (381.5M)

The :func:`zarr.Array.info_complete` method inspects the underlying store and
Expand All @@ -203,7 +204,8 @@ prints additional diagnostics, e.g.::
Order : C
Read-only : False
Store type : LocalStore
Codecs : [{'endian': <Endian.little: 'little'>}, {'typesize': 4, 'cname': <BloscCname.zstd: 'zstd'>, 'clevel': 3, 'shuffle': <BloscShuffle.bitshuffle: 'bitshuffle'>, 'blocksize': 0}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
No. bytes : 400000000 (381.5M)
No. bytes stored : 9696302
Storage ratio : 41.3
Expand All @@ -223,8 +225,8 @@ here is an array using Gzip compression, level 1::
>>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000)
>>> z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1))
>>> z[:] = data
>>> z.metadata.codecs
[BytesCodec(endian=<Endian.little: 'little'>), GzipCodec(level=1)]
>>> z.compressors
(GzipCodec(level=1),)

Here is an example using LZMA from NumCodecs_ with a custom filter pipeline including LZMA's
built-in delta filter::
Expand All @@ -236,23 +238,24 @@ built-in delta filter::
>>> compressors = LZMA(filters=lzma_filters)
>>> data = np.arange(100000000, dtype='int32').reshape(10000, 10000)
>>> z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors)
>>> z.metadata.codecs
[BytesCodec(endian=<Endian.little: 'little'>), _make_bytes_bytes_codec.<locals>._Codec(codec_name='numcodecs.lzma', codec_config={'id': 'lzma', 'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]})]
>>> z.compressors
(LZMA(codec_name='numcodecs.lzma', codec_config={'id': 'lzma', 'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]}),)

The default compressor can be changed by setting its value in Zarr's
:ref:`user-guide-config`, e.g.::

>>> with zarr.config.set({'array.v2_default_compressor.numeric': {'id': 'blosc'}}):
... z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2)
>>> z.metadata.filters
>>> z.metadata.compressor
Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
>>> z.filters
()
>>> z.compressors
(Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),)

To disable compression, set ``compressors=None`` when creating an array, e.g.::

>>> z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None)
>>> z.metadata.codecs
[BytesCodec(endian=<Endian.little: 'little'>)]
>>> z.compressors
()

.. _user-guide-filters:

Expand Down Expand Up @@ -287,7 +290,9 @@ Here is an example using a delta filter with the Blosc compressor::
Order : C
Read-only : False
Store type : LocalStore
Codecs : [{'codec_name': 'numcodecs.delta', 'codec_config': {'id': 'delta', 'dtype': 'int32'}}, {'endian': <Endian.little: 'little'>}, {'typesize': 4, 'cname': <BloscCname.zstd: 'zstd'>, 'clevel': 1, 'shuffle': <BloscShuffle.shuffle: 'shuffle'>, 'blocksize': 0}]
Filters : (Delta(codec_name='numcodecs.delta', codec_config={'id': 'delta', 'dtype': 'int32'}),)
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=1, shuffle=<BloscShuffle.shuffle: 'shuffle'>, blocksize=0),)
No. bytes : 400000000 (381.5M)

For more information about available filter codecs, see the `Numcodecs
Expand Down Expand Up @@ -600,11 +605,12 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za
Order : C
Read-only : False
Store type : LocalStore
Codecs : [{'chunk_shape': (100, 100), 'codecs': ({'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}), 'index_codecs': ({'endian': <Endian.little: 'little'>}, {}), 'index_location': <ShardingCodecIndexLocation.end: 'end'>}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 100000000 (95.4M)
No. bytes stored : 3981060
Storage ratio : 25.1
Chunks Initialized : 100
Shards Initialized : 100

In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is used.
This means that 10*10 chunks are stored in each shard, and there are 10*10 shards in total.
Expand Down
12 changes: 6 additions & 6 deletions docs/user-guide/consolidated_metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ that can be used.:
chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
separator='/'),
fill_value=np.float64(0.0),
codecs=[BytesCodec(endian=<Endian.little: 'little'>),
ZstdCodec(level=0, checksum=False)],
codecs=(BytesCodec(endian=<Endian.little: 'little'>),
ZstdCodec(level=0, checksum=False)),
attributes={},
dimension_names=None,
zarr_format=3,
Expand All @@ -65,8 +65,8 @@ that can be used.:
chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
separator='/'),
fill_value=np.float64(0.0),
codecs=[BytesCodec(endian=<Endian.little: 'little'>),
ZstdCodec(level=0, checksum=False)],
codecs=(BytesCodec(endian=<Endian.little: 'little'>),
ZstdCodec(level=0, checksum=False)),
attributes={},
dimension_names=None,
zarr_format=3,
Expand All @@ -78,8 +78,8 @@ that can be used.:
chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
separator='/'),
fill_value=np.float64(0.0),
codecs=[BytesCodec(endian=<Endian.little: 'little'>),
ZstdCodec(level=0, checksum=False)],
codecs=(BytesCodec(endian=<Endian.little: 'little'>),
ZstdCodec(level=0, checksum=False)),
attributes={},
dimension_names=None,
zarr_format=3,
Expand Down
6 changes: 4 additions & 2 deletions docs/user-guide/groups.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ property. E.g.::
Order : C
Read-only : False
Store type : MemoryStore
Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 8000000 (7.6M)
No. bytes stored : 1432
Storage ratio : 5586.6
Expand All @@ -123,7 +124,8 @@ property. E.g.::
Order : C
Read-only : False
Store type : MemoryStore
Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 4000000 (3.8M)

Groups also have the :func:`zarr.Group.tree` method, e.g.::
Expand Down
9 changes: 6 additions & 3 deletions docs/user-guide/performance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ To use sharding, you need to specify the ``shards`` parameter when creating the
Order : C
Read-only : False
Store type : MemoryStore
Codecs : [{'chunk_shape': (100, 100, 100), 'codecs': ({'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}), 'index_codecs': ({'endian': <Endian.little: 'little'>}, {}), 'index_location': <ShardingCodecIndexLocation.end: 'end'>}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 100000000000 (93.1G)

.. _user-guide-chunks-order:
Expand All @@ -125,7 +126,8 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : C
Read-only : False
Store type : MemoryStore
Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 400000000 (381.5M)
No. bytes stored : 342588717
Storage ratio : 1.2
Expand All @@ -142,7 +144,8 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : F
Read-only : False
Store type : MemoryStore
Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}]
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 400000000 (381.5M)
No. bytes stored : 342588717
Storage ratio : 1.2
Expand Down
4 changes: 3 additions & 1 deletion src/zarr/core/_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ def __repr__(self) -> str:
Read-only : {_read_only}
Store type : {_store_type}""")

kwargs = dataclasses.asdict(self)
# We can't use dataclasses.asdict, because we only want a shallow dict
kwargs = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}

if self._chunk_shape is None:
# for non-regular chunk grids
kwargs["chunk_shape"] = "<variable>"
Expand Down
1 change: 1 addition & 0 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1624,6 +1624,7 @@ def _info(
_data_type = self.metadata.dtype
else:
_data_type = self.metadata.data_type

return ArrayInfo(
_zarr_format=self.metadata.zarr_format,
_data_type=_data_type,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_array_info(zarr_format: ZarrFormat) -> None:
Order : C
Read-only : True
Store type : MemoryStore
Serializer : {{'endian': <Endian.little: 'little'>}}""")
Serializer : BytesCodec(endian=<Endian.little: 'little'>)""")


@pytest.mark.parametrize("zarr_format", ZARR_FORMATS)
Expand Down Expand Up @@ -110,7 +110,7 @@ def test_array_info_complete(
Order : C
Read-only : True
Store type : MemoryStore
Serializer : {{'endian': <Endian.little: 'little'>}}
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
No. bytes : {count_bytes} ({count_bytes_formatted})
No. bytes stored : {count_bytes_stored_formatted}
Storage ratio : {storage_ratio_formatted}
Expand Down

0 comments on commit 635a35f

Please sign in to comment.