From 20089c559484384fc30c229d08731715aefe1aba Mon Sep 17 00:00:00 2001 From: Zhuoqing Fang Date: Mon, 3 Feb 2025 14:56:45 -0800 Subject: [PATCH 1/2] handle fill_value datatype for string-array (#140) * handle fill_value datatype for string-array * fix 0 -> '' * fix: minimise "string"/0 fill value workaround * chore: add V2 string fill value tests --------- Co-authored-by: Zhuoqing Fang Co-authored-by: Lachlan Deakin --- zarrs/tests/data/v2_str0.py | 42 ++++++++++++++++++ .../str_v2_fv_0.zarr/.zarray | 19 ++++++++ .../str_v2_fv_0.zarr/.zattrs | 1 + .../zarr_python_compat/str_v2_fv_0.zarr/0 | Bin 0 -> 15 bytes .../zarr_python_compat/str_v2_fv_0.zarr/1 | Bin 0 -> 12 bytes .../str_v2_fv_null.zarr/.zarray | 19 ++++++++ .../str_v2_fv_null.zarr/.zattrs | 1 + .../zarr_python_compat/str_v2_fv_null.zarr/0 | Bin 0 -> 15 bytes .../zarr_python_compat/str_v2_fv_null.zarr/1 | Bin 0 -> 12 bytes zarrs/tests/zarr_python_compat.rs | 28 ++++++++++++ zarrs_metadata/src/v2_to_v3.rs | 5 +++ 11 files changed, 115 insertions(+) create mode 100755 zarrs/tests/data/v2_str0.py create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zarray create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zattrs create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/0 create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/1 create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zarray create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zattrs create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/0 create mode 100644 zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/1 diff --git a/zarrs/tests/data/v2_str0.py b/zarrs/tests/data/v2_str0.py new file mode 100755 index 00000000..162724ee --- /dev/null +++ b/zarrs/tests/data/v2_str0.py @@ -0,0 +1,42 @@ +#!/usr/bin/env -S uv run +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "zarr==3.0.1", +# ] +# /// + +import zarr + +path_out = "tests/data/zarr_python_compat/str_v2_fv_0.zarr" +array = zarr.create_array( + path_out, + dtype=str, + shape=(5,), + chunks=(2,), + filters=zarr.codecs.vlen_utf8.VLenUTF8(), + compressors=[None], + fill_value=0, + zarr_format=2, + overwrite=True, +) +array[:3] = ["a", "bb", ""] +print(array.info) +# assert (array[:] == ["a", "bb", "", "", ""]).all() # FAILURE + +path_out = "tests/data/zarr_python_compat/str_v2_fv_null.zarr" +array = zarr.create_array( + path_out, + dtype=str, + shape=(5,), + chunks=(2,), + filters=zarr.codecs.vlen_utf8.VLenUTF8(), + compressors=[None], + fill_value=None, + zarr_format=2, + overwrite=True, +) +array[:3] = ["a", "bb", ""] +print(array.info) +print(array[:]) +assert (array[:] == ["a", "bb", "", "", ""]).all() \ No newline at end of file diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zarray b/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zarray new file mode 100644 index 00000000..aae1beed --- /dev/null +++ b/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zarray @@ -0,0 +1,19 @@ +{ + "shape": [ + 5 + ], + "chunks": [ + 2 + ], + "fill_value": 0, + "order": "C", + "filters": [ + { + "id": "vlen-utf8" + } + ], + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2, + "dtype": "|O" +} \ No newline at end of file diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zattrs b/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zattrs new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/0 b/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/0 new file mode 100644 index 0000000000000000000000000000000000000000..72190f219c3972acc03de5b5e7c9c5d1dc09e2d2 GIT binary patch literal 15 TcmZQ#U|?Vb;zTAOk(2}g1Q7u% literal 0 HcmV?d00001 diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/1 b/zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/1 new file mode 100644 index 0000000000000000000000000000000000000000..3ae168742fb512b29aeaf6283b3f2534804fb9be GIT binary patch literal 12 KcmZQ#KmY&$Bme^d literal 0 HcmV?d00001 diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zarray b/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zarray new file mode 100644 index 00000000..a1b39c04 --- /dev/null +++ b/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zarray @@ -0,0 +1,19 @@ +{ + "shape": [ + 5 + ], + "chunks": [ + 2 + ], + "fill_value": null, + "order": "C", + "filters": [ + { + "id": "vlen-utf8" + } + ], + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2, + "dtype": "|O" +} \ No newline at end of file diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zattrs b/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zattrs new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/0 b/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/0 new file mode 100644 index 0000000000000000000000000000000000000000..72190f219c3972acc03de5b5e7c9c5d1dc09e2d2 GIT binary patch literal 15 TcmZQ#U|?Vb;zTAOk(2}g1Q7u% literal 0 HcmV?d00001 diff --git a/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/1 b/zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/1 new file mode 100644 index 0000000000000000000000000000000000000000..3ae168742fb512b29aeaf6283b3f2534804fb9be GIT binary patch literal 12 KcmZQ#KmY&$Bme^d literal 0 HcmV?d00001 diff --git a/zarrs/tests/zarr_python_compat.rs b/zarrs/tests/zarr_python_compat.rs index 54b4d195..7f1974ca 100644 --- a/zarrs/tests/zarr_python_compat.rs +++ b/zarrs/tests/zarr_python_compat.rs @@ -43,3 +43,31 @@ fn zarr_python_compat_fletcher32_v2() -> Result<(), Box> { Ok(()) } + +#[test] +fn zarr_python_v2_compat_str_fv_0() -> Result<(), Box> { + let store = Arc::new(FilesystemStore::new( + "tests/data/zarr_python_compat/str_v2_fv_0.zarr", + )?); + let array = zarrs::array::Array::open(store.clone(), "/")?; + let subset_all = array.subset_all(); + let elements = array.retrieve_array_subset_elements::(&subset_all)?; + + assert_eq!(elements, &["a", "bb", "", "", ""]); + + Ok(()) +} + +#[test] +fn zarr_python_v2_compat_str_fv_null() -> Result<(), Box> { + let store = Arc::new(FilesystemStore::new( + "tests/data/zarr_python_compat/str_v2_fv_null.zarr", + )?); + let array = zarrs::array::Array::open(store.clone(), "/")?; + let subset_all = array.subset_all(); + let elements = array.retrieve_array_subset_elements::(&subset_all)?; + + assert_eq!(elements, &["a", "bb", "", "", ""]); + + Ok(()) +} diff --git a/zarrs_metadata/src/v2_to_v3.rs b/zarrs_metadata/src/v2_to_v3.rs index b922b91a..7c785a23 100644 --- a/zarrs_metadata/src/v2_to_v3.rs +++ b/zarrs_metadata/src/v2_to_v3.rs @@ -124,6 +124,11 @@ pub fn array_metadata_v2_to_v3( )); } } + } else if data_type.name() == "string" { + // Add a special case for `zarr-python` string data with a 0 fill value -> empty string + if let Some(0) = fill_value.try_as_uint::() { + fill_value = FillValueMetadataV3::String(String::new()); + } } let mut codecs: Vec = vec![]; From 7dfb22f52f1083f118473be782b3de945be49fde Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 4 Feb 2025 10:00:09 +1100 Subject: [PATCH 2/2] chore: #140 changelog --- zarrs_metadata/CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/zarrs_metadata/CHANGELOG.md b/zarrs_metadata/CHANGELOG.md index 81f3923f..546c47e6 100644 --- a/zarrs_metadata/CHANGELOG.md +++ b/zarrs_metadata/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- Interpret a `0` fill value as `""` for Zarr V2 string arrays (for `zarr-python` compatibility) ([#140] by [@zqfang]) + +[#140]: https://github.com/LDeakin/zarrs/pull/140 + ## [0.3.1] - 2025-01-29 ### Fixed @@ -66,3 +71,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [0.3.0]: https://github.com/LDeakin/zarrs/releases/tag/zarrs_metadata-v0.3.0 [0.2.0]: https://github.com/LDeakin/zarrs/releases/tag/zarrs_metadata-v0.2.0 [0.1.0]: https://github.com/LDeakin/zarrs/releases/tag/zarrs_metadata-v0.1.0 + +[@zqfang]: https://github.com/zqfang