Merge branch 'main' into ig/integer_indexing
ilan-gold authored Feb 25, 2025
2 parents 812d29a + 40485ca commit 324bdce
Showing 40 changed files with 294 additions and 102 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -28,6 +28,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Reenable broken compatibility tests since fixed in `zarr-python`/`numcodecs`
- **Breaking**: move the `zarrs::array::{data_type,fill_value}` modules into the `zarrs_data_type` crate
- Bump `lru` to 0.13
- Use codec identifiers in the example for `experimental_codec_names` remapping
- Allow `{Array,Group}::new_with_metadata()` and `{Array,Group}Builder` to create arrays with `"must_understand": true` additional fields
- `{Array,Group}::[async_]open[_opt]` continue to fail with additional fields with `"must_understand": true`
- Bump `derive_more` to 2.0.0
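
A minimal sketch of the additional-fields behaviour described above (it condenses the `array_additional_fields` test added to `zarrs/src/array.rs` later in this commit; the import paths for `AdditionalField`/`AdditionalFields` and `MemoryStore` are assumed):

```rust,ignore
use std::sync::Arc;
use zarrs::array::{Array, ArrayBuilder, DataType, FillValue};
use zarrs::storage::store::MemoryStore;
use zarrs_metadata::v3::{AdditionalField, AdditionalFields};

let store = Arc::new(MemoryStore::new());

// An additional field that readers are required to understand.
let mut additional_fields = AdditionalFields::new();
additional_fields.insert(
    "key".to_string(),
    AdditionalField::new(serde_json::Map::new(), true),
);

// Creating and storing the array is now permitted...
let array = ArrayBuilder::new(
    vec![8, 8],                     // array shape
    DataType::Float32,
    vec![4, 4].try_into().unwrap(), // regular chunk shape
    FillValue::from(f32::NAN),
)
.additional_fields(additional_fields)
.build(store.clone(), "/array")
.unwrap();
array.store_metadata().unwrap();

// ...but `open` still fails on unknown fields flagged `"must_understand": true`.
assert!(Array::open(store, "/array").is_err());
```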

## [0.19.2] - 2025-02-13

6 changes: 3 additions & 3 deletions Cargo.toml
@@ -32,7 +32,7 @@ version = "0.1.0"
path = "zarrs_data_type"

[workspace.dependencies.zarrs_metadata]
version = "0.3.4"
version = "0.3.5"
path = "zarrs_metadata"

[workspace.dependencies.zarrs_storage]
@@ -52,7 +52,7 @@ version = "0.3.0"
path = "zarrs_object_store"

[workspace.dependencies.zarrs_opendal]
version = "0.5.0"
version = "0.6.0"
path = "zarrs_opendal"

[workspace.dependencies.zarrs_zip]
@@ -63,7 +63,7 @@ path = "zarrs_zip"
version = "0.11"

[workspace.dependencies.opendal]
version = "0.51.0"
version = "0.52.0"

[workspace.dependencies.zip]
version = "2.1.3"
4 changes: 4 additions & 0 deletions doc/correctness_issues.md
@@ -1,4 +1,8 @@
## Correctness Issues with Past Versions
- Prior to `zarrs_metadata` [v0.3.5](https://github.com/LDeakin/zarrs/releases/tag/zarrs_metadata-v0.3.5) (`zarrs` <= 0.19), it was possible for a user to create non-conformant Zarr V2 metadata with `filters: []`
  - Empty filters now always correctly serialise to `null`
  - `zarrs` will indefinitely support reading Zarr V2 data with `filters: []`
  - `zarr-python` shared this bug (see https://github.com/zarr-developers/zarr-python/issues/2842)
- Prior to `zarrs` [v0.11.5](https://github.com/LDeakin/zarrs/releases/tag/v0.11.5), arrays written with the `crc32c` codec have invalid chunk checksums
  - Such arrays will fail to read in Zarr implementations that validate checksums
  - `zarrs` can still read these arrays if the [validate checksums](crate::config::Config#validate-checksums) global configuration option is disabled or the relevant codec option is set explicitly (see the sketch after this list)
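
A minimal sketch of that workaround, assuming the `set_validate_checksums` setter on the global `Config` and the `CodecOptionsBuilder::validate_checksums` option (neither name is confirmed by this page):

```rust,ignore
// Read arrays written by zarrs < 0.11.5 despite their invalid `crc32c` checksums
// by disabling checksum validation globally...
zarrs::config::global_config_mut().set_validate_checksums(false);

// ...or only for specific operations, via explicit codec options.
let options = zarrs::array::codec::CodecOptionsBuilder::new()
    .validate_checksums(false)
    .build();
```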
2 changes: 1 addition & 1 deletion zarrs/Cargo.toml
@@ -48,7 +48,7 @@ bytemuck = { version = "1.14.0", features = ["extern_crate_alloc", "must_cast",
bytes = "1.6.0"
bzip2 = { version = "0.5.0", optional = true, features = ["static"] }
crc32c = { version = "0.6.5", optional = true }
derive_more = { version = "1.0.0", features = ["deref", "display", "from"] }
derive_more = { version = "2.0.0", features = ["deref", "display", "from"] }
flate2 = { version = "1.0.30", optional = true }
futures = { version = "0.3.29", optional = true }
gdeflate-sys = { version = "0.4.1", optional = true }
55 changes: 28 additions & 27 deletions zarrs/doc/status/codecs_experimental.md
@@ -13,36 +13,37 @@ This is intentional to encourage standardisation of some of these experimental c
To enable support, the `numcodecs` codec names need to be remapped to the identifiers of the corresponding `zarrs` codecs:
```rust,ignore
{
use zarrs_metadata::v3::array::codec;
let mut config = crate::config::global_config_mut();
let experimental_codec_names = config.experimental_codec_names_mut();
experimental_codec_names.insert("zfp".to_string(), "numcodecs.zfpy".to_string());
experimental_codec_names.insert("pcodec".to_string(), "numcodecs.pcodec".to_string());
experimental_codec_names.insert("bz2".to_string(), "numcodecs.bz2".to_string());
experimental_codec_names.insert(codec::zfp::IDENTIFIER.to_string(), "numcodecs.zfpy".to_string());
experimental_codec_names.insert(codec::pcodec::IDENTIFIER.to_string(), "numcodecs.pcodec".to_string());
experimental_codec_names.insert(codec::bz2::IDENTIFIER.to_string(), "numcodecs.bz2".to_string());
}
```

| Codec Type | Codec | Default Name | V3 | V2 | Feature Flag |
| -------------- | ------------------------ | --------------------------------------------------- | ------- | ------- | ------------ |
| Array to Array | [bitround] | <https://codec.zarrs.dev/array_to_array/bitround> | &check; | &check; | bitround |
| Array to Bytes | [zfp] | <https://codec.zarrs.dev/array_to_bytes/zfp> | &check; | | zfp |
| | [zfpy] | zfpy || &check; | zfp |
| | [pcodec] | <https://codec.zarrs.dev/array_to_bytes/pcodec> | &check; | &check; | pcodec |
| | [vlen] | <https://codec.zarrs.dev/array_to_bytes/vlen> | &check; | | |
| | [vlen-array] | <https://codec.zarrs.dev/array_to_bytes/vlen_array> | &check; | &check; | |
| | [vlen-bytes] | <https://codec.zarrs.dev/array_to_bytes/vlen_bytes> | &check; | &check; | |
| | [vlen-utf8] | <https://codec.zarrs.dev/array_to_bytes/vlen_utf8> | &check; | &check; | |
| Bytes to Bytes | [bz2] | <https://codec.zarrs.dev/bytes_to_bytes/bz2> | &check; | &check; | bz2 |
| | [gdeflate] | <https://codec.zarrs.dev/bytes_to_bytes/gdeflate> | &check; | | gdeflate |
| | [fletcher32] | <https://codec.zarrs.dev/bytes_to_bytes/fletcher32> | &check; | &check; | fletcher32 |
| Codec Type | Codec | Default Name | V3 | V2 | Feature Flag |
| -------------- | ------------------------------ | --------------------------------------------------- | ------- | ------- | ------------ |
| Array to Array | [codec_bitround] | <https://codec.zarrs.dev/array_to_array/bitround> | &check; | &check; | bitround |
| Array to Bytes | [codec_zfp] | <https://codec.zarrs.dev/array_to_bytes/zfp> | &check; | | zfp |
| | [codec_zfpy] | zfpy || &check; | zfp |
| | [codec_pcodec] | <https://codec.zarrs.dev/array_to_bytes/pcodec> | &check; | &check; | pcodec |
| | [codec_vlen] | <https://codec.zarrs.dev/array_to_bytes/vlen> | &check; | | |
| | [codec_vlen-array] | <https://codec.zarrs.dev/array_to_bytes/vlen_array> | &check; | &check; | |
| | [codec_vlen-bytes] | <https://codec.zarrs.dev/array_to_bytes/vlen_bytes> | &check; | &check; | |
| | [codec_vlen-utf8] | <https://codec.zarrs.dev/array_to_bytes/vlen_utf8> | &check; | &check; | |
| Bytes to Bytes | [codec_bz2] | <https://codec.zarrs.dev/bytes_to_bytes/bz2> | &check; | &check; | bz2 |
| | [codec_gdeflate] | <https://codec.zarrs.dev/bytes_to_bytes/gdeflate> | &check; | | gdeflate |
| | [codec_fletcher32] | <https://codec.zarrs.dev/bytes_to_bytes/fletcher32> | &check; | &check; | fletcher32 |

[bitround]: (crate::array::codec::array_to_array::bitround)
[zfp]: crate::array::codec::array_to_bytes::zfp
[zfpy]: https://numcodecs.readthedocs.io/en/latest/compression/zfpy.html
[pcodec]: crate::array::codec::array_to_bytes::pcodec
[vlen]: crate::array::codec::array_to_bytes::vlen
[vlen-array]: crate::array::codec::array_to_bytes::vlen_array
[vlen-bytes]: crate::array::codec::array_to_bytes::vlen_bytes
[vlen-utf8]: crate::array::codec::array_to_bytes::vlen_utf8
[bz2]: crate::array::codec::bytes_to_bytes::bz2
[gdeflate]: crate::array::codec::bytes_to_bytes::gdeflate
[fletcher32]: crate::array::codec::bytes_to_bytes::fletcher32
[codec_bitround]: crate::array::codec::array_to_array::bitround
[codec_zfp]: crate::array::codec::array_to_bytes::zfp
[codec_zfpy]: https://numcodecs.readthedocs.io/en/latest/compression/zfpy.html
[codec_pcodec]: crate::array::codec::array_to_bytes::pcodec
[codec_vlen]: crate::array::codec::array_to_bytes::vlen
[codec_vlen-array]: crate::array::codec::array_to_bytes::vlen_array
[codec_vlen-bytes]: crate::array::codec::array_to_bytes::vlen_bytes
[codec_vlen-utf8]: crate::array::codec::array_to_bytes::vlen_utf8
[codec_bz2]: crate::array::codec::bytes_to_bytes::bz2
[codec_gdeflate]: crate::array::codec::bytes_to_bytes::gdeflate
[codec_fletcher32]: crate::array::codec::bytes_to_bytes::fletcher32
4 changes: 2 additions & 2 deletions zarrs/doc/status/data_types.md
@@ -4,7 +4,7 @@
| [r* (raw bits)] | [ZEP0001] | &check; | | |
| [bfloat16] | [zarr-specs #130] | &check; | | |
| [string] (experimental) | [ZEP0007 (draft)] | &check; | | |
-| [bytes] (experimental) | [ZEP0007 (draft)] | &check; | | |
+| [dtype_bytes] (experimental) | [ZEP0007 (draft)] | &check; | | |

<sup>† Experimental data types are recommended for evaluation only.</sup>

@@ -25,7 +25,7 @@
[bfloat16]: crate::data_type::DataType::BFloat16
[r* (raw bits)]: crate::data_type::DataType::RawBits
[string]: crate::data_type::DataType::String
-[bytes]: crate::data_type::DataType::Bytes
+[dtype_bytes]: crate::data_type::DataType::Bytes

[ZEP0001]: https://zarr.dev/zeps/accepted/ZEP0001.html
[zarr-specs #130]: https://github.com/zarr-developers/zarr-specs/issues/130
73 changes: 69 additions & 4 deletions zarrs/src/array.rs
@@ -92,6 +92,7 @@ pub use chunk_cache::{
pub use array_sharded_ext::ArrayShardedExt;
#[cfg(feature = "sharding")]
pub use array_sync_sharded_readable_ext::{ArrayShardedReadableExt, ArrayShardedReadableExtCache};
use zarrs_metadata::v3::UnsupportedAdditionalFieldError;
// TODO: Add AsyncArrayShardedReadableExt and AsyncArrayShardedReadableExtCache

use crate::{
@@ -611,7 +612,7 @@ impl<TStorage: ?Sized> Array<TStorage> {
ArrayMetadata::V2(_metadata) => {
// NOTE: The codec related options in ArrayMetadataOptions do not impact V2 codecs
}
-};
+}

// Convert version
match (metadata, options.metadata_convert_version()) {
@@ -807,6 +808,22 @@ impl<TStorage: ?Sized> Array<TStorage> {
ArrayMetadata::V3(_) => Ok(self),
}
}

/// Reject the array if it contains additional fields with `"must_understand": true`.
fn validate_metadata(metadata: &ArrayMetadata) -> Result<(), ArrayCreateError> {
let additional_fields = match &metadata {
ArrayMetadata::V2(metadata) => &metadata.additional_fields,
ArrayMetadata::V3(metadata) => &metadata.additional_fields,
};
for (name, field) in additional_fields {
if field.must_understand() {
return Err(ArrayCreateError::UnsupportedAdditionalFieldError(
UnsupportedAdditionalFieldError::new(name.clone(), field.as_value().clone()),
));
}
}
Ok(())
}
}

#[cfg(feature = "ndarray")]
@@ -938,6 +955,7 @@ pub fn bytes_to_ndarray<T: bytemuck::Pod>(
mod tests {
use crate::storage::store::MemoryStore;
use zarrs_filesystem::FilesystemStore;
use zarrs_metadata::v3::AdditionalField;

use super::*;

@@ -1184,11 +1202,21 @@ mod tests {
#[allow(dead_code)]
fn array_v3_numcodecs(path_in: &str) {
{
use zarrs_metadata::v3::array::codec;
let mut config = crate::config::global_config_mut();
let experimental_codec_names = config.experimental_codec_names_mut();
experimental_codec_names.insert("zfp".to_string(), "numcodecs.zfpy".to_string());
experimental_codec_names.insert("pcodec".to_string(), "numcodecs.pcodec".to_string());
experimental_codec_names.insert("bz2".to_string(), "numcodecs.bz2".to_string());
experimental_codec_names.insert(
codec::zfp::IDENTIFIER.to_string(),
"numcodecs.zfpy".to_string(),
);
experimental_codec_names.insert(
codec::pcodec::IDENTIFIER.to_string(),
"numcodecs.pcodec".to_string(),
);
experimental_codec_names.insert(
codec::bz2::IDENTIFIER.to_string(),
"numcodecs.bz2".to_string(),
);
}

let store = Arc::new(FilesystemStore::new(path_in).unwrap());
@@ -1320,4 +1348,41 @@ mod tests {
// false,
// );
// }

#[test]
fn array_additional_fields() {
let store = Arc::new(MemoryStore::new());
let array_path = "/group/array";

for must_understand in [true, false] {
let additional_field = serde_json::Map::new();
let additional_field = AdditionalField::new(additional_field, must_understand);
let mut additional_fields = AdditionalFields::new();
additional_fields.insert("key".to_string(), additional_field);

// Permit array creation with manually added additional fields
let array = ArrayBuilder::new(
vec![8, 8], // array shape
DataType::Float32,
vec![4, 4].try_into().unwrap(),
FillValue::from(ZARR_NAN_F32),
)
.bytes_to_bytes_codecs(vec![
#[cfg(feature = "gzip")]
Arc::new(codec::GzipCodec::new(5).unwrap()),
])
.additional_fields(additional_fields)
.build(store.clone(), array_path)
.unwrap();
array.store_metadata().unwrap();

let array = Array::open(store.clone(), array_path);
if must_understand {
// Disallow array opening with unknown `"must_understand": true` additional fields
assert!(array.is_err());
} else {
assert!(array.is_ok());
}
}
}
}
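
Downstream code that opens arrays can distinguish this new failure mode from other errors; a brief sketch, reusing `store` and `array_path` from the test above and assuming only the `ArrayCreateError::UnsupportedAdditionalFieldError` variant introduced in this commit plus a `Display` implementation on the inner error:

```rust,ignore
use zarrs::array::{Array, ArrayCreateError};

match Array::open(store.clone(), array_path) {
    Ok(_array) => { /* no unknown `"must_understand": true` fields */ }
    Err(ArrayCreateError::UnsupportedAdditionalFieldError(err)) => {
        eprintln!("refusing to open array: {err}");
    }
    Err(other) => eprintln!("failed to open array: {other}"),
}
```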
14 changes: 12 additions & 2 deletions zarrs/src/array/array_async_readable.rs
@@ -42,6 +42,16 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits + 'static> Array<TStorage> {
path: &str,
version: &MetadataRetrieveVersion,
) -> Result<Array<TStorage>, ArrayCreateError> {
let metadata = Self::async_open_metadata(storage.clone(), path, version).await?;
Self::validate_metadata(&metadata)?;
Self::new_with_metadata(storage, path, metadata)
}

async fn async_open_metadata(
storage: Arc<TStorage>,
path: &str,
version: &MetadataRetrieveVersion,
) -> Result<ArrayMetadata, ArrayCreateError> {
let node_path = NodePath::new(path)?;

if let MetadataRetrieveVersion::Default | MetadataRetrieveVersion::V3 = version {
@@ -50,7 +60,7 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits + 'static> Array<TStorage> {
if let Some(metadata) = storage.get(&key_v3).await? {
let metadata: ArrayMetadataV3 = serde_json::from_slice(&metadata)
.map_err(|err| StorageError::InvalidMetadata(key_v3, err.to_string()))?;
-return Self::new_with_metadata(storage, path, ArrayMetadata::V3(metadata));
+return Ok(ArrayMetadata::V3(metadata));
}
}

@@ -69,7 +79,7 @@
})?;
}

-return Self::new_with_metadata(storage, path, ArrayMetadata::V2(metadata));
+return Ok(ArrayMetadata::V2(metadata));
}
}

20 changes: 1 addition & 19 deletions zarrs/src/array/array_builder.rs
@@ -328,25 +328,7 @@ impl ArrayBuilder {
.with_storage_transformers(self.storage_transformers.create_metadatas()),
);

Ok(Array {
storage,
path,
// shape: self.shape.clone(),
data_type: self.data_type.clone(),
chunk_grid: self.chunk_grid.clone(),
chunk_key_encoding: self.chunk_key_encoding.clone(),
fill_value: self.fill_value.clone(),
codecs: Arc::new(CodecChain::new(
self.array_to_array_codecs.clone(),
self.array_to_bytes_codec.clone(),
self.bytes_to_bytes_codecs.clone(),
)),
storage_transformers: self.storage_transformers.clone(),
// attributes: self.attributes.clone(),
dimension_names: self.dimension_names.clone(),
// additional_fields: self.additional_fields.clone(),
metadata: array_metadata,
})
Array::new_with_metadata(storage, path.as_str(), array_metadata)
}

/// Build into an [`Arc<Array>`].
14 changes: 12 additions & 2 deletions zarrs/src/array/array_sync_readable.rs
@@ -47,6 +47,16 @@ impl<TStorage: ?Sized + ReadableStorageTraits + 'static> Array<TStorage> {
path: &str,
version: &MetadataRetrieveVersion,
) -> Result<Self, ArrayCreateError> {
let metadata = Self::open_metadata(&storage, path, version)?;
Self::validate_metadata(&metadata)?;
Self::new_with_metadata(storage, path, metadata)
}

fn open_metadata(
storage: &Arc<TStorage>,
path: &str,
version: &MetadataRetrieveVersion,
) -> Result<ArrayMetadata, ArrayCreateError> {
let node_path = NodePath::new(path)?;

if let MetadataRetrieveVersion::Default | MetadataRetrieveVersion::V3 = version {
Expand All @@ -55,7 +65,7 @@ impl<TStorage: ?Sized + ReadableStorageTraits + 'static> Array<TStorage> {
if let Some(metadata) = storage.get(&key_v3)? {
let metadata: ArrayMetadataV3 = serde_json::from_slice(&metadata)
.map_err(|err| StorageError::InvalidMetadata(key_v3, err.to_string()))?;
-return Self::new_with_metadata(storage, path, ArrayMetadata::V3(metadata));
+return Ok(ArrayMetadata::V3(metadata));
}
}

Expand All @@ -74,7 +84,7 @@ impl<TStorage: ?Sized + ReadableStorageTraits + 'static> Array<TStorage> {
})?;
}

-return Self::new_with_metadata(storage, path, ArrayMetadata::V2(metadata));
+return Ok(ArrayMetadata::V2(metadata));
}
}

2 changes: 1 addition & 1 deletion zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs
@@ -87,7 +87,7 @@ impl BytesCodec {
)));
}
}
-};
+}

if let Some(endian) = &self.endian {
if !endian.is_native() {
4 changes: 2 additions & 2 deletions zarrs/src/array/codec/array_to_bytes/codec_chain.rs
@@ -398,7 +398,7 @@ impl ArrayToBytesCodecTraits for CodecChain {

if Some(codec_index) == self.cache_index {
input_handle = Arc::new(BytesPartialDecoderCache::new(&*input_handle, options)?);
-};
+}

let mut input_handle = {
let array_representation = array_representations.last().unwrap();
@@ -537,7 +537,7 @@ impl ArrayToBytesCodecTraits for CodecChain {
if Some(codec_index) == self.cache_index {
input_handle =
Arc::new(BytesPartialDecoderCache::async_new(&*input_handle, options).await?);
-};
+}

let mut input_handle = {
let array_representation = array_representations.last().unwrap();
@@ -95,7 +95,7 @@ fn do_partial_decode<'a>(
super::IDENTIFIER.to_string(),
));
}
-};
+}
}
}
Ok(decoded_bytes)