From 62702923afbd6ea2c3632d8a34bc360ea55e2ecc Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 22 Feb 2025 08:22:39 +1100 Subject: [PATCH] fix: Error on `{Array,Group}::[async_]open[_opt]` with additional fields with `"must_understand": true` --- CHANGELOG.md | 3 ++ zarrs/src/array.rs | 55 +++++++++++++++++++++++++ zarrs/src/array/array_async_readable.rs | 14 ++++++- zarrs/src/array/array_sync_readable.rs | 14 ++++++- zarrs/src/group.rs | 55 ++++++++++++++++++++++--- 5 files changed, 132 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d46d5555..23ae2057 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Bump `lru` to 0.13 - Use codec identifiers in the example for `experimental_codec_names` remapping +### Fixed +- Error on `{Array,Group}::[async_]open[_opt]` with additional fields with `"must_understand": true` + ## [0.19.2] - 2025-02-13 ### Changed diff --git a/zarrs/src/array.rs b/zarrs/src/array.rs index d9412792..5df4c6be 100644 --- a/zarrs/src/array.rs +++ b/zarrs/src/array.rs @@ -92,6 +92,7 @@ pub use chunk_cache::{ pub use array_sharded_ext::ArrayShardedExt; #[cfg(feature = "sharding")] pub use array_sync_sharded_readable_ext::{ArrayShardedReadableExt, ArrayShardedReadableExtCache}; +use zarrs_metadata::v3::UnsupportedAdditionalFieldError; // TODO: Add AsyncArrayShardedReadableExt and AsyncArrayShardedReadableExtCache use crate::{ @@ -807,6 +808,22 @@ impl Array { ArrayMetadata::V3(_) => Ok(self), } } + + /// Reject the array if it contains additional fields with `"must_understand": true`. + fn validate_metadata(metadata: &ArrayMetadata) -> Result<(), ArrayCreateError> { + let additional_fields = match &metadata { + ArrayMetadata::V2(metadata) => &metadata.additional_fields, + ArrayMetadata::V3(metadata) => &metadata.additional_fields, + }; + for (name, field) in additional_fields { + if field.must_understand() { + return Err(ArrayCreateError::UnsupportedAdditionalFieldError( + UnsupportedAdditionalFieldError::new(name.clone(), field.as_value().clone()), + )); + } + } + Ok(()) + } } #[cfg(feature = "ndarray")] @@ -938,6 +955,7 @@ pub fn bytes_to_ndarray( mod tests { use crate::storage::store::MemoryStore; use zarrs_filesystem::FilesystemStore; + use zarrs_metadata::v3::AdditionalField; use super::*; @@ -1330,4 +1348,41 @@ mod tests { // false, // ); // } + + #[test] + fn array_additional_fields() { + let store = Arc::new(MemoryStore::new()); + let array_path = "/group/array"; + + for must_understand in [true, false] { + let additional_field = serde_json::Map::new(); + let additional_field = AdditionalField::new(additional_field, must_understand); + let mut additional_fields = AdditionalFields::new(); + additional_fields.insert("key".to_string(), additional_field); + + // Permit array creation with manually added additional fields + let array = ArrayBuilder::new( + vec![8, 8], // array shape + DataType::Float32, + vec![4, 4].try_into().unwrap(), + FillValue::from(ZARR_NAN_F32), + ) + .bytes_to_bytes_codecs(vec![ + #[cfg(feature = "gzip")] + Arc::new(codec::GzipCodec::new(5).unwrap()), + ]) + .additional_fields(additional_fields) + .build(store.clone(), array_path) + .unwrap(); + array.store_metadata().unwrap(); + + let array = Array::open(store.clone(), array_path); + if must_understand { + // Disallow array opening with unknown `"must_understand": true` additional fields + assert!(array.is_err()); + } else { + assert!(array.is_ok()); + } + } + } } diff --git a/zarrs/src/array/array_async_readable.rs b/zarrs/src/array/array_async_readable.rs index f418968e..96d9a59a 100644 --- a/zarrs/src/array/array_async_readable.rs +++ b/zarrs/src/array/array_async_readable.rs @@ -42,6 +42,16 @@ impl Array { path: &str, version: &MetadataRetrieveVersion, ) -> Result, ArrayCreateError> { + let metadata = Self::async_open_metadata(storage.clone(), path, version).await?; + Self::validate_metadata(&metadata)?; + Self::new_with_metadata(storage, path, metadata) + } + + async fn async_open_metadata( + storage: Arc, + path: &str, + version: &MetadataRetrieveVersion, + ) -> Result { let node_path = NodePath::new(path)?; if let MetadataRetrieveVersion::Default | MetadataRetrieveVersion::V3 = version { @@ -50,7 +60,7 @@ impl Array { if let Some(metadata) = storage.get(&key_v3).await? { let metadata: ArrayMetadataV3 = serde_json::from_slice(&metadata) .map_err(|err| StorageError::InvalidMetadata(key_v3, err.to_string()))?; - return Self::new_with_metadata(storage, path, ArrayMetadata::V3(metadata)); + return Ok(ArrayMetadata::V3(metadata)); } } @@ -69,7 +79,7 @@ impl Array { })?; } - return Self::new_with_metadata(storage, path, ArrayMetadata::V2(metadata)); + return Ok(ArrayMetadata::V2(metadata)); } } diff --git a/zarrs/src/array/array_sync_readable.rs b/zarrs/src/array/array_sync_readable.rs index d699e944..a5dde3aa 100644 --- a/zarrs/src/array/array_sync_readable.rs +++ b/zarrs/src/array/array_sync_readable.rs @@ -47,6 +47,16 @@ impl Array { path: &str, version: &MetadataRetrieveVersion, ) -> Result { + let metadata = Self::open_metadata(&storage, path, version)?; + Self::validate_metadata(&metadata)?; + Self::new_with_metadata(storage, path, metadata) + } + + fn open_metadata( + storage: &Arc, + path: &str, + version: &MetadataRetrieveVersion, + ) -> Result { let node_path = NodePath::new(path)?; if let MetadataRetrieveVersion::Default | MetadataRetrieveVersion::V3 = version { @@ -55,7 +65,7 @@ impl Array { if let Some(metadata) = storage.get(&key_v3)? { let metadata: ArrayMetadataV3 = serde_json::from_slice(&metadata) .map_err(|err| StorageError::InvalidMetadata(key_v3, err.to_string()))?; - return Self::new_with_metadata(storage, path, ArrayMetadata::V3(metadata)); + return Ok(ArrayMetadata::V3(metadata)); } } @@ -74,7 +84,7 @@ impl Array { })?; } - return Self::new_with_metadata(storage, path, ArrayMetadata::V2(metadata)); + return Ok(ArrayMetadata::V2(metadata)); } } diff --git a/zarrs/src/group.rs b/zarrs/src/group.rs index f3e8faf0..55cb60e3 100644 --- a/zarrs/src/group.rs +++ b/zarrs/src/group.rs @@ -202,6 +202,22 @@ impl Group { self } } + + /// Reject the group if it contains additional fields with `"must_understand": true`. + fn validate_metadata(metadata: &GroupMetadata) -> Result<(), GroupCreateError> { + let additional_fields = match &metadata { + GroupMetadata::V2(metadata) => &metadata.additional_fields, + GroupMetadata::V3(metadata) => &metadata.additional_fields, + }; + for (name, field) in additional_fields { + if field.must_understand() { + return Err(GroupCreateError::UnsupportedAdditionalFieldError( + UnsupportedAdditionalFieldError::new(name.clone(), field.as_value().clone()), + )); + } + } + Ok(()) + } } impl Group { @@ -224,15 +240,24 @@ impl Group { path: &str, version: &MetadataRetrieveVersion, ) -> Result { - let node_path = path.try_into()?; + let metadata = Self::open_metadata(&storage, path, version)?; + Self::validate_metadata(&metadata)?; + Self::new_with_metadata(storage, path, metadata) + } + fn open_metadata( + storage: &Arc, + path: &str, + version: &MetadataRetrieveVersion, + ) -> Result { + let node_path = path.try_into()?; if let MetadataRetrieveVersion::Default | MetadataRetrieveVersion::V3 = version { // Try Zarr V3 let key_v3 = meta_key_v3(&node_path); if let Some(metadata) = storage.get(&key_v3)? { let metadata: GroupMetadataV3 = serde_json::from_slice(&metadata) .map_err(|err| StorageError::InvalidMetadata(key_v3, err.to_string()))?; - return Self::new_with_metadata(storage, path, GroupMetadata::V3(metadata)); + return Ok(GroupMetadata::V3(metadata)); } } @@ -249,7 +274,7 @@ impl Group { StorageError::InvalidMetadata(attributes_key, err.to_string()) })?; } - return Self::new_with_metadata(storage, path, GroupMetadata::V2(metadata)); + return Ok(GroupMetadata::V2(metadata)); } } @@ -373,6 +398,16 @@ impl Group { path: &str, version: &MetadataRetrieveVersion, ) -> Result { + let metadata = Self::async_open_metadata(storage.clone(), path, version).await?; + Self::validate_metadata(&metadata)?; + Self::new_with_metadata(storage, path, metadata) + } + + async fn async_open_metadata( + storage: Arc, + path: &str, + version: &MetadataRetrieveVersion, + ) -> Result { let node_path = path.try_into()?; if let MetadataRetrieveVersion::Default | MetadataRetrieveVersion::V3 = version { @@ -381,7 +416,7 @@ impl Group { if let Some(metadata) = storage.get(&key_v3).await? { let metadata: GroupMetadataV3 = serde_json::from_slice(&metadata) .map_err(|err| StorageError::InvalidMetadata(key_v3, err.to_string()))?; - return Self::new_with_metadata(storage, path, GroupMetadata::V3(metadata)); + return Ok(GroupMetadata::V3(metadata)); } } @@ -398,7 +433,7 @@ impl Group { StorageError::InvalidMetadata(attributes_key, err.to_string()) })?; } - return Self::new_with_metadata(storage, path, GroupMetadata::V2(metadata)); + return Ok(GroupMetadata::V2(metadata)); } } @@ -773,6 +808,16 @@ mod tests { .get("unknown") .unwrap() .must_understand()); + + // Permit manual creation of group with unsupported metadata + let storage = Arc::new(MemoryStore::new()); + let group = + Group::new_with_metadata(storage.clone(), "/", group_metadata.clone().into()).unwrap(); + group.store_metadata().unwrap(); + + // Group opening should fail with unsupported metadata + let group = Group::open(storage, "/"); + assert!(group.is_err()); } #[test]