diff --git a/zarrs/src/array/array_builder.rs b/zarrs/src/array/array_builder.rs index 7ea6d8ba..656f1e87 100644 --- a/zarrs/src/array/array_builder.rs +++ b/zarrs/src/array/array_builder.rs @@ -232,8 +232,7 @@ impl ArrayBuilder { /// Set additional fields not defined in the Zarr specification. /// Use this cautiously. In general, store user defined attributes using [`ArrayBuilder::attributes`]. /// - /// Note that array metadata must not contain any additional fields, unless they are annotated with `"must_understand": false`. - /// `zarrs` will error when opening an array with additional fields without this annotation. + /// `zarrs` and other implementations are expected to error when opening an array with unsupported additional fields, unless they are a JSON object containing `"must_understand": false`. pub fn additional_fields(&mut self, additional_fields: AdditionalFields) -> &mut Self { self.additional_fields = additional_fields; self diff --git a/zarrs/src/group.rs b/zarrs/src/group.rs index 54d04380..f3e8faf0 100644 --- a/zarrs/src/group.rs +++ b/zarrs/src/group.rs @@ -754,8 +754,8 @@ mod tests { } #[test] - fn group_metadata_invalid_additional_field() { - let group_metadata = serde_json::from_str::( + fn group_metadata_unknown_additional_field() { + let group_metadata = serde_json::from_str::( r#"{ "zarr_format": 3, "node_type": "group", @@ -763,10 +763,16 @@ mod tests { "spam": "ham", "eggs": 42 }, - "unknown": "fail" + "unknown": "unsupported" }"#, - ); - assert!(group_metadata.is_err()); + ) + .unwrap(); + assert!(group_metadata.additional_fields.len() == 1); + assert!(group_metadata + .additional_fields + .get("unknown") + .unwrap() + .must_understand()); } #[test] diff --git a/zarrs/src/group/group_builder.rs b/zarrs/src/group/group_builder.rs index 36d13abd..8b0206c8 100644 --- a/zarrs/src/group/group_builder.rs +++ b/zarrs/src/group/group_builder.rs @@ -44,8 +44,7 @@ impl GroupBuilder { /// Set additional fields not defined in the Zarr specification. /// Use this cautiously. In general, store user defined attributes using [`GroupBuilder::attributes`]. /// - /// Note that array metadata must not contain any additional fields, unless they are annotated with `"must_understand": false`. - /// `zarrs` will error when opening an array with additional fields without this annotation. + /// `zarrs` and other implementations are expected to error when opening a group with unsupported additional fields, unless they are a JSON object containing `"must_understand": false`. pub fn additional_fields(&mut self, additional_fields: AdditionalFields) -> &mut Self { match &mut self.metadata { GroupMetadata::V3(metadata) => metadata.additional_fields = additional_fields, diff --git a/zarrs_metadata/CHANGELOG.md b/zarrs_metadata/CHANGELOG.md index 45712372..8f52bc25 100644 --- a/zarrs_metadata/CHANGELOG.md +++ b/zarrs_metadata/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Add `UnsupportedAdditionalFieldError::new` + +### Fixed +- Make `AdditionalField` public and permit any JSON type (not just objects) + ## [0.3.3] - 2025-02-06 ### Fixed diff --git a/zarrs_metadata/src/lib.rs b/zarrs_metadata/src/lib.rs index fadc3b2e..16577efd 100644 --- a/zarrs_metadata/src/lib.rs +++ b/zarrs_metadata/src/lib.rs @@ -75,7 +75,7 @@ pub enum NodeMetadata { #[cfg(test)] mod tests { use super::*; - use v3::{AdditionalFields, MetadataV3}; + use v3::{AdditionalField, AdditionalFields, MetadataV3}; #[test] fn metadata() { @@ -111,14 +111,27 @@ mod tests { } #[test] - fn additional_fields_auto() { - let mut additional_fields = AdditionalFields::new(); + fn additional_fields_constructors() { let additional_field = serde_json::Map::new(); - additional_fields.insert("key".to_string(), additional_field.into()); - assert!(!additional_fields.contains_key("must_understand")); - assert!(serde_json::to_string(&additional_fields) - .unwrap() - .contains(r#""must_understand":false"#)); + let additional_field: AdditionalField = additional_field.into(); + assert!(additional_field.must_understand()); + assert!( + additional_field.as_value() == &serde_json::Value::Object(serde_json::Map::default()) + ); + assert!(serde_json::to_string(&additional_field).unwrap() == r#"{"must_understand":true}"#); + + let additional_field: AdditionalField = AdditionalField::new("test", true); + assert!(additional_field.must_understand()); + assert!(additional_field.as_value() == &serde_json::Value::String("test".to_string())); + assert!(serde_json::to_string(&additional_field).unwrap() == r#""test""#); + + let additional_field: AdditionalField = AdditionalField::new(123, false); + assert!(!additional_field.must_understand()); + assert!( + additional_field.as_value() + == &serde_json::Value::Number(serde_json::Number::from(123)) + ); + assert!(serde_json::to_string(&additional_field).unwrap() == "123"); } #[test] @@ -127,20 +140,23 @@ mod tests { "unknown_field": { "key": "value", "must_understand": false - } - }"#; - let additional_fields = serde_json::from_str::(json); - assert!(additional_fields.is_ok()); - } - - #[test] - fn additional_fields_invalid() { - let json = r#"{ - "unknown_field": { + }, + "unsupported_field_1": { + "key": "value", + "must_understand": true + }, + "unsupported_field_2": { "key": "value" - } + }, + "unsupported_field_3": [], + "unsupported_field_4": "test" }"#; - let additional_fields = serde_json::from_str::(json); - assert!(additional_fields.is_err()); + let additional_fields = serde_json::from_str::(json).unwrap(); + assert!(additional_fields.len() == 5); + assert!(!additional_fields["unknown_field"].must_understand()); + assert!(additional_fields["unsupported_field_1"].must_understand()); + assert!(additional_fields["unsupported_field_2"].must_understand()); + assert!(additional_fields["unsupported_field_3"].must_understand()); + assert!(additional_fields["unsupported_field_4"].must_understand()); } } diff --git a/zarrs_metadata/src/v3.rs b/zarrs_metadata/src/v3.rs index d67b0b01..960e3c58 100644 --- a/zarrs_metadata/src/v3.rs +++ b/zarrs_metadata/src/v3.rs @@ -9,8 +9,8 @@ pub use group::GroupMetadataV3; mod metadata; pub use metadata::{ - AdditionalFields, ConfigurationInvalidError, MetadataConfiguration, MetadataV3, - UnsupportedAdditionalFieldError, + AdditionalField, AdditionalFields, ConfigurationInvalidError, MetadataConfiguration, + MetadataV3, UnsupportedAdditionalFieldError, }; /// V3 node metadata ([`ArrayMetadataV3`] or [`GroupMetadataV3`]). diff --git a/zarrs_metadata/src/v3/metadata.rs b/zarrs_metadata/src/v3/metadata.rs index edcafa6d..bcc4279f 100644 --- a/zarrs_metadata/src/v3/metadata.rs +++ b/zarrs_metadata/src/v3/metadata.rs @@ -1,5 +1,6 @@ use derive_more::From; use serde::{de::DeserializeOwned, ser::SerializeMap, Deserialize, Serialize}; +use serde_json::Value; use thiserror::Error; /// Metadata with a name and optional configuration. @@ -33,7 +34,7 @@ pub struct MetadataV3 { } /// Configuration metadata. -pub type MetadataConfiguration = serde_json::Map; +pub type MetadataConfiguration = serde_json::Map; impl TryFrom<&str> for MetadataV3 { type Error = serde_json::Error; @@ -138,7 +139,7 @@ impl MetadataV3 { configuration: &TConfiguration, ) -> Result { let configuration = serde_json::to_value(configuration)?; - if let serde_json::Value::Object(configuration) = configuration { + if let Value::Object(configuration) = configuration { Ok(Self::new_with_configuration(name, configuration)) } else { Err(serde::ser::Error::custom( @@ -212,6 +213,7 @@ impl ConfigurationInvalidError { } } +// FIXME: Move to `zarrs` itself in 0.4.0 /// An unsupported additional field error. /// /// An unsupported field in array or group metadata is an unrecognised field without `"must_understand": false`. @@ -219,10 +221,16 @@ impl ConfigurationInvalidError { #[error("unsupported additional field {name} with value {value}")] pub struct UnsupportedAdditionalFieldError { name: String, - value: serde_json::Value, + value: Value, } impl UnsupportedAdditionalFieldError { + /// Create a new [`UnsupportedAdditionalFieldError`]. + #[must_use] + pub fn new(name: String, value: Value) -> UnsupportedAdditionalFieldError { + Self { name, value } + } + /// Return the name of the unsupported additional field. #[must_use] pub fn name(&self) -> &str { @@ -231,54 +239,110 @@ impl UnsupportedAdditionalFieldError { /// Return the value of the unsupported additional field. #[must_use] - pub const fn value(&self) -> &serde_json::Value { + pub const fn value(&self) -> &Value { &self.value } } /// An additional field in array or group metadata. /// -/// Must be an object with a `"must_understand": false` field. -#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Default, From)] +/// A field that is not recognised / supported by `zarrs` will be considered an additional field. +/// Additional fields can be any JSON type. +/// An array / group cannot be created with an additional field, unless the additional field is an object with a `"must_understand": false` field. +/// +/// ### Example additional field JSON +/// ```json +// "unknown_field": { +// "key": "value", +// "must_understand": false +// }, +// "unsupported_field_1": { +// "key": "value", +// "must_understand": true +// }, +// "unsupported_field_2": { +// "key": "value" +// }, +// "unsupported_field_3": [], +// "unsupported_field_4": "test" +/// ``` +#[derive(Clone, Eq, PartialEq, Debug, Default)] pub struct AdditionalField { - must_understand: monostate::MustBe!(false), - #[serde(flatten)] - fields: serde_json::Map, + field: Value, + must_understand: bool, } impl AdditionalField { - /// Return the underlying map. + /// Create a new additional field. + #[must_use] + pub fn new(field: impl Into, must_understand: bool) -> AdditionalField { + Self { + field: field.into(), + must_understand, + } + } + + /// Return the underlying value. + #[must_use] + pub const fn as_value(&self) -> &Value { + &self.field + } + + /// Return the `must_understand` component of the additional field. #[must_use] - pub const fn as_map(&self) -> &serde_json::Map { - &self.fields + pub const fn must_understand(&self) -> bool { + self.must_understand } } -impl From for serde_json::Map { - fn from(value: AdditionalField) -> Self { - value.fields +impl Serialize for AdditionalField { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match &self.field { + Value::Object(object) => { + let mut map = serializer.serialize_map(Some(object.len() + 1))?; + map.serialize_entry("must_understand", &Value::Bool(self.must_understand))?; + for (k, v) in object { + map.serialize_entry(k, v)?; + } + map.end() + } + _ => self.field.serialize(serializer), + } + } +} + +impl<'de> serde::Deserialize<'de> for AdditionalField { + fn deserialize>(d: D) -> Result { + let value = Value::deserialize(d)?; + Ok(value.into()) } } -impl From> for AdditionalField { - fn from(value: serde_json::Map) -> Self { +impl From for AdditionalField +where + T: Into, +{ + fn from(field: T) -> Self { + let mut value: Value = field.into(); + let must_understand = if let Some(object) = value.as_object_mut() { + if let Some(Value::Bool(must_understand)) = object.remove("must_understand") { + must_understand + } else { + true + } + } else { + true + }; Self { - must_understand: monostate::MustBe!(false), - fields: value, + must_understand, + field: value, } } } /// Additional fields in array or group metadata. -/// -/// Additional fields are a JSON object with a `"must_understand": false` key-value pair. -/// -/// ### Example additional field JSON -/// ```json -/// "unknown_field": { -/// "key": "value", -/// "must_understand": false -/// } -/// ``` -// NOTE: It would be nice if this was just a serde_json::Map, but it only has implementations for ``. +// NOTE: It would be nice if this was just a serde_json::Map, but it only has implementations for ``. pub type AdditionalFields = std::collections::BTreeMap; diff --git a/zarrs_metadata/tests/extensions_zep_9.rs b/zarrs_metadata/tests/extensions_zep_9.rs new file mode 100644 index 00000000..4c623844 --- /dev/null +++ b/zarrs_metadata/tests/extensions_zep_9.rs @@ -0,0 +1,56 @@ +#![allow(missing_docs)] + +use zarrs_metadata::v3::ArrayMetadataV3; + +#[test] +fn array_extensions() { + let json = r#"{ + "zarr_format": 3, + "node_type": "array", + "data_type": "https://example.com/zarr/string", + "fill_value": "", + "chunk_key_encoding": { + "name": "default", + "configuration": { "separator": "." } + }, + "codecs": [ + { + "name": "https://numcodecs.dev/vlen-utf8" + }, + { + "name": "zstd", + "configuration": {} + } + ], + "chunk_grid": { + "name": "regular", + "configuration": { "chunk_shape": [ 32 ] } + }, + "shape": [ 128 ], + "dimension_names": [ "x" ], + "attributes": {}, + "storage_transformers": [], + "extensions": [ + { + "name": "https://example.com/zarr/offset", + "configuration": { "offset": [ 12 ] } + }, + { + "name": "https://example.com/zarr/array-statistics", + "configuration": { + "min": 5, + "max": 12 + }, + "must_understand": false + }, + { + "name": "https://example.com/zarr/consolidated-metadata", + "configuration": {}, + "must_understand": false + } + ] +}"#; + + let metadata: ArrayMetadataV3 = serde_json::from_str(&json).unwrap(); + assert_eq!(metadata.data_type.name(), "https://example.com/zarr/string"); +}