diff --git a/CHANGELOG.md b/CHANGELOG.md index f5fd8b32..39cf709b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `Group::[set_]consolidated_metadata` - Add `Node::consolidate_metadata` - Consolidated metadata is not currently used to optimise node hierarchy requests +- Add experimental `fletcher32` checksum codec based on the numcodecs implementation + - Adds `fletcher32` feature flag ### Changed - **Breaking**: Seal `Array` extension traits: `ArraySharded[Readable]Ext` and `ArrayChunkCacheExt` diff --git a/zarrs/Cargo.toml b/zarrs/Cargo.toml index 36c494d5..e4de0878 100644 --- a/zarrs/Cargo.toml +++ b/zarrs/Cargo.toml @@ -20,6 +20,7 @@ bitround = [] # Enable the experimental bitround codec blosc = ["dep:blosc-sys"] # Enable the blosc codec bz2 = ["dep:bzip2"] # Enable the experimental bz2 codec crc32c = ["dep:crc32c"] # Enable the crc32c checksum codec +fletcher32 = [] # Enable the fletcher32 checksum codec gdeflate = ["dep:gdeflate-sys"] # Enable the experimental gdeflate codec gzip = ["dep:flate2"] # Enable the gzip codec pcodec = ["dep:pco"] # Enable the experimental pcodec codec diff --git a/zarrs/doc/status/codecs_experimental.md b/zarrs/doc/status/codecs_experimental.md index f1ec44f1..bdc7a136 100644 --- a/zarrs/doc/status/codecs_experimental.md +++ b/zarrs/doc/status/codecs_experimental.md @@ -13,6 +13,7 @@ This is configurable with [`Config::experimental_codec_names_mut`](config::Confi | | [vlen-utf8] | | ✓ | ✓ | | | Bytes to Bytes | [bz2] | | ✓ | ✓ | bz2 | | | [gdeflate] | | ✓ | | gdeflate | +| | [fletcher32] | | ✓ | ✓ | fletcher32 | [bitround]: (crate::array::codec::array_to_array::bitround) [zfp]: crate::array::codec::array_to_bytes::zfp @@ -23,3 +24,4 @@ This is configurable with [`Config::experimental_codec_names_mut`](config::Confi [vlen-utf8]: crate::array::codec::array_to_bytes::vlen_utf8 [bz2]: crate::array::codec::bytes_to_bytes::bz2 
[gdeflate]: crate::array::codec::bytes_to_bytes::gdeflate +[fletcher32]: crate::array::codec::bytes_to_bytes::fletcher32 diff --git a/zarrs/src/array/codec/bytes_to_bytes.rs b/zarrs/src/array/codec/bytes_to_bytes.rs index 9849a367..f28791b6 100644 --- a/zarrs/src/array/codec/bytes_to_bytes.rs +++ b/zarrs/src/array/codec/bytes_to_bytes.rs @@ -6,6 +6,8 @@ pub mod blosc; pub mod bz2; #[cfg(feature = "crc32c")] pub mod crc32c; +#[cfg(feature = "fletcher32")] +pub mod fletcher32; #[cfg(feature = "gdeflate")] pub mod gdeflate; #[cfg(feature = "gzip")] @@ -15,3 +17,6 @@ pub mod zstd; #[cfg(test)] pub mod test_unbounded; + +#[cfg(any(feature = "crc32c", feature = "fletcher32"))] +mod strip_suffix_partial_decoder; diff --git a/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs b/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs index 89b6724b..e95ff365 100644 --- a/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs +++ b/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs @@ -5,7 +5,6 @@ //! See . mod crc32c_codec; -mod crc32c_partial_decoder; use std::sync::Arc; diff --git a/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs index 3233dd5d..7e88534b 100644 --- a/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs +++ b/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs @@ -3,6 +3,7 @@ use std::{borrow::Cow, sync::Arc}; use crate::{ array::{ codec::{ + bytes_to_bytes::strip_suffix_partial_decoder::StripSuffixPartialDecoder, BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits, BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency, }, @@ -14,10 +15,10 @@ use crate::{ #[cfg(feature = "async")] use crate::array::codec::AsyncBytesPartialDecoderTraits; -use super::{ - crc32c_partial_decoder, Crc32cCodecConfiguration, Crc32cCodecConfigurationV1, CHECKSUM_SIZE, - IDENTIFIER, -}; +#[cfg(feature = "async")] +use 
crate::array::codec::bytes_to_bytes::strip_suffix_partial_decoder::AsyncStripSuffixPartialDecoder; + +use super::{Crc32cCodecConfiguration, Crc32cCodecConfigurationV1, CHECKSUM_SIZE, IDENTIFIER}; /// A `crc32c` (CRC32C checksum) codec implementation. #[derive(Clone, Debug, Default)] @@ -106,8 +107,9 @@ impl BytesToBytesCodecTraits for Crc32cCodec { _decoded_representation: &BytesRepresentation, _options: &CodecOptions, ) -> Result, CodecError> { - Ok(Arc::new(crc32c_partial_decoder::Crc32cPartialDecoder::new( + Ok(Arc::new(StripSuffixPartialDecoder::new( input_handle, + CHECKSUM_SIZE, ))) } @@ -133,9 +135,10 @@ impl BytesToBytesCodecTraits for Crc32cCodec { _decoded_representation: &BytesRepresentation, _options: &CodecOptions, ) -> Result, CodecError> { - Ok(Arc::new( - crc32c_partial_decoder::AsyncCrc32cPartialDecoder::new(input_handle), - )) + Ok(Arc::new(AsyncStripSuffixPartialDecoder::new( + input_handle, + CHECKSUM_SIZE, + ))) } fn compute_encoded_size( @@ -144,10 +147,10 @@ impl BytesToBytesCodecTraits for Crc32cCodec { ) -> BytesRepresentation { match decoded_representation { BytesRepresentation::FixedSize(size) => { - BytesRepresentation::FixedSize(size + core::mem::size_of::() as u64) + BytesRepresentation::FixedSize(size + CHECKSUM_SIZE as u64) } BytesRepresentation::BoundedSize(size) => { - BytesRepresentation::BoundedSize(size + core::mem::size_of::() as u64) + BytesRepresentation::BoundedSize(size + CHECKSUM_SIZE as u64) } BytesRepresentation::UnboundedSize => BytesRepresentation::UnboundedSize, } diff --git a/zarrs/src/array/codec/bytes_to_bytes/fletcher32.rs b/zarrs/src/array/codec/bytes_to_bytes/fletcher32.rs new file mode 100644 index 00000000..044509d9 --- /dev/null +++ b/zarrs/src/array/codec/bytes_to_bytes/fletcher32.rs @@ -0,0 +1,189 @@ +//! The `fletcher32` bytes to bytes codec. +//! +//! Appends a fletcher32 checksum of the input bytestream. +//! +//! This is based on the `numcodecs` implementation. +//! See . +//! +//!
<div class="warning"> +//! This codec is experimental and may be incompatible with other Zarr V3 implementations. +//! </div>
+//! +//! This codec requires the `fletcher32` feature, which is disabled by default. +//! +//! See [`Fletcher32CodecConfigurationV1`] for example `JSON` metadata. + +mod fletcher32_codec; + +use std::sync::Arc; + +pub use crate::metadata::v3::array::codec::fletcher32::{ + Fletcher32CodecConfiguration, Fletcher32CodecConfigurationV1, +}; +pub use fletcher32_codec::Fletcher32Codec; + +use crate::{ + array::codec::{Codec, CodecPlugin}, + metadata::v3::{array::codec::fletcher32, MetadataV3}, + plugin::{PluginCreateError, PluginMetadataInvalidError}, +}; + +pub use fletcher32::IDENTIFIER; + +// Register the codec. +inventory::submit! { + CodecPlugin::new(IDENTIFIER, is_name_fletcher32, create_codec_fletcher32) +} + +fn is_name_fletcher32(name: &str) -> bool { + name.eq(IDENTIFIER) +} + +pub(crate) fn create_codec_fletcher32(metadata: &MetadataV3) -> Result { + let configuration = metadata + .to_configuration() + .map_err(|_| PluginMetadataInvalidError::new(IDENTIFIER, "codec", metadata.clone()))?; + let codec = Arc::new(Fletcher32Codec::new_with_configuration(&configuration)); + Ok(Codec::BytesToBytes(codec)) +} + +const CHECKSUM_SIZE: usize = core::mem::size_of::(); + +#[cfg(test)] +mod tests { + use std::{borrow::Cow, sync::Arc}; + + use crate::{ + array::{ + codec::{BytesToBytesCodecTraits, CodecOptions, CodecTraits}, + BytesRepresentation, + }, + byte_range::ByteRange, + }; + + use super::*; + + const JSON1: &str = r#"{}"#; + + #[test] + fn codec_fletcher32_configuration_none() { + let codec_configuration: Fletcher32CodecConfiguration = + serde_json::from_str(r#"{}"#).unwrap(); + let codec = Fletcher32Codec::new_with_configuration(&codec_configuration); + let metadata = codec.create_metadata().unwrap(); + assert_eq!( + serde_json::to_string(&metadata).unwrap(), + r#"{"name":"fletcher32"}"# + ); + } + + #[test] + fn codec_fletcher32() { + let elements: Vec = (0..6).collect(); + let bytes = elements; + let bytes_representation = 
BytesRepresentation::FixedSize(bytes.len() as u64); + + let codec_configuration: Fletcher32CodecConfiguration = + serde_json::from_str(JSON1).unwrap(); + let codec = Fletcher32Codec::new_with_configuration(&codec_configuration); + + let encoded = codec + .encode(Cow::Borrowed(&bytes), &CodecOptions::default()) + .unwrap(); + let decoded = codec + .decode( + encoded.clone(), + &bytes_representation, + &CodecOptions::default(), + ) + .unwrap(); + assert_eq!(bytes, decoded.to_vec()); + + // Check that the checksum is correct + let checksum: &[u8; 4] = &encoded + [encoded.len() - core::mem::size_of::()..encoded.len()] + .try_into() + .unwrap(); + println!("checksum {checksum:?}"); + assert_eq!(checksum, &[9, 6, 14, 8]); // TODO: CHECK + } + + #[test] + fn codec_fletcher32_partial_decode() { + let elements: Vec = (0..32).collect(); + let bytes = elements; + let bytes_representation = BytesRepresentation::FixedSize(bytes.len() as u64); + + let codec_configuration: Fletcher32CodecConfiguration = + serde_json::from_str(JSON1).unwrap(); + let codec = Arc::new(Fletcher32Codec::new_with_configuration( + &codec_configuration, + )); + + let encoded = codec + .encode(Cow::Owned(bytes), &CodecOptions::default()) + .unwrap(); + let decoded_regions = [ByteRange::FromStart(3, Some(2))]; + let input_handle = Arc::new(std::io::Cursor::new(encoded)); + let partial_decoder = codec + .partial_decoder( + input_handle, + &bytes_representation, + &CodecOptions::default(), + ) + .unwrap(); + let decoded_partial_chunk = partial_decoder + .partial_decode(&decoded_regions, &CodecOptions::default()) + .unwrap() + .unwrap(); + let answer: &[Vec] = &[vec![3, 4]]; + assert_eq!( + answer, + decoded_partial_chunk + .into_iter() + .map(|v| v.to_vec()) + .collect::>() + ); + } + + #[cfg(feature = "async")] + #[tokio::test] + async fn codec_fletcher32_async_partial_decode() { + let elements: Vec = (0..32).collect(); + let bytes = elements; + let bytes_representation = 
BytesRepresentation::FixedSize(bytes.len() as u64); + + let codec_configuration: Fletcher32CodecConfiguration = + serde_json::from_str(JSON1).unwrap(); + let codec = Arc::new(Fletcher32Codec::new_with_configuration( + &codec_configuration, + )); + + let encoded = codec + .encode(Cow::Owned(bytes), &CodecOptions::default()) + .unwrap(); + let decoded_regions = [ByteRange::FromStart(3, Some(2))]; + let input_handle = Arc::new(std::io::Cursor::new(encoded)); + let partial_decoder = codec + .async_partial_decoder( + input_handle, + &bytes_representation, + &CodecOptions::default(), + ) + .await + .unwrap(); + let decoded_partial_chunk = partial_decoder + .partial_decode(&decoded_regions, &CodecOptions::default()) + .await + .unwrap() + .unwrap(); + let answer: &[Vec] = &[vec![3, 4]]; + assert_eq!( + answer, + decoded_partial_chunk + .into_iter() + .map(|v| v.to_vec()) + .collect::>() + ); + } +} diff --git a/zarrs/src/array/codec/bytes_to_bytes/fletcher32/fletcher32_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/fletcher32/fletcher32_codec.rs new file mode 100644 index 00000000..04e319f6 --- /dev/null +++ b/zarrs/src/array/codec/bytes_to_bytes/fletcher32/fletcher32_codec.rs @@ -0,0 +1,199 @@ +use std::{borrow::Cow, sync::Arc}; + +use num::Integer; + +use crate::{ + array::{ + codec::{ + bytes_to_bytes::strip_suffix_partial_decoder::StripSuffixPartialDecoder, + BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits, + BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency, + }, + ArrayMetadataOptions, BytesRepresentation, RawBytes, + }, + metadata::v3::MetadataV3, +}; + +#[cfg(feature = "async")] +use crate::array::codec::AsyncBytesPartialDecoderTraits; + +#[cfg(feature = "async")] +use crate::array::codec::bytes_to_bytes::strip_suffix_partial_decoder::AsyncStripSuffixPartialDecoder; + +use super::{ + Fletcher32CodecConfiguration, Fletcher32CodecConfigurationV1, CHECKSUM_SIZE, IDENTIFIER, +}; + +/// A 
`fletcher32` codec implementation.
+#[derive(Clone, Debug, Default)]
+pub struct Fletcher32Codec;
+
+impl Fletcher32Codec {
+    /// Create a new `fletcher32` codec.
+    #[must_use]
+    pub const fn new() -> Self {
+        Self {}
+    }
+
+    /// Create a new `fletcher32` codec from a configuration.
+    ///
+    /// The configuration carries no parameters, so it is ignored.
+    #[must_use]
+    pub const fn new_with_configuration(_configuration: &Fletcher32CodecConfiguration) -> Self {
+        Self {}
+    }
+}
+
+impl CodecTraits for Fletcher32Codec {
+    fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option<MetadataV3> {
+        let configuration = Fletcher32CodecConfigurationV1 {};
+        Some(MetadataV3::new_with_serializable_configuration(IDENTIFIER, &configuration).unwrap())
+    }
+
+    fn partial_decoder_should_cache_input(&self) -> bool {
+        false
+    }
+
+    fn partial_decoder_decodes_all(&self) -> bool {
+        false
+    }
+}
+
+/// HDF5 Fletcher32.
+///
+/// Computes a Fletcher-32 checksum over `data`, read as big-endian 16-bit words.
+/// If `data` has an odd number of bytes, the final byte is treated as the high byte
+/// of one more word whose low byte is zero.
+///
+/// Returns `(sum2 << 16) | sum1`, where both sums have been reduced to 16 bits.
+///
+/// Based on `H5_checksum_fletcher32` in the HDF5 library's `H5checksum.c`.
+fn h5_checksum_fletcher32(data: &[u8]) -> u32 {
+    // Fold the bits above bit 15 back into the low 16 bits.
+    let fold = |sum: u32| (sum & 0xffff) + (sum >> 16);
+
+    let mut len = data.len() / 2;
+    let mut sum1: u32 = 0;
+    let mut sum2: u32 = 0;
+
+    // Compute checksum for pairs of bytes, reducing after every block of at most
+    // 360 words (the block size used by the HDF5 reference implementation).
+    let mut data_idx = 0;
+    while len > 0 {
+        let tlen = len.min(360);
+        len -= tlen;
+        for _ in 0..tlen {
+            sum1 += u32::from(u16::from_be_bytes([data[data_idx], data[data_idx + 1]]));
+            data_idx += 2;
+            sum2 += sum1;
+        }
+        sum1 = fold(sum1);
+        sum2 = fold(sum2);
+    }
+
+    // Check for odd # of bytes.
+    // This must test the byte length of `data`: `len` counts the remaining words and
+    // is always zero here, so testing it would silently drop the trailing byte.
+    if data.len().is_odd() {
+        // `data_idx` now equals `data.len() - 1`, the index of the unpaired byte.
+        sum1 += u32::from(u16::from(data[data_idx]) << 8);
+        sum2 += sum1;
+        sum1 = fold(sum1);
+        sum2 = fold(sum2);
+    }
+
+    // Second reduction step to reduce sums to 16 bits
+    sum1 = fold(sum1);
+    sum2 = fold(sum2);
+
+    (sum2 << 16) | sum1
+}
+
+#[cfg_attr(feature = "async", async_trait::async_trait)]
+impl BytesToBytesCodecTraits for Fletcher32Codec {
+    fn dynamic(self: Arc<Self>) -> Arc<dyn BytesToBytesCodecTraits> {
+        self as Arc<dyn BytesToBytesCodecTraits>
+    }
+
+    fn recommended_concurrency(
+        &self,
+        _decoded_representation: &BytesRepresentation,
+    ) -> Result<RecommendedConcurrency, CodecError> {
+
Ok(RecommendedConcurrency::new_maximum(1)) + } + + fn encode<'a>( + &self, + decoded_value: RawBytes<'a>, + _options: &CodecOptions, + ) -> Result, CodecError> { + let checksum = h5_checksum_fletcher32(&decoded_value).to_le_bytes(); + let mut encoded_value: Vec = Vec::with_capacity(decoded_value.len() + checksum.len()); + encoded_value.extend_from_slice(&decoded_value); + encoded_value.extend_from_slice(&checksum); + Ok(Cow::Owned(encoded_value)) + } + + fn decode<'a>( + &self, + encoded_value: RawBytes<'a>, + _decoded_representation: &BytesRepresentation, + options: &CodecOptions, + ) -> Result, CodecError> { + if encoded_value.len() >= CHECKSUM_SIZE { + if options.validate_checksums() { + let decoded_value = &encoded_value[..encoded_value.len() - CHECKSUM_SIZE]; + let checksum = h5_checksum_fletcher32(decoded_value).to_le_bytes(); + if checksum != encoded_value[encoded_value.len() - CHECKSUM_SIZE..] { + return Err(CodecError::InvalidChecksum); + } + } + let decoded_value = encoded_value[..encoded_value.len() - CHECKSUM_SIZE].to_vec(); + Ok(Cow::Owned(decoded_value)) + } else { + Err(CodecError::Other( + "fletcher32 decoder expects a 32 bit input".to_string(), + )) + } + } + + fn partial_decoder( + self: Arc, + input_handle: Arc, + _decoded_representation: &BytesRepresentation, + _options: &CodecOptions, + ) -> Result, CodecError> { + Ok(Arc::new(StripSuffixPartialDecoder::new( + input_handle, + CHECKSUM_SIZE, + ))) + } + + fn partial_encoder( + self: Arc, + input_handle: Arc, + output_handle: Arc, + decoded_representation: &BytesRepresentation, + _options: &CodecOptions, + ) -> Result, CodecError> { + Ok(Arc::new(BytesPartialEncoderDefault::new( + input_handle, + output_handle, + *decoded_representation, + self, + ))) + } + + #[cfg(feature = "async")] + async fn async_partial_decoder( + self: Arc, + input_handle: Arc, + _decoded_representation: &BytesRepresentation, + _options: &CodecOptions, + ) -> Result, CodecError> { + 
Ok(Arc::new(AsyncStripSuffixPartialDecoder::new( + input_handle, + CHECKSUM_SIZE, + ))) + } + + fn compute_encoded_size( + &self, + decoded_representation: &BytesRepresentation, + ) -> BytesRepresentation { + match decoded_representation { + BytesRepresentation::FixedSize(size) => { + BytesRepresentation::FixedSize(size + CHECKSUM_SIZE as u64) + } + BytesRepresentation::BoundedSize(size) => { + BytesRepresentation::BoundedSize(size + CHECKSUM_SIZE as u64) + } + BytesRepresentation::UnboundedSize => BytesRepresentation::UnboundedSize, + } + } +} diff --git a/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_partial_decoder.rs b/zarrs/src/array/codec/bytes_to_bytes/strip_suffix_partial_decoder.rs similarity index 66% rename from zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_partial_decoder.rs rename to zarrs/src/array/codec/bytes_to_bytes/strip_suffix_partial_decoder.rs index 10814b2a..23b9fac7 100644 --- a/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_partial_decoder.rs +++ b/zarrs/src/array/codec/bytes_to_bytes/strip_suffix_partial_decoder.rs @@ -11,21 +11,26 @@ use crate::{ #[cfg(feature = "async")] use crate::array::codec::AsyncBytesPartialDecoderTraits; -use super::CHECKSUM_SIZE; - -/// Partial decoder for the `crc32c` (CRC32C checksum) codec. -pub(crate) struct Crc32cPartialDecoder { +/// Partial decoder for stripping a suffix (e.g. checksum). +pub(crate) struct StripSuffixPartialDecoder { input_handle: Arc, + suffix_size: usize, } -impl Crc32cPartialDecoder { - /// Create a new partial decoder for the `crc32c` codec. - pub(crate) fn new(input_handle: Arc) -> Self { - Self { input_handle } +impl StripSuffixPartialDecoder { + /// Create a new "strip suffix" partial decoder. 
+ pub(crate) fn new( + input_handle: Arc, + suffix_size: usize, + ) -> Self { + Self { + input_handle, + suffix_size, + } } } -impl BytesPartialDecoderTraits for Crc32cPartialDecoder { +impl BytesPartialDecoderTraits for StripSuffixPartialDecoder { fn partial_decode( &self, decoded_regions: &[ByteRange], @@ -42,11 +47,11 @@ impl BytesPartialDecoderTraits for Crc32cPartialDecoder { let bytes = match byte_range { ByteRange::FromStart(_, Some(_)) => bytes, ByteRange::FromStart(_, None) => { - let length = bytes.len() - CHECKSUM_SIZE; + let length = bytes.len() - self.suffix_size; Cow::Owned(bytes[..length].to_vec()) } ByteRange::Suffix(_) => { - let length = bytes.len() as u64 - (CHECKSUM_SIZE as u64); + let length = bytes.len() as u64 - (self.suffix_size as u64); let length = usize::try_from(length).unwrap(); Cow::Owned(bytes[..length].to_vec()) } @@ -59,22 +64,29 @@ impl BytesPartialDecoderTraits for Crc32cPartialDecoder { } #[cfg(feature = "async")] -/// Asynchronous partial decoder for the `crc32c` (CRC32C checksum) codec. -pub(crate) struct AsyncCrc32cPartialDecoder { +/// Asynchronous partial decoder for stripping a suffix (e.g. checksum). +pub(crate) struct AsyncStripSuffixPartialDecoder { input_handle: Arc, + suffix_size: usize, } #[cfg(feature = "async")] -impl AsyncCrc32cPartialDecoder { - /// Create a new partial decoder for the `crc32c` codec. - pub(crate) fn new(input_handle: Arc) -> Self { - Self { input_handle } +impl AsyncStripSuffixPartialDecoder { + /// Create a new "strip suffix" partial decoder. 
+ pub(crate) fn new( + input_handle: Arc, + suffix_size: usize, + ) -> Self { + Self { + input_handle, + suffix_size, + } } } #[cfg(feature = "async")] #[async_trait::async_trait] -impl AsyncBytesPartialDecoderTraits for AsyncCrc32cPartialDecoder { +impl AsyncBytesPartialDecoderTraits for AsyncStripSuffixPartialDecoder { async fn partial_decode( &self, decoded_regions: &[ByteRange], @@ -94,11 +106,11 @@ impl AsyncBytesPartialDecoderTraits for AsyncCrc32cPartialDecoder { let bytes = match byte_range { ByteRange::FromStart(_, Some(_)) => bytes, ByteRange::FromStart(_, None) => { - let length = bytes.len() - CHECKSUM_SIZE; + let length = bytes.len() - self.suffix_size; Cow::Owned(bytes[..length].to_vec()) } ByteRange::Suffix(_) => { - let length = bytes.len() as u64 - (CHECKSUM_SIZE as u64); + let length = bytes.len() as u64 - (self.suffix_size as u64); let length = usize::try_from(length).unwrap(); Cow::Owned(bytes[..length].to_vec()) } diff --git a/zarrs/src/config.rs b/zarrs/src/config.rs index d1bab970..424c4c97 100644 --- a/zarrs/src/config.rs +++ b/zarrs/src/config.rs @@ -22,7 +22,7 @@ use crate::array::{codec::CodecOptions, ArrayMetadataOptions}; /// /// [`CodecOptions::validate_checksums()`] defaults to [`Config::validate_checksums()`]. /// -/// If validate checksums is enabled, checksum codecs (e.g. `crc32c`) will validate that encoded data matches stored checksums, otherwise validation is skipped. +/// If validate checksums is enabled, checksum codecs (e.g. `crc32c`, `fletcher32`) will validate that encoded data matches stored checksums, otherwise validation is skipped. /// Note that regardless of this configuration option, checksum codecs may skip validation when partial decoding. 
/// /// ### Store Empty Chunks diff --git a/zarrs/tests/data/zarr_python_compat/fletcher32.zarr/.zarray b/zarrs/tests/data/zarr_python_compat/fletcher32.zarr/.zarray new file mode 100644 index 00000000..c01e5af3 --- /dev/null +++ b/zarrs/tests/data/zarr_python_compat/fletcher32.zarr/.zarray @@ -0,0 +1,19 @@ +{ + "shape": [ + 100, + 100 + ], + "chunks": [ + 50, + 50 + ], + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": { + "id": "fletcher32" + }, + "zarr_format": 2, + "dtype": " Result<(), Box> { Ok(()) } + +#[cfg(feature = "fletcher32")] +#[test] +fn zarr_python_compat_fletcher32() -> Result<(), Box> { + let path = PathBuf::from("tests/data/zarr_python_compat/fletcher32.zarr"); + let store = Arc::new(FilesystemStore::new(&path)?); + + let array = Array::open(store, "/")?; + assert_eq!(array.shape(), vec![100, 100]); + let elements = array.retrieve_array_subset_elements::(&ArraySubset::new_with_shape( + array.shape().to_vec(), + ))?; + assert_eq!(elements, (0..100 * 100).collect::>()); + + Ok(()) +} diff --git a/zarrs_metadata/CHANGELOG.md b/zarrs_metadata/CHANGELOG.md index 6b5d8561..d0d4e075 100644 --- a/zarrs_metadata/CHANGELOG.md +++ b/zarrs_metadata/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `v3::group::{ConsolidatedMetadata,ConsolidatedMetadataMetadata,ConsolidatedMetadataKind}` - Add `GroupMetadataV3::consolidated_metadata` field - Add `GroupMetadataV3::with_consolidated_metadata` field +- Add `fletcher32` codec metadata ### Changed - **Breaking**: Rename `DataTypeMetadataV3::Binary` to `Bytes` for compatibility with `zarr-python` diff --git a/zarrs_metadata/src/v2/array.rs b/zarrs_metadata/src/v2/array.rs index 1f7412fa..aedebbfa 100644 --- a/zarrs_metadata/src/v2/array.rs +++ b/zarrs_metadata/src/v2/array.rs @@ -14,6 +14,8 @@ pub mod codec { pub mod blosc; /// `bz2` codec metadata. pub mod bz2; + /// `fletcher32` codec metadata. 
+ pub mod fletcher32; /// `gzip` codec metadata. pub mod gzip; /// `vlen-array` codec metadata. diff --git a/zarrs_metadata/src/v2/array/codec/fletcher32.rs b/zarrs_metadata/src/v2/array/codec/fletcher32.rs new file mode 100644 index 00000000..b47b6148 --- /dev/null +++ b/zarrs_metadata/src/v2/array/codec/fletcher32.rs @@ -0,0 +1 @@ +pub use crate::v3::array::codec::fletcher32::Fletcher32CodecConfigurationV1; diff --git a/zarrs_metadata/src/v3/array.rs b/zarrs_metadata/src/v3/array.rs index c5b8ff4a..4eecf76e 100644 --- a/zarrs_metadata/src/v3/array.rs +++ b/zarrs_metadata/src/v3/array.rs @@ -23,6 +23,8 @@ pub mod codec { pub mod bz2; /// `crc32c` codec metadata. pub mod crc32c; + /// `fletcher32` codec metadata. + pub mod fletcher32; /// `gdeflate` codec metadata. pub mod gdeflate; /// `gzip` codec metadata. diff --git a/zarrs_metadata/src/v3/array/codec/fletcher32.rs b/zarrs_metadata/src/v3/array/codec/fletcher32.rs new file mode 100644 index 00000000..31521dc6 --- /dev/null +++ b/zarrs_metadata/src/v3/array/codec/fletcher32.rs @@ -0,0 +1,60 @@ +use derive_more::{Display, From}; +use serde::{Deserialize, Serialize}; + +/// The identifier for the `fletcher32` codec. +pub const IDENTIFIER: &str = "fletcher32"; + +/// A wrapper to handle various versions of `fletcher32` codec configuration parameters. +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display, From)] +#[serde(untagged)] +pub enum Fletcher32CodecConfiguration { + /// Version 1.0 draft. + V1(Fletcher32CodecConfigurationV1), +} + +/// `fletcher32` (checksum) codec configuration parameters (version 1.0 draft). 
+/// +/// ### Example (Zarr V3) +/// ```json +/// { +/// "name": "fletcher32", +/// "configuration": {} +/// } +/// ``` +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)] +#[serde(deny_unknown_fields)] +#[display("{}", serde_json::to_string(self).unwrap_or_default())] +pub struct Fletcher32CodecConfigurationV1 {} + +#[cfg(test)] +mod tests { + use crate::v3::MetadataV3; + + use super::*; + + #[test] + fn codec_fletcher32_config1() { + serde_json::from_str::(r#"{}"#).unwrap(); + } + + #[test] + fn codec_fletcher32_config_outer1() { + serde_json::from_str::( + r#"{ + "name": "fletcher32", + "configuration": {} + }"#, + ) + .unwrap(); + } + + #[test] + fn codec_fletcher32_config_outer2() { + serde_json::from_str::( + r#"{ + "name": "fletcher32" + }"#, + ) + .unwrap(); + } +}