Skip to content

Commit

Permalink
Support 8 and 16 bit integer data types with zfp codec by promoting to 32 bit
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin committed May 5, 2024
1 parent 37cbeae commit 1e18989
Show file tree
Hide file tree
Showing 6 changed files with 319 additions and 142 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Allow float fill values to be created from int fill value metadata
- Make `chunk_grid::{regular,rectangular}` public
- Support 8 and 16 bit integer data types with zfp codec by promoting to 32 bit

### Fixed
- Fix `compute_encoded_size()` for `BitroundCodec` incorrectly indicating various data types were unsupported
Expand Down
196 changes: 180 additions & 16 deletions src/array/codec/array_to_bytes/zfp.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
//! The `zfp` array to bytes codec.
//!
//! [zfp](https://zfp.io/) is a compressed number format for 1D to 4D arrays of 32/64-bit floating point or integer data.
//! 8/16-bit integer types are supported through promotion to 32-bit in accordance with the [zfp utility functions](https://zfp.readthedocs.io/en/release1.0.1/low-level-api.html#utility-functions).
//!
//! This codec requires the `zfp` feature, which is disabled by default.
//!
//! See [`ZfpCodecConfigurationV1`] for example `JSON` metadata.
mod zfp_array;
mod zfp_bitstream;
mod zfp_codec;
mod zfp_configuration;
Expand All @@ -23,20 +25,21 @@ pub use zfp_configuration::{

use zfp_sys::{
zfp_decompress, zfp_exec_policy_zfp_exec_omp, zfp_stream_rewind, zfp_stream_set_bit_stream,
zfp_stream_set_execution, zfp_type, zfp_type_zfp_type_double, zfp_type_zfp_type_float,
zfp_type_zfp_type_int32, zfp_type_zfp_type_int64,
zfp_stream_set_execution,
};

use crate::{
array::{
codec::{Codec, CodecError, CodecPlugin},
ChunkRepresentation, DataType,
transmute_from_bytes_vec, transmute_to_bytes_vec, ChunkRepresentation, DataType,
},
metadata::Metadata,
plugin::{PluginCreateError, PluginMetadataInvalidError},
};

use self::{zfp_bitstream::ZfpBitstream, zfp_field::ZfpField, zfp_stream::ZfpStream};
use self::{
zfp_array::ZfpArray, zfp_bitstream::ZfpBitstream, zfp_field::ZfpField, zfp_stream::ZfpStream,
};

/// The identifier for the `zfp` codec.
pub const IDENTIFIER: &str = "zfp";
Expand Down Expand Up @@ -99,27 +102,145 @@ pub struct ZfpExpertParams {
pub minexp: i32,
}

const fn zarr_data_type_to_zfp_data_type(data_type: &DataType) -> Option<zfp_type> {
const fn zarr_to_zfp_data_type(data_type: &DataType) -> Option<zfp_sys::zfp_type> {
match data_type {
DataType::Int32 | DataType::UInt32 => Some(zfp_type_zfp_type_int32),
DataType::Int64 | DataType::UInt64 => Some(zfp_type_zfp_type_int64),
DataType::Float32 => Some(zfp_type_zfp_type_float),
DataType::Float64 => Some(zfp_type_zfp_type_double),
DataType::Int8
| DataType::UInt8
| DataType::Int16
| DataType::UInt16
| DataType::Int32
| DataType::UInt32 => Some(zfp_sys::zfp_type_zfp_type_int32),
DataType::Int64 | DataType::UInt64 => Some(zfp_sys::zfp_type_zfp_type_int64),

Check warning on line 113 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L112-L113

Added lines #L112 - L113 were not covered by tests
DataType::Float32 => Some(zfp_sys::zfp_type_zfp_type_float),
DataType::Float64 => Some(zfp_sys::zfp_type_zfp_type_double),

Check warning on line 115 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L115

Added line #L115 was not covered by tests
_ => None,
}
}

/// Converts the raw bytes of a decoded chunk into a typed [`ZfpArray`] that can be handed to zfp.
///
/// The element type is taken from `decoded_representation.data_type()`:
/// - `Int8`/`UInt8`/`Int16`/`UInt16` are widened to `i32` and shifted left (by 23 bits for
///   8-bit types, 15 bits for 16-bit types). Unsigned values are first re-centred around zero
///   by subtracting `0x80` / `0x8000`. The result is a [`ZfpArray::Int32`].
/// - `Int32`/`UInt32` and `Int64`/`UInt64` are passed through `transmute_from_bytes_vec` as
///   `i32` / `i64` without any arithmetic.
/// - `Float32`/`Float64` are passed through `transmute_from_bytes_vec` as `f32` / `f64`.
///
/// # Errors
/// Returns [`CodecError::UnsupportedDataType`] for any other data type.
fn promote_before_zfp_encoding(
decoded_value: Vec<u8>,
decoded_representation: &ChunkRepresentation,
) -> Result<ZfpArray, CodecError> {
match decoded_representation.data_type() {
DataType::Int8 => {
let decoded_value = transmute_from_bytes_vec::<i8>(decoded_value);
// -128..=127 shifted left by 23 bits lands in [-2^30, 2^30).
let decoded_value_promoted =
decoded_value.iter().map(|i| i32::from(*i) << 23).collect();
Ok(ZfpArray::Int32(decoded_value_promoted))

Check warning on line 129 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L126-L129

Added lines #L126 - L129 were not covered by tests
}
DataType::UInt8 => {
let decoded_value = transmute_from_bytes_vec::<u8>(decoded_value);
// Subtracting 0x80 maps 0..=255 onto -128..=127 before the same 23-bit shift as Int8.
let decoded_value_promoted = decoded_value
.iter()
.map(|i| (i32::from(*i) - 0x80) << 23)
.collect();
Ok(ZfpArray::Int32(decoded_value_promoted))

Check warning on line 137 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L132-L137

Added lines #L132 - L137 were not covered by tests
}
DataType::Int16 => {
let decoded_value = transmute_from_bytes_vec::<i16>(decoded_value);
// -32768..=32767 shifted left by 15 bits lands in [-2^30, 2^30).
let decoded_value_promoted =
decoded_value.iter().map(|i| i32::from(*i) << 15).collect();
Ok(ZfpArray::Int32(decoded_value_promoted))
}
DataType::UInt16 => {
let decoded_value = transmute_from_bytes_vec::<u16>(decoded_value);
// Subtracting 0x8000 maps 0..=65535 onto -32768..=32767 before the same 15-bit shift as Int16.
let decoded_value_promoted = decoded_value
.iter()
.map(|i| (i32::from(*i) - 0x8000) << 15)
.collect();
Ok(ZfpArray::Int32(decoded_value_promoted))

Check warning on line 151 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L146-L151

Added lines #L146 - L151 were not covered by tests
}
// 32/64-bit types need no promotion; signed and unsigned share the signed zfp variant.
DataType::Int32 | DataType::UInt32 => Ok(ZfpArray::Int32(transmute_from_bytes_vec::<i32>(
decoded_value,
))),
DataType::Int64 | DataType::UInt64 => Ok(ZfpArray::Int64(transmute_from_bytes_vec::<i64>(
decoded_value,
))),

Check warning on line 158 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L153-L158

Added lines #L153 - L158 were not covered by tests
DataType::Float32 => Ok(ZfpArray::Float(transmute_from_bytes_vec::<f32>(
decoded_value,
))),
DataType::Float64 => Ok(ZfpArray::Double(transmute_from_bytes_vec::<f64>(
decoded_value,
))),
// Every other data type is rejected with the codec identifier attached.
_ => Err(CodecError::UnsupportedDataType(
decoded_representation.data_type().clone(),
IDENTIFIER.to_string(),
)),

Check warning on line 168 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L162-L168

Added lines #L162 - L168 were not covered by tests
}
}

/// Allocates the zero-filled [`ZfpArray`] that zfp decompression will write into.
///
/// The array holds `decoded_representation.num_elements_usize()` elements. The variant is
/// chosen from the chunk data type: all 8/16/32-bit integer types use [`ZfpArray::Int32`],
/// 64-bit integers use [`ZfpArray::Int64`], and `Float32`/`Float64` use
/// [`ZfpArray::Float`]/[`ZfpArray::Double`].
///
/// # Errors
/// Returns [`CodecError::UnsupportedDataType`] for any other data type.
fn init_zfp_decoding_output(
decoded_representation: &ChunkRepresentation,
) -> Result<ZfpArray, CodecError> {
let num_elements = decoded_representation.num_elements_usize();
match decoded_representation.data_type() {
// 8- and 16-bit integers get an i32 buffer, the same width as the 32-bit integer types.
DataType::Int8
| DataType::UInt8
| DataType::Int16
| DataType::UInt16
| DataType::Int32
| DataType::UInt32 => Ok(ZfpArray::Int32(vec![0; num_elements])),
DataType::Int64 | DataType::UInt64 => Ok(ZfpArray::Int64(vec![0; num_elements])),

Check warning on line 183 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L183

Added line #L183 was not covered by tests
DataType::Float32 => Ok(ZfpArray::Float(vec![0.0; num_elements])),
DataType::Float64 => Ok(ZfpArray::Double(vec![0.0; num_elements])),
_ => Err(CodecError::UnsupportedDataType(
decoded_representation.data_type().clone(),
IDENTIFIER.to_string(),
)),

Check warning on line 189 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L185-L189

Added lines #L185 - L189 were not covered by tests
}
}

/// Converts a decompressed [`ZfpArray`] back into the raw bytes of the chunk's data type.
///
/// For 32/64-bit integer and floating point types the vector is passed straight to
/// `transmute_to_bytes_vec`. For 8/16-bit integer types the `i32` values are shifted right
/// (by 23 bits for 8-bit types, 15 bits for 16-bit types), unsigned types have `0x80` /
/// `0x8000` added back, and the result is clamped to the target type's range before narrowing.
///
/// # Errors
/// Returns [`CodecError::UnsupportedDataType`] when the data type is unsupported, and also
/// when the `ZfpArray` variant does not match the data type (both fall into the catch-all arm).
fn demote_after_zfp_decoding(
array: ZfpArray,
decoded_representation: &ChunkRepresentation,
) -> Result<Vec<u8>, CodecError> {
// NOTE(review): none of the patterns below bind a lower-case global, so this `allow`
// looks like a leftover -- confirm before removing.
#[allow(non_upper_case_globals)]
match (decoded_representation.data_type(), array) {
(DataType::Int32 | DataType::UInt32, ZfpArray::Int32(vec)) => {
Ok(transmute_to_bytes_vec(vec))

Check warning on line 200 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L199-L200

Added lines #L199 - L200 were not covered by tests
}
(DataType::Int64 | DataType::UInt64, ZfpArray::Int64(vec)) => {
Ok(transmute_to_bytes_vec(vec))

Check warning on line 203 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L202-L203

Added lines #L202 - L203 were not covered by tests
}
(DataType::Float32, ZfpArray::Float(vec)) => Ok(transmute_to_bytes_vec(vec)),
(DataType::Float64, ZfpArray::Double(vec)) => Ok(transmute_to_bytes_vec(vec)),
// The clamp guarantees the value fits the target type, so the `try_from(..).unwrap()`
// in each narrow-integer arm below cannot panic.
(DataType::Int8, ZfpArray::Int32(vec)) => Ok(transmute_to_bytes_vec(
vec.into_iter()
.map(|i| i8::try_from((i >> 23).clamp(-0x80, 0x7f)).unwrap())
.collect(),
)),
(DataType::UInt8, ZfpArray::Int32(vec)) => Ok(transmute_to_bytes_vec(
vec.into_iter()
.map(|i| u8::try_from(((i >> 23) + 0x80).clamp(0x00, 0xff)).unwrap())
.collect(),
)),

Check warning on line 216 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L206-L216

Added lines #L206 - L216 were not covered by tests
(DataType::Int16, ZfpArray::Int32(vec)) => Ok(transmute_to_bytes_vec(
vec.into_iter()
.map(|i| i16::try_from((i >> 15).clamp(-0x8000, 0x7fff)).unwrap())
.collect(),
)),
(DataType::UInt16, ZfpArray::Int32(vec)) => Ok(transmute_to_bytes_vec(
vec.into_iter()
.map(|i| u16::try_from(((i >> 15) + 0x8000).clamp(0x0000, 0xffff)).unwrap())
.collect(),
)),
// Reached for unsupported data types and for data type / array variant mismatches.
_ => Err(CodecError::UnsupportedDataType(
decoded_representation.data_type().clone(),
IDENTIFIER.to_string(),
)),

Check warning on line 230 in src/array/codec/array_to_bytes/zfp.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp.rs#L222-L230

Added lines #L222 - L230 were not covered by tests
}
}

fn zfp_decode(
zfp_mode: &ZfpMode,
zfp_type: zfp_type,
mut encoded_value: Vec<u8>,
decoded_representation: &ChunkRepresentation,
parallel: bool,
) -> Result<Vec<u8>, CodecError> {
let mut decoded_value = vec![0u8; usize::try_from(decoded_representation.size()).unwrap()];
let mut array = init_zfp_decoding_output(decoded_representation)?;
let zfp_type = array.zfp_type();
let Some(field) = ZfpField::new(
&mut decoded_value,
zfp_type,
&mut array,
&decoded_representation
.shape()
.iter()
Expand Down Expand Up @@ -148,10 +269,11 @@ fn zfp_decode(
}

let ret = unsafe { zfp_decompress(zfp.as_zfp_stream(), field.as_zfp_field()) };
drop(field);
if ret == 0 {
Err(CodecError::from("zfp decompression failed"))
} else {
Ok(decoded_value)
demote_after_zfp_decoding(array, decoded_representation)
}
}

Expand All @@ -166,10 +288,17 @@ mod tests {

use super::*;

// const JSON_VALID: &'static str = r#"{
// "mode": "fixedprecision",
// "precision": 12
// }"#;
const JSON_VALID: &'static str = r#"{
"mode": "fixedprecision",
"precision": 12
"mode": "reversible"
}"#;
// const JSON_VALID: &'static str = r#"{
// "mode": "fixedrate",
// "rate": 2.5
// }"#;

#[test]
#[cfg_attr(miri, ignore)]
Expand Down Expand Up @@ -206,6 +335,41 @@ mod tests {
assert_eq!(elements, decoded_elements);
}

#[test]
#[cfg_attr(miri, ignore)]
fn codec_zfp_round_trip_i16() {
    // A 3x3x3 chunk of `Int16` elements holding 0..27, with a fill value of 0.
    let side = NonZeroU64::new(3).unwrap();
    let chunk_representation =
        ChunkRepresentation::new(vec![side; 3], DataType::Int16, 0i16.into()).unwrap();
    let elements: Vec<i16> = (0i16..27).collect();
    let bytes = crate::array::transmute_to_bytes_vec(elements.clone());

    let configuration: ZfpCodecConfiguration = serde_json::from_str(JSON_VALID).unwrap();
    let codec = ZfpCodec::new_with_configuration(&configuration);

    // Encode, then decode the encoded bytes with the same chunk representation.
    let encoded = codec
        .encode(bytes, &chunk_representation, &CodecOptions::default())
        .unwrap();
    let decoded = codec
        .decode(encoded, &chunk_representation, &CodecOptions::default())
        .unwrap();

    // The decoded elements must equal the input exactly.
    assert_eq!(
        elements,
        crate::array::transmute_from_bytes_vec::<i16>(decoded)
    );
}

#[test]
#[cfg_attr(miri, ignore)]
fn codec_zfp_partial_decode() {
Expand Down
45 changes: 45 additions & 0 deletions src/array/codec/array_to_bytes/zfp/zfp_array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/// An owned, typed buffer of elements in one of the four scalar types that zfp compresses.
///
/// Each variant wraps a `Vec` of the matching Rust primitive.
#[derive(Debug, Clone, PartialEq)]
pub enum ZfpArray {
    /// 32-bit signed integers.
    Int32(Vec<i32>),
    /// 64-bit signed integers.
    Int64(Vec<i64>),
    /// 32-bit (single precision) floating point values.
    Float(Vec<f32>),
    /// 64-bit (double precision) floating point values.
    Double(Vec<f64>),
}

impl ZfpArray {
/// Returns the number of elements in the wrapped vector, whichever variant is held.
pub fn len(&self) -> usize {
match self {
ZfpArray::Int32(v) => v.len(),
ZfpArray::Int64(v) => v.len(),

Check warning on line 13 in src/array/codec/array_to_bytes/zfp/zfp_array.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp/zfp_array.rs#L13

Added line #L13 was not covered by tests
ZfpArray::Float(v) => v.len(),
ZfpArray::Double(v) => v.len(),

Check warning on line 15 in src/array/codec/array_to_bytes/zfp/zfp_array.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp/zfp_array.rs#L15

Added line #L15 was not covered by tests
}
}

/// Returns the `zfp_sys` scalar type constant that corresponds to the held variant.
pub fn zfp_type(&self) -> zfp_sys::zfp_type {
match self {
ZfpArray::Int32(_) => zfp_sys::zfp_type_zfp_type_int32,
ZfpArray::Int64(_) => zfp_sys::zfp_type_zfp_type_int64,

Check warning on line 22 in src/array/codec/array_to_bytes/zfp/zfp_array.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp/zfp_array.rs#L22

Added line #L22 was not covered by tests
ZfpArray::Float(_) => zfp_sys::zfp_type_zfp_type_float,
ZfpArray::Double(_) => zfp_sys::zfp_type_zfp_type_double,

Check warning on line 24 in src/array/codec/array_to_bytes/zfp/zfp_array.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp/zfp_array.rs#L24

Added line #L24 was not covered by tests
}
}

// pub fn as_ptr(&self) -> *const std::ffi::c_void {
// match self {
// ZfpArray::Int32(v) => v.as_ptr().cast::<std::ffi::c_void>(),
// ZfpArray::Int64(v) => v.as_ptr().cast::<std::ffi::c_void>(),
// ZfpArray::Float(v) => v.as_ptr().cast::<std::ffi::c_void>(),
// ZfpArray::Double(v) => v.as_ptr().cast::<std::ffi::c_void>(),
// }
// }

/// Returns a mutable raw pointer to the wrapped vector's buffer, cast to `c_void`.
///
/// The pointer comes from `Vec::as_mut_ptr`, so that method's validity rules apply.
pub fn as_mut_ptr(&mut self) -> *mut std::ffi::c_void {
match self {
ZfpArray::Int32(v) => v.as_mut_ptr().cast::<std::ffi::c_void>(),
ZfpArray::Int64(v) => v.as_mut_ptr().cast::<std::ffi::c_void>(),

Check warning on line 40 in src/array/codec/array_to_bytes/zfp/zfp_array.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp/zfp_array.rs#L40

Added line #L40 was not covered by tests
ZfpArray::Float(v) => v.as_mut_ptr().cast::<std::ffi::c_void>(),
ZfpArray::Double(v) => v.as_mut_ptr().cast::<std::ffi::c_void>(),

Check warning on line 42 in src/array/codec/array_to_bytes/zfp/zfp_array.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/zfp/zfp_array.rs#L42

Added line #L42 was not covered by tests
}
}
}
26 changes: 7 additions & 19 deletions src/array/codec/array_to_bytes/zfp/zfp_codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::{
use crate::array::codec::{AsyncArrayPartialDecoderTraits, AsyncBytesPartialDecoderTraits};

use super::{
zarr_data_type_to_zfp_data_type,
promote_before_zfp_encoding, zarr_to_zfp_data_type,
zfp_bitstream::ZfpBitstream,
zfp_configuration::{
ZfpFixedAccuracyConfiguration, ZfpFixedPrecisionConfiguration, ZfpFixedRateConfiguration,
Expand Down Expand Up @@ -136,19 +136,15 @@ impl ArrayCodecTraits for ZfpCodec {

fn encode(
&self,
mut decoded_value: Vec<u8>,
decoded_value: Vec<u8>,
decoded_representation: &ChunkRepresentation,
_options: &CodecOptions,
) -> Result<Vec<u8>, CodecError> {
let Some(zfp_type) = zarr_data_type_to_zfp_data_type(decoded_representation.data_type())
else {
return Err(CodecError::from(
"data type {} is unsupported for zfp codec",
));
};
let mut decoded_value_promoted =
promote_before_zfp_encoding(decoded_value, decoded_representation)?;
let zfp_type = decoded_value_promoted.zfp_type();
let Some(field) = ZfpField::new(
&mut decoded_value,
zfp_type,
&mut decoded_value_promoted,
&decoded_representation
.shape()
.iter()
Expand Down Expand Up @@ -197,15 +193,8 @@ impl ArrayCodecTraits for ZfpCodec {
decoded_representation: &ChunkRepresentation,
_options: &CodecOptions,
) -> Result<Vec<u8>, CodecError> {
let Some(zfp_type) = zarr_data_type_to_zfp_data_type(decoded_representation.data_type())
else {
return Err(CodecError::from(
"data type {} is unsupported for zfp codec",
));
};
zfp_decode(
&self.mode,
zfp_type,
encoded_value,
decoded_representation,
false, // FIXME
Expand Down Expand Up @@ -247,8 +236,7 @@ impl ArrayToBytesCodecTraits for ZfpCodec {
decoded_representation: &ChunkRepresentation,
) -> Result<BytesRepresentation, CodecError> {
let data_type = decoded_representation.data_type();
let Some(zfp_type) = zarr_data_type_to_zfp_data_type(decoded_representation.data_type())
else {
let Some(zfp_type) = zarr_to_zfp_data_type(decoded_representation.data_type()) else {
return Err(CodecError::from(
"data type {} is unsupported for zfp codec",
));
Expand Down
Loading

0 comments on commit 1e18989

Please sign in to comment.