Skip to content

Commit

Permalink
Enforce non-zero dimensions for chunk shapes, adds `array::{ChunkShap…
Browse files Browse the repository at this point in the history
…e,ChunkRepresentation}`
  • Loading branch information
LDeakin committed Jan 26, 2024
1 parent 50fa031 commit e414b10
Show file tree
Hide file tree
Showing 42 changed files with 970 additions and 495 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- More informative `Metadata` deserialisation error message with an invalid configuration
- **Breaking**: `PluginCreateError::Other` changed to unit struct and added `PluginCreateError::from<{String,&str}>`
- `PluginCreateError::Unsupported` now includes a `plugin_type` field for more informative error messages
- Add `array::ChunkShape` wrapping `Vec<NonZeroU64>` and `array::ChunkRepresentation`, which is essentially `ArrayRepresentation` with a `NonZeroU64` shape
- **Breaking**: Relevant codec and partial decoder methods now use `ChunkRepresentation` instead of `ArrayRepresentation`
- **Breaking**: Relevant chunk grid methods now use `ChunkShape` instead of `ArrayShape`
- **Breaking**: Relevant array methods now use `ChunkShape` instead of `ArrayShape`

### Removed
- **Breaking**: Remove `StorePrefixError::new`, deprecated since `v0.7.3`
Expand Down
4 changes: 2 additions & 2 deletions benches/array_blosc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn array_blosc_write_all(c: &mut Criterion) {
let array = zarrs::array::ArrayBuilder::new(
vec![size; 3],
zarrs::array::DataType::UInt8,
vec![32; 3].into(),
vec![32; 3].try_into().unwrap(),
zarrs::array::FillValue::from(0u8),
)
.bytes_to_bytes_codecs(vec![Box::new(
Expand Down Expand Up @@ -52,7 +52,7 @@ fn array_blosc_read_all(c: &mut Criterion) {
let array = zarrs::array::ArrayBuilder::new(
vec![size; 3],
zarrs::array::DataType::UInt8,
vec![32; 3].into(),
vec![32; 3].try_into().unwrap(),
zarrs::array::FillValue::from(0u8),
)
.bytes_to_bytes_codecs(vec![Box::new(
Expand Down
14 changes: 8 additions & 6 deletions benches/array_uncompressed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ fn array_write_all(c: &mut Criterion) {
let array = zarrs::array::ArrayBuilder::new(
vec![size; 3],
zarrs::array::DataType::UInt8,
vec![32; 3].into(),
vec![32; 3].try_into().unwrap(),
zarrs::array::FillValue::from(0u8),
)
.build(store.into(), "/")
Expand All @@ -36,11 +36,12 @@ fn array_write_all_sharded(c: &mut Criterion) {
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
b.iter(|| {
let store = zarrs::storage::store::MemoryStore::new();
let sharding_codec = Box::new(ShardingCodecBuilder::new(vec![32; 3]).build());
let sharding_codec =
Box::new(ShardingCodecBuilder::new(vec![32; 3].try_into().unwrap()).build());
let array = zarrs::array::ArrayBuilder::new(
vec![size; 3],
zarrs::array::DataType::UInt16,
vec![size; 3].into(),
vec![size; 3].try_into().unwrap(),
zarrs::array::FillValue::from(0u16),
)
.array_to_bytes_codec(sharding_codec)
Expand Down Expand Up @@ -68,7 +69,7 @@ fn array_read_all(c: &mut Criterion) {
let array = zarrs::array::ArrayBuilder::new(
vec![size; 3],
zarrs::array::DataType::UInt16,
vec![32; 3].into(),
vec![32; 3].try_into().unwrap(),
zarrs::array::FillValue::from(0u16),
)
.build(store.into(), "/")
Expand Down Expand Up @@ -96,11 +97,12 @@ fn array_read_all_sharded(c: &mut Criterion) {
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
// Write the data
let store = zarrs::storage::store::MemoryStore::new();
let sharding_codec = Box::new(ShardingCodecBuilder::new(vec![32; 3]).build());
let sharding_codec =
Box::new(ShardingCodecBuilder::new(vec![32; 3].try_into().unwrap()).build());
let array = zarrs::array::ArrayBuilder::new(
vec![size; 3],
zarrs::array::DataType::UInt8,
vec![size; 3].into(),
vec![size; 3].try_into().unwrap(),
zarrs::array::FillValue::from(1u8),
)
.array_to_bytes_codec(sharding_codec)
Expand Down
10 changes: 7 additions & 3 deletions benches/codecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use zarrs::array::{
bytes_to_bytes::blosc::{BloscCompressor, BloscShuffleMode},
ArrayCodecTraits, BloscCodec, BytesCodec, BytesToBytesCodecTraits,
},
ArrayRepresentation, BytesRepresentation, DataType,
BytesRepresentation, ChunkRepresentation, DataType,
};

fn codec_bytes(c: &mut Criterion) {
Expand All @@ -25,8 +25,12 @@ fn codec_bytes(c: &mut Criterion) {
for size in [32, 64, 128, 256, 512].iter() {
let size3 = size * size * size;
let num_elements = size3 / 2;
let rep =
ArrayRepresentation::new(vec![num_elements; 1], DataType::UInt16, 0u16.into()).unwrap();
let rep = ChunkRepresentation::new(
vec![num_elements.try_into().unwrap(); 1],
DataType::UInt16,
0u16.into(),
)
.unwrap();

let data = vec![0u8; size3.try_into().unwrap()];
group.throughput(Throughput::Bytes(size3));
Expand Down
2 changes: 1 addition & 1 deletion examples/array_write_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ fn array_write_read() -> Result<(), Box<dyn std::error::Error>> {
let array = zarrs::array::ArrayBuilder::new(
vec![8, 8], // array shape
DataType::Float32,
vec![4, 4].into(), // regular chunk shape
vec![4, 4].try_into()?, // regular chunk shape
FillValue::from(ZARR_NAN_F32),
)
// .bytes_to_bytes_codecs(vec![]) // uncompressed
Expand Down
6 changes: 3 additions & 3 deletions examples/async_array_write_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ async fn async_array_write_read() -> Result<(), Box<dyn std::error::Error>> {

println!(
"The group metadata is:\n{}\n",
serde_json::to_string_pretty(&group.metadata()).unwrap()
serde_json::to_string_pretty(&group.metadata())?
);

// Create an array
let array_path = "/group/array";
let array = zarrs::array::ArrayBuilder::new(
vec![8, 8], // array shape
DataType::Float32,
vec![4, 4].into(), // regular chunk shape
vec![4, 4].try_into()?, // regular chunk shape
FillValue::from(ZARR_NAN_F32),
)
// .bytes_to_bytes_codecs(vec![]) // uncompressed
Expand All @@ -53,7 +53,7 @@ async fn async_array_write_read() -> Result<(), Box<dyn std::error::Error>> {

println!(
"The array metadata is:\n{}\n",
serde_json::to_string_pretty(&array.metadata()).unwrap()
serde_json::to_string_pretty(&array.metadata())?
);

// Write some chunks
Expand Down
10 changes: 8 additions & 2 deletions examples/rectangular_array_write_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ fn rectangular_array_write_read() -> Result<(), Box<dyn std::error::Error>> {
let array = zarrs::array::ArrayBuilder::new(
vec![8, 8], // array shape
DataType::Float32,
ChunkGrid::new(RectangularChunkGrid::new(&[[1, 2, 3, 2].into(), 4.into()])),
ChunkGrid::new(RectangularChunkGrid::new(&[
[1, 2, 3, 2].try_into()?,
4.try_into()?,
])),
FillValue::from(ZARR_NAN_F32),
)
.bytes_to_bytes_codecs(vec![
Expand All @@ -58,7 +61,10 @@ fn rectangular_array_write_read() -> Result<(), Box<dyn std::error::Error>> {
let chunk_indices = vec![i, 0];
if let Some(chunk_shape) = chunk_grid.chunk_shape(&chunk_indices, array.shape())? {
let chunk_array = ndarray::ArrayD::<f32>::from_elem(
chunk_shape.iter().map(|u| *u as usize).collect::<Vec<_>>(),
chunk_shape
.iter()
.map(|u| u.get() as usize)
.collect::<Vec<_>>(),
i as f32,
);
array.store_chunk_ndarray(&chunk_indices, &chunk_array.view())
Expand Down
16 changes: 10 additions & 6 deletions examples/sharded_array_write_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,16 @@ fn sharded_array_write_read() -> Result<(), Box<dyn std::error::Error>> {
let array_path = "/group/array";
let shard_shape = vec![4, 8];
let inner_chunk_shape = vec![4, 4];
let mut sharding_codec_builder = ShardingCodecBuilder::new(inner_chunk_shape.clone());
let mut sharding_codec_builder =
ShardingCodecBuilder::new(inner_chunk_shape.as_slice().try_into()?);
sharding_codec_builder.bytes_to_bytes_codecs(vec![
#[cfg(feature = "gzip")]
Box::new(codec::GzipCodec::new(5)?),
]);
let array = zarrs::array::ArrayBuilder::new(
vec![8, 8], // array shape
DataType::UInt16,
shard_shape.into(),
shard_shape.try_into()?,
FillValue::from(0u16),
)
.array_to_bytes_codec(Box::new(sharding_codec_builder.build()))
Expand All @@ -80,10 +81,13 @@ fn sharded_array_write_read() -> Result<(), Box<dyn std::error::Error>> {
let chunk_indices = vec![s, 0];
if let Some(chunk_shape) = chunk_grid.chunk_shape(&chunk_indices, array.shape())? {
let chunk_array = ndarray::ArrayD::<u16>::from_shape_fn(
chunk_shape.iter().map(|u| *u as usize).collect::<Vec<_>>(),
chunk_shape
.iter()
.map(|u| u.get() as usize)
.collect::<Vec<_>>(),
|ij| {
(s * chunk_shape[0] * chunk_shape[1]
+ ij[0] as u64 * chunk_shape[1]
(s * chunk_shape[0].get() * chunk_shape[1].get()
+ ij[0] as u64 * chunk_shape[1].get()
+ ij[1] as u64) as u16
},
);
Expand Down Expand Up @@ -131,7 +135,7 @@ fn sharded_array_write_read() -> Result<(), Box<dyn std::error::Error>> {
ndarray::ArrayD::<u16>::from_shape_vec(
inner_chunk_shape
.iter()
.map(|u| *u as usize)
.map(|&u| u as usize)
.collect::<Vec<_>>(),
elements,
)
Expand Down
2 changes: 1 addition & 1 deletion examples/zip_array_write_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn write_array_to_storage<TStorage: ReadableWritableStorageTraits>(
let array = zarrs::array::ArrayBuilder::new(
vec![8, 8], // array shape
DataType::Float32,
vec![4, 4].into(), // regular chunk shape
vec![4, 4].try_into()?, // regular chunk shape
FillValue::from(ZARR_NAN_F32),
)
.bytes_to_bytes_codecs(vec![
Expand Down
31 changes: 21 additions & 10 deletions src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod array_representation;
mod bytes_representation;
pub mod chunk_grid;
pub mod chunk_key_encoding;
mod chunk_shape;
pub mod codec;
pub mod data_type;
mod dimension_name;
Expand All @@ -27,10 +28,11 @@ pub use self::{
array_builder::ArrayBuilder,
array_errors::{ArrayCreateError, ArrayError},
array_metadata::{ArrayMetadata, ArrayMetadataV3},
array_representation::ArrayRepresentation,
array_representation::{ArrayRepresentation, ChunkRepresentation},
bytes_representation::BytesRepresentation,
chunk_grid::ChunkGrid,
chunk_key_encoding::ChunkKeyEncoding,
chunk_shape::{chunk_shape_to_array_shape, ChunkShape},
codec::CodecChain,
data_type::DataType,
dimension_name::DimensionName,
Expand All @@ -40,6 +42,7 @@ pub use self::{
};

use serde::Serialize;
use thiserror::Error;

use crate::{
array_subset::{ArraySubset, IncompatibleDimensionalityError},
Expand All @@ -54,6 +57,14 @@ pub type ArrayIndices = Vec<u64>;
/// The shape of an array.
pub type ArrayShape = Vec<u64>;

/// A non-zero error.
///
/// This is used in cases where an integer type cannot be converted to its equivalent non-zero type because the value is zero (e.g. [`u64`] to [`NonZeroU64`](std::num::NonZeroU64)).
/// It is used in the [`ChunkShape`] `try_from` methods.
#[derive(Debug, Error)]
#[error("value must be non-zero")]
pub struct NonZeroError;

/// An alias for bytes which may or may not be available.
///
/// When a value is read from a store, it returns `MaybeBytes` which is [`None`] if the key is not available.
Expand Down Expand Up @@ -427,15 +438,15 @@ impl<TStorage: ?Sized> Array<TStorage> {

/// Return the shape of the chunk grid (i.e., the number of chunks).
#[must_use]
pub fn chunk_grid_shape(&self) -> Option<Vec<u64>> {
pub fn chunk_grid_shape(&self) -> Option<ArrayShape> {
unsafe { self.chunk_grid().grid_shape_unchecked(self.shape()) }
}

/// Return the shape of the chunk at `chunk_indices`.
///
/// # Errors
/// Returns [`ArrayError::InvalidChunkGridIndicesError`] if the `chunk_indices` are incompatible with the chunk grid.
pub fn chunk_shape(&self, chunk_indices: &[u64]) -> Result<Vec<u64>, ArrayError> {
pub fn chunk_shape(&self, chunk_indices: &[u64]) -> Result<ChunkShape, ArrayError> {
self.chunk_grid()
.chunk_shape(chunk_indices, self.shape())
.map_err(|_| ArrayError::InvalidChunkGridIndicesError(chunk_indices.to_vec()))?
Expand Down Expand Up @@ -491,7 +502,7 @@ impl<TStorage: ?Sized> Array<TStorage> {
pub fn chunk_array_representation(
&self,
chunk_indices: &[u64],
) -> Result<ArrayRepresentation, ArrayError> {
) -> Result<ChunkRepresentation, ArrayError> {
(self.chunk_grid().chunk_shape(chunk_indices, self.shape())?).map_or_else(
|| {
Err(ArrayError::InvalidChunkGridIndicesError(
Expand All @@ -500,8 +511,8 @@ impl<TStorage: ?Sized> Array<TStorage> {
},
|chunk_shape| {
Ok(unsafe {
ArrayRepresentation::new_unchecked(
chunk_shape,
ChunkRepresentation::new_unchecked(
chunk_shape.to_vec(),
self.data_type().clone(),
self.fill_value().clone(),
)
Expand Down Expand Up @@ -686,7 +697,7 @@ mod tests {
let array = ArrayBuilder::new(
vec![8, 8],
DataType::UInt8,
vec![4, 4].into(),
vec![4, 4].try_into().unwrap(),
FillValue::from(0u8),
)
.build(store.clone(), array_path)
Expand All @@ -708,7 +719,7 @@ mod tests {
let mut array = ArrayBuilder::new(
vec![8, 8], // array shape
DataType::Float32,
vec![4, 4].into(),
vec![4, 4].try_into().unwrap(),
FillValue::from(ZARR_NAN_F32),
)
.bytes_to_bytes_codecs(vec![
Expand Down Expand Up @@ -740,7 +751,7 @@ mod tests {
let array = ArrayBuilder::new(
vec![8, 8], // array shape
DataType::Float32,
vec![4, 4].into(), // regular chunk shape
vec![4, 4].try_into().unwrap(), // regular chunk shape
FillValue::from(1f32),
)
.bytes_to_bytes_codecs(vec![
Expand Down Expand Up @@ -786,7 +797,7 @@ mod tests {
let array = ArrayBuilder::new(
vec![100, 4],
DataType::UInt8,
vec![10, 2].into(),
vec![10, 2].try_into().unwrap(),
FillValue::from(0u8),
)
.build(store, array_path)
Expand Down
13 changes: 8 additions & 5 deletions src/array/array_async_readable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::{
};

use super::{
chunk_shape_to_array_shape,
codec::{
ArrayCodecTraits, ArrayToBytesCodecTraits, AsyncArrayPartialDecoderTraits,
AsyncStoragePartialDecoder,
Expand Down Expand Up @@ -157,10 +158,12 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits> Array<TStorage> {
&self,
chunk_indices: &[u64],
) -> Result<ndarray::ArrayD<T>, ArrayError> {
let shape = self
.chunk_grid()
.chunk_shape(chunk_indices, self.shape())?
.ok_or_else(|| ArrayError::InvalidChunkGridIndicesError(chunk_indices.to_vec()))?;
let shape = chunk_shape_to_array_shape(
&self
.chunk_grid()
.chunk_shape(chunk_indices, self.shape())?
.ok_or_else(|| ArrayError::InvalidChunkGridIndicesError(chunk_indices.to_vec()))?,
);
array_async_retrieve_ndarray!(self, shape, async_retrieve_chunk_elements(chunk_indices))
}

Expand Down Expand Up @@ -517,7 +520,7 @@ impl<TStorage: ?Sized + AsyncReadableStorageTraits> Array<TStorage> {
chunk_subset: &ArraySubset,
) -> Result<Vec<u8>, ArrayError> {
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
if !chunk_subset.inbounds(chunk_representation.shape()) {
if !chunk_subset.inbounds(&chunk_shape_to_array_shape(chunk_representation.shape())) {
return Err(ArrayError::InvalidArraySubset(
chunk_subset.clone(),
self.shape().to_vec(),
Expand Down
3 changes: 2 additions & 1 deletion src/array/array_async_readable_writable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
storage::{data_key, AsyncReadableWritableStorageTraits},
};

use super::{Array, ArrayError};
use super::{chunk_shape_to_array_shape, Array, ArrayError};

impl<TStorage: ?Sized + AsyncReadableWritableStorageTraits> Array<TStorage> {
/// Encode `subset_bytes` and store in `array_subset`.
Expand Down Expand Up @@ -196,6 +196,7 @@ impl<TStorage: ?Sized + AsyncReadableWritableStorageTraits> Array<TStorage> {
) -> Result<(), ArrayError> {
// Validation
if let Some(chunk_shape) = self.chunk_grid().chunk_shape(chunk_indices, self.shape())? {
let chunk_shape = chunk_shape_to_array_shape(&chunk_shape);
if std::iter::zip(chunk_subset.end_exc(), &chunk_shape)
.any(|(end_exc, shape)| end_exc > *shape)
{
Expand Down
Loading

0 comments on commit e414b10

Please sign in to comment.