diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 802a1428..cee9dacb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,9 +2,9 @@ name: build
on:
push:
- branches: ["main", "v[0-9]+.[0-9]+"]
+ branches: ["main", "v[0-9]+.[0-9]+.x"]
pull_request:
- branches: ["main", "v[0-9]+.[0-9]+"]
+ branches: ["main", "v[0-9]+.[0-9]+.x"]
env:
CARGO_TERM_COLOR: always
@@ -87,16 +87,16 @@ jobs:
- uses: Swatinem/rust-cache@v2
- run: cargo install cargo-hack cargo-minimal-versions --locked
- run: cargo minimal-versions check --workspace --all-features --direct
-# codecov:
-# runs-on: ubuntu-latest
-# steps:
-# - uses: actions/checkout@v4
-# - run: sudo apt update && sudo apt install -y cmake clang-15
-# - uses: dtolnay/rust-toolchain@nightly
-# - uses: Swatinem/rust-cache@v2
-# - run: cargo +nightly install cargo-llvm-cov --locked
-# - run: cargo +nightly llvm-cov --all-features --doctests --lcov --output-path lcov.info
-# - name: Upload coverage reports to Codecov
-# uses: codecov/codecov-action@v4
-# with:
-# token: ${{ secrets.CODECOV_TOKEN }}
+ codecov:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - run: sudo apt update && sudo apt install -y cmake clang-15
+ - uses: dtolnay/rust-toolchain@nightly
+ - uses: Swatinem/rust-cache@v2
+ - run: cargo +nightly install cargo-llvm-cov --locked
+ - run: cargo +nightly llvm-cov --all-features --doctests --lcov --output-path lcov.info
+ - name: Upload coverage reports to Codecov
+ uses: codecov/codecov-action@v4
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b01ad5de..111fe991 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+### Added
+- Add `array::codec::{InvalidBytesLengthError,InvalidArrayShapeError,InvalidNumberOfElementsError,SubsetOutOfBoundsError}`
+- Add `ArraySubset::inbounds_shape()` (matches the old `ArraySubset::inbounds` behaviour)
+- Add `ArrayBytesFixedDisjointView[CreateError]`
+
+### Changed
+- **Breaking**: change `ArraySubset::inbounds` to take another subset rather than a shape
+- **Breaking**: `CodecError` enum changes:
+ - Change `CodecError::UnexpectedChunkDecodedSize` to an `InvalidBytesLengthError`
+ - Add `CodecError::{InvalidArrayShape,InvalidNumberOfElements,SubsetOutOfBounds,RawBytesOffsetsCreate,RawBytesOffsetsOutOfBounds}`
+- **Breaking**: Change output args to `ArrayBytesFixedDisjointView` and make safe the following:
+ - `Array::[async_]retrieve_chunk[_subset]_into`
+ - `[Async]ArrayPartialDecoderTraits::partial_decode_into`
+ - `ArrayToBytesCodecTraits::decode_into`
+ - `zarrs::array::copy_fill_value_into`
+ - `zarrs::array::update_array_bytes`
+- **Breaking**: change `RawBytesOffsets` into a validated newtype
+- **Breaking**: `ArrayBytes::new_vlen()` now returns a `Result` and validates bytes/offsets compatibility
+- Re-enable compatibility tests that were broken, since they are fixed in `zarr-python`/`numcodecs`
+- **Breaking**: move the `zarrs::array::{data_type,fill_value}` modules into the `zarrs_data_type` crate
+- Bump `lru` to 0.13
+
+## [0.19.2] - 2025-02-13
+
+### Changed
+- Bump `zarrs_metadata` to 0.3.4 which includes a number of Zarr metadata fixes
+ - See the [`zarrs_metadata` CHANGELOG.md](https://github.com/LDeakin/zarrs/blob/main/zarrs_metadata/CHANGELOG.md)
+
+## [0.19.1] - 2025-01-19
+
+### Added
+- Document that elements in `ArrayBytes` must be in C-contiguous order
+
+### Changed
+- Use new language/library features added between Rust 1.78-1.82 (internal)
+- Cleanup root docs and README removing ZEPs table and ecosystem table
+
+### Fixed
+- New clippy lints
+- Mark `String` and `Bytes` data types as experimental in their docs
+- Mark `rectangular` chunk grid as experimental since it is based on a draft ZEP
+- Add missing invariant to `[partial_]decode_into` safety docs
+
## [0.19.0] - 2025-01-10
### Highlights
@@ -1215,7 +1258,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Initial public release
-[unreleased]: https://github.com/LDeakin/zarrs/compare/zarrs-v0.19.0...HEAD
+[unreleased]: https://github.com/LDeakin/zarrs/compare/zarrs-v0.19.2...HEAD
+[0.19.2]: https://github.com/LDeakin/zarrs/releases/tag/zarrs-v0.19.2
+[0.19.1]: https://github.com/LDeakin/zarrs/releases/tag/zarrs-v0.19.1
[0.19.0]: https://github.com/LDeakin/zarrs/releases/tag/zarrs-v0.19.0
[0.18.3]: https://github.com/LDeakin/zarrs/releases/tag/zarrs-v0.18.3
[0.18.2]: https://github.com/LDeakin/zarrs/releases/tag/zarrs-v0.18.2
diff --git a/CITATION.cff b/CITATION.cff
index 44437c92..86bd8679 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,8 +1,8 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
title: "zarrs"
-version: 0.19.0
-date-released: 2025-01-10
+version: 0.19.2
+date-released: 2025-02-13
repository-code: "https://github.com/LDeakin/zarrs"
url: "https://zarrs.dev"
abstract: "zarrs is a Rust library for the Zarr storage format for multidimensional arrays and metadata."
diff --git a/Cargo.toml b/Cargo.toml
index 41825a07..dfbcfaf6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,6 +4,7 @@ resolver = "2"
members = [
"zarrs",
+ "zarrs_data_type",
"zarrs_metadata",
"zarrs_storage",
"zarrs_filesystem",
@@ -26,8 +27,12 @@ module_name_repetitions = "allow"
missing_panics_doc = "warn"
missing_errors_doc = "warn"
+[workspace.dependencies.zarrs_data_type]
+version = "0.1.0"
+path = "zarrs_data_type"
+
[workspace.dependencies.zarrs_metadata]
-version = "0.3.0"
+version = "0.3.4"
path = "zarrs_metadata"
[workspace.dependencies.zarrs_storage]
@@ -62,3 +67,10 @@ version = "0.51.0"
[workspace.dependencies.zip]
version = "2.1.3"
+
+[workspace.dependencies.half]
+version = "2.0.0"
+features = ["bytemuck"]
+
+[workspace.dependencies.num]
+version = "0.4.1"
diff --git a/README.md b/README.md
index 9435fedc..31b03404 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,29 @@
# zarrs
[](https://crates.io/crates/zarrs)
-[](https://docs.rs/zarrs)
+[][documentation]

[](https://crates.io/crates/zarrs)
[](https://github.com/LDeakin/zarrs/actions/workflows/ci.yml)
[](https://codecov.io/gh/LDeakin/zarrs)
[](https://zenodo.org/badge/latestdoi/695021547)
-`zarrs` is a Rust library for the [Zarr](https://zarr.dev) storage format for multidimensional arrays and metadata. It supports [Zarr V3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html) and a [V3 compatible subset](https://docs.rs/zarrs/latest/zarrs/#implementation-status) of [Zarr V2](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html).
+`zarrs` is a Rust library for the [Zarr] storage format for multidimensional arrays and metadata. It supports [Zarr V3] and a V3 compatible subset of [Zarr V2].
-A changelog can be found [here](https://github.com/LDeakin/zarrs/blob/main/CHANGELOG.md).
-Correctness issues with past versions are [detailed here](https://github.com/LDeakin/zarrs/blob/main/doc/correctness_issues.md).
+A changelog can be found [here][CHANGELOG].
+Correctness issues with past versions are [detailed here][correctness_issues].
-Developed at the [Department of Materials Physics](https://physics.anu.edu.au/research/mp/), Australian National University, Canberra, Australia.
+Developed at the [Department of Materials Physics, Australian National University, Canberra, Australia].
> [!TIP]
-> If you are a Python user, check out [`zarrs-python`](https://github.com/ilan-gold/zarrs-python).
-> It includes a high-performance codec pipeline for the reference [`zarr-python`](https://github.com/zarr-developers/zarr-python) implementation.
+> If you are a Python user, check out [`zarrs-python`].
+> It includes a high-performance codec pipeline for the reference [`zarr-python`] implementation.
## Getting Started
-- Review the [implementation status](https://docs.rs/zarrs/latest/zarrs/#implementation-status), [array support](https://docs.rs/zarrs/latest/zarrs/#array-support), and [storage support](https://docs.rs/zarrs/latest/zarrs/#storage-support).
-- Read [The `zarrs` Book](https://book.zarrs.dev).
-- View the [examples](https://github.com/LDeakin/zarrs/tree/main/zarrs/examples) and [the example below](#example).
-- Read the [documentation](https://docs.rs/zarrs/latest/zarrs/). [`array::Array`](https://docs.rs/zarrs/latest/zarrs/array/struct.Array.html) is a good place to start.
-- Check out the [`zarrs` ecosystem](#zarrs-ecosystem).
+- Review the [implementation status] ([zarr version support], [array support], [storage support], and the [`zarrs` ecosystem](#zarrs-ecosystem)).
+- Read [The `zarrs` Book].
+- View the [examples] and [the example below](#example).
+- Read the [documentation].
## Example
```rust
@@ -90,61 +89,31 @@ println!("{array_ndarray:4}");
## `zarrs` Ecosystem
-| Crate | Docs / Description |
-| --------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
-| **Core** | |
-| [![zarrs_ver]](https://crates.io/crates/zarrs) [zarrs] | [![docs]](https://docs.rs/zarrs) The core library for manipulating Zarr hierarchies |
-| [![zarrs_metadata_ver]](https://crates.io/crates/zarrs_metadata) [zarrs_metadata] | [![docs]](https://docs.rs/zarrs_metadata) Zarr metadata support (re-exported as `zarrs::metadata`) |
-| [![zarrs_storage_ver]](https://crates.io/crates/zarrs_storage) [zarrs_storage] | [![docs]](https://docs.rs/zarrs_storage) The storage API for `zarrs` (re-exported as `zarrs::storage`) |
-| **Stores** | |
-| [![zarrs_filesystem_ver]](https://crates.io/crates/zarrs_filesystem) [zarrs_filesystem] | [![docs]](https://docs.rs/zarrs_filesystem) A filesystem store (re-exported as `zarrs::filesystem`) |
-| [![zarrs_object_store_ver]](https://crates.io/crates/zarrs_object_store) [zarrs_object_store] | [![docs]](https://docs.rs/zarrs_object_store) [`object_store`](https://docs.rs/object_store/latest/object_store/) store support |
-| [![zarrs_opendal_ver]](https://crates.io/crates/zarrs_opendal) [zarrs_opendal] | [![docs]](https://docs.rs/zarrs_opendal) [`opendal`](https://docs.rs/opendal/latest/opendal/) store support |
-| [![zarrs_http_ver]](https://crates.io/crates/zarrs_http) [zarrs_http] | [![docs]](https://docs.rs/zarrs_http) A synchronous http store |
-| [![zarrs_zip_ver]](https://crates.io/crates/zarrs_zip) [zarrs_zip] | [![docs]](https://docs.rs/zarrs_zip) A storage adapter for zip files |
-| [![zarrs_icechunk_ver]](https://crates.io/crates/zarrs_icechunk) [zarrs_icechunk] | [![docs]](https://docs.rs/zarrs_icechunk) [`icechunk`](https://docs.rs/icechunk/latest/icechunk/) store support |
-| **Bindings** | |
-| [![zarrs_python_ver]](https://pypi.org/project/zarrs/) [zarrs-python] | [![docs]](https://zarrs-python.readthedocs.io/en/latest/) A codec pipeline for [zarr-python] |
-| [![zarrs_ffi_ver]](https://crates.io/crates/zarrs_ffi) [zarrs_ffi] | [![docs]](https://docs.rs/zarrs_ffi) A subset of `zarrs` exposed as a C/C++ API |
-| **Zarr Metadata Conventions** | |
-| [![ome_zarr_metadata_ver]](https://crates.io/crates/ome_zarr_metadata) [ome_zarr_metadata] | [![docs]](https://docs.rs/ome_zarr_metadata) A library for OME-Zarr (previously OME-NGFF) metadata |
-
-[docs]: https://img.shields.io/badge/docs-brightgreen
-[zarrs_ver]: https://img.shields.io/crates/v/zarrs
-[zarrs]: https://github.com/LDeakin/zarrs/tree/main/zarrs
-[zarrs_metadata_ver]: https://img.shields.io/crates/v/zarrs_metadata
-[zarrs_metadata]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata
-[zarrs_storage_ver]: https://img.shields.io/crates/v/zarrs_storage
-[zarrs_storage]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage
-[zarrs_filesystem_ver]: https://img.shields.io/crates/v/zarrs_filesystem
-[zarrs_filesystem]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem
-[zarrs_http_ver]: https://img.shields.io/crates/v/zarrs_http
-[zarrs_http]: https://github.com/LDeakin/zarrs/tree/main/zarrs_http
-[zarrs_object_store_ver]: https://img.shields.io/crates/v/zarrs_object_store
-[zarrs_object_store]: https://github.com/LDeakin/zarrs/tree/main/zarrs_object_store
-[zarrs_opendal_ver]: https://img.shields.io/crates/v/zarrs_opendal
-[zarrs_opendal]: https://github.com/LDeakin/zarrs/tree/main/zarrs_opendal
-[zarrs_zip_ver]: https://img.shields.io/crates/v/zarrs_zip
-[zarrs_zip]: https://github.com/LDeakin/zarrs/tree/main/zarrs_zip
-[zarrs_icechunk_ver]: https://img.shields.io/crates/v/zarrs_icechunk
-[zarrs_icechunk]: https://github.com/LDeakin/zarrs_icechunk
-[zarrs_ffi_ver]: https://img.shields.io/crates/v/zarrs_ffi
-[zarrs_ffi]: https://github.com/LDeakin/zarrs_ffi
-[zarrs_python_ver]: https://img.shields.io/pypi/v/zarrs
-[zarrs-python]: https://github.com/ilan-gold/zarrs-python
-[zarr-python]: https://github.com/zarr-developers/zarr-python
-[ome_zarr_metadata_ver]: https://img.shields.io/crates/v/ome_zarr_metadata
-[ome_zarr_metadata]: https://github.com/LDeakin/rust_ome_zarr_metadata
-
-#### [zarrs_tools]
-[![zarrs_tools_ver]](https://crates.io/crates/zarrs_tools) [![zarrs_tools_doc]](https://docs.rs/zarrs_tools)
-
-[zarrs_tools]: https://github.com/LDeakin/zarrs_tools
-[zarrs_tools_ver]: https://img.shields.io/crates/v/zarrs_tools.svg
-[zarrs_tools_doc]: https://docs.rs/zarrs_tools/badge.svg
-
+### Core
+- [`zarrs`]: The core library for manipulating Zarr hierarchies.
+- [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`).
+- [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`).
+- [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`).
+
+### Stores
+- [`zarrs_filesystem`]: A filesystem store (re-exported as `zarrs::filesystem`).
+- [`zarrs_object_store`]: [`object_store`] store support.
+- [`zarrs_opendal`]: [`opendal`] store support.
+- [`zarrs_http`]: A synchronous http store.
+- [`zarrs_zip`]: A storage adapter for zip files.
+- [`zarrs_icechunk`]: [`icechunk`] store support.
+
+### Bindings
+- [`zarrs-python`]: A high-performance codec pipeline for [`zarr-python`].
+- [`zarrs_ffi`]: A subset of `zarrs` exposed as a C/C++ API.
+
+### Zarr Metadata Conventions
+- [`ome_zarr_metadata`]: A library for OME-Zarr (previously OME-NGFF) metadata.
+
+### Tools
+- [`zarrs_tools`]: Various tools for creating and manipulating Zarr V3 data with the zarrs rust crate
- A reencoder that can change codecs, chunk shape, convert Zarr V2 to V3, etc.
- - Create an [OME-Zarr](https://ngff.openmicroscopy.org/latest/) hierarchy from a Zarr array.
+ - Create an [OME-Zarr] hierarchy from a Zarr array.
- Transform arrays: crop, rescale, downsample, gradient magnitude, gaussian, noise filtering, etc.
- Benchmarking tools and performance benchmarks of `zarrs`.
@@ -154,3 +123,39 @@ println!("{array_ndarray:4}");
- the MIT license [LICENSE-MIT](./LICENCE-MIT) or , at your option.
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
+
+[CHANGELOG]: https://github.com/LDeakin/zarrs/blob/main/CHANGELOG.md
+[correctness_issues]: https://github.com/LDeakin/zarrs/blob/main/doc/correctness_issues.md
+[implementation status]: https://docs.rs/zarrs/latest/zarrs/#implementation-status
+[zarr version support]: https://docs.rs/zarrs/latest/zarrs/#zarr-version-support
+[array support]: https://docs.rs/zarrs/latest/zarrs/#array-support
+[storage support]: https://docs.rs/zarrs/latest/zarrs/#storage-support
+[examples]: https://github.com/LDeakin/zarrs/tree/main/zarrs/examples
+[documentation]: https://docs.rs/zarrs/latest/zarrs/
+[The `zarrs` Book]: https://book.zarrs.dev
+
+[`zarrs`]: https://github.com/LDeakin/zarrs/tree/main/zarrs
+[`zarrs_data_type`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_data_type
+[`zarrs_metadata`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata
+[`zarrs_storage`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage
+[`zarrs_filesystem`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem
+[`zarrs_http`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_http
+[`zarrs_object_store`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_object_store
+[`zarrs_opendal`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_opendal
+[`zarrs_zip`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_zip
+[`zarrs_icechunk`]: https://github.com/LDeakin/zarrs_icechunk
+[`zarrs_ffi`]: https://github.com/LDeakin/zarrs_ffi
+[`zarrs-python`]: https://github.com/ilan-gold/zarrs-python
+[`zarr-python`]: https://github.com/zarr-developers/zarr-python
+[`zarrs_tools`]: https://github.com/LDeakin/zarrs_tools
+[`ome_zarr_metadata`]: https://github.com/LDeakin/rust_ome_zarr_metadata
+[`object_store`]: https://github.com/apache/arrow-rs/tree/main/object_store
+[`opendal`]: https://github.com/apache/OpenDAL
+[`icechunk`]: https://github.com/earth-mover/icechunk
+
+[Zarr]: https://zarr.dev
+[Zarr V3]: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html
+[Zarr V2]: https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
+[OME-Zarr]: https://ngff.openmicroscopy.org/latest/
+
+[Department of Materials Physics, Australian National University, Canberra, Australia]: https://physics.anu.edu.au/research/mp/
diff --git a/zarrs/Cargo.toml b/zarrs/Cargo.toml
index d3c3168a..881b5493 100644
--- a/zarrs/Cargo.toml
+++ b/zarrs/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "zarrs"
-version = "0.19.0"
+version = "0.20.0-dev"
authors = ["Lachlan Deakin "]
edition = "2021"
rust-version = "1.82"
@@ -52,13 +52,13 @@ derive_more = { version = "1.0.0", features = ["deref", "display", "from"] }
flate2 = { version = "1.0.30", optional = true }
futures = { version = "0.3.29", optional = true }
gdeflate-sys = { version = "0.4.1", optional = true }
-half = { version = "2.0.0", features = ["bytemuck"] }
+half = { workspace = true }
inventory = "0.3.0"
itertools = "0.14.0"
-lru = "0.12.4"
+lru = "0.13.0"
moka = { version = "0.12.8", features = ["sync"] }
ndarray = { version = ">=0.15.0,<17", optional = true }
-num = { version = "0.4.1" }
+num = { workspace = true }
pco = { version = "0.4.0", optional = true }
rayon = "1.10.0"
rayon_iter_concurrent_limit = "0.2.0"
@@ -68,6 +68,7 @@ thiserror = "2.0.0"
thread_local = "1.1.8"
unsafe_cell_slice = "0.2.0"
zarrs_filesystem = { workspace = true, optional = true }
+zarrs_data_type = { workspace = true }
zarrs_metadata = { workspace = true }
zarrs_storage = { workspace = true }
zfp-sys = {version = "0.3.0", features = ["static"], optional = true }
diff --git a/zarrs/doc/ecosystem.md b/zarrs/doc/ecosystem.md
index a048289d..db528a37 100644
--- a/zarrs/doc/ecosystem.md
+++ b/zarrs/doc/ecosystem.md
@@ -1,57 +1,48 @@
-| Crate | Docs / Description |
-| --------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
-| **Core** | |
-| [![zarrs_ver]](https://crates.io/crates/zarrs) [zarrs] | [![docs]](https://docs.rs/zarrs) The core library for manipulating Zarr hierarchies |
-| [![zarrs_metadata_ver]](https://crates.io/crates/zarrs_metadata) [zarrs_metadata] | [![docs]](https://docs.rs/zarrs_metadata) Zarr metadata support (re-exported as `zarrs::metadata`) |
-| [![zarrs_storage_ver]](https://crates.io/crates/zarrs_storage) [zarrs_storage] | [![docs]](https://docs.rs/zarrs_storage) The storage API for `zarrs` (re-exported as `zarrs::storage`) |
-| **Stores** | |
-| [![zarrs_filesystem_ver]](https://crates.io/crates/zarrs_filesystem) [zarrs_filesystem] | [![docs]](https://docs.rs/zarrs_filesystem) A filesystem store (re-exported as `zarrs::filesystem`) |
-| [![zarrs_object_store_ver]](https://crates.io/crates/zarrs_object_store) [zarrs_object_store] | [![docs]](https://docs.rs/zarrs_object_store) [`object_store`](https://docs.rs/object_store/latest/object_store/) store support |
-| [![zarrs_opendal_ver]](https://crates.io/crates/zarrs_opendal) [zarrs_opendal] | [![docs]](https://docs.rs/zarrs_opendal) [`opendal`](https://docs.rs/opendal/latest/opendal/) store support |
-| [![zarrs_http_ver]](https://crates.io/crates/zarrs_http) [zarrs_http] | [![docs]](https://docs.rs/zarrs_http) A synchronous http store |
-| [![zarrs_zip_ver]](https://crates.io/crates/zarrs_zip) [zarrs_zip] | [![docs]](https://docs.rs/zarrs_zip) A storage adapter for zip files |
-| [![zarrs_icechunk_ver]](https://crates.io/crates/zarrs_icechunk) [zarrs_icechunk] | [![docs]](https://docs.rs/zarrs_icechunk) [`icechunk`](https://docs.rs/icechunk/latest/icechunk/) store support |
-| **Bindings** | |
-| [![zarrs_python_ver]](https://pypi.org/project/zarrs/) [zarrs-python] | [![docs]](https://zarrs-python.readthedocs.io/en/latest/) A codec pipeline for [zarr-python] |
-| [![zarrs_ffi_ver]](https://crates.io/crates/zarrs_ffi) [zarrs_ffi] | [![docs]](https://docs.rs/zarrs_ffi) A subset of `zarrs` exposed as a C/C++ API |
-| **Zarr Metadata Conventions** | |
-| [![ome_zarr_metadata_ver]](https://crates.io/crates/ome_zarr_metadata) [ome_zarr_metadata] | [![docs]](https://docs.rs/ome_zarr_metadata) A library for OME-Zarr (previously OME-NGFF) metadata |
+#### Core
+- [`zarrs`]: The core library for manipulating Zarr hierarchies.
+- [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`).
+- [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`).
+- [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`).
-[docs]: https://img.shields.io/badge/docs-brightgreen
-[zarrs_ver]: https://img.shields.io/crates/v/zarrs
-[zarrs]: https://github.com/LDeakin/zarrs/tree/main/zarrs
-[zarrs_metadata_ver]: https://img.shields.io/crates/v/zarrs_metadata
-[zarrs_metadata]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata
-[zarrs_storage_ver]: https://img.shields.io/crates/v/zarrs_storage
-[zarrs_storage]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage
-[zarrs_filesystem_ver]: https://img.shields.io/crates/v/zarrs_filesystem
-[zarrs_filesystem]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem
-[zarrs_http_ver]: https://img.shields.io/crates/v/zarrs_http
-[zarrs_http]: https://github.com/LDeakin/zarrs/tree/main/zarrs_http
-[zarrs_object_store_ver]: https://img.shields.io/crates/v/zarrs_object_store
-[zarrs_object_store]: https://github.com/LDeakin/zarrs/tree/main/zarrs_object_store
-[zarrs_opendal_ver]: https://img.shields.io/crates/v/zarrs_opendal
-[zarrs_opendal]: https://github.com/LDeakin/zarrs/tree/main/zarrs_opendal
-[zarrs_zip_ver]: https://img.shields.io/crates/v/zarrs_zip
-[zarrs_zip]: https://github.com/LDeakin/zarrs/tree/main/zarrs_zip
-[zarrs_icechunk_ver]: https://img.shields.io/crates/v/zarrs_icechunk
-[zarrs_icechunk]: https://github.com/LDeakin/zarrs_icechunk
-[zarrs_ffi_ver]: https://img.shields.io/crates/v/zarrs_ffi
-[zarrs_ffi]: https://github.com/LDeakin/zarrs_ffi
-[zarrs_python_ver]: https://img.shields.io/pypi/v/zarrs
-[zarrs-python]: https://github.com/ilan-gold/zarrs-python
-[zarr-python]: https://github.com/zarr-developers/zarr-python
-[ome_zarr_metadata_ver]: https://img.shields.io/crates/v/ome_zarr_metadata
-[ome_zarr_metadata]: https://github.com/LDeakin/rust_ome_zarr_metadata
+#### Stores
+- [`zarrs_filesystem`]: A filesystem store (re-exported as `zarrs::filesystem`).
+- [`zarrs_object_store`]: [`object_store`] store support.
+- [`zarrs_opendal`]: [`opendal`] store support.
+- [`zarrs_http`]: A synchronous http store.
+- [`zarrs_zip`]: A storage adapter for zip files.
+- [`zarrs_icechunk`]: [`icechunk`] store support.
-#### [zarrs_tools]
-[![zarrs_tools_ver]](https://crates.io/crates/zarrs_tools) [![zarrs_tools_doc]](https://docs.rs/zarrs_tools)
+#### Bindings
+- [`zarrs-python`]: A high-performance codec pipeline for [`zarr-python`].
+- [`zarrs_ffi`]: A subset of `zarrs` exposed as a C/C++ API.
-[zarrs_tools]: https://github.com/LDeakin/zarrs_tools
-[zarrs_tools_ver]: https://img.shields.io/crates/v/zarrs_tools.svg
-[zarrs_tools_doc]: https://docs.rs/zarrs_tools/badge.svg
+#### Zarr Metadata Conventions
+- [`ome_zarr_metadata`]: A library for OME-Zarr (previously OME-NGFF) metadata.
+#### Tools
+- [`zarrs_tools`]: Various tools for creating and manipulating Zarr V3 data with the zarrs rust crate
- A reencoder that can change codecs, chunk shape, convert Zarr V2 to V3, etc.
- - Create an [OME-Zarr](https://ngff.openmicroscopy.org/latest/) hierarchy from a Zarr array.
+ - Create an [OME-Zarr] hierarchy from a Zarr array.
- Transform arrays: crop, rescale, downsample, gradient magnitude, gaussian, noise filtering, etc.
- Benchmarking tools and performance benchmarks of `zarrs`.
+
+[`zarrs`]: https://github.com/LDeakin/zarrs/tree/main/zarrs
+[`zarrs_data_type`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_data_type
+[`zarrs_metadata`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata
+[`zarrs_storage`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage
+[`zarrs_filesystem`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem
+[`zarrs_http`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_http
+[`zarrs_object_store`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_object_store
+[`zarrs_opendal`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_opendal
+[`zarrs_zip`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_zip
+[`zarrs_icechunk`]: https://github.com/LDeakin/zarrs_icechunk
+[`zarrs_ffi`]: https://github.com/LDeakin/zarrs_ffi
+[`zarrs-python`]: https://github.com/ilan-gold/zarrs-python
+[`zarr-python`]: https://github.com/zarr-developers/zarr-python
+[`zarrs_tools`]: https://github.com/LDeakin/zarrs_tools
+[`ome_zarr_metadata`]: https://github.com/LDeakin/rust_ome_zarr_metadata
+[`object_store`]: https://github.com/apache/arrow-rs/tree/main/object_store
+[`opendal`]: https://github.com/apache/OpenDAL
+[`icechunk`]: https://github.com/earth-mover/icechunk
+
+[OME-Zarr]: https://ngff.openmicroscopy.org/latest/
diff --git a/zarrs/doc/status/ZEPs.md b/zarrs/doc/status/ZEPs.md
deleted file mode 100644
index fd653605..00000000
--- a/zarrs/doc/status/ZEPs.md
+++ /dev/null
@@ -1,14 +0,0 @@
-| [Zarr Enhancement Proposal] | Status | Zarrs |
-| --------------------------------------- | -------------------------- | ------------ |
-| [ZEP0001]: Zarr specification version 3 | Accepted | Full support |
-| [ZEP0002]: Sharding codec | Accepted | Full support |
-| Draft [ZEP0003]: Variable chunking | [zarr-developers #52] | Full support |
-| Draft ZEP0007: Strings | [zarr-developers/zeps #47] | Prototype |
-
-[Zarr Enhancement Proposal]: https://zarr.dev/zeps/
-[ZEP0001]: https://zarr.dev/zeps/accepted/ZEP0001.html
-[ZEP0002]: https://zarr.dev/zeps/accepted/ZEP0002.html
-[ZEP0003]: https://zarr.dev/zeps/draft/ZEP0003.html
-
-[zarr-developers #52]: https://github.com/orgs/zarr-developers/discussions/52
-[zarr-developers/zeps #47]: https://github.com/zarr-developers/zeps/pull/47#issuecomment-1710505141
diff --git a/zarrs/doc/status/chunk_grids.md b/zarrs/doc/status/chunk_grids.md
index 3de5024e..ff033044 100644
--- a/zarrs/doc/status/chunk_grids.md
+++ b/zarrs/doc/status/chunk_grids.md
@@ -1,7 +1,7 @@
-| Chunk Grid | ZEP | V3 | V2 | Feature Flag |
-| ------------- | --------- | ------- | ------- | ------------ |
-| [regular] | [ZEP0001] | ✓ | ✓ | |
-| [rectangular] | [ZEP0003] | ✓ | | |
+| Chunk Grid | ZEP | V3 | V2 | Feature Flag |
+| ---------------------------- | ----------------- | ------- | ------- | ------------ |
+| [regular] | [ZEP0001] | ✓ | ✓ | |
+| [rectangular] (experimental) | [ZEP0003] (draft) | ✓ | | |
[regular]: crate::array::chunk_grid::RegularChunkGrid
[rectangular]: crate::array::chunk_grid::RectangularChunkGrid
diff --git a/zarrs/doc/status/data_types.md b/zarrs/doc/status/data_types.md
index af053bf6..2b4c1202 100644
--- a/zarrs/doc/status/data_types.md
+++ b/zarrs/doc/status/data_types.md
@@ -8,24 +8,24 @@
† Experimental data types are recommended for evaluation only.
-[bool]: crate::array::data_type::DataType::Bool
-[int8]: crate::array::data_type::DataType::Int8
-[int16]: crate::array::data_type::DataType::Int16
-[int32]: crate::array::data_type::DataType::Int32
-[int64]: crate::array::data_type::DataType::Int64
-[uint8]: crate::array::data_type::DataType::UInt8
-[uint16]: crate::array::data_type::DataType::UInt16
-[uint32]: crate::array::data_type::DataType::UInt32
-[uint64]: crate::array::data_type::DataType::UInt64
-[float16]: crate::array::data_type::DataType::Float16
-[float32]: crate::array::data_type::DataType::Float32
-[float64]: crate::array::data_type::DataType::Float64
-[complex64]: crate::array::data_type::DataType::Complex64
-[complex128]: crate::array::data_type::DataType::Complex128
-[bfloat16]: crate::array::data_type::DataType::BFloat16
-[r* (raw bits)]: crate::array::data_type::DataType::RawBits
-[string]: crate::array::data_type::DataType::String
-[bytes]: crate::array::data_type::DataType::Bytes
+[bool]: crate::data_type::DataType::Bool
+[int8]: crate::data_type::DataType::Int8
+[int16]: crate::data_type::DataType::Int16
+[int32]: crate::data_type::DataType::Int32
+[int64]: crate::data_type::DataType::Int64
+[uint8]: crate::data_type::DataType::UInt8
+[uint16]: crate::data_type::DataType::UInt16
+[uint32]: crate::data_type::DataType::UInt32
+[uint64]: crate::data_type::DataType::UInt64
+[float16]: crate::data_type::DataType::Float16
+[float32]: crate::data_type::DataType::Float32
+[float64]: crate::data_type::DataType::Float64
+[complex64]: crate::data_type::DataType::Complex64
+[complex128]: crate::data_type::DataType::Complex128
+[bfloat16]: crate::data_type::DataType::BFloat16
+[r* (raw bits)]: crate::data_type::DataType::RawBits
+[string]: crate::data_type::DataType::String
+[bytes]: crate::data_type::DataType::Bytes
[ZEP0001]: https://zarr.dev/zeps/accepted/ZEP0001.html
[zarr-specs #130]: https://github.com/zarr-developers/zarr-specs/issues/130
diff --git a/zarrs/src/array.rs b/zarrs/src/array.rs
index e76d28db..f0ae3f96 100644
--- a/zarrs/src/array.rs
+++ b/zarrs/src/array.rs
@@ -23,6 +23,7 @@
mod array_builder;
mod array_bytes;
+mod array_bytes_fixed_disjoint_view;
mod array_errors;
mod array_metadata_options;
mod array_representation;
@@ -32,10 +33,9 @@ pub mod chunk_grid;
pub mod chunk_key_encoding;
pub mod codec;
pub mod concurrency;
-pub mod data_type;
mod element;
-mod fill_value;
pub mod storage_transformer;
+pub use crate::data_type; // re-export for zarrs < 0.20 compat
#[cfg(feature = "sharding")]
mod array_sharded_ext;
@@ -48,7 +48,10 @@ pub use self::{
array_builder::ArrayBuilder,
array_bytes::{
copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesError, RawBytes,
- RawBytesOffsets,
+ RawBytesOffsets, RawBytesOffsetsCreateError, RawBytesOffsetsOutOfBoundsError,
+ },
+ array_bytes_fixed_disjoint_view::{
+ ArrayBytesFixedDisjointView, ArrayBytesFixedDisjointViewCreateError,
},
array_errors::{ArrayCreateError, ArrayError},
array_metadata_options::ArrayMetadataOptions,
@@ -61,11 +64,11 @@ pub use self::{
codec::ArrayCodecTraits,
codec::CodecChain,
concurrency::RecommendedConcurrency,
- data_type::DataType,
element::{Element, ElementFixedLength, ElementOwned},
- fill_value::FillValue,
storage_transformer::StorageTransformerChain,
};
+pub use crate::data_type::{DataType, FillValue}; // re-export for zarrs < 0.20 compat
+
pub use crate::metadata::v2::ArrayMetadataV2;
use crate::metadata::v2_to_v3::ArrayMetadataV2ToV3ConversionError;
pub use crate::metadata::v3::{
@@ -603,7 +606,7 @@ impl Array {
// Codec metadata manipulation
match &mut metadata {
ArrayMetadata::V3(metadata) => {
- metadata.codecs = self.codecs().create_metadatas_opt(options);
+ metadata.codecs = self.codecs().create_metadatas_opt(options.codec_options());
}
ArrayMetadata::V2(_metadata) => {
// NOTE: The codec related options in ArrayMetadataOptions do not impact V2 codecs
@@ -910,10 +913,7 @@ pub fn elements_to_ndarray(
) -> Result<ndarray::ArrayD<T>, ArrayError> {
let length = elements.len();
ndarray::ArrayD::<T>::from_shape_vec(iter_u64_to_usize(shape.iter()), elements).map_err(|_| {
- ArrayError::CodecError(codec::CodecError::UnexpectedChunkDecodedSize(
- length * std::mem::size_of::<T>(),
- shape.iter().product::<u64>() * std::mem::size_of::<T>() as u64,
- ))
+ ArrayError::CodecError(codec::InvalidArrayShapeError::new(shape.to_vec(), length).into())
})
}
@@ -926,7 +926,7 @@ pub fn bytes_to_ndarray(
shape: &[u64],
bytes: Vec<u8>,
) -> Result, ArrayError> {
- let expected_len = shape.iter().product::<u64>() * core::mem::size_of::<T>() as u64;
+ let expected_len = shape.iter().product::<u64>() * size_of::<T>() as u64;
if bytes.len() as u64 != expected_len {
return Err(ArrayError::InvalidBytesInputSize(bytes.len(), expected_len));
}
@@ -1102,12 +1102,12 @@ mod tests {
)
}
- #[ignore] // FIXME: Reported upstream https://github.com/zarr-developers/zarr-python/issues/2675
+ #[cfg(feature = "transpose")]
#[test]
fn array_v2_none_f() {
array_v2_to_v3(
"tests/data/v2/array_none_F.zarr",
- "tests/data/v3/array_none_tranpose.zarr",
+ "tests/data/v3/array_none_transpose.zarr",
)
}
@@ -1122,7 +1122,6 @@ mod tests {
}
#[cfg(feature = "blosc")]
- #[ignore] // FIXME: Reported upstream https://github.com/zarr-developers/zarr-python/issues/2675
#[test]
#[cfg_attr(miri, ignore)]
fn array_v2_blosc_f() {
diff --git a/zarrs/src/array/array_async_readable.rs b/zarrs/src/array/array_async_readable.rs
index 79ca0f72..f418968e 100644
--- a/zarrs/src/array/array_async_readable.rs
+++ b/zarrs/src/array/array_async_readable.rs
@@ -18,8 +18,8 @@ use super::{
},
concurrency::concurrency_chunks_and_codec,
element::ElementOwned,
- Array, ArrayBytes, ArrayCreateError, ArrayError, ArrayMetadata, ArrayMetadataV2,
- ArrayMetadataV3, ArraySize, DataTypeSize,
+ Array, ArrayBytes, ArrayBytesFixedDisjointView, ArrayCreateError, ArrayError, ArrayMetadata,
+ ArrayMetadataV2, ArrayMetadataV3, ArraySize, DataTypeSize,
};
#[cfg(feature = "ndarray")]
@@ -335,12 +335,10 @@ impl Array {
}
/// Async variant of [`retrieve_chunk_into`](Array::retrieve_chunk_into).
- async unsafe fn async_retrieve_chunk_into(
+ async fn async_retrieve_chunk_into(
&self,
chunk_indices: &[u64],
- output: &UnsafeCellSlice<'_, u8>,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), ArrayError> {
if chunk_indices.len() != self.dimensionality() {
@@ -360,29 +358,17 @@ impl Array {
if let Some(chunk_encoded) = chunk_encoded {
let chunk_encoded: Vec<u8> = chunk_encoded.into();
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
- unsafe {
- self.codecs()
- .decode_into(
- Cow::Owned(chunk_encoded),
- &chunk_representation,
- output,
- output_shape,
- output_subset,
- options,
- )
- .map_err(ArrayError::CodecError)
- }
- } else {
- unsafe {
- copy_fill_value_into(
- self.data_type(),
- self.fill_value(),
- output,
- output_shape,
- output_subset,
+ self.codecs()
+ .decode_into(
+ Cow::Owned(chunk_encoded),
+ &chunk_representation,
+ output_view,
+ options,
)
.map_err(ArrayError::CodecError)
- }
+ } else {
+ copy_fill_value_into(self.data_type(), self.fill_value(), output_view)
+ .map_err(ArrayError::CodecError)
}
}
@@ -650,19 +636,25 @@ impl Array {
let chunk_subset = self.chunk_subset(&chunk_indices)?;
let chunk_subset_overlap =
chunk_subset.overlap(array_subset)?;
- unsafe {
- self.async_retrieve_chunk_subset_into(
- &chunk_indices,
- &chunk_subset_overlap
- .relative_to(chunk_subset.start())?,
- &output,
+
+ let mut output_view = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ output,
+ data_type_size,
array_subset.shape(),
- &chunk_subset_overlap
- .relative_to(array_subset.start())?,
- &options,
+ chunk_subset_overlap
+ .relative_to(array_subset.start())
+ .unwrap(),
)
- .await?;
- }
+ };
+ self.async_retrieve_chunk_subset_into(
+ &chunk_indices,
+ &chunk_subset_overlap.relative_to(chunk_subset.start())?,
+ &mut output_view,
+ &options,
+ )
+ .await?;
// let chunk_subset_bytes = self
// .async_retrieve_chunk_subset_opt(
// &chunk_indices,
@@ -737,7 +729,7 @@ impl Array {
options: &CodecOptions,
) -> Result<ArrayBytes<'_>, ArrayError> {
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
- if !chunk_subset.inbounds(&chunk_representation.shape_u64()) {
+ if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) {
return Err(ArrayError::InvalidArraySubset(
chunk_subset.clone(),
self.shape().to_vec(),
@@ -773,17 +765,15 @@ impl Array {
Ok(bytes)
}
- async unsafe fn async_retrieve_chunk_subset_into(
+ async fn async_retrieve_chunk_subset_into(
&self,
chunk_indices: &[u64],
chunk_subset: &ArraySubset,
- output: &UnsafeCellSlice<'_, u8>,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), ArrayError> {
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
- if !chunk_subset.inbounds(&chunk_representation.shape_u64()) {
+ if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) {
return Err(ArrayError::InvalidArraySubset(
chunk_subset.clone(),
self.shape().to_vec(),
@@ -794,16 +784,8 @@ impl Array {
&& chunk_subset.shape() == chunk_representation.shape_u64()
{
// Fast path if `chunk_subset` encompasses the whole chunk
- unsafe {
- self.async_retrieve_chunk_into(
- chunk_indices,
- output,
- output_shape,
- output_subset,
- options,
- )
+ self.async_retrieve_chunk_into(chunk_indices, output_view, options)
.await
- }
} else {
let storage_handle = Arc::new(StorageHandle::new(self.storage.clone()));
let storage_transformer = self
@@ -815,14 +797,12 @@ impl Array {
self.chunk_key(chunk_indices),
));
- unsafe {
- self.codecs
- .clone()
- .async_partial_decoder(input_handle, &chunk_representation, options)
- .await?
- .partial_decode_into(chunk_subset, output, output_shape, output_subset, options)
- .await?;
- }
+ self.codecs
+ .clone()
+ .async_partial_decoder(input_handle, &chunk_representation, options)
+ .await?
+ .partial_decode_into(chunk_subset, output_view, options)
+ .await?;
Ok(())
}
}
diff --git a/zarrs/src/array/array_async_readable_writable.rs b/zarrs/src/array/array_async_readable_writable.rs
index 4a958ede..74a3ceaa 100644
--- a/zarrs/src/array/array_async_readable_writable.rs
+++ b/zarrs/src/array/array_async_readable_writable.rs
@@ -158,15 +158,13 @@ impl Array &mut Self {
self.additional_fields = additional_fields;
self
diff --git a/zarrs/src/array/array_bytes.rs b/zarrs/src/array/array_bytes.rs
index 7357134b..227b8cd0 100644
--- a/zarrs/src/array/array_bytes.rs
+++ b/zarrs/src/array/array_bytes.rs
@@ -1,5 +1,6 @@
use std::borrow::Cow;
+use derive_more::derive::Display;
use itertools::Itertools;
use thiserror::Error;
use unsafe_cell_slice::UnsafeCellSlice;
@@ -9,25 +10,46 @@ use crate::{
indexer::IncompatibleIndexerAndShapeError, metadata::v3::array::data_type::DataTypeSize,
};
-use super::{codec::CodecError, ravel_indices, ArraySize, DataType, FillValue};
+use super::{
+ codec::{CodecError, InvalidBytesLengthError},
+ ravel_indices, ArrayBytesFixedDisjointView, ArraySize, DataType, FillValue,
+};
+
+mod raw_bytes_offsets;
+pub use raw_bytes_offsets::{RawBytesOffsets, RawBytesOffsetsCreateError};
/// Array element bytes.
+///
+/// These can represent:
+/// - [`ArrayBytes::Fixed`]: fixed length elements of an array in C-contiguous order,
+/// - [`ArrayBytes::Variable`]: variable length elements of an array in C-contiguous order with padding permitted,
+/// - Encoded array bytes after an array to bytes or bytes to bytes codecs.
pub type RawBytes<'a> = Cow<'a, [u8]>;
-/// Array element byte offsets.
-pub type RawBytesOffsets<'a> = Cow<'a, [usize]>;
-
/// Fixed or variable length array bytes.
-///
-/// Offsets are [`None`] if bytes are composed of fixed size data types.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ArrayBytes<'a> {
/// Bytes for a fixed length array.
+ ///
+ /// These represent elements in C-contiguous order (i.e. row-major order) where the last dimension varies the fastest.
Fixed(RawBytes<'a>),
/// Bytes and element byte offsets for a variable length array.
+ ///
+ /// The bytes and offsets are modeled on the [Apache Arrow Variable-size Binary Layout](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout).
+ /// - The offsets buffer contains length + 1 ~~signed integers (either 32-bit or 64-bit, depending on the data type)~~ usize integers.
+ /// - Offsets must be monotonically increasing, that is `offsets[j+1] >= offsets[j]` for `0 <= j < length`, even for null slots. Thus, the bytes represent C-contiguous elements with padding permitted.
+ /// - The final offset must be less than or equal to the length of the bytes buffer.
Variable(RawBytes<'a>, RawBytesOffsets<'a>),
}
+/// An error raised if variable length array bytes offsets are out of bounds.
+#[derive(Debug, Error, Display)]
+#[display("Offset {offset} is out of bounds for bytes of length {len}")]
+pub struct RawBytesOffsetsOutOfBoundsError {
+ offset: usize,
+ len: usize,
+}
+
/// Errors related to [`ArrayBytes<'_>`] and [`ArrayBytes`].
#[derive(Debug, Error)]
pub enum ArrayBytesError {
@@ -38,16 +60,42 @@ pub enum ArrayBytesError {
impl<'a> ArrayBytes<'a> {
/// Create a new fixed length array bytes from `bytes`.
+ ///
+ /// `bytes` must be C-contiguous.
pub fn new_flen(bytes: impl Into<RawBytes<'a>>) -> Self {
Self::Fixed(bytes.into())
}
/// Create a new variable length array bytes from `bytes` and `offsets`.
+ ///
+ /// # Errors
+ /// Returns a [`RawBytesOffsetsOutOfBoundsError`] if the last offset is out of bounds of the bytes.
pub fn new_vlen(
bytes: impl Into<RawBytes<'a>>,
- offsets: impl Into<RawBytesOffsets<'a>>,
+ offsets: RawBytesOffsets<'a>,
+ ) -> Result<Self, RawBytesOffsetsOutOfBoundsError> {
+ let bytes = bytes.into();
+ if offsets.last() <= bytes.len() {
+ Ok(Self::Variable(bytes, offsets))
+ } else {
+ Err(RawBytesOffsetsOutOfBoundsError {
+ offset: offsets.last(),
+ len: bytes.len(),
+ })
+ }
+ }
+
+ /// Create a new variable length array bytes from `bytes` and `offsets` without checking the offsets.
+ ///
+ /// # Safety
+ /// The last offset must be less than or equal to the length of the bytes.
+ pub unsafe fn new_vlen_unchecked(
+ bytes: impl Into<RawBytes<'a>>,
+ offsets: RawBytesOffsets<'a>,
) -> Self {
- Self::Variable(bytes.into(), offsets.into())
+ let bytes = bytes.into();
+ debug_assert!(offsets.last() <= bytes.len());
+ Self::Variable(bytes, offsets)
}
/// Create a new [`ArrayBytes`] with `num_elements` composed entirely of the `fill_value`.
@@ -66,12 +114,18 @@ impl<'a> ArrayBytes<'a> {
}
ArraySize::Variable { num_elements } => {
let num_elements = usize::try_from(num_elements).unwrap();
- Self::new_vlen(
- fill_value.as_ne_bytes().repeat(num_elements),
- (0..=num_elements)
- .map(|i| i * fill_value.size())
- .collect::<Vec<_>>(),
- )
+ let offsets = unsafe {
+ // SAFETY: The offsets are monotonically increasing.
+ RawBytesOffsets::new_unchecked(
+ (0..=num_elements)
+ .map(|i| i * fill_value.size())
+ .collect::<Vec<_>>(),
+ )
+ };
+ unsafe {
+ // SAFETY: The last offset is equal to the length of the bytes
+ Self::new_vlen_unchecked(fill_value.as_ne_bytes().repeat(num_elements), offsets)
+ }
}
}
}
@@ -121,9 +175,9 @@ impl<'a> ArrayBytes<'a> {
#[must_use]
pub fn into_owned<'b>(self) -> ArrayBytes<'b> {
match self {
- Self::Fixed(bytes) => ArrayBytes::<'b>::new_flen(bytes.into_owned()),
+ Self::Fixed(bytes) => ArrayBytes::<'b>::Fixed(bytes.into_owned().into()),
Self::Variable(bytes, offsets) => {
- ArrayBytes::<'b>::new_vlen(bytes.into_owned(), offsets.into_owned())
+ ArrayBytes::<'b>::Variable(bytes.into_owned().into(), offsets.into_owned())
}
}
}
@@ -188,7 +242,15 @@ impl<'a> ArrayBytes<'a> {
ss_bytes.extend_from_slice(&bytes[curr..next]);
}
ss_offsets.push(ss_bytes.len());
- Ok(ArrayBytes::new_vlen(ss_bytes, ss_offsets))
+ let ss_offsets = unsafe {
+ // SAFETY: The offsets are monotonically increasing.
+ RawBytesOffsets::new_unchecked(ss_offsets)
+ };
+ let array_bytes = unsafe {
+ // SAFETY: The last offset is equal to the length of the bytes
+ ArrayBytes::new_vlen_unchecked(ss_bytes, ss_offsets)
+ };
+ Ok(array_bytes)
}
ArrayBytes::Fixed(bytes) => {
let byte_ranges =
@@ -201,14 +263,11 @@ impl<'a> ArrayBytes<'a> {
}
/// Validate fixed length array bytes for a given array size.
-fn validate_bytes_flen(bytes: &RawBytes, array_size: u64) -> Result<(), CodecError> {
- if bytes.len() as u64 == array_size {
+fn validate_bytes_flen(bytes: &RawBytes, array_size: usize) -> Result<(), InvalidBytesLengthError> {
+ if bytes.len() == array_size {
Ok(())
} else {
- Err(CodecError::UnexpectedChunkDecodedSize(
- bytes.len(),
- array_size,
- ))
+ Err(InvalidBytesLengthError::new(bytes.len(), array_size))
}
}
@@ -243,9 +302,10 @@ fn validate_bytes(
data_type_size: DataTypeSize,
) -> Result<(), CodecError> {
match (bytes, data_type_size) {
- (ArrayBytes::Fixed(bytes), DataTypeSize::Fixed(data_type_size)) => {
- validate_bytes_flen(bytes, num_elements * data_type_size as u64)
- }
+ (ArrayBytes::Fixed(bytes), DataTypeSize::Fixed(data_type_size)) => Ok(validate_bytes_flen(
+ bytes,
+ usize::try_from(num_elements * data_type_size as u64).unwrap(),
+ )?),
(ArrayBytes::Variable(bytes, offsets), DataTypeSize::Variable) => {
validate_bytes_vlen(bytes, offsets, num_elements)
}
@@ -258,157 +318,126 @@ fn validate_bytes(
}
}
-/// This function is used internally by various array/codec methods to write the bytes of a chunk subset into an output with an associated array subset.
-/// This approach only works for fixed length data types.
-pub(crate) fn update_bytes_flen(
- output_bytes: &UnsafeCellSlice<u8>,
- output_shape: &[u64],
- subset_bytes: &RawBytes,
- subset: &ArraySubset,
- data_type_size: usize,
-) {
- debug_assert_eq!(
- output_bytes.len(),
- usize::try_from(output_shape.iter().product::<u64>()).unwrap() * data_type_size
- );
- debug_assert_eq!(
- subset_bytes.len(),
- subset.num_elements_usize() * data_type_size,
- );
-
- let contiguous_indices =
- unsafe { subset.contiguous_linearised_indices_unchecked(output_shape) };
- let length = contiguous_indices.contiguous_elements_usize() * data_type_size;
- let mut decoded_offset = 0;
- // TODO: Par iteration?
- for array_subset_element_index in &contiguous_indices {
- let output_offset = usize::try_from(array_subset_element_index).unwrap() * data_type_size;
- debug_assert!((output_offset + length) <= output_bytes.len());
- debug_assert!((decoded_offset + length) <= subset_bytes.len());
- unsafe {
- output_bytes
- .index_mut(output_offset..output_offset + length)
- .copy_from_slice(&subset_bytes[decoded_offset..decoded_offset + length]);
- }
- decoded_offset += length;
+pub(crate) fn update_bytes_vlen<'a>(
+ input_bytes: &RawBytes,
+ input_offsets: &RawBytesOffsets,
+ input_shape: &[u64],
+ update_bytes: &RawBytes,
+ update_offsets: &RawBytesOffsets,
+ update_subset: &ArraySubset,
+) -> Result<ArrayBytes<'a>, IncompatibleArraySubsetAndShapeError> {
+ if !update_subset.inbounds_shape(input_shape) {
+ return Err(IncompatibleArraySubsetAndShapeError::new(
+ update_subset.clone(),
+ input_shape.to_vec(),
+ ));
}
-}
-pub(crate) fn update_bytes_vlen<'a>(
- output_bytes: &RawBytes,
- output_offsets: &RawBytesOffsets,
- output_shape: &[u64],
- subset_bytes: &RawBytes,
- subset_offsets: &RawBytesOffsets,
- subset: &ArraySubset,
-) -> ArrayBytes<'a> {
// Get the current and new length of the bytes in the chunk subset
- let size_subset_new = {
- let chunk_subset_indices = ArraySubset::new_with_shape(subset.shape().to_vec())
- .linearised_indices(subset.shape())
- .unwrap();
- chunk_subset_indices
- .iter()
- .map(|index| {
- let index = usize::try_from(index).unwrap();
- subset_offsets[index + 1] - subset_offsets[index]
- })
- .sum::<usize>()
- };
+ let size_subset_new = update_offsets
+ .iter()
+ .tuple_windows()
+ .map(|(curr, next)| next - curr)
+ .sum::<usize>();
let size_subset_old = {
- let chunk_indices = subset.linearised_indices(output_shape).unwrap();
+ let chunk_indices = update_subset.linearised_indices(input_shape).unwrap();
chunk_indices
.iter()
.map(|index| {
let index = usize::try_from(index).unwrap();
- output_offsets[index + 1] - output_offsets[index]
+ input_offsets[index + 1] - input_offsets[index]
})
.sum::<usize>()
};
// Populate new offsets and bytes
- let mut offsets_new = Vec::with_capacity(output_offsets.len());
- let bytes_new_len = (output_bytes.len() + size_subset_new)
+ let mut offsets_new = Vec::with_capacity(input_offsets.len());
+ let bytes_new_len = (input_bytes.len() + size_subset_new)
.checked_sub(size_subset_old)
.unwrap();
let mut bytes_new = Vec::with_capacity(bytes_new_len);
- let indices = ArraySubset::new_with_shape(output_shape.to_vec()).indices();
+ let indices = ArraySubset::new_with_shape(input_shape.to_vec()).indices();
for (chunk_index, indices) in indices.iter().enumerate() {
offsets_new.push(bytes_new.len());
- if subset.contains(&indices) {
+ if update_subset.contains(&indices) {
let subset_indices = indices
.iter()
- .zip(subset.start())
+ .zip(update_subset.start())
.map(|(i, s)| i - s)
.collect::<Vec<_>>();
let subset_index =
- usize::try_from(ravel_indices(&subset_indices, subset.shape())).unwrap();
- let start = subset_offsets[subset_index];
- let end = subset_offsets[subset_index + 1];
- bytes_new.extend_from_slice(&subset_bytes[start..end]);
+ usize::try_from(ravel_indices(&subset_indices, update_subset.shape())).unwrap();
+ let start = update_offsets[subset_index];
+ let end = update_offsets[subset_index + 1];
+ bytes_new.extend_from_slice(&update_bytes[start..end]);
} else {
- let start = output_offsets[chunk_index];
- let end = output_offsets[chunk_index + 1];
- bytes_new.extend_from_slice(&output_bytes[start..end]);
+ let start = input_offsets[chunk_index];
+ let end = input_offsets[chunk_index + 1];
+ bytes_new.extend_from_slice(&input_bytes[start..end]);
}
}
offsets_new.push(bytes_new.len());
-
- ArrayBytes::new_vlen(bytes_new, offsets_new)
+ let offsets_new = unsafe {
+ // SAFETY: The offsets are monotonically increasing.
+ RawBytesOffsets::new_unchecked(offsets_new)
+ };
+ let array_bytes = unsafe {
+ // SAFETY: The last offset is equal to the length of the bytes
+ ArrayBytes::new_vlen_unchecked(bytes_new, offsets_new)
+ };
+ Ok(array_bytes)
}
/// Update a subset of an array.
///
/// This function is used internally by [`crate::array::Array::store_chunk_subset_opt`] and [`crate::array::Array::async_store_chunk_subset_opt`].
///
-/// # Safety
-/// The caller must ensure that:
-/// - `output_bytes` is an array with `output_shape` and `data_type_size`,
-/// - `output_subset_bytes` is an array with the shape of `output_subset` and `data_type_size`,
-/// - `output_subset` is within the bounds of `output_shape`, and
-/// - `output_bytes` and `output_subset_bytes` are compatible (e.g. both fixed or both variable sized).
-#[must_use]
-pub unsafe fn update_array_bytes<'a>(
+/// # Errors
+/// Returns a [`CodecError`] if
+/// - `output_bytes` are not compatible with the `output_shape` and `data_type_size`,
+/// - `output_subset_bytes` are not compatible with the `output_subset` and `data_type_size`,
+/// - `output_subset` is not within the bounds of `output_shape`
+pub fn update_array_bytes<'a>(
output_bytes: ArrayBytes,
output_shape: &[u64],
output_subset: &ArraySubset,
output_subset_bytes: &ArrayBytes,
data_type_size: DataTypeSize,
-) -> ArrayBytes<'a> {
+) -> Result<ArrayBytes<'a>, CodecError> {
match (output_bytes, output_subset_bytes, data_type_size) {
(
ArrayBytes::Variable(chunk_bytes, chunk_offsets),
ArrayBytes::Variable(chunk_subset_bytes, chunk_subset_offsets),
DataTypeSize::Variable,
- ) => update_bytes_vlen(
+ ) => Ok(update_bytes_vlen(
&chunk_bytes,
&chunk_offsets,
output_shape,
chunk_subset_bytes,
chunk_subset_offsets,
output_subset,
- ),
+ )?),
(
ArrayBytes::Fixed(chunk_bytes),
ArrayBytes::Fixed(chunk_subset_bytes),
DataTypeSize::Fixed(data_type_size),
) => {
let mut chunk_bytes = chunk_bytes.into_owned();
- {
- let chunk_bytes = UnsafeCellSlice::new(&mut chunk_bytes);
- update_bytes_flen(
- &chunk_bytes,
- output_shape,
- chunk_subset_bytes,
- output_subset,
+ let mut output_view = unsafe {
+ // SAFETY: Only one view is created, so it is disjoint
+ ArrayBytesFixedDisjointView::new(
+ UnsafeCellSlice::new(&mut chunk_bytes),
data_type_size,
- );
+ output_shape,
+ output_subset.clone(),
+ )
}
- ArrayBytes::new_flen(chunk_bytes)
- }
- (_, _, _) => {
- unreachable!("Validation should occur outside of this function")
+ .map_err(CodecError::from)?;
+ output_view.copy_from_slice(chunk_subset_bytes)?;
+ Ok(ArrayBytes::new_flen(chunk_bytes))
}
+ (_, _, DataTypeSize::Variable) => Err(CodecError::ExpectedVariableLengthBytes),
+ (_, _, DataTypeSize::Fixed(_)) => Err(CodecError::ExpectedFixedLengthBytes),
}
}
@@ -459,10 +488,14 @@ pub(crate) fn merge_chunks_vlen<'a>(
*acc += sz;
Some(*acc)
}));
+ let offsets = unsafe {
+ // SAFETY: The offsets are monotonically increasing.
+ RawBytesOffsets::new_unchecked(offsets)
+ };
// Write bytes
// TODO: Go parallel
- let mut bytes = vec![0; *offsets.last().unwrap()];
+ let mut bytes = vec![0; offsets.last()];
for (chunk_bytes, chunk_subset) in chunk_bytes_and_subsets {
let (chunk_bytes, chunk_offsets) = chunk_bytes.into_variable()?;
let indices = chunk_subset.linearised_indices(array_shape).unwrap();
@@ -476,7 +509,12 @@ pub(crate) fn merge_chunks_vlen<'a>(
}
}
- Ok(ArrayBytes::new_vlen(bytes, offsets))
+ let array_bytes = unsafe {
+ // SAFETY: The last offset is equal to the length of the bytes
+ ArrayBytes::new_vlen_unchecked(bytes, offsets)
+ };
+
+ Ok(array_bytes)
}
pub(crate) fn extract_decoded_regions_vlen<'a>(
@@ -506,7 +544,15 @@ pub(crate) fn extract_decoded_regions_vlen<'a>(
region_bytes.extend_from_slice(&bytes[curr..next]);
}
region_offsets.push(region_bytes.len());
- out.push(ArrayBytes::new_vlen(region_bytes, region_offsets));
+ let region_offsets = unsafe {
+ // SAFETY: The offsets are monotonically increasing.
+ RawBytesOffsets::new_unchecked(region_offsets)
+ };
+ let array_bytes = unsafe {
+ // SAFETY: The last offset is equal to the length of the bytes
+ ArrayBytes::new_vlen_unchecked(region_bytes, region_offsets)
+ };
+ out.push(array_bytes);
}
Ok(out)
}
@@ -524,25 +570,15 @@ pub(crate) fn extract_decoded_regions_vlen<'a>(
/// - `data_type` and `fill_value` are compatible,
/// - `output` holds enough space for the preallocated bytes of an array with `output_shape` and `data_type`, and
/// - `output_subset` is within the bounds of `output_shape`.
-pub unsafe fn copy_fill_value_into(
+pub fn copy_fill_value_into(
data_type: &DataType,
fill_value: &FillValue,
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView,
) -> Result<(), CodecError> {
- let array_size = ArraySize::new(data_type.size(), output_subset.num_elements());
- if let (ArrayBytes::Fixed(fill_value_bytes), Some(data_type_size)) = (
- ArrayBytes::new_fill_value(array_size, fill_value),
- data_type.fixed_size(),
- ) {
- update_bytes_flen(
- output,
- output_shape,
- &fill_value_bytes,
- output_subset,
- data_type_size,
- );
+ let array_size = ArraySize::new(data_type.size(), output_view.num_elements());
+ if let ArrayBytes::Fixed(fill_value_bytes) = ArrayBytes::new_fill_value(array_size, fill_value)
+ {
+ output_view.copy_from_slice(&fill_value_bytes)?;
Ok(())
} else {
// TODO: Variable length data type support?
@@ -602,7 +638,6 @@ impl<'a, const N: usize> From<&'a [u8; N]> for ArrayBytes<'a> {
#[cfg(test)]
mod tests {
use std::error::Error;
- use std::mem::size_of;
use crate::array::Element;
@@ -620,6 +655,17 @@ mod tests {
Ok(())
}
+ #[test]
+ fn array_bytes_vlen() {
+ let data = [0u8, 1, 2, 3, 4];
+ assert!(ArrayBytes::new_vlen(&data, vec![0].try_into().unwrap()).is_ok());
+ assert!(ArrayBytes::new_vlen(&data, vec![0, 5].try_into().unwrap()).is_ok());
+ assert!(ArrayBytes::new_vlen(&data, vec![0, 5, 5].try_into().unwrap()).is_ok());
+ assert!(ArrayBytes::new_vlen(&data, vec![0, 5, 6].try_into().unwrap()).is_err());
+ assert!(ArrayBytes::new_vlen(&data, vec![0, 1, 3, 5].try_into().unwrap()).is_ok());
+ assert!(ArrayBytes::new_vlen(&data, vec![0, 1, 3, 6].try_into().unwrap()).is_err());
+ }
+
#[test]
fn array_bytes_str() -> Result<(), Box> {
let data = ["a", "bb", "ccc"];
@@ -638,21 +684,27 @@ mod tests {
let mut bytes_array = vec![0u8; 4 * 4];
{
let bytes_array = UnsafeCellSlice::new(&mut bytes_array);
- update_bytes_flen(
- &bytes_array,
- &vec![4, 4],
- &vec![1u8, 2].into(),
- &ArraySubset::new_with_ranges(&[1..2, 1..3]),
- 1,
- );
-
- update_bytes_flen(
- &bytes_array,
- &vec![4, 4],
- &vec![3u8, 4].into(),
- &ArraySubset::new_with_ranges(&[3..4, 0..2]),
- 1,
- );
+ let mut output_non_overlapping_0 = unsafe {
+ // SAFETY: Only one view is created, so it is disjoint
+ ArrayBytesFixedDisjointView::new_unchecked(
+ bytes_array,
+ size_of::<u8>(),
+ &[4, 4],
+ ArraySubset::new_with_ranges(&[1..2, 1..3]),
+ )
+ };
+ output_non_overlapping_0.copy_from_slice(&[1u8, 2]).unwrap();
+
+ let mut output_non_overlapping_1 = unsafe {
+ // SAFETY: Only one view is created, so it is disjoint
+ ArrayBytesFixedDisjointView::new_unchecked(
+ bytes_array,
+ size_of::<u8>(),
+ &[4, 4],
+ ArraySubset::new_with_ranges(&[3..4, 0..2]),
+ )
+ };
+ output_non_overlapping_1.copy_from_slice(&[3u8, 4]).unwrap();
}
debug_assert_eq!(
diff --git a/zarrs/src/array/array_bytes/raw_bytes_offsets.rs b/zarrs/src/array/array_bytes/raw_bytes_offsets.rs
new file mode 100644
index 00000000..3de578f3
--- /dev/null
+++ b/zarrs/src/array/array_bytes/raw_bytes_offsets.rs
@@ -0,0 +1,128 @@
+use std::{borrow::Cow, ops::Deref};
+
+use derive_more::derive::Display;
+use thiserror::Error;
+
+/// Array element byte offsets.
+///
+/// These must be monotonically increasing. See [`ArrayBytes::Variable`](crate::array::ArrayBytes::Variable).
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct RawBytesOffsets<'a>(Cow<'a, [usize]>);
+
+impl Deref for RawBytesOffsets<'_> {
+ type Target = [usize];
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+/// An error creating [`RawBytesOffsets`].
+#[derive(Debug, Error, Display)]
+pub enum RawBytesOffsetsCreateError {
+ /// The offsets length must be greater than zero.
+ #[display("offsets length must be greater than zero")]
+ ZeroLength,
+ /// The offsets are not monotonically increasing.
+ #[display("offsets are not monotonically increasing")]
+ NotMonotonicallyIncreasing,
+}
+
+impl<'a> RawBytesOffsets<'a> {
+ /// Creates a new `RawBytesOffsets`.
+ ///
+ /// # Errors
+ /// Returns an error if the offsets are not monotonically increasing.
+ pub fn new(offsets: impl Into<Cow<'a, [usize]>>) -> Result<Self, RawBytesOffsetsCreateError> {
+ let offsets = offsets.into();
+ if offsets.is_empty() {
+ Err(RawBytesOffsetsCreateError::ZeroLength)
+ } else if offsets.windows(2).all(|w| w[1] >= w[0]) {
+ Ok(Self(offsets))
+ } else {
+ Err(RawBytesOffsetsCreateError::NotMonotonicallyIncreasing)
+ }
+ }
+
+ /// Creates a new `RawBytesOffsets` without checking the offsets.
+ ///
+ /// # Safety
+ /// The offsets must be monotonically increasing.
+ #[must_use]
+ pub unsafe fn new_unchecked(offsets: impl Into<Cow<'a, [usize]>>) -> Self {
+ let offsets = offsets.into();
+ debug_assert!(!offsets.is_empty());
+ debug_assert!(offsets.windows(2).all(|w| w[1] >= w[0]));
+ Self(offsets)
+ }
+
+ /// Clones the offsets if not already owned.
+ #[must_use]
+ pub fn into_owned(self) -> RawBytesOffsets<'static> {
+ RawBytesOffsets(self.0.into_owned().into())
+ }
+
+ /// Returns the last offset.
+ #[must_use]
+ pub fn last(&self) -> usize {
+ unsafe {
+ // SAFETY: The offsets cannot be empty.
+ *self.0.last().unwrap_unchecked()
+ }
+ }
+}
+
+impl<'a> TryFrom<Cow<'a, [usize]>> for RawBytesOffsets<'a> {
+ type Error = RawBytesOffsetsCreateError;
+
+ fn try_from(value: Cow<'a, [usize]>) -> Result<Self, Self::Error> {
+ Self::new(value)
+ }
+}
+
+impl<'a> TryFrom<&'a [usize]> for RawBytesOffsets<'a> {
+ type Error = RawBytesOffsetsCreateError;
+
+ fn try_from(value: &'a [usize]) -> Result<Self, Self::Error> {
+ Self::new(value)
+ }
+}
+
+impl<'a, const N: usize> TryFrom<&'a [usize; N]> for RawBytesOffsets<'a> {
+ type Error = RawBytesOffsetsCreateError;
+
+ fn try_from(value: &'a [usize; N]) -> Result<Self, Self::Error> {
+ Self::new(value)
+ }
+}
+
+impl TryFrom<Vec<usize>> for RawBytesOffsets<'_> {
+ type Error = RawBytesOffsetsCreateError;
+
+ fn try_from(value: Vec<usize>) -> Result<Self, Self::Error> {
+ Self::new(value)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn raw_bytes_offsets() {
+ let offsets = RawBytesOffsets::new(vec![0, 1, 2, 3]).unwrap();
+ assert_eq!(&*offsets, &[0, 1, 2, 3]);
+ assert!(RawBytesOffsets::new(vec![]).is_err());
+ assert!(RawBytesOffsets::new(vec![0]).is_ok());
+ assert!(RawBytesOffsets::new(vec![10]).is_ok()); // nonsense, but not invalid
+ assert!(RawBytesOffsets::new(vec![0, 1, 1]).is_ok());
+ assert!(RawBytesOffsets::new(vec![0, 1, 0]).is_err());
+ assert!(RawBytesOffsets::try_from(vec![0, 1, 2]).is_ok());
+ assert!(RawBytesOffsets::try_from(vec![0, 1, 0]).is_err());
+ assert!(RawBytesOffsets::try_from([0, 1, 2].as_slice()).is_ok());
+ assert!(RawBytesOffsets::try_from([0, 1, 0].as_slice()).is_err());
+ assert!(RawBytesOffsets::try_from(&[0, 1, 2]).is_ok());
+ assert!(RawBytesOffsets::try_from(&[0, 1, 0]).is_err());
+ assert!(RawBytesOffsets::try_from(Cow::Owned(vec![0, 1, 0])).is_err());
+ }
+}
diff --git a/zarrs/src/array/array_bytes_fixed_disjoint_view.rs b/zarrs/src/array/array_bytes_fixed_disjoint_view.rs
new file mode 100644
index 00000000..7e5caf8c
--- /dev/null
+++ b/zarrs/src/array/array_bytes_fixed_disjoint_view.rs
@@ -0,0 +1,354 @@
+use derive_more::derive::Display;
+use thiserror::Error;
+use unsafe_cell_slice::UnsafeCellSlice;
+
+use crate::array_subset::{
+ iterators::{ContiguousIndices, ContiguousLinearisedIndices},
+ ArraySubset,
+};
+
+use super::codec::{CodecError, InvalidBytesLengthError, SubsetOutOfBoundsError};
+
+/// A disjoint view of the bytes in an array with a fixed-length data type.
+///
+/// The `subset` represented by this view must not overlap with the `subset` of any other created views that reference the same array bytes.
+pub struct ArrayBytesFixedDisjointView<'a> {
+ /// The bytes of the entire array; shared between disjoint views.
+ bytes: UnsafeCellSlice<'a, u8>,
+ /// The fixed size in bytes of a single array element.
+ data_type_size: usize,
+ /// The shape of the entire array the bytes represent.
+ shape: &'a [u64],
+ /// The region of the array this view covers.
+ subset: ArraySubset,
+ /// Cached byte length of the subset (`subset.num_elements_usize() * data_type_size`).
+ bytes_in_subset_len: usize,
+}
+
+/// Errors that can occur when creating a [`ArrayBytesFixedDisjointView`].
+///
+/// Convertible to [`CodecError`] via `From`.
+#[derive(Debug, Error, Display)]
+pub enum ArrayBytesFixedDisjointViewCreateError {
+ /// The subset is out-of-bounds of the array shape.
+ SubsetOutOfBounds(#[from] SubsetOutOfBoundsError),
+ /// The length of the bytes is not the correct length.
+ InvalidBytesLength(#[from] InvalidBytesLengthError),
+}
+
+impl From<ArrayBytesFixedDisjointViewCreateError> for CodecError {
+    /// Lower a view-creation error to a [`CodecError`].
+    fn from(value: ArrayBytesFixedDisjointViewCreateError) -> Self {
+        // Both variants wrap errors that have their own `CodecError` conversions.
+        match value {
+            ArrayBytesFixedDisjointViewCreateError::SubsetOutOfBounds(e) => e.into(),
+            ArrayBytesFixedDisjointViewCreateError::InvalidBytesLength(e) => e.into(),
+        }
+    }
+}
+
+impl<'a> ArrayBytesFixedDisjointView<'a> {
+    /// Create a new non-overlapping view of the bytes in an array.
+    ///
+    /// # Errors
+    /// Returns [`ArrayBytesFixedDisjointViewCreateError`] if
+    /// - `subset` is out-of-bounds of `shape`, or
+    /// - the length of `bytes` is not the product of the elements in `shape` multiplied by `data_type_size`.
+    ///
+    /// # Safety
+    /// The `subset` represented by this view must not overlap with the `subset` of any other created views that reference the same array bytes.
+    ///
+    /// # Panics
+    /// Panics if the product of the elements in `shape` multiplied by `data_type_size` exceeds [`usize::MAX`].
+    pub unsafe fn new(
+        bytes: UnsafeCellSlice<'a, u8>,
+        data_type_size: usize,
+        shape: &'a [u64],
+        subset: ArraySubset,
+    ) -> Result<Self, ArrayBytesFixedDisjointViewCreateError> {
+        if !subset.inbounds_shape(shape) {
+            let bounding_subset = ArraySubset::new_with_shape(shape.to_vec());
+            return Err(SubsetOutOfBoundsError::new(subset, bounding_subset).into());
+        }
+        let bytes_in_array_len =
+            usize::try_from(shape.iter().product::<u64>()).unwrap() * data_type_size;
+        if bytes.len() != bytes_in_array_len {
+            return Err(InvalidBytesLengthError::new(bytes.len(), bytes_in_array_len).into());
+        }
+
+        let bytes_in_subset_len = subset.num_elements_usize() * data_type_size;
+        Ok(Self {
+            bytes,
+            data_type_size,
+            shape,
+            subset,
+            bytes_in_subset_len,
+        })
+    }
+
+    /// Create a new non-overlapping view of the bytes in an array.
+    ///
+    /// # Safety
+    /// - `subset` must be inbounds of `shape`,
+    /// - the length of `bytes` must be the product of the elements in `shape` multiplied by `data_type_size`, and
+    /// - the `subset` represented by this view must not overlap with the `subset` of any other created views that reference the same array bytes.
+    ///
+    /// # Panics
+    /// Panics if the product of the elements in `shape` multiplied by `data_type_size` exceeds [`usize::MAX`].
+    #[must_use]
+    pub unsafe fn new_unchecked(
+        bytes: UnsafeCellSlice<'a, u8>,
+        data_type_size: usize,
+        shape: &'a [u64],
+        subset: ArraySubset,
+    ) -> Self {
+        debug_assert!(subset.inbounds_shape(shape));
+        debug_assert_eq!(
+            bytes.len(),
+            usize::try_from(shape.iter().product::<u64>()).unwrap() * data_type_size
+        );
+
+        let bytes_in_subset_len = subset.num_elements_usize() * data_type_size;
+        Self {
+            bytes,
+            data_type_size,
+            shape,
+            subset,
+            bytes_in_subset_len,
+        }
+    }
+
+    /// Create a new non-overlapping view of the bytes in an array that is a subset of the current view.
+    ///
+    /// # Errors
+    /// Returns [`SubsetOutOfBoundsError`] if `subset` is out-of-bounds of the parent subset.
+    ///
+    /// # Safety
+    /// The `subset` represented by this view must not overlap with the `subset` of any other created views that reference the same array bytes.
+    pub unsafe fn subdivide(
+        &self,
+        subset: ArraySubset,
+    ) -> Result<ArrayBytesFixedDisjointView<'a>, SubsetOutOfBoundsError> {
+        if !subset.inbounds(&self.subset) {
+            return Err(SubsetOutOfBoundsError::new(subset, self.subset.clone()));
+        }
+
+        Ok(unsafe {
+            // SAFETY: all inputs have been validated
+            Self::new_unchecked(self.bytes, self.data_type_size, self.shape, subset)
+        })
+    }
+
+    /// Create a new non-overlapping view of the bytes in an array that is a subset of the current view.
+    ///
+    /// # Safety
+    /// - `subset` must be inbounds of the parent subset, and
+    /// - the `subset` represented by this view must not overlap with the `subset` of any other created views that reference the same array bytes.
+    #[must_use]
+    pub unsafe fn subdivide_unchecked(
+        &self,
+        subset: ArraySubset,
+    ) -> ArrayBytesFixedDisjointView<'a> {
+        debug_assert!(subset.inbounds(&self.subset));
+
+        unsafe { Self::new_unchecked(self.bytes, self.data_type_size, self.shape, subset) }
+    }
+
+    /// Return the shape of the bytes this view is created from.
+    #[must_use]
+    pub fn shape(&self) -> &[u64] {
+        self.shape
+    }
+
+    /// Return the subset of the bytes this view is created from.
+    #[must_use]
+    pub fn subset(&self) -> &ArraySubset {
+        &self.subset
+    }
+
+    /// Return the number of elements in the view.
+    #[must_use]
+    pub fn num_elements(&self) -> u64 {
+        self.subset.num_elements()
+    }
+
+    fn contiguous_indices(&self) -> ContiguousIndices {
+        unsafe {
+            // SAFETY: the output shape encapsulates the output subset, checked in constructor
+            self.subset.contiguous_indices_unchecked(self.shape)
+        }
+    }
+
+    fn contiguous_linearised_indices(&self) -> ContiguousLinearisedIndices {
+        unsafe {
+            // SAFETY: the output shape encapsulates the output subset, checked in constructor
+            self.subset
+                .contiguous_linearised_indices_unchecked(self.shape)
+        }
+    }
+
+    /// Return the contiguous element length of the view.
+    ///
+    /// This is the number of elements that are accessed in a single contiguous block.
+    #[must_use]
+    pub fn num_contiguous_elements(&self) -> usize {
+        self.contiguous_indices().contiguous_elements_usize()
+    }
+
+    /// Return the size in bytes of contiguous elements in the view.
+    ///
+    /// This is the number of elements that are accessed in a single contiguous block.
+    #[must_use]
+    pub fn contiguous_bytes_len(&self) -> usize {
+        self.contiguous_indices().contiguous_elements_usize() * self.data_type_size
+    }
+
+    /// Fill the view with the fill value.
+    ///
+    /// # Errors
+    /// Returns [`InvalidBytesLengthError`] if the length of the `fill_value` does not match the data type size.
+    ///
+    /// # Panics
+    /// Panics if an offset into the internal bytes reference exceeds [`usize::MAX`].
+    pub fn fill(&mut self, fill_value: &[u8]) -> Result<(), InvalidBytesLengthError> {
+        if fill_value.len() != self.data_type_size {
+            return Err(InvalidBytesLengthError::new(
+                fill_value.len(),
+                self.data_type_size,
+            ));
+        }
+
+        // Pre-repeat the fill value so each contiguous run is written in a single copy.
+        let fill_value_contiguous = fill_value.repeat(self.num_contiguous_elements());
+        let length = self.contiguous_bytes_len();
+        debug_assert_eq!(fill_value_contiguous.len(), length);
+        let contiguous_indices = self.contiguous_linearised_indices();
+        contiguous_indices.into_iter().for_each(|index| {
+            let offset = usize::try_from(index * self.data_type_size as u64).unwrap();
+            unsafe {
+                // SAFETY: the view is disjoint, so this byte range is exclusively owned
+                self.bytes
+                    .index_mut(offset..offset + length)
+                    .copy_from_slice(&fill_value_contiguous);
+            }
+        });
+        Ok(())
+    }
+
+    /// Copy bytes into the view.
+    ///
+    /// The `subset_bytes` must be the same length as the byte length of the elements in the view.
+    ///
+    /// # Errors
+    /// Returns an [`InvalidBytesLengthError`] if the length of `subset_bytes` is not the same as the byte length of the elements in the view.
+    ///
+    /// # Panics
+    /// Panics if an offset into the internal bytes reference exceeds [`usize::MAX`].
+    pub fn copy_from_slice(&mut self, subset_bytes: &[u8]) -> Result<(), InvalidBytesLengthError> {
+        if subset_bytes.len() != self.bytes_in_subset_len {
+            return Err(InvalidBytesLengthError::new(
+                subset_bytes.len(),
+                self.bytes_in_subset_len,
+            ));
+        }
+
+        let contiguous_indices = self.contiguous_linearised_indices();
+        let length = contiguous_indices.contiguous_elements_usize() * self.data_type_size;
+
+        // Walk the source slice in order, copying one contiguous run per linearised index.
+        let bytes_copied = contiguous_indices.into_iter().fold(
+            0,
+            |subset_offset: usize, array_subset_element_index: u64| {
+                let output_offset =
+                    usize::try_from(array_subset_element_index).unwrap() * self.data_type_size;
+                debug_assert!((output_offset + length) <= self.bytes.len());
+                debug_assert!((subset_offset + length) <= subset_bytes.len());
+                let subset_offset_end = subset_offset + length;
+                unsafe {
+                    // SAFETY: the view is disjoint, so this byte range is exclusively owned
+                    self.bytes
+                        .index_mut(output_offset..output_offset + length)
+                        .copy_from_slice(&subset_bytes[subset_offset..subset_offset_end]);
+                }
+                subset_offset_end
+            },
+        );
+        debug_assert_eq!(bytes_copied, subset_bytes.len());
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn disjoint_view() {
+        // A 3x3 u8 array; disjoint views mutate separate regions of it.
+        let mut bytes = (0..9).collect::<Vec<u8>>();
+        let shape = vec![3, 3];
+        {
+            let bytes = UnsafeCellSlice::new(&mut bytes);
+
+            assert!(unsafe {
+                ArrayBytesFixedDisjointView::new(
+                    bytes,
+                    1,
+                    &[10, 10],
+                    ArraySubset::new_with_ranges(&[0..2, 1..3]),
+                )
+            }
+            .is_err()); // incompatible shape
+            assert!(unsafe {
+                ArrayBytesFixedDisjointView::new(
+                    bytes,
+                    2,
+                    &shape,
+                    ArraySubset::new_with_ranges(&[0..2, 1..3]),
+                )
+            }
+            .is_err()); // invalid bytes length
+            assert!(unsafe {
+                ArrayBytesFixedDisjointView::new(
+                    bytes,
+                    1,
+                    &shape,
+                    ArraySubset::new_with_ranges(&[0..2, 1..10]),
+                )
+            }
+            .is_err()); // OOB
+
+            let mut view0 = unsafe {
+                ArrayBytesFixedDisjointView::new(
+                    bytes,
+                    1,
+                    &shape,
+                    ArraySubset::new_with_ranges(&[0..2, 1..3]),
+                )
+            }
+            .unwrap();
+            assert_eq!(view0.shape(), shape);
+
+            view0.copy_from_slice(&[11, 12, 14, 15]).unwrap();
+            assert!(view0.copy_from_slice(&[11, 12, 14, 15, 255]).is_err()); // wrong length
+
+            let mut view0a =
+                unsafe { view0.subdivide(ArraySubset::new_with_ranges(&[1..2, 1..3])) }.unwrap();
+            view0a.copy_from_slice(&[24, 25]).unwrap();
+            assert!(view0a.copy_from_slice(&[]).is_err()); // wrong length
+
+            // Subdivision bounds are checked against the parent view's subset.
+            assert!(
+                unsafe { view0a.subdivide(ArraySubset::new_with_ranges(&[1..2, 1..3])) }.is_ok()
+            );
+            assert!(
+                unsafe { view0a.subdivide(ArraySubset::new_with_ranges(&[1..2, 2..3])) }.is_ok()
+            );
+            assert!(
+                unsafe { view0a.subdivide(ArraySubset::new_with_ranges(&[0..2, 1..3])) }.is_err()
+            ); // OOB
+            assert!(
+                unsafe { view0a.subdivide(ArraySubset::new_with_ranges(&[1..2, 1..4])) }.is_err()
+            ); // OOB
+
+            let mut view1 = unsafe {
+                ArrayBytesFixedDisjointView::new(
+                    bytes,
+                    1,
+                    &shape,
+                    ArraySubset::new_with_ranges(&[2..3, 1..3]),
+                )
+            }
+            .unwrap();
+            view1.fill(&[255]).unwrap();
+            assert!(view1.fill(&[255, 255]).is_err()); // invalid fill value
+        }
+        assert_eq!(&bytes, &[0, 11, 12, 3, 24, 25, 6, 255, 255]);
+    }
+}
diff --git a/zarrs/src/array/array_errors.rs b/zarrs/src/array/array_errors.rs
index 527d0a58..272aff53 100644
--- a/zarrs/src/array/array_errors.rs
+++ b/zarrs/src/array/array_errors.rs
@@ -2,19 +2,16 @@ use thiserror::Error;
use crate::{
array_subset::{ArraySubset, IncompatibleDimensionalityError},
+ data_type::{
+ IncompatibleFillValueError, IncompatibleFillValueMetadataError, UnsupportedDataTypeError,
+ },
metadata::v3::UnsupportedAdditionalFieldError,
node::NodePathError,
plugin::PluginCreateError,
storage::StorageError,
};
-use super::{
- codec::CodecError,
- data_type::{
- IncompatibleFillValueError, IncompatibleFillValueMetadataError, UnsupportedDataTypeError,
- },
- ArrayIndices, ArrayShape,
-};
+use super::{codec::CodecError, ArrayIndices, ArrayShape};
/// An array creation error.
#[derive(Debug, Error)]
diff --git a/zarrs/src/array/array_metadata_options.rs b/zarrs/src/array/array_metadata_options.rs
index 2ec189a0..e0dafc36 100644
--- a/zarrs/src/array/array_metadata_options.rs
+++ b/zarrs/src/array/array_metadata_options.rs
@@ -1,9 +1,11 @@
use crate::config::{global_config, MetadataConvertVersion};
+use super::codec::CodecMetadataOptions;
+
/// Options for writing array metadata.
#[derive(Debug, Clone)]
pub struct ArrayMetadataOptions {
- experimental_codec_store_metadata_if_encode_only: bool,
+ codec_options: CodecMetadataOptions,
convert_version: MetadataConvertVersion,
include_zarrs_metadata: bool,
}
@@ -11,7 +13,7 @@ pub struct ArrayMetadataOptions {
impl Default for ArrayMetadataOptions {
fn default() -> Self {
Self {
- experimental_codec_store_metadata_if_encode_only: false,
+ codec_options: CodecMetadataOptions::default(),
convert_version: global_config().metadata_convert_version(),
include_zarrs_metadata: global_config().include_zarrs_metadata(),
}
@@ -19,26 +21,16 @@ impl Default for ArrayMetadataOptions {
}
impl ArrayMetadataOptions {
- /// Return the [experimental codec store metadata if encode only](crate::config::Config#experimental-codec-store-metadata-if-encode-only) setting.
+ /// Return the codec options.
#[must_use]
- pub fn experimental_codec_store_metadata_if_encode_only(&self) -> bool {
- self.experimental_codec_store_metadata_if_encode_only
+ pub fn codec_options(&self) -> &CodecMetadataOptions {
+ &self.codec_options
}
- /// Set the [experimental codec store metadata if encode only](crate::config::Config#experimental-codec-store-metadata-if-encode-only) setting.
+ /// Return a mutable reference to the codec options.
#[must_use]
- pub fn with_experimental_codec_store_metadata_if_encode_only(mut self, enabled: bool) -> Self {
- self.experimental_codec_store_metadata_if_encode_only = enabled;
- self
- }
-
- /// Set the [experimental codec store metadata if encode only](crate::config::Config#experimental-codec-store-metadata-if-encode-only) setting.
- pub fn set_experimental_codec_store_metadata_if_encode_only(
- &mut self,
- enabled: bool,
- ) -> &mut Self {
- self.experimental_codec_store_metadata_if_encode_only = enabled;
- self
+ pub fn codec_options_mut(&mut self) -> &mut CodecMetadataOptions {
+ &mut self.codec_options
}
/// Get the [metadata convert version](crate::config::Config#metadata-convert-version) configuration.
diff --git a/zarrs/src/array/array_representation.rs b/zarrs/src/array/array_representation.rs
index 2c781fc5..5f186d50 100644
--- a/zarrs/src/array/array_representation.rs
+++ b/zarrs/src/array/array_representation.rs
@@ -1,6 +1,7 @@
use std::num::NonZeroU64;
-use super::{data_type::IncompatibleFillValueError, ArrayShape, DataType, DataTypeSize, FillValue};
+use super::{ArrayShape, DataType, DataTypeSize, FillValue};
+use crate::data_type::IncompatibleFillValueError;
use derive_more::Display;
/// The shape, data type, and fill value of an `array`.
diff --git a/zarrs/src/array/array_sync_readable.rs b/zarrs/src/array/array_sync_readable.rs
index 200e0ba6..d699e944 100644
--- a/zarrs/src/array/array_sync_readable.rs
+++ b/zarrs/src/array/array_sync_readable.rs
@@ -20,7 +20,8 @@ use super::{
},
concurrency::concurrency_chunks_and_codec,
element::ElementOwned,
- Array, ArrayCreateError, ArrayError, ArrayMetadata, ArrayMetadataV3, ArraySize, DataTypeSize,
+ Array, ArrayBytesFixedDisjointView, ArrayCreateError, ArrayError, ArrayMetadata,
+ ArrayMetadataV3, ArraySize, DataTypeSize,
};
#[cfg(feature = "ndarray")]
@@ -458,12 +459,10 @@ impl Array {
}
}
- unsafe fn retrieve_chunk_into(
+ fn retrieve_chunk_into(
&self,
chunk_indices: &[u64],
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), ArrayError> {
if chunk_indices.len() != self.dimensionality() {
@@ -481,28 +480,17 @@ impl Array {
if let Some(chunk_encoded) = chunk_encoded {
let chunk_encoded: Vec = chunk_encoded.into();
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
- unsafe {
- self.codecs().decode_into(
+ self.codecs()
+ .decode_into(
Cow::Owned(chunk_encoded),
&chunk_representation,
- output,
- output_shape,
- output_subset,
+ output_view,
options,
)
- }
- .map_err(ArrayError::CodecError)
+ .map_err(ArrayError::CodecError)
} else {
- unsafe {
- copy_fill_value_into(
- self.data_type(),
- self.fill_value(),
- output,
- output_shape,
- output_subset,
- )
- }
- .map_err(ArrayError::CodecError)
+ copy_fill_value_into(self.data_type(), self.fill_value(), output_view)
+ .map_err(ArrayError::CodecError)
}
}
@@ -720,16 +708,21 @@ impl Array {
let retrieve_chunk = |chunk_indices: Vec| {
let chunk_subset = self.chunk_subset(&chunk_indices)?;
let chunk_subset_overlap = chunk_subset.overlap(array_subset)?;
- unsafe {
- self.retrieve_chunk_subset_into(
- &chunk_indices,
- &chunk_subset_overlap.relative_to(chunk_subset.start())?,
- &output,
+ let mut output_view = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ output,
+ data_type_size,
array_subset.shape(),
- &chunk_subset_overlap.relative_to(array_subset.start())?,
- &options,
- )?;
- }
+ chunk_subset_overlap.relative_to(array_subset.start())?,
+ )
+ };
+ self.retrieve_chunk_subset_into(
+ &chunk_indices,
+ &chunk_subset_overlap.relative_to(chunk_subset.start())?,
+ &mut output_view,
+ &options,
+ )?;
// let chunk_subset_bytes = self.retrieve_chunk_subset_opt(
// &chunk_indices,
// &chunk_subset_overlap.relative_to(chunk_subset.start())?,
@@ -794,7 +787,7 @@ impl Array {
options: &CodecOptions,
) -> Result, ArrayError> {
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
- if !chunk_subset.inbounds(&chunk_representation.shape_u64()) {
+ if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) {
return Err(ArrayError::InvalidArraySubset(
chunk_subset.clone(),
self.shape().to_vec(),
@@ -827,17 +820,15 @@ impl Array {
Ok(bytes)
}
- unsafe fn retrieve_chunk_subset_into(
+ fn retrieve_chunk_subset_into(
&self,
chunk_indices: &[u64],
chunk_subset: &ArraySubset,
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), ArrayError> {
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
- if !chunk_subset.inbounds(&chunk_representation.shape_u64()) {
+ if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) {
return Err(ArrayError::InvalidArraySubset(
chunk_subset.clone(),
self.shape().to_vec(),
@@ -848,15 +839,7 @@ impl Array {
&& chunk_subset.shape() == chunk_representation.shape_u64()
{
// Fast path if `chunk_subset` encompasses the whole chunk
- unsafe {
- self.retrieve_chunk_into(
- chunk_indices,
- output,
- output_shape,
- output_subset,
- options,
- )
- }
+ self.retrieve_chunk_into(chunk_indices, output_view, options)
} else {
let storage_handle = Arc::new(StorageHandle::new(self.storage.clone()));
let storage_transformer = self
@@ -866,19 +849,10 @@ impl Array {
storage_transformer,
self.chunk_key(chunk_indices),
));
-
- unsafe {
- self.codecs
- .clone()
- .partial_decoder(input_handle, &chunk_representation, options)?
- .partial_decode_into(
- chunk_subset,
- output,
- output_shape,
- output_subset,
- options,
- )?;
- }
+ self.codecs
+ .clone()
+ .partial_decoder(input_handle, &chunk_representation, options)?
+ .partial_decode_into(chunk_subset, output_view, options)?;
Ok(())
}
}
diff --git a/zarrs/src/array/array_sync_readable_writable.rs b/zarrs/src/array/array_sync_readable_writable.rs
index 520cb933..478f93a2 100644
--- a/zarrs/src/array/array_sync_readable_writable.rs
+++ b/zarrs/src/array/array_sync_readable_writable.rs
@@ -200,15 +200,13 @@ impl Array
chunk_bytes_old.validate(chunk_shape.iter().product(), self.data_type().size())?;
// Update the chunk
- let chunk_bytes_new = unsafe {
- update_array_bytes(
- chunk_bytes_old,
- &chunk_shape,
- chunk_subset,
- &chunk_subset_bytes,
- self.data_type().size(),
- )
- };
+ let chunk_bytes_new = update_array_bytes(
+ chunk_bytes_old,
+ &chunk_shape,
+ chunk_subset,
+ &chunk_subset_bytes,
+ self.data_type().size(),
+ )?;
// Store the updated chunk
self.store_chunk_opt(chunk_indices, chunk_bytes_new, options)
diff --git a/zarrs/src/array/array_sync_sharded_readable_ext.rs b/zarrs/src/array/array_sync_sharded_readable_ext.rs
index 461df909..04ef4d4e 100644
--- a/zarrs/src/array/array_sync_sharded_readable_ext.rs
+++ b/zarrs/src/array/array_sync_sharded_readable_ext.rs
@@ -7,7 +7,7 @@ use zarrs_metadata::v3::array::codec::sharding::ShardingCodecConfiguration;
use zarrs_storage::byte_range::ByteRange;
use zarrs_storage::StorageHandle;
-use super::array_bytes::{merge_chunks_vlen, update_bytes_flen};
+use super::array_bytes::merge_chunks_vlen;
use super::codec::array_to_bytes::sharding::ShardingPartialDecoder;
use super::codec::{CodecError, ShardingCodec};
use super::element::ElementOwned;
@@ -15,7 +15,7 @@ use super::{
codec::CodecOptions, concurrency::concurrency_chunks_and_codec, Array, ArrayError,
ArrayShardedExt, ChunkGrid,
};
-use super::{ArrayBytes, ArraySize, DataTypeSize};
+use super::{ArrayBytes, ArrayBytesFixedDisjointView, ArraySize, DataTypeSize};
use crate::array::codec::StoragePartialDecoder;
use crate::storage::ReadableStorageTraits;
use crate::{array::codec::ArrayPartialDecoderTraits, array_subset::ArraySubset};
@@ -612,13 +612,18 @@ impl ArrayShardedReadableExt
)?
.remove(0)
.into_owned();
- update_bytes_flen(
- &output,
- array_subset.shape(),
- &bytes.into_fixed()?,
- &shard_subset_overlap.relative_to(array_subset.start())?,
- data_type_size,
- );
+ let mut output_view = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ output,
+ data_type_size,
+ array_subset.shape(),
+ shard_subset_overlap.relative_to(array_subset.start())?,
+ )
+ };
+ output_view
+ .copy_from_slice(&bytes.into_fixed()?)
+ .map_err(CodecError::from)?;
Ok::<_, ArrayError>(())
};
let indices = shards.indices();
diff --git a/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs b/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs
index db3f3fee..71e4a8fa 100644
--- a/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs
+++ b/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs
@@ -6,10 +6,11 @@ use unsafe_cell_slice::UnsafeCellSlice;
use crate::{
array::{
- array_bytes::{merge_chunks_vlen, update_bytes_flen},
- codec::CodecOptions,
+ array_bytes::merge_chunks_vlen,
+ codec::{CodecError, CodecOptions},
concurrency::concurrency_chunks_and_codec,
- Array, ArrayBytes, ArrayError, ArraySize, DataTypeSize, ElementOwned,
+ Array, ArrayBytes, ArrayBytesFixedDisjointView, ArrayError, ArraySize, DataTypeSize,
+ ElementOwned,
},
array_subset::ArraySubset,
storage::ReadableStorageTraits,
@@ -229,7 +230,7 @@ impl ArrayChunkCacheExt Result, ArrayError> {
let chunk_representation = self.chunk_array_representation(chunk_indices)?;
- if !chunk_subset.inbounds(&chunk_representation.shape_u64()) {
+ if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) {
return Err(ArrayError::InvalidArraySubset(
chunk_subset.clone(),
self.shape().to_vec(),
@@ -408,13 +409,18 @@ impl ArrayChunkCacheExt unreachable!(),
};
- update_bytes_flen(
- &output,
- array_subset.shape(),
- fixed,
- &chunk_subset_overlap.relative_to(array_subset.start())?,
- data_type_size,
- );
+ let mut output_view = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ output,
+ data_type_size,
+ array_subset.shape(),
+ chunk_subset_overlap.relative_to(array_subset.start())?,
+ )
+ };
+ output_view
+ .copy_from_slice(fixed)
+ .map_err(CodecError::from)?;
Ok::<_, ArrayError>(())
};
iter_concurrent_limit!(
diff --git a/zarrs/src/array/chunk_cache/chunk_cache_lru.rs b/zarrs/src/array/chunk_cache/chunk_cache_lru.rs
index 73a2f332..fae78303 100644
--- a/zarrs/src/array/chunk_cache/chunk_cache_lru.rs
+++ b/zarrs/src/array/chunk_cache/chunk_cache_lru.rs
@@ -359,7 +359,7 @@ impl ChunkCache for ChunkCacheDecodedLruSizeLimitThreadLo
mod tests {
use super::*;
- use std::{mem::size_of, sync::Arc};
+ use std::sync::Arc;
use crate::{
array::{
diff --git a/zarrs/src/array/chunk_grid.rs b/zarrs/src/array/chunk_grid.rs
index 79ff9ee7..9f5c3ea8 100644
--- a/zarrs/src/array/chunk_grid.rs
+++ b/zarrs/src/array/chunk_grid.rs
@@ -127,6 +127,9 @@ impl TryFrom for ChunkGrid {
}
/// Chunk grid traits.
+// TODO: Unsafe trait? ChunkGridTraits has invariants that must be upheld by implementations.
+// - chunks must be disjoint for downstream `ArrayBytesFixedDisjoint` construction and otherwise sane behavior
+// - this is true for regular and rectangular grids, but a custom grid could violate this
pub trait ChunkGridTraits: core::fmt::Debug + Send + Sync {
/// Create metadata.
fn create_metadata(&self) -> MetadataV3;
diff --git a/zarrs/src/array/chunk_grid/rectangular.rs b/zarrs/src/array/chunk_grid/rectangular.rs
index 05733fb5..6dcedd59 100644
--- a/zarrs/src/array/chunk_grid/rectangular.rs
+++ b/zarrs/src/array/chunk_grid/rectangular.rs
@@ -1,5 +1,7 @@
//! The `rectangular` chunk grid.
//!
+//! This chunk grid is considered experimental as it is based on a draft Zarr enhancement proposal.
+//!
//! See .
use std::num::NonZeroU64;
diff --git a/zarrs/src/array/codec.rs b/zarrs/src/array/codec.rs
index 234dfdc5..864d88a8 100644
--- a/zarrs/src/array/codec.rs
+++ b/zarrs/src/array/codec.rs
@@ -13,8 +13,11 @@
pub mod array_to_array;
pub mod array_to_bytes;
pub mod bytes_to_bytes;
+pub mod metadata_options;
pub mod options;
+use derive_more::derive::Display;
+pub use metadata_options::CodecMetadataOptions;
pub use options::{CodecOptions, CodecOptionsBuilder};
// Array to array
@@ -67,7 +70,6 @@ pub use byte_interval_partial_decoder::ByteIntervalPartialDecoder;
#[cfg(feature = "async")]
pub use byte_interval_partial_decoder::AsyncByteIntervalPartialDecoder;
-use unsafe_cell_slice::UnsafeCellSlice;
mod array_partial_encoder_default;
pub use array_partial_encoder_default::ArrayPartialEncoderDefault;
@@ -77,6 +79,7 @@ pub use array_to_array_partial_encoder_default::ArrayToArrayPartialEncoderDefaul
mod bytes_partial_encoder_default;
pub use bytes_partial_encoder_default::BytesPartialEncoderDefault;
+use zarrs_metadata::ArrayShape;
use crate::indexer::IncompatibleIndexerAndShapeError;
use crate::storage::{StoreKeyOffsetValue, WritableStorage};
@@ -95,12 +98,12 @@ use std::any::Any;
use std::borrow::Cow;
use std::sync::Arc;
-use super::array_bytes::update_bytes_flen;
+use super::RawBytesOffsetsOutOfBoundsError;
use super::{
- concurrency::RecommendedConcurrency, ArrayMetadataOptions, BytesRepresentation,
- ChunkRepresentation, ChunkShape, DataType,
+ array_bytes::RawBytesOffsetsCreateError, concurrency::RecommendedConcurrency, ArrayBytes,
+ ArrayBytesFixedDisjointView, BytesRepresentation, ChunkRepresentation, ChunkShape, DataType,
+ RawBytes,
};
-use super::{ArrayBytes, RawBytes};
/// A codec plugin.
pub type CodecPlugin = Plugin;
@@ -200,13 +203,13 @@ pub trait CodecTraits: Send + Sync {
/// Create metadata.
///
/// A hidden codec (e.g. a cache) will return [`None`], since it will not have any associated metadata.
- fn create_metadata_opt(&self, options: &ArrayMetadataOptions) -> Option;
+ fn create_metadata_opt(&self, options: &CodecMetadataOptions) -> Option;
/// Create metadata with default options.
///
/// A hidden codec (e.g. a cache) will return [`None`], since it will not have any associated metadata.
fn create_metadata(&self) -> Option {
- self.create_metadata_opt(&ArrayMetadataOptions::default())
+ self.create_metadata_opt(&CodecMetadataOptions::default())
}
/// Indicates if the input to a codecs partial decoder should be cached for optimal performance.
@@ -360,34 +363,26 @@ pub trait ArrayPartialDecoderTraits: Any + Send + Sync {
/// Extracted elements from the `array_subset` are written to the subset of the output in C order.
///
/// # Errors
- /// Returns [`CodecError`] if a codec fails or an array subset is invalid.
- ///
- /// # Safety
- /// The caller must ensure that:
- /// - `output` holds enough space for the preallocated bytes of an array with shape `output_shape` of the appropriate data type,
- /// - `output_subset` is within the bounds of `output_shape`, and
- /// - `output_subset` has the same number of elements as `array_subset`.
- unsafe fn partial_decode_into(
+ /// Returns [`CodecError`] if a codec fails or the number of elements in `array_subset` does not match the number of elements in `output_view`.
+ fn partial_decode_into(
&self,
array_subset: &ArraySubset,
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), CodecError> {
- debug_assert!(output_subset.inbounds(output_shape));
- debug_assert_eq!(array_subset.num_elements(), output_subset.num_elements());
+ if array_subset.num_elements() != output_view.num_elements() {
+ return Err(InvalidNumberOfElementsError::new(
+ array_subset.num_elements(),
+ output_view.num_elements(),
+ )
+ .into());
+ }
+
let decoded_value = self
.partial_decode(&[array_subset.clone()], options)?
.remove(0);
if let ArrayBytes::Fixed(decoded_value) = decoded_value {
- update_bytes_flen(
- output,
- output_shape,
- &decoded_value,
- output_subset,
- self.data_type().fixed_size().unwrap(),
- );
+ output_view.copy_from_slice(&decoded_value)?;
Ok(())
} else {
Err(CodecError::ExpectedFixedLengthBytes)
@@ -452,28 +447,25 @@ pub trait AsyncArrayPartialDecoderTraits: Any + Send + Sync {
/// Async variant of [`ArrayPartialDecoderTraits::partial_decode_into`].
#[allow(clippy::missing_safety_doc)]
- async unsafe fn partial_decode_into(
+ async fn partial_decode_into(
&self,
array_subset: &ArraySubset,
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), CodecError> {
- debug_assert!(output_subset.inbounds(output_shape));
- debug_assert_eq!(array_subset.shape(), output_subset.shape());
+ if array_subset.num_elements() != output_view.num_elements() {
+ return Err(InvalidNumberOfElementsError::new(
+ output_view.num_elements(),
+ array_subset.num_elements(),
+ )
+ .into());
+ }
let decoded_value = self
.partial_decode(&[array_subset.clone()], options)
.await?
.remove(0);
if let ArrayBytes::Fixed(decoded_value) = decoded_value {
- update_bytes_flen(
- output,
- output_shape,
- &decoded_value,
- output_subset,
- self.data_type().fixed_size().unwrap(),
- );
+ output_view.copy_from_slice(&decoded_value)?;
Ok(())
} else {
Err(CodecError::ExpectedFixedLengthBytes)
@@ -711,36 +703,24 @@ pub trait ArrayToBytesCodecTraits: ArrayCodecTraits + core::fmt::Debug {
/// Chunk elements are written to the subset of the output in C order.
///
/// # Errors
- /// Returns [`CodecError`] if a codec fails or the decoded output is incompatible with `decoded_representation`.
- ///
- /// # Safety
- /// The caller must ensure that:
- /// - `output` holds enough space for the preallocated bytes of an array with shape `output_shape` of the appropriate data type, and
- /// - `output_subset` is within the bounds of `output_shape`, and
- /// - `output_subset` has the same number of elements as the decoded representation shape.
- unsafe fn decode_into(
+ /// Returns [`CodecError`] if a codec fails or the number of elements in `decoded_representation` does not match the number of elements in `output_view`.
+ fn decode_into(
&self,
bytes: RawBytes<'_>,
decoded_representation: &ChunkRepresentation,
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), CodecError> {
- debug_assert!(output_subset.inbounds(output_shape));
- debug_assert_eq!(
- decoded_representation.num_elements(),
- output_subset.num_elements()
- );
+ if decoded_representation.num_elements() != output_view.num_elements() {
+ return Err(InvalidNumberOfElementsError::new(
+ output_view.num_elements(),
+ decoded_representation.num_elements(),
+ )
+ .into());
+ }
let decoded_value = self.decode(bytes, decoded_representation, options)?;
if let ArrayBytes::Fixed(decoded_value) = decoded_value {
- update_bytes_flen(
- output,
- output_shape,
- &decoded_value,
- output_subset,
- decoded_representation.data_type().fixed_size().unwrap(),
- );
+ output_view.copy_from_slice(&decoded_value)?;
} else {
return Err(CodecError::ExpectedFixedLengthBytes);
}
@@ -961,6 +941,76 @@ impl AsyncBytesPartialDecoderTraits for std::io::Cursor> {
}
}
+/// An error indicating the length of bytes does not match the expected length.
+#[derive(Debug, Error, Display)]
+#[display("Invalid bytes len {len}, expected {expected_len}")]
+pub struct InvalidBytesLengthError {
+ len: usize,
+ expected_len: usize,
+}
+
+impl InvalidBytesLengthError {
+ /// Create a new [`InvalidBytesLengthError`].
+ #[must_use]
+ pub fn new(len: usize, expected_len: usize) -> Self {
+ Self { len, expected_len }
+ }
+}
+
+/// An error indicating the shape is not compatible with the expected number of elements.
+#[derive(Debug, Error, Display)]
+#[display("Invalid shape {shape:?} for number of elements {expected_num_elements}")]
+pub struct InvalidArrayShapeError {
+ shape: ArrayShape,
+ expected_num_elements: usize,
+}
+
+impl InvalidArrayShapeError {
+ /// Create a new [`InvalidArrayShapeError`].
+ #[must_use]
+ pub fn new(shape: ArrayShape, expected_num_elements: usize) -> Self {
+ Self {
+ shape,
+ expected_num_elements,
+ }
+ }
+}
+
+/// An error indicating the length of elements does not match the expected length.
+#[derive(Debug, Error, Display)]
+#[display("Invalid number of elements {num}, expected {expected}")]
+pub struct InvalidNumberOfElementsError {
+ num: u64,
+ expected: u64,
+}
+
+impl InvalidNumberOfElementsError {
+ /// Create a new [`InvalidNumberOfElementsError`].
+ #[must_use]
+ pub fn new(num: u64, expected: u64) -> Self {
+ Self { num, expected }
+ }
+}
+
+/// An array subset is out of bounds.
+#[derive(Debug, Error, Display)]
+#[display("Subset {subset} is out of bounds of {must_be_within}")]
+pub struct SubsetOutOfBoundsError {
+ subset: ArraySubset,
+ must_be_within: ArraySubset,
+}
+
+impl SubsetOutOfBoundsError {
+ /// Create a new [`SubsetOutOfBoundsError`].
+ #[must_use]
+ pub fn new(subset: ArraySubset, must_be_within: ArraySubset) -> Self {
+ Self {
+ subset,
+ must_be_within,
+ }
+ }
+}
+
/// A codec error.
#[derive(Debug, Error)]
pub enum CodecError {
@@ -977,8 +1027,8 @@ pub enum CodecError {
#[error("the array subset {_0} has the wrong dimensionality, expected {_1}")]
InvalidArraySubsetDimensionalityError(ArraySubset, usize),
/// The decoded size of a chunk did not match what was expected.
- #[error("the size of a decoded chunk is {_0}, expected {_1}")]
- UnexpectedChunkDecodedSize(usize, u64),
+ #[error("the size of a decoded chunk is {}, expected {}", _0.len, _0.expected_len)]
+ UnexpectedChunkDecodedSize(#[from] InvalidBytesLengthError),
/// An embedded checksum does not match the decoded value.
#[error("the checksum is invalid")]
InvalidChecksum,
@@ -1003,6 +1053,21 @@ pub enum CodecError {
/// Expected variable length bytes.
#[error("Expected variable length array bytes")]
ExpectedVariableLengthBytes,
+ /// Invalid array shape.
+ #[error(transparent)]
+ InvalidArrayShape(#[from] InvalidArrayShapeError),
+ /// Invalid number of elements.
+ #[error(transparent)]
+ InvalidNumberOfElements(#[from] InvalidNumberOfElementsError),
+ /// Subset out of bounds.
+ #[error(transparent)]
+ SubsetOutOfBounds(#[from] SubsetOutOfBoundsError),
+ /// Invalid byte offsets for variable length data.
+ #[error(transparent)]
+ RawBytesOffsetsCreate(#[from] RawBytesOffsetsCreateError),
+ /// Variable length array bytes offsets are out of bounds.
+ #[error(transparent)]
+ RawBytesOffsetsOutOfBounds(#[from] RawBytesOffsetsOutOfBoundsError),
}
impl From<&str> for CodecError {
diff --git a/zarrs/src/array/codec/array_partial_encoder_default.rs b/zarrs/src/array/codec/array_partial_encoder_default.rs
index 407c07ab..98b78b20 100644
--- a/zarrs/src/array/codec/array_partial_encoder_default.rs
+++ b/zarrs/src/array/codec/array_partial_encoder_default.rs
@@ -76,15 +76,13 @@ impl ArrayPartialEncoderTraits for ArrayPartialEncoderDefault {
self.decoded_representation.data_type().size(),
)?;
- chunk_bytes = unsafe {
- update_array_bytes(
- chunk_bytes,
- &chunk_shape,
- chunk_subset,
- chunk_subset_bytes,
- self.decoded_representation.data_type().size(),
- )
- };
+ chunk_bytes = update_array_bytes(
+ chunk_bytes,
+ &chunk_shape,
+ chunk_subset,
+ chunk_subset_bytes,
+ self.decoded_representation.data_type().size(),
+ )?;
}
let is_fill_value = !options.store_empty_chunks()
diff --git a/zarrs/src/array/codec/array_to_array/bitround/bitround_codec.rs b/zarrs/src/array/codec/array_to_array/bitround/bitround_codec.rs
index 51c8249f..fc288375 100644
--- a/zarrs/src/array/codec/array_to_array/bitround/bitround_codec.rs
+++ b/zarrs/src/array/codec/array_to_array/bitround/bitround_codec.rs
@@ -5,9 +5,9 @@ use crate::{
codec::{
options::CodecOptions, ArrayBytes, ArrayCodecTraits, ArrayPartialDecoderTraits,
ArrayPartialEncoderTraits, ArrayToArrayCodecTraits, ArrayToArrayPartialEncoderDefault,
- CodecError, CodecTraits, RecommendedConcurrency,
+ CodecError, CodecMetadataOptions, CodecTraits, RecommendedConcurrency,
},
- ArrayMetadataOptions, ChunkRepresentation, ChunkShape, DataType,
+ ChunkRepresentation, ChunkShape, DataType,
},
config::global_config,
metadata::v3::MetadataV3,
@@ -47,7 +47,7 @@ impl BitroundCodec {
}
impl CodecTraits for BitroundCodec {
- fn create_metadata_opt(&self, options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, options: &CodecMetadataOptions) -> Option {
if options.experimental_codec_store_metadata_if_encode_only() {
let configuration = BitroundCodecConfigurationV1 {
keepbits: self.keepbits,
diff --git a/zarrs/src/array/codec/array_to_array/transpose.rs b/zarrs/src/array/codec/array_to_array/transpose.rs
index 7818ccf7..8e0f1ec5 100644
--- a/zarrs/src/array/codec/array_to_array/transpose.rs
+++ b/zarrs/src/array/codec/array_to_array/transpose.rs
@@ -120,8 +120,15 @@ fn transpose_vlen<'a>(
bytes_new.extend_from_slice(&bytes[curr..next]);
}
offsets_new.push(bytes_new.len());
-
- ArrayBytes::new_vlen(bytes_new, offsets_new)
+ let offsets_new = unsafe {
+ // SAFETY: The offsets are monotonically increasing.
+ RawBytesOffsets::new_unchecked(offsets_new)
+ };
+ let array_bytes = unsafe {
+ // SAFETY: The last offset is equal to the length of the bytes
+ ArrayBytes::new_vlen_unchecked(bytes_new, offsets_new)
+ };
+ array_bytes
}
#[cfg(test)]
diff --git a/zarrs/src/array/codec/array_to_array/transpose/transpose_codec.rs b/zarrs/src/array/codec/array_to_array/transpose/transpose_codec.rs
index 1ac8a968..85032f30 100644
--- a/zarrs/src/array/codec/array_to_array/transpose/transpose_codec.rs
+++ b/zarrs/src/array/codec/array_to_array/transpose/transpose_codec.rs
@@ -5,9 +5,9 @@ use crate::{
codec::{
options::CodecOptions, ArrayBytes, ArrayCodecTraits, ArrayPartialDecoderTraits,
ArrayPartialEncoderTraits, ArrayToArrayCodecTraits, ArrayToArrayPartialEncoderDefault,
- CodecError, CodecTraits, RecommendedConcurrency,
+ CodecError, CodecMetadataOptions, CodecTraits, RecommendedConcurrency,
},
- ArrayMetadataOptions, ChunkRepresentation, ChunkShape,
+ ChunkRepresentation, ChunkShape,
},
metadata::v3::{array::codec::transpose::TransposeCodecConfigurationV1, MetadataV3},
plugin::PluginCreateError,
@@ -48,7 +48,7 @@ impl TransposeCodec {
}
impl CodecTraits for TransposeCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = TransposeCodecConfigurationV1 {
order: self.order.clone(),
};
diff --git a/zarrs/src/array/codec/array_to_array_partial_encoder_default.rs b/zarrs/src/array/codec/array_to_array_partial_encoder_default.rs
index 5c854b8c..2cea5e05 100644
--- a/zarrs/src/array/codec/array_to_array_partial_encoder_default.rs
+++ b/zarrs/src/array/codec/array_to_array_partial_encoder_default.rs
@@ -87,15 +87,13 @@ impl ArrayPartialEncoderTraits for ArrayToArrayPartialEncoderDefault {
self.decoded_representation.data_type().size(),
)?;
- decoded_value = unsafe {
- update_array_bytes(
- decoded_value,
- &chunk_shape,
- chunk_subset,
- chunk_subset_bytes,
- self.decoded_representation.data_type().size(),
- )
- };
+ decoded_value = update_array_bytes(
+ decoded_value,
+ &chunk_shape,
+ chunk_subset,
+ chunk_subset_bytes,
+ self.decoded_representation.data_type().size(),
+ )?;
}
let is_fill_value = !options.store_empty_chunks()
diff --git a/zarrs/src/array/codec/array_to_bytes/bytes.rs b/zarrs/src/array/codec/array_to_bytes/bytes.rs
index 98ba7fc9..4962dc1e 100644
--- a/zarrs/src/array/codec/array_to_bytes/bytes.rs
+++ b/zarrs/src/array/codec/array_to_bytes/bytes.rs
@@ -73,6 +73,10 @@ pub(crate) fn reverse_endianness(v: &mut [u8], data_type: &DataType) {
}
// Variable-sized data types are not supported and are rejected outside of this function
DataType::String | DataType::Bytes => unreachable!(),
+ _ => {
+ // FIXME: Data type extensions, endianness reversal for custom data types
+ unimplemented!("Reverse endianness for data type {:?}", data_type)
+ }
}
}
@@ -297,7 +301,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![4, 8];
@@ -343,7 +347,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![4, 8];
diff --git a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs
index f6cff4a3..a199d326 100644
--- a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs
@@ -7,11 +7,10 @@ use crate::{
codec::{
ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderDefault,
ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits,
- BytesPartialEncoderTraits, CodecError, CodecOptions, CodecTraits,
- RecommendedConcurrency,
+ BytesPartialEncoderTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ InvalidBytesLengthError, RecommendedConcurrency,
},
- ArrayBytes, ArrayMetadataOptions, BytesRepresentation, ChunkRepresentation, DataTypeSize,
- RawBytes,
+ ArrayBytes, BytesRepresentation, ChunkRepresentation, DataTypeSize, RawBytes,
},
metadata::v3::MetadataV3,
};
@@ -77,12 +76,11 @@ impl BytesCodec {
));
}
DataTypeSize::Fixed(data_type_size) => {
- let array_size = decoded_representation.num_elements() * data_type_size as u64;
- if value.len() as u64 != array_size {
- return Err(CodecError::UnexpectedChunkDecodedSize(
- value.len(),
- array_size,
- ));
+ let array_size =
+ usize::try_from(decoded_representation.num_elements() * data_type_size as u64)
+ .unwrap();
+ if value.len() != array_size {
+ return Err(InvalidBytesLengthError::new(value.len(), array_size).into());
} else if data_type_size > 1 && self.endian.is_none() {
return Err(CodecError::Other(format!(
"tried to encode an array with element size {data_type_size} with endianness None"
@@ -101,7 +99,7 @@ impl BytesCodec {
}
impl CodecTraits for BytesCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = BytesCodecConfigurationV1 {
endian: self.endian,
};
diff --git a/zarrs/src/array/codec/array_to_bytes/codec_chain.rs b/zarrs/src/array/codec/array_to_bytes/codec_chain.rs
index 2b41fe63..8fc9bd95 100644
--- a/zarrs/src/array/codec/array_to_bytes/codec_chain.rs
+++ b/zarrs/src/array/codec/array_to_bytes/codec_chain.rs
@@ -2,22 +2,19 @@
use std::sync::Arc;
-use unsafe_cell_slice::UnsafeCellSlice;
-
use crate::{
array::{
- array_bytes::update_bytes_flen,
codec::{
ArrayCodecTraits, ArrayPartialDecoderCache, ArrayPartialDecoderTraits,
ArrayPartialEncoderTraits, ArrayToArrayCodecTraits, ArrayToBytesCodecTraits,
BytesPartialDecoderCache, BytesPartialDecoderTraits, BytesPartialEncoderTraits,
- BytesToBytesCodecTraits, Codec, CodecError, CodecOptions, CodecTraits,
+ BytesToBytesCodecTraits, Codec, CodecError, CodecMetadataOptions, CodecOptions,
+ CodecTraits,
},
concurrency::RecommendedConcurrency,
- ArrayBytes, ArrayMetadataOptions, BytesRepresentation, ChunkRepresentation, ChunkShape,
- RawBytes,
+ ArrayBytes, ArrayBytesFixedDisjointView, BytesRepresentation, ChunkRepresentation,
+ ChunkShape, RawBytes,
},
- array_subset::ArraySubset,
metadata::v3::MetadataV3,
plugin::PluginCreateError,
};
@@ -137,7 +134,7 @@ impl CodecChain {
/// Create codec chain metadata.
#[must_use]
- pub fn create_metadatas_opt(&self, options: &ArrayMetadataOptions) -> Vec {
+ pub fn create_metadatas_opt(&self, options: &CodecMetadataOptions) -> Vec {
let mut metadatas =
Vec::with_capacity(self.array_to_array.len() + 1 + self.bytes_to_bytes.len());
for codec in &self.array_to_array {
@@ -159,7 +156,7 @@ impl CodecChain {
/// Create codec chain metadata with default options.
#[must_use]
pub fn create_metadatas(&self) -> Vec {
- self.create_metadatas_opt(&ArrayMetadataOptions::default())
+ self.create_metadatas_opt(&CodecMetadataOptions::default())
}
/// Get the array to array codecs
@@ -215,7 +212,7 @@ impl CodecTraits for CodecChain {
/// Returns [`None`] since a codec chain does not have standard codec metadata.
///
/// Note that usage of the codec chain is explicit in [`Array`](crate::array::Array) and [`CodecChain::create_metadatas_opt()`] will call [`CodecTraits::create_metadata_opt()`] from for each codec.
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
None
}
@@ -309,13 +306,11 @@ impl ArrayToBytesCodecTraits for CodecChain {
Ok(bytes)
}
- unsafe fn decode_into(
+ fn decode_into(
&self,
mut bytes: RawBytes<'_>,
decoded_representation: &ChunkRepresentation,
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), CodecError> {
let array_representations =
@@ -325,16 +320,12 @@ impl ArrayToBytesCodecTraits for CodecChain {
if self.bytes_to_bytes.is_empty() && self.array_to_array.is_empty() {
// Fast path if no bytes to bytes or array to array codecs
- return unsafe {
- self.array_to_bytes.decode_into(
- bytes,
- array_representations.last().unwrap(),
- output,
- output_shape,
- output_subset,
- options,
- )
- };
+ return self.array_to_bytes.decode_into(
+ bytes,
+ array_representations.last().unwrap(),
+ output_view,
+ options,
+ );
}
// bytes->bytes
@@ -347,16 +338,12 @@ impl ArrayToBytesCodecTraits for CodecChain {
if self.array_to_array.is_empty() {
// Fast path if no array to array codecs
- return unsafe {
- self.array_to_bytes.decode_into(
- bytes,
- array_representations.last().unwrap(),
- output,
- output_shape,
- output_subset,
- options,
- )
- };
+ return self.array_to_bytes.decode_into(
+ bytes,
+ array_representations.last().unwrap(),
+ output_view,
+ options,
+ );
}
// bytes->array
@@ -377,13 +364,7 @@ impl ArrayToBytesCodecTraits for CodecChain {
)?;
if let ArrayBytes::Fixed(decoded_value) = bytes {
- update_bytes_flen(
- output,
- output_shape,
- &decoded_value,
- output_subset,
- decoded_representation.data_type().fixed_size().unwrap(),
- );
+ output_view.copy_from_slice(&decoded_value)?;
} else {
// TODO: Variable length data type support?
return Err(CodecError::ExpectedFixedLengthBytes);
@@ -852,7 +833,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| f32::from_ne_bytes(b.try_into().unwrap()))
.collect();
println!("decoded_partial_chunk {decoded_partial_chunk:?}");
diff --git a/zarrs/src/array/codec/array_to_bytes/pcodec.rs b/zarrs/src/array/codec/array_to_bytes/pcodec.rs
index cf77b2ab..47cc8904 100644
--- a/zarrs/src/array/codec/array_to_bytes/pcodec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/pcodec.rs
@@ -271,7 +271,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().into_owned())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![4, 8];
@@ -323,7 +323,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().into_owned())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![4, 8];
diff --git a/zarrs/src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs b/zarrs/src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs
index 10c9f6db..94b47549 100644
--- a/zarrs/src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs
@@ -10,11 +10,11 @@ use crate::{
codec::{
ArrayBytes, ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderDefault,
ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits,
- BytesPartialEncoderTraits, CodecError, CodecOptions, CodecTraits, RawBytes,
- RecommendedConcurrency,
+ BytesPartialEncoderTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RawBytes, RecommendedConcurrency,
},
- convert_from_bytes_slice, transmute_to_bytes_vec, ArrayMetadataOptions,
- BytesRepresentation, ChunkRepresentation, DataType,
+ convert_from_bytes_slice, transmute_to_bytes_vec, BytesRepresentation, ChunkRepresentation,
+ DataType,
},
config::global_config,
metadata::v3::{array::codec::pcodec::PcodecModeSpecConfiguration, MetadataV3},
@@ -84,7 +84,7 @@ impl PcodecCodec {
}
impl CodecTraits for PcodecCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let mode_spec = mode_spec_pco_to_config(&self.chunk_config.mode_spec);
let (delta_spec, delta_encoding_order) = match self.chunk_config.delta_spec {
DeltaSpec::Auto => (PcodecDeltaSpecConfiguration::Auto, None),
diff --git a/zarrs/src/array/codec/array_to_bytes/sharding.rs b/zarrs/src/array/codec/array_to_bytes/sharding.rs
index b0a7e216..7e2a5955 100644
--- a/zarrs/src/array/codec/array_to_bytes/sharding.rs
+++ b/zarrs/src/array/codec/array_to_bytes/sharding.rs
@@ -111,7 +111,7 @@ fn decode_shard_index(
)?;
let decoded_shard_index = decoded_shard_index.into_fixed()?;
Ok(decoded_shard_index
- .chunks_exact(core::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|v| u64::from_ne_bytes(v.try_into().unwrap() /* safe */))
.collect())
}
@@ -499,7 +499,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
.collect();
assert_eq!(answer, decoded_partial_chunk);
@@ -584,7 +584,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
.collect();
assert_eq!(answer, decoded_partial_chunk);
@@ -653,7 +653,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
@@ -695,7 +695,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![4, 8];
diff --git a/zarrs/src/array/codec/array_to_bytes/sharding/sharding_codec.rs b/zarrs/src/array/codec/array_to_bytes/sharding/sharding_codec.rs
index f6ec782f..ccf76f54 100644
--- a/zarrs/src/array/codec/array_to_bytes/sharding/sharding_codec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/sharding/sharding_codec.rs
@@ -6,16 +6,17 @@ use std::{
use crate::{
array::{
- array_bytes::{merge_chunks_vlen, update_bytes_flen},
+ array_bytes::merge_chunks_vlen,
chunk_shape_to_array_shape,
codec::{
ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderTraits,
ArrayToBytesCodecTraits, BytesPartialDecoderTraits, BytesPartialEncoderTraits,
- CodecChain, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency,
+ CodecChain, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RecommendedConcurrency,
},
concurrency::calc_concurrency_outer_inner,
- transmute_to_bytes_vec, unravel_index, ArrayBytes, ArrayMetadataOptions, ArraySize,
- BytesRepresentation, ChunkRepresentation, ChunkShape, DataTypeSize, FillValue, RawBytes,
+ transmute_to_bytes_vec, unravel_index, ArrayBytes, ArrayBytesFixedDisjointView, ArraySize,
+ BytesRepresentation, ChunkRepresentation, ChunkShape, DataTypeSize, RawBytes,
},
array_subset::ArraySubset,
metadata::v3::MetadataV3,
@@ -85,7 +86,7 @@ impl ShardingCodec {
}
impl CodecTraits for ShardingCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = ShardingCodecConfigurationV1 {
chunk_shape: self.chunk_shape.clone(),
codecs: self.inner_codecs.create_metadatas(),
@@ -104,21 +105,6 @@ impl CodecTraits for ShardingCodec {
}
}
-/// Repeat the fill value into a contiguous vec
-/// The length is the contiguous elements of an inner chunk in the shard. See `ContiguousLinearisedIndices`.
-fn get_contiguous_fill_value(
- fill_value: &FillValue,
- chunk_shape: &[NonZeroU64],
- shard_shape: &[u64],
-) -> Vec {
- let chunk_subset = ArraySubset::new_with_shape(chunk_shape_to_array_shape(chunk_shape));
- let contiguous_iterator =
- unsafe { chunk_subset.contiguous_linearised_indices_unchecked(shard_shape) };
- fill_value
- .as_ne_bytes()
- .repeat(contiguous_iterator.contiguous_elements_usize())
-}
-
impl ArrayCodecTraits for ShardingCodec {
fn recommended_concurrency(
&self,
@@ -182,7 +168,6 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
shard_representation: &ChunkRepresentation,
options: &CodecOptions,
) -> Result, CodecError> {
- let shard_shape = shard_representation.shape_u64();
let chunk_representation = unsafe {
ChunkRepresentation::new_unchecked(
self.chunk_shape.as_slice().to_vec(),
@@ -201,10 +186,6 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
let shard_index =
self.decode_index(&encoded_shard, chunks_per_shard.as_slice(), options)?;
- let any_empty = shard_index
- .par_iter()
- .any(|offset_or_size| *offset_or_size == u64::MAX);
-
// Calc self/internal concurrent limits
let (shard_concurrent_limit, concurrency_limit_inner_chunks) = calc_concurrency_outer_inner(
options.concurrent_target(),
@@ -271,49 +252,29 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
}
let mut decoded_shard = Vec::::with_capacity(size_output);
- let contiguous_fill_value = if any_empty {
- Some(get_contiguous_fill_value(
- shard_representation.fill_value(),
- &self.chunk_shape,
- &shard_shape,
- ))
- } else {
- None
- };
-
{
let output =
UnsafeCellSlice::new_from_vec_with_spare_capacity(&mut decoded_shard);
+ let shard_shape = shard_representation.shape_u64();
let decode_chunk = |chunk_index: usize| {
let chunk_subset = self
.chunk_index_to_subset(chunk_index as u64, chunks_per_shard.as_slice());
+ let mut output_view_inner_chunk = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ output,
+ data_type_size,
+ &shard_shape,
+ chunk_subset,
+ )
+ };
// Read the offset/size
let offset = shard_index[chunk_index * 2];
let size = shard_index[chunk_index * 2 + 1];
if offset == u64::MAX && size == u64::MAX {
- if let Some(fv) = &contiguous_fill_value {
- let contiguous_iterator = unsafe {
- chunk_subset
- .contiguous_linearised_indices_unchecked(&shard_shape)
- };
- let elements = contiguous_iterator.contiguous_elements();
- for index in &contiguous_iterator {
- debug_assert_eq!(
- fv.len() as u64,
- elements * data_type_size as u64
- );
- let shard_offset =
- usize::try_from(index * data_type_size as u64).unwrap();
- unsafe {
- output
- .index_mut(shard_offset..shard_offset + fv.len())
- .copy_from_slice(fv);
- }
- }
- } else {
- unreachable!();
- }
+ output_view_inner_chunk
+ .fill(shard_representation.fill_value().as_ne_bytes())?;
} else if usize::try_from(offset + size).unwrap() > encoded_shard.len() {
return Err(CodecError::Other(
"The shard index references out-of-bounds bytes. The chunk may be corrupted."
@@ -328,13 +289,9 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
&chunk_representation,
&options,
)?;
- update_bytes_flen(
- &output,
- &shard_representation.shape_u64(),
- &decoded_chunk.into_fixed()?,
- &chunk_subset,
- data_type_size,
- );
+ output_view_inner_chunk
+ .copy_from_slice(&decoded_chunk.into_fixed()?)
+ .map_err(CodecError::from)?;
};
Ok::<_, CodecError>(())
@@ -354,16 +311,13 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
}
#[allow(clippy::too_many_lines)]
- unsafe fn decode_into(
+ fn decode_into(
&self,
encoded_shard: RawBytes<'_>,
shard_representation: &ChunkRepresentation,
- output: &UnsafeCellSlice,
- output_shape: &[u64],
- output_subset: &ArraySubset,
+ output_view: &mut ArrayBytesFixedDisjointView<'_>,
options: &CodecOptions,
) -> Result<(), CodecError> {
- let shard_shape = shard_representation.shape_u64();
let chunk_representation = unsafe {
ChunkRepresentation::new_unchecked(
self.chunk_shape.as_slice().to_vec(),
@@ -382,10 +336,6 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
let shard_index =
self.decode_index(&encoded_shard, chunks_per_shard.as_slice(), options)?;
- let any_empty = shard_index
- .par_iter()
- .any(|offset_or_size| *offset_or_size == u64::MAX);
-
// Calc self/internal concurrent limits
let (shard_concurrent_limit, concurrency_limit_inner_chunks) = calc_concurrency_outer_inner(
options.concurrent_target(),
@@ -399,96 +349,55 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
.concurrent_target(concurrency_limit_inner_chunks)
.build();
- match shard_representation.data_type().size() {
- DataTypeSize::Variable => {
- // TODO: Variable length data type support?
- Err(CodecError::ExpectedFixedLengthBytes)
- }
- DataTypeSize::Fixed(data_type_size) => {
- let contiguous_fill_value = if any_empty {
- Some(get_contiguous_fill_value(
- shard_representation.fill_value(),
- &self.chunk_shape,
- &shard_shape,
- ))
- } else {
- None
- };
-
- {
- let decode_chunk = |chunk_index: usize| {
- let chunk_subset = self
- .chunk_index_to_subset(chunk_index as u64, chunks_per_shard.as_slice());
+ let decode_chunk = |chunk_index: usize| {
+ let chunk_subset =
+ self.chunk_index_to_subset(chunk_index as u64, chunks_per_shard.as_slice());
- let output_subset_chunk = ArraySubset::new_with_start_shape(
- std::iter::zip(output_subset.start(), chunk_subset.start())
- .map(|(o, s)| o + s)
- .collect(),
- chunk_subset.shape().to_vec(),
- )
- .unwrap();
+ let output_subset_chunk = ArraySubset::new_with_start_shape(
+ std::iter::zip(output_view.subset().start(), chunk_subset.start())
+ .map(|(o, s)| o + s)
+ .collect(),
+ chunk_subset.shape().to_vec(),
+ )
+ .unwrap();
+ let mut output_view_inner_chunk = unsafe {
+ // SAFETY: inner chunks represent disjoint array subsets
+ output_view.subdivide_unchecked(output_subset_chunk)
+ };
- // Read the offset/size
- let offset = shard_index[chunk_index * 2];
- let size = shard_index[chunk_index * 2 + 1];
- if offset == u64::MAX && size == u64::MAX {
- if let Some(fv) = &contiguous_fill_value {
- let contiguous_iterator = unsafe {
- output_subset_chunk
- .contiguous_linearised_indices_unchecked(output_shape)
- };
- let elements = contiguous_iterator.contiguous_elements();
- for index in &contiguous_iterator {
- debug_assert_eq!(
- fv.len() as u64,
- elements * data_type_size as u64
- );
- let shard_offset =
- usize::try_from(index * data_type_size as u64).unwrap();
- unsafe {
- output
- .index_mut(shard_offset..shard_offset + fv.len())
- .copy_from_slice(fv);
- }
- }
- } else {
- unreachable!();
- }
- } else if usize::try_from(offset + size).unwrap() > encoded_shard.len() {
- return Err(CodecError::Other(
- "The shard index references out-of-bounds bytes. The chunk may be corrupted."
- .to_string(),
- ));
- } else {
- let offset: usize = offset.try_into().unwrap();
- let size: usize = size.try_into().unwrap();
- let encoded_chunk = &encoded_shard[offset..offset + size];
- unsafe {
- self.inner_codecs.decode_into(
- Cow::Borrowed(encoded_chunk),
- &chunk_representation,
- output,
- output_shape,
- &output_subset_chunk,
- &options,
- )?;
- }
- };
+ // Read the offset/size
+ let offset = shard_index[chunk_index * 2];
+ let size = shard_index[chunk_index * 2 + 1];
+ if offset == u64::MAX && size == u64::MAX {
+ output_view_inner_chunk.fill(shard_representation.fill_value().as_ne_bytes())?;
+ } else if usize::try_from(offset + size).unwrap() > encoded_shard.len() {
+ return Err(CodecError::Other(
+ "The shard index references out-of-bounds bytes. The chunk may be corrupted."
+ .to_string(),
+ ));
+ } else {
+ let offset: usize = offset.try_into().unwrap();
+ let size: usize = size.try_into().unwrap();
+ let encoded_chunk = &encoded_shard[offset..offset + size];
+ self.inner_codecs.decode_into(
+ Cow::Borrowed(encoded_chunk),
+ &chunk_representation,
+ &mut output_view_inner_chunk,
+ &options,
+ )?;
+ };
- Ok::<_, CodecError>(())
- };
+ Ok::<_, CodecError>(())
+ };
- rayon_iter_concurrent_limit::iter_concurrent_limit!(
- shard_concurrent_limit,
- (0..num_chunks),
- try_for_each,
- decode_chunk
- )?;
+ rayon_iter_concurrent_limit::iter_concurrent_limit!(
+ shard_concurrent_limit,
+ (0..num_chunks),
+ try_for_each,
+ decode_chunk
+ )?;
- Ok(())
- }
- }
- }
+ Ok(())
}
fn partial_decoder(
diff --git a/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs b/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs
index bc90361e..303b70b0 100644
--- a/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs
+++ b/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_decoder.rs
@@ -5,15 +5,15 @@ use unsafe_cell_slice::UnsafeCellSlice;
use zarrs_storage::byte_range::ByteRange;
use crate::array::{
- array_bytes::{merge_chunks_vlen, update_bytes_flen},
+ array_bytes::merge_chunks_vlen,
codec::{
ArrayCodecTraits, ArrayPartialDecoderTraits, ArraySubset, ArrayToBytesCodecTraits,
ByteIntervalPartialDecoder, BytesPartialDecoderTraits, CodecChain, CodecError,
CodecOptions,
},
concurrency::{calc_concurrency_outer_inner, RecommendedConcurrency},
- ravel_indices, ArrayBytes, ArraySize, ChunkRepresentation, ChunkShape, DataType, DataTypeSize,
- RawBytes,
+ ravel_indices, ArrayBytes, ArrayBytesFixedDisjointView, ArraySize, ChunkRepresentation,
+ ChunkShape, DataType, DataTypeSize, RawBytes,
};
#[cfg(feature = "async")]
@@ -305,16 +305,20 @@ impl ArrayPartialDecoderTraits for ShardingPartialDecoder {
.into_owned()
};
let decoded_bytes = decoded_bytes.into_fixed()?;
- update_bytes_flen(
- &out_array_subset_slice,
- array_subset.shape(),
- &decoded_bytes,
- &chunk_subset_overlap
- .relative_to(array_subset.start())
- .unwrap(),
- data_type_size,
- );
- Ok::<_, CodecError>(())
+ let mut output_view = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ out_array_subset_slice,
+ data_type_size,
+ array_subset.shape(),
+ chunk_subset_overlap
+ .relative_to(array_subset.start())
+ .unwrap(),
+ )
+ };
+ output_view
+ .copy_from_slice(&decoded_bytes)
+ .map_err(CodecError::from)
};
rayon_iter_concurrent_limit::iter_concurrent_limit!(
@@ -597,15 +601,20 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder {
Vec,
ArraySubset,
) = subset_and_decoded_chunk?;
- update_bytes_flen(
- &shard_slice,
- array_subset.shape(),
- &chunk_subset_bytes.into(),
- &chunk_subset_overlap
- .relative_to(array_subset.start())
- .unwrap(),
- data_type_size,
- );
+ let mut output_view = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ shard_slice,
+ data_type_size,
+ array_subset.shape(),
+ chunk_subset_overlap
+ .relative_to(array_subset.start())
+ .unwrap(),
+ )
+ };
+ output_view
+ .copy_from_slice(&chunk_subset_bytes)
+ .expect("chunk subset bytes are the correct length");
Ok::<_, CodecError>(())
}
)?;
@@ -627,26 +636,26 @@ impl AsyncArrayPartialDecoderTraits for AsyncShardingPartialDecoder {
rayon_iter_concurrent_limit::iter_concurrent_limit!(
options.concurrent_target(),
filled_chunks,
- for_each,
+ try_for_each,
|chunk_subset: &ArraySubset| {
let chunk_subset_overlap =
unsafe { array_subset.overlap_unchecked(chunk_subset) };
- let filled_chunk = self
- .decoded_representation
- .fill_value()
- .as_ne_bytes()
- .repeat(chunk_subset_overlap.num_elements_usize());
- update_bytes_flen(
- &shard_slice,
- array_subset.shape(),
- &filled_chunk.into(),
- &chunk_subset_overlap
- .relative_to(array_subset.start())
- .unwrap(),
- data_type_size,
- );
+ let mut output_view = unsafe {
+ // SAFETY: chunks represent disjoint array subsets
+ ArrayBytesFixedDisjointView::new_unchecked(
+ shard_slice,
+ data_type_size,
+ array_subset.shape(),
+ chunk_subset_overlap
+ .relative_to(array_subset.start())
+ .unwrap(),
+ )
+ };
+ output_view
+ .fill(self.decoded_representation.fill_value().as_ne_bytes())
+ .map_err(CodecError::from)
}
- );
+ )?;
};
unsafe { shard.set_len(shard_size) };
out.push(ArrayBytes::from(shard));
diff --git a/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_encoder.rs b/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_encoder.rs
index 2a5abbac..e01c9912 100644
--- a/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_encoder.rs
+++ b/zarrs/src/array/codec/array_to_bytes/sharding/sharding_partial_encoder.rs
@@ -304,17 +304,15 @@ impl ArrayPartialEncoderTraits for ShardingPartialEncoder {
};
// Update the inner chunk
- let inner_chunk_updated = unsafe {
- update_array_bytes(
- inner_chunk_decoded,
- &self.inner_chunk_representation.shape_u64(),
- &inner_chunk_subset_overlap
- .relative_to(inner_chunk_subset.start())
- .unwrap(),
- &inner_chunk_bytes,
- self.inner_chunk_representation.data_type().size(),
- )
- };
+ let inner_chunk_updated = update_array_bytes(
+ inner_chunk_decoded,
+ &self.inner_chunk_representation.shape_u64(),
+ &inner_chunk_subset_overlap
+ .relative_to(inner_chunk_subset.start())
+ .unwrap(),
+ &inner_chunk_bytes,
+ self.inner_chunk_representation.data_type().size(),
+ )?;
inner_chunks_decoded
.lock()
.unwrap()
diff --git a/zarrs/src/array/codec/array_to_bytes/vlen.rs b/zarrs/src/array/codec/array_to_bytes/vlen.rs
index 4075aa32..b246c08e 100644
--- a/zarrs/src/array/codec/array_to_bytes/vlen.rs
+++ b/zarrs/src/array/codec/array_to_bytes/vlen.rs
@@ -3,7 +3,7 @@
mod vlen_codec;
mod vlen_partial_decoder;
-use std::{mem::size_of, num::NonZeroU64, sync::Arc};
+use std::{num::NonZeroU64, sync::Arc};
use itertools::Itertools;
pub use vlen::IDENTIFIER;
@@ -13,7 +13,7 @@ pub use crate::metadata::v3::array::codec::vlen::{
};
use crate::{
array::{
- codec::{ArrayToBytesCodecTraits, CodecError, CodecOptions},
+ codec::{ArrayToBytesCodecTraits, CodecError, CodecOptions, InvalidBytesLengthError},
convert_from_bytes_slice, ChunkRepresentation, CodecChain, DataType, Endianness, FillValue,
RawBytes,
},
@@ -62,10 +62,7 @@ fn get_vlen_bytes_and_offsets(
) -> Result<(Vec, Vec), CodecError> {
// Get the index length and data start
if bytes.len() < size_of::() {
- return Err(CodecError::UnexpectedChunkDecodedSize(
- bytes.len(),
- size_of::() as u64,
- ));
+ return Err(InvalidBytesLengthError::new(bytes.len(), size_of::()).into());
}
let index_len = u64::from_le_bytes(bytes[0..size_of::()].try_into().unwrap());
let index_len = usize::try_from(index_len)
diff --git a/zarrs/src/array/codec/array_to_bytes/vlen/vlen_codec.rs b/zarrs/src/array/codec/array_to_bytes/vlen/vlen_codec.rs
index 4c181488..9e0a23a3 100644
--- a/zarrs/src/array/codec/array_to_bytes/vlen/vlen_codec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/vlen/vlen_codec.rs
@@ -1,15 +1,15 @@
-use std::{mem::size_of, num::NonZeroU64, sync::Arc};
+use std::{num::NonZeroU64, sync::Arc};
use crate::{
array::{
codec::{
ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderDefault,
ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesCodec,
- BytesPartialDecoderTraits, BytesPartialEncoderTraits, CodecError, CodecOptions,
- CodecTraits, RecommendedConcurrency,
+ BytesPartialDecoderTraits, BytesPartialEncoderTraits, CodecError, CodecMetadataOptions,
+ CodecOptions, CodecTraits, RecommendedConcurrency,
},
- transmute_to_bytes_vec, ArrayBytes, ArrayMetadataOptions, BytesRepresentation,
- ChunkRepresentation, CodecChain, DataType, DataTypeSize, Endianness, FillValue, RawBytes,
+ transmute_to_bytes_vec, ArrayBytes, BytesRepresentation, ChunkRepresentation, CodecChain,
+ DataType, DataTypeSize, Endianness, FillValue, RawBytes, RawBytesOffsets,
},
config::global_config,
metadata::v3::{array::codec::vlen::VlenIndexDataType, MetadataV3},
@@ -21,7 +21,7 @@ use crate::array::codec::{AsyncArrayPartialDecoderTraits, AsyncBytesPartialDecod
use super::{vlen_partial_decoder, VlenCodecConfiguration, VlenCodecConfigurationV1};
-/// A `bytes` codec implementation.
+/// A `vlen` codec implementation.
#[derive(Debug, Clone)]
pub struct VlenCodec {
index_codecs: Arc,
@@ -83,7 +83,7 @@ impl VlenCodec {
}
impl CodecTraits for VlenCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = VlenCodecConfigurationV1 {
index_codecs: self.index_codecs.create_metadatas(),
data_codecs: self.data_codecs.create_metadatas(),
@@ -265,14 +265,16 @@ impl ArrayToBytesCodecTraits for VlenCodec {
}
}
.unwrap();
- let (data, index) = super::get_vlen_bytes_and_offsets(
+ let (bytes, offsets) = super::get_vlen_bytes_and_offsets(
&index_chunk_rep,
&bytes,
&self.index_codecs,
&self.data_codecs,
options,
)?;
- Ok(ArrayBytes::new_vlen(data, index))
+ let offsets = RawBytesOffsets::new(offsets)?;
+ let array_bytes = ArrayBytes::new_vlen(bytes, offsets)?;
+ Ok(array_bytes)
}
fn partial_decoder(
diff --git a/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs b/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs
index 5a36eb96..e22a0d28 100644
--- a/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs
+++ b/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs
@@ -5,13 +5,16 @@ mod vlen_v2_partial_decoder;
pub(crate) mod vlen_v2_macros;
-use std::{mem::size_of, sync::Arc};
+use std::sync::Arc;
/// The identifier for the `vlen_v2` codec.
pub(crate) const IDENTIFIER: &str = "vlen_v2";
// pub use vlen_v2::IDENTIFIER;
-use crate::array::{codec::CodecError, RawBytes};
+use crate::array::{
+ codec::{CodecError, InvalidBytesLengthError},
+ RawBytes,
+};
pub(crate) use vlen_v2_codec::VlenV2Codec;
@@ -67,10 +70,7 @@ fn get_interleaved_bytes_and_offsets(
// Validate the bytes is long enough to contain header and element lengths
let header_length = size_of::() * (1 + num_elements);
if bytes.len() < header_length {
- return Err(CodecError::UnexpectedChunkDecodedSize(
- bytes.len(),
- header_length as u64,
- ));
+ return Err(InvalidBytesLengthError::new(bytes.len(), header_length).into());
}
// Validate the number of elements from the header
diff --git a/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_codec.rs b/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_codec.rs
index dd781f67..e0bcbef3 100644
--- a/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_codec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_codec.rs
@@ -1,4 +1,4 @@
-use std::{mem::size_of, sync::Arc};
+use std::sync::Arc;
use itertools::Itertools;
@@ -7,11 +7,11 @@ use crate::{
codec::{
ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderDefault,
ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits,
- BytesPartialEncoderTraits, CodecError, CodecOptions, CodecTraits,
+ BytesPartialEncoderTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
RecommendedConcurrency,
},
- ArrayBytes, ArrayMetadataOptions, BytesRepresentation, ChunkRepresentation, DataTypeSize,
- RawBytes,
+ ArrayBytes, BytesRepresentation, ChunkRepresentation, DataTypeSize, RawBytes,
+ RawBytesOffsets,
},
config::global_config,
metadata::v3::MetadataV3,
@@ -35,7 +35,7 @@ impl VlenV2Codec {
}
impl CodecTraits for VlenV2Codec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let config = global_config();
let name = config
.experimental_codec_names()
@@ -111,7 +111,9 @@ impl ArrayToBytesCodecTraits for VlenV2Codec {
) -> Result, CodecError> {
let num_elements = decoded_representation.num_elements_usize();
let (bytes, offsets) = super::get_interleaved_bytes_and_offsets(num_elements, &bytes)?;
- Ok(ArrayBytes::new_vlen(bytes, offsets))
+ let offsets = RawBytesOffsets::new(offsets)?;
+ let array_bytes = ArrayBytes::new_vlen(bytes, offsets)?;
+ Ok(array_bytes)
}
fn partial_decoder(
diff --git a/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_macros.rs b/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_macros.rs
index 0ea12587..8a5f35b2 100644
--- a/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_macros.rs
+++ b/zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_macros.rs
@@ -45,23 +45,24 @@ macro_rules! vlen_v2_codec {
codec::{
array_to_bytes::vlen_v2::VlenV2Codec, ArrayPartialDecoderTraits,
ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits,
- BytesPartialEncoderTraits, CodecError, CodecOptions, CodecTraits,
+ BytesPartialEncoderTraits, CodecError, CodecMetadataOptions, CodecOptions,
+ CodecTraits,
},
- ArrayBytes, ArrayCodecTraits, ArrayMetadataOptions, BytesRepresentation,
- ChunkRepresentation, RawBytes, RecommendedConcurrency,
+ ArrayBytes, ArrayCodecTraits, BytesRepresentation, ChunkRepresentation, RawBytes,
+ RecommendedConcurrency,
};
#[cfg(feature = "async")]
use crate::array::codec::{AsyncArrayPartialDecoderTraits, AsyncBytesPartialDecoderTraits};
- /// The `$identifier` codec implementation.
+ #[doc = concat!("The `", $identifier, "` codec implementation.")]
#[derive(Debug, Clone)]
pub struct $struct {
inner: Arc,
}
impl $struct {
- /// Create a new `$identifier` codec.
+ #[doc = concat!("Create a new `", $identifier, "` codec.")]
#[must_use]
pub fn new() -> Self {
Self {
@@ -77,7 +78,7 @@ macro_rules! vlen_v2_codec {
}
impl CodecTraits for $struct {
- fn create_metadata_opt(&self, options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, options: &CodecMetadataOptions) -> Option {
self.inner.create_metadata_opt(options)
}
diff --git a/zarrs/src/array/codec/array_to_bytes/zfp.rs b/zarrs/src/array/codec/array_to_bytes/zfp.rs
index 8c48c08d..5ccdf3be 100644
--- a/zarrs/src/array/codec/array_to_bytes/zfp.rs
+++ b/zarrs/src/array/codec/array_to_bytes/zfp.rs
@@ -554,7 +554,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| f32::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![
@@ -614,7 +614,7 @@ mod tests {
.map(|bytes| bytes.into_fixed().unwrap().to_vec())
.flatten()
.collect::>()
- .chunks(std::mem::size_of::())
+ .chunks(size_of::())
.map(|b| f32::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![
diff --git a/zarrs/src/array/codec/array_to_bytes/zfp/zfp_codec.rs b/zarrs/src/array/codec/array_to_bytes/zfp/zfp_codec.rs
index 8a7a1406..81294871 100644
--- a/zarrs/src/array/codec/array_to_bytes/zfp/zfp_codec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/zfp/zfp_codec.rs
@@ -15,10 +15,10 @@ use crate::{
codec::{
ArrayBytes, ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderDefault,
ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits,
- BytesPartialEncoderTraits, CodecError, CodecOptions, CodecTraits, RawBytes,
- RecommendedConcurrency,
+ BytesPartialEncoderTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RawBytes, RecommendedConcurrency,
},
- ArrayMetadataOptions, BytesRepresentation, ChunkRepresentation, DataType,
+ BytesRepresentation, ChunkRepresentation, DataType,
},
config::global_config,
metadata::v3::{array::codec::zfp::ZfpMode, MetadataV3},
@@ -129,7 +129,7 @@ impl ZfpCodec {
}
impl CodecTraits for ZfpCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = ZfpCodecConfigurationV1 {
write_header: Some(self.write_header),
mode: self.mode,
diff --git a/zarrs/src/array/codec/bytes_to_bytes/blosc.rs b/zarrs/src/array/codec/bytes_to_bytes/blosc.rs
index 9ad54c9f..b8a00612 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/blosc.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/blosc.rs
@@ -127,11 +127,7 @@ fn blosc_compress_bytes(
fn blosc_validate(src: &[u8]) -> Option {
let mut destsize: usize = 0;
let valid = unsafe {
- blosc_cbuffer_validate(
- src.as_ptr().cast::(),
- src.len(),
- std::ptr::addr_of_mut!(destsize),
- )
+ blosc_cbuffer_validate(src.as_ptr().cast::(), src.len(), &raw mut destsize)
} == 0;
valid.then_some(destsize)
}
@@ -145,8 +141,8 @@ fn blosc_typesize(src: &[u8]) -> Option {
unsafe {
blosc_cbuffer_metainfo(
src.as_ptr().cast::(),
- std::ptr::addr_of_mut!(typesize),
- std::ptr::addr_of_mut!(flags),
+ &raw mut typesize,
+ &raw mut flags,
);
};
(typesize != 0).then_some(typesize)
@@ -164,9 +160,9 @@ fn blosc_nbytes(src: &[u8]) -> Option {
unsafe {
blosc_cbuffer_sizes(
src.as_ptr().cast::(),
- std::ptr::addr_of_mut!(uncompressed_bytes),
- std::ptr::addr_of_mut!(cbytes),
- std::ptr::addr_of_mut!(blocksize),
+ &raw mut uncompressed_bytes,
+ &raw mut cbytes,
+ &raw mut blocksize,
);
};
(uncompressed_bytes > 0 && cbytes > 0 && blocksize > 0).then_some(uncompressed_bytes)
@@ -379,7 +375,7 @@ mod tests {
let decoded: Vec = decoded
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
@@ -428,7 +424,7 @@ mod tests {
let decoded: Vec = decoded
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
diff --git a/zarrs/src/array/codec/bytes_to_bytes/blosc/blosc_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/blosc/blosc_codec.rs
index 62972cd9..afd1bdb0 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/blosc/blosc_codec.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/blosc/blosc_codec.rs
@@ -6,9 +6,10 @@ use crate::{
array::{
codec::{
BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits,
- BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency,
+ BytesToBytesCodecTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RecommendedConcurrency,
},
- ArrayMetadataOptions, BytesRepresentation, RawBytes,
+ BytesRepresentation, RawBytes,
},
metadata::v3::MetadataV3,
plugin::PluginCreateError,
@@ -131,7 +132,7 @@ impl BloscCodec {
}
impl CodecTraits for BloscCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = BloscCodecConfigurationV1 {
cname: self.cname,
clevel: self.clevel,
diff --git a/zarrs/src/array/codec/bytes_to_bytes/bz2.rs b/zarrs/src/array/codec/bytes_to_bytes/bz2.rs
index 58eff05a..6a182f49 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/bz2.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/bz2.rs
@@ -126,7 +126,7 @@ mod tests {
let decoded: Vec = decoded
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
@@ -174,7 +174,7 @@ mod tests {
let decoded: Vec = decoded
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
diff --git a/zarrs/src/array/codec/bytes_to_bytes/bz2/bz2_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/bz2/bz2_codec.rs
index d1380c3e..e967e681 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/bz2/bz2_codec.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/bz2/bz2_codec.rs
@@ -8,9 +8,10 @@ use crate::{
array::{
codec::{
BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits,
- BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency,
+ BytesToBytesCodecTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RecommendedConcurrency,
},
- ArrayMetadataOptions, BytesRepresentation, RawBytes,
+ BytesRepresentation, RawBytes,
},
config::global_config,
metadata::v3::MetadataV3,
@@ -46,7 +47,7 @@ impl Bz2Codec {
}
impl CodecTraits for Bz2Codec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = Bz2CodecConfigurationV1 {
level: Bz2CompressionLevel::try_from(self.compression.level())
.expect("checked on init"),
diff --git a/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs b/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs
index e95ff365..7f8667cd 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/crc32c.rs
@@ -38,7 +38,7 @@ pub(crate) fn create_codec_crc32c(metadata: &MetadataV3) -> Result();
+const CHECKSUM_SIZE: usize = size_of::();
#[cfg(test)]
mod tests {
@@ -89,8 +89,7 @@ mod tests {
assert_eq!(bytes, decoded.to_vec());
// Check that the checksum is correct
- let checksum: &[u8; 4] = &encoded
- [encoded.len() - core::mem::size_of::()..encoded.len()]
+ let checksum: &[u8; 4] = &encoded[encoded.len() - size_of::()..encoded.len()]
.try_into()
.unwrap();
println!("checksum {checksum:?}");
diff --git a/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs
index 7e88534b..47d1cfc6 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs
@@ -5,9 +5,10 @@ use crate::{
codec::{
bytes_to_bytes::strip_suffix_partial_decoder::StripSuffixPartialDecoder,
BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits,
- BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency,
+ BytesToBytesCodecTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RecommendedConcurrency,
},
- ArrayMetadataOptions, BytesRepresentation, RawBytes,
+ BytesRepresentation, RawBytes,
},
metadata::v3::MetadataV3,
};
@@ -39,7 +40,7 @@ impl Crc32cCodec {
}
impl CodecTraits for Crc32cCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = Crc32cCodecConfigurationV1 {};
Some(MetadataV3::new_with_serializable_configuration(IDENTIFIER, &configuration).unwrap())
}
diff --git a/zarrs/src/array/codec/bytes_to_bytes/fletcher32.rs b/zarrs/src/array/codec/bytes_to_bytes/fletcher32.rs
index 74411496..159ed82b 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/fletcher32.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/fletcher32.rs
@@ -53,7 +53,7 @@ pub(crate) fn create_codec_fletcher32(metadata: &MetadataV3) -> Result();
+const CHECKSUM_SIZE: usize = size_of::();
#[cfg(test)]
mod tests {
@@ -106,8 +106,7 @@ mod tests {
assert_eq!(bytes, decoded.to_vec());
// Check that the checksum is correct
- let checksum: &[u8; 4] = &encoded
- [encoded.len() - core::mem::size_of::()..encoded.len()]
+ let checksum: &[u8; 4] = &encoded[encoded.len() - size_of::()..encoded.len()]
.try_into()
.unwrap();
println!("checksum {checksum:?}");
diff --git a/zarrs/src/array/codec/bytes_to_bytes/fletcher32/fletcher32_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/fletcher32/fletcher32_codec.rs
index 04e319f6..0944ea56 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/fletcher32/fletcher32_codec.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/fletcher32/fletcher32_codec.rs
@@ -7,9 +7,10 @@ use crate::{
codec::{
bytes_to_bytes::strip_suffix_partial_decoder::StripSuffixPartialDecoder,
BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits,
- BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency,
+ BytesToBytesCodecTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RecommendedConcurrency,
},
- ArrayMetadataOptions, BytesRepresentation, RawBytes,
+ BytesRepresentation, RawBytes,
},
metadata::v3::MetadataV3,
};
@@ -43,7 +44,7 @@ impl Fletcher32Codec {
}
impl CodecTraits for Fletcher32Codec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = Fletcher32CodecConfigurationV1 {};
Some(MetadataV3::new_with_serializable_configuration(IDENTIFIER, &configuration).unwrap())
}
diff --git a/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs b/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs
index 9a14cf0c..13ade3f2 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs
@@ -28,7 +28,7 @@ pub use gdeflate_codec::GDeflateCodec;
use crate::{
array::{
- codec::{Codec, CodecError, CodecPlugin},
+ codec::{Codec, CodecError, CodecPlugin, InvalidBytesLengthError},
RawBytes,
},
metadata::v3::{array::codec::gdeflate, MetadataV3},
@@ -37,7 +37,6 @@ use crate::{
pub use gdeflate::IDENTIFIER;
-use core::mem::size_of;
use std::sync::Arc;
// Register the codec.
@@ -62,10 +61,11 @@ const GDEFLATE_STATIC_HEADER_LENGTH: usize = 2 * size_of::();
fn gdeflate_decode(encoded_value: &RawBytes<'_>) -> Result, CodecError> {
if encoded_value.len() < GDEFLATE_STATIC_HEADER_LENGTH {
- return Err(CodecError::UnexpectedChunkDecodedSize(
+ return Err(InvalidBytesLengthError::new(
encoded_value.len(),
- GDEFLATE_STATIC_HEADER_LENGTH as u64,
- ));
+ GDEFLATE_STATIC_HEADER_LENGTH,
+ )
+ .into());
}
// Decode the static header
@@ -78,10 +78,11 @@ fn gdeflate_decode(encoded_value: &RawBytes<'_>) -> Result, CodecError>
// Check length of dynamic header
let dynamic_header_length = num_pages * size_of::();
if encoded_value.len() < GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length {
- return Err(CodecError::UnexpectedChunkDecodedSize(
+ return Err(InvalidBytesLengthError::new(
encoded_value.len(),
- (GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length) as u64,
- ));
+ GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length,
+ )
+ .into());
}
// Decode the pages
@@ -329,7 +330,7 @@ mod tests {
let decoded_partial_chunk: Vec = decoded_partial_chunk
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![2, 3, 5];
@@ -372,7 +373,7 @@ mod tests {
let decoded_partial_chunk: Vec = decoded_partial_chunk
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![2, 3, 5];
diff --git a/zarrs/src/array/codec/bytes_to_bytes/gdeflate/gdeflate_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/gdeflate/gdeflate_codec.rs
index a26dc588..8d969abd 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/gdeflate/gdeflate_codec.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/gdeflate/gdeflate_codec.rs
@@ -1,13 +1,12 @@
-use core::mem::size_of;
use std::{borrow::Cow, sync::Arc};
use crate::{
array::{
codec::{
BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits,
- BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits,
+ BytesToBytesCodecTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
},
- ArrayMetadataOptions, BytesRepresentation, RawBytes, RecommendedConcurrency,
+ BytesRepresentation, RawBytes, RecommendedConcurrency,
},
metadata::v3::MetadataV3,
};
@@ -48,7 +47,7 @@ impl GDeflateCodec {
}
impl CodecTraits for GDeflateCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = GDeflateCodecConfigurationV1 {
level: self.compression_level,
};
diff --git a/zarrs/src/array/codec/bytes_to_bytes/gzip.rs b/zarrs/src/array/codec/bytes_to_bytes/gzip.rs
index f9dab455..7be5438e 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/gzip.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/gzip.rs
@@ -129,7 +129,7 @@ mod tests {
let decoded_partial_chunk: Vec = decoded_partial_chunk
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![2, 3, 5];
@@ -171,7 +171,7 @@ mod tests {
let decoded_partial_chunk: Vec = decoded_partial_chunk
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![2, 3, 5];
diff --git a/zarrs/src/array/codec/bytes_to_bytes/gzip/gzip_codec.rs b/zarrs/src/array/codec/bytes_to_bytes/gzip/gzip_codec.rs
index af57ada4..aa249c68 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/gzip/gzip_codec.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/gzip/gzip_codec.rs
@@ -10,9 +10,10 @@ use crate::{
array::{
codec::{
BytesPartialDecoderTraits, BytesPartialEncoderDefault, BytesPartialEncoderTraits,
- BytesToBytesCodecTraits, CodecError, CodecOptions, CodecTraits, RecommendedConcurrency,
+ BytesToBytesCodecTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits,
+ RecommendedConcurrency,
},
- ArrayMetadataOptions, BytesRepresentation, RawBytes,
+ BytesRepresentation, RawBytes,
},
metadata::v3::MetadataV3,
};
@@ -52,7 +53,7 @@ impl GzipCodec {
}
impl CodecTraits for GzipCodec {
- fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option {
+ fn create_metadata_opt(&self, _options: &CodecMetadataOptions) -> Option {
let configuration = GzipCodecConfigurationV1 {
level: self.compression_level,
};
diff --git a/zarrs/src/array/codec/bytes_to_bytes/test_unbounded.rs b/zarrs/src/array/codec/bytes_to_bytes/test_unbounded.rs
index 8336d9ba..90b379df 100644
--- a/zarrs/src/array/codec/bytes_to_bytes/test_unbounded.rs
+++ b/zarrs/src/array/codec/bytes_to_bytes/test_unbounded.rs
@@ -69,7 +69,7 @@ mod tests {
let decoded_partial_chunk: Vec = decoded_partial_chunk
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::())
.map(|b| u16::from_ne_bytes(b.try_into().unwrap()))
.collect();
let answer: Vec = vec![2, 3, 5];
@@ -112,7 +112,7 @@ mod tests {
let decoded_partial_chunk: Vec = decoded_partial_chunk
.to_vec()
- .chunks_exact(std::mem::size_of::())
+ .chunks_exact(size_of::