From 0458b9fef7a3d173fcd77ae21abed37c885bf5e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebasti=C3=A1n=20Galkin?=
Date: Wed, 19 Feb 2025 20:47:06 -0300
Subject: [PATCH] Treat zarr metadata as a blob (mostly) (#749)

* Treat zarr metadata as a blob (mostly)

We were parsing too much of the Zarr metadata. Icechunk is currently only interested in the array size and chunk sizes. It may become interested in the dimension names at some point. But still, we were parsing the whole metadata, storing it internally as a parsed object, and then formatting it back to json. We did this when the project started, imagining we might need more from the metadata. For example, we thought we might need to incorporate the codec pipeline in Icechunk.

With this patch, we now only extract the parts of the zarr metadata we care about, and we preserve the original blob of metadata as is, in a new user_data byte array. We return this blob in metadata requests. If, in the future, we need more from the metadata, we can parse it and add it to the storage.

Simpler and less code. It works with zarr extensions, and it's more resilient to zarr spec changes.

There is a price to this: we are no longer separating the user attributes from the rest of the metadata. The only impact is that we can no longer treat conflicts in user attributes separately from the rest of the zarr metadata. If we consider this important in the short term, we can add it back by parsing more of the metadata blobs.

Also in this change:

- No more AttributeFile. We'll implement it when we need it
- Better snapshot serialization [on-disk breaking change]

* Enable complex arrays in tests

* fix xarray test

---------

Co-authored-by: Deepak Cherian
---
 .github/workflows/python-check.yaml | 1 +
 docs/docs/icechunk-python/version-control.md | 8 +-
 .../python/icechunk/_icechunk_python.pyi | 47 +-
 icechunk-python/src/conflicts.rs | 33 +-
 icechunk-python/src/repository.rs | 34 +-
 .../test-repo/chunks/20BVFJQXWR84R1F1Q58G | Bin 0 -> 19 bytes
 .../test-repo/chunks/5PCMGAGQ1FC1GKKHZTK0 | Bin 0 -> 19 bytes
 .../test-repo/chunks/AN2V69NWNGNCW91839Q0 | Bin 0 -> 19 bytes
 .../test-repo/chunks/HTKY8SN65KBX42E4V9FG | Bin 0 -> 19 bytes
 .../tests/data/test-repo/config.yaml | 24 +-
 .../test-repo/manifests/0GQQ44D2837GGMHY81CG | Bin 0 -> 237 bytes
 .../test-repo/manifests/73Q2CY1JSN768PFJS2M0 | Bin 0 -> 168 bytes
 .../test-repo/manifests/8WT6R2E6WVC9GJ7BS6GG | Bin 0 -> 277 bytes
 .../test-repo/manifests/C38XX4Z2517M93GQ5MA0 | Bin 0 -> 174 bytes
 .../test-repo/refs/branch.main/ZZZZZZZW.json | 2 +-
 .../test-repo/refs/branch.main/ZZZZZZZX.json | 2 +-
 .../test-repo/refs/branch.main/ZZZZZZZY.json | 2 +-
 .../test-repo/refs/branch.main/ZZZZZZZZ.json | 2 +-
 .../refs/branch.my-branch/ZZZZZZZX.json | 2 +-
 .../refs/branch.my-branch/ZZZZZZZY.json | 2 +-
 .../refs/branch.my-branch/ZZZZZZZZ.json | 2 +-
 .../data/test-repo/refs/tag.deleted/ref.json | 2 +-
 .../refs/tag.it also works!/ref.json | 2 +-
 .../test-repo/refs/tag.it works!/ref.json | 2 +-
 .../test-repo/snapshots/394QWZDXAY74HP6Q8P3G | Bin 0 -> 867 bytes
 .../test-repo/snapshots/3EKE17N8YF5ZK5NRMZJ0 | Bin 0 -> 801 bytes
 .../test-repo/snapshots/GNFK0SSWD5B8CVA53XEG | Bin 0 -> 867 bytes
 .../test-repo/snapshots/HNG82GMS51ECXFXFCYJG | Bin 0 -> 871 bytes
 .../test-repo/snapshots/R7F1RJHPZ428N4AK19K0 | Bin 0 -> 178 bytes
 .../test-repo/snapshots/TNE0TX645A2G7VTXFA1G | Bin 0 -> 1053 bytes
 .../transactions/394QWZDXAY74HP6Q8P3G | Bin 0 -> 147 bytes
 .../transactions/3EKE17N8YF5ZK5NRMZJ0 | Bin 0 -> 157 bytes
 .../transactions/GNFK0SSWD5B8CVA53XEG | Bin 0 -> 235 bytes
.../transactions/HNG82GMS51ECXFXFCYJG | Bin 0 -> 146 bytes .../transactions/TNE0TX645A2G7VTXFA1G | Bin 0 -> 173 bytes icechunk-python/tests/test_can_read_old.py | 4 +- icechunk-python/tests/test_conflicts.py | 39 +- icechunk-python/tests/test_timetravel.py | 31 +- .../tests/test_zarr/test_properties.py | 3 - .../tests/test_zarr/test_stateful.py | 2 - icechunk/examples/low_level_dataset.rs | 76 +- .../examples/multithreaded_get_chunk_refs.rs | 35 +- icechunk/flatbuffers/snapshot.fbs | 54 +- icechunk/flatbuffers/transaction_log.fbs | 8 +- icechunk/src/change_set.rs | 209 ++- icechunk/src/conflicts/basic_solver.rs | 60 +- icechunk/src/conflicts/detector.rs | 55 +- icechunk/src/conflicts/mod.rs | 6 +- .../src/format/flatbuffers/all_generated.rs | 1114 ++++++--------- icechunk/src/format/mod.rs | 7 +- icechunk/src/format/snapshot.rs | 484 +++---- icechunk/src/format/transaction_log.rs | 82 +- icechunk/src/lib.rs | 1 - icechunk/src/metadata/data_type.rs | 99 -- icechunk/src/metadata/fill_value.rs | 284 ---- icechunk/src/metadata/mod.rs | 83 -- icechunk/src/repository.rs | 75 +- icechunk/src/session.rs | 1256 ++++++++--------- icechunk/src/store.rs | 498 ++----- icechunk/src/strategies.rs | 76 +- icechunk/tests/test_concurrency.rs | 32 +- icechunk/tests/test_distributed_writes.rs | 22 +- icechunk/tests/test_gc.rs | 51 +- icechunk/tests/test_virtual_refs.rs | 41 +- 64 files changed, 1728 insertions(+), 3226 deletions(-) create mode 100644 icechunk-python/tests/data/test-repo/chunks/20BVFJQXWR84R1F1Q58G create mode 100644 icechunk-python/tests/data/test-repo/chunks/5PCMGAGQ1FC1GKKHZTK0 create mode 100644 icechunk-python/tests/data/test-repo/chunks/AN2V69NWNGNCW91839Q0 create mode 100644 icechunk-python/tests/data/test-repo/chunks/HTKY8SN65KBX42E4V9FG create mode 100644 icechunk-python/tests/data/test-repo/manifests/0GQQ44D2837GGMHY81CG create mode 100644 icechunk-python/tests/data/test-repo/manifests/73Q2CY1JSN768PFJS2M0 create mode 100644 icechunk-python/tests/data/test-repo/manifests/8WT6R2E6WVC9GJ7BS6GG create mode 100644 icechunk-python/tests/data/test-repo/manifests/C38XX4Z2517M93GQ5MA0 create mode 100644 icechunk-python/tests/data/test-repo/snapshots/394QWZDXAY74HP6Q8P3G create mode 100644 icechunk-python/tests/data/test-repo/snapshots/3EKE17N8YF5ZK5NRMZJ0 create mode 100644 icechunk-python/tests/data/test-repo/snapshots/GNFK0SSWD5B8CVA53XEG create mode 100644 icechunk-python/tests/data/test-repo/snapshots/HNG82GMS51ECXFXFCYJG create mode 100644 icechunk-python/tests/data/test-repo/snapshots/R7F1RJHPZ428N4AK19K0 create mode 100644 icechunk-python/tests/data/test-repo/snapshots/TNE0TX645A2G7VTXFA1G create mode 100644 icechunk-python/tests/data/test-repo/transactions/394QWZDXAY74HP6Q8P3G create mode 100644 icechunk-python/tests/data/test-repo/transactions/3EKE17N8YF5ZK5NRMZJ0 create mode 100644 icechunk-python/tests/data/test-repo/transactions/GNFK0SSWD5B8CVA53XEG create mode 100644 icechunk-python/tests/data/test-repo/transactions/HNG82GMS51ECXFXFCYJG create mode 100644 icechunk-python/tests/data/test-repo/transactions/TNE0TX645A2G7VTXFA1G delete mode 100644 icechunk/src/metadata/data_type.rs delete mode 100644 icechunk/src/metadata/fill_value.rs delete mode 100644 icechunk/src/metadata/mod.rs diff --git a/.github/workflows/python-check.yaml b/.github/workflows/python-check.yaml index 03878f47..aaa033f6 100644 --- a/.github/workflows/python-check.yaml +++ b/.github/workflows/python-check.yaml @@ -163,5 +163,6 @@ jobs: python3 -m venv .venv source .venv/bin/activate pip install icechunk['test'] 
--find-links dist --force-reinstall + pip install pytest-mypy-plugins # pass xarray's pyproject.toml so that pytest can find the `flaky` fixture pytest -c=../../xarray/pyproject.toml -W ignore tests/run_xarray_backends_tests.py diff --git a/docs/docs/icechunk-python/version-control.md b/docs/docs/icechunk-python/version-control.md index ca3f1e00..a053d55e 100644 --- a/docs/docs/icechunk-python/version-control.md +++ b/docs/docs/icechunk-python/version-control.md @@ -327,7 +327,7 @@ session2.rebase(icechunk.ConflictDetector()) # RebaseFailedError: Rebase failed on snapshot AE9XS2ZWXT861KD2JGHG: 1 conflicts found ``` -This however fails because both sessions modified the `foo` attribute on the root group. We can use the `ConflictError` to get more information about the conflict. +This however fails because both sessions modified metadata. We can use the `ConflictError` to get more information about the conflict. ```python try: @@ -335,13 +335,13 @@ try: except icechunk.RebaseFailedError as e: print(e.conflicts) -# [Conflict(UserAttributesDoubleUpdate, path=/)] +# [Conflict(ZarrMetadataDoubleUpdate, path=/)] ``` -This tells us that the conflict is caused by the two sessions modifying the user attributes of the root group (`/`). In this casewe have decided that second session set the `foo` attribute to the correct value, so we can now try to rebase by instructing the `rebase` method to use the second session's changes with the [`BasicConflictSolver`](../reference/#icechunk.BasicConflictSolver). +This tells us that the conflict is caused by the two sessions modifying the metadata attributes of the root group (`/`). In this case we have decided that second session set the `foo` attribute to the correct value, so we can now try to rebase by instructing the `rebase` method to use the second session's changes with the [`BasicConflictSolver`](../reference/#icechunk.BasicConflictSolver). ```python -session2.rebase(icechunk.BasicConflictSolver(on_user_attributes_conflict=icechunk.VersionSelection.UseOurs)) +session2.rebase(icechunk.BasicConflictSolver()) ``` Success! We can now try and commit the changes again. diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index 8dc188f6..0db86fc5 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -1393,7 +1393,6 @@ class BasicConflictSolver(ConflictSolver): This conflict solver allows for simple configuration of resolution behavior for conflicts that may occur during a rebase operation. It will attempt to resolve a limited set of conflicts based on the configuration options provided. 
- - When a user attribute conflict is encountered, the behavior is determined by the `on_user_attributes_conflict` option - When a chunk conflict is encountered, the behavior is determined by the `on_chunk_conflict` option - When an array is deleted that has been updated, `fail_on_delete_of_updated_array` will determine whether to fail the rebase operation - When a group is deleted that has been updated, `fail_on_delete_of_updated_group` will determine whether to fail the rebase operation @@ -1402,7 +1401,6 @@ class BasicConflictSolver(ConflictSolver): def __init__( self, *, - on_user_attributes_conflict: VersionSelection = VersionSelection.UseOurs, on_chunk_conflict: VersionSelection = VersionSelection.UseOurs, fail_on_delete_of_updated_array: bool = False, fail_on_delete_of_updated_group: bool = False, @@ -1411,8 +1409,6 @@ class BasicConflictSolver(ConflictSolver): Parameters ---------- - on_user_attributes_conflict: VersionSelection - The behavior to use when a user attribute conflict is encountered, by default VersionSelection.use_ours() on_chunk_conflict: VersionSelection The behavior to use when a chunk conflict is encountered, by default VersionSelection.use_theirs() fail_on_delete_of_updated_array: bool @@ -1476,39 +1472,36 @@ class ConflictType(Enum): Attributes: NewNodeConflictsWithExistingNode: int A new node conflicts with an existing node - NewNodeInInvalidGroup: int + NewNodeInInvalidGroup: tuple[int] A new node is in an invalid group - ZarrMetadataDoubleUpdate: int + ZarrMetadataDoubleUpdate: tuple[int] A zarr metadata update conflicts with an existing zarr metadata update - ZarrMetadataUpdateOfDeletedArray: int + ZarrMetadataUpdateOfDeletedArray: tuple[int] A zarr metadata update is attempted on a deleted array - UserAttributesDoubleUpdate: int - A user attribute update conflicts with an existing user attribute update - UserAttributesUpdateOfDeletedNode: int - A user attribute update is attempted on a deleted node - ChunkDoubleUpdate: int + ZarrMetadataUpdateOfDeletedGroup: tuple[int] + A zarr metadata update is attempted on a deleted group + ChunkDoubleUpdate: tuple[int] A chunk update conflicts with an existing chunk update - ChunksUpdatedInDeletedArray: int + ChunksUpdatedInDeletedArray: tuple[int] Chunks are updated in a deleted array - ChunksUpdatedInUpdatedArray: int + ChunksUpdatedInUpdatedArray: tuple[int] Chunks are updated in an updated array - DeleteOfUpdatedArray: int + DeleteOfUpdatedArray: tuple[int] A delete is attempted on an updated array - DeleteOfUpdatedGroup: int + DeleteOfUpdatedGroup: tuple[int] A delete is attempted on an updated group """ - NewNodeConflictsWithExistingNode = 1 - NewNodeInInvalidGroup = 2 - ZarrMetadataDoubleUpdate = 3 - ZarrMetadataUpdateOfDeletedArray = 4 - UserAttributesDoubleUpdate = 5 - UserAttributesUpdateOfDeletedNode = 6 - ChunkDoubleUpdate = 7 - ChunksUpdatedInDeletedArray = 8 - ChunksUpdatedInUpdatedArray = 9 - DeleteOfUpdatedArray = 10 - DeleteOfUpdatedGroup = 11 + NewNodeConflictsWithExistingNode = (1,) + NewNodeInInvalidGroup = (2,) + ZarrMetadataDoubleUpdate = (3,) + ZarrMetadataUpdateOfDeletedArray = (4,) + ZarrMetadataUpdateOfDeletedGroup = (5,) + ChunkDoubleUpdate = (6,) + ChunksUpdatedInDeletedArray = (7,) + ChunksUpdatedInUpdatedArray = (8,) + DeleteOfUpdatedArray = (9,) + DeleteOfUpdatedGroup = (10,) class Conflict: """A conflict detected between snapshots""" diff --git a/icechunk-python/src/conflicts.rs b/icechunk-python/src/conflicts.rs index 13d3702e..af169384 100644 --- a/icechunk-python/src/conflicts.rs +++ 
b/icechunk-python/src/conflicts.rs @@ -14,13 +14,12 @@ pub enum PyConflictType { NewNodeInInvalidGroup = 2, ZarrMetadataDoubleUpdate = 3, ZarrMetadataUpdateOfDeletedArray = 4, - UserAttributesDoubleUpdate = 5, - UserAttributesUpdateOfDeletedNode = 6, - ChunkDoubleUpdate = 7, - ChunksUpdatedInDeletedArray = 8, - ChunksUpdatedInUpdatedArray = 9, - DeleteOfUpdatedArray = 10, - DeleteOfUpdatedGroup = 11, + ZarrMetadataUpdateOfDeletedGroup = 5, + ChunkDoubleUpdate = 6, + ChunksUpdatedInDeletedArray = 7, + ChunksUpdatedInUpdatedArray = 8, + DeleteOfUpdatedArray = 9, + DeleteOfUpdatedGroup = 10, } impl Display for PyConflictType { @@ -31,13 +30,12 @@ impl Display for PyConflictType { } PyConflictType::NewNodeInInvalidGroup => "New node in invalid group", PyConflictType::ZarrMetadataDoubleUpdate => "Zarr metadata double update", + PyConflictType::ZarrMetadataUpdateOfDeletedGroup => { + "Zarr metadata update of deleted group" + } PyConflictType::ZarrMetadataUpdateOfDeletedArray => { "Zarr metadata update of deleted array" } - PyConflictType::UserAttributesDoubleUpdate => "User attributes double update", - PyConflictType::UserAttributesUpdateOfDeletedNode => { - "User attributes update of deleted node" - } PyConflictType::ChunkDoubleUpdate => "Chunk double update", PyConflictType::ChunksUpdatedInDeletedArray => { "Chunks updated in deleted array" @@ -108,13 +106,8 @@ impl From<&Conflict> for PyConflict { path: path.to_string(), conflicted_chunks: None, }, - Conflict::UserAttributesDoubleUpdate { path, node_id: _ } => PyConflict { - conflict_type: PyConflictType::UserAttributesDoubleUpdate, - path: path.to_string(), - conflicted_chunks: None, - }, - Conflict::UserAttributesUpdateOfDeletedNode(path) => PyConflict { - conflict_type: PyConflictType::UserAttributesUpdateOfDeletedNode, + Conflict::ZarrMetadataUpdateOfDeletedGroup(path) => PyConflict { + conflict_type: PyConflictType::ZarrMetadataUpdateOfDeletedGroup, path: path.to_string(), conflicted_chunks: None, }, @@ -188,9 +181,8 @@ pub struct PyBasicConflictSolver; #[pymethods] impl PyBasicConflictSolver { #[new] - #[pyo3(signature = (*, on_user_attributes_conflict=PyVersionSelection::UseOurs, on_chunk_conflict=PyVersionSelection::UseOurs, fail_on_delete_of_updated_array = false, fail_on_delete_of_updated_group = false))] + #[pyo3(signature = (*, on_chunk_conflict=PyVersionSelection::UseOurs, fail_on_delete_of_updated_array = false, fail_on_delete_of_updated_group = false))] fn new( - on_user_attributes_conflict: PyVersionSelection, on_chunk_conflict: PyVersionSelection, fail_on_delete_of_updated_array: bool, fail_on_delete_of_updated_group: bool, @@ -198,7 +190,6 @@ impl PyBasicConflictSolver { ( Self, PyConflictSolver(Arc::new(BasicConflictSolver { - on_user_attributes_conflict: on_user_attributes_conflict.into(), on_chunk_conflict: on_chunk_conflict.into(), fail_on_delete_of_updated_array, fail_on_delete_of_updated_group, diff --git a/icechunk-python/src/repository.rs b/icechunk-python/src/repository.rs index e8b83434..ab9da2e3 100644 --- a/icechunk-python/src/repository.rs +++ b/icechunk-python/src/repository.rs @@ -195,9 +195,9 @@ pub struct PyDiff { #[pyo3(get)] pub deleted_arrays: BTreeSet, #[pyo3(get)] - pub updated_user_attributes: BTreeSet, + pub updated_groups: BTreeSet, #[pyo3(get)] - pub updated_zarr_metadata: BTreeSet, + pub updated_arrays: BTreeSet, #[pyo3(get)] // A Vec instead of a set to avoid issues with list not being hashable in python pub updated_chunks: BTreeMap>>, @@ -213,16 +213,10 @@ impl From for PyDiff { 
value.deleted_groups.into_iter().map(|path| path.to_string()).collect(); let deleted_arrays = value.deleted_arrays.into_iter().map(|path| path.to_string()).collect(); - let updated_user_attributes = value - .updated_user_attributes - .into_iter() - .map(|path| path.to_string()) - .collect(); - let updated_zarr_metadata = value - .updated_zarr_metadata - .into_iter() - .map(|path| path.to_string()) - .collect(); + let updated_groups = + value.updated_groups.into_iter().map(|path| path.to_string()).collect(); + let updated_arrays = + value.updated_arrays.into_iter().map(|path| path.to_string()).collect(); let updated_chunks = value .updated_chunks .into_iter() @@ -238,8 +232,8 @@ impl From for PyDiff { new_arrays, deleted_groups, deleted_arrays, - updated_user_attributes, - updated_zarr_metadata, + updated_groups, + updated_arrays, updated_chunks, } } @@ -266,17 +260,17 @@ impl PyDiff { res.push('\n'); } - if !self.updated_zarr_metadata.is_empty() { - res.push_str("Zarr metadata updated:\n"); - for g in self.updated_zarr_metadata.iter() { + if !self.updated_groups.is_empty() { + res.push_str("Group definitions updated:\n"); + for g in self.updated_groups.iter() { writeln!(res, " {}", g).unwrap(); } res.push('\n'); } - if !self.updated_user_attributes.is_empty() { - res.push_str("User attributes updated:\n"); - for g in self.updated_user_attributes.iter() { + if !self.updated_arrays.is_empty() { + res.push_str("Array definitions updated:\n"); + for g in self.updated_arrays.iter() { writeln!(res, " {}", g).unwrap(); } res.push('\n'); diff --git a/icechunk-python/tests/data/test-repo/chunks/20BVFJQXWR84R1F1Q58G b/icechunk-python/tests/data/test-repo/chunks/20BVFJQXWR84R1F1Q58G new file mode 100644 index 0000000000000000000000000000000000000000..8666a286982f8d95ce7ab61a59741fcac452468f GIT binary patch literal 19 acmdPcs{dCZC6s|dfq_B8iIL&&lMVnkjRn^L literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/chunks/5PCMGAGQ1FC1GKKHZTK0 b/icechunk-python/tests/data/test-repo/chunks/5PCMGAGQ1FC1GKKHZTK0 new file mode 100644 index 0000000000000000000000000000000000000000..8666a286982f8d95ce7ab61a59741fcac452468f GIT binary patch literal 19 acmdPcs{dCZC6s|dfq_B8iIL&&lMVnkjRn^L literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/chunks/AN2V69NWNGNCW91839Q0 b/icechunk-python/tests/data/test-repo/chunks/AN2V69NWNGNCW91839Q0 new file mode 100644 index 0000000000000000000000000000000000000000..8666a286982f8d95ce7ab61a59741fcac452468f GIT binary patch literal 19 acmdPcs{dCZC6s|dfq_B8iIL&&lMVnkjRn^L literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/chunks/HTKY8SN65KBX42E4V9FG b/icechunk-python/tests/data/test-repo/chunks/HTKY8SN65KBX42E4V9FG new file mode 100644 index 0000000000000000000000000000000000000000..8666a286982f8d95ce7ab61a59741fcac452468f GIT binary patch literal 19 acmdPcs{dCZC6s|dfq_B8iIL&&lMVnkjRn^L literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/config.yaml b/icechunk-python/tests/data/test-repo/config.yaml index e3d9b500..65e4dd1e 100644 --- a/icechunk-python/tests/data/test-repo/config.yaml +++ b/icechunk-python/tests/data/test-repo/config.yaml @@ -5,14 +5,6 @@ compression: null caching: null storage: null virtual_chunk_containers: - gcs: - name: gcs - url_prefix: gcs - store: !gcs {} - az: - name: az - url_prefix: az - store: !azure {} tigris: name: tigris url_prefix: tigris @@ -21,6 +13,18 @@ virtual_chunk_containers: endpoint_url: https://fly.storage.tigris.dev anonymous: false allow_http: false + file: + 
name: file + url_prefix: file + store: !local_file_system '' + gcs: + name: gcs + url_prefix: gcs + store: !gcs {} + az: + name: az + url_prefix: az + store: !azure {} s3: name: s3 url_prefix: s3:// @@ -29,8 +33,4 @@ virtual_chunk_containers: endpoint_url: http://localhost:9000 anonymous: false allow_http: true - file: - name: file - url_prefix: file - store: !local_file_system '' manifest: null diff --git a/icechunk-python/tests/data/test-repo/manifests/0GQQ44D2837GGMHY81CG b/icechunk-python/tests/data/test-repo/manifests/0GQQ44D2837GGMHY81CG new file mode 100644 index 0000000000000000000000000000000000000000..5dff37e3a650db3fcde7ba9659016e1ce2383ea4 GIT binary patch literal 237 zcmeZtcKtAad6%(=7Ai@07k-dZ7D;1*da9k$Hzy~uxgky#C$>CK0WlJ+LE zuPm3)P}ox95hUcJpi?*ZRamlSkECeh^ck%a-FiJZ&Zsu9E|%f%@=iPO%B4l3E;Taj zQ9)n-o>n_^1xM{gnZK8L=Znv4=X(A~P_V{i%kKQ>(*2C5CRB*>GsrtLIILN_%juX* ebD{XG#|GR1QM0~s%{<6!Xsl|$b?dd@6gdFZx>)-F literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/manifests/73Q2CY1JSN768PFJS2M0 b/icechunk-python/tests/data/test-repo/manifests/73Q2CY1JSN768PFJS2M0 new file mode 100644 index 0000000000000000000000000000000000000000..7b1848c26bf262bb5d8e1cd6f6240f2681e7575f GIT binary patch literal 168 zcmeZtcKtAad6%@xkaqc9Fq3b-aJ^gF+irCk&B@`fnmAyWupsw7Dp+SKKHIO M4rW?h9JQ4l0LLvssQ>@~ literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/manifests/8WT6R2E6WVC9GJ7BS6GG b/icechunk-python/tests/data/test-repo/manifests/8WT6R2E6WVC9GJ7BS6GG new file mode 100644 index 0000000000000000000000000000000000000000..1d3cb4ab9b7a5bbecb6f982935debdb55edc903e GIT binary patch literal 277 zcmeZtcKtAad6%_R-uF2K>|DSU@d*9r( zx6>!)O0`dHoV8Zz`OS$=JeC{7*jhiV*E@5X_kT=PmmNDhkCt$^a1g^IgO8W%rK;=> z1}{?*n!7|MM=Q+6B;cQV@d34GN=;=9EIK~Lf4|)O-T0y2{`qvN&S^c?llU?%RsWvX zxAcnH(93t)31$*TjUorv!@ceUK literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/manifests/C38XX4Z2517M93GQ5MA0 b/icechunk-python/tests/data/test-repo/manifests/C38XX4Z2517M93GQ5MA0 new file mode 100644 index 0000000000000000000000000000000000000000..98ce9238ee6f5cec1f12e8206e80ce2c0209b4ef GIT binary patch literal 174 zcmeZtcKtAad6%fQ=U-|6wqf;`DZy5o9#YR^E literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json index 5b2aafdc..c7dc109c 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json @@ -1 +1 @@ -{"snapshot":"A2RD2Y65PR6D3B6BR1K0"} \ No newline at end of file +{"snapshot":"HNG82GMS51ECXFXFCYJG"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json index 75370f32..0fc7cd40 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json @@ -1 +1 @@ -{"snapshot":"K1BMYVG1HNVTNV1FSBH0"} \ No newline at end of file +{"snapshot":"GNFK0SSWD5B8CVA53XEG"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json index d26031d3..cf192bde 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json @@ -1 +1 @@ -{"snapshot":"RPA0WQCNM2N9HBBRHJQ0"} \ No newline at end of file 
+{"snapshot":"3EKE17N8YF5ZK5NRMZJ0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json index 66c78fb1..346eeb78 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json @@ -1 +1 @@ -{"snapshot":"6Q9GDTXKF17BGQVSQZFG"} \ No newline at end of file +{"snapshot":"R7F1RJHPZ428N4AK19K0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json index c31e956b..a52be478 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json @@ -1 +1 @@ -{"snapshot":"949AXZ49X764TMDC6D4G"} \ No newline at end of file +{"snapshot":"TNE0TX645A2G7VTXFA1G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json index b3ffccc0..d63e54c4 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json @@ -1 +1 @@ -{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file +{"snapshot":"394QWZDXAY74HP6Q8P3G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json index 5b2aafdc..c7dc109c 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json @@ -1 +1 @@ -{"snapshot":"A2RD2Y65PR6D3B6BR1K0"} \ No newline at end of file +{"snapshot":"HNG82GMS51ECXFXFCYJG"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json index b3ffccc0..d63e54c4 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json @@ -1 +1 @@ -{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file +{"snapshot":"394QWZDXAY74HP6Q8P3G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json index c31e956b..a52be478 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json @@ -1 +1 @@ -{"snapshot":"949AXZ49X764TMDC6D4G"} \ No newline at end of file +{"snapshot":"TNE0TX645A2G7VTXFA1G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json index b3ffccc0..d63e54c4 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json @@ -1 +1 @@ -{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file +{"snapshot":"394QWZDXAY74HP6Q8P3G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/snapshots/394QWZDXAY74HP6Q8P3G 
b/icechunk-python/tests/data/test-repo/snapshots/394QWZDXAY74HP6Q8P3G new file mode 100644 index 0000000000000000000000000000000000000000..3b09dece9fc5649e0afd7ffa0a2e5c6413940d1e GIT binary patch literal 867 zcmV-p1DyOxLq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m zof!ajtB*zC7$uDX1P%ftB4Id+HOK|tF}Rut`X9IAg#4c;@%t; z7vN0?z$b=q&?jXe?&vra$_z4BnrJNreE@v`g8-WbcPc5?n897^0nV;9v%70~*bB;3 zJaur_8u1|lYNRnGyE?sCl9_4KG7j4$X-Dex>{>lIyQAIRjnitRo=Gch90qr3oSsQi zxEGYDxWN&7!AzqjlFsIxFi2AaaArm(LXF%k#f(NlKJ-%qL&FmlpN7z=vg5xa%^6W0 z%qZn?)CBV)k&Pi!I%GCQWJENGLi2rAVhkE|fFJa`{MGka+3`IV4wWd%mAWm`N|L5X zx!OUQaa)AFV2vosl}MCEo)`!+683_UN9*8dFDNl>CsG<$V%oNGJFU}BwUYt=zX*ij ze;faATv_Fm*~5siKSja`k^xeVY2NumB0;T4~PZJ>nri2TB)k5G0{h|Nm ztt$ds&?edhU8qqdLdcN`Mn=Mnl3@;D0udQ2vjY-J|u;NOX-eE*YKIja7jfoToAK(!!CoFu0e8h0f-S)2-UxJl&ppF@3}y& z%j52sos=)?Uxm4tac3-9PEbd-A!k68`c@ZCtLUGKey9(D0+H%aT>->{)HjQ8g6&1f z>qq-&<^HmNdo(Z;T`W{_J=GpGSqE4a_(g6u7ti|~)bt)^1^z2U9Yzj>Y=IvH_>#qT tq`c35nM7#-^X({<m zjTZox=!Hh`vL+ou2E?rML|vq()lmq zE(tUmK06B@h-$3BK_W$mofkYxfXEq9;D6{RNALsZlP-G6KWBO9|4}E+!G8`U#(GAW zIJawK;=EVRW~?cc8;#9u<6XU2Ep8nPV;#K-VX`?T*P~{vYjCLbc`*zl8!5It=Czs=@^DLX2uysnx?|3e2kfd3%>P5xWo=Yz&SbGg)*svT!3 ztT&4$Si5UGZXH!&tS?N}j>~F=F(i}}1&no!k+w^Wb=2ev)(Salk}_l~tvp>DulFMB zy)fDdImSBWS(Fw7|NmIMt9kWWUR_lx{ul6n^wIvSFyx34UdDEAtXy0HPEkQYSD- zFoKz2C}!d&qRhe;8*N9tlU!?Hs;cow2Pd80eecIXJZo=G@d_I%hoJP!T|!K-Bbd literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/snapshots/GNFK0SSWD5B8CVA53XEG b/icechunk-python/tests/data/test-repo/snapshots/GNFK0SSWD5B8CVA53XEG new file mode 100644 index 0000000000000000000000000000000000000000..93decc5f58bed160705f4ec7d083e1d5aca16fdd GIT binary patch literal 867 zcmV-p1DyOxLq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m zof!a@>W)g_vNj!}LTW&r0RTM8?lPdDAcxNJGLvz*9t;$~0zh9lYS6a+)a)N?t(R?k z%Zb7O4b-?`(EKoAUus;Jy_)5ZRD?i}o7sGu0(<~=0Dl0k%+XA}*{!Lhx;2;XZtX&E zv$m*eoZTAEFFNsyV*j&0B#Xh&BcTIOj)BnsvIKfP1m+)!{Ujm&k3O=}5)n&4x5*;MRaZ~F&0BBk_44?tH)nfiZ!@IU3+wD2Yj)X;UG>D5-ex5)z2WY) z-!#qMCKfZrv{lD^N%~a}46P@2VdY$P1?$#escuwvcPQFSF$vl)6ua4t=_SF6XK%Bz zXgG0to1VoWw+cyBH(zpSXWt2ha`B61K=?3W$A+t3@Bl&a|08!VWsN3Oixe?v`b9=0 z1p+uUh>&tar^ZiBSqAMq^w35AhyIuTXJr|b;)iJ|Tv=HB#*pI7RghvakF$DW*wWj~ zl!e8Q!rajuxs`6MfX=8B?|3BhEGxhpj`~5HZ zZyI&~TN!l3DCyxdv(rNKlMgt_Nk@71(og=Alx0ctAEw<)hP_RisYi2VabC^TvsWBL zvAc=?v9r+m!4afK#tHx)98OjGWjq1ukvfpOs-8CDl=VN zSXwd_Z(pckB|?ai2}VSMnUV~1Z~_q-BeMe%fPjjc;z@{vL;{gOD5IPbGYT0FU?{0w ztL1Q5o44-?Hax1F<*|vJDG%RN+~F85n>XBLkkhpiDChwRJ6QsilSds^h0|H6hA7t>Jq?XOW$PTMBR%| zm-PFtTJ}Z!_ExYMy3|m0PO8e#gB?%+xYlVHF422#SPwnZGgKC0(MFntp8&W9%x$Fh tdA`&BmqgnD|F**vXXRgs$gn%_i(#Aa{Rf?iZ!im8Gq0GOc)U7Cj;m_VrtSa$ literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/snapshots/HNG82GMS51ECXFXFCYJG b/icechunk-python/tests/data/test-repo/snapshots/HNG82GMS51ECXFXFCYJG new file mode 100644 index 0000000000000000000000000000000000000000..bd82739d03887139f7d8951db0b8c096b03e55d8 GIT binary patch literal 871 zcmV-t1DO0tLq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m zy%_)|tB*eg1eyR{3w zN$Q#_EW0(DUnJre?flRFj!Xdto(Ua*Y7B(_k0TK59WDRI&JP0O|L7klE}b03(+VE0az7toY^O5Lb0aKE*Ktqhpf6`4}@&oUmUOLG~XF2KrS*s4h z{|++s<~%93Q`MVniI~;VQ*YAJ8EU`mS<&pTZdXDdz>^**qAVFyy~MOm?KEtcv=b$5ed&GU+1Re@nAyYIB(N3ZNn z($)keO>fex_v2WgxauTJ1_LN5d4^VXLZMLnqMryk6l&OfrQ!pQ5&w@lYiA3aP%bgN zpXv7)krE8xz(7Jtj2jp}G20_#gUT`k#$sP>DaLr7msVP=&#HGe^I9!;B^M zLNKK_nP=;UD(>D1JDs4Y^d>2+Vu|Wa(yqSdy|Zao!JdsPuB(h`76bo33JLiCx1D0X zb*3}L{{j3rjk^D?3^ZJX+-UKFb3*iy6FA68CwcbLNB)D1;|TLVrdcb3y~&xUH#2Q< 
z9?jFMHpF1qJ;eW=o4E0jk%Q+&4+7CBzbX^pBM&QVJiz&xks`#ZIQXBDME;BZoBY}{ zk!#Xry2zE4U#MXvLWr3OMnuAll3@;T0udP_vjY-@0+B!{qnr{m3K_}hD_z^?gu*~3sBmUS~5AxFfBoCW#JHPvtb&UPa;33hLD9R9}qJ@Q1l>MsBAKHUJair2)7jvL6%T1~| z;r1fsCD6Wy7JSj)-U*gMmlmp4PgMjxuLJY~*OKPuQh!g1>$ztnfl442{YW$6B!FlE xCmN}Joo}?CCDAm%ep^Wjx&F5|^2VL@V%TbY?}KjQgD^9%@hC1Q9Mpstf=C literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/snapshots/R7F1RJHPZ428N4AK19K0 b/icechunk-python/tests/data/test-repo/snapshots/R7F1RJHPZ428N4AK19K0 new file mode 100644 index 0000000000000000000000000000000000000000..493bb36234807ae0c37188edabd877d288654fe1 GIT binary patch literal 178 zcmeZtcKtAad6%MjMjwhGnx0--)FtE zUSEuOKV-Y7O(uuG^c?jXZK|xVWGQ?3w=*0dNOYQZqW6sxXpZFK)OiPmuIJp z3Yf&F?_@L=P0%l9dA~_5yhbLx$?53BY2o#I_NEGmv@kzsRLbP#p6++sRWtd~IX6uP W2H{F>7KV>p<#S$0?^4k2U;+R;xk4uZ literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/snapshots/TNE0TX645A2G7VTXFA1G b/icechunk-python/tests/data/test-repo/snapshots/TNE0TX645A2G7VTXFA1G new file mode 100644 index 0000000000000000000000000000000000000000..77790ad2e1ed88861367a69c940eaadb71e9d364 GIT binary patch literal 1053 zcmV+&1mgQiLq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m zZ65&EbFN0fT1{&}RNyQ!0>h^BiBvO&9mz?6GNkfOvH(l7l%GZ-pZU9c^kO4&ds#K$ zfSh4Q&|{{akB&CVP?iAX6AubEh9@K>1(g7s0Hy#P<>|Rq1Zgzs&g!imowKDgG-pd^ zCpPDtEf15U_NW$-bk3HDR~32H)6aUEo7IpVG9;T79I3x7zr6TYPXqL;2@gQ_HLtHR z^DWs6(D67kew%9`9Np+a<-z9&eUA4zAU+4^f2E)h1as;Q7w42fX@c*S}Fwcj2D_JhI7AtMrkv$x+*VNp&3SwA~lZI?0dxiY{sI zjp!Tit(F8UZF1ChBaJ${GdT)cBnx_%RI=c`A4n%h!}@ulrE^l!-XtQjv!2+|pc-@o zE7A-EKN^M6$-IX!dp?VqG3t+@FN2YGA9(I$S574EXP2Nh;lt8Xo1CP z42{Pr%cw9VIuMB9#N(tX%Exb@ETdRPs_013;6gzcF-l%#DaHkYw{UW_-s;tBZ8Oi_ z>V2R{hGI94zsEw(U}z$0K_LT%D$JB0Z5VzqA6XC2(M=p{u%8a#*01^-{H(GRv*llE z4UN_|iZn8;xp5{<^oW#GT*|5w@ZcDvhn$!SN? 
zHmyx0{{la3)%{05BFmG_mn9pRh^WT-zyWqWVAij3{#Ys0f{B$gKcED0pekwYBZ9>U zysCkOiv=dAD@JTUS%shFy7^N&KZ|v93koQP1}!QsI%1=Y(?2-_!D=I6vDK>S2$&9D zP!gUH47&W3r5UDwbt?70%<%Y${oL^LQRN7As39Rj3X&NTQqU%aHUn@15gDqY9ulB% znwY{Vh6o}eB7sOC5+{K~lN1!BU4k&L!Ws$NCRXdiOC|};oNgZ>_=q{sN zB+DI^w9C?lL}sbCfR>K_ZHeAna=@WoOdexn^EE{bIgZW=A%Am|Z|slGa^~On4&Xg@ zr95ApAc>g3tNCAM~{K>`X6k`e6NTTi#2(YW0NR{ma9B=O+ew1{et*y!zElG$FeDpG=iw(T)TG p;nt^&olNNqo6lc6X{N7OY$969hx!o6m6yvK)G%?#i`ai#2M=G1U;mrAhOJUwq&>oZ#!^GVA=u z$bh&>hcUj literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/transactions/GNFK0SSWD5B8CVA53XEG b/icechunk-python/tests/data/test-repo/transactions/GNFK0SSWD5B8CVA53XEG new file mode 100644 index 0000000000000000000000000000000000000000..29b3ccfcb87cae95bc4ed88da024bc9821fe7dc7 GIT binary patch literal 235 zcmeZtcKtAad6%y0aX8-v;2aLWHNQPP{ztf)gP?SYTaZ7OhwmoNB7tXMGu%@N=&x+{w z9S6=&5=i{vu~J~p%{_i@$w#d>WUp0}sCnoh1egYEM)4M-{GgMplNE3D*|#$WM)(b9C?P1ppqhH|GEV literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/transactions/TNE0TX645A2G7VTXFA1G b/icechunk-python/tests/data/test-repo/transactions/TNE0TX645A2G7VTXFA1G new file mode 100644 index 0000000000000000000000000000000000000000..cd830e5baf7608ea179322158a89aa67184ba3cf GIT binary patch literal 173 zcmeZtcKtAad6%MWutX=LXq|Bo=;Z1Ao8u|o$}Pcn>zQ70hKT?F None: ) assert diff.deleted_groups == set() assert diff.deleted_arrays == set() - assert diff.updated_user_attributes == set() - assert diff.updated_zarr_metadata == set() + assert diff.updated_groups == set() + assert diff.updated_arrays == set() if __name__ == "__main__": diff --git a/icechunk-python/tests/test_conflicts.py b/icechunk-python/tests/test_conflicts.py index 1f7c4ded..2615c4ea 100644 --- a/icechunk-python/tests/test_conflicts.py +++ b/icechunk-python/tests/test_conflicts.py @@ -48,18 +48,22 @@ def test_detect_conflicts(repo: icechunk.Repository) -> None: try: session_b.rebase(icechunk.ConflictDetector()) except icechunk.RebaseFailedError as e: - assert len(e.conflicts) == 2 + assert len(e.conflicts) == 3 + assert e.conflicts[0].path == "/foo/bar/some-array" assert e.conflicts[0].path == "/foo/bar/some-array" assert ( e.conflicts[0].conflict_type - == icechunk.ConflictType.UserAttributesDoubleUpdate + == icechunk.ConflictType.ZarrMetadataDoubleUpdate ) - assert e.conflicts[0].conflicted_chunks is None - assert e.conflicts[1].path == "/foo/bar/some-array" - assert e.conflicts[1].conflict_type == icechunk.ConflictType.ChunkDoubleUpdate - assert e.conflicts[1].conflicted_chunks - assert len(e.conflicts[1].conflicted_chunks) == 100 + assert ( + e.conflicts[1].conflict_type + == icechunk.ConflictType.ChunksUpdatedInUpdatedArray + ) + assert e.conflicts[2].path == "/foo/bar/some-array" + assert e.conflicts[2].conflict_type == icechunk.ConflictType.ChunkDoubleUpdate + assert e.conflicts[2].conflicted_chunks + assert len(e.conflicts[2].conflicted_chunks) == 100 raise e @@ -94,7 +98,7 @@ def test_rebase_no_conflicts(repo: icechunk.Repository) -> None: "on_chunk_conflict", [icechunk.VersionSelection.UseOurs, icechunk.VersionSelection.UseTheirs], ) -def test_rebase_user_attrs_with_ours( +def test_rebase_fails_on_user_atts_double_edit( repo: icechunk.Repository, on_chunk_conflict: icechunk.VersionSelection ) -> None: session_a = repo.writable_session("main") @@ -116,24 +120,7 @@ def test_rebase_user_attrs_with_ours( # Make sure it fails if the resolver is not set with pytest.raises(icechunk.RebaseFailedError): - session_b.rebase( - icechunk.BasicConflictSolver( - 
on_user_attributes_conflict=icechunk.VersionSelection.Fail - ) - ) - - solver = icechunk.BasicConflictSolver( - on_user_attributes_conflict=icechunk.VersionSelection.UseOurs, - ) - - session_b.rebase(solver) - session_b.commit("after conflict") - - assert ( - array_b.attrs["repo"] == 2 - if on_chunk_conflict == icechunk.VersionSelection.UseOurs - else 1 - ) + session_b.rebase(icechunk.BasicConflictSolver()) @pytest.mark.parametrize( diff --git a/icechunk-python/tests/test_timetravel.py b/icechunk-python/tests/test_timetravel.py index feab90d0..09a08cf2 100644 --- a/icechunk-python/tests/test_timetravel.py +++ b/icechunk-python/tests/test_timetravel.py @@ -42,8 +42,8 @@ def test_timetravel() -> None: ) assert status.deleted_groups == set() assert status.deleted_arrays == set() - assert status.updated_user_attributes == {"/", "/air_temp"} # why? - assert status.updated_zarr_metadata == set() + assert status.updated_arrays == set() + assert status.updated_groups == set() first_snapshot_id = session.commit("commit 1") assert session.read_only @@ -137,8 +137,8 @@ def test_timetravel() -> None: ) assert diff.deleted_groups == set() assert diff.deleted_arrays == set() - assert diff.updated_user_attributes == {"/", "/air_temp"} # why? - assert diff.updated_zarr_metadata == set() + assert diff.updated_arrays == set() + assert diff.updated_groups == set() assert ( repr(diff) == """\ @@ -148,10 +148,6 @@ def test_timetravel() -> None: Arrays created: /air_temp -User attributes updated: - / - /air_temp - Chunks updated: /air_temp: [0, 0] @@ -168,6 +164,25 @@ def test_timetravel() -> None: """ ) + session = repo.writable_session("main") + store = session.store + + group = zarr.open_group(store=store) + air_temp = group.create_array( + "air_temp", shape=(1000, 1000), chunks=(100, 100), dtype="i4", overwrite=True + ) + assert ( + repr(session.status()) + == """\ +Arrays created: + /air_temp + +Arrays deleted: + /air_temp + +""" + ) + with pytest.raises(ValueError, match="doesn't include"): # if we call diff in the wrong order it fails with a message repo.diff(from_tag="v1.0", to_snapshot=parents[-1].id) diff --git a/icechunk-python/tests/test_zarr/test_properties.py b/icechunk-python/tests/test_zarr/test_properties.py index 86444aaa..58656e99 100644 --- a/icechunk-python/tests/test_zarr/test_properties.py +++ b/icechunk-python/tests/test_zarr/test_properties.py @@ -1,6 +1,5 @@ from typing import Any -import numpy as np import pytest from numpy.testing import assert_array_equal @@ -40,8 +39,6 @@ def create() -> IcechunkStore: def test_roundtrip(data: st.DataObject, nparray: Any) -> None: # TODO: support size-0 arrays GH392 assume(nparray.size > 0) - # TODO: fix complex fill values GH391 - assume(not np.iscomplexobj(nparray)) zarray = data.draw( arrays( diff --git a/icechunk-python/tests/test_zarr/test_stateful.py b/icechunk-python/tests/test_zarr/test_stateful.py index 031fa6ae..a3477848 100644 --- a/icechunk-python/tests/test_zarr/test_stateful.py +++ b/icechunk-python/tests/test_zarr/test_stateful.py @@ -106,8 +106,6 @@ def add_array( array, _ = array_and_chunks # TODO: support size-0 arrays GH392 assume(array.size > 0) - # TODO: fix complex fill values GH391 - assume(not np.iscomplexobj(array)) super().add_array(data, name, array_and_chunks) ##### TODO: port everything below to zarr diff --git a/icechunk/examples/low_level_dataset.rs b/icechunk/examples/low_level_dataset.rs index 8088f82e..17b6ccaa 100644 --- a/icechunk/examples/low_level_dataset.rs +++ b/icechunk/examples/low_level_dataset.rs @@ -1,12 
+1,9 @@ #![allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] -use std::{collections::HashMap, iter, num::NonZeroU64, sync::Arc}; +use std::{collections::HashMap, sync::Arc}; +use bytes::Bytes; use icechunk::{ - format::{manifest::ChunkPayload, snapshot::ZarrArrayMetadata, ChunkIndices, Path}, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - UserAttributes, - }, + format::{manifest::ChunkPayload, snapshot::ArrayShape, ChunkIndices, Path}, repository::VersionInfo, session::{Session, SessionError}, storage::new_in_memory_storage, @@ -46,9 +43,10 @@ ds.add_group("/group2".into()).await?; "#, ); - ds.add_group(Path::root()).await?; - ds.add_group("/group1".try_into().unwrap()).await?; - ds.add_group("/group2".try_into().unwrap()).await?; + let user_data = Bytes::new(); + ds.add_group(Path::root(), user_data.clone()).await?; + ds.add_group("/group1".try_into().unwrap(), user_data.clone()).await?; + ds.add_group("/group2".try_into().unwrap(), user_data.clone()).await?; println!(); print_nodes(&ds).await?; @@ -96,58 +94,13 @@ ds.add_array(array1_path.clone(), zarr_meta1).await?; "#, ); - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![3], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { - name: "mycodec".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; + let shape = ArrayShape::new(vec![(3, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into()]); let array1_path: Path = "/group1/array1".try_into().unwrap(); - ds.add_array(array1_path.clone(), zarr_meta1).await?; + ds.add_array(array1_path.clone(), shape, dimension_names, user_data.clone()).await?; println!(); print_nodes(&ds).await?; - println!(); - println!(); - println!("## Setting array user attributes"); - println!( - r#" -``` -ds.set_user_attributes(array1_path.clone(), Some("{{n:42}}".to_string())).await?; -``` - "#, - ); - ds.set_user_attributes( - array1_path.clone(), - Some(UserAttributes::try_new(br#"{"n":42}"#).unwrap()), - ) - .await?; - print_nodes(&ds).await?; - println!("## Committing"); let v1_id = ds.commit("some message", Default::default()).await?; println!( @@ -288,14 +241,7 @@ async fn print_nodes(ds: &Session) -> Result<(), SessionError> { .await? 
.map(|n| n.unwrap()) .sorted_by_key(|n| n.path.clone()) - .map(|node| { - format!( - "|{:10?}|{:15}|{:10?}\n", - node.node_type(), - node.path.to_string(), - node.user_attributes, - ) - }) + .map(|node| format!("|{:10?}|{:15}\n", node.node_type(), node.path.to_string(),)) .format(""); println!("{}", rows); diff --git a/icechunk/examples/multithreaded_get_chunk_refs.rs b/icechunk/examples/multithreaded_get_chunk_refs.rs index b0a94d85..1c6769d7 100644 --- a/icechunk/examples/multithreaded_get_chunk_refs.rs +++ b/icechunk/examples/multithreaded_get_chunk_refs.rs @@ -13,22 +13,19 @@ use std::{ collections::HashMap, env::{self}, - num::NonZeroU64, sync::Arc, time::Instant, }; +use bytes::Bytes; use futures::{stream::FuturesUnordered, StreamExt}; use icechunk::{ config::CompressionConfig, format::{ manifest::{ChunkPayload, ChunkRef}, - snapshot::ZarrArrayMetadata, + snapshot::ArrayShape, ChunkId, ChunkIndices, Path, }, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, new_local_filesystem_storage, repository::VersionInfo, session::Session, @@ -62,30 +59,12 @@ async fn mk_repo( async fn do_writes(path: &std::path::Path) -> Result<(), Box> { let repo = mk_repo(path).await?; let mut session = repo.writable_session("main").await?; - let meta = ZarrArrayMetadata { - shape: vec![MAX_I, MAX_J, MAX_L, MAX_K], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; + let shape = + ArrayShape::new(vec![(MAX_I, 1), (MAX_J, 1), (MAX_K, 1), (MAX_L, 1)]).unwrap(); + let user_data = Bytes::new(); + let dimension_names = Some(vec!["i".into(), "j".into(), "k".into(), "l".into()]); let path: Path = "/array".try_into().unwrap(); - session.add_array(path.clone(), meta).await?; + session.add_array(path.clone(), shape, dimension_names, user_data).await?; session.commit("array created", None).await?; let session = Arc::new(RwLock::new(repo.writable_session("main").await?)); diff --git a/icechunk/flatbuffers/snapshot.fbs b/icechunk/flatbuffers/snapshot.fbs index 7f36da33..157762a6 100644 --- a/icechunk/flatbuffers/snapshot.fbs +++ b/icechunk/flatbuffers/snapshot.fbs @@ -24,33 +24,6 @@ struct ManifestFileInfo { num_chunk_refs: uint32; } -// a pointer to a user attributes file -struct AttributeFileInfo { - // id of the object in the repo's object store - id: ObjectId12; -} - -// a pointer to a user attributes file -table UserAttributesRef { - // id of the object in the repo's object store - object_id: ObjectId12 (required); - - // index where the user attributes for the array start - location: uint32; -} - -// user attributes written inline -table InlineUserAttributes { - // user attributes data, serialized as rmp_serde of the json value - // TODO: better serialization format - data :[uint8] (required); -} - -union UserAttributesSnapshot { - Inline :InlineUserAttributes, - Reference :UserAttributesRef, -} - // A range of chunk indexes struct ChunkIndexRange { // inclusive @@ -69,15 +42,25 @@ table ManifestRef { extents: [ChunkIndexRange] (required); 
} +// the shape of the array along a given dimension +struct DimensionShape { + array_length: uint64; + chunk_length: uint64; +} + +table DimensionName { + // optional + name: string; +} + // a marker for a group node table GroupNodeData {} // data for an array node table ArrayNodeData { - // the zarr metadata for the array - // serialized as rmp_serde of the json value - // TODO: better serialization format - zarr_metadata: [uint8] (required); + shape: [DimensionShape] (required); + + dimension_names: [DimensionName]; // pointers to all the manifests where this array has chunk references manifests: [ManifestRef] (required); @@ -97,8 +80,9 @@ table NodeSnapshot { // absolute path of the node within the repo path: string (required); - // pointer to the user attributes for the node - user_attributes: UserAttributesSnapshot; + // the metadata for the node according to what the user passed + // this will generally be the full zarr metadata for the node + user_data: [uint8] (required); // node's data node_data: NodeData (required); @@ -128,10 +112,6 @@ table Snapshot { // the list of all manifest files this snapshot points to // sorted in ascending order of ManifestFileInfo.id manifest_files: [ManifestFileInfo] (required); - - // the list of all attribute files this snapshot points to - // sorted in ascending order of AttributeFileInfo.id - attribute_files: [AttributeFileInfo] (required); } root_type Snapshot; diff --git a/icechunk/flatbuffers/transaction_log.fbs b/icechunk/flatbuffers/transaction_log.fbs index 3afbe564..3f996119 100644 --- a/icechunk/flatbuffers/transaction_log.fbs +++ b/icechunk/flatbuffers/transaction_log.fbs @@ -36,13 +36,13 @@ table TransactionLog { // sorted in ascending order deleted_arrays: [ObjectId8] (required); - // node ids of the nodes that had user attributes modified in this transaction + // node ids of the arrays that had user definitions modified in this transaction // sorted in ascending order - updated_user_attributes: [ObjectId8] (required); + updated_arrays: [ObjectId8] (required); - // node ids of the nodes that had zarr metadata modified in this transaction + // node ids of the groups that had user definitions modified in this transaction // sorted in ascending order - updated_zarr_metadata: [ObjectId8] (required); + updated_groups: [ObjectId8] (required); // chunk ref changes made in this transaction // sorted in ascending order of the node_id of the ArrayUpdatedChunks diff --git a/icechunk/src/change_set.rs b/icechunk/src/change_set.rs index 1bca04ed..130f465d 100644 --- a/icechunk/src/change_set.rs +++ b/icechunk/src/change_set.rs @@ -4,27 +4,32 @@ use std::{ mem::take, }; +use bytes::Bytes; use itertools::{Either, Itertools as _}; use serde::{Deserialize, Serialize}; use crate::{ format::{ manifest::{ChunkInfo, ChunkPayload}, - snapshot::{NodeData, NodeSnapshot, UserAttributesSnapshot, ZarrArrayMetadata}, + snapshot::{ArrayShape, DimensionName, NodeData, NodeSnapshot}, ChunkIndices, NodeId, Path, }, - metadata::UserAttributes, session::SessionResult, }; -#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct ArrayData { + pub shape: ArrayShape, + pub dimension_names: Option>, + pub user_data: Bytes, +} + +#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)] pub struct ChangeSet { - new_groups: HashMap, - new_arrays: HashMap, - updated_arrays: HashMap, - // These paths may point to Arrays or Groups, - // since both Groups and Arrays support
UserAttributes - updated_attributes: HashMap>, + new_groups: HashMap, + new_arrays: HashMap, + updated_arrays: HashMap, + updated_groups: HashMap, // It's important we keep these sorted, we use this fact in TransactionLog creation set_chunks: BTreeMap>>, deleted_groups: HashSet<(Path, NodeId)>, @@ -32,10 +37,6 @@ pub struct ChangeSet { } impl ChangeSet { - pub fn zarr_updated_arrays(&self) -> impl Iterator { - self.updated_arrays.keys() - } - pub fn deleted_arrays(&self) -> impl Iterator { self.deleted_arrays.iter() } @@ -44,8 +45,12 @@ impl ChangeSet { self.deleted_groups.iter() } - pub fn user_attributes_updated_nodes(&self) -> impl Iterator { - self.updated_attributes.keys() + pub fn updated_arrays(&self) -> impl Iterator { + self.updated_arrays.keys() + } + + pub fn updated_groups(&self) -> impl Iterator { + self.updated_groups.keys() } pub fn array_is_deleted(&self, path_and_id: &(Path, NodeId)) -> bool { @@ -71,39 +76,55 @@ impl ChangeSet { self == &ChangeSet::default() } - pub fn add_group(&mut self, path: Path, node_id: NodeId) { - self.new_groups.insert(path, node_id); + pub fn add_group(&mut self, path: Path, node_id: NodeId, definition: Bytes) { + debug_assert!(!self.updated_groups.contains_key(&node_id)); + self.new_groups.insert(path, (node_id, definition)); } - pub fn get_group(&self, path: &Path) -> Option<&NodeId> { + pub fn get_group(&self, path: &Path) -> Option<&(NodeId, Bytes)> { self.new_groups.get(path) } - pub fn get_array(&self, path: &Path) -> Option<&(NodeId, ZarrArrayMetadata)> { + pub fn get_array(&self, path: &Path) -> Option<&(NodeId, ArrayData)> { self.new_arrays.get(path) } /// IMPORTANT: This method does not delete children. The caller /// is responsible for doing that pub fn delete_group(&mut self, path: Path, node_id: &NodeId) { - self.updated_attributes.remove(node_id); + self.updated_groups.remove(node_id); if self.new_groups.remove(&path).is_none() { // it's an old group, we need to flag it as deleted self.deleted_groups.insert((path, node_id.clone())); } } - pub fn add_array( - &mut self, - path: Path, - node_id: NodeId, - metadata: ZarrArrayMetadata, - ) { - self.new_arrays.insert(path, (node_id, metadata)); + pub fn add_array(&mut self, path: Path, node_id: NodeId, array_data: ArrayData) { + self.new_arrays.insert(path, (node_id, array_data)); + } + + pub fn update_array(&mut self, node_id: &NodeId, path: &Path, array_data: ArrayData) { + match self.new_arrays.get(path) { + Some((id, _)) => { + debug_assert!(!self.updated_arrays.contains_key(id)); + self.new_arrays.insert(path.clone(), (node_id.clone(), array_data)); + } + None => { + self.updated_arrays.insert(node_id.clone(), array_data); + } + } } - pub fn update_array(&mut self, node_id: NodeId, metadata: ZarrArrayMetadata) { - self.updated_arrays.insert(node_id, metadata); + pub fn update_group(&mut self, node_id: &NodeId, path: &Path, definition: Bytes) { + match self.new_groups.get(path) { + Some((id, _)) => { + debug_assert!(!self.updated_groups.contains_key(id)); + self.new_groups.insert(path.clone(), (node_id.clone(), definition)); + } + None => { + self.updated_groups.insert(node_id.clone(), definition); + } + } } pub fn delete_array(&mut self, path: Path, node_id: &NodeId) { @@ -116,7 +137,6 @@ impl ChangeSet { ); self.updated_arrays.remove(node_id); - self.updated_attributes.remove(node_id); self.set_chunks.remove(node_id); if !is_new_array { self.deleted_arrays.insert((path, node_id.clone())); @@ -128,30 +148,16 @@ impl ChangeSet { self.deleted_groups.contains(&key) || 
self.deleted_arrays.contains(&key) } - pub fn has_updated_attributes(&self, node_id: &NodeId) -> bool { - self.updated_attributes.contains_key(node_id) - } + //pub fn has_updated_definition(&self, node_id: &NodeId) -> bool { + // self.updated_definitions.contains_key(node_id) + //} - pub fn get_updated_zarr_metadata( - &self, - node_id: &NodeId, - ) -> Option<&ZarrArrayMetadata> { + pub fn get_updated_array(&self, node_id: &NodeId) -> Option<&ArrayData> { self.updated_arrays.get(node_id) } - pub fn update_user_attributes( - &mut self, - node_id: NodeId, - atts: Option, - ) { - self.updated_attributes.insert(node_id, atts); - } - - pub fn get_user_attributes( - &self, - node_id: &NodeId, - ) -> Option<&Option> { - self.updated_attributes.get(node_id) + pub fn get_updated_group(&self, node_id: &NodeId) -> Option<&Bytes> { + self.updated_groups.get(node_id) } pub fn set_chunk_ref( @@ -233,7 +239,7 @@ impl ChangeSet { } pub fn new_groups(&self) -> impl Iterator { - self.new_groups.iter() + self.new_groups.iter().map(|(path, (node_id, _))| (path, node_id)) } pub fn new_arrays(&self) -> impl Iterator { @@ -263,8 +269,8 @@ impl ChangeSet { // TODO: optimize self.new_groups.extend(other.new_groups); self.new_arrays.extend(other.new_arrays); + self.updated_groups.extend(other.updated_groups); self.updated_arrays.extend(other.updated_arrays); - self.updated_attributes.extend(other.updated_attributes); self.deleted_groups.extend(other.deleted_groups); self.deleted_arrays.extend(other.deleted_arrays); @@ -320,27 +326,30 @@ impl ChangeSet { } pub fn get_new_array(&self, path: &Path) -> Option { - self.get_array(path).map(|(id, meta)| { - let meta = self.get_updated_zarr_metadata(id).unwrap_or(meta).clone(); - let atts = self.get_user_attributes(id).cloned(); + self.get_array(path).map(|(id, array_data)| { + debug_assert!(!self.updated_arrays.contains_key(id)); NodeSnapshot { id: id.clone(), path: path.clone(), - user_attributes: atts.flatten().map(UserAttributesSnapshot::Inline), + user_data: array_data.user_data.clone(), // We put no manifests in new arrays, see get_chunk_ref to understand how chunks get // fetched for those arrays - node_data: NodeData::Array(meta.clone(), vec![]), + node_data: NodeData::Array { + shape: array_data.shape.clone(), + dimension_names: array_data.dimension_names.clone(), + manifests: vec![], + }, } }) } pub fn get_new_group(&self, path: &Path) -> Option { - self.get_group(path).map(|id| { - let atts = self.get_user_attributes(id).cloned(); + self.get_group(path).map(|(id, definition)| { + debug_assert!(!self.updated_groups.contains_key(id)); NodeSnapshot { id: id.clone(), path: path.clone(), - user_attributes: atts.flatten().map(UserAttributesSnapshot::Inline), + user_data: definition.clone(), node_data: NodeData::Group, } }) @@ -365,51 +374,51 @@ impl ChangeSet { return None; } - let session_atts = self - .get_user_attributes(&node.id) - .cloned() - .map(|a| a.map(UserAttributesSnapshot::Inline)); - let new_atts = session_atts.unwrap_or(node.user_attributes); match node.node_data { - NodeData::Group => Some(NodeSnapshot { user_attributes: new_atts, ..node }), - NodeData::Array(old_zarr_meta, manifests) => { - let new_zarr_meta = self - .get_updated_zarr_metadata(&node.id) - .cloned() - .unwrap_or(old_zarr_meta); - + NodeData::Group => { + let new_definition = + self.updated_groups.get(&node.id).cloned().unwrap_or(node.user_data); + Some(NodeSnapshot { user_data: new_definition, ..node }) + } + NodeData::Array { shape, dimension_names, manifests } => { + let new_data = + 
self.updated_arrays.get(&node.id).cloned().unwrap_or_else(|| { + ArrayData { shape, dimension_names, user_data: node.user_data } + }); Some(NodeSnapshot { - node_data: NodeData::Array(new_zarr_meta, manifests), - user_attributes: new_atts, + user_data: new_data.user_data, + node_data: NodeData::Array { + shape: new_data.shape, + dimension_names: new_data.dimension_names, + manifests, + }, ..node }) } } } - pub fn undo_user_attributes_update(&mut self, node_id: &NodeId) { - self.updated_attributes.remove(node_id); + pub fn undo_update(&mut self, node_id: &NodeId) { + self.updated_arrays.remove(node_id); + self.updated_groups.remove(node_id); } } #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { - use std::num::NonZeroU64; - + use bytes::Bytes; use itertools::Itertools; use super::ChangeSet; use crate::{ + change_set::ArrayData, format::{ manifest::{ChunkInfo, ChunkPayload}, - snapshot::ZarrArrayMetadata, + snapshot::ArrayShape, ChunkIndices, NodeId, }, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, }; #[test] @@ -417,36 +426,26 @@ mod tests { let mut change_set = ChangeSet::default(); assert_eq!(None, change_set.new_arrays_chunk_iterator().next()); - let zarr_meta = ZarrArrayMetadata { - shape: vec![2, 2, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; + let shape = ArrayShape::new(vec![(2, 1), (2, 1), (2, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into(), "t".into()]); let node_id1 = NodeId::random(); let node_id2 = NodeId::random(); + let array_data = ArrayData { + shape: shape.clone(), + dimension_names: dimension_names.clone(), + user_data: Bytes::from_static(b"foobar"), + }; change_set.add_array( "/foo/bar".try_into().unwrap(), node_id1.clone(), - zarr_meta.clone(), + array_data.clone(), + ); + change_set.add_array( + "/foo/baz".try_into().unwrap(), + node_id2.clone(), + array_data.clone(), ); - change_set.add_array("/foo/baz".try_into().unwrap(), node_id2.clone(), zarr_meta); assert_eq!(None, change_set.new_arrays_chunk_iterator().next()); change_set.set_chunk_ref(node_id1.clone(), ChunkIndices(vec![0, 1]), None); diff --git a/icechunk/src/conflicts/basic_solver.rs b/icechunk/src/conflicts/basic_solver.rs index 642c6b7f..c154da6e 100644 --- a/icechunk/src/conflicts/basic_solver.rs +++ b/icechunk/src/conflicts/basic_solver.rs @@ -17,7 +17,6 @@ pub enum VersionSelection { #[derive(Debug, Clone)] pub struct BasicConflictSolver { - pub on_user_attributes_conflict: VersionSelection, pub on_chunk_conflict: VersionSelection, pub fail_on_delete_of_updated_array: bool, pub fail_on_delete_of_updated_group: bool, @@ -26,7 +25,6 @@ pub struct BasicConflictSolver { impl Default for BasicConflictSolver { fn default() -> Self { Self { - on_user_attributes_conflict: VersionSelection::UseOurs, on_chunk_conflict: VersionSelection::UseOurs, fail_on_delete_of_updated_array: false, fail_on_delete_of_updated_group: false, @@ -72,32 +70,24 @@ impl BasicConflictSolver { conflicts: Vec, ) -> SessionResult { use Conflict::*; - 
let unsolvable = conflicts.iter().any( - |conflict| { - matches!( - conflict, - NewNodeConflictsWithExistingNode(_) | - NewNodeInInvalidGroup(_) | - ZarrMetadataDoubleUpdate(_) | - ZarrMetadataUpdateOfDeletedArray(_) | - UserAttributesUpdateOfDeletedNode(_) | - ChunksUpdatedInDeletedArray{..} | - ChunksUpdatedInUpdatedArray{..} - ) || - matches!(conflict, - UserAttributesDoubleUpdate{..} if self.on_user_attributes_conflict == VersionSelection::Fail - ) || - matches!(conflict, - ChunkDoubleUpdate{..} if self.on_chunk_conflict == VersionSelection::Fail - ) || - matches!(conflict, - DeleteOfUpdatedArray{..} if self.fail_on_delete_of_updated_array - ) || - matches!(conflict, - DeleteOfUpdatedGroup{..} if self.fail_on_delete_of_updated_group - ) - }, - ); + let unsolvable = conflicts.iter().any(|conflict| { + matches!( + conflict, + NewNodeConflictsWithExistingNode(_) + | NewNodeInInvalidGroup(_) + | ZarrMetadataDoubleUpdate(_) + | ZarrMetadataUpdateOfDeletedArray(_) + | ZarrMetadataUpdateOfDeletedGroup(_) + | ChunksUpdatedInDeletedArray { .. } + | ChunksUpdatedInUpdatedArray { .. } + ) || matches!(conflict, + ChunkDoubleUpdate{..} if self.on_chunk_conflict == VersionSelection::Fail + ) || matches!(conflict, + DeleteOfUpdatedArray{..} if self.fail_on_delete_of_updated_array + ) || matches!(conflict, + DeleteOfUpdatedGroup{..} if self.fail_on_delete_of_updated_group + ) + }); if unsolvable { return Ok(ConflictResolution::Unsolvable { @@ -123,20 +113,6 @@ impl BasicConflictSolver { VersionSelection::Fail => panic!("Bug in conflict resolution: ChunkDoubleUpdate flagged as unrecoverable") } } - UserAttributesDoubleUpdate { node_id, .. } => { - match self.on_user_attributes_conflict { - VersionSelection::UseOurs => { - // this is a no-op, our change will override the conflicting change - } - VersionSelection::UseTheirs => { - current_changes.undo_user_attributes_update(&node_id); - } - // we can panic here because we have returned from the function if there - // were any unsolvable conflicts - #[allow(clippy::panic)] - VersionSelection::Fail => panic!("Bug in conflict resolution: UserAttributesDoubleUpdate flagged as unrecoverable") - } - } DeleteOfUpdatedArray { .. 
} => { assert!(!self.fail_on_delete_of_updated_array); // this is a no-op, the solution is to still delete the array diff --git a/icechunk/src/conflicts/detector.rs b/icechunk/src/conflicts/detector.rs index 2569b560..5bdd8dbe 100644 --- a/icechunk/src/conflicts/detector.rs +++ b/icechunk/src/conflicts/detector.rs @@ -66,8 +66,8 @@ impl ConflictSolver for ConflictDetector { let path_finder = PathFinder::new(current_repo.list_nodes().await?); let updated_arrays_already_updated = current_changes - .zarr_updated_arrays() - .filter(|node_id| previous_change.zarr_metadata_updated(node_id)) + .updated_arrays() + .filter(|node_id| previous_change.array_updated(node_id)) .map(Ok); let updated_arrays_already_updated = stream::iter(updated_arrays_already_updated) @@ -76,43 +76,37 @@ impl ConflictSolver for ConflictDetector { Ok(Conflict::ZarrMetadataDoubleUpdate(path)) }); - let updated_arrays_were_deleted = current_changes - .zarr_updated_arrays() - .filter(|node_id| previous_change.array_deleted(node_id)) + let updated_groups_already_updated = current_changes + .updated_groups() + .filter(|node_id| previous_change.group_updated(node_id)) .map(Ok); - let updated_arrays_were_deleted = stream::iter(updated_arrays_were_deleted) + let updated_groups_already_updated = stream::iter(updated_groups_already_updated) .and_then(|node_id| async { let path = path_finder.find(node_id)?; - Ok(Conflict::ZarrMetadataUpdateOfDeletedArray(path)) + Ok(Conflict::ZarrMetadataDoubleUpdate(path)) }); - let updated_attributes_already_updated = current_changes - .user_attributes_updated_nodes() - .filter(|node_id| previous_change.user_attributes_updated(node_id)) + let updated_arrays_were_deleted = current_changes + .updated_arrays() + .filter(|node_id| previous_change.array_deleted(node_id)) .map(Ok); - let updated_attributes_already_updated = - stream::iter(updated_attributes_already_updated).and_then(|node_id| async { + let updated_arrays_were_deleted = stream::iter(updated_arrays_were_deleted) + .and_then(|node_id| async { let path = path_finder.find(node_id)?; - Ok(Conflict::UserAttributesDoubleUpdate { - path, - node_id: node_id.clone(), - }) + Ok(Conflict::ZarrMetadataUpdateOfDeletedArray(path)) }); - let updated_attributes_on_deleted_node = current_changes - .user_attributes_updated_nodes() - .filter(|node_id| { - previous_change.array_deleted(node_id) - || previous_change.group_deleted(node_id) - }) + let updated_groups_were_deleted = current_changes + .updated_groups() + .filter(|node_id| previous_change.group_deleted(node_id)) .map(Ok); - let updated_attributes_on_deleted_node = - stream::iter(updated_attributes_on_deleted_node).and_then(|node_id| async { + let updated_groups_were_deleted = stream::iter(updated_groups_were_deleted) + .and_then(|node_id| async { let path = path_finder.find(node_id)?; - Ok(Conflict::UserAttributesUpdateOfDeletedNode(path)) + Ok(Conflict::ZarrMetadataUpdateOfDeletedGroup(path)) }); let chunks_updated_in_deleted_array = current_changes @@ -131,7 +125,7 @@ impl ConflictSolver for ConflictDetector { let chunks_updated_in_updated_array = current_changes .arrays_with_chunk_changes() - .filter(|node_id| previous_change.zarr_metadata_updated(node_id)) + .filter(|node_id| previous_change.array_updated(node_id)) .map(Ok); let chunks_updated_in_updated_array = @@ -187,8 +181,7 @@ impl ConflictSolver for ConflictDetector { }; if let Some(node_id) = id { - if previous_change.zarr_metadata_updated(&node_id) - || previous_change.user_attributes_updated(&node_id) + if 
previous_change.array_updated(&node_id) || previous_change.chunks_updated(&node_id) { Ok(Some(Conflict::DeleteOfUpdatedArray { @@ -216,7 +209,7 @@ impl ConflictSolver for ConflictDetector { }; if let Some(node_id) = id { - if previous_change.user_attributes_updated(&node_id) { + if previous_change.group_updated(&node_id) { Ok(Some(Conflict::DeleteOfUpdatedGroup { path: path.clone(), node_id: node_id.clone(), @@ -232,9 +225,9 @@ impl ConflictSolver for ConflictDetector { let all_conflicts: Vec<_> = new_nodes_explicit_conflicts .chain(new_nodes_implicit_conflicts) .chain(updated_arrays_already_updated) + .chain(updated_groups_already_updated) .chain(updated_arrays_were_deleted) - .chain(updated_attributes_already_updated) - .chain(updated_attributes_on_deleted_node) + .chain(updated_groups_were_deleted) .chain(chunks_updated_in_deleted_array) .chain(chunks_updated_in_updated_array) .chain(chunks_double_updated) diff --git a/icechunk/src/conflicts/mod.rs b/icechunk/src/conflicts/mod.rs index 6bb84e5a..bc6b69d7 100644 --- a/icechunk/src/conflicts/mod.rs +++ b/icechunk/src/conflicts/mod.rs @@ -17,11 +17,7 @@ pub enum Conflict { NewNodeInInvalidGroup(Path), ZarrMetadataDoubleUpdate(Path), ZarrMetadataUpdateOfDeletedArray(Path), - UserAttributesDoubleUpdate { - path: Path, - node_id: NodeId, - }, - UserAttributesUpdateOfDeletedNode(Path), + ZarrMetadataUpdateOfDeletedGroup(Path), ChunkDoubleUpdate { path: Path, node_id: NodeId, diff --git a/icechunk/src/format/flatbuffers/all_generated.rs b/icechunk/src/format/flatbuffers/all_generated.rs index 6b67d66e..a0fe584e 100644 --- a/icechunk/src/format/flatbuffers/all_generated.rs +++ b/icechunk/src/format/flatbuffers/all_generated.rs @@ -17,104 +17,6 @@ pub mod gen { extern crate flatbuffers; use self::flatbuffers::{EndianScalar, Follow}; - #[deprecated( - since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." - )] - pub const ENUM_MIN_USER_ATTRIBUTES_SNAPSHOT: u8 = 0; - #[deprecated( - since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." - )] - pub const ENUM_MAX_USER_ATTRIBUTES_SNAPSHOT: u8 = 2; - #[deprecated( - since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." - )] - #[allow(non_camel_case_types)] - pub const ENUM_VALUES_USER_ATTRIBUTES_SNAPSHOT: [UserAttributesSnapshot; 3] = [ - UserAttributesSnapshot::NONE, - UserAttributesSnapshot::Inline, - UserAttributesSnapshot::Reference, - ]; - - #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] - #[repr(transparent)] - pub struct UserAttributesSnapshot(pub u8); - #[allow(non_upper_case_globals)] - impl UserAttributesSnapshot { - pub const NONE: Self = Self(0); - pub const Inline: Self = Self(1); - pub const Reference: Self = Self(2); - - pub const ENUM_MIN: u8 = 0; - pub const ENUM_MAX: u8 = 2; - pub const ENUM_VALUES: &'static [Self] = - &[Self::NONE, Self::Inline, Self::Reference]; - /// Returns the variant's name or "" if unknown. 
- pub fn variant_name(self) -> Option<&'static str> { - match self { - Self::NONE => Some("NONE"), - Self::Inline => Some("Inline"), - Self::Reference => Some("Reference"), - _ => None, - } - } - } - impl core::fmt::Debug for UserAttributesSnapshot { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - if let Some(name) = self.variant_name() { - f.write_str(name) - } else { - f.write_fmt(format_args!("", self.0)) - } - } - } - impl<'a> flatbuffers::Follow<'a> for UserAttributesSnapshot { - type Inner = Self; - #[inline] - unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - let b = flatbuffers::read_scalar_at::(buf, loc); - Self(b) - } - } - - impl flatbuffers::Push for UserAttributesSnapshot { - type Output = UserAttributesSnapshot; - #[inline] - unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { - flatbuffers::emplace_scalar::(dst, self.0); - } - } - - impl flatbuffers::EndianScalar for UserAttributesSnapshot { - type Scalar = u8; - #[inline] - fn to_little_endian(self) -> u8 { - self.0.to_le() - } - #[inline] - #[allow(clippy::wrong_self_convention)] - fn from_little_endian(v: u8) -> Self { - let b = u8::from_le(v); - Self(b) - } - } - - impl<'a> flatbuffers::Verifiable for UserAttributesSnapshot { - #[inline] - fn run_verifier( - v: &mut flatbuffers::Verifier, - pos: usize, - ) -> Result<(), flatbuffers::InvalidFlatbuffer> { - use self::flatbuffers::Verifiable; - u8::run_verifier(v, pos) - } - } - - impl flatbuffers::SimpleToVerifyInSlice for UserAttributesSnapshot {} - pub struct UserAttributesSnapshotUnionTableOffset {} - #[deprecated( since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." @@ -513,53 +415,56 @@ pub mod gen { } } - // struct AttributeFileInfo, aligned to 1 + // struct ChunkIndexRange, aligned to 4 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] - pub struct AttributeFileInfo(pub [u8; 12]); - impl Default for AttributeFileInfo { + pub struct ChunkIndexRange(pub [u8; 8]); + impl Default for ChunkIndexRange { fn default() -> Self { - Self([0; 12]) + Self([0; 8]) } } - impl core::fmt::Debug for AttributeFileInfo { + impl core::fmt::Debug for ChunkIndexRange { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - f.debug_struct("AttributeFileInfo").field("id", &self.id()).finish() + f.debug_struct("ChunkIndexRange") + .field("from", &self.from()) + .field("to", &self.to()) + .finish() } } - impl flatbuffers::SimpleToVerifyInSlice for AttributeFileInfo {} - impl<'a> flatbuffers::Follow<'a> for AttributeFileInfo { - type Inner = &'a AttributeFileInfo; + impl flatbuffers::SimpleToVerifyInSlice for ChunkIndexRange {} + impl<'a> flatbuffers::Follow<'a> for ChunkIndexRange { + type Inner = &'a ChunkIndexRange; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - <&'a AttributeFileInfo>::follow(buf, loc) + <&'a ChunkIndexRange>::follow(buf, loc) } } - impl<'a> flatbuffers::Follow<'a> for &'a AttributeFileInfo { - type Inner = &'a AttributeFileInfo; + impl<'a> flatbuffers::Follow<'a> for &'a ChunkIndexRange { + type Inner = &'a ChunkIndexRange; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - flatbuffers::follow_cast_ref::(buf, loc) + flatbuffers::follow_cast_ref::(buf, loc) } } - impl<'b> flatbuffers::Push for AttributeFileInfo { - type Output = AttributeFileInfo; + impl<'b> flatbuffers::Push for ChunkIndexRange { + type Output = ChunkIndexRange; #[inline] unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { let src = 
::core::slice::from_raw_parts( - self as *const AttributeFileInfo as *const u8, + self as *const ChunkIndexRange as *const u8, ::size(), ); dst.copy_from_slice(src); } #[inline] fn alignment() -> flatbuffers::PushAlignment { - flatbuffers::PushAlignment::new(1) + flatbuffers::PushAlignment::new(4) } } - impl<'a> flatbuffers::Verifiable for AttributeFileInfo { + impl<'a> flatbuffers::Verifiable for ChunkIndexRange { #[inline] fn run_verifier( v: &mut flatbuffers::Verifier, @@ -570,77 +475,126 @@ pub mod gen { } } - impl<'a> AttributeFileInfo { + impl<'a> ChunkIndexRange { #[allow(clippy::too_many_arguments)] - pub fn new(id: &ObjectId12) -> Self { - let mut s = Self([0; 12]); - s.set_id(id); + pub fn new(from: u32, to: u32) -> Self { + let mut s = Self([0; 8]); + s.set_from(from); + s.set_to(to); s } - pub fn id(&self) -> &ObjectId12 { + pub fn from(&self) -> u32 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); // Safety: // Created from a valid Table for this object - // Which contains a valid struct in this slot - unsafe { &*(self.0[0..].as_ptr() as *const ObjectId12) } + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[0..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) } - #[allow(clippy::identity_op)] - pub fn set_id(&mut self, x: &ObjectId12) { - self.0[0..0 + 12].copy_from_slice(&x.0) + pub fn set_from(&mut self, x: u32) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[0..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } + } + + pub fn to(&self) -> u32 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[4..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) + } + + pub fn set_to(&mut self, x: u32) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[4..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } } } - // struct ChunkIndexRange, aligned to 4 + // struct DimensionShape, aligned to 8 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] - pub struct ChunkIndexRange(pub [u8; 8]); - impl Default for ChunkIndexRange { + pub struct DimensionShape(pub [u8; 16]); + impl Default for DimensionShape { fn default() -> Self { - Self([0; 8]) + Self([0; 16]) } } - impl core::fmt::Debug for ChunkIndexRange { + impl core::fmt::Debug for DimensionShape { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - f.debug_struct("ChunkIndexRange") - .field("from", &self.from()) - .field("to", &self.to()) + f.debug_struct("DimensionShape") + .field("array_length", &self.array_length()) + .field("chunk_length", &self.chunk_length()) .finish() } } - impl flatbuffers::SimpleToVerifyInSlice for ChunkIndexRange {} - impl<'a> flatbuffers::Follow<'a> for ChunkIndexRange { - type Inner = &'a ChunkIndexRange; + impl flatbuffers::SimpleToVerifyInSlice for DimensionShape {} + impl<'a> 
flatbuffers::Follow<'a> for DimensionShape { + type Inner = &'a DimensionShape; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - <&'a ChunkIndexRange>::follow(buf, loc) + <&'a DimensionShape>::follow(buf, loc) } } - impl<'a> flatbuffers::Follow<'a> for &'a ChunkIndexRange { - type Inner = &'a ChunkIndexRange; + impl<'a> flatbuffers::Follow<'a> for &'a DimensionShape { + type Inner = &'a DimensionShape; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - flatbuffers::follow_cast_ref::(buf, loc) + flatbuffers::follow_cast_ref::(buf, loc) } } - impl<'b> flatbuffers::Push for ChunkIndexRange { - type Output = ChunkIndexRange; + impl<'b> flatbuffers::Push for DimensionShape { + type Output = DimensionShape; #[inline] unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { let src = ::core::slice::from_raw_parts( - self as *const ChunkIndexRange as *const u8, + self as *const DimensionShape as *const u8, ::size(), ); dst.copy_from_slice(src); } #[inline] fn alignment() -> flatbuffers::PushAlignment { - flatbuffers::PushAlignment::new(4) + flatbuffers::PushAlignment::new(8) } } - impl<'a> flatbuffers::Verifiable for ChunkIndexRange { + impl<'a> flatbuffers::Verifiable for DimensionShape { #[inline] fn run_verifier( v: &mut flatbuffers::Verifier, @@ -651,18 +605,18 @@ pub mod gen { } } - impl<'a> ChunkIndexRange { + impl<'a> DimensionShape { #[allow(clippy::too_many_arguments)] - pub fn new(from: u32, to: u32) -> Self { - let mut s = Self([0; 8]); - s.set_from(from); - s.set_to(to); + pub fn new(array_length: u64, chunk_length: u64) -> Self { + let mut s = Self([0; 16]); + s.set_array_length(array_length); + s.set_chunk_length(chunk_length); s } - pub fn from(&self) -> u32 { + pub fn array_length(&self) -> u64 { let mut mem = - core::mem::MaybeUninit::<::Scalar>::uninit(); + core::mem::MaybeUninit::<::Scalar>::uninit(); // Safety: // Created from a valid Table for this object // Which contains a valid value in this slot @@ -670,13 +624,13 @@ pub mod gen { core::ptr::copy_nonoverlapping( self.0[0..].as_ptr(), mem.as_mut_ptr() as *mut u8, - core::mem::size_of::<::Scalar>(), + core::mem::size_of::<::Scalar>(), ); mem.assume_init() }) } - pub fn set_from(&mut self, x: u32) { + pub fn set_array_length(&mut self, x: u64) { let x_le = x.to_little_endian(); // Safety: // Created from a valid Table for this object @@ -685,28 +639,28 @@ pub mod gen { core::ptr::copy_nonoverlapping( &x_le as *const _ as *const u8, self.0[0..].as_mut_ptr(), - core::mem::size_of::<::Scalar>(), + core::mem::size_of::<::Scalar>(), ); } } - pub fn to(&self) -> u32 { + pub fn chunk_length(&self) -> u64 { let mut mem = - core::mem::MaybeUninit::<::Scalar>::uninit(); + core::mem::MaybeUninit::<::Scalar>::uninit(); // Safety: // Created from a valid Table for this object // Which contains a valid value in this slot EndianScalar::from_little_endian(unsafe { core::ptr::copy_nonoverlapping( - self.0[4..].as_ptr(), + self.0[8..].as_ptr(), mem.as_mut_ptr() as *mut u8, - core::mem::size_of::<::Scalar>(), + core::mem::size_of::<::Scalar>(), ); mem.assume_init() }) } - pub fn set_to(&mut self, x: u32) { + pub fn set_chunk_length(&mut self, x: u64) { let x_le = x.to_little_endian(); // Safety: // Created from a valid Table for this object @@ -714,8 +668,8 @@ pub mod gen { unsafe { core::ptr::copy_nonoverlapping( &x_le as *const _ as *const u8, - self.0[4..].as_mut_ptr(), - core::mem::size_of::<::Scalar>(), + self.0[8..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), ); } } @@ -1300,312 
+1254,28 @@ pub mod gen { ds.finish() } } - pub enum MetadataItemOffset {} - #[derive(Copy, Clone, PartialEq)] - - pub struct MetadataItem<'a> { - pub _tab: flatbuffers::Table<'a>, - } - - impl<'a> flatbuffers::Follow<'a> for MetadataItem<'a> { - type Inner = MetadataItem<'a>; - #[inline] - unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - Self { _tab: flatbuffers::Table::new(buf, loc) } - } - } - - impl<'a> MetadataItem<'a> { - pub const VT_NAME: flatbuffers::VOffsetT = 4; - pub const VT_VALUE: flatbuffers::VOffsetT = 6; - - #[inline] - pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { - MetadataItem { _tab: table } - } - #[allow(unused_mut)] - pub fn create< - 'bldr: 'args, - 'args: 'mut_bldr, - 'mut_bldr, - A: flatbuffers::Allocator + 'bldr, - >( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, - args: &'args MetadataItemArgs<'args>, - ) -> flatbuffers::WIPOffset> { - let mut builder = MetadataItemBuilder::new(_fbb); - if let Some(x) = args.value { - builder.add_value(x); - } - if let Some(x) = args.name { - builder.add_name(x); - } - builder.finish() - } - - #[inline] - pub fn name(&self) -> &'a str { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::>( - MetadataItem::VT_NAME, - None, - ) - .unwrap() - } - } - #[inline] - pub fn value(&self) -> flatbuffers::Vector<'a, u8> { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::>>( - MetadataItem::VT_VALUE, - None, - ) - .unwrap() - } - } - } - - impl flatbuffers::Verifiable for MetadataItem<'_> { - #[inline] - fn run_verifier( - v: &mut flatbuffers::Verifier, - pos: usize, - ) -> Result<(), flatbuffers::InvalidFlatbuffer> { - use self::flatbuffers::Verifiable; - v.visit_table(pos)? - .visit_field::>( - "name", - Self::VT_NAME, - true, - )? - .visit_field::>>( - "value", - Self::VT_VALUE, - true, - )? 
- .finish(); - Ok(()) - } - } - pub struct MetadataItemArgs<'a> { - pub name: Option>, - pub value: Option>>, - } - impl<'a> Default for MetadataItemArgs<'a> { - #[inline] - fn default() -> Self { - MetadataItemArgs { - name: None, // required field - value: None, // required field - } - } - } - - pub struct MetadataItemBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - start_: flatbuffers::WIPOffset, - } - impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> MetadataItemBuilder<'a, 'b, A> { - #[inline] - pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { - self.fbb_.push_slot_always::>( - MetadataItem::VT_NAME, - name, - ); - } - #[inline] - pub fn add_value( - &mut self, - value: flatbuffers::WIPOffset>, - ) { - self.fbb_.push_slot_always::>( - MetadataItem::VT_VALUE, - value, - ); - } - #[inline] - pub fn new( - _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - ) -> MetadataItemBuilder<'a, 'b, A> { - let start = _fbb.start_table(); - MetadataItemBuilder { fbb_: _fbb, start_: start } - } - #[inline] - pub fn finish(self) -> flatbuffers::WIPOffset> { - let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, MetadataItem::VT_NAME, "name"); - self.fbb_.required(o, MetadataItem::VT_VALUE, "value"); - flatbuffers::WIPOffset::new(o.value()) - } - } - - impl core::fmt::Debug for MetadataItem<'_> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let mut ds = f.debug_struct("MetadataItem"); - ds.field("name", &self.name()); - ds.field("value", &self.value()); - ds.finish() - } - } - pub enum UserAttributesRefOffset {} - #[derive(Copy, Clone, PartialEq)] - - pub struct UserAttributesRef<'a> { - pub _tab: flatbuffers::Table<'a>, - } - - impl<'a> flatbuffers::Follow<'a> for UserAttributesRef<'a> { - type Inner = UserAttributesRef<'a>; - #[inline] - unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - Self { _tab: flatbuffers::Table::new(buf, loc) } - } - } - - impl<'a> UserAttributesRef<'a> { - pub const VT_OBJECT_ID: flatbuffers::VOffsetT = 4; - pub const VT_LOCATION: flatbuffers::VOffsetT = 6; - - #[inline] - pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { - UserAttributesRef { _tab: table } - } - #[allow(unused_mut)] - pub fn create< - 'bldr: 'args, - 'args: 'mut_bldr, - 'mut_bldr, - A: flatbuffers::Allocator + 'bldr, - >( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, - args: &'args UserAttributesRefArgs<'args>, - ) -> flatbuffers::WIPOffset> { - let mut builder = UserAttributesRefBuilder::new(_fbb); - builder.add_location(args.location); - if let Some(x) = args.object_id { - builder.add_object_id(x); - } - builder.finish() - } - - #[inline] - pub fn object_id(&self) -> &'a ObjectId12 { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::(UserAttributesRef::VT_OBJECT_ID, None) - .unwrap() - } - } - #[inline] - pub fn location(&self) -> u32 { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab.get::(UserAttributesRef::VT_LOCATION, Some(0)).unwrap() - } - } - } - - impl flatbuffers::Verifiable for UserAttributesRef<'_> { - #[inline] - fn run_verifier( - v: &mut flatbuffers::Verifier, - pos: usize, - ) -> Result<(), flatbuffers::InvalidFlatbuffer> { - use self::flatbuffers::Verifiable; - v.visit_table(pos)? - .visit_field::("object_id", Self::VT_OBJECT_ID, true)? 
- .visit_field::("location", Self::VT_LOCATION, false)? - .finish(); - Ok(()) - } - } - pub struct UserAttributesRefArgs<'a> { - pub object_id: Option<&'a ObjectId12>, - pub location: u32, - } - impl<'a> Default for UserAttributesRefArgs<'a> { - #[inline] - fn default() -> Self { - UserAttributesRefArgs { - object_id: None, // required field - location: 0, - } - } - } - - pub struct UserAttributesRefBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - start_: flatbuffers::WIPOffset, - } - impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> UserAttributesRefBuilder<'a, 'b, A> { - #[inline] - pub fn add_object_id(&mut self, object_id: &ObjectId12) { - self.fbb_.push_slot_always::<&ObjectId12>( - UserAttributesRef::VT_OBJECT_ID, - object_id, - ); - } - #[inline] - pub fn add_location(&mut self, location: u32) { - self.fbb_.push_slot::(UserAttributesRef::VT_LOCATION, location, 0); - } - #[inline] - pub fn new( - _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - ) -> UserAttributesRefBuilder<'a, 'b, A> { - let start = _fbb.start_table(); - UserAttributesRefBuilder { fbb_: _fbb, start_: start } - } - #[inline] - pub fn finish(self) -> flatbuffers::WIPOffset> { - let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, UserAttributesRef::VT_OBJECT_ID, "object_id"); - flatbuffers::WIPOffset::new(o.value()) - } - } - - impl core::fmt::Debug for UserAttributesRef<'_> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let mut ds = f.debug_struct("UserAttributesRef"); - ds.field("object_id", &self.object_id()); - ds.field("location", &self.location()); - ds.finish() - } - } - pub enum InlineUserAttributesOffset {} + pub enum MetadataItemOffset {} #[derive(Copy, Clone, PartialEq)] - pub struct InlineUserAttributes<'a> { + pub struct MetadataItem<'a> { pub _tab: flatbuffers::Table<'a>, } - impl<'a> flatbuffers::Follow<'a> for InlineUserAttributes<'a> { - type Inner = InlineUserAttributes<'a>; + impl<'a> flatbuffers::Follow<'a> for MetadataItem<'a> { + type Inner = MetadataItem<'a>; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { Self { _tab: flatbuffers::Table::new(buf, loc) } } } - impl<'a> InlineUserAttributes<'a> { - pub const VT_DATA: flatbuffers::VOffsetT = 4; + impl<'a> MetadataItem<'a> { + pub const VT_NAME: flatbuffers::VOffsetT = 4; + pub const VT_VALUE: flatbuffers::VOffsetT = 6; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { - InlineUserAttributes { _tab: table } + MetadataItem { _tab: table } } #[allow(unused_mut)] pub fn create< @@ -1615,24 +1285,41 @@ pub mod gen { A: flatbuffers::Allocator + 'bldr, >( _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, - args: &'args InlineUserAttributesArgs<'args>, - ) -> flatbuffers::WIPOffset> { - let mut builder = InlineUserAttributesBuilder::new(_fbb); - if let Some(x) = args.data { - builder.add_data(x); + args: &'args MetadataItemArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = MetadataItemBuilder::new(_fbb); + if let Some(x) = args.value { + builder.add_value(x); + } + if let Some(x) = args.name { + builder.add_name(x); } builder.finish() } #[inline] - pub fn data(&self) -> flatbuffers::Vector<'a, u8> { + pub fn name(&self) -> &'a str { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + MetadataItem::VT_NAME, + None, + ) + .unwrap() + } + } + #[inline] + pub fn value(&self) -> 
flatbuffers::Vector<'a, u8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab .get::>>( - InlineUserAttributes::VT_DATA, + MetadataItem::VT_VALUE, None, ) .unwrap() @@ -1640,7 +1327,7 @@ pub mod gen { } } - impl flatbuffers::Verifiable for InlineUserAttributes<'_> { + impl flatbuffers::Verifiable for MetadataItem<'_> { #[inline] fn run_verifier( v: &mut flatbuffers::Verifier, @@ -1648,61 +1335,77 @@ pub mod gen { ) -> Result<(), flatbuffers::InvalidFlatbuffer> { use self::flatbuffers::Verifiable; v.visit_table(pos)? + .visit_field::>( + "name", + Self::VT_NAME, + true, + )? .visit_field::>>( - "data", - Self::VT_DATA, + "value", + Self::VT_VALUE, true, )? .finish(); Ok(()) } } - pub struct InlineUserAttributesArgs<'a> { - pub data: Option>>, + pub struct MetadataItemArgs<'a> { + pub name: Option>, + pub value: Option>>, } - impl<'a> Default for InlineUserAttributesArgs<'a> { + impl<'a> Default for MetadataItemArgs<'a> { #[inline] fn default() -> Self { - InlineUserAttributesArgs { - data: None, // required field - } + MetadataItemArgs { + name: None, // required field + value: None, // required field + } } } - pub struct InlineUserAttributesBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + pub struct MetadataItemBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, start_: flatbuffers::WIPOffset, } - impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> InlineUserAttributesBuilder<'a, 'b, A> { + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> MetadataItemBuilder<'a, 'b, A> { + #[inline] + pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + MetadataItem::VT_NAME, + name, + ); + } #[inline] - pub fn add_data( + pub fn add_value( &mut self, - data: flatbuffers::WIPOffset>, + value: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - InlineUserAttributes::VT_DATA, - data, + MetadataItem::VT_VALUE, + value, ); } #[inline] pub fn new( _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - ) -> InlineUserAttributesBuilder<'a, 'b, A> { + ) -> MetadataItemBuilder<'a, 'b, A> { let start = _fbb.start_table(); - InlineUserAttributesBuilder { fbb_: _fbb, start_: start } + MetadataItemBuilder { fbb_: _fbb, start_: start } } #[inline] - pub fn finish(self) -> flatbuffers::WIPOffset> { + pub fn finish(self) -> flatbuffers::WIPOffset> { let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, InlineUserAttributes::VT_DATA, "data"); + self.fbb_.required(o, MetadataItem::VT_NAME, "name"); + self.fbb_.required(o, MetadataItem::VT_VALUE, "value"); flatbuffers::WIPOffset::new(o.value()) } } - impl core::fmt::Debug for InlineUserAttributes<'_> { + impl core::fmt::Debug for MetadataItem<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let mut ds = f.debug_struct("InlineUserAttributes"); - ds.field("data", &self.data()); + let mut ds = f.debug_struct("MetadataItem"); + ds.field("name", &self.name()); + ds.field("value", &self.value()); ds.finish() } } @@ -1842,6 +1545,119 @@ pub mod gen { ds.finish() } } + pub enum DimensionNameOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct DimensionName<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for DimensionName<'a> { + type Inner = DimensionName<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> 
DimensionName<'a> { + pub const VT_NAME: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + DimensionName { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args DimensionNameArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = DimensionNameBuilder::new(_fbb); + if let Some(x) = args.name { + builder.add_name(x); + } + builder.finish() + } + + #[inline] + pub fn name(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + DimensionName::VT_NAME, + None, + ) + } + } + } + + impl flatbuffers::Verifiable for DimensionName<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>( + "name", + Self::VT_NAME, + false, + )? + .finish(); + Ok(()) + } + } + pub struct DimensionNameArgs<'a> { + pub name: Option>, + } + impl<'a> Default for DimensionNameArgs<'a> { + #[inline] + fn default() -> Self { + DimensionNameArgs { name: None } + } + } + + pub struct DimensionNameBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> DimensionNameBuilder<'a, 'b, A> { + #[inline] + pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + DimensionName::VT_NAME, + name, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> DimensionNameBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + DimensionNameBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for DimensionName<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("DimensionName"); + ds.field("name", &self.name()); + ds.finish() + } + } pub enum GroupNodeDataOffset {} #[derive(Copy, Clone, PartialEq)] @@ -1937,8 +1753,9 @@ pub mod gen { } impl<'a> ArrayNodeData<'a> { - pub const VT_ZARR_METADATA: flatbuffers::VOffsetT = 4; - pub const VT_MANIFESTS: flatbuffers::VOffsetT = 6; + pub const VT_SHAPE: flatbuffers::VOffsetT = 4; + pub const VT_DIMENSION_NAMES: flatbuffers::VOffsetT = 6; + pub const VT_MANIFESTS: flatbuffers::VOffsetT = 8; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -1958,24 +1775,37 @@ pub mod gen { if let Some(x) = args.manifests { builder.add_manifests(x); } - if let Some(x) = args.zarr_metadata { - builder.add_zarr_metadata(x); + if let Some(x) = args.dimension_names { + builder.add_dimension_names(x); + } + if let Some(x) = args.shape { + builder.add_shape(x); } builder.finish() } #[inline] - pub fn zarr_metadata(&self) -> flatbuffers::Vector<'a, u8> { + pub fn shape(&self) -> flatbuffers::Vector<'a, DimensionShape> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { - self._tab - .get::>>( - ArrayNodeData::VT_ZARR_METADATA, - None, - ) - .unwrap() + 
self._tab.get::>>(ArrayNodeData::VT_SHAPE, None).unwrap() + } + } + #[inline] + pub fn dimension_names( + &self, + ) -> Option< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + > { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(ArrayNodeData::VT_DIMENSION_NAMES, None) } } #[inline] @@ -2007,20 +1837,21 @@ pub mod gen { ) -> Result<(), flatbuffers::InvalidFlatbuffer> { use self::flatbuffers::Verifiable; v.visit_table(pos)? - .visit_field::>>( - "zarr_metadata", - Self::VT_ZARR_METADATA, - true, - )? - .visit_field::>, - >>("manifests", Self::VT_MANIFESTS, true)? - .finish(); + .visit_field::>>("shape", Self::VT_SHAPE, true)? + .visit_field::>>>("dimension_names", Self::VT_DIMENSION_NAMES, false)? + .visit_field::>>>("manifests", Self::VT_MANIFESTS, true)? + .finish(); Ok(()) } } pub struct ArrayNodeDataArgs<'a> { - pub zarr_metadata: Option>>, + pub shape: + Option>>, + pub dimension_names: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, pub manifests: Option< flatbuffers::WIPOffset< flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, @@ -2031,8 +1862,9 @@ pub mod gen { #[inline] fn default() -> Self { ArrayNodeDataArgs { - zarr_metadata: None, // required field - manifests: None, // required field + shape: None, // required field + dimension_names: None, + manifests: None, // required field } } } @@ -2043,13 +1875,25 @@ pub mod gen { } impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayNodeDataBuilder<'a, 'b, A> { #[inline] - pub fn add_zarr_metadata( + pub fn add_shape( + &mut self, + shape: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + ArrayNodeData::VT_SHAPE, + shape, + ); + } + #[inline] + pub fn add_dimension_names( &mut self, - zarr_metadata: flatbuffers::WIPOffset>, + dimension_names: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, ) { self.fbb_.push_slot_always::>( - ArrayNodeData::VT_ZARR_METADATA, - zarr_metadata, + ArrayNodeData::VT_DIMENSION_NAMES, + dimension_names, ); } #[inline] @@ -2074,7 +1918,7 @@ pub mod gen { #[inline] pub fn finish(self) -> flatbuffers::WIPOffset> { let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, ArrayNodeData::VT_ZARR_METADATA, "zarr_metadata"); + self.fbb_.required(o, ArrayNodeData::VT_SHAPE, "shape"); self.fbb_.required(o, ArrayNodeData::VT_MANIFESTS, "manifests"); flatbuffers::WIPOffset::new(o.value()) } @@ -2083,7 +1927,8 @@ pub mod gen { impl core::fmt::Debug for ArrayNodeData<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut ds = f.debug_struct("ArrayNodeData"); - ds.field("zarr_metadata", &self.zarr_metadata()); + ds.field("shape", &self.shape()); + ds.field("dimension_names", &self.dimension_names()); ds.field("manifests", &self.manifests()); ds.finish() } @@ -2106,10 +1951,9 @@ pub mod gen { impl<'a> NodeSnapshot<'a> { pub const VT_ID: flatbuffers::VOffsetT = 4; pub const VT_PATH: flatbuffers::VOffsetT = 6; - pub const VT_USER_ATTRIBUTES_TYPE: flatbuffers::VOffsetT = 8; - pub const VT_USER_ATTRIBUTES: flatbuffers::VOffsetT = 10; - pub const VT_NODE_DATA_TYPE: flatbuffers::VOffsetT = 12; - pub const VT_NODE_DATA: flatbuffers::VOffsetT = 14; + pub const VT_USER_DATA: flatbuffers::VOffsetT = 8; + pub const VT_NODE_DATA_TYPE: flatbuffers::VOffsetT = 10; + pub const VT_NODE_DATA: flatbuffers::VOffsetT = 12; #[inline] pub unsafe fn init_from_table(table: 
flatbuffers::Table<'a>) -> Self { @@ -2129,8 +1973,8 @@ pub mod gen { if let Some(x) = args.node_data { builder.add_node_data(x); } - if let Some(x) = args.user_attributes { - builder.add_user_attributes(x); + if let Some(x) = args.user_data { + builder.add_user_data(x); } if let Some(x) = args.path { builder.add_path(x); @@ -2139,7 +1983,6 @@ pub mod gen { builder.add_id(x); } builder.add_node_data_type(args.node_data_type); - builder.add_user_attributes_type(args.user_attributes_type); builder.finish() } @@ -2165,32 +2008,20 @@ pub mod gen { } } #[inline] - pub fn user_attributes_type(&self) -> UserAttributesSnapshot { + pub fn user_data(&self) -> flatbuffers::Vector<'a, u8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab - .get::( - NodeSnapshot::VT_USER_ATTRIBUTES_TYPE, - Some(UserAttributesSnapshot::NONE), + .get::>>( + NodeSnapshot::VT_USER_DATA, + None, ) .unwrap() } } #[inline] - pub fn user_attributes(&self) -> Option> { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab.get::>>( - NodeSnapshot::VT_USER_ATTRIBUTES, - None, - ) - } - } - #[inline] pub fn node_data_type(&self) -> NodeData { // Safety: // Created from valid Table for this object @@ -2218,36 +2049,6 @@ pub mod gen { .unwrap() } } - #[inline] - #[allow(non_snake_case)] - pub fn user_attributes_as_inline(&self) -> Option> { - if self.user_attributes_type() == UserAttributesSnapshot::Inline { - self.user_attributes().map(|t| { - // Safety: - // Created from a valid Table for this object - // Which contains a valid union in this slot - unsafe { InlineUserAttributes::init_from_table(t) } - }) - } else { - None - } - } - - #[inline] - #[allow(non_snake_case)] - pub fn user_attributes_as_reference(&self) -> Option> { - if self.user_attributes_type() == UserAttributesSnapshot::Reference { - self.user_attributes().map(|t| { - // Safety: - // Created from a valid Table for this object - // Which contains a valid union in this slot - unsafe { UserAttributesRef::init_from_table(t) } - }) - } else { - None - } - } - #[inline] #[allow(non_snake_case)] pub fn node_data_as_array(&self) -> Option> { @@ -2287,13 +2088,7 @@ pub mod gen { v.visit_table(pos)? .visit_field::("id", Self::VT_ID, true)? .visit_field::>("path", Self::VT_PATH, true)? - .visit_union::("user_attributes_type", Self::VT_USER_ATTRIBUTES_TYPE, "user_attributes", Self::VT_USER_ATTRIBUTES, false, |key, v, pos| { - match key { - UserAttributesSnapshot::Inline => v.verify_union_variant::>("UserAttributesSnapshot::Inline", pos), - UserAttributesSnapshot::Reference => v.verify_union_variant::>("UserAttributesSnapshot::Reference", pos), - _ => Ok(()), - } - })? + .visit_field::>>("user_data", Self::VT_USER_DATA, true)? 
.visit_union::("node_data_type", Self::VT_NODE_DATA_TYPE, "node_data", Self::VT_NODE_DATA, true, |key, v, pos| { match key { NodeData::Array => v.verify_union_variant::>("NodeData::Array", pos), @@ -2308,8 +2103,7 @@ pub mod gen { pub struct NodeSnapshotArgs<'a> { pub id: Option<&'a ObjectId8>, pub path: Option>, - pub user_attributes_type: UserAttributesSnapshot, - pub user_attributes: Option>, + pub user_data: Option>>, pub node_data_type: NodeData, pub node_data: Option>, } @@ -2317,10 +2111,9 @@ pub mod gen { #[inline] fn default() -> Self { NodeSnapshotArgs { - id: None, // required field - path: None, // required field - user_attributes_type: UserAttributesSnapshot::NONE, - user_attributes: None, + id: None, // required field + path: None, // required field + user_data: None, // required field node_data_type: NodeData::NONE, node_data: None, // required field } @@ -2344,24 +2137,13 @@ pub mod gen { ); } #[inline] - pub fn add_user_attributes_type( - &mut self, - user_attributes_type: UserAttributesSnapshot, - ) { - self.fbb_.push_slot::( - NodeSnapshot::VT_USER_ATTRIBUTES_TYPE, - user_attributes_type, - UserAttributesSnapshot::NONE, - ); - } - #[inline] - pub fn add_user_attributes( + pub fn add_user_data( &mut self, - user_attributes: flatbuffers::WIPOffset, + user_data: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - NodeSnapshot::VT_USER_ATTRIBUTES, - user_attributes, + NodeSnapshot::VT_USER_DATA, + user_data, ); } #[inline] @@ -2394,6 +2176,7 @@ pub mod gen { let o = self.fbb_.end_table(self.start_); self.fbb_.required(o, NodeSnapshot::VT_ID, "id"); self.fbb_.required(o, NodeSnapshot::VT_PATH, "path"); + self.fbb_.required(o, NodeSnapshot::VT_USER_DATA, "user_data"); self.fbb_.required(o, NodeSnapshot::VT_NODE_DATA, "node_data"); flatbuffers::WIPOffset::new(o.value()) } @@ -2404,27 +2187,7 @@ pub mod gen { let mut ds = f.debug_struct("NodeSnapshot"); ds.field("id", &self.id()); ds.field("path", &self.path()); - ds.field("user_attributes_type", &self.user_attributes_type()); - match self.user_attributes_type() { - UserAttributesSnapshot::Inline => { - if let Some(x) = self.user_attributes_as_inline() { - ds.field("user_attributes", &x) - } else { - ds.field("user_attributes", &"InvalidFlatbuffer: Union discriminant does not match value.") - } - } - UserAttributesSnapshot::Reference => { - if let Some(x) = self.user_attributes_as_reference() { - ds.field("user_attributes", &x) - } else { - ds.field("user_attributes", &"InvalidFlatbuffer: Union discriminant does not match value.") - } - } - _ => { - let x: Option<()> = None; - ds.field("user_attributes", &x) - } - }; + ds.field("user_data", &self.user_data()); ds.field("node_data_type", &self.node_data_type()); match self.node_data_type() { NodeData::Array => { @@ -2472,7 +2235,6 @@ pub mod gen { pub const VT_MESSAGE: flatbuffers::VOffsetT = 12; pub const VT_METADATA: flatbuffers::VOffsetT = 14; pub const VT_MANIFEST_FILES: flatbuffers::VOffsetT = 16; - pub const VT_ATTRIBUTE_FILES: flatbuffers::VOffsetT = 18; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -2490,9 +2252,6 @@ pub mod gen { ) -> flatbuffers::WIPOffset> { let mut builder = SnapshotBuilder::new(_fbb); builder.add_flushed_at(args.flushed_at); - if let Some(x) = args.attribute_files { - builder.add_attribute_files(x); - } if let Some(x) = args.manifest_files { builder.add_manifest_files(x); } @@ -2597,19 +2356,6 @@ pub mod gen { .unwrap() } } - #[inline] - pub fn attribute_files(&self) -> flatbuffers::Vector<'a, 
AttributeFileInfo> { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::, - >>(Snapshot::VT_ATTRIBUTE_FILES, None) - .unwrap() - } - } } impl flatbuffers::Verifiable for Snapshot<'_> { @@ -2627,7 +2373,6 @@ pub mod gen { .visit_field::>("message", Self::VT_MESSAGE, true)? .visit_field::>>>("metadata", Self::VT_METADATA, true)? .visit_field::>>("manifest_files", Self::VT_MANIFEST_FILES, true)? - .visit_field::>>("attribute_files", Self::VT_ATTRIBUTE_FILES, true)? .finish(); Ok(()) } @@ -2649,8 +2394,6 @@ pub mod gen { >, pub manifest_files: Option>>, - pub attribute_files: - Option>>, } impl<'a> Default for SnapshotArgs<'a> { #[inline] @@ -2660,10 +2403,9 @@ pub mod gen { parent_id: None, nodes: None, // required field flushed_at: 0, - message: None, // required field - metadata: None, // required field - manifest_files: None, // required field - attribute_files: None, // required field + message: None, // required field + metadata: None, // required field + manifest_files: None, // required field } } } @@ -2727,18 +2469,6 @@ pub mod gen { ); } #[inline] - pub fn add_attribute_files( - &mut self, - attribute_files: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, AttributeFileInfo>, - >, - ) { - self.fbb_.push_slot_always::>( - Snapshot::VT_ATTRIBUTE_FILES, - attribute_files, - ); - } - #[inline] pub fn new( _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, ) -> SnapshotBuilder<'a, 'b, A> { @@ -2753,7 +2483,6 @@ pub mod gen { self.fbb_.required(o, Snapshot::VT_MESSAGE, "message"); self.fbb_.required(o, Snapshot::VT_METADATA, "metadata"); self.fbb_.required(o, Snapshot::VT_MANIFEST_FILES, "manifest_files"); - self.fbb_.required(o, Snapshot::VT_ATTRIBUTE_FILES, "attribute_files"); flatbuffers::WIPOffset::new(o.value()) } } @@ -2768,7 +2497,6 @@ pub mod gen { ds.field("message", &self.message()); ds.field("metadata", &self.metadata()); ds.field("manifest_files", &self.manifest_files()); - ds.field("attribute_files", &self.attribute_files()); ds.finish() } } @@ -3063,8 +2791,8 @@ pub mod gen { pub const VT_NEW_ARRAYS: flatbuffers::VOffsetT = 8; pub const VT_DELETED_GROUPS: flatbuffers::VOffsetT = 10; pub const VT_DELETED_ARRAYS: flatbuffers::VOffsetT = 12; - pub const VT_UPDATED_USER_ATTRIBUTES: flatbuffers::VOffsetT = 14; - pub const VT_UPDATED_ZARR_METADATA: flatbuffers::VOffsetT = 16; + pub const VT_UPDATED_ARRAYS: flatbuffers::VOffsetT = 14; + pub const VT_UPDATED_GROUPS: flatbuffers::VOffsetT = 16; pub const VT_UPDATED_CHUNKS: flatbuffers::VOffsetT = 18; #[inline] @@ -3085,11 +2813,11 @@ pub mod gen { if let Some(x) = args.updated_chunks { builder.add_updated_chunks(x); } - if let Some(x) = args.updated_zarr_metadata { - builder.add_updated_zarr_metadata(x); + if let Some(x) = args.updated_groups { + builder.add_updated_groups(x); } - if let Some(x) = args.updated_user_attributes { - builder.add_updated_user_attributes(x); + if let Some(x) = args.updated_arrays { + builder.add_updated_arrays(x); } if let Some(x) = args.deleted_arrays { builder.add_deleted_arrays(x); @@ -3153,21 +2881,21 @@ pub mod gen { } } #[inline] - pub fn updated_user_attributes(&self) -> flatbuffers::Vector<'a, ObjectId8> { + pub fn updated_arrays(&self) -> flatbuffers::Vector<'a, ObjectId8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { - self._tab.get::>>(TransactionLog::VT_UPDATED_USER_ATTRIBUTES, None).unwrap() + 
self._tab.get::>>(TransactionLog::VT_UPDATED_ARRAYS, None).unwrap() } } #[inline] - pub fn updated_zarr_metadata(&self) -> flatbuffers::Vector<'a, ObjectId8> { + pub fn updated_groups(&self) -> flatbuffers::Vector<'a, ObjectId8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { - self._tab.get::>>(TransactionLog::VT_UPDATED_ZARR_METADATA, None).unwrap() + self._tab.get::>>(TransactionLog::VT_UPDATED_GROUPS, None).unwrap() } } #[inline] @@ -3204,8 +2932,8 @@ pub mod gen { .visit_field::>>("new_arrays", Self::VT_NEW_ARRAYS, true)? .visit_field::>>("deleted_groups", Self::VT_DELETED_GROUPS, true)? .visit_field::>>("deleted_arrays", Self::VT_DELETED_ARRAYS, true)? - .visit_field::>>("updated_user_attributes", Self::VT_UPDATED_USER_ATTRIBUTES, true)? - .visit_field::>>("updated_zarr_metadata", Self::VT_UPDATED_ZARR_METADATA, true)? + .visit_field::>>("updated_arrays", Self::VT_UPDATED_ARRAYS, true)? + .visit_field::>>("updated_groups", Self::VT_UPDATED_GROUPS, true)? .visit_field::>>>("updated_chunks", Self::VT_UPDATED_CHUNKS, true)? .finish(); Ok(()) @@ -3221,9 +2949,9 @@ pub mod gen { Option>>, pub deleted_arrays: Option>>, - pub updated_user_attributes: + pub updated_arrays: Option>>, - pub updated_zarr_metadata: + pub updated_groups: Option>>, pub updated_chunks: Option< flatbuffers::WIPOffset< @@ -3238,14 +2966,14 @@ pub mod gen { #[inline] fn default() -> Self { TransactionLogArgs { - id: None, // required field - new_groups: None, // required field - new_arrays: None, // required field - deleted_groups: None, // required field - deleted_arrays: None, // required field - updated_user_attributes: None, // required field - updated_zarr_metadata: None, // required field - updated_chunks: None, // required field + id: None, // required field + new_groups: None, // required field + new_arrays: None, // required field + deleted_groups: None, // required field + deleted_arrays: None, // required field + updated_arrays: None, // required field + updated_groups: None, // required field + updated_chunks: None, // required field } } } @@ -3300,27 +3028,23 @@ pub mod gen { ); } #[inline] - pub fn add_updated_user_attributes( + pub fn add_updated_arrays( &mut self, - updated_user_attributes: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, ObjectId8>, - >, + updated_arrays: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - TransactionLog::VT_UPDATED_USER_ATTRIBUTES, - updated_user_attributes, + TransactionLog::VT_UPDATED_ARRAYS, + updated_arrays, ); } #[inline] - pub fn add_updated_zarr_metadata( + pub fn add_updated_groups( &mut self, - updated_zarr_metadata: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, ObjectId8>, - >, + updated_groups: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - TransactionLog::VT_UPDATED_ZARR_METADATA, - updated_zarr_metadata, + TransactionLog::VT_UPDATED_GROUPS, + updated_groups, ); } #[inline] @@ -3353,16 +3077,8 @@ pub mod gen { self.fbb_.required(o, TransactionLog::VT_NEW_ARRAYS, "new_arrays"); self.fbb_.required(o, TransactionLog::VT_DELETED_GROUPS, "deleted_groups"); self.fbb_.required(o, TransactionLog::VT_DELETED_ARRAYS, "deleted_arrays"); - self.fbb_.required( - o, - TransactionLog::VT_UPDATED_USER_ATTRIBUTES, - "updated_user_attributes", - ); - self.fbb_.required( - o, - TransactionLog::VT_UPDATED_ZARR_METADATA, - "updated_zarr_metadata", - ); + self.fbb_.required(o, TransactionLog::VT_UPDATED_ARRAYS, "updated_arrays"); + self.fbb_.required(o, 
TransactionLog::VT_UPDATED_GROUPS, "updated_groups"); self.fbb_.required(o, TransactionLog::VT_UPDATED_CHUNKS, "updated_chunks"); flatbuffers::WIPOffset::new(o.value()) } @@ -3376,8 +3092,8 @@ pub mod gen { ds.field("new_arrays", &self.new_arrays()); ds.field("deleted_groups", &self.deleted_groups()); ds.field("deleted_arrays", &self.deleted_arrays()); - ds.field("updated_user_attributes", &self.updated_user_attributes()); - ds.field("updated_zarr_metadata", &self.updated_zarr_metadata()); + ds.field("updated_arrays", &self.updated_arrays()); + ds.field("updated_groups", &self.updated_groups()); ds.field("updated_chunks", &self.updated_chunks()); ds.finish() } diff --git a/icechunk/src/format/mod.rs b/icechunk/src/format/mod.rs index 202820e7..dcb2c877 100644 --- a/icechunk/src/format/mod.rs +++ b/icechunk/src/format/mod.rs @@ -19,7 +19,7 @@ use serde_with::{serde_as, TryFromInto}; use thiserror::Error; use typed_path::Utf8UnixPathBuf; -use crate::{error::ICError, metadata::DataType, private}; +use crate::{error::ICError, private}; pub mod attributes; pub mod manifest; @@ -241,11 +241,6 @@ pub type TableOffset = u32; pub enum IcechunkFormatErrorKind { #[error(transparent)] VirtualReferenceError(VirtualReferenceErrorKind), - - #[error("error decoding fill_value from array. Found size: {found_size}, target size: {target_size}, type: {target_type}")] - FillValueDecodeError { found_size: usize, target_size: usize, target_type: DataType }, - #[error("error decoding fill_value from json. Type: {data_type}, value: {value}")] - FillValueParse { data_type: DataType, value: serde_json::Value }, #[error("node not found at `{path:?}`")] NodeNotFound { path: Path }, #[error("chunk coordinates not found `{coords:?}`")] diff --git a/icechunk/src/format/snapshot.rs b/icechunk/src/format/snapshot.rs index 13aeaac6..c6ebcbfb 100644 --- a/icechunk/src/format/snapshot.rs +++ b/icechunk/src/format/snapshot.rs @@ -1,5 +1,6 @@ -use std::{collections::BTreeMap, convert::Infallible, sync::Arc}; +use std::{collections::BTreeMap, convert::Infallible, num::NonZeroU64, sync::Arc}; +use bytes::Bytes; use chrono::{DateTime, Utc}; use err_into::ErrorInto; use flatbuffers::{FlatBufferBuilder, VerifierOptions}; @@ -7,68 +8,44 @@ use itertools::Itertools as _; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::metadata::{ - ArrayShape, ChunkKeyEncoding, ChunkShape, Codec, DataType, DimensionNames, FillValue, - StorageTransformer, UserAttributes, -}; - use super::{ flatbuffers::gen, manifest::{Manifest, ManifestExtents, ManifestRef}, AttributesId, ChunkIndices, IcechunkFormatError, IcechunkFormatErrorKind, - IcechunkResult, ManifestId, NodeId, Path, SnapshotId, TableOffset, + IcechunkResult, ManifestId, NodeId, Path, SnapshotId, }; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct UserAttributesRef { - pub object_id: AttributesId, - pub location: TableOffset, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub enum UserAttributesSnapshot { - Inline(UserAttributes), - Ref(UserAttributesRef), -} - -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub enum NodeType { - Group, - Array, +pub struct DimensionShape { + dim_length: u64, + chunk_length: u64, } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ZarrArrayMetadata { - pub shape: ArrayShape, - pub data_type: DataType, - pub chunk_shape: ChunkShape, - pub chunk_key_encoding: ChunkKeyEncoding, - pub fill_value: FillValue, - pub codecs: Vec, - pub storage_transformers: Option>, - pub 
dimension_names: Option, +impl DimensionShape { + pub fn new(array_length: u64, chunk_length: NonZeroU64) -> Self { + Self { dim_length: array_length, chunk_length: chunk_length.get() } + } + pub fn array_length(&self) -> u64 { + self.dim_length + } + pub fn chunk_length(&self) -> u64 { + self.chunk_length + } } -impl ZarrArrayMetadata { - /// Returns an iterator over the maximum permitted chunk indices for the array. - /// - /// This function calculates the maximum chunk indices based on the shape of the array - /// and the chunk shape, using (shape - 1) / chunk_shape. Given integer division is truncating, - /// this will always result in proper indices at the boundaries. - /// - /// # Returns - /// - /// A ChunkIndices type containing the max chunk index for each dimension. - fn max_chunk_indices_permitted(&self) -> ChunkIndices { - debug_assert_eq!(self.shape.len(), self.chunk_shape.0.len()); +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ArrayShape(Vec); - ChunkIndices( - self.shape - .iter() - .zip(self.chunk_shape.0.iter()) - .map(|(s, cs)| if *s == 0 { 0 } else { ((s - 1) / cs.get()) as u32 }) - .collect(), - ) +impl ArrayShape { + pub fn new(it: I) -> Option + where + I: IntoIterator, + { + let v = it.into_iter().map(|(al, cl)| { + let cl = NonZeroU64::new(cl)?; + Some(DimensionShape::new(al, cl)) + }); + v.collect::>>().map(Self) } /// Validates the provided chunk coordinates for the array. @@ -87,27 +64,75 @@ impl ZarrArrayMetadata { /// /// Returns false if the chunk coordinates are invalid. pub fn valid_chunk_coord(&self, coord: &ChunkIndices) -> bool { - debug_assert_eq!(self.shape.len(), coord.0.len()); - coord .0 .iter() - .zip(self.max_chunk_indices_permitted().0) + .zip(self.max_chunk_indices_permitted()) .all(|(index, index_permitted)| *index <= index_permitted) } + + /// Returns an iterator over the maximum permitted chunk indices for the array. + /// + /// This function calculates the maximum chunk indices based on the shape of the array + /// and the chunk shape, using (shape - 1) / chunk_shape. Given integer division is truncating, + /// this will always result in proper indices at the boundaries. 
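// A minimal sketch of the (shape - 1) / chunk_shape rule described above, assuming
// `ArrayShape::new` and `ChunkIndices` are in scope as defined in this file; the helper
// name below is hypothetical and only for illustration, not part of the patch.
// With a dimension of length 10 and chunk length 3, (10 - 1) / 3 = 3 is the largest
// permitted chunk index, and a zero-length dimension only admits index 0.
fn _sketch_valid_chunk_coord() {
    let shape = ArrayShape::new(vec![(10u64, 3), (0, 2)]).expect("chunk lengths are non-zero");
    assert!(shape.valid_chunk_coord(&ChunkIndices(vec![3, 0])));
    assert!(!shape.valid_chunk_coord(&ChunkIndices(vec![4, 0])));
}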
+ fn max_chunk_indices_permitted(&self) -> impl Iterator + '_ { + self.0.iter().map(|dim_shape| { + if dim_shape.chunk_length == 0 || dim_shape.dim_length == 0 { + 0 + } else { + ((dim_shape.dim_length - 1) / dim_shape.chunk_length) as u32 + } + }) + } } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum DimensionName { + NotSpecified, + Name(String), +} + +impl From> for DimensionName { + fn from(value: Option<&str>) -> Self { + match value { + Some(s) => s.into(), + None => DimensionName::NotSpecified, + } + } +} + +impl From<&str> for DimensionName { + fn from(value: &str) -> Self { + if value.is_empty() { + DimensionName::NotSpecified + } else { + DimensionName::Name(value.to_string()) + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum NodeData { - Array(ZarrArrayMetadata, Vec), + Array { + shape: ArrayShape, + dimension_names: Option>, + manifests: Vec, + }, + Group, +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub enum NodeType { Group, + Array, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct NodeSnapshot { pub id: NodeId, pub path: Path, - pub user_attributes: Option, + pub user_data: Bytes, pub node_data: NodeData, } @@ -115,7 +140,7 @@ impl NodeSnapshot { pub fn node_type(&self) -> NodeType { match &self.node_data { NodeData::Group => NodeType::Group, - NodeData::Array(_, _) => NodeType::Array, + NodeData::Array { .. } => NodeType::Array, } } } @@ -147,14 +172,23 @@ impl<'a> From> for ManifestRef { } } -impl<'a> TryFrom> for NodeData { - type Error = rmp_serde::decode::Error; +impl From<&gen::DimensionShape> for DimensionShape { + fn from(value: &gen::DimensionShape) -> Self { + DimensionShape { + dim_length: value.array_length(), + chunk_length: value.chunk_length(), + } + } +} - fn try_from(value: gen::ArrayNodeData<'a>) -> Result { - // TODO: is it ok to call `bytes` here? 
Or do we need to collect an iterator - let meta = rmp_serde::from_slice(value.zarr_metadata().bytes())?; - let manifest_refs = value.manifests().iter().map(|m| m.into()).collect(); - Ok(Self::Array(meta, manifest_refs)) +impl<'a> From> for NodeData { + fn from(value: gen::ArrayNodeData<'a>) -> Self { + let dimension_names = value + .dimension_names() + .map(|dn| dn.iter().map(|name| name.name().into()).collect()); + let shape = ArrayShape(value.shape().iter().map(|dim| dim.into()).collect()); + let manifests = value.manifests().iter().map(|m| m.into()).collect(); + Self::Array { shape, dimension_names, manifests } } } @@ -164,62 +198,25 @@ impl<'a> From> for NodeData { } } -impl<'a> TryFrom> for UserAttributesSnapshot { - type Error = rmp_serde::decode::Error; - - fn try_from(value: gen::InlineUserAttributes<'a>) -> Result { - let parsed = rmp_serde::from_slice(value.data().bytes())?; - Ok(Self::Inline(UserAttributes { parsed })) - } -} - -impl<'a> From> for UserAttributesSnapshot { - fn from(value: gen::UserAttributesRef<'a>) -> Self { - Self::Ref(UserAttributesRef { - object_id: value.object_id().into(), - location: value.location(), - }) - } -} - impl<'a> TryFrom> for NodeSnapshot { type Error = IcechunkFormatError; fn try_from(value: gen::NodeSnapshot<'a>) -> Result { #[allow(clippy::expect_used, clippy::panic)] let node_data: NodeData = match value.node_data_type() { - gen::NodeData::Array => value - .node_data_as_array() - .expect("Bug in flatbuffers library") - .try_into()?, + gen::NodeData::Array => { + value.node_data_as_array().expect("Bug in flatbuffers library").into() + } gen::NodeData::Group => { value.node_data_as_group().expect("Bug in flatbuffers library").into() } x => panic!("Invalid node data type in flatbuffers file {:?}", x), }; - #[allow(clippy::expect_used, clippy::panic)] - let user_attributes: Option = - match value.user_attributes_type() { - gen::UserAttributesSnapshot::Inline => Some( - value - .user_attributes_as_inline() - .expect("Bug in flatbuffers library") - .try_into()?, - ), - gen::UserAttributesSnapshot::Reference => Some( - value - .user_attributes_as_reference() - .expect("Bug in flatbuffers library") - .into(), - ), - gen::UserAttributesSnapshot::NONE => None, - x => panic!("Invalid user attributes type in flatbuffers file {:?}", x), - }; let res = NodeSnapshot { id: value.id().into(), path: value.path().to_string().try_into()?, - user_attributes, node_data, + user_data: Bytes::copy_from_slice(value.user_data().bytes()), }; Ok(res) } @@ -235,12 +232,6 @@ impl From<&gen::ManifestFileInfo> for ManifestFileInfo { } } -impl From<&gen::AttributeFileInfo> for AttributeFileInfo { - fn from(value: &gen::AttributeFileInfo) -> Self { - Self { id: value.id().into() } - } -} - pub type SnapshotProperties = BTreeMap; #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Eq, Hash)] @@ -260,11 +251,6 @@ impl ManifestFileInfo { } } -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] -pub struct AttributeFileInfo { - pub id: AttributesId, -} - #[derive(Debug, PartialEq)] pub struct Snapshot { buffer: Vec, @@ -327,7 +313,6 @@ impl Snapshot { message: String, properties: Option, mut manifest_files: Vec, - mut attribute_files: Vec, sorted_iter: I, ) -> IcechunkResult where @@ -347,16 +332,6 @@ impl Snapshot { .collect::>(); let manifest_files = builder.create_vector(&manifest_files); - attribute_files.sort_by(|a, b| a.id.cmp(&b.id)); - let attribute_files = attribute_files - .iter() - .map(|att| { - let id = gen::ObjectId12::new(&att.id.0); - 
gen::AttributeFileInfo::new(&id) - }) - .collect::>(); - let attribute_files = builder.create_vector(&attribute_files); - let metadata_items: Vec<_> = properties .unwrap_or_default() .iter() @@ -393,7 +368,6 @@ impl Snapshot { message: Some(message), metadata: Some(metadata_items), manifest_files: Some(manifest_files), - attribute_files: Some(attribute_files), }, ); @@ -413,7 +387,6 @@ impl Snapshot { Self::INITIAL_COMMIT_MESSAGE.to_string(), Some(properties), Default::default(), - Default::default(), nodes, ) } @@ -475,10 +448,6 @@ impl Snapshot { self.root().manifest_files().iter().map(|mf| mf.into()) } - pub fn attribute_files(&self) -> impl Iterator + '_ { - self.root().attribute_files().iter().map(|f| f.into()) - } - /// Cretase a new `Snapshot` with all the same data as `new_child` but `self` as parent pub fn adopt(&self, new_child: &Snapshot) -> IcechunkResult { // Rust flatbuffers implementation doesn't allow mutation of scalars, so we need to @@ -490,7 +459,6 @@ impl Snapshot { new_child.message().clone(), Some(new_child.metadata()?.clone()), new_child.manifest_files().collect(), - new_child.attribute_files().collect(), new_child.iter(), ) } @@ -560,54 +528,20 @@ fn mk_node<'bldr>( ) -> IcechunkResult>> { let id = gen::ObjectId8::new(&node.id.0); let path = builder.create_string(node.path.to_string().as_str()); - let (user_attributes_type, user_attributes) = - mk_user_attributes(builder, node.user_attributes.as_ref())?; let (node_data_type, node_data) = mk_node_data(builder, &node.node_data)?; + let user_data = Some(builder.create_vector(&node.user_data)); Ok(gen::NodeSnapshot::create( builder, &gen::NodeSnapshotArgs { id: Some(&id), path: Some(path), - user_attributes_type, - user_attributes, node_data_type, node_data, + user_data, }, )) } -fn mk_user_attributes( - builder: &mut flatbuffers::FlatBufferBuilder<'_>, - atts: Option<&UserAttributesSnapshot>, -) -> IcechunkResult<( - gen::UserAttributesSnapshot, - Option>, -)> { - match atts { - Some(UserAttributesSnapshot::Inline(user_attributes)) => { - let data = builder - .create_vector(rmp_serde::to_vec(&user_attributes.parsed)?.as_slice()); - let inl = gen::InlineUserAttributes::create( - builder, - &gen::InlineUserAttributesArgs { data: Some(data) }, - ); - Ok((gen::UserAttributesSnapshot::Inline, Some(inl.as_union_value()))) - } - Some(UserAttributesSnapshot::Ref(uatts)) => { - let id = gen::ObjectId12::new(&uatts.object_id.0); - let reference = gen::UserAttributesRef::create( - builder, - &gen::UserAttributesRefArgs { - object_id: Some(&id), - location: uatts.location, - }, - ); - Ok((gen::UserAttributesSnapshot::Reference, Some(reference.as_union_value()))) - } - None => Ok((gen::UserAttributesSnapshot::NONE, None)), - } -} - fn mk_node_data( builder: &mut FlatBufferBuilder<'_>, node_data: &NodeData, @@ -616,9 +550,7 @@ fn mk_node_data( Option>, )> { match node_data { - NodeData::Array(zarr, manifests) => { - let zarr_metadata = - Some(builder.create_vector(rmp_serde::to_vec(zarr)?.as_slice())); + NodeData::Array { manifests, dimension_names, shape } => { let manifests = manifests .iter() .map(|manref| { @@ -639,14 +571,40 @@ fn mk_node_data( }) .collect::>(); let manifests = builder.create_vector(manifests.as_slice()); + let dimensions = dimension_names.as_ref().map(|dn| { + let names = dn + .iter() + .map(|n| match n { + DimensionName::Name(s) => { + let n = builder.create_shared_string(s.as_str()); + gen::DimensionName::create( + builder, + &gen::DimensionNameArgs { name: Some(n) }, + ) + } + DimensionName::NotSpecified 
=> gen::DimensionName::create( + builder, + &gen::DimensionNameArgs { name: None }, + ), + }) + .collect::>(); + builder.create_vector(names.as_slice()) + }); + let shape = shape + .0 + .iter() + .map(|ds| gen::DimensionShape::new(ds.dim_length, ds.chunk_length)) + .collect::>(); + let shape = builder.create_vector(shape.as_slice()); Ok(( gen::NodeData::Array, Some( gen::ArrayNodeData::create( builder, &gen::ArrayNodeDataArgs { - zarr_metadata, manifests: Some(manifests), + shape: Some(shape), + dimension_names: dimensions, }, ) .as_union_value(), @@ -670,54 +628,23 @@ mod tests { use super::*; use pretty_assertions::assert_eq; - use std::{ - collections::HashMap, - iter::{self}, - num::NonZeroU64, - }; + use std::iter::{self}; #[test] fn test_get_node() -> Result<(), Box> { - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![10u64, 20, 30], - data_type: DataType::Float32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(3).unwrap(), - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float32(0f32), - - codecs: vec![Codec { - name: "mycodec".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; - let zarr_meta2 = ZarrArrayMetadata { - storage_transformers: None, - data_type: DataType::Int32, - dimension_names: Some(vec![None, None, Some("t".to_string())]), - fill_value: FillValue::Int32(0i32), - ..zarr_meta1.clone() - }; - let zarr_meta3 = - ZarrArrayMetadata { dimension_names: None, ..zarr_meta2.clone() }; + let shape1 = ArrayShape::new(vec![(10u64, 3), (20, 2), (30, 1)]).unwrap(); + let dim_names1 = Some(vec!["x".into(), "y".into(), "t".into()]); + + let shape2 = shape1.clone(); + let dim_names2 = Some(vec![ + DimensionName::NotSpecified, + DimensionName::NotSpecified, + "t".into(), + ]); + + let shape3 = shape1.clone(); + let dim_names3 = None; + let man_ref1 = ManifestRef { object_id: ObjectId::random(), extents: ManifestExtents::new(&[0, 0, 0], &[100, 100, 100]), @@ -727,58 +654,61 @@ mod tests { extents: ManifestExtents::new(&[0, 0, 0], &[100, 100, 100]), }; - let oid = ObjectId::random(); let node_ids = iter::repeat_with(NodeId::random).take(7).collect::>(); // nodes must be sorted by path let nodes = vec![ NodeSnapshot { path: Path::root(), id: node_ids[0].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, NodeSnapshot { path: "/a".try_into().unwrap(), id: node_ids[1].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, NodeSnapshot { path: "/array2".try_into().unwrap(), id: node_ids[5].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta2.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape2.clone(), + dimension_names: dim_names2.clone(), + manifests: vec![], + }, }, NodeSnapshot { path: "/b".try_into().unwrap(), id: node_ids[2].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, NodeSnapshot { path: "/b/array1".try_into().unwrap(), id: node_ids[4].clone(), - user_attributes: 
Some(UserAttributesSnapshot::Ref(UserAttributesRef { - object_id: oid.clone(), - location: 42, - })), - node_data: NodeData::Array( - zarr_meta1.clone(), - vec![man_ref1.clone(), man_ref2.clone()], - ), + user_data: Bytes::copy_from_slice(b"hello"), + node_data: NodeData::Array { + shape: shape1.clone(), + dimension_names: dim_names1.clone(), + manifests: vec![man_ref1.clone(), man_ref2.clone()], + }, }, NodeSnapshot { path: "/b/array3".try_into().unwrap(), id: node_ids[6].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta3.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape3.clone(), + dimension_names: dim_names3.clone(), + manifests: vec![], + }, }, NodeSnapshot { path: "/b/c".try_into().unwrap(), id: node_ids[3].clone(), - user_attributes: Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"foo": "some inline"}"#).unwrap(), - )), + user_data: Bytes::copy_from_slice(b"bye"), node_data: NodeData::Group, }, ]; @@ -801,7 +731,6 @@ mod tests { String::default(), Default::default(), manifests, - vec![], nodes.into_iter().map(Ok::), ) .unwrap(); @@ -822,9 +751,7 @@ mod tests { NodeSnapshot { path: "/b/c".try_into().unwrap(), id: node_ids[3].clone(), - user_attributes: Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"foo": "some inline"}"#).unwrap(), - )), + user_data: Bytes::copy_from_slice(b"bye"), node_data: NodeData::Group, }, ); @@ -834,7 +761,7 @@ mod tests { NodeSnapshot { path: Path::root(), id: node_ids[0].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, ); @@ -844,11 +771,12 @@ mod tests { NodeSnapshot { path: "/b/array1".try_into().unwrap(), id: node_ids[4].clone(), - user_attributes: Some(UserAttributesSnapshot::Ref(UserAttributesRef { - object_id: oid, - location: 42, - })), - node_data: NodeData::Array(zarr_meta1.clone(), vec![man_ref1, man_ref2]), + user_data: Bytes::copy_from_slice(b"hello"), + node_data: NodeData::Array { + shape: shape1.clone(), + dimension_names: dim_names1.clone(), + manifests: vec![man_ref1, man_ref2] + }, }, ); let node = st.get_node(&"/array2".try_into().unwrap()).unwrap(); @@ -857,8 +785,12 @@ mod tests { NodeSnapshot { path: "/array2".try_into().unwrap(), id: node_ids[5].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta2.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape2.clone(), + dimension_names: dim_names2.clone(), + manifests: vec![] + }, }, ); let node = st.get_node(&"/b/array3".try_into().unwrap()).unwrap(); @@ -867,8 +799,12 @@ mod tests { NodeSnapshot { path: "/b/array3".try_into().unwrap(), id: node_ids[6].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta3.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape3.clone(), + dimension_names: dim_names3.clone(), + manifests: vec![] + }, }, ); Ok(()) @@ -876,45 +812,17 @@ mod tests { #[test] fn test_valid_chunk_coord() { - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![10000, 10001, 9999], - data_type: DataType::Float32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float32(0f32), - - codecs: vec![Codec { - name: "mycodec".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }], - 
storage_transformers: None, - dimension_names: None, - }; - - let zarr_meta2 = ZarrArrayMetadata { - shape: vec![0, 0, 0], - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - ]), - ..zarr_meta1.clone() - }; - + let shape1 = + ArrayShape::new(vec![(10_000, 1_000), (10_001, 1_000), (9_999, 1_000)]) + .unwrap(); + let shape2 = ArrayShape::new(vec![(0, 1_000), (0, 1_000), (0, 1_000)]).unwrap(); let coord1 = ChunkIndices(vec![9, 10, 9]); let coord2 = ChunkIndices(vec![10, 11, 10]); let coord3 = ChunkIndices(vec![0, 0, 0]); - assert!(zarr_meta1.valid_chunk_coord(&coord1)); - assert!(!zarr_meta1.valid_chunk_coord(&coord2)); + assert!(shape1.valid_chunk_coord(&coord1)); + assert!(!shape1.valid_chunk_coord(&coord2)); - assert!(zarr_meta2.valid_chunk_coord(&coord3)); + assert!(shape2.valid_chunk_coord(&coord3)); } } diff --git a/icechunk/src/format/transaction_log.rs b/icechunk/src/format/transaction_log.rs index 9c995d46..eb2b4540 100644 --- a/icechunk/src/format/transaction_log.rs +++ b/icechunk/src/format/transaction_log.rs @@ -30,12 +30,10 @@ impl TransactionLog { let mut deleted_arrays: Vec<_> = cs.deleted_arrays().map(|(_, id)| gen::ObjectId8::new(&id.0)).collect(); - let mut updated_user_attributes: Vec<_> = cs - .user_attributes_updated_nodes() - .map(|id| gen::ObjectId8::new(&id.0)) - .collect(); - let mut updated_zarr_metadata: Vec<_> = - cs.zarr_updated_arrays().map(|id| gen::ObjectId8::new(&id.0)).collect(); + let mut updated_arrays: Vec<_> = + cs.updated_arrays().map(|id| gen::ObjectId8::new(&id.0)).collect(); + let mut updated_groups: Vec<_> = + cs.updated_groups().map(|id| gen::ObjectId8::new(&id.0)).collect(); // TODO: what's a good capacity? let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(1_024 * 1_024); @@ -70,17 +68,15 @@ impl TransactionLog { new_arrays.sort_by(|a, b| a.0.cmp(&b.0)); deleted_groups.sort_by(|a, b| a.0.cmp(&b.0)); deleted_arrays.sort_by(|a, b| a.0.cmp(&b.0)); - updated_user_attributes.sort_by(|a, b| a.0.cmp(&b.0)); - updated_zarr_metadata.sort_by(|a, b| a.0.cmp(&b.0)); + updated_groups.sort_by(|a, b| a.0.cmp(&b.0)); + updated_arrays.sort_by(|a, b| a.0.cmp(&b.0)); let new_groups = Some(builder.create_vector(new_groups.as_slice())); let new_arrays = Some(builder.create_vector(new_arrays.as_slice())); let deleted_groups = Some(builder.create_vector(deleted_groups.as_slice())); let deleted_arrays = Some(builder.create_vector(deleted_arrays.as_slice())); - let updated_user_attributes = - Some(builder.create_vector(updated_user_attributes.as_slice())); - let updated_zarr_metadata = - Some(builder.create_vector(updated_zarr_metadata.as_slice())); + let updated_groups = Some(builder.create_vector(updated_groups.as_slice())); + let updated_arrays = Some(builder.create_vector(updated_arrays.as_slice())); let id = ObjectId12::new(&id.0); let id = Some(&id); @@ -92,8 +88,8 @@ impl TransactionLog { new_arrays, deleted_groups, deleted_arrays, - updated_user_attributes, - updated_zarr_metadata, + updated_groups, + updated_arrays, updated_chunks, }, ); @@ -129,12 +125,12 @@ impl TransactionLog { self.root().deleted_arrays().iter().map(From::from) } - pub fn updated_user_attributes(&self) -> impl Iterator + '_ { - self.root().updated_user_attributes().iter().map(From::from) + pub fn updated_groups(&self) -> impl Iterator + '_ { + self.root().updated_groups().iter().map(From::from) } - pub fn updated_zarr_metadata(&self) -> impl Iterator + '_ { - 
self.root().updated_zarr_metadata().iter().map(From::from) + pub fn updated_arrays(&self) -> impl Iterator + '_ { + self.root().updated_arrays().iter().map(From::from) } pub fn updated_chunks( @@ -179,11 +175,12 @@ impl TransactionLog { self.root().deleted_arrays().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() } - pub fn user_attributes_updated(&self, id: &NodeId) -> bool { - self.root() - .updated_user_attributes() - .lookup_by_key(id.0, |a, b| a.0.cmp(b)) - .is_some() + pub fn group_updated(&self, id: &NodeId) -> bool { + self.root().updated_groups().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() + } + + pub fn array_updated(&self, id: &NodeId) -> bool { + self.root().updated_arrays().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() } pub fn chunks_updated(&self, id: &NodeId) -> bool { @@ -193,13 +190,6 @@ impl TransactionLog { .is_some() } - pub fn zarr_metadata_updated(&self, id: &NodeId) -> bool { - self.root() - .updated_zarr_metadata() - .lookup_by_key(id.0, |a, b| a.0.cmp(b)) - .is_some() - } - fn root(&self) -> gen::TransactionLog { // without the unsafe version this is too slow // if we try to keep the root in the TransactionLog struct, we would need a lifetime @@ -216,8 +206,8 @@ impl TransactionLog { + root.new_arrays().len() + root.deleted_groups().len() + root.deleted_arrays().len() - + root.updated_user_attributes().len() - + root.updated_zarr_metadata().len() + + root.updated_groups().len() + + root.updated_arrays().len() + root.updated_chunks().iter().map(|s| s.chunks().len()).sum::() } @@ -240,8 +230,8 @@ pub struct DiffBuilder { new_arrays: HashSet, deleted_groups: HashSet, deleted_arrays: HashSet, - updated_user_attributes: HashSet, - updated_zarr_metadata: HashSet, + updated_groups: HashSet, + updated_arrays: HashSet, // we use sorted set here to simply move it to a diff without having to rebuild updated_chunks: HashMap>, } @@ -252,8 +242,8 @@ impl DiffBuilder { self.new_arrays.extend(tx.new_arrays()); self.deleted_groups.extend(tx.deleted_groups()); self.deleted_arrays.extend(tx.deleted_arrays()); - self.updated_user_attributes.extend(tx.updated_user_attributes()); - self.updated_zarr_metadata.extend(tx.updated_zarr_metadata()); + self.updated_groups.extend(tx.updated_groups()); + self.updated_arrays.extend(tx.updated_arrays()); for (node, chunks) in tx.updated_chunks() { match self.updated_chunks.get_mut(&node) { @@ -284,8 +274,8 @@ pub struct Diff { pub new_arrays: BTreeSet, pub deleted_groups: BTreeSet, pub deleted_arrays: BTreeSet, - pub updated_user_attributes: BTreeSet, - pub updated_zarr_metadata: BTreeSet, + pub updated_groups: BTreeSet, + pub updated_arrays: BTreeSet, pub updated_chunks: BTreeMap>, } @@ -315,14 +305,14 @@ impl Diff { .flat_map(|node_id| nodes.get(node_id)) .cloned() .collect(); - let updated_user_attributes = tx - .updated_user_attributes + let updated_groups = tx + .updated_groups .iter() .flat_map(|node_id| nodes.get(node_id)) .cloned() .collect(); - let updated_zarr_metadata = tx - .updated_zarr_metadata + let updated_arrays = tx + .updated_arrays .iter() .flat_map(|node_id| nodes.get(node_id)) .cloned() @@ -340,8 +330,8 @@ impl Diff { new_arrays, deleted_groups, deleted_arrays, - updated_user_attributes, - updated_zarr_metadata, + updated_groups, + updated_arrays, updated_chunks, } } @@ -352,8 +342,8 @@ impl Diff { && self.new_arrays.is_empty() && self.deleted_groups.is_empty() && self.deleted_arrays.is_empty() - && self.updated_user_attributes.is_empty() - && self.updated_user_attributes.is_empty() + && 
self.updated_groups.is_empty() + && self.updated_arrays.is_empty() && self.updated_chunks.is_empty() } } diff --git a/icechunk/src/lib.rs b/icechunk/src/lib.rs index e8308cb0..beaf4d40 100644 --- a/icechunk/src/lib.rs +++ b/icechunk/src/lib.rs @@ -23,7 +23,6 @@ pub mod config; pub mod conflicts; pub mod error; pub mod format; -pub mod metadata; pub mod ops; pub mod refs; pub mod repository; diff --git a/icechunk/src/metadata/data_type.rs b/icechunk/src/metadata/data_type.rs deleted file mode 100644 index 4349c5f2..00000000 --- a/icechunk/src/metadata/data_type.rs +++ /dev/null @@ -1,99 +0,0 @@ -use std::fmt::Display; - -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[non_exhaustive] -#[serde(rename_all = "lowercase")] -pub enum DataType { - Bool, - Int8, - Int16, - Int32, - Int64, - UInt8, - UInt16, - UInt32, - UInt64, - Float16, - Float32, - Float64, - Complex64, - Complex128, - String, - Bytes, -} - -impl DataType { - pub fn fits_i64(&self, n: i64) -> bool { - use DataType::*; - match self { - Int8 => n >= i8::MIN as i64 && n <= i8::MAX as i64, - Int16 => n >= i16::MIN as i64 && n <= i16::MAX as i64, - Int32 => n >= i32::MIN as i64 && n <= i32::MAX as i64, - Int64 => true, - _ => false, - } - } - - pub fn fits_u64(&self, n: u64) -> bool { - use DataType::*; - match self { - UInt8 => n >= u8::MIN as u64 && n <= u8::MAX as u64, - UInt16 => n >= u16::MIN as u64 && n <= u16::MAX as u64, - UInt32 => n >= u32::MIN as u64 && n <= u32::MAX as u64, - UInt64 => true, - _ => false, - } - } -} - -impl TryFrom<&str> for DataType { - type Error = &'static str; - - fn try_from(value: &str) -> Result { - match value { - "bool" => Ok(DataType::Bool), - "int8" => Ok(DataType::Int8), - "int16" => Ok(DataType::Int16), - "int32" => Ok(DataType::Int32), - "int64" => Ok(DataType::Int64), - "uint8" => Ok(DataType::UInt8), - "uint16" => Ok(DataType::UInt16), - "uint32" => Ok(DataType::UInt32), - "uint64" => Ok(DataType::UInt64), - "float16" => Ok(DataType::Float16), - "float32" => Ok(DataType::Float32), - "float64" => Ok(DataType::Float64), - "complex64" => Ok(DataType::Complex64), - "complex128" => Ok(DataType::Complex128), - "string" => Ok(DataType::String), - "bytes" => Ok(DataType::Bytes), - _ => Err("Unknown data type, cannot parse"), - } - } -} - -impl Display for DataType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DataType::*; - match self { - Bool => f.write_str("bool"), - Int8 => f.write_str("int8"), - Int16 => f.write_str("int16"), - Int32 => f.write_str("int32"), - Int64 => f.write_str("int64"), - UInt8 => f.write_str("uint8"), - UInt16 => f.write_str("uint16"), - UInt32 => f.write_str("uint32"), - UInt64 => f.write_str("uint64"), - Float16 => f.write_str("float16"), - Float32 => f.write_str("float32"), - Float64 => f.write_str("float64"), - Complex64 => f.write_str("complex64"), - Complex128 => f.write_str("complex128"), - String => f.write_str("string"), - Bytes => f.write_str("bytes"), - } - } -} diff --git a/icechunk/src/metadata/fill_value.rs b/icechunk/src/metadata/fill_value.rs deleted file mode 100644 index e107d753..00000000 --- a/icechunk/src/metadata/fill_value.rs +++ /dev/null @@ -1,284 +0,0 @@ -use itertools::Itertools; -use serde::{Deserialize, Serialize}; -use test_strategy::Arbitrary; - -use crate::format::{IcechunkFormatError, IcechunkFormatErrorKind}; - -use super::DataType; - -#[derive(Arbitrary, Clone, Debug, PartialEq, Serialize, Deserialize)] -pub enum FillValue { - // FIXME: test all 
json (de)serializations - Bool(bool), - Int8(i8), - Int16(i16), - Int32(i32), - Int64(i64), - UInt8(u8), - UInt16(u16), - UInt32(u32), - UInt64(u64), - Float16(f32), - Float32(f32), - Float64(f64), - Complex64(f32, f32), - Complex128(f64, f64), - String(String), - Bytes(Vec), -} - -impl FillValue { - pub const NAN_STR: &'static str = "NaN"; - pub const INF_STR: &'static str = "Infinity"; - pub const NEG_INF_STR: &'static str = "-Infinity"; - - pub fn from_data_type_and_json( - dt: &DataType, - value: &serde_json::Value, - ) -> Result { - #![allow(clippy::expect_used)] // before calling `as_foo` we check with `fits_foo` - match (dt, value) { - (DataType::Bool, serde_json::Value::Bool(b)) => Ok(FillValue::Bool(*b)), - (DataType::Int8, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int8( - n.as_i64().expect("bug in from_data_type_and_json") as i8 - )) - } - (DataType::Int16, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int16( - n.as_i64().expect("bug in from_data_type_and_json") as i16 - )) - } - (DataType::Int32, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int32( - n.as_i64().expect("bug in from_data_type_and_json") as i32 - )) - } - (DataType::Int64, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int64(n.as_i64().expect("bug in from_data_type_and_json"))) - } - (DataType::UInt8, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt8( - n.as_u64().expect("bug in from_data_type_and_json") as u8 - )) - } - (DataType::UInt16, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt16( - n.as_u64().expect("bug in from_data_type_and_json") as u16 - )) - } - (DataType::UInt32, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt32( - n.as_u64().expect("bug in from_data_type_and_json") as u32 - )) - } - (DataType::UInt64, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt64(n.as_u64().expect("bug in from_data_type_and_json"))) - } - (DataType::Float16, serde_json::Value::Number(n)) if n.as_f64().is_some() => { - // FIXME: limits logic - Ok(FillValue::Float16( - n.as_f64().expect("bug in from_data_type_and_json") as f32, - )) - } - (DataType::Float16, serde_json::Value::String(s)) - if s.as_str() == FillValue::NAN_STR => - { - Ok(FillValue::Float16(f32::NAN)) - } - (DataType::Float16, serde_json::Value::String(s)) - if s.as_str() == FillValue::INF_STR => - { - Ok(FillValue::Float16(f32::INFINITY)) - } - (DataType::Float16, serde_json::Value::String(s)) - if s.as_str() == FillValue::NEG_INF_STR => - { - Ok(FillValue::Float16(f32::NEG_INFINITY)) - } - - (DataType::Float32, serde_json::Value::Number(n)) if n.as_f64().is_some() => { - // FIXME: limits logic - Ok(FillValue::Float32( - n.as_f64().expect("bug in from_data_type_and_json") as f32, - )) - } - (DataType::Float32, serde_json::Value::String(s)) - if s.as_str() == FillValue::NAN_STR => - { - Ok(FillValue::Float32(f32::NAN)) - } - (DataType::Float32, serde_json::Value::String(s)) - if s.as_str() == FillValue::INF_STR => - { - Ok(FillValue::Float32(f32::INFINITY)) - } - (DataType::Float32, serde_json::Value::String(s)) - if s.as_str() == FillValue::NEG_INF_STR => - { - 
Ok(FillValue::Float32(f32::NEG_INFINITY)) - } - - (DataType::Float64, serde_json::Value::Number(n)) if n.as_f64().is_some() => { - // FIXME: limits logic - Ok(FillValue::Float64( - n.as_f64().expect("bug in from_data_type_and_json"), - )) - } - (DataType::Float64, serde_json::Value::String(s)) - if s.as_str() == FillValue::NAN_STR => - { - Ok(FillValue::Float64(f64::NAN)) - } - (DataType::Float64, serde_json::Value::String(s)) - if s.as_str() == FillValue::INF_STR => - { - Ok(FillValue::Float64(f64::INFINITY)) - } - (DataType::Float64, serde_json::Value::String(s)) - if s.as_str() == FillValue::NEG_INF_STR => - { - Ok(FillValue::Float64(f64::NEG_INFINITY)) - } - (DataType::Complex64, serde_json::Value::Array(arr)) if arr.len() == 2 => { - let r = FillValue::from_data_type_and_json(&DataType::Float32, &arr[0])?; - let i = FillValue::from_data_type_and_json(&DataType::Float32, &arr[1])?; - match (r, i) { - (FillValue::Float32(r), FillValue::Float32(i)) => { - Ok(FillValue::Complex64(r, i)) - } - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - } - } - (DataType::Complex128, serde_json::Value::Array(arr)) if arr.len() == 2 => { - let r = FillValue::from_data_type_and_json(&DataType::Float64, &arr[0])?; - let i = FillValue::from_data_type_and_json(&DataType::Float64, &arr[1])?; - match (r, i) { - (FillValue::Float64(r), FillValue::Float64(i)) => { - Ok(FillValue::Complex128(r, i)) - } - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - } - } - - (DataType::String, serde_json::Value::String(s)) => { - Ok(FillValue::String(s.clone())) - } - - (DataType::Bytes, serde_json::Value::Array(arr)) => { - let bytes = arr - .iter() - .map(|b| FillValue::from_data_type_and_json(&DataType::UInt8, b)) - .collect::, _>>()?; - Ok(FillValue::Bytes( - bytes - .iter() - .map(|b| match b { - FillValue::UInt8(n) => Ok::<_, IcechunkFormatError>(*n), - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - }) - .try_collect()?, - )) - } - - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - } - } - - pub fn get_data_type(&self) -> DataType { - match self { - FillValue::Bool(_) => DataType::Bool, - FillValue::Int8(_) => DataType::Int8, - FillValue::Int16(_) => DataType::Int16, - FillValue::Int32(_) => DataType::Int32, - FillValue::Int64(_) => DataType::Int64, - FillValue::UInt8(_) => DataType::UInt8, - FillValue::UInt16(_) => DataType::UInt16, - FillValue::UInt32(_) => DataType::UInt32, - FillValue::UInt64(_) => DataType::UInt64, - FillValue::Float16(_) => DataType::Float16, - FillValue::Float32(_) => DataType::Float32, - FillValue::Float64(_) => DataType::Float64, - FillValue::Complex64(_, _) => DataType::Complex64, - FillValue::Complex128(_, _) => DataType::Complex128, - FillValue::String(_) => DataType::String, - FillValue::Bytes(_) => DataType::Bytes, - } - } -} - -#[cfg(test)] -#[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] -mod tests { - use super::*; - - #[test] - fn test_nan_inf_parsing() { - assert_eq!( - FillValue::from_data_type_and_json(&DataType::Float64, &55f64.into()) - .unwrap(), - FillValue::Float64(55.0) - ); - - assert_eq!( - FillValue::from_data_type_and_json(&DataType::Float64, &55.into()).unwrap(), - FillValue::Float64(55.0) - ); - - assert!(matches!(FillValue::from_data_type_and_json( - &DataType::Float64, - 
&"NaN".into() - ) - .unwrap(), - FillValue::Float64(n) if n.is_nan() - )); - - assert!(matches!(FillValue::from_data_type_and_json( - &DataType::Float64, - &"Infinity".into() - ) - .unwrap(), - FillValue::Float64(n) if n == f64::INFINITY - )); - - assert!(matches!(FillValue::from_data_type_and_json( - &DataType::Float64, - &"-Infinity".into() - ) - .unwrap(), - FillValue::Float64(n) if n == f64::NEG_INFINITY - )); - } -} diff --git a/icechunk/src/metadata/mod.rs b/icechunk/src/metadata/mod.rs deleted file mode 100644 index 4735b527..00000000 --- a/icechunk/src/metadata/mod.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::{collections::HashMap, num::NonZeroU64}; - -use bytes::Bytes; -use serde::{Deserialize, Serialize}; -use test_strategy::Arbitrary; - -pub mod data_type; -pub mod fill_value; - -pub use data_type::DataType; -pub use fill_value::FillValue; - -/// The shape of an array. -/// 0 is a valid shape member -pub type ArrayShape = Vec; -// each dimension name can be null in Zarr -pub type DimensionName = Option; -pub type DimensionNames = Vec; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct Codec { - pub name: String, - pub configuration: Option>, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct StorageTransformer { - pub name: String, - pub configuration: Option>, -} - -#[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] -pub struct ChunkShape(pub Vec); - -#[derive(Arbitrary, Copy, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] -pub enum ChunkKeyEncoding { - Slash, - Dot, - Default, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct UserAttributes { - #[serde(flatten)] - pub parsed: serde_json::Value, -} - -impl UserAttributes { - pub fn try_new(json: &[u8]) -> Result { - serde_json::from_slice(json).map(|json| UserAttributes { parsed: json }) - } - - pub fn to_bytes(&self) -> Bytes { - // We can unwrap because a Value is always valid json - #[allow(clippy::expect_used)] - serde_json::to_vec(&self.parsed) - .expect("Bug in UserAttributes serialization") - .into() - } -} - -impl TryFrom for ChunkKeyEncoding { - type Error = &'static str; - - fn try_from(value: u8) -> Result { - match value { - b'/' => Ok(ChunkKeyEncoding::Slash), - b'.' => Ok(ChunkKeyEncoding::Dot), - b'x' => Ok(ChunkKeyEncoding::Default), - _ => Err("Invalid chunk key encoding character"), - } - } -} - -impl From for u8 { - fn from(value: ChunkKeyEncoding) -> Self { - match value { - ChunkKeyEncoding::Slash => b'/', - ChunkKeyEncoding::Dot => b'.', - ChunkKeyEncoding::Default => b'x', - } - } -} diff --git a/icechunk/src/repository.rs b/icechunk/src/repository.rs index bd4f3657..86b35e9c 100644 --- a/icechunk/src/repository.rs +++ b/icechunk/src/repository.rs @@ -710,7 +710,7 @@ impl Repository { } Ok(node) => match node.node_data { NodeData::Group => {} - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. 
} => { for manifest in manifests { if !loaded_manifests.contains(&manifest.object_id) { let manifest_id = manifest.object_id; @@ -823,9 +823,7 @@ pub async fn raise_if_invalid_snapshot_id( #[cfg(test)] #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod tests { - use std::{ - collections::HashMap, error::Error, num::NonZeroU64, path::PathBuf, sync::Arc, - }; + use std::{collections::HashMap, error::Error, path::PathBuf, sync::Arc}; use storage::logging::LoggingStorage; use tempfile::TempDir; @@ -834,10 +832,7 @@ mod tests { config::{ CachingConfig, ManifestConfig, ManifestPreloadConfig, RepositoryConfig, }, - format::{manifest::ChunkPayload, snapshot::ZarrArrayMetadata, ChunkIndices}, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, + format::{manifest::ChunkPayload, snapshot::ArrayShape, ChunkIndices}, new_local_filesystem_storage, storage::new_in_memory_storage, Repository, Storage, @@ -1030,34 +1025,48 @@ mod tests { let mut session = repository.writable_session("main").await?; - session.add_group(Path::root()).await?; - - let zarr_meta = ZarrArrayMetadata { - shape: vec![1_000, 1, 1], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + let def = Bytes::from_static(br#"{"this":"array"}"#); + session.add_group(Path::root(), def.clone()).await?; + + let shape = ArrayShape::new(vec![(1_000, 1), (1, 1), (1, 1)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); let time_path: Path = "/time".try_into().unwrap(); let temp_path: Path = "/temperature".try_into().unwrap(); let lat_path: Path = "/latitude".try_into().unwrap(); let lon_path: Path = "/longitude".try_into().unwrap(); - session.add_array(time_path.clone(), zarr_meta.clone()).await?; - session.add_array(temp_path.clone(), zarr_meta.clone()).await?; - session.add_array(lat_path.clone(), zarr_meta.clone()).await?; - session.add_array(lon_path.clone(), zarr_meta.clone()).await?; + session + .add_array( + time_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; + session + .add_array( + temp_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; + session + .add_array( + lat_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; + session + .add_array( + lon_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; session .set_chunk_ref( @@ -1139,11 +1148,11 @@ mod tests { let ops = logging.fetch_operations(); let lat_manifest_id = match session.get_node(&lat_path).await?.node_data { - NodeData::Array(_, vec) => vec[0].object_id.to_string(), + NodeData::Array { manifests, .. } => manifests[0].object_id.to_string(), NodeData::Group => panic!(), }; let lon_manifest_id = match session.get_node(&lon_path).await?.node_data { - NodeData::Array(_, vec) => vec[0].object_id.to_string(), + NodeData::Array { manifests, .. 
} => manifests[0].object_id.to_string(), NodeData::Group => panic!(), }; assert_eq!(ops[0].0, "fetch_snapshot"); diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index f610bc29..88cba53a 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -21,7 +21,7 @@ use tracing::{debug, info, instrument, trace, warn, Instrument}; use crate::{ asset_manager::AssetManager, - change_set::ChangeSet, + change_set::{ArrayData, ChangeSet}, conflicts::{Conflict, ConflictResolution, ConflictSolver}, error::ICError, format::{ @@ -31,14 +31,13 @@ use crate::{ VirtualReferenceErrorKind, }, snapshot::{ - ManifestFileInfo, NodeData, NodeSnapshot, NodeType, Snapshot, - SnapshotProperties, UserAttributesSnapshot, ZarrArrayMetadata, + ArrayShape, DimensionName, ManifestFileInfo, NodeData, NodeSnapshot, + NodeType, Snapshot, SnapshotProperties, }, transaction_log::{Diff, DiffBuilder, TransactionLog}, ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, IcechunkFormatErrorKind, ManifestId, NodeId, ObjectId, Path, SnapshotId, }, - metadata::UserAttributes, refs::{fetch_branch_tip, update_branch, RefError, RefErrorKind}, repository::{RepositoryError, RepositoryErrorKind}, storage::{self, StorageErrorKind}, @@ -272,12 +271,16 @@ impl Session { /// Add a group to the store. /// /// Calling this only records the operation in memory, doesn't have any consequence on the storage - #[instrument(skip(self))] - pub async fn add_group(&mut self, path: Path) -> SessionResult<()> { + #[instrument(skip(self, definition))] + pub async fn add_group( + &mut self, + path: Path, + definition: Bytes, + ) -> SessionResult<()> { match self.get_node(&path).await { Err(SessionError { kind: SessionErrorKind::NodeNotFound { .. }, .. }) => { let id = NodeId::random(); - self.change_set.add_group(path.clone(), id); + self.change_set.add_group(path.clone(), id, definition); Ok(()) } Ok(node) => Err(SessionErrorKind::AlreadyExists { @@ -295,9 +298,9 @@ impl Session { NodeSnapshot { node_data: NodeData::Group, path: node_path, .. } => { Ok(self.delete_group(node_path).await?) } - NodeSnapshot { node_data: NodeData::Array(..), path: node_path, .. } => { - Ok(self.delete_array(node_path).await?) - } + NodeSnapshot { + node_data: NodeData::Array { .. }, path: node_path, .. + } => Ok(self.delete_array(node_path).await?), } } /// Delete a group in the hierarchy @@ -332,16 +335,22 @@ impl Session { /// Add an array to the store. /// /// Calling this only records the operation in memory, doesn't have any consequence on the storage - #[instrument(skip(self, metadata))] + #[instrument(skip(self, user_data))] pub async fn add_array( &mut self, path: Path, - metadata: ZarrArrayMetadata, + shape: ArrayShape, + dimension_names: Option>, + user_data: Bytes, ) -> SessionResult<()> { match self.get_node(&path).await { Err(SessionError { kind: SessionErrorKind::NodeNotFound { .. }, .. 
}) => { let id = NodeId::random(); - self.change_set.add_array(path, id, metadata); + self.change_set.add_array( + path, + id, + ArrayData { shape, dimension_names, user_data }, + ); Ok(()) } Ok(node) => Err(SessionErrorKind::AlreadyExists { @@ -356,15 +365,35 @@ impl Session { // Updates an array Zarr metadata /// /// Calling this only records the operation in memory, doesn't have any consequence on the storage - #[instrument(skip(self, metadata))] + #[instrument(skip(self, user_data))] pub async fn update_array( &mut self, - path: Path, - metadata: ZarrArrayMetadata, + path: &Path, + shape: ArrayShape, + dimension_names: Option>, + user_data: Bytes, + ) -> SessionResult<()> { + self.get_array(path).await.map(|node| { + self.change_set.update_array( + &node.id, + path, + ArrayData { shape, dimension_names, user_data }, + ) + }) + } + + // Updates an group Zarr metadata + /// + /// Calling this only records the operation in memory, doesn't have any consequence on the storage + #[instrument(skip(self, definition))] + pub async fn update_group( + &mut self, + path: &Path, + definition: Bytes, ) -> SessionResult<()> { - self.get_array(&path) + self.get_group(path) .await - .map(|node| self.change_set.update_array(node.id, metadata)) + .map(|node| self.change_set.update_group(&node.id, path, definition)) } /// Delete an array in the hierarchy @@ -395,18 +424,6 @@ impl Session { Ok(()) } - /// Record the write or delete of user attributes to array or group - #[instrument(skip(self, atts))] - pub async fn set_user_attributes( - &mut self, - path: Path, - atts: Option, - ) -> SessionResult<()> { - let node = self.get_node(&path).await?; - self.change_set.update_user_attributes(node.id, atts); - Ok(()) - } - // Record the write, referencing or delete of a chunk // // Caller has to write the chunk before calling this. @@ -430,8 +447,8 @@ impl Session { coord: ChunkIndices, data: Option, ) -> SessionResult<()> { - if let NodeData::Array(zarr_metadata, _) = node.node_data { - if zarr_metadata.valid_chunk_coord(&coord) { + if let NodeData::Array { shape, .. } = node.node_data { + if shape.valid_chunk_coord(&coord) { self.change_set.set_chunk_ref(node.id, coord, data); Ok(()) } else { @@ -474,7 +491,7 @@ impl Session { pub async fn get_array(&self, path: &Path) -> SessionResult { match self.get_node(path).await { - res @ Ok(NodeSnapshot { node_data: NodeData::Array(..), .. }) => res, + res @ Ok(NodeSnapshot { node_data: NodeData::Array { .. }, .. }) => res, Ok(node @ NodeSnapshot { .. }) => Err(SessionErrorKind::NotAnArray { node, message: "getting an array".to_string(), @@ -525,7 +542,7 @@ impl Session { message: "getting chunk reference".to_string(), } .into()), - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { // check the chunks modified in this session first // TODO: I hate rust forces me to clone to search in a hashmap. How to do better? let session_chunk = @@ -1050,7 +1067,7 @@ async fn verified_node_chunk_iterator<'a>( ) -> impl Stream> + 'a { match node.node_data { NodeData::Group => futures::future::Either::Left(futures::stream::empty()), - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. 
} => { let new_chunk_indices: Box> = Box::new( change_set .array_chunks_iterator(&node.id, &node.path) @@ -1244,25 +1261,31 @@ async fn get_existing_node( .into(), err => SessionError::from(err), })?; - let session_atts = change_set - .get_user_attributes(&node.id) - .cloned() - .map(|a| a.map(UserAttributesSnapshot::Inline)); - let res = NodeSnapshot { - user_attributes: session_atts.unwrap_or_else(|| node.user_attributes.clone()), - ..node.clone() - }; - if let Some(session_meta) = change_set.get_updated_zarr_metadata(&node.id).cloned() { - if let NodeData::Array(_, manifests) = res.node_data { - Ok(NodeSnapshot { - node_data: NodeData::Array(session_meta, manifests), - ..res - }) - } else { - Ok(res) + + match node.node_data { + NodeData::Array { ref manifests, .. } => { + if let Some(new_data) = change_set.get_updated_array(&node.id) { + let node_data = NodeData::Array { + shape: new_data.shape.clone(), + dimension_names: new_data.dimension_names.clone(), + manifests: manifests.clone(), + }; + Ok(NodeSnapshot { + user_data: new_data.user_data.clone(), + node_data, + ..node + }) + } else { + Ok(node) + } + } + NodeData::Group => { + if let Some(updated_definition) = change_set.get_updated_group(&node.id) { + Ok(NodeSnapshot { user_data: updated_definition.clone(), ..node }) + } else { + Ok(node) + } } - } else { - Ok(res) } } @@ -1428,7 +1451,7 @@ impl<'a> FlushProcess<'a> { /// Record the previous manifests for an array that was not modified in the session fn copy_previous_manifest(&mut self, node: &NodeSnapshot, old_snapshot: &Snapshot) { match &node.node_data { - NodeData::Array(_, array_refs) => { + NodeData::Array { manifests: array_refs, .. } => { self.manifest_files.extend(array_refs.iter().map(|mr| { // It's ok to unwrap here, the snapshot had the node, it has to have the // manifest file info @@ -1510,12 +1533,17 @@ async fn flush( .map_ok(|node| { let id = &node.id; // TODO: many clones - if let NodeData::Array(meta, _) = node.node_data { + if let NodeData::Array { shape, dimension_names, .. 
} = node.node_data { NodeSnapshot { - node_data: NodeData::Array( - meta.clone(), - flush_data.manifest_refs.get(id).cloned().unwrap_or_default(), - ), + node_data: NodeData::Array { + shape, + dimension_names, + manifests: flush_data + .manifest_refs + .get(id) + .cloned() + .unwrap_or_default(), + }, ..node } } else { @@ -1532,7 +1560,6 @@ async fn flush( message.to_string(), Some(properties), flush_data.manifest_files.into_iter().collect(), - vec![], all_nodes.into_iter().map(Ok::<_, Infallible>), )?; @@ -1712,7 +1739,7 @@ fn aggregate_extents<'a, T: std::fmt::Debug, E>( #[cfg(test)] #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod tests { - use std::{collections::HashMap, error::Error, num::NonZeroU64}; + use std::{collections::HashMap, error::Error}; use crate::{ conflicts::{ @@ -1720,14 +1747,11 @@ mod tests { detector::ConflictDetector, }, format::manifest::ManifestExtents, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, refs::{fetch_ref, Ref}, repository::VersionInfo, - storage::{logging::LoggingStorage, new_in_memory_storage}, + storage::new_in_memory_storage, strategies::{ - chunk_indices, empty_writable_session, node_paths, zarr_array_metadata, + chunk_indices, empty_writable_session, node_paths, shapes_and_dims, ShapeDim, }, ObjectStorage, Repository, }; @@ -1736,6 +1760,7 @@ mod tests { use itertools::Itertools; use pretty_assertions::assert_eq; use proptest::prelude::{prop_assert, prop_assert_eq}; + use storage::logging::LoggingStorage; use test_strategy::proptest; use tokio::sync::Barrier; @@ -1750,11 +1775,13 @@ mod tests { #[strategy(node_paths())] path: Path, #[strategy(empty_writable_session())] mut session: Session, ) { + let user_data = Bytes::new(); + // getting any path from an empty repository must fail prop_assert!(session.get_node(&path).await.is_err()); // adding a new group must succeed - prop_assert!(session.add_group(path.clone()).await.is_ok()); + prop_assert!(session.add_group(path.clone(), user_data.clone()).await.is_ok()); // Getting a group just added must succeed let node = session.get_node(&path).await; @@ -1765,7 +1792,7 @@ mod tests { // adding an existing group fails let matches = matches!( - session.add_group(path.clone()).await.unwrap_err(), + session.add_group(path.clone(), user_data.clone()).await.unwrap_err(), SessionError{kind: SessionErrorKind::AlreadyExists{node, ..},..} if node.path == path ); prop_assert!(matches); @@ -1780,7 +1807,7 @@ mod tests { prop_assert!(session.get_node(&path).await.is_err()); // adding again must succeed - prop_assert!(session.add_group(path.clone()).await.is_ok()); + prop_assert!(session.add_group(path.clone(), user_data.clone()).await.is_ok()); // deleting again must succeed prop_assert!(session.delete_group(path.clone()).await.is_ok()); @@ -1789,14 +1816,30 @@ mod tests { #[proptest(async = "tokio")] async fn test_add_delete_array( #[strategy(node_paths())] path: Path, - #[strategy(zarr_array_metadata())] metadata: ZarrArrayMetadata, + #[strategy(shapes_and_dims(None))] metadata: ShapeDim, #[strategy(empty_writable_session())] mut session: Session, ) { // new array must always succeed - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_ok()); + prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_ok()); // adding to the same path must fail - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_err()); + 
prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_err()); // first delete must succeed prop_assert!(session.delete_array(path.clone()).await.is_ok()); @@ -1805,7 +1848,15 @@ mod tests { prop_assert!(session.delete_array(path.clone()).await.is_ok()); // adding again must succeed - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_ok()); + prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_ok()); // deleting again must succeed prop_assert!(session.delete_array(path.clone()).await.is_ok()); @@ -1814,13 +1865,21 @@ mod tests { #[proptest(async = "tokio")] async fn test_add_array_group_clash( #[strategy(node_paths())] path: Path, - #[strategy(zarr_array_metadata())] metadata: ZarrArrayMetadata, + #[strategy(shapes_and_dims(None))] metadata: ShapeDim, #[strategy(empty_writable_session())] mut session: Session, ) { // adding a group at an existing array node must fail - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_ok()); + prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_ok()); let matches = matches!( - session.add_group(path.clone()).await.unwrap_err(), + session.add_group(path.clone(), Bytes::new()).await.unwrap_err(), SessionError{kind: SessionErrorKind::AlreadyExists{node, ..},..} if node.path == path ); prop_assert!(matches); @@ -1833,9 +1892,13 @@ mod tests { prop_assert!(session.delete_array(path.clone()).await.is_ok()); // adding an array at an existing group node must fail - prop_assert!(session.add_group(path.clone()).await.is_ok()); + prop_assert!(session.add_group(path.clone(), Bytes::new()).await.is_ok()); let matches = matches!( - session.add_array(path.clone(), metadata.clone()).await.unwrap_err(), + session.add_array(path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ).await.unwrap_err(), SessionError{kind: SessionErrorKind::AlreadyExists{node, ..},..} if node.path == path ); prop_assert!(matches); @@ -1909,47 +1972,35 @@ mod tests { let manifest_id = manifest.id(); let manifest_size = asset_manager.write_manifest(Arc::clone(&manifest)).await?; - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![2, 2, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; + let shape = ArrayShape::new(vec![(2, 1), (2, 1), (2, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into(), "t".into()]); + let manifest_ref = ManifestRef { object_id: manifest_id.clone(), extents: ManifestExtents::new(&[0, 0, 0], &[1, 1, 2]), }; + + let group_def = Bytes::from_static(br#"{"some":"group"}"#); + let array_def = Bytes::from_static(br#"{"this":"array"}"#); + let array1_path: Path = "/array1".try_into().unwrap(); let node_id = NodeId::random(); let nodes = vec![ NodeSnapshot { path: Path::root(), id: node_id, - 
user_attributes: None, node_data: NodeData::Group, + user_data: group_def.clone(), }, NodeSnapshot { path: array1_path.clone(), id: array_id.clone(), - user_attributes: Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"foo":1}"#).unwrap(), - )), - node_data: NodeData::Array(zarr_meta1.clone(), vec![manifest_ref]), + node_data: NodeData::Array { + shape: shape.clone(), + dimension_names: dimension_names.clone(), + manifests: vec![manifest_ref.clone()], + }, + user_data: array_def.clone(), }, ]; @@ -1961,7 +2012,6 @@ mod tests { "message".to_string(), None, manifests, - vec![], nodes.iter().cloned().map(Ok::), )?); asset_manager.write_snapshot(Arc::clone(&snapshot)).await?; @@ -1985,61 +2035,75 @@ mod tests { assert_eq!(nodes.get(1).unwrap(), &node); let group_name = "/tbd-group".to_string(); - ds.add_group(group_name.clone().try_into().unwrap()).await?; + ds.add_group( + group_name.clone().try_into().unwrap(), + Bytes::copy_from_slice(b"somedef"), + ) + .await?; ds.delete_group(group_name.clone().try_into().unwrap()).await?; // deleting non-existing is no-op assert!(ds.delete_group(group_name.clone().try_into().unwrap()).await.is_ok()); assert!(ds.get_node(&group_name.try_into().unwrap()).await.is_err()); // add a new array and retrieve its node - ds.add_group("/group".try_into().unwrap()).await?; - - let zarr_meta2 = ZarrArrayMetadata { - shape: vec![3], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(2).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + ds.add_group("/group".try_into().unwrap(), Bytes::copy_from_slice(b"somedef2")) + .await?; + + let shape2 = ArrayShape::new(vec![(2, 2)]).unwrap(); + let dimension_names2 = Some(vec!["t".into()]); + + let array_def2 = Bytes::from_static(br#"{"this":"other array"}"#); let new_array_path: Path = "/group/array2".to_string().try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta2.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape2.clone(), + dimension_names2.clone(), + array_def2.clone(), + ) + .await?; ds.delete_array(new_array_path.clone()).await?; // Delete a non-existent array is no-op assert!(ds.delete_array(new_array_path.clone()).await.is_ok()); assert!(ds.get_node(&new_array_path.clone()).await.is_err()); - ds.add_array(new_array_path.clone(), zarr_meta2.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape2.clone(), + dimension_names2.clone(), + array_def2.clone(), + ) + .await?; let node = ds.get_node(&new_array_path).await; assert!(matches!( node.ok(), - Some(NodeSnapshot {path, user_attributes, node_data,..}) - if path== new_array_path.clone() && user_attributes.is_none() && node_data == NodeData::Array(zarr_meta2.clone(), vec![]) + Some(NodeSnapshot {path,node_data, user_data, .. 
}) + if path== new_array_path.clone() && + user_data == array_def2 && + node_data == NodeData::Array{shape:shape2, dimension_names:dimension_names2, manifests: vec![]} )); - // set user attributes for the new array and retrieve them - ds.set_user_attributes( - new_array_path.clone(), - Some(UserAttributes::try_new(br#"{"n":42}"#).unwrap()), + // update the array definition + let shape3 = ArrayShape::new(vec![(4, 3)]).unwrap(); + let dimension_names3 = Some(vec!["tt".into()]); + + let array_def3 = Bytes::from_static(br#"{"this":"yet other array"}"#); + ds.update_array( + &new_array_path.clone(), + shape3.clone(), + dimension_names3.clone(), + array_def3.clone(), ) .await?; let node = ds.get_node(&new_array_path).await; assert!(matches!( node.ok(), - Some(NodeSnapshot {path, user_attributes, node_data, ..}) + Some(NodeSnapshot {path,node_data, user_data, .. }) if path == "/group/array2".try_into().unwrap() && - user_attributes == Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"n":42}"#).unwrap() - )) && - node_data == NodeData::Array(zarr_meta2.clone(), vec![]) + user_data == array_def3 && + node_data == NodeData::Array { shape:shape3, dimension_names: dimension_names3, manifests: vec![] } )); let payload = ds.get_chunk_writer()(Bytes::copy_from_slice(b"foo")).await?; @@ -2053,30 +2117,21 @@ mod tests { let non_chunk = ds.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; assert_eq!(non_chunk, None); - // update old array use attributes and check them - ds.set_user_attributes( - array1_path.clone(), - Some(UserAttributes::try_new(br#"{"updated": true}"#).unwrap()), + // update old array zarr metadata and check it + let shape3 = ArrayShape::new(vec![(8, 3)]).unwrap(); + let dimension_names3 = Some(vec!["tt".into()]); + + let array_def3 = Bytes::from_static(br#"{"this":"more arrays"}"#); + ds.update_array( + &array1_path.clone(), + shape3.clone(), + dimension_names3.clone(), + array_def3.clone(), ) .await?; - let node = ds.get_node(&array1_path).await.unwrap(); - assert_eq!( - node.user_attributes, - Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"updated": true}"#).unwrap() - )) - ); - - // update old array zarr metadata and check it - let new_zarr_meta1 = ZarrArrayMetadata { shape: vec![2, 2, 3], ..zarr_meta1 }; - ds.update_array(array1_path.clone(), new_zarr_meta1).await?; let node = ds.get_node(&array1_path).await; - if let Ok(NodeSnapshot { - node_data: NodeData::Array(ZarrArrayMetadata { shape, .. }, _), - .. - }) = &node - { - assert_eq!(shape, &vec![2, 2, 3]); + if let Ok(NodeSnapshot { node_data: NodeData::Array { shape, .. }, .. 
}) = &node { + assert_eq!(shape, &shape3); } else { panic!("Failed to update zarr metadata"); } @@ -2098,13 +2153,6 @@ mod tests { ) .await?; assert_eq!(chunk, Some(data)); - - let path: Path = "/group/array2".try_into().unwrap(); - let node = ds.get_node(&path).await; - assert!(ds.change_set.has_updated_attributes(&node.as_ref().unwrap().id)); - assert!(ds.delete_array(path.clone()).await.is_ok()); - assert!(!ds.change_set.has_updated_attributes(&node?.id)); - Ok(()) } @@ -2125,8 +2173,9 @@ mod tests { let diff = ds.status().await?; assert!(diff.is_empty()); + let user_data = Bytes::copy_from_slice(b"foo"); // add a new array and retrieve its node - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), user_data.clone()).await?; let diff = ds.status().await?; assert!(!diff.is_empty()); assert_eq!(diff.new_groups, [Path::root()].into()); @@ -2141,47 +2190,40 @@ mod tests { assert_eq!(first_commit, ds.snapshot_id); assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == Path::root() && user_data == actual_user_data && node_data == NodeData::Group )); - ds.add_group("/group".try_into().unwrap()).await?; + let user_data2 = Bytes::copy_from_slice(b"bar"); + ds.add_group("/group".try_into().unwrap(), user_data2.clone()).await?; let _snapshot_id = ds.commit("commit", Some(SnapshotProperties::default())).await?; let mut ds = repository.writable_session("main").await?; assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == Path::root() && user_data==actual_user_data && node_data == NodeData::Group )); assert!(matches!( ds.get_node(&"/group".try_into().unwrap()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == "/group".try_into().unwrap() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data:actual_user_data, node_data, .. 
}) + if path == "/group".try_into().unwrap() && user_data2 == actual_user_data && node_data == NodeData::Group )); - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + let shape = ArrayShape::new([(1, 1), (1, 1), (2, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into(), "z".into()]); + let array_user_data = Bytes::copy_from_slice(b"array"); let new_array_path: Path = "/group/array1".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + array_user_data.clone(), + ) + .await?; let diff = ds.status().await?; assert!(!diff.is_empty()); @@ -2194,7 +2236,13 @@ mod tests { let mut ds = repository.writable_session("main").await?; let new_new_array_path: Path = "/group/array2".try_into().unwrap(); - ds.add_array(new_new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + array_user_data.clone(), + ) + .await?; assert!(ds.has_uncommitted_changes()); let changes = ds.discard_changes(); @@ -2222,22 +2270,26 @@ mod tests { let mut ds = repository.writable_session("main").await?; assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == Path::root() && user_data == actual_user_data && node_data == NodeData::Group )); assert!(matches!( ds.get_node(&"/group".try_into().unwrap()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == "/group".try_into().unwrap() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == "/group".try_into().unwrap() && user_data2 == actual_user_data && node_data == NodeData::Group )); assert!(matches!( ds.get_node(&new_array_path).await.ok(), Some(NodeSnapshot { path, - user_attributes: None, - node_data: NodeData::Array(meta, manifests), + user_data: actual_user_data, + node_data: NodeData::Array{ shape: actual_shape, dimension_names: actual_dim, manifests }, .. - }) if path == new_array_path && meta == zarr_meta.clone() && manifests.len() == 1 + }) if path == new_array_path + && actual_user_data == array_user_data + && actual_shape == shape + && actual_dim == dimension_names + && manifests.len() == 1 )); assert_eq!( ds.get_chunk_ref(&new_array_path, &ChunkIndices(vec![0, 0, 0])).await?, @@ -2267,7 +2319,7 @@ mod tests { let snap = repository.asset_manager().fetch_snapshot(&previous_snapshot_id).await?; match &snap.get_node(&new_array_path)?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. 
} => { assert_eq!( manifests.first().unwrap().extents, ManifestExtents::new(&[0, 0, 0], &[1, 1, 2]) @@ -2291,14 +2343,15 @@ mod tests { ds.set_chunk_ref(new_array_path.clone(), ChunkIndices(vec![0, 0, 1]), None) .await?; - let new_meta = ZarrArrayMetadata { shape: vec![1, 1, 1], ..zarr_meta }; + let new_shape = ArrayShape::new([(1, 1), (1, 1), (1, 1)]).unwrap(); + let new_dimension_names = Some(vec!["X".into(), "X".into(), "Z".into()]); + let new_user_data = Bytes::copy_from_slice(b"new data"); // we change zarr metadata - ds.update_array(new_array_path.clone(), new_meta.clone()).await?; - - // we change user attributes metadata - ds.set_user_attributes( - new_array_path.clone(), - Some(UserAttributes::try_new(br#"{"foo":42}"#).unwrap()), + ds.update_array( + &new_array_path.clone(), + new_shape.clone(), + new_dimension_names.clone(), + new_user_data.clone(), ) .await?; @@ -2306,7 +2359,7 @@ mod tests { let snap = repository.asset_manager().fetch_snapshot(&snapshot_id).await?; match &snap.get_node(&new_array_path)?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { assert_eq!( manifests.first().unwrap().extents, ManifestExtents::new(&[0, 0, 0], &[1, 1, 1]) @@ -2333,12 +2386,14 @@ mod tests { ds.get_node(&new_array_path).await.ok(), Some(NodeSnapshot { path, - user_attributes: Some(atts), - node_data: NodeData::Array(meta, manifests), + user_data: actual_user_data, + node_data: NodeData::Array{shape: actual_shape, dimension_names: actual_dims, manifests}, .. - }) if path == new_array_path && meta == new_meta.clone() && + }) if path == new_array_path && + actual_user_data == new_user_data && manifests.len() == 1 && - atts == UserAttributesSnapshot::Inline(UserAttributes::try_new(br#"{"foo":42}"#).unwrap()) + actual_shape == new_shape && + actual_dims == new_dimension_names )); // since we wrote every asset we should only have one fetch for the initial snapshot @@ -2385,8 +2440,8 @@ mod tests { )] .into() ); - assert_eq!(&diff.updated_user_attributes, &[new_array_path.clone()].into()); - assert_eq!(&diff.updated_zarr_metadata, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_arrays, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_groups, &[].into()); let diff = repository .diff( @@ -2411,8 +2466,8 @@ mod tests { )] .into() ); - assert_eq!(&diff.updated_user_attributes, &[new_array_path.clone()].into()); - assert_eq!(&diff.updated_zarr_metadata, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_arrays, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_groups, &[].into()); Ok(()) } @@ -2420,8 +2475,8 @@ mod tests { async fn test_basic_delete_and_flush() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; - ds.add_group("/1".try_into().unwrap()).await?; + ds.add_group(Path::root(), Bytes::copy_from_slice(b"")).await?; + ds.add_group("/1".try_into().unwrap(), Bytes::copy_from_slice(b"")).await?; ds.delete_group("/1".try_into().unwrap()).await?; assert_eq!(ds.list_nodes().await?.count(), 1); ds.commit("commit", None).await?; @@ -2439,8 +2494,8 @@ mod tests { async fn test_basic_delete_after_flush() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; - ds.add_group("/1".try_into().unwrap()).await?; + ds.add_group(Path::root(), Bytes::copy_from_slice(b"")).await?; + 
ds.add_group("/1".try_into().unwrap(), Bytes::copy_from_slice(b"")).await?; ds.commit("commit", None).await?; let mut ds = repository.writable_session("main").await?; @@ -2455,7 +2510,7 @@ mod tests { async fn test_commit_after_deleting_old_node() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), Bytes::copy_from_slice(b"")).await?; ds.commit("commit", None).await?; let mut ds = repository.writable_session("main").await?; @@ -2471,15 +2526,16 @@ mod tests { #[tokio::test] async fn test_delete_children() -> Result<(), Box> { + let def = Bytes::copy_from_slice(b""); let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), def.clone()).await?; ds.commit("initialize", None).await?; let mut ds = repository.writable_session("main").await?; - ds.add_group("/a".try_into().unwrap()).await?; - ds.add_group("/b".try_into().unwrap()).await?; - ds.add_group("/b/bb".try_into().unwrap()).await?; + ds.add_group("/a".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b/bb".try_into().unwrap(), def.clone()).await?; ds.delete_group("/b".try_into().unwrap()).await?; assert!(ds.get_group(&"/b".try_into().unwrap()).await.is_err()); @@ -2500,10 +2556,11 @@ mod tests { async fn test_delete_children_of_old_nodes() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; - ds.add_group("/a".try_into().unwrap()).await?; - ds.add_group("/b".try_into().unwrap()).await?; - ds.add_group("/b/bb".try_into().unwrap()).await?; + let def = Bytes::copy_from_slice(b""); + ds.add_group(Path::root(), def.clone()).await?; + ds.add_group("/a".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b/bb".try_into().unwrap(), def.clone()).await?; ds.commit("commit", None).await?; let mut ds = repository.writable_session("main").await?; @@ -2518,29 +2575,22 @@ mod tests { let storage: Arc = new_in_memory_storage().await?; let repo = Repository::create(None, storage, HashMap::new()).await?; let mut ds = repo.writable_session("main").await?; + let def = Bytes::copy_from_slice(b""); // add a new array and retrieve its node - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![4, 2, 4], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + ds.add_group(Path::root(), def.clone()).await?; + + let shape = ArrayShape::new(vec![(4, 2), (2, 1), (4, 2)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; // we 3 
chunks ds.set_chunk_ref( new_array_path.clone(), @@ -2614,29 +2664,19 @@ mod tests { ); let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5, 5], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + let def = Bytes::copy_from_slice(b""); + let shape = ArrayShape::new(vec![(5, 2), (5, 1)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); + + ds.add_group(Path::root(), def.clone()).await?; let a1path: Path = "/array1".try_into()?; let a2path: Path = "/array2".try_into()?; - ds.add_array(a1path.clone(), zarr_meta.clone()).await?; - ds.add_array(a2path.clone(), zarr_meta.clone()).await?; + ds.add_array(a1path.clone(), shape.clone(), dimension_names.clone(), def.clone()) + .await?; + ds.add_array(a2path.clone(), shape.clone(), dimension_names.clone(), def.clone()) + .await?; let _ = ds.commit("first commit", None).await?; @@ -2698,7 +2738,7 @@ mod tests { ); let manifest_id = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { manifests.first().as_ref().unwrap().object_id.clone() } NodeData::Group => panic!("must be an array"), @@ -2726,7 +2766,7 @@ mod tests { ); let manifest_id = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { manifests.first().as_ref().unwrap().object_id.clone() } NodeData::Group => panic!("must be an array"), @@ -2765,7 +2805,7 @@ mod tests { ); let manifest_id = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { manifests.first().as_ref().unwrap().object_id.clone() } NodeData::Group => panic!("must be an array"), @@ -2781,7 +2821,7 @@ mod tests { let _snap_id = ds.commit("chunk deleted", None).await?; let manifests = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => manifests, + NodeData::Array { manifests, .. 
} => manifests, NodeData::Group => panic!("must be an array"), }; assert!(manifests.is_empty()); @@ -2817,8 +2857,10 @@ mod tests { let storage_settings = storage.default_settings(); let mut ds = repo.writable_session("main").await?; + let def = Bytes::copy_from_slice(b""); + // add a new array and retrieve its node - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), def.clone()).await?; let new_snapshot_id = ds.commit("first commit", None).await?; assert_eq!( new_snapshot_id, @@ -2834,37 +2876,29 @@ mod tests { assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, ..}) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { node_data, path, ..}) + if path == Path::root() && node_data == NodeData::Group )); let mut ds = repo.writable_session("main").await?; assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, ..}) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, node_data, ..}) + if path == Path::root() && node_data == NodeData::Group )); - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + + let shape = ArrayShape::new(vec![(1, 1), (2, 1), (4, 2)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); let new_array_path: Path = "/array1".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; ds.set_chunk_ref( new_array_path.clone(), ChunkIndices(vec![0, 0, 0]), @@ -2900,8 +2934,9 @@ mod tests { let mut ds1 = repository.writable_session("main").await?; let mut ds2 = repository.writable_session("main").await?; - ds1.add_group("/a".try_into().unwrap()).await?; - ds2.add_group("/b".try_into().unwrap()).await?; + let def = Bytes::copy_from_slice(b""); + ds1.add_group("/a".try_into().unwrap(), def.clone()).await?; + ds2.add_group("/b".try_into().unwrap(), def.clone()).await?; let barrier = Arc::new(Barrier::new(2)); let barrier_c = Arc::clone(&barrier); @@ -2963,24 +2998,12 @@ mod tests { let repo = Repository::create(None, Arc::clone(&storage), HashMap::new()).await?; let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5, 5], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: None, - dimension_names: None, - }; + let shape = ArrayShape::new(vec![(5, 2), (5, 2)]).unwrap(); + ds.add_group(Path::root(), Bytes::new()).await?; let apath: Path = "/array1".try_into()?; - ds.add_array(apath.clone(), zarr_meta.clone()).await?; + 
ds.add_array(apath.clone(), shape, None, Bytes::new()).await?; ds.commit("first commit", None).await?; @@ -3035,8 +3058,14 @@ mod tests { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group("/foo/bar".try_into().unwrap()).await?; - ds.add_array("/foo/bar/some-array".try_into().unwrap(), basic_meta()).await?; + ds.add_group("/foo/bar".try_into().unwrap(), Bytes::new()).await?; + ds.add_array( + "/foo/bar/some-array".try_into().unwrap(), + basic_shape(), + None, + Bytes::new(), + ) + .await?; ds.commit("create directory", None).await?; Ok(repository) @@ -3050,17 +3079,12 @@ mod tests { Ok((ds, ds2)) } - fn basic_meta() -> ZarrArrayMetadata { - ZarrArrayMetadata { - shape: vec![5], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - } + fn basic_shape() -> ArrayShape { + ArrayShape::new(vec![(5, 1)]).unwrap() + } + + fn user_data() -> Bytes { + Bytes::new() } fn assert_has_conflict(conflict: &Conflict, rebase_result: SessionResult<()>) { @@ -3085,10 +3109,10 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let conflict_path: Path = "/foo/bar/conflict".try_into().unwrap(); - ds1.add_group(conflict_path.clone()).await?; + ds1.add_group(conflict_path.clone(), user_data()).await?; ds1.commit("create group", None).await?; - ds2.add_array(conflict_path.clone(), basic_meta()).await?; + ds2.add_array(conflict_path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("create array", None).await.unwrap_err(); assert_has_conflict( &Conflict::NewNodeConflictsWithExistingNode(conflict_path), @@ -3107,11 +3131,11 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let conflict_path: Path = "/foo/bar/conflict".try_into().unwrap(); - ds1.add_array(conflict_path.clone(), basic_meta()).await?; + ds1.add_array(conflict_path.clone(), basic_shape(), None, user_data()).await?; ds1.commit("create array", None).await?; let inner_path: Path = "/foo/bar/conflict/inner".try_into().unwrap(); - ds2.add_array(inner_path.clone(), basic_meta()).await?; + ds2.add_array(inner_path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("create inner array", None).await.unwrap_err(); assert_has_conflict( &Conflict::NewNodeInInvalidGroup(conflict_path), @@ -3130,10 +3154,10 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.update_array(path.clone(), basic_meta()).await?; + ds1.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds1.commit("update array", None).await?; - ds2.update_array(path.clone(), basic_meta()).await?; + ds2.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("update array again", None).await.unwrap_err(); assert_has_conflict( &Conflict::ZarrMetadataDoubleUpdate(path), @@ -3155,7 +3179,7 @@ mod tests { ds1.delete_array(path.clone()).await?; ds1.commit("delete array", None).await?; - ds2.update_array(path.clone(), basic_meta()).await?; + ds2.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("update array again", None).await.unwrap_err(); assert_has_conflict( &Conflict::ZarrMetadataUpdateOfDeletedArray(path), @@ -3164,63 +3188,6 @@ mod tests { Ok(()) } - #[tokio::test()] - /// Test conflict detection - /// - 
/// This session: uptade user attributes - /// Previous commit: update user attributes - async fn test_conflict_detection_double_user_atts_edit() -> Result<(), Box> - { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds1.commit("update array", None).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - let node_id = ds2.get_array(&path).await?.id; - assert_has_conflict( - &Conflict::UserAttributesDoubleUpdate { path, node_id }, - ds2.rebase(&ConflictDetector).await, - ); - Ok(()) - } - - #[tokio::test()] - /// Test conflict detection - /// - /// This session: uptade user attributes - /// Previous commit: delete same array - async fn test_conflict_detection_user_atts_edit_of_deleted( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.delete_array(path.clone()).await?; - ds1.commit("delete array", None).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - assert_has_conflict( - &Conflict::UserAttributesUpdateOfDeletedNode(path), - ds2.rebase(&ConflictDetector).await, - ); - Ok(()) - } - #[tokio::test()] /// Test conflict detection /// @@ -3231,7 +3198,7 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.update_array(path.clone(), basic_meta()).await?; + ds1.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds1.commit("update array", None).await?; let node = ds2.get_node(&path).await.unwrap(); @@ -3244,33 +3211,6 @@ mod tests { Ok(()) } - #[tokio::test()] - /// Test conflict detection - /// - /// This session: delete array - /// Previous commit: update same array user attributes - async fn test_conflict_detection_delete_when_array_user_atts_updated( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds1.commit("update user attributes", None).await?; - - let node = ds2.get_node(&path).await.unwrap(); - ds2.delete_array(path.clone()).await?; - ds2.commit("delete array", None).await.unwrap_err(); - assert_has_conflict( - &Conflict::DeleteOfUpdatedArray { path, node_id: node.id }, - ds2.rebase(&ConflictDetector).await, - ); - Ok(()) - } - #[tokio::test()] /// Test conflict detection /// @@ -3304,16 +3244,12 @@ mod tests { /// /// This session: delete group /// Previous commit: update same group user attributes - async fn test_conflict_detection_delete_when_group_user_atts_updated( + async fn test_conflict_detection_delete_when_group_user_data_updated( ) -> Result<(), Box> { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; + ds1.update_group(&path, Bytes::new()).await?; ds1.commit("update user 
attributes", None).await?; let node = ds2.get_node(&path).await.unwrap(); @@ -3332,20 +3268,10 @@ mod tests { let mut ds = repo.writable_session("main").await?; - ds.add_group("/".try_into().unwrap()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; + ds.add_group("/".try_into().unwrap(), user_data()).await?; let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(new_array_path.clone(), basic_shape(), None, user_data()).await?; ds.commit("create array", None).await?; // one writer sets chunks @@ -3404,20 +3330,9 @@ mod tests { let mut ds = repo.writable_session("main").await?; - ds.add_group("/".try_into().unwrap()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; - + ds.add_group("/".try_into().unwrap(), user_data()).await?; let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(new_array_path.clone(), basic_shape(), None, user_data()).await?; let _array_created_snap = ds.commit("create array", None).await?; let mut ds1 = repo.writable_session("main").await?; @@ -3437,7 +3352,7 @@ mod tests { .await?; let new_array_2_path: Path = "/array_2".try_into().unwrap(); - ds1.add_array(new_array_2_path.clone(), zarr_meta.clone()).await?; + ds1.add_array(new_array_2_path.clone(), basic_shape(), None, user_data()).await?; ds1.set_chunk_ref( new_array_2_path.clone(), ChunkIndices(vec![0]), @@ -3504,217 +3419,126 @@ mod tests { false, ) .await?; - - // TODO: We can't create writable sessions from arbitrary snapshots anymore so not sure what to do about this? - // let's try to create a new commit, that conflicts with the previous one and writes - // to the same chunk, recovering with "Fail" policy (so it shouldn't recover) - // let mut repo2 = - // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); - // repo2 - // .set_chunk_ref( - // new_array_path.clone(), - // ChunkIndices(vec![1]), - // Some(ChunkPayload::Inline("overridden".into())), - // ) - // .await?; - - // if let Err(SessionError::Conflict { .. }) = - // repo2.commit("main", "write one chunk with repo2", None).await - // { - // let solver = BasicConflictSolver { - // on_chunk_conflict: VersionSelection::Fail, - // ..BasicConflictSolver::default() - // }; - - // let res = repo2.rebase(&solver, "main").await; - // assert!(matches!( - // res, - // Err(SessionError::RebaseFailed { snapshot, conflicts, }) - // if snapshot == conflicting_snap && - // conflicts.len() == 1 && - // matches!(conflicts[0], Conflict::ChunkDoubleUpdate { ref path, ref chunk_coordinates, .. 
} - // if path == &new_array_path && chunk_coordinates == &[ChunkIndices(vec![1])].into()) - // )); - // } else { - // panic!("Bad test, it should conflict") - // } - - // // reset the branch to what repo1 wrote - // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; - // update_branch( - // storage.as_ref(), - // "main", - // conflicting_snap.clone(), - // Some(¤t_snap), - // false, - // ) - // .await?; - - // // let's try to create a new commit, that conflicts with the previous one and writes - // // to the same chunk, recovering with "UseOurs" policy - // let mut repo2 = - // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); - // repo2 - // .set_chunk_ref( - // new_array_path.clone(), - // ChunkIndices(vec![1]), - // Some(ChunkPayload::Inline("overridden".into())), - // ) - // .await?; - // if let Err(SessionError::Conflict { .. }) = - // repo2.commit("main", "write one chunk with repo2", None).await - // { - // let solver = BasicConflictSolver { - // on_chunk_conflict: VersionSelection::UseOurs, - // ..Default::default() - // }; - - // repo2.rebase(&solver, "main").await?; - // repo2.commit("main", "after conflict", None).await?; - // let data = - // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; - // assert_eq!(data, Some(ChunkPayload::Inline("overridden".into()))); - // let commits = repo2.ancestry().await?.try_collect::>().await?; - // assert_eq!(commits[0].message, "after conflict"); - // assert_eq!(commits[1].message, "write two chunks with repo 1"); - // } else { - // panic!("Bad test, it should conflict") - // } - - // // reset the branch to what repo1 wrote - // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; - // update_branch( - // storage.as_ref(), - // "main", - // conflicting_snap.clone(), - // Some(¤t_snap), - // false, - // ) - // .await?; - - // // let's try to create a new commit, that conflicts with the previous one and writes - // // to the same chunk, recovering with "UseTheirs" policy - // let mut repo2 = - // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); - // repo2 - // .set_chunk_ref( - // new_array_path.clone(), - // ChunkIndices(vec![1]), - // Some(ChunkPayload::Inline("overridden".into())), - // ) - // .await?; - // if let Err(SessionError::Conflict { .. 
}) = - // repo2.commit("main", "write one chunk with repo2", None).await - // { - // let solver = BasicConflictSolver { - // on_chunk_conflict: VersionSelection::UseTheirs, - // ..Default::default() - // }; - - // repo2.rebase(&solver, "main").await?; - // repo2.commit("main", "after conflict", None).await?; - // let data = - // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; - // assert_eq!(data, Some(ChunkPayload::Inline("hello1".into()))); - // let commits = repo2.ancestry().await?.try_collect::>().await?; - // assert_eq!(commits[0].message, "after conflict"); - // assert_eq!(commits[1].message, "write two chunks with repo 1"); - // } else { - // panic!("Bad test, it should conflict") - // } - - Ok(()) - } - - #[tokio::test] - /// Test conflict resolution with rebase - /// - /// Two sessions write user attributes to the same array - /// We attempt to recover using [`VersionSelection::UseOurs`] policy - async fn test_conflict_resolution_double_user_atts_edit_with_ours( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":1}"#).unwrap()), - ) - .await?; - ds1.commit("update array", None).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":2}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - - let solver = BasicConflictSolver { - on_user_attributes_conflict: VersionSelection::UseOurs, - ..Default::default() - }; - - ds2.rebase(&solver).await?; - ds2.commit("after conflict", None).await?; - - let atts = ds2.get_node(&path).await.unwrap().user_attributes.unwrap(); - assert_eq!( - atts, - UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"repo":2}"#).unwrap() - ) - ); Ok(()) } - #[tokio::test] - /// Test conflict resolution with rebase - /// - /// Two sessions write user attributes to the same array - /// We attempt to recover using [`VersionSelection::UseTheirs`] policy - async fn test_conflict_resolution_double_user_atts_edit_with_theirs( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":1}"#).unwrap()), - ) - .await?; - ds1.commit("update array", None).await?; - - // we made one extra random change to the repo, because we'll undo the user attributes - // update and we cannot commit an empty change - ds2.add_group("/baz".try_into().unwrap()).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":2}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - - let solver = BasicConflictSolver { - on_user_attributes_conflict: VersionSelection::UseTheirs, - ..Default::default() - }; - - ds2.rebase(&solver).await?; - ds2.commit("after conflict", None).await?; - - let atts = ds2.get_node(&path).await.unwrap().user_attributes.unwrap(); - assert_eq!( - atts, - UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"repo":1}"#).unwrap() - ) - ); - - ds2.get_node(&"/baz".try_into().unwrap()).await?; - Ok(()) - } + // TODO: We can't create writable sessions from arbitrary snapshots anymore so not sure what to do about this? 
+ // let's try to create a new commit, that conflicts with the previous one and writes + // to the same chunk, recovering with "Fail" policy (so it shouldn't recover) + // let mut repo2 = + // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); + // repo2 + // .set_chunk_ref( + // new_array_path.clone(), + // ChunkIndices(vec![1]), + // Some(ChunkPayload::Inline("overridden".into())), + // ) + // .await?; + + // if let Err(SessionError::Conflict { .. }) = + // repo2.commit("main", "write one chunk with repo2", None).await + // { + // let solver = BasicConflictSolver { + // on_chunk_conflict: VersionSelection::Fail, + // ..BasicConflictSolver::default() + // }; + + // let res = repo2.rebase(&solver, "main").await; + // assert!(matches!( + // res, + // Err(SessionError::RebaseFailed { snapshot, conflicts, }) + // if snapshot == conflicting_snap && + // conflicts.len() == 1 && + // matches!(conflicts[0], Conflict::ChunkDoubleUpdate { ref path, ref chunk_coordinates, .. } + // if path == &new_array_path && chunk_coordinates == &[ChunkIndices(vec![1])].into()) + // )); + // } else { + // panic!("Bad test, it should conflict") + // } + + // // reset the branch to what repo1 wrote + // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; + // update_branch( + // storage.as_ref(), + // "main", + // conflicting_snap.clone(), + // Some(¤t_snap), + // false, + // ) + // .await?; + + // // let's try to create a new commit, that conflicts with the previous one and writes + // // to the same chunk, recovering with "UseOurs" policy + // let mut repo2 = + // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); + // repo2 + // .set_chunk_ref( + // new_array_path.clone(), + // ChunkIndices(vec![1]), + // Some(ChunkPayload::Inline("overridden".into())), + // ) + // .await?; + // if let Err(SessionError::Conflict { .. }) = + // repo2.commit("main", "write one chunk with repo2", None).await + // { + // let solver = BasicConflictSolver { + // on_chunk_conflict: VersionSelection::UseOurs, + // ..Default::default() + // }; + + // repo2.rebase(&solver, "main").await?; + // repo2.commit("main", "after conflict", None).await?; + // let data = + // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; + // assert_eq!(data, Some(ChunkPayload::Inline("overridden".into()))); + // let commits = repo2.ancestry().await?.try_collect::>().await?; + // assert_eq!(commits[0].message, "after conflict"); + // assert_eq!(commits[1].message, "write two chunks with repo 1"); + // } else { + // panic!("Bad test, it should conflict") + // } + + // // reset the branch to what repo1 wrote + // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; + // update_branch( + // storage.as_ref(), + // "main", + // conflicting_snap.clone(), + // Some(¤t_snap), + // false, + // ) + // .await?; + + // // let's try to create a new commit, that conflicts with the previous one and writes + // // to the same chunk, recovering with "UseTheirs" policy + // let mut repo2 = + // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); + // repo2 + // .set_chunk_ref( + // new_array_path.clone(), + // ChunkIndices(vec![1]), + // Some(ChunkPayload::Inline("overridden".into())), + // ) + // .await?; + // if let Err(SessionError::Conflict { .. 
}) = + // repo2.commit("main", "write one chunk with repo2", None).await + // { + // let solver = BasicConflictSolver { + // on_chunk_conflict: VersionSelection::UseTheirs, + // ..Default::default() + // }; + + // repo2.rebase(&solver, "main").await?; + // repo2.commit("main", "after conflict", None).await?; + // let data = + // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; + // assert_eq!(data, Some(ChunkPayload::Inline("hello1".into()))); + // let commits = repo2.ancestry().await?.try_collect::>().await?; + // assert_eq!(commits[0].message, "after conflict"); + // assert_eq!(commits[1].message, "write two chunks with repo 1"); + // } else { + // panic!("Bad test, it should conflict") + // } #[tokio::test] /// Test conflict resolution with rebase @@ -3727,7 +3551,7 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.update_array(path.clone(), basic_meta()).await?; + ds1.update_array(&path, basic_shape(), None, user_data()).await?; ds1.commit("update array", None).await?; ds2.delete_array(path.clone()).await?; @@ -3806,12 +3630,13 @@ mod tests { let mut ds2 = repo.writable_session("main").await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( + ds1.set_chunk_ref( path.clone(), - Some(UserAttributes::try_new(br#"{"repo":1}"#).unwrap()), + ChunkIndices(vec![1]), + Some(ChunkPayload::Inline("repo 1".into())), ) .await?; - let non_conflicting_snap = ds1.commit("update user atts", None).await?; + let non_conflicting_snap = ds1.commit("updated non-conflict chunk", None).await?; let mut ds1 = repo.writable_session("main").await?; ds1.set_chunk_ref( @@ -3860,6 +3685,7 @@ mod tests { mod state_machine_test { use crate::format::snapshot::NodeData; use crate::format::Path; + use bytes::Bytes; use futures::Future; use proptest::prelude::*; use proptest::sample; @@ -3874,16 +3700,17 @@ mod tests { use proptest::test_runner::Config; use super::create_memory_store_repository; + use super::ArrayShape; + use super::DimensionName; use super::Session; - use super::ZarrArrayMetadata; - use super::{node_paths, zarr_array_metadata}; + use super::{node_paths, shapes_and_dims}; #[derive(Clone, Debug)] enum RepositoryTransition { - AddArray(Path, ZarrArrayMetadata), - UpdateArray(Path, ZarrArrayMetadata), + AddArray(Path, ArrayShape, Option>, Bytes), + UpdateArray(Path, ArrayShape, Option>, Bytes), DeleteArray(Option), - AddGroup(Path), + AddGroup(Path, Bytes), DeleteGroup(Option), } @@ -3892,8 +3719,8 @@ mod tests { #[derive(Clone, Default, Debug)] struct RepositoryModel { - arrays: HashMap, - groups: Vec, + arrays: HashMap>, Bytes)>, + groups: HashMap, } impl ReferenceStateMachine for RepositoryStateMachine { @@ -3924,7 +3751,7 @@ mod tests { let delete_groups = { if !state.groups.is_empty() { - sample::select(state.groups.clone()) + sample::select(state.groups.keys().cloned().collect::>()) .prop_map(|p| RepositoryTransition::DeleteGroup(Some(p))) .boxed() } else { @@ -3933,12 +3760,38 @@ mod tests { }; prop_oneof![ - (node_paths(), zarr_array_metadata()) - .prop_map(|(a, b)| RepositoryTransition::AddArray(a, b)), - (node_paths(), zarr_array_metadata()) - .prop_map(|(a, b)| RepositoryTransition::UpdateArray(a, b)), + ( + node_paths(), + shapes_and_dims(None), + proptest::collection::vec(any::(), 0..=100) + ) + .prop_map(|(a, shape, user_data)| { + RepositoryTransition::AddArray( + a, + shape.shape, + shape.dimension_names, + 
Bytes::copy_from_slice(user_data.as_slice()), + ) + }), + ( + node_paths(), + shapes_and_dims(None), + proptest::collection::vec(any::(), 0..=100) + ) + .prop_map(|(a, shape, user_data)| { + RepositoryTransition::UpdateArray( + a, + shape.shape, + shape.dimension_names, + Bytes::copy_from_slice(user_data.as_slice()), + ) + }), delete_arrays, - node_paths().prop_map(RepositoryTransition::AddGroup), + (node_paths(), proptest::collection::vec(any::(), 0..=100)) + .prop_map(|(p, ud)| RepositoryTransition::AddGroup( + p, + Bytes::copy_from_slice(ud.as_slice()) + )), delete_groups, ] .boxed() @@ -3950,14 +3803,20 @@ mod tests { ) -> Self::State { match transition { // Array ops - RepositoryTransition::AddArray(path, metadata) => { - let res = state.arrays.insert(path.clone(), metadata.clone()); + RepositoryTransition::AddArray(path, shape, dims, ud) => { + let res = state.arrays.insert( + path.clone(), + (shape.clone(), dims.clone(), ud.clone()), + ); assert!(res.is_none()); } - RepositoryTransition::UpdateArray(path, metadata) => { + RepositoryTransition::UpdateArray(path, shape, dims, ud) => { state .arrays - .insert(path.clone(), metadata.clone()) + .insert( + path.clone(), + (shape.clone(), dims.clone(), ud.clone()), + ) .expect("(postcondition) insertion failed"); } RepositoryTransition::DeleteArray(path) => { @@ -3969,17 +3828,13 @@ mod tests { } // Group ops - RepositoryTransition::AddGroup(path) => { - state.groups.push(path.clone()); + RepositoryTransition::AddGroup(path, ud) => { + state.groups.insert(path.clone(), ud.clone()); // TODO: postcondition } RepositoryTransition::DeleteGroup(Some(path)) => { - let index = - state.groups.iter().position(|x| x == path).expect( - "Attempting to delete a non-existent path: {path}", - ); - state.groups.swap_remove(index); - state.groups.retain(|group| !group.starts_with(path)); + state.groups.remove(path); + state.groups.retain(|group, _| !group.starts_with(path)); state.arrays.retain(|array, _| !array.starts_with(path)); } _ => panic!(), @@ -3989,15 +3844,17 @@ mod tests { fn preconditions(state: &Self::State, transition: &Self::Transition) -> bool { match transition { - RepositoryTransition::AddArray(path, _) => { - !state.arrays.contains_key(path) && !state.groups.contains(path) + RepositoryTransition::AddArray(path, _, _, _) => { + !state.arrays.contains_key(path) + && !state.groups.contains_key(path) } - RepositoryTransition::UpdateArray(path, _) => { + RepositoryTransition::UpdateArray(path, _, _, _) => { state.arrays.contains_key(path) } RepositoryTransition::DeleteArray(path) => path.is_some(), - RepositoryTransition::AddGroup(path) => { - !state.arrays.contains_key(path) && !state.groups.contains(path) + RepositoryTransition::AddGroup(path, _) => { + !state.arrays.contains_key(path) + && !state.groups.contains_key(path) } RepositoryTransition::DeleteGroup(p) => p.is_some(), } @@ -4046,17 +3903,17 @@ mod tests { let runtime = &state.runtime; let repository = &mut state.session; match transition { - RepositoryTransition::AddArray(path, metadata) => { - runtime.unwrap(repository.add_array(path, metadata)) + RepositoryTransition::AddArray(path, shape, dims, ud) => { + runtime.unwrap(repository.add_array(path, shape, dims, ud)) } - RepositoryTransition::UpdateArray(path, metadata) => { - runtime.unwrap(repository.update_array(path, metadata)) + RepositoryTransition::UpdateArray(path, shape, dims, ud) => { + runtime.unwrap(repository.update_array(&path, shape, dims, ud)) } RepositoryTransition::DeleteArray(Some(path)) => { 
runtime.unwrap(repository.delete_array(path)) } - RepositoryTransition::AddGroup(path) => { - runtime.unwrap(repository.add_group(path)) + RepositoryTransition::AddGroup(path, ud) => { + runtime.unwrap(repository.add_group(path, ud)) } RepositoryTransition::DeleteGroup(Some(path)) => { runtime.unwrap(repository.delete_group(path)) @@ -4071,23 +3928,28 @@ mod tests { ref_state: &::State, ) { let runtime = &state.runtime; - for (path, metadata) in ref_state.arrays.iter() { + for (path, (shape, dims, ud)) in ref_state.arrays.iter() { let node = runtime.unwrap(state.session.get_array(path)); let actual_metadata = match node.node_data { - NodeData::Array(metadata, _) => Ok(metadata), + NodeData::Array { shape, dimension_names, .. } => { + Ok((shape, dimension_names)) + } _ => Err("foo"), } .unwrap(); - assert_eq!(metadata, &actual_metadata); + assert_eq!(shape, &actual_metadata.0); + assert_eq!(dims, &actual_metadata.1); + assert_eq!(ud, &node.user_data); } - for path in ref_state.groups.iter() { + for (path, ud) in ref_state.groups.iter() { let node = runtime.unwrap(state.session.get_group(path)); match node.node_data { NodeData::Group => Ok(()), _ => Err("foo"), } .unwrap(); + assert_eq!(&node.user_data, ud) } } } diff --git a/icechunk/src/store.rs b/icechunk/src/store.rs index 12ec9134..936cb3b1 100644 --- a/icechunk/src/store.rs +++ b/icechunk/src/store.rs @@ -2,7 +2,6 @@ use std::{ collections::HashSet, fmt::Display, iter, - num::NonZeroU64, ops::{Deref, DerefMut}, sync::{ atomic::{AtomicUsize, Ordering}, @@ -24,12 +23,8 @@ use crate::{ error::ICError, format::{ manifest::{ChunkPayload, VirtualChunkRef}, - snapshot::{NodeData, NodeSnapshot, UserAttributesSnapshot, ZarrArrayMetadata}, - ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, Path, PathError, - }, - metadata::{ - ArrayShape, ChunkKeyEncoding, ChunkShape, Codec, DataType, DimensionNames, - FillValue, StorageTransformer, UserAttributes, + snapshot::{ArrayShape, DimensionName, NodeData, NodeSnapshot}, + ByteRange, ChunkIndices, ChunkOffset, Path, PathError, }, refs::{RefError, RefErrorKind}, repository::{RepositoryError, RepositoryErrorKind}, @@ -101,6 +96,8 @@ pub enum StoreErrorKind { "invalid chunk location, no matching virtual chunk container: `{chunk_location}`" )] InvalidVirtualChunkContainer { chunk_location: String }, + #[error("{0}")] + Other(String), #[error("unknown store error")] Unknown(Box), } @@ -299,12 +296,12 @@ impl Store { match Key::parse(key)? { Key::Metadata { node_path } => { if let Ok(array_meta) = serde_json::from_slice(value.as_ref()) { - self.set_array_meta(node_path, array_meta, locked_session).await + self.set_array_meta(node_path, value, array_meta, locked_session) + .await } else { - match serde_json::from_slice(value.as_ref()) { - Ok(group_meta) => { - self.set_group_meta(node_path, group_meta, locked_session) - .await + match serde_json::from_slice::(value.as_ref()) { + Ok(_) => { + self.set_group_meta(node_path, value, locked_session).await } Err(err) => Err(StoreErrorKind::BadMetadata(err).into()), } @@ -414,7 +411,7 @@ impl Store { let node = guard.get_closest_ancestor_node(&path).await; if let Ok(NodeSnapshot { path: node_path, - node_data: NodeData::Array(..), + node_data: NodeData::Array { .. }, .. }) = node { @@ -540,7 +537,7 @@ impl Store { let path = Path::try_from(absolute_prefix)?; let session = Arc::clone(&self.session).read_owned().await; let results = match session.get_node(&path).await { - Ok(NodeSnapshot { node_data: NodeData::Array(..), .. 
}) => { + Ok(NodeSnapshot { node_data: NodeData::Array { .. }, .. }) => { // if this is an array we know what to yield vec![ ListDirItem::Key("zarr.json".to_string()), @@ -551,7 +548,7 @@ impl Store { ListDirItem::Prefix("c".to_string()), ] } - Ok(NodeSnapshot { node_data: NodeData::Group, .. }) => { + Ok(NodeSnapshot { node_data: NodeData::Group { .. }, .. }) => { // if the prefix is the path to a group we need to discover any nodes with the prefix as node path // listing chunks is unnecessary self.list_metadata_prefix(prefix, true) @@ -647,47 +644,49 @@ impl Store { async fn set_array_meta( &self, path: Path, + user_data: Bytes, array_meta: ArrayMetadata, - locked_repo: Option<&mut Session>, + locked_session: Option<&mut Session>, ) -> Result<(), StoreError> { - match locked_repo { - Some(repo) => set_array_meta(path, array_meta, repo).await, - None => self.set_array_meta_locking(path, array_meta).await, + match locked_session { + Some(session) => set_array_meta(path, user_data, array_meta, session).await, + None => self.set_array_meta_locking(path, user_data, array_meta).await, } } async fn set_array_meta_locking( &self, path: Path, + user_data: Bytes, array_meta: ArrayMetadata, ) -> Result<(), StoreError> { // we need to hold the lock while we search the array and do the update to avoid race // conditions with other writers (notice we don't take &mut self) let mut guard = self.session.write().await; - set_array_meta(path, array_meta, guard.deref_mut()).await + set_array_meta(path, user_data, array_meta, guard.deref_mut()).await } async fn set_group_meta( &self, path: Path, - group_meta: GroupMetadata, + user_data: Bytes, locked_repo: Option<&mut Session>, ) -> Result<(), StoreError> { match locked_repo { - Some(repo) => set_group_meta(path, group_meta, repo).await, - None => self.set_group_meta_locking(path, group_meta).await, + Some(repo) => set_group_meta(path, user_data, repo).await, + None => self.set_group_meta_locking(path, user_data).await, } } async fn set_group_meta_locking( &self, path: Path, - group_meta: GroupMetadata, + user_data: Bytes, ) -> Result<(), StoreError> { // we need to hold the lock while we search the array and do the update to avoid race // conditions with other writers (notice we don't take &mut self) let mut guard = self.session.write().await; - set_group_meta(path, group_meta, guard.deref_mut()).await + set_group_meta(path, user_data, guard.deref_mut()).await } async fn list_metadata_prefix<'a, 'b: 'a>( @@ -741,60 +740,45 @@ impl Store { async fn set_array_meta( path: Path, + user_data: Bytes, array_meta: ArrayMetadata, - repo: &mut Session, + session: &mut Session, ) -> StoreResult<()> { - // TODO: Consider deleting all this logic? - // We try hard to not overwrite existing metadata here. - // I don't think this is actually useful because Zarr's - // Array/Group API will require that the user set `overwrite=True` - // which will delete any existing array metadata. This path is only - // applicable when using the explicit `store.set` interface. 
- if let Ok(node) = repo.get_array(&path).await { - // Check if the user attributes are different, if they are update them - let existing_attrs = match node.user_attributes { - None => None, - Some(UserAttributesSnapshot::Inline(atts)) => Some(atts), - // FIXME: implement - Some(UserAttributesSnapshot::Ref(_)) => None, - }; - - if existing_attrs != array_meta.attributes { - repo.set_user_attributes(path.clone(), array_meta.attributes).await?; - } - - // Check if the zarr metadata is different, if it is update it - if let NodeData::Array(existing_array_metadata, _) = node.node_data { - if existing_array_metadata != array_meta.zarr_metadata { - repo.update_array(path, array_meta.zarr_metadata).await?; + let shape = array_meta + .shape() + .ok_or(StoreErrorKind::Other("Invalid chunk grid metadata".to_string()))?; + if let Ok(node) = session.get_array(&path).await { + if let NodeData::Array { .. } = node.node_data { + if node.user_data != user_data { + session + .update_array(&path, shape, array_meta.dimension_names(), user_data) + .await?; } - } else { - // This should be unreachable, but just in case... - repo.update_array(path, array_meta.zarr_metadata).await?; } - + // FIXME: don't ignore error Ok(()) } else { - repo.add_array(path.clone(), array_meta.zarr_metadata).await?; - repo.set_user_attributes(path, array_meta.attributes).await?; + session + .add_array(path.clone(), shape, array_meta.dimension_names(), user_data) + .await?; Ok(()) } } async fn set_group_meta( path: Path, - group_meta: GroupMetadata, - repo: &mut Session, + user_data: Bytes, + session: &mut Session, ) -> StoreResult<()> { - // we need to hold the lock while we search the group and do the update to avoid race - // conditions with other writers (notice we don't take &mut self) - // - if repo.get_group(&path).await.is_ok() { - repo.set_user_attributes(path, group_meta.attributes).await?; + if let Ok(node) = session.get_group(&path).await { + if let NodeData::Group = node.node_data { + if node.user_data != user_data { + session.update_group(&path, user_data).await?; + } + } Ok(()) } else { - repo.add_group(path.clone()).await?; - repo.set_user_attributes(path, group_meta.attributes).await?; + session.add_group(path.clone(), user_data).await?; Ok(()) } } @@ -803,29 +787,13 @@ async fn get_metadata( _key: &str, path: &Path, range: &ByteRange, - repo: &Session, + session: &Session, ) -> StoreResult { // FIXME: don't skip errors - let node = repo.get_node(path).await.map_err(|_| { + let node = session.get_node(path).await.map_err(|_| { StoreErrorKind::NotFound(KeyNotFoundError::NodeNotFound { path: path.clone() }) })?; - let user_attributes = match node.user_attributes { - None => None, - Some(UserAttributesSnapshot::Inline(atts)) => Some(atts), - // FIXME: implement - #[allow(clippy::unimplemented)] - Some(UserAttributesSnapshot::Ref(_)) => unimplemented!(), - }; - let full_metadata = match node.node_data { - NodeData::Group => { - Ok::(GroupMetadata::new(user_attributes).to_bytes()) - } - NodeData::Array(zarr_metadata, _) => { - Ok(ArrayMetadata::new(user_attributes, zarr_metadata).to_bytes()) - } - }?; - - Ok(range.slice(full_metadata)) + Ok(range.slice(node.user_data)) } async fn get_chunk_bytes( @@ -833,9 +801,9 @@ async fn get_chunk_bytes( path: Path, coords: ChunkIndices, byte_range: &ByteRange, - repo: &Session, + session: &Session, ) -> StoreResult { - let reader = repo.get_chunk_reader(&path, &coords, byte_range).await?; + let reader = session.get_chunk_reader(&path, &coords, byte_range).await?; // then we can 
fetch the bytes without holding the lock let chunk = get_chunk(reader).await?; @@ -849,8 +817,12 @@ async fn get_chunk_bytes( ) } -async fn get_metadata_size(key: &str, path: &Path, repo: &Session) -> StoreResult { - let bytes = get_metadata(key, path, &ByteRange::From(0), repo).await?; +async fn get_metadata_size( + key: &str, + path: &Path, + session: &Session, +) -> StoreResult { + let bytes = get_metadata(key, path, &ByteRange::From(0), session).await?; Ok(bytes.len() as u64) } @@ -858,9 +830,9 @@ async fn get_chunk_size( _key: &str, path: &Path, coords: &ChunkIndices, - repo: &Session, + session: &Session, ) -> StoreResult { - let chunk_ref = repo.get_chunk_ref(path, coords).await?; + let chunk_ref = session.get_chunk_ref(path, coords).await?; let size = chunk_ref .map(|payload| match payload { ChunkPayload::Inline(bytes) => bytes.len() as u64, @@ -874,14 +846,14 @@ async fn get_chunk_size( async fn get_key( key: &str, byte_range: &ByteRange, - repo: &Session, + session: &Session, ) -> StoreResult { let bytes = match Key::parse(key)? { Key::Metadata { node_path } => { - get_metadata(key, &node_path, byte_range, repo).await + get_metadata(key, &node_path, byte_range, session).await } Key::Chunk { node_path, coords } => { - get_chunk_bytes(key, node_path, coords, byte_range, repo).await + get_chunk_bytes(key, node_path, coords, byte_range, session).await } Key::ZarrV2(key) => { Err(StoreErrorKind::NotFound(KeyNotFoundError::ZarrV2KeyNotFound { key }) @@ -1037,153 +1009,39 @@ impl Display for Key { #[serde_as] #[derive(Debug, Serialize, Deserialize, PartialEq)] struct ArrayMetadata { - zarr_format: u8, + pub shape: Vec, + #[serde(deserialize_with = "validate_array_node_type")] node_type: String, - #[serde(skip_serializing_if = "Option::is_none")] - attributes: Option, - #[serde(flatten)] - #[serde_as(as = "TryFromInto")] - zarr_metadata: ZarrArrayMetadata, -} - -#[serde_as] -#[derive(Serialize, Deserialize)] -pub struct ZarrArrayMetadataSerialzer { - pub shape: ArrayShape, - pub data_type: DataType, #[serde_as(as = "TryFromInto")] - #[serde(rename = "chunk_grid")] - pub chunk_shape: ChunkShape, + pub chunk_grid: Vec, - #[serde_as(as = "TryFromInto")] - pub chunk_key_encoding: ChunkKeyEncoding, - pub fill_value: serde_json::Value, - pub codecs: Vec, - #[serde(skip_serializing_if = "Option::is_none")] - pub storage_transformers: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - // each dimension name can be null in Zarr - pub dimension_names: Option, + pub dimension_names: Option>>, } -impl TryFrom for ZarrArrayMetadata { - type Error = IcechunkFormatError; - - fn try_from(value: ZarrArrayMetadataSerialzer) -> Result { - let ZarrArrayMetadataSerialzer { - shape, - data_type, - chunk_shape, - chunk_key_encoding, - fill_value, - codecs, - storage_transformers, - dimension_names, - } = value; - { - let fill_value = FillValue::from_data_type_and_json(&data_type, &fill_value)?; - Ok(ZarrArrayMetadata { - fill_value, - shape, - data_type, - chunk_shape, - chunk_key_encoding, - codecs, - storage_transformers, - dimension_names, - }) - } +impl ArrayMetadata { + fn dimension_names(&self) -> Option> { + self.dimension_names + .as_ref() + .map(|ds| ds.iter().map(|d| d.as_ref().map(|s| s.as_str()).into()).collect()) } -} - -impl From for ZarrArrayMetadataSerialzer { - fn from(value: ZarrArrayMetadata) -> Self { - let ZarrArrayMetadata { - shape, - data_type, - chunk_shape, - chunk_key_encoding, - fill_value, - codecs, - storage_transformers, - dimension_names, - } = value; - { - fn 
fill_value_to_json(f: FillValue) -> serde_json::Value { - match f { - FillValue::Bool(b) => b.into(), - FillValue::Int8(n) => n.into(), - FillValue::Int16(n) => n.into(), - FillValue::Int32(n) => n.into(), - FillValue::Int64(n) => n.into(), - FillValue::UInt8(n) => n.into(), - FillValue::UInt16(n) => n.into(), - FillValue::UInt32(n) => n.into(), - FillValue::UInt64(n) => n.into(), - FillValue::Float16(f) => { - if f.is_nan() { - FillValue::NAN_STR.into() - } else if f == f32::INFINITY { - FillValue::INF_STR.into() - } else if f == f32::NEG_INFINITY { - FillValue::NEG_INF_STR.into() - } else { - f.into() - } - } - FillValue::Float32(f) => { - if f.is_nan() { - FillValue::NAN_STR.into() - } else if f == f32::INFINITY { - FillValue::INF_STR.into() - } else if f == f32::NEG_INFINITY { - FillValue::NEG_INF_STR.into() - } else { - f.into() - } - } - FillValue::Float64(f) => { - if f.is_nan() { - FillValue::NAN_STR.into() - } else if f == f64::INFINITY { - FillValue::INF_STR.into() - } else if f == f64::NEG_INFINITY { - FillValue::NEG_INF_STR.into() - } else { - f.into() - } - } - FillValue::Complex64(r, i) => ([r, i].as_ref()).into(), - FillValue::Complex128(r, i) => ([r, i].as_ref()).into(), - FillValue::String(s) => s.into(), - FillValue::Bytes(b) => b.into(), - } - } - let fill_value = fill_value_to_json(fill_value); - ZarrArrayMetadataSerialzer { - shape, - data_type, - chunk_shape, - chunk_key_encoding, - codecs, - storage_transformers, - dimension_names, - fill_value, - } + fn shape(&self) -> Option { + if self.shape.len() != self.chunk_grid.len() { + None + } else { + ArrayShape::new( + self.shape.iter().zip(self.chunk_grid.iter()).map(|(a, b)| (*a, *b)), + ) } } } #[derive(Debug, Serialize, Deserialize)] struct GroupMetadata { - zarr_format: u8, #[serde(deserialize_with = "validate_group_node_type")] node_type: String, - #[serde(skip_serializing_if = "Option::is_none")] - attributes: Option, } fn validate_group_node_type<'de, D>(d: D) -> Result @@ -1218,49 +1076,18 @@ where Ok(value) } -impl ArrayMetadata { - fn new(attributes: Option, zarr_metadata: ZarrArrayMetadata) -> Self { - Self { zarr_format: 3, node_type: "array".to_string(), attributes, zarr_metadata } - } - - fn to_bytes(&self) -> Bytes { - Bytes::from_iter( - // We can unpack because it comes from controlled datastructures that can be serialized - #[allow(clippy::expect_used)] - serde_json::to_vec(self).expect("bug in ArrayMetadata serialization"), - ) - } -} - -impl GroupMetadata { - fn new(attributes: Option) -> Self { - Self { zarr_format: 3, node_type: "group".to_string(), attributes } - } - - fn to_bytes(&self) -> Bytes { - Bytes::from_iter( - // We can unpack because it comes from controlled datastructures that can be serialized - #[allow(clippy::expect_used)] - serde_json::to_vec(self).expect("bug in GroupMetadata serialization"), - ) - } -} - #[derive(Serialize, Deserialize)] struct NameConfigSerializer { name: String, configuration: serde_json::Value, } -impl From for NameConfigSerializer { - fn from(value: ChunkShape) -> Self { +impl From> for NameConfigSerializer { + fn from(value: Vec) -> Self { let arr = serde_json::Value::Array( value - .0 .iter() - .map(|v| { - serde_json::Value::Number(serde_json::value::Number::from(v.get())) - }) + .map(|v| serde_json::Value::Number(serde_json::value::Number::from(*v))) .collect(), ); let kvs = serde_json::value::Map::from_iter(iter::once(( @@ -1274,7 +1101,7 @@ impl From for NameConfigSerializer { } } -impl TryFrom for ChunkShape { +impl TryFrom for Vec { type Error = 
&'static str; fn try_from(value: NameConfigSerializer) -> Result { @@ -1289,52 +1116,16 @@ impl TryFrom for ChunkShape { .ok_or("cannot parse ChunkShape")?; let shape = values .iter() - .map(|v| v.as_u64().and_then(|u64| NonZeroU64::try_from(u64).ok())) + .map(|v| v.as_u64()) .collect::>>() .ok_or("cannot parse ChunkShape")?; - Ok(ChunkShape(shape)) + Ok(shape) } _ => Err("cannot parse ChunkShape"), } } } -impl From for NameConfigSerializer { - fn from(_value: ChunkKeyEncoding) -> Self { - let kvs = serde_json::value::Map::from_iter(iter::once(( - "separator".to_string(), - serde_json::Value::String("/".to_string()), - ))); - Self { - name: "default".to_string(), - configuration: serde_json::Value::Object(kvs), - } - } -} - -impl TryFrom for ChunkKeyEncoding { - type Error = &'static str; - - fn try_from(value: NameConfigSerializer) -> Result { - //FIXME: we are hardcoding / as the separator - match value { - NameConfigSerializer { - name, - configuration: serde_json::Value::Object(kvs), - } if name == "default" => { - if let Some("/") = - kvs.get("separator").ok_or("cannot parse ChunkKeyEncoding")?.as_str() - { - Ok(ChunkKeyEncoding::Slash) - } else { - Err("cannot parse ChunkKeyEncoding") - } - } - _ => Err("cannot parse ChunkKeyEncoding"), - } - } -} - #[cfg(test)] #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod tests { @@ -1537,97 +1328,28 @@ mod tests { .is_err()); assert!(serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ) - .is_ok()); + r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ) + .is_ok()); assert!(serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"group","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ) - .is_err()); + r#"{"zarr_format":3,"node_type":"group","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ) + .is_err()); // deserialize with nan - assert!(matches!( - serde_json::from_str::( - 
r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float16(n) if n.is_nan() - )); - assert!(matches!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float32(n) if n.is_nan() - )); - assert!(matches!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float64","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float64(n) if n.is_nan() - )); - - // deserialize with infinity - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float16(f32::INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float32(f32::INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float64","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float64(f64::INFINITY) - ); - - // deserialize with -infinity assert_eq!( - serde_json::from_str::( - 
r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"-Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float16(f32::NEG_INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"-Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float32(f32::NEG_INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float64","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"-Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float64(f64::NEG_INFINITY) - ); - - // infinity roundtrip - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![NonZeroU64::new(2).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; - let zarr_meta = ArrayMetadata::new(None, zarr_meta); + serde_json::from_str::( + r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ).unwrap().dimension_names(), + Some(vec!["x".into(), "y".into(), "t".into()]) + ); assert_eq!( - serde_json::from_str::( - serde_json::to_string(&zarr_meta).unwrap().as_str() - ) - .unwrap(), - zarr_meta, - ) + serde_json::from_str::( + r#"{"zarr_format":3,"node_type":"array","shape":[2,3,4],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,2,3]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ).unwrap().shape(), + ArrayShape::new(vec![(2,1), (3,2), (4,3) ]) + ); } #[tokio::test] @@ -1644,7 +1366,7 @@ mod tests { store .set( "zarr.json", - Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group"}"#), + 
Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group"}"#), ) .await?; assert_eq!( @@ -1652,13 +1374,13 @@ mod tests { Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group"}"#) ); - store.set("a/b/zarr.json", Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group", "attributes": {"spam":"ham", "eggs":42}}"#)).await?; + store.set("a/b/zarr.json", Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group","attributes":{"spam":"ham","eggs":42}}"#)).await?; assert_eq!( - store.get("a/b/zarr.json", &ByteRange::ALL).await.unwrap(), - Bytes::copy_from_slice( - br#"{"zarr_format":3,"node_type":"group","attributes":{"eggs":42,"spam":"ham"}}"# - ) - ); + store.get("a/b/zarr.json", &ByteRange::ALL).await.unwrap(), + Bytes::copy_from_slice( + br#"{"zarr_format":3,"node_type":"group","attributes":{"spam":"ham","eggs":42}}"# + ) + ); let zarr_meta = Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"array","attributes":{"foo":42},"shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"#); store.set("a/b/array/zarr.json", zarr_meta.clone()).await?; @@ -2568,7 +2290,7 @@ mod tests { store .set( "zarr.json", - Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group"}"#), + Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group"}"#), ) .await .unwrap(); diff --git a/icechunk/src/strategies.rs b/icechunk/src/strategies.rs index 3719f0db..cd3d6555 100644 --- a/icechunk/src/strategies.rs +++ b/icechunk/src/strategies.rs @@ -6,12 +6,8 @@ use prop::string::string_regex; use proptest::prelude::*; use proptest::{collection::vec, option, strategy::Strategy}; -use crate::format::snapshot::ZarrArrayMetadata; +use crate::format::snapshot::{ArrayShape, DimensionName}; use crate::format::{ChunkIndices, Path}; -use crate::metadata::{ - ArrayShape, ChunkKeyEncoding, ChunkShape, Codec, DimensionNames, FillValue, - StorageTransformer, -}; use crate::session::Session; use crate::storage::new_in_memory_storage; use crate::Repository; @@ -62,25 +58,10 @@ prop_compose! { } } -pub fn codecs() -> impl Strategy> { - prop_oneof![Just(vec![Codec { name: "mycodec".to_string(), configuration: None }]),] -} - -pub fn storage_transformers() -> impl Strategy>> { - prop_oneof![ - Just(Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }])), - Just(None), - ] -} - #[derive(Debug)] pub struct ShapeDim { - shape: ArrayShape, - chunk_shape: ChunkShape, - dimension_names: Option, + pub shape: ArrayShape, + pub dimension_names: Option>, } pub fn shapes_and_dims(max_ndim: Option) -> impl Strategy { @@ -105,32 +86,37 @@ pub fn shapes_and_dims(max_ndim: Option) -> impl Strategy()), ndim))) }) .prop_map(|(shape, chunk_shape, dimension_names)| ShapeDim { - shape, - chunk_shape: ChunkShape(chunk_shape), - dimension_names, + #[allow(clippy::expect_used)] + shape: ArrayShape::new( + shape.into_iter().zip(chunk_shape.iter().map(|n| n.get())), + ) + .expect("Invalid array shape"), + dimension_names: dimension_names.map(|ds| { + ds.iter().map(|s| From::from(s.as_ref().map(|s| s.as_str()))).collect() + }), }) } -prop_compose! 
{ - pub fn zarr_array_metadata()( - chunk_key_encoding: ChunkKeyEncoding, - fill_value: FillValue, - shape_and_dim in shapes_and_dims(None), - storage_transformers in storage_transformers(), - codecs in codecs(), - ) -> ZarrArrayMetadata { - ZarrArrayMetadata { - shape: shape_and_dim.shape, - data_type: fill_value.get_data_type(), - chunk_shape: shape_and_dim.chunk_shape, - chunk_key_encoding, - fill_value, - codecs, - storage_transformers, - dimension_names: shape_and_dim.dimension_names, - } - } -} +//prop_compose! { +// pub fn zarr_array_metadata()( +// chunk_key_encoding: ChunkKeyEncoding, +// fill_value: FillValue, +// shape_and_dim in shapes_and_dims(None), +// storage_transformers in storage_transformers(), +// codecs in codecs(), +// ) -> ZarrArrayMetadata { +// ZarrArrayMetadata { +// shape: shape_and_dim.shape, +// data_type: fill_value.get_data_type(), +// chunk_shape: shape_and_dim.chunk_shape, +// chunk_key_encoding, +// fill_value, +// codecs, +// storage_transformers, +// dimension_names: shape_and_dim.dimension_names, +// } +// } +//} prop_compose! { pub fn chunk_indices(dim: usize, values_in: Range)(v in proptest::collection::vec(values_in, dim..(dim+1))) -> ChunkIndices { diff --git a/icechunk/tests/test_concurrency.rs b/icechunk/tests/test_concurrency.rs index 41418fd4..a153237d 100644 --- a/icechunk/tests/test_concurrency.rs +++ b/icechunk/tests/test_concurrency.rs @@ -1,10 +1,7 @@ #![allow(clippy::expect_used, clippy::unwrap_used)] use bytes::Bytes; use icechunk::{ - format::{snapshot::ZarrArrayMetadata, ByteRange, ChunkIndices, Path}, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, + format::{snapshot::ArrayShape, ByteRange, ChunkIndices, Path}, session::{get_chunk, Session}, storage::new_in_memory_storage, Repository, Storage, @@ -13,7 +10,6 @@ use pretty_assertions::assert_eq; use rand::{rng, Rng}; use std::{ collections::{HashMap, HashSet}, - num::NonZeroU64, sync::Arc, time::Duration, }; @@ -39,26 +35,11 @@ async fn test_concurrency() -> Result<(), Box> { let repo = Repository::create(None, storage, HashMap::new()).await?; let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![N as u64, N as u64], - data_type: DataType::Float64, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).expect("Cannot create NonZero"), - NonZeroU64::new(1).expect("Cannot create NonZero"), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float64(f64::NAN), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("x".to_string()), Some("y".to_string())]), - }; - + let shape = ArrayShape::new(vec![(N as u64, 1), (N as u64, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into()]); + let user_data = Bytes::new(); let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(new_array_path.clone(), shape, dimension_names, user_data).await?; let ds = Arc::new(RwLock::new(ds)); @@ -154,8 +135,7 @@ async fn list_task(ds: Arc>, barrier: Arc) { expected_indices.insert(ChunkIndices(vec![x, y])); } } - let expected_nodes: HashSet = - HashSet::from_iter(vec!["/".to_string(), "/array".to_string()]); + let expected_nodes: HashSet = 
HashSet::from_iter(vec!["/array".to_string()]); barrier.wait().await; loop { diff --git a/icechunk/tests/test_distributed_writes.rs b/icechunk/tests/test_distributed_writes.rs index e12f21d3..f5e25158 100644 --- a/icechunk/tests/test_distributed_writes.rs +++ b/icechunk/tests/test_distributed_writes.rs @@ -1,12 +1,11 @@ #![allow(clippy::unwrap_used)] use pretty_assertions::assert_eq; -use std::{collections::HashMap, num::NonZeroU64, ops::Range, sync::Arc}; +use std::{collections::HashMap, ops::Range, sync::Arc}; use bytes::Bytes; use icechunk::{ config::{S3Credentials, S3Options, S3StaticCredentials}, - format::{snapshot::ZarrArrayMetadata, ByteRange, ChunkIndices, Path, SnapshotId}, - metadata::{ChunkKeyEncoding, ChunkShape, DataType, FillValue}, + format::{snapshot::ArrayShape, ByteRange, ChunkIndices, Path, SnapshotId}, repository::VersionInfo, session::{get_chunk, Session}, storage::new_s3_storage, @@ -124,22 +123,11 @@ async fn test_distributed_writes() -> Result<(), Box Result<(), Box> { let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![1100], - data_type: DataType::Int8, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).expect("Cannot create NonZero")]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int8(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; + let user_data = Bytes::new(); + ds.add_group(Path::root(), user_data.clone()).await?; + + let shape = ArrayShape::new(vec![(1100, 1)]).unwrap(); let array_path: Path = "/array".try_into().unwrap(); - ds.add_array(array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(array_path.clone(), shape, None, user_data.clone()).await?; // we write more than 1k chunks to go beyond the chunk size for object listing and delete for idx in 0..1100 { let bytes = Bytes::copy_from_slice(&42i8.to_be_bytes()); @@ -200,56 +192,57 @@ async fn make_design_doc_repo( repo: &mut Repository, ) -> Result, Box> { let mut session = repo.writable_session("main").await?; - session.add_group(Path::root()).await?; + let user_data = Bytes::new(); + session.add_group(Path::root(), user_data.clone()).await?; session.commit("1", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/2").unwrap()).await?; + session.add_group(Path::try_from("/2").unwrap(), user_data.clone()).await?; let commit_2 = session.commit("2", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/4").unwrap()).await?; + session.add_group(Path::try_from("/4").unwrap(), user_data.clone()).await?; session.commit("4", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/5").unwrap()).await?; + session.add_group(Path::try_from("/5").unwrap(), user_data.clone()).await?; let snap_id = session.commit("5", None).await?; repo.create_tag("tag2", &snap_id).await?; repo.create_branch("develop", &commit_2).await?; let mut session = repo.writable_session("develop").await?; - session.add_group(Path::try_from("/3").unwrap()).await?; + session.add_group(Path::try_from("/3").unwrap(), user_data.clone()).await?; let snap_id = session.commit("3", None).await?; repo.create_tag("tag1", &snap_id).await?; let mut session = repo.writable_session("develop").await?; - session.add_group(Path::try_from("/6").unwrap()).await?; + session.add_group(Path::try_from("/6").unwrap(), user_data.clone()).await?; let commit_6 = 
session.commit("6", None).await?; repo.create_branch("test", &commit_6).await?; let mut session = repo.writable_session("test").await?; - session.add_group(Path::try_from("/7").unwrap()).await?; + session.add_group(Path::try_from("/7").unwrap(), user_data.clone()).await?; let commit_7 = session.commit("7", None).await?; let expire_older_than = Utc::now(); repo.create_branch("qa", &commit_7).await?; let mut session = repo.writable_session("qa").await?; - session.add_group(Path::try_from("/8").unwrap()).await?; + session.add_group(Path::try_from("/8").unwrap(), user_data.clone()).await?; session.commit("8", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/12").unwrap()).await?; + session.add_group(Path::try_from("/12").unwrap(), user_data.clone()).await?; session.commit("12", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/13").unwrap()).await?; + session.add_group(Path::try_from("/13").unwrap(), user_data.clone()).await?; session.commit("13", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/14").unwrap()).await?; + session.add_group(Path::try_from("/14").unwrap(), user_data.clone()).await?; session.commit("14", None).await?; let mut session = repo.writable_session("develop").await?; - session.add_group(Path::try_from("/10").unwrap()).await?; + session.add_group(Path::try_from("/10").unwrap(), user_data.clone()).await?; session.commit("10", None).await?; let mut session = repo.writable_session("develop").await?; - session.add_group(Path::try_from("/11").unwrap()).await?; + session.add_group(Path::try_from("/11").unwrap(), user_data.clone()).await?; session.commit("11", None).await?; let mut session = repo.writable_session("test").await?; - session.add_group(Path::try_from("/9").unwrap()).await?; + session.add_group(Path::try_from("/9").unwrap(), user_data.clone()).await?; session.commit("9", None).await?; // let's double check the structuer of the commit tree diff --git a/icechunk/tests/test_virtual_refs.rs b/icechunk/tests/test_virtual_refs.rs index 64034c6f..03cfd665 100644 --- a/icechunk/tests/test_virtual_refs.rs +++ b/icechunk/tests/test_virtual_refs.rs @@ -9,10 +9,9 @@ mod tests { Checksum, ChunkPayload, SecondsSinceEpoch, VirtualChunkLocation, VirtualChunkRef, VirtualReferenceErrorKind, }, - snapshot::ZarrArrayMetadata, + snapshot::ArrayShape, ByteRange, ChunkId, ChunkIndices, Path, }, - metadata::{ChunkKeyEncoding, ChunkShape, DataType, FillValue}, repository::VersionInfo, session::{get_chunk, SessionErrorKind}, storage::{ @@ -25,7 +24,6 @@ mod tests { use std::{ collections::{HashMap, HashSet}, error::Error, - num::NonZeroU64, path::PathBuf, vec, }; @@ -196,20 +194,9 @@ mod tests { let repo_dir = TempDir::new()?; let repo = create_local_repository(repo_dir.path()).await; let mut ds = repo.writable_session("main").await.unwrap(); - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; + + let shape = ArrayShape::new(vec![(1, 1), (1, 1), (2, 1)]).unwrap(); + let user_data = Bytes::new(); let payload1 = ChunkPayload::Virtual(VirtualChunkRef { location: 
VirtualChunkLocation::from_absolute_path(&format!( // intentional extra '/' @@ -231,7 +218,7 @@ mod tests { }); let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await.unwrap(); + ds.add_array(new_array_path.clone(), shape, None, user_data).await.unwrap(); ds.set_chunk_ref( new_array_path.clone(), @@ -313,20 +300,8 @@ mod tests { let repo = create_minio_repository().await; let mut ds = repo.writable_session("main").await.unwrap(); - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; + let shape = ArrayShape::new(vec![(1, 1), (1, 1), (2, 1)]).unwrap(); + let user_data = Bytes::new(); let payload1 = ChunkPayload::Virtual(VirtualChunkRef { location: VirtualChunkLocation::from_absolute_path(&format!( // intentional extra '/' @@ -348,7 +323,7 @@ mod tests { }); let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await.unwrap(); + ds.add_array(new_array_path.clone(), shape, None, user_data).await.unwrap(); ds.set_chunk_ref( new_array_path.clone(),